Upload 43 files
- .gitattributes +7 -0
- .python-version +1 -0
- LICENSE +21 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4 +3 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/config.json +15 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.mp4 +0 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json +15 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4 +3 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json +15 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4 +3 -0
- apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json +15 -0
- apps/gradio_app.py +136 -0
- apps/gradio_app/__init__.py +0 -0
- apps/gradio_app/abc.py +0 -0
- apps/gradio_app/inference.py +104 -0
- apps/gradio_app/new-inference.py +104 -0
- apps/gradio_app/old-inference.py +73 -0
- apps/gradio_app/setup_scripts.py +46 -0
- apps/gradio_app/static/__init__.py +0 -0
- apps/gradio_app/static/scripts.js +50 -0
- apps/gradio_app/static/styles.css +154 -0
- assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.gif +3 -0
- assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.gif +3 -0
- assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json +1 -1
- assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.gif +3 -0
- assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json +1 -1
- assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.gif +3 -0
- assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json +1 -1
- configs/config_multi_videos.yaml +131 -0
- notebooks/zeroscope_v2_576w_Ghibli_LoRA-Inference.ipynb +0 -0
- notebooks/zeroscope_v2_576w_Ghibli_LoRA-Training.ipynb +802 -0
- requirements/requirements.txt +26 -0
- requirements/requirements_compatible.txt +23 -0
- scripts/download_ckpts.py +96 -0
- scripts/process_dataset.py +48 -0
- scripts/setup_third_party.py +38 -0
- src/text2video_ghibli_style/inference.py +96 -0
- src/text2video_ghibli_style/train.py +73 -0
- src/third_party/.gitkeep +0 -0
.gitattributes
CHANGED
@@ -39,3 +39,10 @@ assets/zeroscope_v2_576w-Ghibli-LoRA/examples/4/Studio_Ghibli_style_Two_women_wa
 assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4 filter=lfs diff=lfs merge=lfs -text
 assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4 filter=lfs diff=lfs merge=lfs -text
 assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4 filter=lfs diff=lfs merge=lfs -text
+apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4 filter=lfs diff=lfs merge=lfs -text
+apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4 filter=lfs diff=lfs merge=lfs -text
+apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.gif filter=lfs diff=lfs merge=lfs -text
+assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.gif filter=lfs diff=lfs merge=lfs -text
+assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.gif filter=lfs diff=lfs merge=lfs -text
+assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.gif filter=lfs diff=lfs merge=lfs -text
.python-version
ADDED
@@ -0,0 +1 @@
+3.11.13
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Danh Tran
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a77fed344231dc9e9cf3b271646183b84c2edbe94cd15bf2d2b192cec9ac89ae
+size 288959
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/config.json
ADDED
@@ -0,0 +1,15 @@
+{
+    "prompt": "A dog is running with Ghibli style",
+    "negative-prompt": "ugly, noise, fragment, blur, static video",
+    "height": 512,
+    "width": 288,
+    "num-frames": 24,
+    "num-steps": 50,
+    "guidance_scale": 12,
+    "fps": 16,
+    "lora_rank": 64,
+    "lora_scale": 1.0,
+    "noise_prior": 0.0,
+    "seed": 42,
+    "video": "A_dog_is_running_with_Ghibli_style_42.mp4"
+}
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.mp4
ADDED
Binary file (60.9 kB)
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json
ADDED
@@ -0,0 +1,15 @@
+{
+    "prompt": "A girl is walking with Ghibli style",
+    "negative-prompt": "ugly, noise, fragment, blur, static video",
+    "height": 384,
+    "width": 384,
+    "num-frames": 28,
+    "num-steps": 50,
+    "guidance_scale": 15,
+    "fps": 16,
+    "lora_rank": 128,
+    "lora_scale": 0.8,
+    "noise_prior": 0.3,
+    "seed": 0,
+    "video": "A_girl_is_walking_with_Ghibli_style_0.mp4"
+}
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a833dba0ad2cd264556d40e586b3a1cb7656e0239f9cae30f82ea635ed75d3b
+size 156033
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json
ADDED
@@ -0,0 +1,15 @@
+{
+    "prompt": "Studio Ghibli style. Young man contemplates, walks away from ivy-covered yellow building.",
+    "negative-prompt": "ugly, noise, fragment, blur, static video",
+    "height": 384,
+    "width": 384,
+    "num-frames": 28,
+    "num-steps": 50,
+    "guidance_scale": 15,
+    "fps": 16,
+    "lora_rank": 32,
+    "lora_scale": 0.9,
+    "noise_prior": 0.3,
+    "seed": 12345,
+    "video": "Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.mp4"
+}
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd172829c209c5cc4fc064190891161a1145adceda7766676b8b8d8d57100156
+size 134892
apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json
ADDED
@@ -0,0 +1,15 @@
+{
+    "prompt": "Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
+    "negative-prompt": "ugly, noise, fragment, blur, static video",
+    "height": 512,
+    "width": 512,
+    "num-frames": 16,
+    "num-steps": 50,
+    "guidance_scale": 30,
+    "fps": 16,
+    "lora_rank": 96,
+    "lora_scale": 0.7,
+    "noise_prior": 0.1,
+    "seed": 100,
+    "video": "Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.mp4"
+}
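Each example folder pairs a pre-rendered video with the JSON config that produced it. For reference, a minimal sketch (not part of the commit) of reading one of these configs; note the mixed key styles, "num-frames" and "num-steps" with hyphens versus "guidance_scale" and "lora_rank" with underscores, which apps/gradio_app.py below accounts for when loading examples:

import json

# Read example 1's config; the keys mirror the CLI flags used at inference time.
with open("apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/config.json") as f:
    cfg = json.load(f)

print(cfg["prompt"])          # "A dog is running with Ghibli style"
print(cfg["num-frames"])      # hyphenated key: 24
print(cfg["guidance_scale"])  # underscored key: 12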
apps/gradio_app.py
ADDED
@@ -0,0 +1,136 @@
+import os
+import gradio as gr
+import json
+from gradio_app.inference import run_inference, run_setup_script
+
+def create_app():
+    # Run setup script at startup
+    setup_output = run_setup_script()
+    # Load CSS file
+    CSS = open("apps/gradio_app/static/styles.css", "r").read()
+
+    with gr.Blocks(css=CSS) as app:
+        gr.HTML('<script src="file=apps/gradio_app/static/scripts.js"></script>')
+        gr.Markdown(
+            """
+            # Text to Video Ghibli style
+            Generate videos using the `zeroscope_v2_576w` model with Studio Ghibli style LoRA.
+            """
+        )
+
+        with gr.Row(elem_classes="row-container"):
+            with gr.Column(elem_classes="column-container"):
+                model_path = gr.Dropdown(
+                    label="Base Model",
+                    choices=["./ckpts/zeroscope_v2_576w"],
+                    value="./ckpts/zeroscope_v2_576w"
+                )
+                checkpoint_folder = gr.Dropdown(
+                    label="LoRA folder",
+                    choices=["./ckpts/zeroscope_v2_576w-Ghibli-LoRA"],
+                    value="./ckpts/zeroscope_v2_576w-Ghibli-LoRA"
+                )
+                prompt = gr.Textbox(
+                    label="Prompt",
+                    value="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible."
+                )
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="ugly, noise, fragment, blur, static video"
+                )
+
+                # Video Dimensions & Timing
+                with gr.Row(elem_classes="slider-row"):
+                    with gr.Group(elem_classes="slider-group"):
+                        gr.Markdown("### Video Dimensions & Timing")
+                        width = gr.Slider(label="Width", minimum=256, maximum=1024, step=8, value=512)
+                        height = gr.Slider(label="Height", minimum=256, maximum=1024, step=8, value=512)
+                        num_frames = gr.Slider(label="Number of Frames", minimum=8, maximum=64, step=1, value=16)
+                        fps = gr.Slider(label="FPS", minimum=10, maximum=60, step=1, value=16)
+                        seed = gr.Number(label="Seed", value=100)
+
+                generate_btn = gr.Button("Generate Video", elem_classes="generate-btn")
+
+            with gr.Column(elem_classes="column-container"):
+                video_output = gr.Video(label="Generated Video")
+                log_output = gr.Textbox(label="Logs", lines=3, max_lines=20)
+
+                # Model Parameters
+                with gr.Row(elem_classes="slider-row"):
+                    with gr.Group(elem_classes="slider-group"):
+                        gr.Markdown("### Model Parameters")
+                        num_steps = gr.Slider(label="Number of Steps", minimum=10, maximum=100, step=1, value=50)
+                        guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=50.0, step=0.1, value=30.0)
+                        lora_rank = gr.Slider(label="LoRA Rank", minimum=16, maximum=128, step=8, value=96)
+                        lora_scale = gr.Slider(label="LoRA Scale", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
+                        noise_prior = gr.Slider(label="Noise Prior", minimum=0.0, maximum=1.0, step=0.01, value=0.1)
+
+        # Example Buttons Section
+        gr.Markdown("## Example Configurations")
+        example_base_path = "apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA"
+        example_buttons = []
+        configs = []
+
+        for i in range(1, 5):
+            example_dir = os.path.join(example_base_path, str(i))
+            config_path = os.path.join(example_dir, "config.json")
+            if os.path.exists(config_path):
+                with open(config_path, "r") as f:
+                    config = json.load(f)
+                video_path = os.path.join(example_dir, config["video"])
+                if os.path.exists(video_path):
+                    configs.append((config, video_path))
+                    example_buttons.append(gr.Button(f"Load Example {i}"))
+
+        def create_example_fn(config, video_path):
+            def load_example():
+                return [
+                    "./ckpts/zeroscope_v2_576w",  # model_path
+                    "./ckpts/zeroscope_v2_576w-Ghibli-LoRA",  # checkpoint_folder
+                    config.get("prompt", ""),
+                    config.get("negative-prompt", ""),
+                    config.get("width", 512),
+                    config.get("height", 512),
+                    config.get("num-frames", 16),
+                    config.get("num-steps", 50),
+                    config.get("guidance_scale", 30.0),
+                    config.get("fps", 16),
+                    config.get("lora_rank", 96),
+                    config.get("lora_scale", 0.7),
+                    config.get("noise_prior", 0.1),
+                    config.get("seed", 100),
+                    video_path,  # video_output
+                    f"Loaded example with prompt: {config.get('prompt', '')}"  # log_output
+                ]
+            return load_example
+
+        for btn, (config, video_path) in zip(example_buttons, configs):
+            btn.click(
+                fn=create_example_fn(config, video_path),
+                inputs=[],
+                outputs=[
+                    model_path, checkpoint_folder, prompt, negative_prompt,
+                    width, height, num_frames, num_steps, guidance_scale,
+                    fps, lora_rank, lora_scale, noise_prior, seed,
+                    video_output, log_output
+                ]
+            )
+
+        generate_btn.click(
+            fn=run_inference,
+            inputs=[
+                model_path, checkpoint_folder, prompt, negative_prompt,
+                width, height, num_frames, num_steps, guidance_scale,
+                fps, lora_rank, lora_scale, noise_prior, seed
+            ],
+            outputs=[video_output, log_output]
+        )
+
+        gr.Markdown("""
+        This repository is trained from [](https://github.com/danhtran2mind/MotionDirector), a fork of [](https://github.com/showlab/MotionDirector), with numerous bug fixes and rewritten code for improved performance and stability.
+        """)
+    return app
+
+if __name__ == "__main__":
+    app = create_app()
+    app.launch()
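A minimal launch sketch (an illustration, not part of the commit), assuming the working directory is the repository root; running the file as a script puts apps/ on sys.path, which is what lets `from gradio_app.inference import ...` resolve:

import subprocess
import sys

# Start the demo; create_app() runs the setup script first, so the initial
# launch also clones third-party code and downloads checkpoints.
subprocess.run([sys.executable, "apps/gradio_app.py"], check=True)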
apps/gradio_app/__init__.py
ADDED
File without changes

apps/gradio_app/abc.py
ADDED
File without changes
apps/gradio_app/inference.py
ADDED
@@ -0,0 +1,104 @@
+import sys
+import os
+import subprocess
+from pathlib import Path
+import uuid
+import torch
+
+# Append the current directory to sys.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+def run_setup_script():
+    setup_script = os.path.join(os.path.dirname(__file__), "setup_scripts.py")
+    try:
+        result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
+        return result.stdout
+    except subprocess.CalledProcessError as e:
+        return f"Setup script failed: {e.stderr}"
+
+def run_inference(
+    model_path="./ckpts/zeroscope_v2_576w",
+    checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
+    prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
+    negative_prompt="ugly, noise, fragment, blur, static video",
+    width=256,
+    height=256,
+    num_frames=8,
+    num_steps=30,
+    guidance_scale=30.0,
+    fps=8,
+    lora_rank=32,
+    lora_scale=0.7,
+    noise_prior=0.1,
+    # device="cuda",
+    seed=100
+):
+    print("Start Inference")
+    output_dir = "apps/gradio_app/temp_data"
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Get list of files in output_dir
+    for file_name in os.listdir(output_dir):
+        # Check if file ends with .mp4
+        if file_name.endswith(".mp4"):
+            # Remove the file
+            os.remove(os.path.join(output_dir, file_name))
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    command = [
+        "python", "src/third_party/MotionDirector/main_inference.py",
+        "--model", model_path,
+        "--checkpoint_folder", checkpoint_folder,
+        "--prompt", prompt,
+        "--negative-prompt", negative_prompt,
+        "--width", str(width),
+        "--height", str(height),
+        "--num-frames", str(num_frames),
+        "--num-steps", str(num_steps),
+        "--guidance-scale", str(guidance_scale),
+        "--fps", str(fps),
+        "--lora_rank", str(lora_rank),
+        "--lora_scale", str(lora_scale),
+        "--noise_prior", str(noise_prior),
+        "--device", device,
+        "--seed", str(seed),
+        "--output_dir", output_dir,
+        "--no-prompt-name"
+    ]
+
+    # Use Popen to execute the command
+    process = subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        bufsize=1  # Line buffering
+    )
+
+    # Read output line-by-line in real-time
+    output_lines = []
+    try:
+        for line in process.stdout:
+            output_lines.append(line.strip())
+    except Exception as e:
+        return None, f"Error reading output: {str(e)}"
+
+    # Capture stderr and wait for process to complete
+    stderr_output = process.communicate()[1]
+    if process.returncode != 0:
+        return None, f"Error: {stderr_output.strip()}"
+
+    # Check for MP4 files in output directory
+    output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
+    if output_file:
+        output_path = os.path.join(output_dir, output_file[-1])
+        if os.path.exists(output_path):
+            return output_path, "\n".join(output_lines)
+        else:
+            return None, f"Video file not found at {output_path}\nLogs:\n" + "\n".join(output_lines)
+    return None, f"No MP4 files found in {output_dir}\nLogs:\n" + "\n".join(output_lines)
+
+if __name__ == "__main__":
+    # Example usage
+    video_path, logs = run_inference()
+    print(f"Generated Video: {video_path}")
+    print(f"Logs: {logs}")
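For reference, a sketch of calling run_inference directly with the settings from example 1's config.json (an illustration, not part of the commit; it assumes apps/ is on sys.path, as it is when the Gradio app runs, and that scripts/download_ckpts.py has already populated ./ckpts):

from gradio_app.inference import run_inference

# Values borrowed from apps/assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/config.json.
video_path, logs = run_inference(
    prompt="A dog is running with Ghibli style",
    negative_prompt="ugly, noise, fragment, blur, static video",
    width=288,
    height=512,
    num_frames=24,
    num_steps=50,
    guidance_scale=12,
    fps=16,
    lora_rank=64,
    lora_scale=1.0,
    noise_prior=0.0,
    seed=42,
)
print(video_path or logs)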
apps/gradio_app/new-inference.py
ADDED
@@ -0,0 +1,104 @@
+import sys
+import os
+import subprocess
+from pathlib import Path
+import uuid
+import torch
+
+# Append the current directory to sys.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+def run_setup_script():
+    setup_script = os.path.join(os.path.dirname(__file__), "setup_scripts.py")
+    try:
+        result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
+        return result.stdout
+    except subprocess.CalledProcessError as e:
+        return f"Setup script failed: {e.stderr}"
+
+def run_inference(
+    model_path="./ckpts/zeroscope_v2_576w",
+    checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
+    prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
+    negative_prompt="ugly, noise, fragment, blur, static video",
+    width=256,
+    height=256,
+    num_frames=8,
+    num_steps=30,
+    guidance_scale=30.0,
+    fps=8,
+    lora_rank=32,
+    lora_scale=0.7,
+    noise_prior=0.1,
+    device="cuda",
+    seed=100
+):
+    print("Start Inference")
+    output_dir = "apps/gradio_app/temp_data"
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Get list of files in output_dir
+    for file_name in os.listdir(output_dir):
+        # Check if file ends with .mp4
+        if file_name.endswith(".mp4"):
+            # Remove the file
+            os.remove(os.path.join(output_dir, file_name))
+
+    command = [
+        "python", "src/third_party/MotionDirector/main_inference.py",
+        "--model", model_path,
+        "--checkpoint_folder", checkpoint_folder,
+        "--prompt", prompt,
+        "--negative-prompt", negative_prompt,
+        "--width", str(width),
+        "--height", str(height),
+        "--num-frames", str(num_frames),
+        "--num-steps", str(num_steps),
+        "--guidance-scale", str(guidance_scale),
+        "--fps", str(fps),
+        "--lora_rank", str(lora_rank),
+        "--lora_scale", str(lora_scale),
+        "--noise_prior", str(noise_prior),
+        "--device", device,
+        "--seed", str(seed),
+        "--output_dir", output_dir,
+        "--no-prompt-name"
+    ]
+
+    # Use Popen to execute the command
+    process = subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        bufsize=1  # Line buffering
+    )
+
+    # Read output line-by-line in real-time
+    output_lines = []
+    try:
+        for line in process.stdout:
+            output_lines.append(line.strip())
+    except Exception as e:
+        return None, f"Error reading output: {str(e)}"
+
+    # Capture stderr and wait for process to complete
+    stderr_output = process.communicate()[1]
+    if process.returncode != 0:
+        return None, f"Error: {stderr_output.strip()}"
+
+    # Check for MP4 files in output directory
+    output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
+    if output_file:
+        output_path = os.path.join(output_dir, output_file[-1])
+        if os.path.exists(output_path):
+            return output_path, "\n".join(output_lines)
+        else:
+            return None, f"Video file not found at {output_path}\nLogs:\n" + "\n".join(output_lines)
+    return None, f"No MP4 files found in {output_dir}\nLogs:\n" + "\n".join(output_lines)
+
+if __name__ == "__main__":
+    # Example usage
+    video_path, logs = run_inference(device="cpu" if not torch.cuda.is_available() else "cuda")
+    print(f"Generated Video: {video_path}")
+    print(f"Logs: {logs}")
apps/gradio_app/old-inference.py
ADDED
@@ -0,0 +1,73 @@
+import os
+import sys
+import subprocess
+from pathlib import Path
+import uuid
+import torch
+
+# Append the current directory to sys.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+def run_setup_script():
+    setup_script = os.path.join(os.path.dirname(__file__), "setup_scripts.py")
+    try:
+        result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
+        return result.stdout
+    except subprocess.CalledProcessError as e:
+        return f"Setup script failed: {e.stderr}"
+
+def run_inference(
+    model_path="./ckpts/zeroscope_v2_576w",
+    checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
+    prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
+    negative_prompt="ugly, noise, fragment, blur, static video",
+    width=512,
+    height=512,
+    num_frames=16,
+    num_steps=50,
+    guidance_scale=30.0,
+    fps=16,
+    lora_rank=96,
+    lora_scale=0.7,
+    noise_prior=0.1,
+    device="cuda",
+    seed=100
+):
+    output_dir = "apps/gradio_app/temp_data"
+    os.makedirs(output_dir, exist_ok=True)
+
+    command = [
+        "python", "src/third_party/MotionDirector/main_inference.py",
+        "--model", model_path,
+        "--checkpoint_folder", checkpoint_folder,
+        "--prompt", prompt,
+        "--negative-prompt", negative_prompt,
+        "--width", str(width),
+        "--height", str(height),
+        "--num-frames", str(num_frames),
+        "--num-steps", str(num_steps),
+        "--guidance-scale", str(guidance_scale),
+        "--fps", str(fps),
+        "--lora_rank", str(lora_rank),
+        "--lora_scale", str(lora_scale),
+        "--noise_prior", str(noise_prior),
+        "--device", device,
+        "--seed", str(seed),
+        "--output_dir", output_dir,
+        "--no-prompt-name"
+    ]
+
+    output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
+    print(os.path.join(output_dir, output_file[0]) if output_file else "No MP4 files found.")
+
+    try:
+        result = subprocess.run(command, capture_output=True, text=True, check=True)
+        return str(output_file), result.stdout
+    except subprocess.CalledProcessError as e:
+        return None, f"Error: {e.stderr}"
+
+if __name__ == "__main__":
+    # Example usage
+    video, logs = run_inference(device="cpu" if not torch.cuda.is_available() else "cuda")
+    print(f"Generated Video: {video}")
+    print(f"Logs: {logs}")
apps/gradio_app/setup_scripts.py
ADDED
@@ -0,0 +1,46 @@
+import subprocess
+import sys
+import os
+
+def run_script(script_path):
+    """
+    Run a Python script using subprocess and handle potential errors.
+    Returns True if successful, False otherwise.
+    """
+    try:
+        result = subprocess.run(
+            [sys.executable, script_path],
+            check=True,
+            text=True,
+            capture_output=True
+        )
+        print(f"Successfully executed {script_path}")
+        print(result.stdout)
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"Error executing {script_path}:")
+        print(e.stderr)
+        return False
+    except FileNotFoundError:
+        print(f"Script not found: {script_path}")
+        return False
+
+def main():
+    """
+    Main function to execute setup_third_party.py and download_ckpts.py in sequence.
+    """
+    scripts_dir = "scripts"
+    scripts = [
+        os.path.join(scripts_dir, "setup_third_party.py"),
+        os.path.join(scripts_dir, "download_ckpts.py")
+    ]
+
+    for script in scripts:
+        print(f"Start running {script}\n")
+        if not run_script(script):
+            print(f"Stopping execution due to error in {script}")
+            sys.exit(1)
+        print(f"Completed {script}\n")
+
+if __name__ == "__main__":
+    main()
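The same two-step setup can also be triggered outside the Gradio app, for example before a headless run (a sketch, not part of the commit; it assumes the repository root as the working directory, since main() resolves scripts/ relatively):

import runpy

# Equivalent to `python apps/gradio_app/setup_scripts.py`: runs the file under
# __main__, cloning third-party code and then downloading checkpoints.
runpy.run_path("apps/gradio_app/setup_scripts.py", run_name="__main__")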
apps/gradio_app/static/__init__.py
ADDED
File without changes
apps/gradio_app/static/scripts.js
ADDED
@@ -0,0 +1,50 @@
+document.addEventListener('DOMContentLoaded', () => {
+    // Add loading animation to generate button
+    const generateBtn = document.querySelector('.generate-btn');
+    if (generateBtn) {
+        generateBtn.addEventListener('click', () => {
+            generateBtn.textContent = 'Generating...';
+            generateBtn.disabled = true;
+            generateBtn.style.opacity = '0.7';
+
+            // Reset button after 2 seconds (simulating async operation)
+            setTimeout(() => {
+                generateBtn.textContent = 'Generate Video';
+                generateBtn.disabled = false;
+                generateBtn.style.opacity = '1';
+            }, 2000);
+        });
+    }
+
+    // Add input validation feedback
+    const inputs = document.querySelectorAll('input[type="text"]');
+    inputs.forEach(input => {
+        input.addEventListener('input', () => {
+            if (input.value.trim() === '') {
+                input.style.borderColor = '#e53e3e';
+            } else {
+                input.style.borderColor = '#4c51bf';
+            }
+        });
+    });
+
+    // Add subtle animation to sliders
+    const sliders = document.querySelectorAll('input[type="range"]');
+    sliders.forEach(slider => {
+        slider.addEventListener('input', () => {
+            slider.style.transform = 'scale(1.02)';
+            setTimeout(() => {
+                slider.style.transform = 'scale(1)';
+            }, 200);
+        });
+    });
+
+    // Auto-resize textarea
+    const textarea = document.querySelector('textarea');
+    if (textarea) {
+        textarea.addEventListener('input', () => {
+            textarea.style.height = 'auto';
+            textarea.style.height = `${textarea.scrollHeight}px`;
+        });
+    }
+});
apps/gradio_app/static/styles.css
ADDED
@@ -0,0 +1,154 @@
+:root {
+    --primary-color: #007bff;
+    --secondary-color: #6c757d;
+    --background-light: #f8f9fa;
+    --background-dark: #1a1a1a;
+    --text-light: #212529;
+    --text-dark: #e9ecef;
+    --accent-color: #28a745;
+    --border-color-light: #dee2e6;
+    --border-color-dark: #343a40;
+    --button-hover-light: #0056b3;
+    --button-hover-dark: #4dabf7;
+    --shadow-light: rgba(0, 0, 0, 0.1);
+    --shadow-dark: rgba(255, 255, 255, 0.1);
+}
+
+body {
+    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+    margin: 0;
+    padding: 20px;
+    transition: background-color 0.3s, color 0.3s;
+}
+
+.light-theme {
+    background-color: var(--background-light);
+    color: var(--text-light);
+}
+
+.dark-theme {
+    background-color: var(--background-dark);
+    color: var(--text-dark);
+}
+
+.row-container {
+    display: flex;
+    gap: 20px;
+    margin-bottom: 20px;
+}
+
+.column-container {
+    background: var(--background-light);
+    border: 1px solid var(--border-color-light);
+    border-radius: 8px;
+    padding: 20px;
+    box-shadow: 0 2px 4px var(--shadow-light);
+    transition: background-color 0.3s, border-color 0.3s, box-shadow 0.3s;
+}
+
+.dark-theme .column-container {
+    background: var(--background-dark);
+    border-color: var(--border-color-dark);
+    box-shadow: 0 2px 4px var(--shadow-dark);
+}
+
+.slider-row {
+    margin: 10px 0;
+}
+
+.slider-group {
+    background: rgba(255, 255, 255, 0.5);
+    border-radius: 6px;
+    padding: 15px;
+    border: 1px solid var(--border-color-light);
+    transition: background-color 0.3s, border-color 0.3s;
+}
+
+.dark-theme .slider-group {
+    background: rgba(0, 0, 0, 0.3);
+    border-color: var(--border-color-dark);
+}
+
+.generate-btn {
+    background-color: var(--primary-color);
+    color: white;
+    border: none;
+    padding: 12px 24px;
+    border-radius: 6px;
+    font-size: 16px;
+    font-weight: 500;
+    cursor: pointer;
+    transition: background-color 0.3s, transform 0.2s;
+}
+
+.generate-btn:hover {
+    background-color: var(--button-hover-light);
+    transform: translateY(-2px);
+}
+
+.dark-theme .generate-btn {
+    background-color: var(--primary-color);
+}
+
+.dark-theme .generate-btn:hover {
+    background-color: var(--button-hover-dark);
+}
+
+.gr-button, .gr-textbox, .gr-slider, .gr-dropdown, .gr-number, .gr-video, .gr-markdown {
+    border-radius: 6px !important;
+    border: 1px solid var(--border-color-light) !important;
+    transition: border-color 0.3s, background-color 0.3s;
+}
+
+.dark-theme .gr-button,
+.dark-theme .gr-textbox,
+.dark-theme .gr-slider,
+.dark-theme .gr-dropdown,
+.dark-theme .gr-number,
+.dark-theme .gr-video,
+.dark-theme .gr-markdown {
+    border-color: var(--border-color-dark) !important;
+    background-color: rgba(255, 255, 255, 0.05) !important;
+}
+
+.gr-textbox input, .gr-number input {
+    background: transparent !important;
+    color: inherit !important;
+    font-size: 14px;
+}
+
+.gr-slider input[type="range"] {
+    accent-color: var(--primary-color);
+}
+
+.gr-dropdown select {
+    background: transparent !important;
+    color: inherit !important;
+    padding: 8px;
+}
+
+.gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
+    font-weight: 600;
+    margin-bottom: 10px;
+    color: var(--primary-color);
+}
+
+.dark-theme .gr-markdown h1,
+.dark-theme .gr-markdown h2,
+.dark-theme .gr-markdown h3 {
+    color: var(--button-hover-dark);
+}
+
+@media (max-width: 768px) {
+    .row-container {
+        flex-direction: column;
+    }
+
+    .column-container {
+        padding: 15px;
+    }
+
+    .generate-btn {
+        width: 100%;
+    }
+}
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/1/A_dog_is_running_with_Ghibli_style_42.gif
ADDED
Git LFS Details

assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/A_girl_is_walking_with_Ghibli_style_0.gif
ADDED
Git LFS Details
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/2/config.json
CHANGED
@@ -4,7 +4,7 @@
     "height": 384,
     "width": 384,
     "num-frames": 28,
-    "num-steps": 50
+    "num-steps": 50,
     "guidance_scale": 15,
     "fps": 16,
     "lora_rank": 128,
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/Studio_Ghibli_style_Young_man_contemplates_walks_away_from_ivy-covered_yellow_building_12345.gif
ADDED
Git LFS Details
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/3/config.json
CHANGED
@@ -4,7 +4,7 @@
     "height": 384,
     "width": 384,
     "num-frames": 28,
-    "num-steps": 50
+    "num-steps": 50,
     "guidance_scale": 15,
     "fps": 16,
     "lora_rank": 32,
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/Studio_Ghibli_style_Two_women_walk_down_coastal_village_path_toward_sea_passing_colorful_houses_sailboats_visible_100.gif
ADDED
Git LFS Details
assets/examples/zeroscope_v2_576w-Ghibli-LoRA/4/config.json
CHANGED
@@ -4,7 +4,7 @@
     "height": 512,
     "width": 512,
     "num-frames": 16,
-    "num-steps": 50
+    "num-steps": 50,
     "guidance_scale": 30,
     "fps": 16,
     "lora_rank": 96,
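Each of these three changes adds the comma that was missing after "num-steps": 50, which had made the files unparsable JSON. A quick sanity check that every example config now loads (a sketch, not part of the commit):

import glob
import json

# json.load raises json.JSONDecodeError if any config is still malformed.
for path in sorted(glob.glob("assets/examples/zeroscope_v2_576w-Ghibli-LoRA/*/config.json")):
    with open(path) as f:
        json.load(f)
    print(f"OK: {path}")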
configs/config_multi_videos.yaml
ADDED
@@ -0,0 +1,131 @@
+# Pretrained diffusers model path.
+pretrained_model_path: "./ckpts/zeroscope_v2_576w"
+# pretrained_model_path: "./ckpts/text-to-video-ms-1.7b"
+# The folder where your training outputs will be placed.
+output_dir: "./zeroscope_v2_576w-Ghibli-LoRA"
+# resume_step: 500
+# resume_from_checkpoint: "./zeroscope_v2_576w-Scenery_Anime_Bright-lora/train_2025-07-10T13-46-57"
+# lora_path: "zeroscope_v2_576w-Scenery_Anime_Bright-lora/checkpoint-500" # This argument is used for training resumption
+# lora_path: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/checkpoint-200
+
+dataset_types:
+  - 'folder'
+
+# Caches the latents (Frames-Image -> VAE -> Latent) to a HDD or SDD.
+# The latents will be saved under your training folder, and loaded automatically for training.
+# This both saves memory and speeds up training, and takes very little disk space.
+cache_latents: True
+
+
+# If you have cached latents set to `True` and have a directory of cached latents,
+# you can skip the caching process and load previously saved ones.
+cached_latent_dir: null # /path/to/cached_latents
+# cached_latent_dir: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/cached_latents
+
+# Use LoRA for the UNET model.
+use_unet_lora: True
+
+# LoRA Dropout. This parameter adds the probability of randomly zeroing out elements. Helps prevent overfitting.
+# See: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
+lora_unet_dropout: 0.1
+
+# Choose whether or not to save the full pretrained model weights for both checkpoints and after training.
+# The only time you want this off is if you're doing full LoRA training.
+save_pretrained_model: True
+# save_pretrained_model: True
+
+# The rank for LoRA training. With ModelScope, the maximum should be 1024.
+# VRAM increases with higher rank, lower when decreased.
+lora_rank: 16
+
+# Training data parameters
+train_data:
+  # 'multiple videos'
+  path: "./data/ghibli/videos"
+  # The width and height in which you want your training data to be resized to.
+  width: 384
+  height: 384
+
+  # This will find the closest aspect ratio to your input width and height.
+  # For example, 512x512 width and height with a video of resolution 1280x720 will be resized to 512x256
+  use_bucketing: True
+  gradient_accumulation_steps: 2
+  batch_size: 1
+  # The start frame index where your videos should start (Leave this at one for json and folder based training).
+  sample_start_idx: 1
+
+  # Used for 'folder'. The rate at which your frames are sampled. Does nothing for 'json' and 'single_video' dataset.
+  fps: 16
+
+  # For 'single_video' and 'json'. The number of frames to "step" (1,2,3,4) (frame_step=2) -> (1,3,5,7, ...).
+  frame_step: 1
+
+  # The number of frames to sample. The higher this number, the higher the VRAM (acts similar to batch size).
+  n_sample_frames: 24
+
+  # The prompt when using a single video file
+  # fallback_prompt: "A person is riding a bicycle."
+
+# Validation data parameters.
+validation_data:
+  # A custom prompt that is different from your training dataset.
+  prompt:
+    - "Studio Ghibli style. The video showcases a vibrant and lively scene set in the early."
+    - "Studio Ghibli style. A woman with black hair is holding a gun in her hand."
+
+  # Whether or not to sample preview during training (Requires more VRAM).
+  # sample_preview: True
+  sample_preview: False
+
+  # The number of frames to sample during validation.
+  num_frames: 24
+
+  # Height and width of validation sample.
+  width: 384
+  height: 384
+
+  # Number of inference steps when generating the video.
+  num_inference_steps: 15
+
+  # CFG scale
+  guidance_scale: 12
+
+  # scale of spatial LoRAs, default is 0
+  spatial_scale: 0
+
+  # scale of noise prior, i.e. the scale of inversion noises
+  noise_prior: 0
+
+use_offset_noise: False
+offset_noise_strength: 0.
+
+# Learning rate for AdamW
+learning_rate: 5e-4
+
+# Weight decay. Higher = more regularization. Lower = closer to dataset.
+adam_weight_decay: 1e-4
+
+# Maximum number of train steps. Model is saved after training.
+max_train_steps: 5000
+
+# Saves a model every nth step.
+checkpointing_steps: 5000
+
+# How many steps to do for validation if sample_preview is enabled.
+validation_steps: 5000
+
+# Whether or not we want to use mixed precision with accelerate
+mixed_precision: "fp16"
+# mixed_precision: "no"
+
+# Trades VRAM usage for speed. You lose roughly 20% of training speed, but save a lot of VRAM.
+# If you need to save more VRAM, it can also be enabled for the text encoder, but reduces speed x2.
+gradient_checkpointing: True
+text_encoder_gradient_checkpointing: True
+
+# Xformers must be installed for best memory savings and performance (< Pytorch 2.0)
+enable_xformers_memory_efficient_attention: True
+use_8bit_adam: True
+
+# Use scaled dot product attention (Only available with >= Torch 2.0)
+enable_torch_2_attn: True
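This config drives MotionDirector-style multi-video LoRA training. A hedged launch sketch follows; the entry-point name MotionDirector_train.py and its --config flag mirror upstream MotionDirector's documented usage and are assumptions here, so check src/third_party/MotionDirector (or src/text2video_ghibli_style/train.py) for this repository's exact interface:

import subprocess

# Launch training with the YAML above; checkpoints and cached latents land
# under output_dir from the config.
subprocess.run(
    [
        "python",
        "src/third_party/MotionDirector/MotionDirector_train.py",
        "--config", "configs/config_multi_videos.yaml",
    ],
    check=True,
)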
notebooks/zeroscope_v2_576w_Ghibli_LoRA-Inference.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/zeroscope_v2_576w_Ghibli_LoRA-Training.ipynb
ADDED
@@ -0,0 +1,802 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+    "execution": {
+     "iopub.execute_input": "2025-07-16T05:27:38.872329Z",
+     "iopub.status.busy": "2025-07-16T05:27:38.872068Z",
+     "iopub.status.idle": "2025-07-16T05:29:50.846263Z",
+     "shell.execute_reply": "2025-07-16T05:29:50.845486Z",
+     "shell.execute_reply.started": "2025-07-16T05:27:38.872302Z"
+    },
+    "trusted": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/content\n",
+      "Cloning into 'MotionDirector'...\n",
+      "remote: Enumerating objects: 657, done.\u001b[K\n",
+      "remote: Counting objects: 100% (163/163), done.\u001b[K\n",
+      "remote: Compressing objects: 100% (82/82), done.\u001b[K\n",
+      "remote: Total 657 (delta 108), reused 88 (delta 81), pack-reused 494 (from 1)\u001b[K\n",
+      "Receiving objects: 100% (657/657), 132.29 MiB | 50.34 MiB/s, done.\n",
+      "Resolving deltas: 100% (349/349), done.\n",
+      "/content/MotionDirector\n",
+      " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m362.1/362.1 kB\u001b[0m \u001b[31m24.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m90.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m44.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m73.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m62.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.9/72.9 MB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.8/44.8 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.6/35.6 MB\u001b[0m \u001b[31m47.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.9/294.9 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h Building wheel for deepspeed (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 56 |
+
" Building wheel for lora_diffusion (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 57 |
+
" Building wheel for loralib (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 58 |
+
" Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 59 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
| 60 |
+
"bigframes 2.8.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.\n",
|
| 61 |
+
"google-api-core 1.34.1 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<4.0.0dev,>=3.19.5, but you have protobuf 4.25.8 which is incompatible.\n",
|
| 62 |
+
"pandas-gbq 0.29.1 requires google-api-core<3.0.0,>=2.10.2, but you have google-api-core 1.34.1 which is incompatible.\n",
|
| 63 |
+
"google-cloud-storage 2.19.0 requires google-api-core<3.0.0dev,>=2.15.0, but you have google-api-core 1.34.1 which is incompatible.\n",
|
| 64 |
+
"dataproc-spark-connect 0.7.5 requires google-api-core>=2.19, but you have google-api-core 1.34.1 which is incompatible.\n",
|
| 65 |
+
"bigframes 2.8.0 requires google-cloud-bigquery[bqstorage,pandas]>=3.31.0, but you have google-cloud-bigquery 3.25.0 which is incompatible.\n",
|
| 66 |
+
"bigframes 2.8.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
|
| 67 |
+
"\u001b[0m"
|
| 68 |
+
]
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
+
"source": [
|
| 72 |
+
"# install packages\n",
|
| 73 |
+
"%cd /content\n",
|
| 74 |
+
"!git clone https://github.com/danhtran2mind/MotionDirector\n",
|
| 75 |
+
"%cd MotionDirector\n",
|
| 76 |
+
"!pip install -r requirements.txt -q"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 2,
|
| 82 |
+
"metadata": {
|
| 83 |
+
"execution": {
|
| 84 |
+
"iopub.execute_input": "2025-07-16T05:29:50.848033Z",
|
| 85 |
+
"iopub.status.busy": "2025-07-16T05:29:50.847771Z",
|
| 86 |
+
"iopub.status.idle": "2025-07-16T05:29:54.955247Z",
|
| 87 |
+
"shell.execute_reply": "2025-07-16T05:29:54.954373Z",
|
| 88 |
+
"shell.execute_reply.started": "2025-07-16T05:29:50.848010Z"
|
| 89 |
+
},
|
| 90 |
+
"trusted": true
|
| 91 |
+
},
|
| 92 |
+
"outputs": [
|
| 93 |
+
{
|
| 94 |
+
"name": "stdout",
|
| 95 |
+
"output_type": "stream",
|
| 96 |
+
"text": [
|
| 97 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m235.8/235.8 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 98 |
+
"\u001b[?25h"
|
| 99 |
+
]
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"source": [
|
| 103 |
+
"!pip install -q bitsandbytes unidecode"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": 3,
|
| 109 |
+
"metadata": {
|
| 110 |
+
"execution": {
|
| 111 |
+
"iopub.execute_input": "2025-07-16T05:29:54.956819Z",
|
| 112 |
+
"iopub.status.busy": "2025-07-16T05:29:54.956511Z",
|
| 113 |
+
"iopub.status.idle": "2025-07-16T05:29:54.963707Z",
|
| 114 |
+
"shell.execute_reply": "2025-07-16T05:29:54.962891Z",
|
| 115 |
+
"shell.execute_reply.started": "2025-07-16T05:29:54.956786Z"
|
| 116 |
+
},
|
| 117 |
+
"trusted": true
|
| 118 |
+
},
|
| 119 |
+
"outputs": [],
|
| 120 |
+
"source": [
|
| 121 |
+
"import os\n",
|
| 122 |
+
"import shutil\n",
|
| 123 |
+
"import random\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"def copy_file_pairs(source_dir, dest_dir, max_pairs=20, seed=None):\n",
|
| 126 |
+
" if seed is not None:\n",
|
| 127 |
+
" random.seed(seed)\n",
|
| 128 |
+
" os.makedirs(dest_dir, exist_ok=True)\n",
|
| 129 |
+
" mp4_files = [f for f in os.listdir(source_dir) if f.endswith('.mp4')]\n",
|
| 130 |
+
" selected_mp4_files = random.sample(mp4_files, min(len(mp4_files), max_pairs))\n",
|
| 131 |
+
" for mp4 in selected_mp4_files:\n",
|
| 132 |
+
" base = os.path.splitext(mp4)[0]\n",
|
| 133 |
+
" txt = f\"{base}.txt\"\n",
|
| 134 |
+
" if os.path.exists(os.path.join(source_dir, txt)):\n",
|
| 135 |
+
" shutil.copy2(os.path.join(source_dir, mp4), os.path.join(dest_dir, mp4))\n",
|
| 136 |
+
" shutil.copy2(os.path.join(source_dir, txt), os.path.join(dest_dir, txt))\n",
|
| 137 |
+
" return len(selected_mp4_files)\n"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"execution_count": 4,
|
| 143 |
+
"metadata": {
|
| 144 |
+
"execution": {
|
| 145 |
+
"iopub.execute_input": "2025-07-16T05:29:54.965605Z",
|
| 146 |
+
"iopub.status.busy": "2025-07-16T05:29:54.965374Z",
|
| 147 |
+
"iopub.status.idle": "2025-07-16T05:30:00.766653Z",
|
| 148 |
+
"shell.execute_reply": "2025-07-16T05:30:00.766019Z",
|
| 149 |
+
"shell.execute_reply.started": "2025-07-16T05:29:54.965578Z"
|
| 150 |
+
},
|
| 151 |
+
"trusted": true
|
| 152 |
+
},
|
| 153 |
+
"outputs": [
|
| 154 |
+
{
|
| 155 |
+
"data": {
|
| 156 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 157 |
+
"model_id": "2b0f51df7a5047bd8d404fca30add463",
|
| 158 |
+
"version_major": 2,
|
| 159 |
+
"version_minor": 0
|
| 160 |
+
},
|
| 161 |
+
"text/plain": [
|
| 162 |
+
"Fetching 2 files: 0%| | 0/2 [00:00<?, ?it/s]"
|
| 163 |
+
]
|
| 164 |
+
},
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"output_type": "display_data"
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"data": {
|
| 170 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 171 |
+
"model_id": "b800f95e15fb4064854902602aa3a3dd",
|
| 172 |
+
"version_major": 2,
|
| 173 |
+
"version_minor": 0
|
| 174 |
+
},
|
| 175 |
+
"text/plain": [
|
| 176 |
+
".gitattributes: 0.00B [00:00, ?B/s]"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
"metadata": {},
|
| 180 |
+
"output_type": "display_data"
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"data": {
|
| 184 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 185 |
+
"model_id": "6a6cc4ba523f4a18adbc9ffaa3525340",
|
| 186 |
+
"version_major": 2,
|
| 187 |
+
"version_minor": 0
|
| 188 |
+
},
|
| 189 |
+
"text/plain": [
|
| 190 |
+
"studio_ghibli_wan14b_t2v_v01_dataset.zip: 0%| | 0.00/300M [00:00<?, ?B/s]"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"output_type": "display_data"
|
| 195 |
+
}
|
| 196 |
+
],
|
| 197 |
+
"source": [
|
| 198 |
+
"import os\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"from huggingface_hub import snapshot_download\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"# Create directory if it doesn't exist\n",
|
| 203 |
+
"os.makedirs(\"data/ghibli/raw\", exist_ok=True)\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"# Download the dataset using snapshot_download\n",
|
| 206 |
+
"snapshot_download(repo_id=\"raymondt/ghibi_t2v\", \n",
|
| 207 |
+
" local_dir=\"data/ghibli/raw\", \n",
|
| 208 |
+
" repo_type=\"dataset\")\n",
|
| 209 |
+
"\n",
|
| 210 |
+
"# Assuming the zip file is downloaded, unzip it to the target directory\n",
|
| 211 |
+
"import zipfile\n",
|
| 212 |
+
"zip_path = \"data/ghibli/raw/studio_ghibli_wan14b_t2v_v01_dataset.zip\"\n",
|
| 213 |
+
"extract_path = \"data/ghibli/raw\"\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n",
|
| 216 |
+
" zip_ref.extractall(extract_path)\n",
|
| 217 |
+
"\n"
|
| 218 |
+
]
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
"cell_type": "code",
|
| 222 |
+
"execution_count": 5,
|
| 223 |
+
"metadata": {
|
| 224 |
+
"execution": {
|
| 225 |
+
"iopub.execute_input": "2025-07-16T05:30:00.767663Z",
|
| 226 |
+
"iopub.status.busy": "2025-07-16T05:30:00.767419Z",
|
| 227 |
+
"iopub.status.idle": "2025-07-16T05:30:01.056828Z",
|
| 228 |
+
"shell.execute_reply": "2025-07-16T05:30:01.056142Z",
|
| 229 |
+
"shell.execute_reply.started": "2025-07-16T05:30:00.767643Z"
|
| 230 |
+
},
|
| 231 |
+
"trusted": true
|
| 232 |
+
},
|
| 233 |
+
"outputs": [
|
| 234 |
+
{
|
| 235 |
+
"name": "stdout",
|
| 236 |
+
"output_type": "stream",
|
| 237 |
+
"text": [
|
| 238 |
+
"Copied 240 pairs to data/ghibli/videos\n"
|
| 239 |
+
]
|
| 240 |
+
}
|
| 241 |
+
],
|
| 242 |
+
"source": [
|
| 243 |
+
"# Copy the videos directory to the desired location\n",
|
| 244 |
+
"source = \"data/ghibli/raw/videos/1920x1040\"\n",
|
| 245 |
+
"dest = \"data/ghibli/videos\"\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"copied = copy_file_pairs(source, dest, max_pairs=240, seed=42)\n",
|
| 248 |
+
"print(f\"Copied {copied} pairs to {dest}\")"
|
| 249 |
+
]
|
| 250 |
+
},
|
| 251 |
+
{
|
| 252 |
+
"cell_type": "code",
|
| 253 |
+
"execution_count": 6,
|
| 254 |
+
"metadata": {
|
| 255 |
+
"execution": {
|
| 256 |
+
"iopub.execute_input": "2025-07-16T05:30:01.057811Z",
|
| 257 |
+
"iopub.status.busy": "2025-07-16T05:30:01.057583Z",
|
| 258 |
+
"iopub.status.idle": "2025-07-16T05:30:29.547286Z",
|
| 259 |
+
"shell.execute_reply": "2025-07-16T05:30:29.546428Z",
|
| 260 |
+
"shell.execute_reply.started": "2025-07-16T05:30:01.057784Z"
|
| 261 |
+
},
|
| 262 |
+
"trusted": true
|
| 263 |
+
},
|
| 264 |
+
"outputs": [
|
| 265 |
+
{
|
| 266 |
+
"data": {
|
| 267 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 268 |
+
"model_id": "98bbd6ea501745bcabdb7f89bdb7af95",
|
| 269 |
+
"version_major": 2,
|
| 270 |
+
"version_minor": 0
|
| 271 |
+
},
|
| 272 |
+
"text/plain": [
|
| 273 |
+
"Fetching 16 files: 0%| | 0/16 [00:00<?, ?it/s]"
|
| 274 |
+
]
|
| 275 |
+
},
|
| 276 |
+
"metadata": {},
|
| 277 |
+
"output_type": "display_data"
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"data": {
|
| 281 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 282 |
+
"model_id": "6b20bb02614d4dfdb037484f1bbf06ed",
|
| 283 |
+
"version_major": 2,
|
| 284 |
+
"version_minor": 0
|
| 285 |
+
},
|
| 286 |
+
"text/plain": [
|
| 287 |
+
"merges.txt: 0.00B [00:00, ?B/s]"
|
| 288 |
+
]
|
| 289 |
+
},
|
| 290 |
+
"metadata": {},
|
| 291 |
+
"output_type": "display_data"
|
| 292 |
+
},
|
| 293 |
+
{
|
| 294 |
+
"data": {
|
| 295 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 296 |
+
"model_id": "d7d127ca77964c7a9ab58b55dced9701",
|
| 297 |
+
"version_major": 2,
|
| 298 |
+
"version_minor": 0
|
| 299 |
+
},
|
| 300 |
+
"text/plain": [
|
| 301 |
+
"model_index.json: 0%| | 0.00/384 [00:00<?, ?B/s]"
|
| 302 |
+
]
|
| 303 |
+
},
|
| 304 |
+
"metadata": {},
|
| 305 |
+
"output_type": "display_data"
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"data": {
|
| 309 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 310 |
+
"model_id": "1b9272edd1784c12baaa2826a1effcd3",
|
| 311 |
+
"version_major": 2,
|
| 312 |
+
"version_minor": 0
|
| 313 |
+
},
|
| 314 |
+
"text/plain": [
|
| 315 |
+
"README.md: 0.00B [00:00, ?B/s]"
|
| 316 |
+
]
|
| 317 |
+
},
|
| 318 |
+
"metadata": {},
|
| 319 |
+
"output_type": "display_data"
|
| 320 |
+
},
|
| 321 |
+
{
|
| 322 |
+
"data": {
|
| 323 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 324 |
+
"model_id": "8f7a542514544640af6d140501a7a05b",
|
| 325 |
+
"version_major": 2,
|
| 326 |
+
"version_minor": 0
|
| 327 |
+
},
|
| 328 |
+
"text/plain": [
|
| 329 |
+
"special_tokens_map.json: 0%| | 0.00/460 [00:00<?, ?B/s]"
|
| 330 |
+
]
|
| 331 |
+
},
|
| 332 |
+
"metadata": {},
|
| 333 |
+
"output_type": "display_data"
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"data": {
|
| 337 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 338 |
+
"model_id": "d33b170a93dd430386b8bd5f045f3419",
|
| 339 |
+
"version_major": 2,
|
| 340 |
+
"version_minor": 0
|
| 341 |
+
},
|
| 342 |
+
"text/plain": [
|
| 343 |
+
"scheduler_config.json: 0%| | 0.00/465 [00:00<?, ?B/s]"
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
"metadata": {},
|
| 347 |
+
"output_type": "display_data"
|
| 348 |
+
},
|
| 349 |
+
{
|
| 350 |
+
"data": {
|
| 351 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 352 |
+
"model_id": "d64f3a8914004c2283450052563fa0cd",
|
| 353 |
+
"version_major": 2,
|
| 354 |
+
"version_minor": 0
|
| 355 |
+
},
|
| 356 |
+
"text/plain": [
|
| 357 |
+
".gitattributes: 0.00B [00:00, ?B/s]"
|
| 358 |
+
]
|
| 359 |
+
},
|
| 360 |
+
"metadata": {},
|
| 361 |
+
"output_type": "display_data"
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"data": {
|
| 365 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 366 |
+
"model_id": "7909289ad6a747f4a6fbd81db836e7b5",
|
| 367 |
+
"version_major": 2,
|
| 368 |
+
"version_minor": 0
|
| 369 |
+
},
|
| 370 |
+
"text/plain": [
|
| 371 |
+
"config.json: 0%| | 0.00/609 [00:00<?, ?B/s]"
|
| 372 |
+
]
|
| 373 |
+
},
|
| 374 |
+
"metadata": {},
|
| 375 |
+
"output_type": "display_data"
|
| 376 |
+
},
|
| 377 |
+
{
|
| 378 |
+
"data": {
|
| 379 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 380 |
+
"model_id": "d562fad6588646f3929d7dfd805e001e",
|
| 381 |
+
"version_major": 2,
|
| 382 |
+
"version_minor": 0
|
| 383 |
+
},
|
| 384 |
+
"text/plain": [
|
| 385 |
+
"pytorch_model.bin: 0%| | 0.00/681M [00:00<?, ?B/s]"
|
| 386 |
+
]
|
| 387 |
+
},
|
| 388 |
+
"metadata": {},
|
| 389 |
+
"output_type": "display_data"
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"data": {
|
| 393 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 394 |
+
"model_id": "0b4361967a684e7c99075ba14b7ec864",
|
| 395 |
+
"version_major": 2,
|
| 396 |
+
"version_minor": 0
|
| 397 |
+
},
|
| 398 |
+
"text/plain": [
|
| 399 |
+
"config.json: 0%| | 0.00/727 [00:00<?, ?B/s]"
|
| 400 |
+
]
|
| 401 |
+
},
|
| 402 |
+
"metadata": {},
|
| 403 |
+
"output_type": "display_data"
|
| 404 |
+
},
|
| 405 |
+
{
|
| 406 |
+
"data": {
|
| 407 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 408 |
+
"model_id": "55a17f9d827c4656b1ae272d679b0a26",
|
| 409 |
+
"version_major": 2,
|
| 410 |
+
"version_minor": 0
|
| 411 |
+
},
|
| 412 |
+
"text/plain": [
|
| 413 |
+
"tokenizer_config.json: 0%| | 0.00/737 [00:00<?, ?B/s]"
|
| 414 |
+
]
|
| 415 |
+
},
|
| 416 |
+
"metadata": {},
|
| 417 |
+
"output_type": "display_data"
|
| 418 |
+
},
|
| 419 |
+
{
|
| 420 |
+
"data": {
|
| 421 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 422 |
+
"model_id": "d5ebea88cba841d0a117156384a7af6d",
|
| 423 |
+
"version_major": 2,
|
| 424 |
+
"version_minor": 0
|
| 425 |
+
},
|
| 426 |
+
"text/plain": [
|
| 427 |
+
"config.json: 0%| | 0.00/636 [00:00<?, ?B/s]"
|
| 428 |
+
]
|
| 429 |
+
},
|
| 430 |
+
"metadata": {},
|
| 431 |
+
"output_type": "display_data"
|
| 432 |
+
},
|
| 433 |
+
{
|
| 434 |
+
"data": {
|
| 435 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 436 |
+
"model_id": "f7bc8fb35a9f44deb336bb1b109298ee",
|
| 437 |
+
"version_major": 2,
|
| 438 |
+
"version_minor": 0
|
| 439 |
+
},
|
| 440 |
+
"text/plain": [
|
| 441 |
+
"open_clip_pytorch_model.bin: 0%| | 0.00/1.97G [00:00<?, ?B/s]"
|
| 442 |
+
]
|
| 443 |
+
},
|
| 444 |
+
"metadata": {},
|
| 445 |
+
"output_type": "display_data"
|
| 446 |
+
},
|
| 447 |
+
{
|
| 448 |
+
"data": {
|
| 449 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 450 |
+
"model_id": "2ba9dca7dfb2455aaf4627d43390d550",
|
| 451 |
+
"version_major": 2,
|
| 452 |
+
"version_minor": 0
|
| 453 |
+
},
|
| 454 |
+
"text/plain": [
|
| 455 |
+
"diffusion_pytorch_model.bin: 0%| | 0.00/2.82G [00:00<?, ?B/s]"
|
| 456 |
+
]
|
| 457 |
+
},
|
| 458 |
+
"metadata": {},
|
| 459 |
+
"output_type": "display_data"
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"data": {
|
| 463 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 464 |
+
"model_id": "aab2fd449c9b44a58ff281b30deb6e37",
|
| 465 |
+
"version_major": 2,
|
| 466 |
+
"version_minor": 0
|
| 467 |
+
},
|
| 468 |
+
"text/plain": [
|
| 469 |
+
"vocab.json: 0.00B [00:00, ?B/s]"
|
| 470 |
+
]
|
| 471 |
+
},
|
| 472 |
+
"metadata": {},
|
| 473 |
+
"output_type": "display_data"
|
| 474 |
+
},
|
| 475 |
+
{
|
| 476 |
+
"data": {
|
| 477 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 478 |
+
"model_id": "6aca918dcb4f4b4b9f6804859ea65ab6",
|
| 479 |
+
"version_major": 2,
|
| 480 |
+
"version_minor": 0
|
| 481 |
+
},
|
| 482 |
+
"text/plain": [
|
| 483 |
+
"text2video_pytorch_model.pth: 0%| | 0.00/2.82G [00:00<?, ?B/s]"
|
| 484 |
+
]
|
| 485 |
+
},
|
| 486 |
+
"metadata": {},
|
| 487 |
+
"output_type": "display_data"
|
| 488 |
+
},
|
| 489 |
+
{
|
| 490 |
+
"data": {
|
| 491 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 492 |
+
"model_id": "bf61eb3a364e455b89379e66e8f304d7",
|
| 493 |
+
"version_major": 2,
|
| 494 |
+
"version_minor": 0
|
| 495 |
+
},
|
| 496 |
+
"text/plain": [
|
| 497 |
+
"diffusion_pytorch_model.bin: 0%| | 0.00/167M [00:00<?, ?B/s]"
|
| 498 |
+
]
|
| 499 |
+
},
|
| 500 |
+
"metadata": {},
|
| 501 |
+
"output_type": "display_data"
|
| 502 |
+
},
|
| 503 |
+
{
|
| 504 |
+
"data": {
|
| 505 |
+
"text/plain": [
|
| 506 |
+
"'/content/MotionDirector/ckpts/zeroscope_v2_576w'"
|
| 507 |
+
]
|
| 508 |
+
},
|
| 509 |
+
"execution_count": 6,
|
| 510 |
+
"metadata": {},
|
| 511 |
+
"output_type": "execute_result"
|
| 512 |
+
}
|
| 513 |
+
],
|
| 514 |
+
"source": [
|
| 515 |
+
"from huggingface_hub import snapshot_download\n",
|
| 516 |
+
"# Download ZeroScope model snapshot\n",
|
| 517 |
+
"repo_id = \"cerspense/zeroscope_v2_576w\"\n",
|
| 518 |
+
"snapshot_download(repo_id=repo_id,\n",
|
| 519 |
+
" local_dir=\"./ckpts/zeroscope_v2_576w\")"
|
| 520 |
+
]
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"cell_type": "code",
|
| 524 |
+
"execution_count": 9,
|
| 525 |
+
"metadata": {
|
| 526 |
+
"execution": {
|
| 527 |
+
"iopub.execute_input": "2025-07-15T14:12:26.903891Z",
|
| 528 |
+
"iopub.status.busy": "2025-07-15T14:12:26.903623Z",
|
| 529 |
+
"iopub.status.idle": "2025-07-15T14:12:26.907531Z",
|
| 530 |
+
"shell.execute_reply": "2025-07-15T14:12:26.906880Z",
|
| 531 |
+
"shell.execute_reply.started": "2025-07-15T14:12:26.903873Z"
|
| 532 |
+
},
|
| 533 |
+
"trusted": true
|
| 534 |
+
},
|
| 535 |
+
"outputs": [],
|
| 536 |
+
"source": [
|
| 537 |
+
"import torch\n",
|
| 538 |
+
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
|
| 539 |
+
"# device"
|
| 540 |
+
]
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"cell_type": "code",
|
| 544 |
+
"execution_count": null,
|
| 545 |
+
"metadata": {
|
| 546 |
+
"execution": {
|
| 547 |
+
"iopub.execute_input": "2025-07-16T05:35:53.783879Z",
|
| 548 |
+
"iopub.status.busy": "2025-07-16T05:35:53.783504Z",
|
| 549 |
+
"iopub.status.idle": "2025-07-16T05:35:53.791697Z",
|
| 550 |
+
"shell.execute_reply": "2025-07-16T05:35:53.791129Z",
|
| 551 |
+
"shell.execute_reply.started": "2025-07-16T05:35:53.783849Z"
|
| 552 |
+
},
|
| 553 |
+
"trusted": true
|
| 554 |
+
},
|
| 555 |
+
"outputs": [
|
| 556 |
+
{
|
| 557 |
+
"name": "stdout",
|
| 558 |
+
"output_type": "stream",
|
| 559 |
+
"text": [
|
| 560 |
+
"Overwriting configs/config_multi_videos.yaml\n"
|
| 561 |
+
]
|
| 562 |
+
}
|
| 563 |
+
],
|
| 564 |
+
"source": [
|
| 565 |
+
"%%writefile configs/config_multi_videos.yaml\n",
|
| 566 |
+
"# Pretrained diffusers model path.\n",
|
| 567 |
+
"pretrained_model_path: \"./ckpts/zeroscope_v2_576w\"\n",
|
| 568 |
+
"# pretrained_model_path: \"./ckpts/text-to-video-ms-1.7b\"\n",
|
| 569 |
+
"# The folder where your training outputs will be placed.\n",
|
| 570 |
+
"output_dir: \"./zeroscope_v2_576w-Ghibli-LoRA\"\n",
|
| 571 |
+
"# resume_step: 500\n",
|
| 572 |
+
"# resume_from_checkpoint: \"./zeroscope_v2_576w-Scenery_Anime_Bright-lora/train_2025-07-10T13-46-57\"\n",
|
| 573 |
+
"# lora_path: \"zeroscope_v2_576w-Scenery_Anime_Bright-lora/checkpoint-500\" # This argument is used for training resumption\n",
|
| 574 |
+
"# lora_path: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/checkpoint-200\n",
|
| 575 |
+
"\n",
|
| 576 |
+
"dataset_types:\n",
|
| 577 |
+
" - 'folder'\n",
|
| 578 |
+
"\n",
|
| 579 |
+
"# Caches the latents (Frames-Image -> VAE -> Latent) to a HDD or SDD.\n",
|
| 580 |
+
"# The latents will be saved under your training folder, and loaded automatically for training.\n",
|
| 581 |
+
"# This both saves memory and speeds up training and takes very little disk space.\n",
|
| 582 |
+
"cache_latents: True\n",
|
| 583 |
+
"\n",
|
| 584 |
+
"\n",
|
| 585 |
+
"# If you have cached latents set to `True` and have a directory of cached latents,\n",
|
| 586 |
+
"# you can skip the caching process and load previously saved ones.\n",
|
| 587 |
+
"cached_latent_dir: null #/path/to/cached_latents\n",
|
| 588 |
+
"# cached_latent_dir: zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-13T06-46-47/cached_latents\n",
|
| 589 |
+
"\n",
|
| 590 |
+
"# Use LoRA for the UNET model.\n",
|
| 591 |
+
"use_unet_lora: True\n",
|
| 592 |
+
"\n",
|
| 593 |
+
"# LoRA Dropout. This parameter adds the probability of randomly zeros out elements. Helps prevent overfitting.\n",
|
| 594 |
+
"# See: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html\n",
|
| 595 |
+
"lora_unet_dropout: 0.1\n",
|
| 596 |
+
"\n",
|
| 597 |
+
"# Choose whether or not ito save the full pretrained model weights for both checkpoints and after training.\n",
|
| 598 |
+
"# The only time you want this off is if you're doing full LoRA training.\n",
|
| 599 |
+
"save_pretrained_model: True\n",
|
| 600 |
+
"# save_pretrained_model: True\n",
|
| 601 |
+
"\n",
|
| 602 |
+
"# The rank for LoRA training. With ModelScope, the maximum should be 1024.\n",
|
| 603 |
+
"# VRAM increases with higher rank, lower when decreased.\n",
|
| 604 |
+
"lora_rank: 16\n",
|
| 605 |
+
"\n",
|
| 606 |
+
"# Training data parameters\n",
|
| 607 |
+
"train_data:\n",
|
| 608 |
+
" # 'multiple videos'\n",
|
| 609 |
+
" path: \"./data/ghibli/videos\"\n",
|
| 610 |
+
" # The width and height in which you want your training data to be resized to.\n",
|
| 611 |
+
" width: 384\n",
|
| 612 |
+
" height: 384\n",
|
| 613 |
+
"\n",
|
| 614 |
+
" # This will find the closest aspect ratio to your input width and height.\n",
|
| 615 |
+
" # For example, 512x512 width and height with a video of resolution 1280x720 will be resized to 512x256\n",
|
| 616 |
+
" use_bucketing: True\n",
|
| 617 |
+
" gradient_accumulation_steps: 2\n",
|
| 618 |
+
" batch_size: 1\n",
|
| 619 |
+
" # The start frame index where your videos should start (Leave this at one for json and folder based training).\n",
|
| 620 |
+
" sample_start_idx: 1\n",
|
| 621 |
+
"\n",
|
| 622 |
+
" # Used for 'folder'. The rate at which your frames are sampled. Does nothing for 'json' and 'single_video' dataset.\n",
|
| 623 |
+
" fps: 16\n",
|
| 624 |
+
"\n",
|
| 625 |
+
" # For 'single_video' and 'json'. The number of frames to \"step\" (1,2,3,4) (frame_step=2) -> (1,3,5,7, ...).\n",
|
| 626 |
+
" frame_step: 1\n",
|
| 627 |
+
"\n",
|
| 628 |
+
" # The number of frames to sample. The higher this number, the higher the VRAM (acts similar to batch size).\n",
|
| 629 |
+
" n_sample_frames: 24\n",
|
| 630 |
+
"\n",
|
| 631 |
+
" # The prompt when using a a single video file\n",
|
| 632 |
+
" # fallback_prompt: \"A person is riding a bicycle.\"\n",
|
| 633 |
+
"\n",
|
| 634 |
+
"# Validation data parameters.\n",
|
| 635 |
+
"validation_data:\n",
|
| 636 |
+
" # A custom prompt that is different from your training dataset.\n",
|
| 637 |
+
" prompt:\n",
|
| 638 |
+
" - \"Studio Ghibli style. The video showcases a vibrant and lively scene set in the early.\"\n",
|
| 639 |
+
" - \"Studio Ghibli style. A woman with black hair is holding a gun in her hand.\"\n",
|
| 640 |
+
"\n",
|
| 641 |
+
" # Whether or not to sample preview during training (Requires more VRAM).\n",
|
| 642 |
+
" # sample_preview: True\n",
|
| 643 |
+
" sample_preview: False\n",
|
| 644 |
+
"\n",
|
| 645 |
+
" # The number of frames to sample during validation.\n",
|
| 646 |
+
" num_frames: 24\n",
|
| 647 |
+
"\n",
|
| 648 |
+
" # Height and width of validation sample.\n",
|
| 649 |
+
" width: 384\n",
|
| 650 |
+
" height: 384\n",
|
| 651 |
+
"\n",
|
| 652 |
+
" # Number of inference steps when generating the video.\n",
|
| 653 |
+
" num_inference_steps: 15\n",
|
| 654 |
+
"\n",
|
| 655 |
+
" # CFG scale\n",
|
| 656 |
+
" guidance_scale: 12\n",
|
| 657 |
+
"\n",
|
| 658 |
+
" # scale of spatial LoRAs, default is 0\n",
|
| 659 |
+
" spatial_scale: 0\n",
|
| 660 |
+
"\n",
|
| 661 |
+
" # scale of noise prior, i.e. the scale of inversion noises\n",
|
| 662 |
+
" noise_prior: 0\n",
|
| 663 |
+
"\n",
|
| 664 |
+
"use_offset_noise: False\n",
|
| 665 |
+
"offset_noise_strength: 0.\n",
|
| 666 |
+
"\n",
|
| 667 |
+
"# Learning rate for AdamW\n",
|
| 668 |
+
"learning_rate: 5e-4\n",
|
| 669 |
+
"\n",
|
| 670 |
+
"# Weight decay. Higher = more regularization. Lower = closer to dataset.\n",
|
| 671 |
+
"adam_weight_decay: 1e-4\n",
|
| 672 |
+
"\n",
|
| 673 |
+
"# Maximum number of train steps. Model is saved after training.\n",
|
| 674 |
+
"max_train_steps: 5000\n",
|
| 675 |
+
"\n",
|
| 676 |
+
"# Saves a model every nth step.\n",
|
| 677 |
+
"checkpointing_steps: 5000\n",
|
| 678 |
+
"\n",
|
| 679 |
+
"# How many steps to do for validation if sample_preview is enabled.\n",
|
| 680 |
+
"validation_steps: 5000\n",
|
| 681 |
+
"\n",
|
| 682 |
+
"# Whether or not we want to use mixed precision with accelerate\n",
|
| 683 |
+
"mixed_precision: \"fp16\"\n",
|
| 684 |
+
"# mixed_precision: \"no\"\n",
|
| 685 |
+
"\n",
|
| 686 |
+
"# Trades VRAM usage for speed. You lose roughly 20% of training speed, but save a lot of VRAM.\n",
|
| 687 |
+
"# If you need to save more VRAM, it can also be enabled for the text encoder, but reduces speed x2.\n",
|
| 688 |
+
"gradient_checkpointing: True\n",
|
| 689 |
+
"text_encoder_gradient_checkpointing: True\n",
|
| 690 |
+
"\n",
|
| 691 |
+
"# Xformers must be installed for best memory savings and performance (< Pytorch 2.0)\n",
|
| 692 |
+
"enable_xformers_memory_efficient_attention: True\n",
|
| 693 |
+
"use_8bit_adam: True\n",
|
| 694 |
+
"\n",
|
| 695 |
+
"# Use scaled dot product attention (Only available with >= Torch 2.0)\n",
|
| 696 |
+
"enable_torch_2_attn: True"
|
| 697 |
+
]
|
| 698 |
+
},
|
| 699 |
+
{
|
| 700 |
+
"cell_type": "code",
|
| 701 |
+
"execution_count": 8,
|
| 702 |
+
"metadata": {
|
| 703 |
+
"execution": {
|
| 704 |
+
"iopub.execute_input": "2025-07-16T05:36:02.733520Z",
|
| 705 |
+
"iopub.status.busy": "2025-07-16T05:36:02.732856Z",
|
| 706 |
+
"iopub.status.idle": "2025-07-16T16:01:06.692095Z",
|
| 707 |
+
"shell.execute_reply": "2025-07-16T16:01:06.688451Z",
|
| 708 |
+
"shell.execute_reply.started": "2025-07-16T05:36:02.733496Z"
|
| 709 |
+
},
|
| 710 |
+
"trusted": true
|
| 711 |
+
},
|
| 712 |
+
"outputs": [
|
| 713 |
+
{
|
| 714 |
+
"name": "stdout",
|
| 715 |
+
"output_type": "stream",
|
| 716 |
+
"text": [
|
| 717 |
+
"2025-07-16 05:36:13.391674: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
| 718 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
| 719 |
+
"E0000 00:00:1752644173.574411 316 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
| 720 |
+
"E0000 00:00:1752644173.625685 316 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
| 721 |
+
"Initializing the conversion map\n",
|
| 722 |
+
"{'rescale_betas_zero_snr', 'timestep_spacing'} was not found in config. Values will be initialized to default values.\n",
|
| 723 |
+
"An error occurred while trying to fetch ./ckpts/zeroscope_v2_576w: Error no file named diffusion_pytorch_model.safetensors found in directory ./ckpts/zeroscope_v2_576w.\n",
|
| 724 |
+
"Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n",
|
| 725 |
+
"{'latents_mean', 'use_post_quant_conv', 'mid_block_add_attention', 'force_upcast', 'use_quant_conv', 'shift_factor', 'latents_std'} was not found in config. Values will be initialized to default values.\n",
|
| 726 |
+
"All model checkpoint weights were used when initializing AutoencoderKL.\n",
|
| 727 |
+
"\n",
|
| 728 |
+
"All the weights of AutoencoderKL were initialized from the model checkpoint at ./ckpts/zeroscope_v2_576w.\n",
|
| 729 |
+
"If your task is similar to the task the model of the checkpoint was trained on, you can already use AutoencoderKL for predictions without further training.\n",
|
| 730 |
+
"An error occurred while trying to fetch ./ckpts/zeroscope_v2_576w: Error no file named diffusion_pytorch_model.safetensors found in directory ./ckpts/zeroscope_v2_576w.\n",
|
| 731 |
+
"Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n",
|
| 732 |
+
"All model checkpoint weights were used when initializing UNet3DConditionModel.\n",
|
| 733 |
+
"\n",
|
| 734 |
+
"All the weights of UNet3DConditionModel were initialized from the model checkpoint at ./ckpts/zeroscope_v2_576w.\n",
|
| 735 |
+
"If your task is similar to the task the model of the checkpoint was trained on, you can already use UNet3DConditionModel for predictions without further training.\n",
|
| 736 |
+
"Could not enable memory efficient attention for xformers or Torch 2.0.\n",
|
| 737 |
+
"Loading pipeline components...: 0%| | 0/5 [00:00<?, ?it/s]Loaded text_encoder as CLIPTextModel from `text_encoder` subfolder of ./ckpts/zeroscope_v2_576w.\n",
|
| 738 |
+
"Loading pipeline components...: 40%|█████▏ | 2/5 [00:00<00:01, 2.52it/s]{'rescale_betas_zero_snr', 'timestep_spacing'} was not found in config. Values will be initialized to default values.\n",
|
| 739 |
+
"Loaded scheduler as DDIMScheduler from `scheduler` subfolder of ./ckpts/zeroscope_v2_576w.\n",
|
| 740 |
+
"Loaded tokenizer as CLIPTokenizer from `tokenizer` subfolder of ./ckpts/zeroscope_v2_576w.\n",
|
| 741 |
+
"Loading pipeline components...: 100%|█████████████| 5/5 [00:00<00:00, 5.76it/s]\n",
|
| 742 |
+
"Expected types for unet: (<class 'diffusers.models.unets.unet_3d_condition.UNet3DConditionModel'>,), got <class 'models.unet_3d_condition.UNet3DConditionModel'>.\n",
|
| 743 |
+
"The TextToVideoSDPipeline has been deprecated and will not receive bug fixes or feature updates after Diffusers version 0.33.1. \n",
|
| 744 |
+
"Caching Latents.: 100%|███████████████████████| 240/240 [09:41<00:00, 2.42s/it]\n",
|
| 745 |
+
"Lora successfully injected into UNet3DConditionModel.\n",
|
| 746 |
+
"Lora successfully injected into UNet3DConditionModel.\n",
|
| 747 |
+
"unet._set_gradient_checkpointing(unet_enable)\n",
|
| 748 |
+
"Steps: 0%| | 0/5000 [00:00<?, ?it/s]1942 params have been unfrozen for training.\n",
|
| 749 |
+
"/usr/local/lib/python3.11/dist-packages/diffusers/models/transformers/transformer_2d.py:35: FutureWarning: `Transformer2DModelOutput` is deprecated and will be removed in version 1.0.0. Importing `Transformer2DModelOutput` from `diffusers.models.transformer_2d` is deprecated and this will be removed in a future version. Please use `from diffusers.models.modeling_outputs import Transformer2DModelOutput`, instead.\n",
|
| 750 |
+
" deprecate(\"Transformer2DModelOutput\", \"1.0.0\", deprecation_message)\n",
|
| 751 |
+
"Steps: 100%|█████████████████████████████| 5000/5000 [10:14:13<00:00, 7.28s/it][2025-07-16 16:00:44,146] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
|
| 752 |
+
"[2025-07-16 16:00:46,892] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False\n",
|
| 753 |
+
"\n",
|
| 754 |
+
"Loading pipeline components...: 0%| | 0/5 [00:00<?, ?it/s]\u001b[A{'rescale_betas_zero_snr', 'timestep_spacing'} was not found in config. Values will be initialized to default values.\n",
|
| 755 |
+
"Loaded scheduler as DDIMScheduler from `scheduler` subfolder of ./ckpts/zeroscope_v2_576w.\n",
|
| 756 |
+
"Loaded tokenizer as CLIPTokenizer from `tokenizer` subfolder of ./ckpts/zeroscope_v2_576w.\n",
|
| 757 |
+
"Loading pipeline components...: 100%|█████████████| 5/5 [00:00<00:00, 50.50it/s]\n",
|
| 758 |
+
"Expected types for unet: (<class 'diffusers.models.unets.unet_3d_condition.UNet3DConditionModel'>,), got <class 'models.unet_3d_condition.UNet3DConditionModel'>.\n",
|
| 759 |
+
"The TextToVideoSDPipeline has been deprecated and will not receive bug fixes or feature updates after Diffusers version 0.33.1. \n",
|
| 760 |
+
"Configuration saved in ./zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-16T05-36-26/checkpoint-5000/vae/config.json\n",
|
| 761 |
+
"Model weights saved in ./zeroscope_v2_576w-Ghibli-LoRA/train_2025-07-16T05-36-26/checkpoint-5000/vae/diffusion_pytorch_model.safetensors\n"
|
| 762 |
+
]
|
| 763 |
+
}
|
| 764 |
+
],
|
| 765 |
+
"source": [
|
| 766 |
+
"# Train\n",
|
| 767 |
+
"!export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True\n",
|
| 768 |
+
"!python main_train.py --config ./configs/config_multi_videos.yaml"
|
| 769 |
+
]
|
| 770 |
+
}
|
| 771 |
+
],
|
| 772 |
+
"metadata": {
|
| 773 |
+
"kaggle": {
|
| 774 |
+
"accelerator": "gpu",
|
| 775 |
+
"dataSources": [],
|
| 776 |
+
"dockerImageVersionId": 31090,
|
| 777 |
+
"isGpuEnabled": true,
|
| 778 |
+
"isInternetEnabled": true,
|
| 779 |
+
"language": "python",
|
| 780 |
+
"sourceType": "notebook"
|
| 781 |
+
},
|
| 782 |
+
"kernelspec": {
|
| 783 |
+
"display_name": "Python 3",
|
| 784 |
+
"language": "python",
|
| 785 |
+
"name": "python3"
|
| 786 |
+
},
|
| 787 |
+
"language_info": {
|
| 788 |
+
"codemirror_mode": {
|
| 789 |
+
"name": "ipython",
|
| 790 |
+
"version": 3
|
| 791 |
+
},
|
| 792 |
+
"file_extension": ".py",
|
| 793 |
+
"mimetype": "text/x-python",
|
| 794 |
+
"name": "python",
|
| 795 |
+
"nbconvert_exporter": "python",
|
| 796 |
+
"pygments_lexer": "ipython3",
|
| 797 |
+
"version": "3.11.13"
|
| 798 |
+
}
|
| 799 |
+
},
|
| 800 |
+
"nbformat": 4,
|
| 801 |
+
"nbformat_minor": 4
|
| 802 |
+
}
|
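A quick sanity check on the training schedule above: with batch_size 1 and gradient_accumulation_steps 2, the 5,000 optimizer steps sweep the 240 copied clips roughly 42 times. A back-of-envelope sketch, assuming one optimizer step consumes batch_size * gradient_accumulation_steps samples (the exact accounting depends on how MotionDirector counts steps):

# Back-of-envelope epoch estimate for configs/config_multi_videos.yaml
clips = 240                  # pairs copied into data/ghibli/videos
batch_size = 1
grad_accum = 2
max_train_steps = 5000

effective_batch = batch_size * grad_accum
epochs = max_train_steps * effective_batch / clips
print(f"effective batch size: {effective_batch}")            # 2
print(f"approximate passes over the dataset: {epochs:.1f}")  # ~41.7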
requirements/requirements.txt
ADDED
|
@@ -0,0 +1,26 @@
|
| 1 |
+
accelerate>=1.7.0
|
| 2 |
+
deepspeed
|
| 3 |
+
diffusers>=0.33.0
|
| 4 |
+
huggingface-hub
|
| 5 |
+
git+https://github.com/cloneofsimo/lora.git
|
| 6 |
+
git+https://github.com/microsoft/LoRA
|
| 7 |
+
loralib
|
| 8 |
+
numpy==1.26.4
|
| 9 |
+
tqdm
|
| 10 |
+
einops
|
| 11 |
+
imageio
|
| 12 |
+
imageio-ffmpeg
|
| 13 |
+
torch>=2.6.0
|
| 14 |
+
torchvision>=0.21.0
|
| 15 |
+
torchaudio
|
| 16 |
+
transformers>=4.51.3
|
| 17 |
+
decord
|
| 18 |
+
safetensors
|
| 19 |
+
omegaconf
|
| 20 |
+
opencv-python
|
| 21 |
+
pydantic
|
| 22 |
+
triton
|
| 23 |
+
compel
|
| 24 |
+
peft>=0.15.0
|
| 25 |
+
pytorch_lightning>=2.5.0
|
| 26 |
+
bitsandbytes
|
requirements/requirements_compatible.txt
ADDED
|
@@ -0,0 +1,23 @@
|
| 1 |
+
deepspeed==0.17.3
|
| 2 |
+
diffusers==0.34.0
|
| 3 |
+
huggingface_hub==0.34.1
|
| 4 |
+
loralib==0.1.2
|
| 5 |
+
numpy==1.26.4
|
| 6 |
+
tqdm==4.67.1
|
| 7 |
+
einops==0.8.1
|
| 8 |
+
imageio==2.37.0
|
| 9 |
+
imageio_ffmpeg==0.6.0
|
| 10 |
+
torch==2.6.0
|
| 11 |
+
torchvision==0.21.0
|
| 12 |
+
torchaudio==2.6.0
|
| 13 |
+
transformers==4.54.0
|
| 14 |
+
decord==0.6.0
|
| 15 |
+
safetensors==0.5.3
|
| 16 |
+
omegaconf==2.3.0
|
| 17 |
+
opencv-python==4.11.0.86  # 'cv2' is the import name; the installable PyPI package is opencv-python
|
| 18 |
+
pydantic==2.11.7
|
| 19 |
+
triton==3.2.0
|
| 20 |
+
compel==2.1.1
|
| 21 |
+
peft==0.16.0
|
| 22 |
+
pytorch_lightning==2.5.2
|
| 23 |
+
bitsandbytes==0.46.1
|
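Both requirement sets install from the repository root: requirements.txt tracks minimum versions, while requirements_compatible.txt pins exact versions for a reproducible environment. A minimal install sketch, mirroring the subprocess style the repo's own scripts use (pip in the active environment is assumed):

# Install the pinned requirement set from the repository root.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "pip", "install", "-r",
     "requirements/requirements_compatible.txt"],
    check=True,
)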
scripts/download_ckpts.py
ADDED
|
@@ -0,0 +1,96 @@
|
| 1 |
+
from huggingface_hub import HfApi, snapshot_download
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
import argparse
|
| 5 |
+
|
| 6 |
+
def download_checkpoint(repo_id, save_path, repo_type="model"):
|
| 7 |
+
"""
|
| 8 |
+
Download a model checkpoint from Hugging Face Hub to the specified local directory.
|
| 9 |
+
|
| 10 |
+
Args:
|
| 11 |
+
repo_id (str): The repository ID on Hugging Face Hub
|
| 12 |
+
save_path (str): Local directory path to save the checkpoint
|
| 13 |
+
repo_type (str): Type of repository (default: "model")
|
| 14 |
+
"""
|
| 15 |
+
# Initialize Hugging Face API
|
| 16 |
+
api = HfApi()
|
| 17 |
+
|
| 18 |
+
# Create the directory if it doesn't exist
|
| 19 |
+
os.makedirs(save_path, exist_ok=True)
|
| 20 |
+
|
| 21 |
+
# Download the checkpoint
|
| 22 |
+
print(f"Downloading {repo_id} to {save_path}...")
|
| 23 |
+
snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=save_path)
|
| 24 |
+
print(f"Successfully downloaded {repo_id}")
|
| 25 |
+
|
| 26 |
+
def main(args):
|
| 27 |
+
# Define checkpoint configurations
|
| 28 |
+
checkpoints = [
|
| 29 |
+
{
|
| 30 |
+
"repo_id": args.repo_id,
|
| 31 |
+
"save_path": args.save_path,
|
| 32 |
+
"repo_type": args.repo_type
|
| 33 |
+
}
|
| 34 |
+
]
|
| 35 |
+
|
| 36 |
+
# Add LoRA checkpoint if provided
|
| 37 |
+
if args.lora_repo_id and args.lora_save_path:
|
| 38 |
+
checkpoints.append({
|
| 39 |
+
"repo_id": args.lora_repo_id,
|
| 40 |
+
"save_path": args.lora_save_path,
|
| 41 |
+
"repo_type": args.lora_repo_type
|
| 42 |
+
})
|
| 43 |
+
|
| 44 |
+
# Download each checkpoint
|
| 45 |
+
for checkpoint in checkpoints:
|
| 46 |
+
download_checkpoint(
|
| 47 |
+
repo_id=checkpoint["repo_id"],
|
| 48 |
+
save_path=checkpoint["save_path"],
|
| 49 |
+
repo_type=checkpoint["repo_type"]
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
if __name__ == "__main__":
|
| 53 |
+
# Set up argument parser
|
| 54 |
+
parser = argparse.ArgumentParser(description="Download model checkpoints from Hugging Face Hub")
|
| 55 |
+
parser.add_argument(
|
| 56 |
+
"--repo_id",
|
| 57 |
+
type=str,
|
| 58 |
+
default="cerspense/zeroscope_v2_576w",
|
| 59 |
+
help="Hugging Face repository ID for the checkpoint"
|
| 60 |
+
)
|
| 61 |
+
parser.add_argument(
|
| 62 |
+
"--save_path",
|
| 63 |
+
type=str,
|
| 64 |
+
default="./ckpts/zeroscope_v2_576w",
|
| 65 |
+
help="Local directory to save the checkpoint"
|
| 66 |
+
)
|
| 67 |
+
parser.add_argument(
|
| 68 |
+
"--repo_type",
|
| 69 |
+
type=str,
|
| 70 |
+
default="model",
|
| 71 |
+
help="Type of repository (e.g., model, dataset)"
|
| 72 |
+
)
|
| 73 |
+
parser.add_argument(
|
| 74 |
+
"--lora_repo_id",
|
| 75 |
+
type=str,
|
| 76 |
+
default="danhtran2mind/zeroscope_v2_576w-Ghibli-LoRA",
|
| 77 |
+
help="Hugging Face repository ID for the LoRA checkpoint"
|
| 78 |
+
)
|
| 79 |
+
parser.add_argument(
|
| 80 |
+
"--lora_save_path",
|
| 81 |
+
type=str,
|
| 82 |
+
default="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
|
| 83 |
+
help="Local directory to save the LoRA checkpoint"
|
| 84 |
+
)
|
| 85 |
+
parser.add_argument(
|
| 86 |
+
"--lora_repo_type",
|
| 87 |
+
type=str,
|
| 88 |
+
default="model",
|
| 89 |
+
help="Type of repository for the LoRA checkpoint (e.g., model, dataset)"
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
# Parse arguments
|
| 93 |
+
args = parser.parse_args()
|
| 94 |
+
|
| 95 |
+
# Call main with parsed arguments
|
| 96 |
+
main(args)
|
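A usage sketch for the downloader above, fetching both the base ZeroScope checkpoint and the Ghibli LoRA weights; every flag mirrors an argparse option defined in scripts/download_ckpts.py, and the values shown are just the script's defaults made explicit:

# Fetch the base model and LoRA weights into ./ckpts.
import subprocess

subprocess.run(
    [
        "python", "scripts/download_ckpts.py",
        "--repo_id", "cerspense/zeroscope_v2_576w",
        "--save_path", "./ckpts/zeroscope_v2_576w",
        "--lora_repo_id", "danhtran2mind/zeroscope_v2_576w-Ghibli-LoRA",
        "--lora_save_path", "./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
    ],
    check=True,
)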
scripts/process_dataset.py
ADDED
|
@@ -0,0 +1,48 @@
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import random
|
| 4 |
+
import argparse
|
| 5 |
+
from huggingface_hub import snapshot_download
|
| 6 |
+
import zipfile
|
| 7 |
+
|
| 8 |
+
def copy_file_pairs(source_dir, dest_dir, max_pairs=20, seed=None):
|
| 9 |
+
if seed is not None:
|
| 10 |
+
random.seed(seed)
|
| 11 |
+
os.makedirs(dest_dir, exist_ok=True)
|
| 12 |
+
mp4_files = [f for f in os.listdir(source_dir) if f.endswith('.mp4') and os.path.exists(os.path.join(source_dir, os.path.splitext(f)[0] + '.txt'))]  # keep only complete (.mp4, .txt) pairs so the count returned is accurate
|
| 13 |
+
selected_mp4_files = random.sample(mp4_files, min(len(mp4_files), max_pairs))
|
| 14 |
+
for mp4 in selected_mp4_files:
|
| 15 |
+
base = os.path.splitext(mp4)[0]
|
| 16 |
+
txt = f"{base}.txt"
|
| 17 |
+
if os.path.exists(os.path.join(source_dir, txt)):
|
| 18 |
+
shutil.copy2(os.path.join(source_dir, mp4), os.path.join(dest_dir, mp4))
|
| 19 |
+
shutil.copy2(os.path.join(source_dir, txt), os.path.join(dest_dir, txt))
|
| 20 |
+
return len(selected_mp4_files)
|
| 21 |
+
|
| 22 |
+
if __name__ == "__main__":
|
| 23 |
+
parser = argparse.ArgumentParser(description="Process Studio Ghibli dataset by downloading, extracting, and copying file pairs.")
|
| 24 |
+
parser.add_argument("--source_dir", default="data/ghibli/raw/videos/1920x1040", help="Source directory containing video and text files")
|
| 25 |
+
parser.add_argument("--dest_dir", default="data/ghibli/videos", help="Destination directory for copied file pairs")
|
| 26 |
+
parser.add_argument("--max_pairs", type=int, default=240, help="Maximum number of file pairs to copy")
|
| 27 |
+
parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility")
|
| 28 |
+
parser.add_argument("--repo_id", default="raymondt/ghibi_t2v", help="Hugging Face dataset repository ID")
|
| 29 |
+
parser.add_argument("--local_dir", default="data/ghibli/raw", help="Local directory to download the dataset")
|
| 30 |
+
parser.add_argument("--zip_path", default="data/ghibli/raw/studio_ghibli_wan14b_t2v_v01_dataset.zip", help="Path to the downloaded zip file")
|
| 31 |
+
|
| 32 |
+
args = parser.parse_args()
|
| 33 |
+
|
| 34 |
+
# Create directory if it doesn't exist
|
| 35 |
+
os.makedirs(args.local_dir, exist_ok=True)
|
| 36 |
+
|
| 37 |
+
# Download the dataset using snapshot_download
|
| 38 |
+
snapshot_download(repo_id=args.repo_id,
|
| 39 |
+
local_dir=args.local_dir,
|
| 40 |
+
repo_type="dataset")
|
| 41 |
+
|
| 42 |
+
# Unzip the dataset
|
| 43 |
+
with zipfile.ZipFile(args.zip_path, 'r') as zip_ref:
|
| 44 |
+
zip_ref.extractall(args.local_dir)
|
| 45 |
+
|
| 46 |
+
# Copy file pairs
|
| 47 |
+
copied = copy_file_pairs(args.source_dir, args.dest_dir, max_pairs=args.max_pairs, seed=args.seed)
|
| 48 |
+
print(f"Copied {copied} pairs to {args.dest_dir}")
|
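This script folds the dataset steps the notebook performs by hand (download, unzip, subsample pairs) into one command. A sketch exercising the defaults (240 pairs, seed 42):

# Download, extract, and subsample the Ghibli dataset in one shot.
import subprocess

subprocess.run(
    ["python", "scripts/process_dataset.py", "--max_pairs", "240", "--seed", "42"],
    check=True,
)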
scripts/setup_third_party.py
ADDED
|
@@ -0,0 +1,38 @@
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
import argparse
|
| 4 |
+
import sys
|
| 5 |
+
|
| 6 |
+
def clone_repository(repo_url, target_dir, branch="main"):
|
| 7 |
+
"""Clone a git repository to the specified directory with specific branch."""
|
| 8 |
+
if os.path.exists(target_dir):
|
| 9 |
+
print(f"Directory {target_dir} already exists. Skipping clone.")
|
| 10 |
+
return
|
| 11 |
+
|
| 12 |
+
os.makedirs(os.path.dirname(target_dir), exist_ok=True)
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
subprocess.run(
|
| 16 |
+
["git", "clone", "-b", branch, repo_url, target_dir],
|
| 17 |
+
check=True,
|
| 18 |
+
capture_output=True,
|
| 19 |
+
text=True
|
| 20 |
+
)
|
| 21 |
+
print(f"Successfully cloned {repo_url} (branch: {branch}) to {target_dir}")
|
| 22 |
+
except subprocess.CalledProcessError as e:
|
| 23 |
+
print(f"Failed to clone repository: {e.stderr}")
|
| 24 |
+
sys.exit(1)
|
| 25 |
+
|
| 26 |
+
def main(motiondirector_url="https://github.com/danhtran2mind/MotionDirector", branch="main"):
|
| 27 |
+
# Define target directory
|
| 28 |
+
target_dir = os.path.join("src", "third_party", "MotionDirector")
|
| 29 |
+
|
| 30 |
+
# Clone MotionDirector repository
|
| 31 |
+
clone_repository(motiondirector_url, target_dir, branch)
|
| 32 |
+
|
| 33 |
+
if __name__ == "__main__":
|
| 34 |
+
# Set arguments directly
|
| 35 |
+
main(
|
| 36 |
+
motiondirector_url="https://github.com/danhtran2mind/MotionDirector",
|
| 37 |
+
branch="main"
|
| 38 |
+
)
|
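The clone target matters because the wrappers below shell out to MotionDirector's entry points. A small sketch that runs the setup and then verifies the two scripts the wrappers expect:

# Clone MotionDirector, then check the entry points used by
# src/text2video_ghibli_style/train.py and inference.py.
import os
import subprocess

subprocess.run(["python", "scripts/setup_third_party.py"], check=True)
for entry in ("main_train.py", "main_inference.py"):
    path = os.path.join("src", "third_party", "MotionDirector", entry)
    print(path, "->", "found" if os.path.exists(path) else "MISSING")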
src/text2video_ghibli_style/inference.py
ADDED
|
@@ -0,0 +1,96 @@
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import subprocess
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import uuid
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
# Append the current directory to sys.path
|
| 9 |
+
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 10 |
+
|
| 11 |
+
def run_inference(
|
| 12 |
+
model_path="./ckpts/zeroscope_v2_576w",
|
| 13 |
+
checkpoint_folder="./ckpts/zeroscope_v2_576w-Ghibli-LoRA",
|
| 14 |
+
prompt="Studio Ghibli style. Two women walk down coastal village path toward sea, passing colorful houses, sailboats visible.",
|
| 15 |
+
negative_prompt="ugly, noise, fragment, blur, static video",
|
| 16 |
+
width=256,
|
| 17 |
+
height=256,
|
| 18 |
+
num_frames=8,
|
| 19 |
+
num_steps=30,
|
| 20 |
+
guidance_scale=30.0,
|
| 21 |
+
fps=8,
|
| 22 |
+
lora_rank=32,
|
| 23 |
+
lora_scale=0.7,
|
| 24 |
+
noise_prior=0.1,
|
| 25 |
+
device="cuda",
|
| 26 |
+
seed=100
|
| 27 |
+
):
|
| 28 |
+
print("Start Inference")
|
| 29 |
+
output_dir = "apps/gradio_app/temp_data"
|
| 30 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 31 |
+
|
| 32 |
+
# Get list of files in output_dir
|
| 33 |
+
for file_name in os.listdir(output_dir):
|
| 34 |
+
# Check if file ends with .mp4
|
| 35 |
+
if file_name.endswith(".mp4"):
|
| 36 |
+
# Remove the file
|
| 37 |
+
os.remove(os.path.join(output_dir, file_name))
|
| 38 |
+
|
| 39 |
+
command = [
|
| 40 |
+
"python", "src/third_party/MotionDirector/main_inference.py",
|
| 41 |
+
"--model", model_path,
|
| 42 |
+
"--checkpoint_folder", checkpoint_folder,
|
| 43 |
+
"--prompt", prompt,
|
| 44 |
+
"--negative-prompt", negative_prompt,
|
| 45 |
+
"--width", str(width),
|
| 46 |
+
"--height", str(height),
|
| 47 |
+
"--num-frames", str(num_frames),
|
| 48 |
+
"--num-steps", str(num_steps),
|
| 49 |
+
"--guidance-scale", str(guidance_scale),
|
| 50 |
+
"--fps", str(fps),
|
| 51 |
+
"--lora_rank", str(lora_rank),
|
| 52 |
+
"--lora_scale", str(lora_scale),
|
| 53 |
+
"--noise_prior", str(noise_prior),
|
| 54 |
+
"--device", device,
|
| 55 |
+
"--seed", str(seed),
|
| 56 |
+
"--output_dir", output_dir,
|
| 57 |
+
# "--no-prompt-name"
|
| 58 |
+
]
|
| 59 |
+
|
| 60 |
+
# Use Popen to execute the command
|
| 61 |
+
process = subprocess.Popen(
|
| 62 |
+
command,
|
| 63 |
+
stdout=subprocess.PIPE,
|
| 64 |
+
stderr=subprocess.PIPE,
|
| 65 |
+
text=True,
|
| 66 |
+
bufsize=1 # Line buffering
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
# Read output line-by-line in real-time
|
| 70 |
+
output_lines = []
|
| 71 |
+
try:
|
| 72 |
+
for line in process.stdout:
|
| 73 |
+
output_lines.append(line.strip())
|
| 74 |
+
except Exception as e:
|
| 75 |
+
return None, f"Error reading output: {str(e)}"
|
| 76 |
+
|
| 77 |
+
# Capture stderr and wait for process to complete
|
| 78 |
+
stderr_output = process.communicate()[1]
|
| 79 |
+
if process.returncode != 0:
|
| 80 |
+
return None, f"Error: {stderr_output.strip()}"
|
| 81 |
+
|
| 82 |
+
# Check for MP4 files in output directory
|
| 83 |
+
output_file = [f for f in os.listdir(output_dir) if f.lower().endswith('.mp4')]
|
| 84 |
+
if output_file:
|
| 85 |
+
output_path = os.path.join(output_dir, output_file[-1])
|
| 86 |
+
if os.path.exists(output_path):
|
| 87 |
+
return output_path, "\n".join(output_lines)
|
| 88 |
+
else:
|
| 89 |
+
return None, f"Video file not found at {output_path}\nLogs:\n" + "\n".join(output_lines)
|
| 90 |
+
return None, f"No MP4 files found in {output_dir}\nLogs:\n" + "\n".join(output_lines)
|
| 91 |
+
|
| 92 |
+
if __name__ == "__main__":
|
| 93 |
+
# Example usage
|
| 94 |
+
video_path, logs = run_inference(device="cpu" if not torch.cuda.is_available() else "cuda")
|
| 95 |
+
print(f"Generated Video: {video_path}")
|
| 96 |
+
print(f"Logs: {logs}")
|
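A minimal usage sketch for run_inference with a custom prompt. It assumes the checkpoints were fetched first, the repo root is the working directory, and the src packages are importable (e.g. PYTHONPATH=.); all other parameters fall back to the defaults above:

# Generate one clip with the Ghibli LoRA.
from src.text2video_ghibli_style.inference import run_inference

video_path, logs = run_inference(
    prompt="Studio Ghibli style. A dog is running through a meadow.",
    num_frames=16,
    fps=8,
    seed=42,
)
print(video_path if video_path else logs)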
src/text2video_ghibli_style/train.py
ADDED
|
@@ -0,0 +1,73 @@
|
| 1 |
+
import subprocess
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import argparse
|
| 5 |
+
|
| 6 |
+
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'third_party', 'MotionDirector')))
|
| 7 |
+
|
| 8 |
+
def run_training(config_path, pytorch_cuda_alloc_conf="expandable_segments:True"):
|
| 9 |
+
# Set the environment variable
|
| 10 |
+
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = pytorch_cuda_alloc_conf
|
| 11 |
+
|
| 12 |
+
# Command to execute
|
| 13 |
+
command = ["python", "src/third_party/MotionDirector/main_train.py", "--config", config_path]
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
# Run the command using subprocess.Popen
|
| 17 |
+
process = subprocess.Popen(
|
| 18 |
+
command,
|
| 19 |
+
stdout=subprocess.PIPE,
|
| 20 |
+
stderr=subprocess.PIPE,
|
| 21 |
+
text=True,
|
| 22 |
+
env=os.environ.copy()
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
# Stream output in real-time
|
| 26 |
+
while True:
|
| 27 |
+
output = process.stdout.readline()
|
| 28 |
+
if output == '' and process.poll() is not None:
|
| 29 |
+
break
|
| 30 |
+
if output:
|
| 31 |
+
print(output.strip())
|
| 32 |
+
|
| 33 |
+
# Get any remaining output and errors
|
| 34 |
+
stdout, stderr = process.communicate()
|
| 35 |
+
|
| 36 |
+
# Print any errors
|
| 37 |
+
if stderr:
|
| 38 |
+
print("Errors:", stderr)
|
| 39 |
+
|
| 40 |
+
# Check the return code
|
| 41 |
+
if process.returncode == 0:
|
| 42 |
+
print("Training completed successfully")
|
| 43 |
+
else:
|
| 44 |
+
print(f"Training failed with return code: {process.returncode}")
|
| 45 |
+
|
| 46 |
+
except subprocess.SubprocessError as e:
|
| 47 |
+
print(f"Error running training: {e}")
|
| 48 |
+
except FileNotFoundError:
|
| 49 |
+
print("Error: main_train.py or config file not found")
|
| 50 |
+
except Exception as e:
|
| 51 |
+
print(f"Unexpected error: {e}")
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
|
| 54 |
+
# Set up argument parser
|
| 55 |
+
parser = argparse.ArgumentParser(description="Run training script with specified config")
|
| 56 |
+
parser.add_argument(
|
| 57 |
+
"--config",
|
| 58 |
+
type=str,
|
| 59 |
+
default="./configs/config_multi_videos.yaml",
|
| 60 |
+
help="Path to the config file"
|
| 61 |
+
)
|
| 62 |
+
parser.add_argument(
|
| 63 |
+
"--pytorch-cuda-alloc",
|
| 64 |
+
type=str,
|
| 65 |
+
default="expandable_segments:True",
|
| 66 |
+
help="Value for PYTORCH_CUDA_ALLOC_CONF environment variable"
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
# Parse arguments
|
| 70 |
+
args = parser.parse_args()
|
| 71 |
+
|
| 72 |
+
# Run training with provided arguments
|
| 73 |
+
run_training(args.config, args.pytorch_cuda_alloc)
|
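End-to-end training then reduces to one command; this sketch simply drives the wrapper above with its two CLI flags:

# Launch LoRA training with expandable CUDA segments enabled.
import subprocess

subprocess.run(
    [
        "python", "src/text2video_ghibli_style/train.py",
        "--config", "./configs/config_multi_videos.yaml",
        "--pytorch-cuda-alloc", "expandable_segments:True",
    ],
    check=True,
)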
src/third_party/.gitkeep
ADDED
|
File without changes
|