{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup Working directory" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "if os.path.basename(os.getcwd()) == \"notebooks\":\n", " os.chdir(\"../\")" ] }, { "cell_type": "markdown", "metadata": { "id": "S8sD1GQtjigf" }, "source": [ "# Install Dependencies" ] }, { "cell_type": "markdown", "metadata": { "id": "K0qLae83jqIA" }, "source": [ "## Download Needed Libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-04-18T08:32:48.672386Z", "iopub.status.busy": "2025-04-18T08:32:48.672068Z", "iopub.status.idle": "2025-04-18T08:34:02.102973Z", "shell.execute_reply": "2025-04-18T08:34:02.102076Z", "shell.execute_reply.started": "2025-04-18T08:32:48.672360Z" }, "id": "fB4kRKLnjL10", "outputId": "0586e0c6-1b95-464b-81dd-547b9a8779b1", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.1/76.1 MB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m84.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "pylibcugraph-cu12 24.12.0 requires pylibraft-cu12==24.12.*, but you have pylibraft-cu12 25.2.0 which is incompatible.\n", "pylibcugraph-cu12 24.12.0 requires rmm-cu12==24.12.*, but you have rmm-cu12 25.2.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ], "source": [ "!pip install -q huggingface-hub\n", "!pip install -q bitsandbytes" ] }, { "cell_type": "markdown", "metadata": { "id": "BQaa2oX9jpqI" }, "source": [ "## Clone `diffusers` and Install it" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-04-18T08:34:02.821808Z", "iopub.status.busy": "2025-04-18T08:34:02.821531Z", "iopub.status.idle": "2025-04-18T08:34:22.083491Z", "shell.execute_reply": "2025-04-18T08:34:22.082799Z", "shell.execute_reply.started": "2025-04-18T08:34:02.821785Z" }, "id": "9UsqoRLUjL12", "outputId": "30da9cfd-6004-4745-a26b-28fd693bad11", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cloning into 'diffusers'...\n", "remote: Enumerating objects: 88432, done.\u001b[K\n", "remote: Counting objects: 100% (471/471), done.\u001b[K\n", 
"remote: Compressing objects: 100% (250/250), done.\u001b[K\n", "remote: Total 88432 (delta 358), reused 226 (delta 219), pack-reused 87961 (from 4)\u001b[K\n", "Receiving objects: 100% (88432/88432), 64.20 MiB | 28.54 MiB/s, done.\n", "Resolving deltas: 100% (64971/64971), done.\n", "/kaggle/working/diffusers\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for diffusers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" ] } ], "source": [ "!git clone https://github.com/huggingface/diffusers\n", "%cd diffusers\n", "!pip install -q ." ] }, { "cell_type": "markdown", "metadata": { "id": "NtQvzfAIkCxq" }, "source": [ "## Install `requirements` in `diffusers/examples/text_to_image`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-04-18T08:34:22.085835Z", "iopub.status.busy": "2025-04-18T08:34:22.085612Z", "iopub.status.idle": "2025-04-18T08:34:26.465524Z", "shell.execute_reply": "2025-04-18T08:34:26.464817Z", "shell.execute_reply.started": "2025-04-18T08:34:22.085817Z" }, "id": "t-vKdrGjjL13", "outputId": "d033d72a-b9d4-4b37-b97a-a944ab5f797d", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/kaggle/working\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.3/168.3 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.8/44.8 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take 
into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.12.0 which is incompatible.\n", "bigframes 1.36.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ], "source": [ "%cd ..\n", "!pip install -q -r diffusers/examples/text_to_image/requirements.txt" ] }, { "cell_type": "markdown", "metadata": { "id": "6QvnezZPlt06" }, "source": [ "## Download and Convert Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "F40wja-9nXQq" }, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from PIL import Image\n", "import io\n", "import shutil\n", "import json\n", "from huggingface_hub import snapshot_download\n", "\n", "# Download the dataset to the specified local directory\n", "snapshot_download(\n", " repo_id=\"uwunish/ghibli-dataset\",\n", " repo_type=\"dataset\",\n", " local_dir=\"data\",\n", " local_dir_use_symlinks=False\n", ")\n", "\n", "# Define paths\n", "dataset_root = \"data\"\n", "parquet_path = os.path.join(dataset_root, \"data/train-00000-of-00001.parquet\")\n", "output_dir = dataset_root\n", "\n", "# Ensure the output directory exists\n", "os.makedirs(output_dir, exist_ok=True)\n", "\n", "# Read the Parquet file\n", "df = pd.read_parquet(parquet_path)\n", "\n", "# Check the columns in the Parquet file\n", "print(\"Columns in Parquet file:\", df.columns)\n", "\n", "# Assume columns are 'image' (bytes) and 'text' (caption)\n", "# Adjust column names if different (e.g., 'caption' instead of 'text')\n", "image_column = \"image\" # Column containing image bytes\n", "text_column = \"text\" # Column containing captions\n", "\n", "# Verify that the expected columns exist\n", "if image_column not in df.columns or text_column not in df.columns:\n", " raise ValueError(f\"Expected columns '{image_column}' and '{text_column}' not 
found in Parquet file. Available columns: {df.columns}\")\n", "\n", "# Open metadata.jsonl for writing\n", "metadata_path = os.path.join(output_dir, \"metadata.jsonl\")\n", "with open(metadata_path, \"w\") as f:\n", " # Process each row in the Parquet file\n", " for idx, row in df.iterrows():\n", " # Extract image bytes and caption\n", " image_bytes = row[image_column]['bytes'] # Access the 'bytes' key to get the actual bytes\n", " caption = row[text_column]\n", "\n", " # Convert image bytes to an image and save it\n", " image = Image.open(io.BytesIO(image_bytes))\n", " image_filename = f\"image_{idx:05d}.png\" # e.g., image_00000.png\n", " image_path = os.path.join(output_dir, image_filename)\n", " image.save(image_path, format=\"PNG\")\n", "\n", " # Write metadata entry\n", " metadata_entry = {\"file_name\": image_filename, \"text\": caption}\n", " f.write(json.dumps(metadata_entry) + \"\\n\")\n", "\n", "# Clean up: Remove the 'dataset' folder and any other unnecessary files\n", "shutil.rmtree(os.path.join(dataset_root, \"data\"))\n", "readme_path = os.path.join(dataset_root, \"README.md\")\n", "if os.path.exists(readme_path):\n", " os.remove(readme_path)" ] }, { "cell_type": "markdown", "metadata": { "id": "iGHceo0vkJlY" }, "source": [ "# Configure `accelerate`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-04-18T08:34:26.466623Z", "iopub.status.busy": "2025-04-18T08:34:26.466401Z", "iopub.status.idle": "2025-04-18T08:34:40.369472Z", "shell.execute_reply": "2025-04-18T08:34:40.368729Z", "shell.execute_reply.started": "2025-04-18T08:34:26.466601Z" }, "id": "kQCmxtu1jL13", "outputId": "929e8720-a737-49b4-eb39-7da44e447334", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accelerate configuration saved at /root/.cache/huggingface/accelerate/default_config.yaml\n" ] } ], "source": [ "!accelerate config default" ] }, { "cell_type": "markdown", "metadata": { "id": 
"rl1xuxOtkhog" }, "source": [ "# Import Libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-04-18T08:35:28.565167Z", "iopub.status.busy": "2025-04-18T08:35:28.564426Z", "iopub.status.idle": "2025-04-18T08:35:46.203125Z", "shell.execute_reply": "2025-04-18T08:35:46.202459Z", "shell.execute_reply.started": "2025-04-18T08:35:28.565141Z" }, "id": "uioMpe73jL15", "trusted": true }, "outputs": [], "source": [ "import huggingface_hub\n", "import accelerate\n", "import bitsandbytes\n", "\n", "import torch\n", "import PIL, transformers, diffusers\n", "from PIL import Image\n", "import numpy as np\n", "from transformers import CLIPTextModel, CLIPTokenizer\n", "from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler\n", "from diffusers import LMSDiscreteScheduler" ] }, { "cell_type": "markdown", "metadata": { "id": "gKLO2Tsmnjdq" }, "source": [ "# Training" ] }, { "cell_type": "markdown", "metadata": { "id": "YshjAKbVnm5y" }, "source": [ "## Using a Local Converted Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Svgqw6gFnqdJ" }, "outputs": [], "source": [ "# !accelerate launch diffusers/examples/text_to_image/train_text_to_image.py \\\n", "# --pretrained_model_name_or_path=\"stabilityai/stable-diffusion-2-1-base\" \\\n", "# --train_data_dir=\"./data\" \\\n", "# --use_ema \\\n", "# --use_8bit_adam \\\n", "# --mixed_precision=\"fp16\" \\\n", "# --resolution=64 --center_crop \\\n", "# --random_flip \\\n", "# --train_batch_size=1 \\\n", "# --gradient_accumulation_steps=1 \\\n", "# --gradient_checkpointing \\\n", "# --num_train_epochs=40 \\\n", "# --learning_rate=1e-05 \\\n", "# --max_grad_norm=1 \\\n", "# --lr_scheduler=\"constant\" --lr_warmup_steps=0 \\\n", "# --output_dir=\"ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning\" \\\n", "# --checkpoints_total_limit=1 \\\n", "# --caption_column=\"text\"" ] }, { "cell_type": "markdown", "metadata": { "id": 
"ZUc2aP1Mnpv4" }, "source": [ "## Using a Hugging Face Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-04-18T08:46:27.497664Z", "iopub.status.busy": "2025-04-18T08:46:27.496795Z", "iopub.status.idle": "2025-04-18T20:18:49.033041Z", "shell.execute_reply": "2025-04-18T20:18:49.032068Z", "shell.execute_reply.started": "2025-04-18T08:46:27.497632Z" }, "id": "NP4d-Z2vjL17", "outputId": "4c30b43d-6ec7-411e-c4cf-54b734bf1ee6", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2025-04-18 08:46:38.428942: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1744965998.451775 160 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1744965998.458527 160 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "scheduler_config.json: 100%|███████████████████| 346/346 [00:00<00:00, 2.52MB/s]\n", "{'timestep_spacing', 'thresholding', 'variance_type', 'clip_sample_range', 'dynamic_thresholding_ratio', 'sample_max_value', 'rescale_betas_zero_snr'} was not found in config. 
Values will be initialized to default values.\n", "tokenizer_config.json: 100%|███████████████████| 807/807 [00:00<00:00, 5.44MB/s]\n", "vocab.json: 100%|██████████████████████████| 1.06M/1.06M [00:00<00:00, 5.56MB/s]\n", "merges.txt: 100%|█████████████████████████████| 525k/525k [00:00<00:00, 110MB/s]\n", "special_tokens_map.json: 100%|█████████████████| 460/460 [00:00<00:00, 3.80MB/s]\n", "config.json: 100%|█████████████████████████████| 613/613 [00:00<00:00, 4.99MB/s]\n", "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n", "model.safetensors: 100%|████████████████████| 1.36G/1.36G [00:04<00:00, 317MB/s]\n", "config.json: 100%|█████████████████████████████| 553/553 [00:00<00:00, 3.27MB/s]\n", "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n", "diffusion_pytorch_model.safetensors: 100%|████| 335M/335M [00:01<00:00, 322MB/s]\n", "{'shift_factor', 'latents_mean', 'mid_block_add_attention', 'use_post_quant_conv', 'scaling_factor', 'latents_std', 'force_upcast', 'use_quant_conv'} was not found in config. Values will be initialized to default values.\n", "All model checkpoint weights were used when initializing AutoencoderKL.\n", "\n", "All the weights of AutoencoderKL were initialized from the model checkpoint at stabilityai/stable-diffusion-2-1-base.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use AutoencoderKL for predictions without further training.\n", "config.json: 100%|█████████████████████████████| 911/911 [00:00<00:00, 6.77MB/s]\n", "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. 
Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n", "diffusion_pytorch_model.safetensors: 100%|██| 3.46G/3.46G [00:12<00:00, 275MB/s]\n", "{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. Values will be initialized to default values.\n", "All model checkpoint weights were used when initializing UNet2DConditionModel.\n", "\n", "All the weights of UNet2DConditionModel were initialized from the model checkpoint at stabilityai/stable-diffusion-2-1-base.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use UNet2DConditionModel for predictions without further training.\n", "{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 
'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. Values will be initialized to default values.\n", "All model checkpoint weights were used when initializing UNet2DConditionModel.\n", "\n", "All the weights of UNet2DConditionModel were initialized from the model checkpoint at stabilityai/stable-diffusion-2-1-base.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use UNet2DConditionModel for predictions without further training.\n", "README.md: 100%|███████████████████████████████| 316/316 [00:00<00:00, 2.40MB/s]\n", "train-00000-of-00001.parquet: 100%|███████████| 257M/257M [00:00<00:00, 264MB/s]\n", "Generating train split: 100%|████████| 913/913 [00:00<00:00, 1119.82 examples/s]\n", "Steps: 5%|▏ | 500/9160 [37:12<10:45:19, 4.47s/it, lr=1e-5, step_loss=0.0502]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 11%| | 1000/9160 [1:14:54<10:08:24, 4.47s/it, lr=1e-5, step_loss=0.0773{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 16%|▎ | 1500/9160 [1:52:38<9:30:57, 4.47s/it, lr=1e-5, step_loss=0.117]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-1500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 22%|▏| 2000/9160 [2:30:21<8:53:48, 4.47s/it, lr=1e-5, step_loss=0.00651{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 27%|▌ | 2500/9160 [3:08:04<8:17:29, 4.48s/it, lr=1e-5, step_loss=0.118]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-2500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 33%|▋ | 3000/9160 [3:45:46<7:39:18, 4.47s/it, lr=1e-5, step_loss=0.252]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 38%|▊ | 3500/9160 [4:23:29<7:01:56, 4.47s/it, lr=1e-5, step_loss=0.128]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-3500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 44%|▊ | 4000/9160 [5:01:24<6:28:16, 4.51s/it, lr=1e-5, step_loss=0.133]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 49%|▉ | 4500/9160 [5:39:07<5:47:25, 4.47s/it, lr=1e-5, step_loss=0.276]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-4500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 55%|█ | 5000/9160 [6:16:50<5:09:53, 4.47s/it, lr=1e-5, step_loss=0.242]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 60%|█▏| 5500/9160 [6:54:30<4:21:16, 4.28s/it, lr=1e-5, step_loss=0.153]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-5500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 66%|█▎| 6000/9160 [7:32:12<3:56:14, 4.49s/it, lr=1e-5, step_loss=0.047]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 71%|▋| 6500/9160 [8:09:57<3:18:10, 4.47s/it, lr=1e-5, step_loss=0.0137]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-6500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 76%|▊| 7000/9160 [8:47:40<2:41:00, 4.47s/it, lr=1e-5, step_loss=0.00949{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 82%|█▋| 7500/9160 [9:25:24<2:03:52, 4.48s/it, lr=1e-5, step_loss=0.335]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-7500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 87%|▊| 8000/9160 [10:03:13<1:26:30, 4.47s/it, lr=1e-5, step_loss=0.011]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 93%|██▊| 8500/9160 [10:40:56<49:10, 4.47s/it, lr=1e-5, step_loss=0.112]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8500/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8500/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8500/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-8500/unet/diffusion_pytorch_model.safetensors\n", "Steps: 98%|▉| 9000/9160 [11:18:42<11:55, 4.47s/it, lr=1e-5, step_loss=0.00467]{'class_embeddings_concat', 'upcast_attention', 'conv_out_kernel', 'time_embedding_type', 'encoder_hid_dim', 'cross_attention_norm', 'reverse_transformer_layers_per_block', 'num_attention_heads', 'dropout', 'attention_type', 'transformer_layers_per_block', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'addition_time_embed_dim', 'time_cond_proj_dim', 'class_embed_type', 'conv_in_kernel', 'resnet_time_scale_shift', 'timestep_post_act', 'time_embedding_dim', 'resnet_skip_time_act', 'mid_block_only_cross_attention', 'resnet_out_scale_factor', 'mid_block_type', 'addition_embed_type_num_heads', 'time_embedding_act_fn', 'encoder_hid_dim_type'} was not found in config. 
Values will be initialized to default values.\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-9000/unet_ema/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-9000/unet_ema/diffusion_pytorch_model.safetensors\n", "Configuration saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-9000/unet/config.json\n", "Model weights saved in ckpts/Ghibli-Stable-Diffusion-2.1-Base-finetuning/checkpoint-9000/unet/diffusion_pytorch_model.safetensors\n", "Steps: 100%|██| 9160/9160 [11:31:07<00:00, 3.68s/it, lr=1e-5, step_loss=0.0345]\n", "model_index.json: 100%|████████████████████████| 543/543 [00:00<00:00, 3.06MB/s]\u001b[A\n", "\n", "Fetching 7 files: 0%| | 0/7 [00:00" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pil_image" ] } ], "metadata": { "colab": { "provenance": [] }, "kaggle": { "accelerator": "gpu", "dataSources": [ { "sourceId": 234719053, "sourceType": "kernelVersion" } ], "dockerImageVersionId": 31011, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 0 }