Update README.md
Browse files
README.md
CHANGED
|
@@ -18,87 +18,137 @@ Here is a code snippet to use the code.
|
|
| 18 |
|
| 19 |
|
| 20 |
```python
|
|
|
|
|
|
|
|
|
|
| 21 |
import torch
|
| 22 |
from diffusers import FluxFillPipeline
|
| 23 |
from diffusers.utils import load_image
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
def
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
kernel = np.ones((3, 3), np.uint8)
|
| 47 |
-
mask_np= np.array(mask)
|
| 48 |
-
h, w, c = np.shape(foreground)
|
| 49 |
-
|
| 50 |
-
#print(h,w,c)
|
| 51 |
-
# Add random Gaussian noise
|
| 52 |
-
noise = np.random.rand(h, w)*255
|
| 53 |
-
noise = np.array(noise, dtype=np.uint8)
|
| 54 |
-
if(blur):
|
| 55 |
-
noise = cv2.GaussianBlur(noise, (5,5), 0)
|
| 56 |
-
noise = np.stack([noise, noise, noise], -1)
|
| 57 |
-
|
| 58 |
-
if(isinstance(foreground,PIL.Image.Image)):
|
| 59 |
-
foreground = np.array(foreground)
|
| 60 |
-
|
| 61 |
-
black_image = Image.fromarray(np.zeros_like(foreground))
|
| 62 |
-
background = np.array(black_image)
|
| 63 |
-
|
| 64 |
-
dilated_mask = np.array(cv2.dilate(np.array(mask), kernel, iterations=10))
|
| 65 |
-
center = (np.shape(foreground)[1]//2,np.shape(foreground)[0]//2)
|
| 66 |
-
|
| 67 |
-
black_image = cv2.seamlessClone(foreground, background, dilated_mask, center, cv2.MIXED_CLONE)
|
| 68 |
-
#black_image = cv2.seamlessClone(foreground, background, dilated_mask, center, cv2.NORMAL_CLONE)
|
| 69 |
-
|
| 70 |
-
noisy_background = np.array(alpha*np.array(black_image) + (1-alpha)*noise, dtype=np.uint8)
|
| 71 |
-
if(np.max(mask_np)>1.0):
|
| 72 |
-
mask_np = mask_np/255.0
|
| 73 |
-
if(mask_np.shape[-1]!=3):
|
| 74 |
-
mask_np = np.stack([mask_np]*3,-1)
|
| 75 |
-
|
| 76 |
-
masked_image = Image.fromarray(np.array((1 - mask_np) * foreground + mask_np * noisy_background, dtype=np.uint8))
|
| 77 |
-
|
| 78 |
-
return masked_image
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
image = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup.png")
|
| 82 |
-
mask = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup_mask.png")
|
| 83 |
-
|
| 84 |
-
fg_mask, alpha, input_img = self.get_mask_and_image()
|
| 85 |
-
|
| 86 |
-
masked_image = self.prepare_masked_image(foreground=input_img_resized, mask=fg_mask_resized)
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
image.save(f"flux-fill-dev.png")
|
| 102 |
```
|
| 103 |
|
| 104 |
To learn more, check out the [diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) documentation.
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
```python
|
| 21 |
+
import numpy as np
|
| 22 |
+
import cv2
|
| 23 |
+
from PIL import Image
|
| 24 |
import torch
|
| 25 |
from diffusers import FluxFillPipeline
|
| 26 |
from diffusers.utils import load_image
|
| 27 |
+
from typing import Union
|
| 28 |
+
|
| 29 |
+
def prepare_masked_image(
    foreground: Union[Image.Image, np.ndarray],
    mask: Union[Image.Image, np.ndarray],
    alpha: float = 0.001,
    blur: bool = True,
) -> Image.Image:
    """Replace the masked region of an image with (mostly) random noise.

    The foreground is mixed-cloned onto a black canvas through a dilated copy
    of the mask, that clone is blended with uniform random noise
    (``alpha`` weights the clone, ``1 - alpha`` weights the noise), and the
    blend is finally composited into the original image wherever the mask
    is set.

    Args:
        foreground: Image to process. PIL images are converted to RGB;
            arrays must have shape (H, W, 3).
        mask: Region to overwrite. PIL images are converted to grayscale;
            arrays may be (H, W), (H, W, 1) or (H, W, 3). Values may be in
            [0, 1] or [0, 255] and the size must match ``foreground``.
        alpha: Weight of the cloned foreground in the noise blend. Lower
            values mean more noise inside the masked area.
        blur: Whether to smooth the noise with a 5x5 Gaussian blur.

    Returns:
        The composited image: original pixels where the mask is 0 and the
        noise blend where the mask is 1 (linear mix in between).

    Raises:
        ValueError: If ``foreground`` is not a 3-channel image, or if the
            mask's height/width differ from the foreground's.
    """
    # Foreground -> (H, W, 3) array. Converting PIL input to RGB lets RGBA,
    # grayscale and palette images through; the 3-channel noise below could
    # not be blended with any other channel count.
    if isinstance(foreground, Image.Image):
        foreground_np = np.array(foreground.convert("RGB"))
    else:
        foreground_np = np.asarray(foreground)
    if foreground_np.ndim != 3 or foreground_np.shape[2] != 3:
        raise ValueError(
            "foreground must be a 3-channel image of shape (H, W, 3), "
            f"got shape {foreground_np.shape}"
        )
    h, w = foreground_np.shape[:2]

    # Mask -> single-channel (H, W) array.
    if isinstance(mask, Image.Image):
        mask_np = np.array(mask.convert("L"))
    else:
        mask_np = np.asarray(mask)
        if mask_np.ndim == 3:
            if mask_np.shape[2] == 1:
                # A trailing singleton channel is already grayscale.
                mask_np = np.ascontiguousarray(mask_np[:, :, 0])
            else:
                mask_np = cv2.cvtColor(mask_np, cv2.COLOR_BGR2GRAY)
    if mask_np.shape != (h, w):
        raise ValueError(
            f"mask size {mask_np.shape} does not match foreground size {(h, w)}"
        )

    # An empty mask leaves every pixel untouched, so there is nothing to
    # clone or blend.
    if not mask_np.any():
        return Image.fromarray(foreground_np.astype(np.uint8))

    # Uniform random noise in [0, 255), optionally smoothed by a Gaussian
    # blur, replicated over the three colour channels.
    noise = (np.random.rand(h, w) * 255).astype(np.uint8)
    if blur:
        noise = cv2.GaussianBlur(noise, (5, 5), 0)
    noise_rgb = np.stack([noise, noise, noise], axis=-1)

    # Grow the mask (3x3 kernel, 10 passes) so the clone region extends past
    # the object boundary.
    kernel = np.ones((3, 3), np.uint8)
    dilated_mask = cv2.dilate(mask_np, kernel, iterations=10)

    # NOTE(review): `p` is the image centre, as in the original example.
    # seamlessClone appears to centre the mask's bounding box on `p`, which
    # would shift the patch unless the mask is centred - confirm. With the
    # default alpha the clone contributes well under one intensity level.
    black_bg = np.zeros_like(foreground_np, dtype=np.uint8)
    center = (w // 2, h // 2)
    cloned = cv2.seamlessClone(
        src=foreground_np,
        dst=black_bg,
        mask=dilated_mask,
        p=center,
        flags=cv2.MIXED_CLONE,
    )

    # Blend the clone (mostly black outside the mask) with the noise.
    noisy_bg = (alpha * cloned + (1 - alpha) * noise_rgb).astype(np.uint8)

    # Bring the mask to float weights in [0, 1]; masks already in [0, 1]
    # are used as they are.
    if mask_np.max() > 1:
        mask_norm = (mask_np / 255.0).astype(np.float32)
    else:
        mask_norm = mask_np.astype(np.float32)

    # Add a channel axis so the (H, W) weights broadcast over RGB.
    weight = mask_norm[..., np.newaxis]

    # Keep original pixels where weight == 0, use the noise blend where
    # weight == 1.
    combined = ((1 - weight) * foreground_np + weight * noisy_bg).astype(np.uint8)

    return Image.fromarray(combined)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def main():
    """Run the FLUX.1 Fill example end to end and save the result.

    Downloads the sample cup image and its mask, builds the noise-filled
    input with ``prepare_masked_image``, runs ``FluxFillPipeline`` on CUDA
    and writes the first generated image to ``flux-fill-dev.png``.
    """
    # Fetch the sample image and the mask that goes with it.
    image_url = "https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup.png"
    mask_url = "https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup_mask.png"
    source = load_image(image_url)
    region = load_image(mask_url)

    # Fill the masked region with noise before handing it to the model.
    noised_input = prepare_masked_image(foreground=source, mask=region)

    # Load the fill pipeline in bfloat16 and move it to the GPU.
    pipeline = FluxFillPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Fill-dev",
        torch_dtype=torch.bfloat16,
    )
    pipeline = pipeline.to("cuda")

    # Fixed CPU seed for the sampler.
    seeded = torch.Generator(device="cpu").manual_seed(0)

    # Run inference and keep the first generated image.
    result = pipeline(
        prompt="a white paper cup",
        image=noised_input,
        mask_image=region,
        height=1024,
        width=1024,
        guidance_scale=30,
        num_inference_steps=50,
        max_sequence_length=512,
        generator=seeded,
    )
    generated = result.images[0]

    # Write the result to disk and report where it went.
    generated.save("flux-fill-dev.png")
    print("Saved output to flux-fill-dev.png")


# Only run the example when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
| 151 |
|
|
|
|
| 152 |
```
|
| 153 |
|
| 154 |
To learn more, check out the [diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) documentation.
|