rkv1990
/

FLUX.1-Fill-dev-outpainting

@@ -18,87 +18,137 @@ Here is a code snippet to use the code.
 ```python
 import torch
 from diffusers import FluxFillPipeline
 from diffusers.utils import load_image
-def get_mask_and_image(self, original_image, model_w=1024, model_h=1024):
-    orig_h, orig_w = original_image.size[0:2]
-    pil_image = original_image
-    np_image = np.asarray(pil_image)
-    np_input_image = np_image[:, :, :3]
-    np_input_mask = np_image[:, :, 3]
-    pure_fg_image = np.uint8(np_input_image)
-    np_input_mask = 255 - np_input_mask
-    alpha = 1 - (np.array(np_input_mask) / 255)
-    alpha = np.stack([alpha, alpha, alpha], -1)
-    input_image = Image.fromarray(pure_fg_image)
-    kernel = np.ones((3,3))
-    np_input_mask = cv2.erode(np_input_mask, kernel, iterations=1)
-    input_mask = Image.fromarray(np_input_mask)
-    return input_mask, alpha, input_image
-def prepare_masked_image(self, foreground, mask, alpha=0.001, blur=True):
-    # Creating kernel
     kernel = np.ones((3, 3), np.uint8)
-    mask_np= np.array(mask)
-    h, w, c = np.shape(foreground)
-    #print(h,w,c)
-    # Add random Gaussian noise
-    noise = np.random.rand(h, w)*255
-    noise = np.array(noise, dtype=np.uint8)
-    if(blur):
-        noise = cv2.GaussianBlur(noise, (5,5), 0)
-    noise = np.stack([noise, noise, noise], -1)
-    if(isinstance(foreground,PIL.Image.Image)):
-        foreground = np.array(foreground)
-    black_image = Image.fromarray(np.zeros_like(foreground))
-    background = np.array(black_image)
-    dilated_mask = np.array(cv2.dilate(np.array(mask), kernel, iterations=10))
-    center = (np.shape(foreground)[1]//2,np.shape(foreground)[0]//2)
-    black_image = cv2.seamlessClone(foreground, background, dilated_mask, center, cv2.MIXED_CLONE)
-    #black_image = cv2.seamlessClone(foreground, background, dilated_mask, center, cv2.NORMAL_CLONE)
-    noisy_background = np.array(alpha*np.array(black_image) + (1-alpha)*noise, dtype=np.uint8)
-    if(np.max(mask_np)>1.0):
-        mask_np = mask_np/255.0
-    if(mask_np.shape[-1]!=3):
-        mask_np = np.stack([mask_np]*3,-1)
-    masked_image = Image.fromarray(np.array((1 - mask_np) * foreground + mask_np * noisy_background, dtype=np.uint8))
-    return masked_image
-image = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup.png")
-mask = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup_mask.png")
-fg_mask, alpha, input_img = self.get_mask_and_image()
-masked_image = self.prepare_masked_image(foreground=input_img_resized, mask=fg_mask_resized)
-pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16).to("cuda")
-image = pipe(
-    prompt="a white paper cup",
-    image=masked_image,
-    mask_image=mask,
-    height=1632,
-    width=1232,
-    guidance_scale=30,
-    num_inference_steps=50,
-    max_sequence_length=512,
-    generator=torch.Generator("cpu").manual_seed(0)
-).images[0]
-image.save(f"flux-fill-dev.png")
 ```
 To learn more, check out the [diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) documentation.

 ```python
+import numpy as np
+import cv2
+from PIL import Image
 import torch
 from diffusers import FluxFillPipeline
 from diffusers.utils import load_image
+from typing import Union
+def prepare_masked_image(
+    foreground: Union[Image.Image, np.ndarray],
+    mask: Union[Image.Image, np.ndarray],
+    alpha: float = 0.001,
+    blur: bool = True
+) -> Image.Image:
+    """
+    Build the pipeline input: the foreground with the masked region replaced by noise.
+    Args:
+        foreground (PIL.Image.Image or np.ndarray): The input image to be inpainted.
+            Must be a 3-D (height, width, channels) image; a 2-D array fails at the shape unpack.
+        mask (PIL.Image.Image or np.ndarray): Mask of the region to fill. Pixels where the mask
+            is 0 keep the foreground; pixels where it is set (255, or 1 for a 0/1 mask) are
+            replaced by the noisy blend. PIL masks are converted to single-channel "L".
+        alpha (float): Weight of the seamless-cloned image in the fill; the noise gets
+            (1 - alpha). With the default 0.001 the fill is almost entirely noise.
+        blur (bool): Whether to smooth the noise with a 5x5 Gaussian blur.
+    Returns:
+        PIL.Image.Image: The foreground with the masked area replaced by the noisy blend.
+    """
+    # Ensure foreground is an ndarray
+    if isinstance(foreground, Image.Image):
+        foreground_np = np.array(foreground)
+    else:
+        foreground_np = foreground  # assume already a NumPy array
+    # Ensure mask is a NumPy array and single-channel
+    if isinstance(mask, Image.Image):
+        mask_np = np.array(mask.convert("L"))  # convert to grayscale
+    else:
+        mask_np = mask
+        if mask_np.ndim == 3:  # NOTE(review): assumes a 3-channel BGR array mask; confirm for RGB/RGBA input
+            mask_np = cv2.cvtColor(mask_np, cv2.COLOR_BGR2GRAY)
+    h, w, c = foreground_np.shape  # height, width, channels (needs a 3-D array; c is not used below)
+    # Create 3x3 kernel for dilation (used later)
     kernel = np.ones((3, 3), np.uint8)
+    # Generate uniform random noise (np.random.rand), scaled by 255 and truncated to uint8
+    noise = np.random.rand(h, w) * 255
+    noise = noise.astype(np.uint8)
+    if blur:
+        noise = cv2.GaussianBlur(noise, (5, 5), 0)
+    # Repeat the single noise channel three times so the noise is grey (same value per channel)
+    noise_rgb = np.stack([noise, noise, noise], axis=-1)
+    # Prepare a black background
+    black_bg = np.zeros_like(foreground_np, dtype=np.uint8)
+    # Grow the mask with 10 passes of 3x3 dilation before handing it to seamlessClone
+    dilated_mask = cv2.dilate(mask_np, kernel, iterations=10)
+    # Compute center for seamlessClone as (x, y) = (w // 2, h // 2), the center of the image
+    center = (w // 2, h // 2)
+    # Use mixed clone to merge the foreground onto a black background, using the dilated mask
+    cloned = cv2.seamlessClone(
+        src=foreground_np,
+        dst=black_bg,
+        mask=dilated_mask,
+        p=center,
+        flags=cv2.MIXED_CLONE
+    )
+    # Blend: alpha weights the cloned image, (1 - alpha) weights the noise
+    noisy_bg = (alpha * cloned + (1 - alpha) * noise_rgb).astype(np.uint8)
+    # Normalize mask to [0,1] float: divide by 255 when any value exceeds 1, else use as-is
+    if mask_np.max() > 1:
+        mask_norm = (mask_np / 255.0).astype(np.float32)
+    else:
+        mask_norm = mask_np.astype(np.float32)
+    # Expand mask to 3 channels if needed
+    if mask_norm.ndim == 2:
+        mask_norm = np.stack([mask_norm] * 3, axis=-1)
+    # Combine: keep original pixels where mask=0, use noisy_bg where mask=1
+    combined = ((1 - mask_norm) * foreground_np + mask_norm * noisy_bg).astype(np.uint8)
+    return Image.fromarray(combined)
+def main():
+    """Run the example end to end.
+
+    Loads the sample cup image and its mask, builds the noise-filled input with
+    prepare_masked_image, runs FluxFillPipeline on CUDA, and saves the first
+    generated image to flux-fill-dev.png.
+    """
+    # Load input image and its corresponding mask
+    input_image = load_image(
+        "https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup.png"
+    )
+    fg_mask = load_image(
+        "https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup_mask.png"
+    )
+    # Replace the masked region of the input with noise before handing it to the pipeline
+    masked_image = prepare_masked_image(foreground=input_image, mask=fg_mask)
+    # Load the FLUX.1-Fill-dev weights in bfloat16 and move the pipeline to the GPU
+    pipe = FluxFillPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-Fill-dev",
+        torch_dtype=torch.bfloat16
+    ).to("cuda")
+    # Run the pipeline; the same mask used to build masked_image is passed as mask_image,
+    # and the CPU generator is seeded with 0.
+    # NOTE(review): height/width are hard-coded to 1024x1024 — confirm this matches the
+    # aspect ratio of the input image.
+    output = pipe(
+        prompt="a white paper cup",
+        image=masked_image,
+        mask_image=fg_mask,
+        height=1024,
+        width=1024,
+        guidance_scale=30,
+        num_inference_steps=50,
+        max_sequence_length=512,
+        generator=torch.Generator(device="cpu").manual_seed(0)
+    ).images[0]
+    # Save the resulting image
+    output.save("flux-fill-dev.png")
+    print("Saved output to flux-fill-dev.png")
+# Run the example only when this file is executed as a script, not when imported
+if __name__ == "__main__":
+    main()
 ```
 To learn more, check out the [diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) documentation.