Stable Diffusion inpainting distorts the face when replacing the background
Hi. I’m trying to change the background of a photo from a wall to a forest, but Stable Diffusion badly distorts the girl’s face. Can you look at my Python code and tell me what I’m doing wrong? I’ve tried different models, but the results are still poor.
import os
from dotenv import load_dotenv
import torch
import numpy as np
from diffusers import AutoPipelineForInpainting, EulerDiscreteScheduler, ControlNetModel, StableDiffusionControlNetInpaintPipeline
from PIL import Image, ImageOps
from diffusers.utils import load_image, make_image_grid
import rembg
load_dotenv()
#stable-diffusion-v1-5
SDV5_MODEL_PATH = os.getenv('SDV5_MODEL_PATH')
SAVE_PATH = os.getenv('SAVE_PATH')
#majicMIX_realistic_v6
MMIX_MODEL_PATH = os.getenv('MMIX_MODEL_PATH')
#control_v11p_sd15_openpose
CONTROLNET_MODEL_PATH = os.getenv('CONTROLNET_MODEL_PATH')
#ReV_Animated_Inpainting
REV_ANIMATED_MODEL_PATH = os.getenv('REV_ANIMATED_MODEL_PATH')
prompt = 'best quality, high definition masterpiece, trending on artstation, unrealistic volumetric fog on background, girl dancing in foggy forest'
negative_prompt = 'worst quality, low quality, normal quality, lowres, watermark, monochrome, light color, low resolution, ugly face'
num_of_img_per_prompt = 1
def binary_mask(init_image):
    # rembg returns a mask of the foreground subject; inverting it makes the
    # background white (repainted) and the girl black (kept)
    input_array = np.array(init_image)
    mask_array = rembg.remove(input_array, only_mask=True)
    mask_image = Image.fromarray(mask_array)
    mask_image = ImageOps.invert(mask_image)
    return mask_image
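# (Aside: a minimal sketch for inspecting the mask, assuming the helper above
# and the 'ddw.jpg' input from the main block below — saving it makes it easy
# to confirm the girl is black (kept) and the wall is white (repainted):
#   dbg_mask = binary_mask(load_image('ddw.jpg').resize((512, 768)))
#   dbg_mask.save('mask_debug.png')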
def make_inpaint_condition(init_img, mask):
    init_image = np.array(init_img.convert("RGB")).astype(np.float32) / 255.0
    mask_image = np.array(mask.convert("L")).astype(np.float32) / 255.0
    init_image[mask_image > 0.5] = -1.0  # mark masked pixels for the ControlNet
    init_image = np.expand_dims(init_image, 0).transpose(0, 3, 1, 2)  # HWC -> 1CHW
    init_image = torch.from_numpy(init_image)
    return init_image
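# (Aside: a sketch of a shape check for the control tensor, assuming the helper
# above and the variables from the main block — it should come out as
# 1 x 3 x H x W with masked pixels set to -1.0:
#   t = make_inpaint_condition(init_image, bin_mask)
#   print(t.shape)  # expected: torch.Size([1, 3, 768, 512])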
if __name__ == '__main__':
    controlnet = ControlNetModel.from_pretrained(CONTROLNET_MODEL_PATH, torch_dtype=torch.float32)
    scheduler = EulerDiscreteScheduler.from_pretrained(REV_ANIMATED_MODEL_PATH, subfolder="scheduler")
    pipeline = StableDiffusionControlNetInpaintPipeline.from_pretrained(
        MMIX_MODEL_PATH,
        controlnet=controlnet,
        torch_dtype=torch.float32
    ).to('cpu')
    # the scheduler must be set on the pipeline itself; __call__ has no scheduler kwarg
    pipeline.scheduler = scheduler
    base_img = 'ddw.jpg'
    init_image = load_image(base_img).resize((512, 768))
    bin_mask = binary_mask(init_image)
    control_image = make_inpaint_condition(init_image, bin_mask)
    image = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=512,
        height=768,
        num_inference_steps=150,
        image=init_image,
        mask_image=bin_mask,
        control_image=control_image,
        guidance_scale=45,
        strength=1
    ).images[0]
    image.save('test_img.jpg')
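For reference, here is a minimal sketch of the plain (no ControlNet) inpainting path that the unused AutoPipelineForInpainting import suggests; the model path is reused from above, the step count and guidance scale are just typical defaults rather than my tuned values, and the call signature follows the diffusers docs:

    # No-ControlNet baseline (sketch; parameter values are assumptions, not tuned)
    base_pipe = AutoPipelineForInpainting.from_pretrained(REV_ANIMATED_MODEL_PATH, torch_dtype=torch.float32).to('cpu')
    baseline = base_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=init_image,
        mask_image=bin_mask,
        num_inference_steps=50,
        guidance_scale=7.5,
        strength=1.0
    ).images[0]
    baseline.save('baseline_img.jpg')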