Rinna has released a Japanese-language version of Stable Diffusion, so I tried it out on Google Colab. The code is shown below; the shell commands are run from Jupyter.
!pip install gradio
import subprocess

try:
    from japanese_stable_diffusion import JapaneseStableDiffusionPipeline
except ImportError:
    # Install rinna's japanese-stable-diffusion package on first use
    res = subprocess.run(
        ['pip', 'install', 'git+https://github.com/rinnakk/japanese-stable-diffusion'],
        stdout=subprocess.PIPE,
    ).stdout.decode('utf-8')
    print(res)
    from japanese_stable_diffusion import JapaneseStableDiffusionPipeline
import torch
from torch import autocast
from diffusers import LMSDiscreteScheduler
from PIL import Image
from IPython import display
import gradio as gr
def make_grid_from_pils(pil_images):
    # Paste the generated images side by side into a single horizontal strip
    w, h = pil_images[0].size
    grid_img = Image.new("RGB", (len(pil_images) * w, h))
    for idx, image in enumerate(pil_images):
        grid_img.paste(image, (idx * w, 0))
    return grid_img
from huggingface_hub import notebook_login
notebook_login()
model_id = "rinna/japanese-stable-diffusion"
device = "cuda" if torch.cuda.is_available() else "cpu"
# Use the K-LMS scheduler here instead
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085, beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000
)
pipe = JapaneseStableDiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path=model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
    use_auth_token=True
).to(device)
#@markdown ###**Inference Setting:**
# the number of output images. If you encounter Out Of Memory error, decrease this number.
n_samples = 1 #@param{type: 'integer'}
# `classifier-free guidance scale` adjusts how much the image will be like your prompt. Higher values keep your image closer to your prompt.
guidance_scale = 7.5 #@param {type:"number"}
# How many steps to spend generating (diffusing) your image.
steps = 50 #@param{type: 'integer'}
# The width of the generated image.
width = 512 #@param{type: 'integer'}
# The height of the generated image.
height = 512 #@param{type: 'integer'}
# The seed used to generate your image. Enable to manually set a seed.
seed = 'random' #@param{type: 'string'}
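The form settings above are never actually passed to the generation call later in this post (which hard-codes guidance_scale=7.5), so here is a minimal sketch of how they could be wired into the pipeline loaded earlier, assuming the usual diffusers-style call signature; example_prompt, seed_value, and generator are illustrative names that do not appear in the original code.

import random

# Sketch only: wire the Colab form settings into the pipeline call.
# The "sample" output key matches the call used later in this post.
example_prompt = "富士山をバックに二大スーパーロボットががっちりと握手"

seed_value = random.randint(0, 2**32 - 1) if seed == "random" else int(seed)
generator = torch.Generator(device=device).manual_seed(seed_value)

with autocast("cuda"):
    out = pipe(
        [example_prompt] * n_samples,   # repeat the prompt to get n_samples images
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        width=width,
        height=height,
        generator=generator,
    )["sample"]

grid = make_grid_from_pils(out)         # stitch the batch into one horizontal strip
grid.save("grid.png")

Passing a list of prompts is the straightforward way to get n_samples images out of one call here, and make_grid_from_pils defined above combines them into a single image for display.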
import torch
from torch import autocast
from diffusers import LMSDiscreteScheduler
from japanese_stable_diffusion import JapaneseStableDiffusionPipeline
model_id = "rinna/japanese-stable-diffusion"
device = "cuda"
# Use the K-LMS scheduler here instead
scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000)
pipe = JapaneseStableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, use_auth_token=True)
pipe = pipe.to(device)
prompt = "富士山をバックに二大スーパーロボットががっちりと握手"
with autocast("cuda"):
    # "sample" is the output key used by this package; newer diffusers
    # pipelines expose the result as .images instead
    image = pipe(prompt, guidance_scale=7.5)["sample"][0]
image.save("output.png")
image
An image is generated from the prompt "富士山をバックに二大スーパーロボットががっちりと握手" (two great super robots firmly shaking hands with Mt. Fuji in the background), and the result looks like this.
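gradio is installed and imported at the top, but the UI wiring itself is not shown in this post; the following is a minimal sketch of how the pipeline could be exposed as a Gradio text-to-image demo. The function generate and the variable demo are illustrative names, not from the original code.

def generate(prompt_text):
    # Run one diffusion pass and return the PIL image for Gradio to display
    with autocast("cuda"):
        return pipe(prompt_text, guidance_scale=7.5)["sample"][0]

demo = gr.Interface(fn=generate, inputs="text", outputs="image")
demo.launch(share=True)  # share=True gives a public URL, which is handy on Colab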