Rinna has released a Japanese-language version of Stable Diffusion, so I tried it out on Google Colab. The code is shown below; the shell commands are run from Jupyter.
!pip install gradio
import subprocess

try:
    from japanese_stable_diffusion import JapaneseStableDiffusionPipeline
except ImportError:
    # Install rinna's japanese-stable-diffusion package on first use
    res = subprocess.run(
        ['pip', 'install', 'git+https://github.com/rinnakk/japanese-stable-diffusion'],
        stdout=subprocess.PIPE,
    ).stdout.decode('utf-8')
    print(res)
    from japanese_stable_diffusion import JapaneseStableDiffusionPipeline
import torch
from torch import autocast
from diffusers import LMSDiscreteScheduler
from PIL import Image
from IPython import display
import gradio as gr
def make_grid_from_pils(pil_images):
    # Paste the generated images side by side into a single horizontal strip
    w, h = pil_images[0].size
    grid_img = Image.new("RGB", (len(pil_images) * w, h))
    for idx, image in enumerate(pil_images):
        grid_img.paste(image, (idx * w, 0))
    return grid_img
from huggingface_hub import notebook_login
notebook_login()
model_id = "rinna/japanese-stable-diffusion"
device = "cuda" if torch.cuda.is_available() else "cpu"
# Use the K-LMS scheduler here instead
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085, beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000
)
pipe = JapaneseStableDiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path=model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
    use_auth_token=True
).to(device)
#@markdown ###**Inference Setting:**
# the number of output images. If you encounter Out Of Memory error, decrease this number.
n_samples = 1 #@param{type: 'integer'}
# `classifier-free guidance scale` adjusts how much the image will be like your prompt. Higher values keep your image closer to your prompt.
guidance_scale = 7.5 #@param {type:"number"}
# How many steps to spend generating (diffusing) your image.
steps = 50 #@param{type: 'integer'}
# The width of the generated image.
width = 512 #@param{type: 'integer'}
# The height of the generated image.
height = 512 #@param{type: 'integer'}
# The seed used to generate your image. Enable to manually set a seed.
seed = 'random' #@param{type: 'string'}
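The form settings above are never actually passed to the generation call later in this post (which hard-codes guidance_scale=7.5), so here is a minimal sketch of how they could be wired into the pipeline loaded earlier, assuming the usual diffusers-style call signature; example_prompt, seed_value, and generator are illustrative names that do not appear in the original code.

import random

# Sketch only: wire the Colab form settings into the pipeline call.
# The "sample" output key matches the call used later in this post.
example_prompt = "富士山をバックに二大スーパーロボットががっちりと握手"

seed_value = random.randint(0, 2**32 - 1) if seed == "random" else int(seed)
generator = torch.Generator(device=device).manual_seed(seed_value)

with autocast("cuda"):
    out = pipe(
        [example_prompt] * n_samples,   # repeat the prompt to get n_samples images
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        width=width,
        height=height,
        generator=generator,
    )["sample"]

grid = make_grid_from_pils(out)         # stitch the batch into one horizontal strip
grid.save("grid.png")

Passing a list of prompts is the straightforward way to get n_samples images out of one call here, and make_grid_from_pils defined above combines them into a single image for display.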
import torch
from torch import autocast
from diffusers import LMSDiscreteScheduler
from japanese_stable_diffusion import JapaneseStableDiffusionPipeline
model_id = "rinna/japanese-stable-diffusion"
device = "cuda"
# Use the K-LMS scheduler here instead
scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000)
pipe = JapaneseStableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, use_auth_token=True)
pipe = pipe.to(device)
prompt = "富士山をバックに二大スーパーロボットががっちりと握手"
with autocast("cuda"):
    # "sample" is the output key used by this package; newer diffusers
    # pipelines expose the result as .images instead
    image = pipe(prompt, guidance_scale=7.5)["sample"][0]
image.save("output.png")
image
An image is generated from the prompt "富士山をバックに二大スーパーロボットががっちりと握手" (two great super robots firmly shaking hands with Mt. Fuji in the background), and the result looks like this.
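gradio is installed and imported at the top, but the UI wiring itself is not shown in this post; the following is a minimal sketch of how the pipeline could be exposed as a Gradio text-to-image demo. The function generate and the variable demo are illustrative names, not from the original code.

def generate(prompt_text):
    # Run one diffusion pass and return the PIL image for Gradio to display
    with autocast("cuda"):
        return pipe(prompt_text, guidance_scale=7.5)["sample"][0]

demo = gr.Interface(fn=generate, inputs="text", outputs="image")
demo.launch(share=True)  # share=True gives a public URL, which is handy on Colab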