!pip install diffusers transformers accelerate
from diffusers import DiffusionPipeline

# Generate a single image with the midjourney-mini pipeline and save it to Drive.
SAVE_NAME = 'drive/MyDrive/mj1.png'
MODEL_ID = 'midjourney-community/midjourney-mini'  # renamed: `id` shadowed the builtin
DEVICE = 'cuda'

pipe = DiffusionPipeline.from_pretrained(MODEL_ID)
pipe = pipe.to(DEVICE)

prompt = 'rabbit, Studio Ghibli --ar 16:9'
# The pipeline returns a batch of images; take the first one.
image = pipe(prompt).images[0]
image.save(SAVE_NAME)
アップロードするファイルは こちらから ダウンロードできます。
[ { "times": 3, "prompt": "good quality photography of a singular red apple on simple background for e-commerce site --stop 75" }, { "times": 1, "prompts": [ "Detailing oil painting of The great white castle on deep forest landscape ", "by CASPAR DAVID FRIEDRICH and CLAUDE LORRAIN,perfect lighting, ", "golden hour, taken with Canon 5D Mk4 --ar 16:9" ] }, { "times": 2, "prompt": "(Vintage Victorian Design Style, Elsa Beskow, Colored Pencils and Watercolors, Softly Lit Lanterns)" } ]
from diffusers import DiffusionPipeline


class ImgGenAI:
    """Thin wrapper around a diffusers text-to-image pipeline."""

    def __init__(self, id, device='cuda'):
        # NOTE(review): parameter name `id` kept for caller compatibility
        # even though it shadows the builtin.
        pipeline = DiffusionPipeline.from_pretrained(id, use_safetensors=False)
        self.pipe = pipeline.to(device)

    def generate(self, prompt):
        """Run the pipeline on *prompt* and return the first generated image."""
        result = self.pipe(prompt)
        return result.images[0]
import json
import sys


class Orders:
    """Load generation orders from a JSON file and expand them into a flat
    list of prompts, each repeated ``times`` times.

    Each order dict must contain a ``times`` key plus either ``prompt``
    (a single string) or ``prompts`` (a list of fragments joined with
    separating spaces).  When a required key is missing the process exits
    with -1, preserving the original CLI-style behavior.
    """

    def __init__(self, filename):
        with open(filename, 'r') as f:
            jsonData = json.load(f)

        self.orders = []
        for order in jsonData:
            if 'times' in order:
                t = int(order['times'])
            else:
                keyword = 'times'
                print(f'必須キー[ {keyword} ]が存在しません!')
                sys.exit(-1)

            if 'prompt' in order:
                prompt = order['prompt']
            elif 'prompts' in order:
                # Join fragments, inserting a space between them when the
                # previous fragment does not already end with one.
                prompt = ''
                for p in order['prompts']:
                    prompt = prompt + p
                    # Fix: guard against an empty accumulated string —
                    # `prompt[-1]` raised IndexError for empty fragments.
                    if prompt and prompt[-1] != ' ':
                        prompt = prompt + ' '
            else:
                keywords = ['prompt', 'prompts']
                print(f'必須キー[ {keywords[0]} ] もしくは [ {keywords[1]} ]が存在しません!')
                sys.exit(-1)

            # Repeat the prompt `times` times (replaces the append loop).
            self.orders.extend([prompt] * t)

    def __len__(self):
        return len(self.orders)

    def __getitem__(self, index):
        return self.orders[index]
!pip install diffusers transformers accelerate
import sys
sys.path.append('drive/MyDrive/libAIl'.replace('libAIl', 'libAI'))  # library folder on Drive

from Orders import Orders
from ImgGenAI import ImgGenAI
import datetime
import pytz
import os


def createFolderName():
    """Return a JST timestamp string usable as a result-folder name."""
    dt_now = datetime.datetime.now(pytz.timezone('Asia/Tokyo'))
    return dt_now.strftime('%Y%m%d_%H%M%S')


target_json = 'drive/MyDrive/gen1.json'
model_id = 'midjourney-community/midjourney-mini'  # renamed: `id` shadowed the builtin

# One timestamped folder per run so results never collide.
folder_name = 'drive/MyDrive/result/' + createFolderName()
os.makedirs(folder_name)

ai = ImgGenAI(model_id)
order = Orders(target_json)

# Generate and save one image per expanded order.
for i in range(len(order)):
    save_name = os.path.join(folder_name, f'mj{i+1}.png')
    img = ai.generate(order[i])
    img.save(save_name)
import torch

# A 2x3 tensor of ones; requires_grad=True makes autograd track it.
x = torch.ones((2, 3), requires_grad=True)
print(x)
# Elementwise addition; y is tracked by autograd because x requires grad.
y = x + 2
print(y)
# grad_fn references the operation that created y.
print(y.grad_fn)
# Multiply elementwise, then reduce to a scalar with mean().
z = y * 3
print(z)
out = z.mean()
print(out)
a = torch.tensor([1.0], requires_grad=True)
b = 2 * a         # b changes at twice the rate of a
b.backward()      # backpropagate from b
print(a.grad)     # gradient of a: db/da
def calc(a):
    """Toy differentiable function: mean of c/(c+2) where c = (2a+1)^2."""
    lin = a * 2 + 1
    sq = lin * lin
    ratio = sq / (sq + 2)
    return ratio.mean()


x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = calc(x)
y.backward()
print(x.grad)  # gradient of x: how y changes per change in each element
delta = 0.001  # small perturbation applied to x

x = torch.tensor([1.0, 2.0, 3.0])
y = calc(x)

# Perturb one component at a time and measure how much y moves.
numeric_grads = []
for idx in range(3):
    values = [1.0, 2.0, 3.0]
    values[idx] += delta
    y_shifted = calc(torch.tensor(values))
    # finite-difference gradient: (change in y) / (change in x)
    numeric_grads.append((y_shifted - y) / delta)

grads = torch.stack(tuple(numeric_grads))  # combine into one tensor
print(grads)
from torchvision.datasets import MNIST
from torchvision import transforms

img_size = 28  # image height and width in pixels

# Download MNIST; ToTensor() converts PIL images to tensors on access
# (the transform is stateless, so one shared instance is fine).
to_tensor = transforms.ToTensor()
mnist_train = MNIST("./data", train=True, download=True, transform=to_tensor)
mnist_test = MNIST("./data", train=False, download=True, transform=to_tensor)

print("訓練データの数:", len(mnist_train), "テストデータの数:", len(mnist_test))
from torch.utils.data import DataLoader # DataLoaderの設定 batch_size = 256 # バッチサイズ train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True) test_loader = DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
import torch.nn as nn


class Net(nn.Module):
    """Fully connected classifier for flattened 28x28 images, 10 classes."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(img_size*img_size, 1024)  # fully connected layers
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
        # ReLU holds no learnable parameters, so one instance is reused.
        self.relu = nn.ReLU()

    def forward(self, x):
        # Flatten each image to a (batch, img_size*img_size) vector.
        flat = x.view(-1, img_size*img_size)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)


net = Net()
net.cuda()  # move parameters to the GPU
print(net)
import torch
from torch import optim

# Cross-entropy loss for 10-class classification.
loss_fnc = nn.CrossEntropyLoss()

# Plain SGD optimizer.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Per-epoch loss history.
record_loss_train = []
record_loss_test = []

for i in range(10):  # train for 10 epochs
    # ---- training pass ----
    net.train()  # training mode (enables dropout/batchnorm behavior if present)
    loss_train = 0
    for j, (x, t) in enumerate(train_loader):  # one mini-batch (x, t) at a time
        x, t = x.cuda(), t.cuda()  # move batch to GPU
        y = net(x)
        loss = loss_fnc(y, t)
        loss_train += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    loss_train /= j+1  # average over the number of batches
    record_loss_train.append(loss_train)

    # ---- evaluation pass ----
    net.eval()  # evaluation mode
    loss_test = 0
    # Fix: wrap evaluation in no_grad so no autograd graph is built.
    with torch.no_grad():
        for j, (x, t) in enumerate(test_loader):
            x, t = x.cuda(), t.cuda()
            y = net(x)
            loss = loss_fnc(y, t)
            loss_test += loss.item()
    loss_test /= j+1
    record_loss_test.append(loss_test)

    # (the original `if i%1 == 0` was always true — print every epoch)
    print("Epoch:", i, "Loss_Train:", loss_train, "Loss_Test:", loss_test)
import matplotlib.pyplot as plt

# Plot the per-epoch training and test loss recorded during training.
plt.plot(range(len(record_loss_train)), record_loss_train, label="Train")
plt.plot(range(len(record_loss_test)), record_loss_test, label="Test")
plt.legend()
plt.xlabel("Epochs")
plt.ylabel("Error")
plt.show()
import torch

# Measure classification accuracy over the whole test set.
correct = 0
total = 0
net.eval()  # evaluation mode
with torch.no_grad():  # inference only — no autograd graph needed
    # (removed the unused enumerate index from the original loop)
    for x, t in test_loader:
        x, t = x.cuda(), t.cuda()  # move batch to GPU
        y = net(x)
        correct += (y.argmax(1) == t).sum().item()
        total += len(x)
print("正解率:", str(correct/total*100) + "%")
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# Load the CIFAR-10 test split and preview a shuffled batch of images.
cifar10_data = CIFAR10(root="./data", train=False, download=True,
                       transform=transforms.ToTensor())
cifar10_classes = ["airplane", "automobile", "bird", "cat", "deer",
                   "dog", "frog", "horse", "ship", "truck"]
print("データの数:", len(cifar10_data))

n_image = 25  # number of images to display
cifar10_loader = DataLoader(cifar10_data, batch_size=n_image, shuffle=True)
images, labels = next(iter(cifar10_loader))  # first shuffled batch

plt.figure(figsize=(10, 10))  # display size
for idx in range(n_image):
    ax = plt.subplot(5, 5, idx+1)
    # move the channel axis last for imshow: (C,H,W) -> (H,W,C)
    ax.imshow(images[idx].permute(1, 2, 0))
    ax.set_title(cifar10_classes[labels[idx]])
    ax.get_xaxis().set_visible(False)  # hide the axes
    ax.get_yaxis().set_visible(False)
plt.show()
# Augmentation demo: random rotation (+/-45 deg) and rescale (0.5x-1.5x).
transform = transforms.Compose([
    transforms.RandomAffine((-45, 45), scale=(0.5, 1.5)),
    transforms.ToTensor(),
])
cifar10_data = CIFAR10(root="./data", train=False, download=True, transform=transform)
cifar10_loader = DataLoader(cifar10_data, batch_size=n_image, shuffle=True)
images, labels = next(iter(cifar10_loader))

plt.figure(figsize=(10, 10))  # display size
for idx in range(n_image):
    ax = plt.subplot(5, 5, idx+1)
    ax.imshow(images[idx].permute(1, 2, 0))
    ax.set_title(cifar10_classes[labels[idx]])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
# Augmentation demo: random shift up/down/left/right by up to 50%.
transform = transforms.Compose([
    transforms.RandomAffine((0, 0), translate=(0.5, 0.5)),
    transforms.ToTensor(),
])
cifar10_data = CIFAR10(root="./data", train=False, download=True, transform=transform)
cifar10_loader = DataLoader(cifar10_data, batch_size=n_image, shuffle=True)
images, labels = next(iter(cifar10_loader))

plt.figure(figsize=(10, 10))  # display size
for idx in range(n_image):
    ax = plt.subplot(5, 5, idx+1)
    ax.imshow(images[idx].permute(1, 2, 0))
    ax.set_title(cifar10_classes[labels[idx]])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
# Augmentation demo: random horizontal and vertical flips (p=0.5 each).
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),  # left-right flip
    transforms.RandomVerticalFlip(p=0.5),    # up-down flip
    transforms.ToTensor(),
])
cifar10_data = CIFAR10(root="./data", train=False, download=True, transform=transform)
cifar10_loader = DataLoader(cifar10_data, batch_size=n_image, shuffle=True)
images, labels = next(iter(cifar10_loader))

plt.figure(figsize=(10, 10))  # display size
for idx in range(n_image):
    ax = plt.subplot(5, 5, idx+1)
    ax.imshow(images[idx].permute(1, 2, 0))
    ax.set_title(cifar10_classes[labels[idx]])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
# Augmentation demo: randomly erase a rectangular patch (applied on tensors,
# so it must come after ToTensor).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.5),
])
cifar10_data = CIFAR10(root="./data", train=False, download=True, transform=transform)
cifar10_loader = DataLoader(cifar10_data, batch_size=n_image, shuffle=True)
images, labels = next(iter(cifar10_loader))

plt.figure(figsize=(10, 10))  # display size
for idx in range(n_image):
    ax = plt.subplot(5, 5, idx+1)
    ax.imshow(images[idx].permute(1, 2, 0))
    ax.set_title(cifar10_classes[labels[idx]])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

cifar10_classes = ["airplane", "automobile", "bird", "cat", "deer",
                   "dog", "frog", "horse", "ship", "truck"]

# Augmentations applied only to the training split.
affine = transforms.RandomAffine((-30, 30), scale=(0.8, 1.2))  # rotate + rescale
flip = transforms.RandomHorizontalFlip(p=0.5)                  # left-right flip
# NOTE(review): mean 0 / std 1 is an identity normalization — it leaves the
# pixel values unchanged; confirm this is intentional.
normalize = transforms.Normalize((0.0, 0.0, 0.0), (1.0, 1.0, 1.0))
to_tensor = transforms.ToTensor()

transform_train = transforms.Compose([affine, flip, to_tensor, normalize])
transform_test = transforms.Compose([to_tensor, normalize])

cifar10_train = CIFAR10("./data", train=True, download=True, transform=transform_train)
cifar10_test = CIFAR10("./data", train=False, download=True, transform=transform_test)

# DataLoader settings
batch_size = 64
train_loader = DataLoader(cifar10_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(cifar10_test, batch_size=batch_size, shuffle=False)
import torch.nn as nn


class Net(nn.Module):
    """Small CNN for 32x32 RGB CIFAR-10 images: two conv+pool stages
    followed by two fully connected layers with dropout."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 8, 5)     # conv layer: (in_ch, out_ch, kernel)
        self.relu = nn.ReLU()               # parameter-free, shared instance
        self.pool = nn.MaxPool2d(2, 2)      # pooling: (window size, stride)
        self.conv2 = nn.Conv2d(8, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 256)   # fully connected layer
        self.dropout = nn.Dropout(p=0.5)    # dropout rate p
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        features = self.pool(self.relu(self.conv1(x)))
        features = self.pool(self.relu(self.conv2(features)))
        flat = features.view(-1, 16*5*5)  # flatten the feature maps
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)


net = Net()
net.cuda()  # move parameters to the GPU
print(net)
import torch
from torch import optim

# Cross-entropy loss for 10-class classification.
loss_fnc = nn.CrossEntropyLoss()

# Adam optimizer with default hyperparameters.
optimizer = optim.Adam(net.parameters())

# Per-epoch loss history.
record_loss_train = []
record_loss_test = []

for i in range(20):  # train for 20 epochs
    # ---- training pass ----
    net.train()  # training mode (activates dropout)
    loss_train = 0
    for j, (x, t) in enumerate(train_loader):  # one mini-batch (x, t) at a time
        x, t = x.cuda(), t.cuda()  # move batch to GPU
        y = net(x)
        loss = loss_fnc(y, t)
        loss_train += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    loss_train /= j+1  # average over the number of batches
    record_loss_train.append(loss_train)

    # ---- evaluation pass ----
    net.eval()  # evaluation mode (disables dropout)
    loss_test = 0
    # Fix: wrap evaluation in no_grad so no autograd graph is built.
    with torch.no_grad():
        for j, (x, t) in enumerate(test_loader):
            x, t = x.cuda(), t.cuda()
            y = net(x)
            loss = loss_fnc(y, t)
            loss_test += loss.item()
    loss_test /= j+1
    record_loss_test.append(loss_test)

    # (the original `if i%1 == 0` was always true — print every epoch)
    print("Epoch:", i, "Loss_Train:", loss_train, "Loss_Test:", loss_test)
import matplotlib.pyplot as plt

# Plot the per-epoch training and test loss recorded during training.
plt.plot(range(len(record_loss_train)), record_loss_train, label="Train")
plt.plot(range(len(record_loss_test)), record_loss_test, label="Test")
plt.legend()
plt.xlabel("Epochs")
plt.ylabel("Error")
plt.show()
import torch

# Measure classification accuracy over the whole test set.
correct = 0
total = 0
net.eval()  # evaluation mode
with torch.no_grad():  # inference only — no autograd graph needed
    # (removed the unused enumerate index from the original loop)
    for x, t in test_loader:
        x, t = x.cuda(), t.cuda()  # move batch to GPU
        y = net(x)
        correct += (y.argmax(1) == t).sum().item()
        total += len(x)
print("正解率:", str(correct/total*100) + "%")
# Classify a single random test image and display it alongside the prediction.
cifar10_loader = DataLoader(cifar10_test, batch_size=1, shuffle=True)
images, labels = next(iter(cifar10_loader))  # take exactly one sample

plt.imshow(images[0].permute(1, 2, 0))  # channel axis last for imshow
# hide tick labels and tick marks
plt.tick_params(labelbottom=False, labelleft=False, bottom=False, left=False)
plt.show()

net.eval()  # evaluation mode
x, t = images.cuda(), labels.cuda()  # move to GPU
y = net(x)
print("正解:", cifar10_classes[labels[0]],
      "予測結果:", cifar10_classes[y.argmax().item()])