인공지능 공부/컴퓨터 비전
(NIA 데이터셋 과제준비) Oxford-IIIT Pet Dataset
앨런튜링_
2022. 4. 11. 14:02
# Images: https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
# Annotations: https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
from IPython.display import Image, display
from tensorflow.keras.preprocessing.image import load_img
import PIL
from PIL import ImageOps
import os
from tensorflow.keras import layers
# On-disk locations of the pet photos and their trimap segmentation masks.
input_dir = "/root/yj/yj/Kaggle/data/images"
target_dir = "/root/yj/yj/Kaggle/data/annotations/trimaps"

# Shared settings: spatial size every image/mask is resized to, number of
# segmentation classes predicted per pixel, and samples per batch.
img_size = (160, 160)
num_classes = 3
batch_size = 32

# Both listings are sorted so that position k in one list is meant to pair
# with position k in the other.
# NOTE(review): this assumes every .jpg has a same-named .png mask; nothing
# here verifies it.
input_img_paths = sorted(
    os.path.join(input_dir, fname)
    for fname in os.listdir(input_dir)
    if fname.endswith(".jpg")
)
# Dot-prefixed entries are skipped in the mask directory.
target_img_paths = sorted(
    os.path.join(target_dir, fname)
    for fname in os.listdir(target_dir)
    if not fname.startswith(".") and fname.endswith(".png")
)
# Eyeball two samples: show the photo, then its mask. The mask is passed
# through autocontrast before display.
# NOTE(review): presumably the raw label values are too close together to be
# visible otherwise -- confirm.
for sample_idx in (7, 20):
    display(Image(filename=input_img_paths[sample_idx]))
    img = ImageOps.autocontrast(load_img(target_img_paths[sample_idx]))
    display(img)
데이터 전처리
import numpy as np
from tensorflow import keras
class OxfordPets(keras.utils.Sequence):
    """Keras ``Sequence`` that loads (image, mask) batches from file paths.

    Each batch is a pair ``(x, y)``:

    * ``x`` -- float32 array of shape ``(n, H, W, 3)`` holding the images
      resized to ``img_size``.
    * ``y`` -- uint8 array of shape ``(n, H, W, 1)`` holding the masks
      resized to ``img_size``, loaded as grayscale and shifted down by one.

    ``n`` equals ``batch_size`` for every batch except possibly the last,
    which holds whatever samples remain so that no sample is dropped.

    Args:
        batch_size: Number of samples per batch.
        img_size: ``(height, width)`` tuple every image and mask is resized to.
        input_img_paths: Paths of the input images.
        target_img_paths: Paths of the masks; entry ``k`` must belong to
            ``input_img_paths[k]``.

    Raises:
        ValueError: If the two path lists differ in length, since the pairing
            is purely positional and a mismatch would silently misalign
            images and masks.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        super().__init__()
        if len(input_img_paths) != len(target_img_paths):
            raise ValueError(
                "input_img_paths and target_img_paths must have the same "
                f"length, got {len(input_img_paths)} and {len(target_img_paths)}"
            )
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division: flooring here would silently drop the trailing
        # len % batch_size samples, so e.g. predictions over this sequence
        # would have fewer rows than there are paths.
        return -(-len(self.target_img_paths) // self.batch_size)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` arrays for batch number ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_input_img_paths = self.input_img_paths[start:stop]
        batch_target_img_paths = self.target_img_paths[start:stop]
        # Size the arrays to the samples actually present so a short final
        # batch is not padded with all-zero images and masks.
        n_samples = len(batch_input_img_paths)

        x = np.zeros((n_samples,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            x[j] = load_img(path, target_size=self.img_size)

        y = np.zeros((n_samples,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            mask = load_img(path, target_size=self.img_size, color_mode='grayscale')
            y[j] = np.expand_dims(mask, 2)
        # Shift the stored mask values down by one to get 0-based class ids.
        # NOTE(review): presumably the trimap PNGs label pixels 1..3 -- confirm
        # against the dataset description. With uint8 arithmetic a stored 0
        # would wrap around to 255.
        y -= 1
        return x, y
import random
# Number of samples held out (from the end of the shuffled lists) for validation.
val_samples = 1000

# Shuffle both lists in place with identically seeded generators so the same
# permutation is applied to each and image/mask pairs stay aligned.
# NOTE(review): this only holds while the two lists have equal length.
for _paths in (input_img_paths, target_img_paths):
    random.Random(1337).shuffle(_paths)

train_input_img_paths, val_input_img_paths = (
    input_img_paths[:-val_samples],
    input_img_paths[-val_samples:],
)
train_target_img_paths, val_target_img_paths = (
    target_img_paths[:-val_samples],
    target_img_paths[-val_samples:],
)

# One batch sequence per split.
train_gen = OxfordPets(
    batch_size, img_size, train_input_img_paths, train_target_img_paths
)
val_gen = OxfordPets(
    batch_size, img_size, val_input_img_paths, val_target_img_paths
)
def get_model(img_size, num_classes):
    """Build the encoder/decoder segmentation network.

    The input is halved four times (one stride-2 convolution followed by
    three stride-2 max-pools) and then doubled four times with
    ``UpSampling2D``, so the output regains the input's spatial size when
    each side is a multiple of 16 (e.g. 160). Every down/up stage adds a
    1x1-convolution projection of the previous stage's output as a residual.

    Args:
        img_size: ``(height, width)`` tuple; the model input shape is
            ``img_size + (3,)``.
        num_classes: Number of channels of the final softmax layer, i.e. the
            number of classes predicted per pixel.

    Returns:
        An uncompiled ``keras.Model`` mapping images to per-pixel class
        probabilities.
    """

    def relu_conv_bn(tensor, conv_layer, filters):
        # ReLU -> 3x3 "same" convolution of the given kind -> batch norm.
        tensor = layers.Activation("relu")(tensor)
        tensor = conv_layer(filters, 3, padding="same")(tensor)
        return layers.BatchNormalization()(tensor)

    inputs = keras.Input(shape=img_size + (3,))

    # Entry block.
    x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    skip_source = x  # output of the previous stage, feeds the next residual

    # Downsampling stages: two separable convs, max-pool, strided residual.
    for filters in (64, 128, 256):
        x = relu_conv_bn(x, layers.SeparableConv2D, filters)
        x = relu_conv_bn(x, layers.SeparableConv2D, filters)
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Stride-2 1x1 conv brings the skip path to the pooled size/width.
        residual = layers.Conv2D(filters, 1, strides=2, padding="same")(skip_source)
        x = layers.add([x, residual])
        skip_source = x

    # Upsampling stages: two transposed convs, upsample, upsampled residual.
    for filters in (256, 128, 64, 32):
        x = relu_conv_bn(x, layers.Conv2DTranspose, filters)
        x = relu_conv_bn(x, layers.Conv2DTranspose, filters)
        x = layers.UpSampling2D(2)(x)

        # Upsample the skip path, then 1x1 conv to match the channel count.
        residual = layers.UpSampling2D(2)(skip_source)
        residual = layers.Conv2D(filters, 1, padding="same")(residual)
        x = layers.add([x, residual])
        skip_source = x

    # Per-pixel classification head.
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)
    return keras.Model(inputs, outputs)
# Instantiate the network for the configured image size and class count,
# then print its layer-by-layer summary.
model = get_model(img_size=img_size, num_classes=num_classes)
model.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 160, 160, 3 0 []
)]
conv2d (Conv2D) (None, 80, 80, 32) 896 ['input_1[0][0]']
batch_normalization (BatchNorm (None, 80, 80, 32) 128 ['conv2d[0][0]']
alization)
activation (Activation) (None, 80, 80, 32) 0 ['batch_normalization[0][0]']
activation_1 (Activation) (None, 80, 80, 32) 0 ['activation[0][0]']
separable_conv2d (SeparableCon (None, 80, 80, 64) 2400 ['activation_1[0][0]']
v2D)
batch_normalization_1 (BatchNo (None, 80, 80, 64) 256 ['separable_conv2d[0][0]']
rmalization)
activation_2 (Activation) (None, 80, 80, 64) 0 ['batch_normalization_1[0][0]']
separable_conv2d_1 (SeparableC (None, 80, 80, 64) 4736 ['activation_2[0][0]']
onv2D)
batch_normalization_2 (BatchNo (None, 80, 80, 64) 256 ['separable_conv2d_1[0][0]']
rmalization)
max_pooling2d (MaxPooling2D) (None, 40, 40, 64) 0 ['batch_normalization_2[0][0]']
conv2d_1 (Conv2D) (None, 40, 40, 64) 2112 ['activation[0][0]']
add (Add) (None, 40, 40, 64) 0 ['max_pooling2d[0][0]',
'conv2d_1[0][0]']
activation_3 (Activation) (None, 40, 40, 64) 0 ['add[0][0]']
separable_conv2d_2 (SeparableC (None, 40, 40, 128) 8896 ['activation_3[0][0]']
onv2D)
batch_normalization_3 (BatchNo (None, 40, 40, 128) 512 ['separable_conv2d_2[0][0]']
rmalization)
activation_4 (Activation) (None, 40, 40, 128) 0 ['batch_normalization_3[0][0]']
separable_conv2d_3 (SeparableC (None, 40, 40, 128) 17664 ['activation_4[0][0]']
onv2D)
batch_normalization_4 (BatchNo (None, 40, 40, 128) 512 ['separable_conv2d_3[0][0]']
rmalization)
max_pooling2d_1 (MaxPooling2D) (None, 20, 20, 128) 0 ['batch_normalization_4[0][0]']
conv2d_2 (Conv2D) (None, 20, 20, 128) 8320 ['add[0][0]']
add_1 (Add) (None, 20, 20, 128) 0 ['max_pooling2d_1[0][0]',
'conv2d_2[0][0]']
activation_5 (Activation) (None, 20, 20, 128) 0 ['add_1[0][0]']
separable_conv2d_4 (SeparableC (None, 20, 20, 256) 34176 ['activation_5[0][0]']
onv2D)
batch_normalization_5 (BatchNo (None, 20, 20, 256) 1024 ['separable_conv2d_4[0][0]']
rmalization)
activation_6 (Activation) (None, 20, 20, 256) 0 ['batch_normalization_5[0][0]']
separable_conv2d_5 (SeparableC (None, 20, 20, 256) 68096 ['activation_6[0][0]']
onv2D)
batch_normalization_6 (BatchNo (None, 20, 20, 256) 1024 ['separable_conv2d_5[0][0]']
rmalization)
max_pooling2d_2 (MaxPooling2D) (None, 10, 10, 256) 0 ['batch_normalization_6[0][0]']
conv2d_3 (Conv2D) (None, 10, 10, 256) 33024 ['add_1[0][0]']
add_2 (Add) (None, 10, 10, 256) 0 ['max_pooling2d_2[0][0]',
'conv2d_3[0][0]']
activation_7 (Activation) (None, 10, 10, 256) 0 ['add_2[0][0]']
conv2d_transpose (Conv2DTransp (None, 10, 10, 256) 590080 ['activation_7[0][0]']
ose)
batch_normalization_7 (BatchNo (None, 10, 10, 256) 1024 ['conv2d_transpose[0][0]']
rmalization)
activation_8 (Activation) (None, 10, 10, 256) 0 ['batch_normalization_7[0][0]']
conv2d_transpose_1 (Conv2DTran (None, 10, 10, 256) 590080 ['activation_8[0][0]']
spose)
batch_normalization_8 (BatchNo (None, 10, 10, 256) 1024 ['conv2d_transpose_1[0][0]']
rmalization)
up_sampling2d_1 (UpSampling2D) (None, 20, 20, 256) 0 ['add_2[0][0]']
up_sampling2d (UpSampling2D) (None, 20, 20, 256) 0 ['batch_normalization_8[0][0]']
conv2d_4 (Conv2D) (None, 20, 20, 256) 65792 ['up_sampling2d_1[0][0]']
add_3 (Add) (None, 20, 20, 256) 0 ['up_sampling2d[0][0]',
'conv2d_4[0][0]']
activation_9 (Activation) (None, 20, 20, 256) 0 ['add_3[0][0]']
conv2d_transpose_2 (Conv2DTran (None, 20, 20, 128) 295040 ['activation_9[0][0]']
spose)
batch_normalization_9 (BatchNo (None, 20, 20, 128) 512 ['conv2d_transpose_2[0][0]']
rmalization)
activation_10 (Activation) (None, 20, 20, 128) 0 ['batch_normalization_9[0][0]']
conv2d_transpose_3 (Conv2DTran (None, 20, 20, 128) 147584 ['activation_10[0][0]']
spose)
batch_normalization_10 (BatchN (None, 20, 20, 128) 512 ['conv2d_transpose_3[0][0]']
ormalization)
up_sampling2d_3 (UpSampling2D) (None, 40, 40, 256) 0 ['add_3[0][0]']
up_sampling2d_2 (UpSampling2D) (None, 40, 40, 128) 0 ['batch_normalization_10[0][0]']
conv2d_5 (Conv2D) (None, 40, 40, 128) 32896 ['up_sampling2d_3[0][0]']
add_4 (Add) (None, 40, 40, 128) 0 ['up_sampling2d_2[0][0]',
'conv2d_5[0][0]']
activation_11 (Activation) (None, 40, 40, 128) 0 ['add_4[0][0]']
conv2d_transpose_4 (Conv2DTran (None, 40, 40, 64) 73792 ['activation_11[0][0]']
spose)
batch_normalization_11 (BatchN (None, 40, 40, 64) 256 ['conv2d_transpose_4[0][0]']
ormalization)
activation_12 (Activation) (None, 40, 40, 64) 0 ['batch_normalization_11[0][0]']
conv2d_transpose_5 (Conv2DTran (None, 40, 40, 64) 36928 ['activation_12[0][0]']
spose)
batch_normalization_12 (BatchN (None, 40, 40, 64) 256 ['conv2d_transpose_5[0][0]']
ormalization)
up_sampling2d_5 (UpSampling2D) (None, 80, 80, 128) 0 ['add_4[0][0]']
up_sampling2d_4 (UpSampling2D) (None, 80, 80, 64) 0 ['batch_normalization_12[0][0]']
conv2d_6 (Conv2D) (None, 80, 80, 64) 8256 ['up_sampling2d_5[0][0]']
add_5 (Add) (None, 80, 80, 64) 0 ['up_sampling2d_4[0][0]',
'conv2d_6[0][0]']
activation_13 (Activation) (None, 80, 80, 64) 0 ['add_5[0][0]']
conv2d_transpose_6 (Conv2DTran (None, 80, 80, 32) 18464 ['activation_13[0][0]']
spose)
batch_normalization_13 (BatchN (None, 80, 80, 32) 128 ['conv2d_transpose_6[0][0]']
ormalization)
activation_14 (Activation) (None, 80, 80, 32) 0 ['batch_normalization_13[0][0]']
conv2d_transpose_7 (Conv2DTran (None, 80, 80, 32) 9248 ['activation_14[0][0]']
spose)
batch_normalization_14 (BatchN (None, 80, 80, 32) 128 ['conv2d_transpose_7[0][0]']
ormalization)
up_sampling2d_7 (UpSampling2D) (None, 160, 160, 64 0 ['add_5[0][0]']
)
up_sampling2d_6 (UpSampling2D) (None, 160, 160, 32 0 ['batch_normalization_14[0][0]']
)
conv2d_7 (Conv2D) (None, 160, 160, 32 2080 ['up_sampling2d_7[0][0]']
)
add_6 (Add) (None, 160, 160, 32 0 ['up_sampling2d_6[0][0]',
) 'conv2d_7[0][0]']
conv2d_8 (Conv2D) (None, 160, 160, 3) 867 ['add_6[0][0]']
==================================================================================================
Total params: 2,058,979
Trainable params: 2,055,203
Non-trainable params: 3,776
__________________________________________________________________________________________________
None
# The sparse loss takes integer class ids as targets rather than one-hot masks.
model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop")

# Checkpoint to a single file; with save_best_only=True it is only rewritten
# when the monitored metric improves.
# NOTE(review): the monitored metric is left at the Keras default -- confirm
# it is the validation loss for the installed version.
callbacks = [
    keras.callbacks.ModelCheckpoint("oxford_segmentation.h5", save_best_only=True),
]

epochs = 20
model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=epochs,
    callbacks=callbacks,
)

# Per-pixel class probabilities for the validation sequence.
val_preds = model.predict(val_gen)
def display_mask(i):
    """Display the predicted mask for entry ``i`` of the global ``val_preds``.

    The most probable class is taken per pixel, turned into a single-channel
    image, contrast-stretched and shown inline.
    """
    # argmax over the class axis gives (H, W); re-add a channel axis because
    # array_to_img is handed a 3-D array here.
    class_ids = np.argmax(val_preds[i], axis=-1)[..., np.newaxis]
    mask_img = keras.preprocessing.image.array_to_img(class_ids)
    display(PIL.ImageOps.autocontrast(mask_img))
# Visual check on one validation sample: photo, ground-truth mask, prediction.
i = 5

# Input photo.
display(Image(filename=val_input_img_paths[i]))

# Ground-truth mask, contrast-stretched for display.
img = ImageOps.autocontrast(load_img(val_target_img_paths[i]))
display(img)

# Mask predicted by the model for the same index.
display_mask(i)