인공지능 공부/컴퓨터 비전

(NIA 데이터셋 과제준비) Oxford-IIIT Pet Dataset

앨런튜링_ 2022. 4. 11. 14:02
# Images: https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
# Annotations: https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
from IPython.display import Image, display
from tensorflow.keras.preprocessing.image import load_img
import PIL
from PIL import ImageOps
import os
from tensorflow.keras import layers
# Locations of the extracted images and trimap annotations.
input_dir = "/root/yj/yj/Kaggle/data/images"
target_dir = "/root/yj/yj/Kaggle/data/annotations/trimaps"

# Settings shared by the data pipeline and the model below.
img_size = (160, 160)
num_classes = 3
batch_size = 32

# Both listings are sorted so that position k in one list is meant to refer
# to the same sample as position k in the other.
input_img_paths = sorted(
    os.path.join(input_dir, fname)
    for fname in os.listdir(input_dir)
    if fname.endswith('.jpg')
)

# Dot-prefixed (hidden) files are excluded from the mask listing.
target_img_paths = sorted(
    os.path.join(target_dir, fname)
    for fname in os.listdir(target_dir)
    if not fname.startswith('.') and fname.endswith('.png')
)

# Preview sample 7: the photo, then its mask with the contrast stretched so
# the small label values become visible.
display(Image(filename=input_img_paths[7]))
img = ImageOps.autocontrast(load_img(target_img_paths[7]))
display(img)

# Preview sample 20 in the same way.
display(Image(filename=input_img_paths[20]))
img = ImageOps.autocontrast(load_img(target_img_paths[20]))
display(img)

데이터 전처리

import numpy as np
from tensorflow import keras

class OxfordPets(keras.utils.Sequence):
    """Batch loader that turns image/mask file paths into NumPy arrays.

    Each item is a pair ``(x, y)``:

    * ``x`` -- ``float32`` array of shape ``(batch_size,) + img_size + (3,)``
      holding the input images resized to ``img_size``.
    * ``y`` -- ``uint8`` array of shape ``(batch_size,) + img_size + (1,)``
      holding the masks, loaded as grayscale and shifted down by one.

    Only full batches are served: ``len()`` floor-divides the number of
    target paths by ``batch_size``, so a trailing partial batch is dropped.

    Args:
        batch_size: Number of samples per batch.
        img_size: Tuple with the size every image and mask is resized to.
        input_img_paths: Sequence of image file paths.
        target_img_paths: Sequence of mask file paths; entry ``k`` is used as
            the mask for ``input_img_paths[k]``.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        # Initialise the Keras base class as well. Keras 3 (where Sequence is
        # backed by PyDataset) emits a warning when a subclass skips this
        # call; on older versions it is a harmless no-op.
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        """Return the number of full batches available."""
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Load batch number ``idx`` and return it as ``(x, y)`` arrays."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_input_img_paths = self.input_img_paths[start:stop]
        batch_target_img_paths = self.target_img_paths[start:stop]

        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            x[j] = load_img(path, target_size=self.img_size)

        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            mask = load_img(path, target_size=self.img_size, color_mode='grayscale')
            # Add the trailing channel axis expected by the target array.
            y[j] = np.expand_dims(mask, 2)
            # Shift the stored labels down by one so they start at 0
            # (presumably the trimaps use 1, 2, 3 -- confirm against the
            # dataset description).
            y[j] -= 1

        return x, y
import random

# Number of samples held out for validation.
val_samples = 1000

# Two generators seeded identically apply the same permutation to both lists
# (they have to be the same length), which keeps images and masks paired.
random.Random(1337).shuffle(input_img_paths)
random.Random(1337).shuffle(target_img_paths)

# Everything except the last `val_samples` entries is used for training; the
# last `val_samples` entries form the validation set.
train_input_img_paths, val_input_img_paths = (
    input_img_paths[:-val_samples],
    input_img_paths[-val_samples:],
)
train_target_img_paths, val_target_img_paths = (
    target_img_paths[:-val_samples],
    target_img_paths[-val_samples:],
)

# One batch loader per split.
train_gen = OxfordPets(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=train_input_img_paths,
    target_img_paths=train_target_img_paths,
)
val_gen = OxfordPets(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=val_input_img_paths,
    target_img_paths=val_target_img_paths,
)
def get_model(img_size, num_classes):
    """Build the encoder/decoder segmentation network.

    Args:
        img_size: Tuple with the spatial input size; ``(3,)`` is appended to
            it to form the input shape.
        num_classes: Number of filters of the final softmax convolution,
            i.e. the number of classes predicted per pixel.

    Returns:
        An uncompiled ``keras.Model`` from the image input to the softmax
        output.
    """
    inputs = keras.Input(shape=img_size + (3,))

    # Entry block: a stride-2 convolution followed by batch norm and ReLU.
    net = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    net = layers.BatchNormalization()(net)
    net = layers.Activation("relu")(net)

    # Tensor that feeds the residual branch of the upcoming block.
    shortcut = net

    # Downsampling half: two ReLU -> separable conv -> batch norm stages,
    # a stride-2 max-pool, and a stride-2 1x1 projection of the shortcut
    # that is added back in.
    for depth in (64, 128, 256):
        for _ in range(2):
            net = layers.Activation("relu")(net)
            net = layers.SeparableConv2D(depth, 3, padding="same")(net)
            net = layers.BatchNormalization()(net)

        net = layers.MaxPooling2D(3, strides=2, padding="same")(net)

        projected = layers.Conv2D(depth, 1, strides=2, padding="same")(shortcut)
        net = layers.add([net, projected])
        shortcut = net

    # Upsampling half: two ReLU -> transposed conv -> batch norm stages,
    # a 2x upsampling, and an upsampled 1x1 projection of the shortcut that
    # is added back in.
    for depth in (256, 128, 64, 32):
        for _ in range(2):
            net = layers.Activation("relu")(net)
            net = layers.Conv2DTranspose(depth, 3, padding="same")(net)
            net = layers.BatchNormalization()(net)

        net = layers.UpSampling2D(2)(net)

        projected = layers.UpSampling2D(2)(shortcut)
        projected = layers.Conv2D(depth, 1, padding="same")(projected)
        net = layers.add([net, projected])
        shortcut = net

    # Per-pixel classification head.
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(net)

    return keras.Model(inputs, outputs)


# Build the network for the configured input size and class count, then
# print its layer-by-layer summary.
model = get_model(img_size, num_classes)
model.summary()
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
==================================================================================================
 input_1 (InputLayer)           [(None, 160, 160, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 80, 80, 32)   896         ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 80, 80, 32)  128         ['conv2d[0][0]']                 
 alization)                                                                                       
                                                                                                  
 activation (Activation)        (None, 80, 80, 32)   0           ['batch_normalization[0][0]']    
                                                                                                  
 activation_1 (Activation)      (None, 80, 80, 32)   0           ['activation[0][0]']             
                                                                                                  
 separable_conv2d (SeparableCon  (None, 80, 80, 64)  2400        ['activation_1[0][0]']           
 v2D)                                                                                             
                                                                                                  
 batch_normalization_1 (BatchNo  (None, 80, 80, 64)  256         ['separable_conv2d[0][0]']       
 rmalization)                                                                                     
                                                                                                  
 activation_2 (Activation)      (None, 80, 80, 64)   0           ['batch_normalization_1[0][0]']  
                                                                                                  
 separable_conv2d_1 (SeparableC  (None, 80, 80, 64)  4736        ['activation_2[0][0]']           
 onv2D)                                                                                           
                                                                                                  
 batch_normalization_2 (BatchNo  (None, 80, 80, 64)  256         ['separable_conv2d_1[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 40, 40, 64)   0           ['batch_normalization_2[0][0]']  
                                                                                                  
 conv2d_1 (Conv2D)              (None, 40, 40, 64)   2112        ['activation[0][0]']             
                                                                                                  
 add (Add)                      (None, 40, 40, 64)   0           ['max_pooling2d[0][0]',          
                                                                  'conv2d_1[0][0]']               
                                                                                                  
 activation_3 (Activation)      (None, 40, 40, 64)   0           ['add[0][0]']                    
                                                                                                  
 separable_conv2d_2 (SeparableC  (None, 40, 40, 128)  8896       ['activation_3[0][0]']           
 onv2D)                                                                                           
                                                                                                  
 batch_normalization_3 (BatchNo  (None, 40, 40, 128)  512        ['separable_conv2d_2[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 activation_4 (Activation)      (None, 40, 40, 128)  0           ['batch_normalization_3[0][0]']  
                                                                                                  
 separable_conv2d_3 (SeparableC  (None, 40, 40, 128)  17664      ['activation_4[0][0]']           
 onv2D)                                                                                           
                                                                                                  
 batch_normalization_4 (BatchNo  (None, 40, 40, 128)  512        ['separable_conv2d_3[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 max_pooling2d_1 (MaxPooling2D)  (None, 20, 20, 128)  0          ['batch_normalization_4[0][0]']  
                                                                                                  
 conv2d_2 (Conv2D)              (None, 20, 20, 128)  8320        ['add[0][0]']                    
                                                                                                  
 add_1 (Add)                    (None, 20, 20, 128)  0           ['max_pooling2d_1[0][0]',        
                                                                  'conv2d_2[0][0]']               
                                                                                                  
 activation_5 (Activation)      (None, 20, 20, 128)  0           ['add_1[0][0]']                  
                                                                                                  
 separable_conv2d_4 (SeparableC  (None, 20, 20, 256)  34176      ['activation_5[0][0]']           
 onv2D)                                                                                           
                                                                                                  
 batch_normalization_5 (BatchNo  (None, 20, 20, 256)  1024       ['separable_conv2d_4[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 activation_6 (Activation)      (None, 20, 20, 256)  0           ['batch_normalization_5[0][0]']  
                                                                                                  
 separable_conv2d_5 (SeparableC  (None, 20, 20, 256)  68096      ['activation_6[0][0]']           
 onv2D)                                                                                           
                                                                                                  
 batch_normalization_6 (BatchNo  (None, 20, 20, 256)  1024       ['separable_conv2d_5[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 max_pooling2d_2 (MaxPooling2D)  (None, 10, 10, 256)  0          ['batch_normalization_6[0][0]']  
                                                                                                  
 conv2d_3 (Conv2D)              (None, 10, 10, 256)  33024       ['add_1[0][0]']                  
                                                                                                  
 add_2 (Add)                    (None, 10, 10, 256)  0           ['max_pooling2d_2[0][0]',        
                                                                  'conv2d_3[0][0]']               
                                                                                                  
 activation_7 (Activation)      (None, 10, 10, 256)  0           ['add_2[0][0]']                  
                                                                                                  
 conv2d_transpose (Conv2DTransp  (None, 10, 10, 256)  590080     ['activation_7[0][0]']           
 ose)                                                                                             
                                                                                                  
 batch_normalization_7 (BatchNo  (None, 10, 10, 256)  1024       ['conv2d_transpose[0][0]']       
 rmalization)                                                                                     
                                                                                                  
 activation_8 (Activation)      (None, 10, 10, 256)  0           ['batch_normalization_7[0][0]']  
                                                                                                  
 conv2d_transpose_1 (Conv2DTran  (None, 10, 10, 256)  590080     ['activation_8[0][0]']           
 spose)                                                                                           
                                                                                                  
 batch_normalization_8 (BatchNo  (None, 10, 10, 256)  1024       ['conv2d_transpose_1[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 up_sampling2d_1 (UpSampling2D)  (None, 20, 20, 256)  0          ['add_2[0][0]']                  
                                                                                                  
 up_sampling2d (UpSampling2D)   (None, 20, 20, 256)  0           ['batch_normalization_8[0][0]']  
                                                                                                  
 conv2d_4 (Conv2D)              (None, 20, 20, 256)  65792       ['up_sampling2d_1[0][0]']        
                                                                                                  
 add_3 (Add)                    (None, 20, 20, 256)  0           ['up_sampling2d[0][0]',          
                                                                  'conv2d_4[0][0]']               
                                                                                                  
 activation_9 (Activation)      (None, 20, 20, 256)  0           ['add_3[0][0]']                  
                                                                                                  
 conv2d_transpose_2 (Conv2DTran  (None, 20, 20, 128)  295040     ['activation_9[0][0]']           
 spose)                                                                                           
                                                                                                  
 batch_normalization_9 (BatchNo  (None, 20, 20, 128)  512        ['conv2d_transpose_2[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 activation_10 (Activation)     (None, 20, 20, 128)  0           ['batch_normalization_9[0][0]']  
                                                                                                  
 conv2d_transpose_3 (Conv2DTran  (None, 20, 20, 128)  147584     ['activation_10[0][0]']          
 spose)                                                                                           
                                                                                                  
 batch_normalization_10 (BatchN  (None, 20, 20, 128)  512        ['conv2d_transpose_3[0][0]']     
 ormalization)                                                                                    
                                                                                                  
 up_sampling2d_3 (UpSampling2D)  (None, 40, 40, 256)  0          ['add_3[0][0]']                  
                                                                                                  
 up_sampling2d_2 (UpSampling2D)  (None, 40, 40, 128)  0          ['batch_normalization_10[0][0]'] 
                                                                                                  
 conv2d_5 (Conv2D)              (None, 40, 40, 128)  32896       ['up_sampling2d_3[0][0]']        
                                                                                                  
 add_4 (Add)                    (None, 40, 40, 128)  0           ['up_sampling2d_2[0][0]',        
                                                                  'conv2d_5[0][0]']               
                                                                                                  
 activation_11 (Activation)     (None, 40, 40, 128)  0           ['add_4[0][0]']                  
                                                                                                  
 conv2d_transpose_4 (Conv2DTran  (None, 40, 40, 64)  73792       ['activation_11[0][0]']          
 spose)                                                                                           
                                                                                                  
 batch_normalization_11 (BatchN  (None, 40, 40, 64)  256         ['conv2d_transpose_4[0][0]']     
 ormalization)                                                                                    
                                                                                                  
 activation_12 (Activation)     (None, 40, 40, 64)   0           ['batch_normalization_11[0][0]'] 
                                                                                                  
 conv2d_transpose_5 (Conv2DTran  (None, 40, 40, 64)  36928       ['activation_12[0][0]']          
 spose)                                                                                           
                                                                                                  
 batch_normalization_12 (BatchN  (None, 40, 40, 64)  256         ['conv2d_transpose_5[0][0]']     
 ormalization)                                                                                    
                                                                                                  
 up_sampling2d_5 (UpSampling2D)  (None, 80, 80, 128)  0          ['add_4[0][0]']                  
                                                                                                  
 up_sampling2d_4 (UpSampling2D)  (None, 80, 80, 64)  0           ['batch_normalization_12[0][0]'] 
                                                                                                  
 conv2d_6 (Conv2D)              (None, 80, 80, 64)   8256        ['up_sampling2d_5[0][0]']        
                                                                                                  
 add_5 (Add)                    (None, 80, 80, 64)   0           ['up_sampling2d_4[0][0]',        
                                                                  'conv2d_6[0][0]']               
                                                                                                  
 activation_13 (Activation)     (None, 80, 80, 64)   0           ['add_5[0][0]']                  
                                                                                                  
 conv2d_transpose_6 (Conv2DTran  (None, 80, 80, 32)  18464       ['activation_13[0][0]']          
 spose)                                                                                           
                                                                                                  
 batch_normalization_13 (BatchN  (None, 80, 80, 32)  128         ['conv2d_transpose_6[0][0]']     
 ormalization)                                                                                    
                                                                                                  
 activation_14 (Activation)     (None, 80, 80, 32)   0           ['batch_normalization_13[0][0]'] 
                                                                                                  
 conv2d_transpose_7 (Conv2DTran  (None, 80, 80, 32)  9248        ['activation_14[0][0]']          
 spose)                                                                                           
                                                                                                  
 batch_normalization_14 (BatchN  (None, 80, 80, 32)  128         ['conv2d_transpose_7[0][0]']     
 ormalization)                                                                                    
                                                                                                  
 up_sampling2d_7 (UpSampling2D)  (None, 160, 160, 64  0          ['add_5[0][0]']                  
                                )                                                                 
                                                                                                  
 up_sampling2d_6 (UpSampling2D)  (None, 160, 160, 32  0          ['batch_normalization_14[0][0]'] 
                                )                                                                 
                                                                                                  
 conv2d_7 (Conv2D)              (None, 160, 160, 32  2080        ['up_sampling2d_7[0][0]']        
                                )                                                                 
                                                                                                  
 add_6 (Add)                    (None, 160, 160, 32  0           ['up_sampling2d_6[0][0]',        
                                )                                 'conv2d_7[0][0]']               
                                                                                                  
 conv2d_8 (Conv2D)              (None, 160, 160, 3)  867         ['add_6[0][0]']                  
                                                                                                  
==================================================================================================
Total params: 2,058,979
Trainable params: 2,055,203
Non-trainable params: 3,776
__________________________________________________________________________________________________
None
# The sparse variant of categorical cross-entropy is used because the targets
# are integer class indices rather than one-hot vectors.
model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop")

epochs = 20

# Checkpoint to disk during training, keeping only the best model seen.
callbacks = [
    keras.callbacks.ModelCheckpoint("oxford_segmentation.h5", save_best_only=True),
]

model.fit(
    train_gen,
    epochs=epochs,
    validation_data=val_gen,
    callbacks=callbacks,
)

# Predictions for every batch served by the validation loader.
val_preds = model.predict(val_gen)

def display_mask(i):
    """Display the predicted mask for entry ``i`` of ``val_preds``."""
    # Collapse the last axis to the index of its largest value, i.e. the
    # predicted class for each position.
    class_map = np.argmax(val_preds[i], axis=-1)
    # Append a trailing channel axis before converting to an image.
    class_map = class_map[..., np.newaxis]

    as_image = keras.preprocessing.image.array_to_img(class_map)
    # Stretch the contrast so the few distinct class values are visible.
    display(PIL.ImageOps.autocontrast(as_image))
    
# Qualitative check on one validation sample: show the input photo, the
# contrast-stretched ground-truth mask, and then the predicted mask.
i = 5
display(Image(filename=val_input_img_paths[i]))
img = ImageOps.autocontrast(load_img(val_target_img_paths[i]))
display(img)
display_mask(i)