Moreover, on Colab I was not able to save the model in the .keras or .h5 format; for this reason I used the legacy data format.
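As a sketch of the fallback (assuming tf.keras on Colab; the directory name is illustrative), the model can be written out as a legacy TensorFlow SavedModel directory instead of a single .keras/.h5 file:

import tensorflow as tf
# Legacy fallback: writes a SavedModel directory rather than a single file
tf.saved_model.save(model, "saved_model_legacy")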
On Colab I was able to push the training up to 25 epochs. The T4 is clearly superior to the M1.
In the previous post I had found that, in the literature, the best results for landslide segmentation were obtained with the DeepLabV3 network.
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import keras
from keras import layers
from keras import ops
import os
import numpy as np
from glob import glob
import cv2
from scipy.io import loadmat
import matplotlib.pyplot as plt
# For data preprocessing
get_ipython().system('pip install tensorflow')
from tensorflow import image as tf_image
from tensorflow import data as tf_data
from tensorflow import io as tf_io
# In[2]:
IMAGE_SIZE = 512
BATCH_SIZE = 4
NUM_CLASSES = 2
#DATA_DIR = "./1/instance-level_human_parsing/instance-level_human_parsing/Training"
DATA_DIR = "./UAV"
NUM_TRAIN_IMAGES = 900
NUM_VAL_IMAGES = 100
train_images = sorted(glob(os.path.join(DATA_DIR, "img/*")))[:NUM_TRAIN_IMAGES]
train_masks = sorted(glob(os.path.join(DATA_DIR, "mask/*")))[:NUM_TRAIN_IMAGES]
val_images = sorted(glob(os.path.join(DATA_DIR, "img/*")))[
NUM_TRAIN_IMAGES : NUM_VAL_IMAGES + NUM_TRAIN_IMAGES
]
val_masks = sorted(glob(os.path.join(DATA_DIR, "mask/*")))[
NUM_TRAIN_IMAGES : NUM_VAL_IMAGES + NUM_TRAIN_IMAGES
]
def read_image(image_path, mask=False):
image = tf_io.read_file(image_path)
if mask:
image = tf_image.decode_png(image, channels=1)
image.set_shape([None, None, 1])
image = tf_image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE])
else:
image = tf_image.decode_png(image, channels=3)
image.set_shape([None, None, 3])
image = tf_image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE])
return image
def load_data(image_list, mask_list):
image = read_image(image_list)
mask = read_image(mask_list, mask=True)
return image, mask
def data_generator(image_list, mask_list):
dataset = tf_data.Dataset.from_tensor_slices((image_list, mask_list))
dataset = dataset.map(load_data, num_parallel_calls=tf_data.AUTOTUNE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
return dataset
train_dataset = data_generator(train_images, train_masks)
val_dataset = data_generator(val_images, val_masks)
print("Train Dataset:", train_dataset)
print("Val Dataset:", val_dataset)
# In[3]:
def convolution_block(
block_input,
num_filters=256,
kernel_size=3,
dilation_rate=1,
use_bias=False,
):
x = layers.Conv2D(
num_filters,
kernel_size=kernel_size,
dilation_rate=dilation_rate,
padding="same",
use_bias=use_bias,
kernel_initializer=keras.initializers.HeNormal(),
)(block_input)
x = layers.BatchNormalization()(x)
return ops.nn.relu(x)
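# ASPP (Dilated Spatial Pyramid Pooling): parallel dilated convolutions with
# rates 1, 6, 12 and 18 plus an image-level pooling branch, concatenated and
# fused with a 1x1 convolution.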
def DilatedSpatialPyramidPooling(dspp_input):
dims = dspp_input.shape
x = layers.AveragePooling2D(pool_size=(dims[-3], dims[-2]))(dspp_input)
x = convolution_block(x, kernel_size=1, use_bias=True)
out_pool = layers.UpSampling2D(
size=(dims[-3] // x.shape[1], dims[-2] // x.shape[2]),
interpolation="bilinear",
)(x)
out_1 = convolution_block(dspp_input, kernel_size=1, dilation_rate=1)
out_6 = convolution_block(dspp_input, kernel_size=3, dilation_rate=6)
out_12 = convolution_block(dspp_input, kernel_size=3, dilation_rate=12)
out_18 = convolution_block(dspp_input, kernel_size=3, dilation_rate=18)
x = layers.Concatenate(axis=-1)([out_pool, out_1, out_6, out_12, out_18])
output = convolution_block(x, kernel_size=1)
return output
# In[4]:
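# DeepLabV3+: ResNet50 encoder pretrained on ImageNet, ASPP applied to the
# conv4 feature map, and a decoder that fuses low-level conv2 features before
# bilinear upsampling back to the input resolution.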
def DeeplabV3Plus(image_size, num_classes):
model_input = keras.Input(shape=(image_size, image_size, 3))
preprocessed = keras.applications.resnet50.preprocess_input(model_input)
resnet50 = keras.applications.ResNet50(
weights="imagenet", include_top=False, input_tensor=preprocessed
)
x = resnet50.get_layer("conv4_block6_2_relu").output
x = DilatedSpatialPyramidPooling(x)
input_a = layers.UpSampling2D(
size=(image_size // 4 // x.shape[1], image_size // 4 // x.shape[2]),
interpolation="bilinear",
)(x)
input_b = resnet50.get_layer("conv2_block3_2_relu").output
input_b = convolution_block(input_b, num_filters=48, kernel_size=1)
x = layers.Concatenate(axis=-1)([input_a, input_b])
x = convolution_block(x)
x = convolution_block(x)
x = layers.UpSampling2D(
size=(image_size // x.shape[1], image_size // x.shape[2]),
interpolation="bilinear",
)(x)
model_output = layers.Conv2D(num_classes, kernel_size=(1, 1), padding="same")(x)
return keras.Model(inputs=model_input, outputs=model_output)
model = DeeplabV3Plus(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)
model.summary()
# In[5]:
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=0.001),
loss=loss,
metrics=["accuracy"],
)
history = model.fit(train_dataset, validation_data=val_dataset, epochs=10)
plt.plot(history.history["loss"])
plt.title("Training Loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.show()
plt.plot(history.history["accuracy"])
plt.title("Training Accuracy")
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.show()
plt.plot(history.history["val_loss"])
plt.title("Validation Loss")
plt.ylabel("val_loss")
plt.xlabel("epoch")
plt.show()
plt.plot(history.history["val_accuracy"])
plt.title("Validation Accuracy")
plt.ylabel("val_accuracy")
plt.xlabel("epoch")
plt.show()
# In[6]:
# Loading the Colormap
colormap = loadmat(
"./1/instance-level_human_parsing/instance-level_human_parsing/human_colormap.mat"
)["colormap"]
colormap = colormap * 100
colormap = colormap.astype(np.uint8)
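# Note: the 20-class human-parsing colormap is only reused here for convenience.
# If the .mat file is not available, a minimal two-color map would suffice for
# this binary task (colors chosen arbitrarily):
# colormap = np.array([[0, 0, 0], [255, 0, 0]], dtype=np.uint8)  # background, landslide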
def infer(model, image_tensor):
predictions = model.predict(np.expand_dims((image_tensor), axis=0))
predictions = np.squeeze(predictions)
predictions = np.argmax(predictions, axis=2)
return predictions
def decode_segmentation_masks(mask, colormap, n_classes):
r = np.zeros_like(mask).astype(np.uint8)
g = np.zeros_like(mask).astype(np.uint8)
b = np.zeros_like(mask).astype(np.uint8)
for l in range(0, n_classes):
idx = mask == l
r[idx] = colormap[l, 0]
g[idx] = colormap[l, 1]
b[idx] = colormap[l, 2]
rgb = np.stack([r, g, b], axis=2)
return rgb
def get_overlay(image, colored_mask):
image = keras.utils.array_to_img(image)
image = np.array(image).astype(np.uint8)
overlay = cv2.addWeighted(image, 0.35, colored_mask, 0.65, 0)
return overlay
def plot_samples_matplotlib(display_list, figsize=(5, 3)):
_, axes = plt.subplots(nrows=1, ncols=len(display_list), figsize=figsize)
for i in range(len(display_list)):
if display_list[i].shape[-1] == 3:
axes[i].imshow(keras.utils.array_to_img(display_list[i]))
else:
axes[i].imshow(display_list[i])
plt.show()
def plot_predictions(images_list, colormap, model):
for image_file in images_list:
image_tensor = read_image(image_file)
prediction_mask = infer(image_tensor=image_tensor, model=model)
        prediction_colormap = decode_segmentation_masks(prediction_mask, colormap, NUM_CLASSES)  # only 2 classes in this task
overlay = get_overlay(image_tensor, prediction_colormap)
plot_samples_matplotlib(
[image_tensor, overlay, prediction_colormap], figsize=(18, 14)
)
# In[7]:
plot_predictions(train_images[:4], colormap, model=model)
# ### Inference on Validation Images
# In[8]:
plot_predictions(val_images[:4], colormap, model=model)
# In[12]:
get_ipython().system('pip install ipython')
get_ipython().system('mkdir -p saved_model_2')
model.save('saved_model_2/landslide.keras')
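# To reload the trained model later (minimal sketch; the path matches the save above):
# model = keras.models.load_model('saved_model_2/landslide.keras')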
On the M1 as well the computation is very slow: more than 10 epochs would have been needed to reach the asymptote of the loss, but each epoch took about 25 minutes for roughly 1000 images.
In conclusion, the accuracy is clearly higher than with U-Net, confirming what is reported in the literature.
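Since the background class dominates the images, plain accuracy can be optimistic for segmentation; a sketch of a mean-IoU check on the validation set (an addition, not part of the original script) could look like:

miou = keras.metrics.MeanIoU(num_classes=NUM_CLASSES)
for images, masks in val_dataset:
    # collapse per-class logits to a label map, then accumulate the confusion matrix
    preds = np.argmax(model.predict(images, verbose=0), axis=-1)
    miou.update_state(masks[..., 0], preds)
print("Validation mean IoU:", float(miou.result()))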