
Monday, August 11, 2025

Getaberget Image Segmentation

Update: a paper in Nature on the same topic

Using the data from the previous post, I tried the Unet, U2Net, and DeepLab neural networks to see which performed best at segmentation.

The project is too complex to fit in a single post, so I created a dedicated GitHub repository. The training files and the models' .h5 files are too large for GitHub hosting (about 2.8 GB), so they are stored on my Google Drive.

Below is a comparison of the source image, the results of the three segmentation algorithms, and the mask drawn manually for training. Note that DeepLab V3+ gives the best result, but all the algorithms found fractures in the image that were not present in the training mask.

Source image


U2Net segmentation

Unet segmentation

DeepLab V3+


Training mask
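
To put a number on this comparison rather than judge it by eye, a per-class IoU between a predicted mask and the manual mask would do; here is a minimal sketch (file names are hypothetical, and it assumes the 0/85/170/255 mask coding used in the drill-core post below):

import numpy as np
import cv2

# Pixel coding of the masks: 0 background, 85 sample, 170 joint, 255 strata
CLASS_VALUES = [0, 85, 170, 255]

def per_class_iou(pred_path, truth_path):
    """IoU of each class between two grayscale mask PNGs."""
    pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE)
    truth = cv2.imread(truth_path, cv2.IMREAD_GRAYSCALE)
    ious = {}
    for v in CLASS_VALUES:
        p, t = pred == v, truth == v
        union = np.logical_or(p, t).sum()
        ious[v] = np.logical_and(p, t).sum() / union if union else float("nan")
    return ious

# Hypothetical file names, just to show the intended use
print(per_class_iou("deeplab_pred.png", "training_mask.png"))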

Finally, let's see how DeepLab V3+ performs on two images from the test dataset, i.e. images that were never used during training. I would say the result is satisfactory.










Wednesday, August 6, 2025

DeepLab V3+ on drill cores

Using the same training dataset and masks as the previous post, I tried the DeepLab V3+ network.



Computationally, DeepLab proved more demanding than Unet, but the results, as the comparison image above shows, are clearly better, especially with regard to false positives.

pip install torch torchvision albumentations opencv-python matplotlib


import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt

# -------- CONFIG --------
IMAGE_DIR = "data/images"
MASK_DIR = "data/masks"
NUM_CLASSES = 4
EPOCHS = 20
BATCH_SIZE = 4
IMG_SIZE = 512
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# ------------------------

# Map pixel values to class indices
PIXEL_TO_CLASS = {
    0: 0,    # background
    85: 1,   # sample
    170: 2,  # joint
    255: 3   # strata
}

def convert_mask(mask):
    """Map 8-bit pixel values to class indices (0-3)."""
    new_mask = np.zeros_like(mask, dtype=np.uint8)
    for pixel_val, class_idx in PIXEL_TO_CLASS.items():
        new_mask[mask == pixel_val] = class_idx
    return new_mask

class SegmentationDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.filenames = [f for f in os.listdir(image_dir) if f.endswith(".png")]

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.filenames[idx])
        mask_path = os.path.join(self.mask_dir, self.filenames[idx])

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        mask = convert_mask(mask)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented["image"]
            mask = augmented["mask"]

        return image, mask.long()

def get_transforms():
    return A.Compose([
        A.Resize(IMG_SIZE, IMG_SIZE),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])

def train():
    # Load dataset
    transform = get_transforms()
    dataset = SegmentationDataset(IMAGE_DIR, MASK_DIR, transform=transform)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    # Load model
    model = models.segmentation.deeplabv3_resnet50(weights=None, num_classes=NUM_CLASSES)
    model = model.to(DEVICE)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Training loop
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        for images, masks in dataloader:
            images, masks = images.to(DEVICE), masks.to(DEVICE)

            outputs = model(images)["out"]
            loss = criterion(outputs, masks)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss:.4f}")

    # Save model
    torch.save(model.state_dict(), "deeplabv3_model.pth")

    # Visualize prediction on 1 sample
    visualize_prediction(model, dataset)

def visualize_prediction(model, dataset):
    model.eval()
    image, mask = dataset[0]
    with torch.no_grad():
        input_tensor = image.unsqueeze(0).to(DEVICE)
        output = model(input_tensor)["out"]
        pred = torch.argmax(output.squeeze(), dim=0).cpu().numpy()

    # Convert back to pixel values
    class_to_pixel = {v: k for k, v in PIXEL_TO_CLASS.items()}
    pred_mask_rgb = np.vectorize(class_to_pixel.get)(pred)
    true_mask_rgb = np.vectorize(class_to_pixel.get)(mask.numpy())

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 3, 1)
    plt.title("Input Image")
    plt.imshow(image.permute(1, 2, 0).cpu())

    plt.subplot(1, 3, 2)
    plt.title("Prediction")
    plt.imshow(pred_mask_rgb, cmap="jet", vmin=0, vmax=255)

    plt.subplot(1, 3, 3)
    plt.title("Ground Truth")
    plt.imshow(true_mask_rgb, cmap="jet", vmin=0, vmax=255)

    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    train()
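
For completeness, here is a minimal sketch of how the saved weights could be reloaded to segment a new image (the file names are hypothetical, and it assumes the definitions from the script above, i.e. get_transforms, PIXEL_TO_CLASS, NUM_CLASSES, and DEVICE, are in scope):

import cv2
import numpy as np
import torch
from torchvision import models

# Rebuild the architecture and load the trained weights
model = models.segmentation.deeplabv3_resnet50(weights=None, num_classes=NUM_CLASSES)
model.load_state_dict(torch.load("deeplabv3_model.pth", map_location=DEVICE))
model.to(DEVICE).eval()

# Preprocess a new image with the same transforms used for training
image = cv2.cvtColor(cv2.imread("new_core.png"), cv2.COLOR_BGR2RGB)  # hypothetical file
tensor = get_transforms()(image=image)["image"].unsqueeze(0).to(DEVICE)

with torch.no_grad():
    pred = torch.argmax(model(tensor)["out"].squeeze(0), dim=0).cpu().numpy()

# Map class indices back to the 0/85/170/255 mask values and save
class_to_pixel = {v: k for k, v in PIXEL_TO_CLASS.items()}
cv2.imwrite("new_core_mask.png", np.vectorize(class_to_pixel.get)(pred).astype(np.uint8))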



Saturday, April 20, 2024

Landslides with the DeepLabV3 network

Update

To use a T4 GPU I migrated to Google Colab.

To get Keras 3, the following changes were made:

!pip install keras --upgrade --quiet
!pip install keras-preprocessing==1.0.6
import os
os.environ["KERAS_BACKEND"] = "tensorflow"  # must be set before importing keras
import keras

Moreover, on Colab I could not save the model in .keras or .h5 format, so I used the legacy SavedModel format instead:

tf.saved_model.save(model,"/content/drive/MyDrive/UAV/salvataggio/")
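
For later inference, the SavedModel can be wrapped back into Keras 3 as an inference-only layer; a minimal sketch, assuming the same Drive path as above:

import keras

# Wrap the legacy SavedModel as an inference-only Keras layer (Keras 3)
reloaded = keras.layers.TFSMLayer(
    "/content/drive/MyDrive/UAV/salvataggio/",
    call_endpoint="serving_default",
)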

On Colab I managed to push the training to 25 epochs. The T4 is clearly superior to the M1.








------------------------------------------------------------------------


In the previous post I had found that, in the literature, the best results for landslide segmentation were obtained with the DeepLabV3 network.

I used the example at this link to try it on the same dataset as the previous post. Note that Keras 3 is required.


This is the code, slightly adapted to point to the landslide images:

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import keras
from keras import layers
from keras import ops

import os
import numpy as np
from glob import glob
import cv2
from scipy.io import loadmat
import matplotlib.pyplot as plt

# For data preprocessing
get_ipython().system('pip install tensorflow')
from tensorflow import image as tf_image
from tensorflow import data as tf_data
from tensorflow import io as tf_io


# In[2]:


IMAGE_SIZE = 512
BATCH_SIZE = 4
NUM_CLASSES = 2
#DATA_DIR = "./1/instance-level_human_parsing/instance-level_human_parsing/Training"
DATA_DIR = "./UAV"

NUM_TRAIN_IMAGES = 900
NUM_VAL_IMAGES = 100

train_images = sorted(glob(os.path.join(DATA_DIR, "img/*")))[:NUM_TRAIN_IMAGES]
train_masks = sorted(glob(os.path.join(DATA_DIR, "mask/*")))[:NUM_TRAIN_IMAGES]
val_images = sorted(glob(os.path.join(DATA_DIR, "img/*")))[
    NUM_TRAIN_IMAGES : NUM_VAL_IMAGES + NUM_TRAIN_IMAGES
]
val_masks = sorted(glob(os.path.join(DATA_DIR, "mask/*")))[
    NUM_TRAIN_IMAGES : NUM_VAL_IMAGES + NUM_TRAIN_IMAGES
]


def read_image(image_path, mask=False):
    image = tf_io.read_file(image_path)
    if mask:
        image = tf_image.decode_png(image, channels=1)
        image.set_shape([None, None, 1])
        image = tf_image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE])
    else:
        image = tf_image.decode_png(image, channels=3)
        image.set_shape([None, None, 3])
        image = tf_image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE])
    return image


def load_data(image_list, mask_list):
    image = read_image(image_list)
    mask = read_image(mask_list, mask=True)
    return image, mask


def data_generator(image_list, mask_list):
    dataset = tf_data.Dataset.from_tensor_slices((image_list, mask_list))
    dataset = dataset.map(load_data, num_parallel_calls=tf_data.AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    return dataset


train_dataset = data_generator(train_images, train_masks)
val_dataset = data_generator(val_images, val_masks)

print("Train Dataset:", train_dataset)
print("Val Dataset:", val_dataset)


# In[3]:


def convolution_block(
    block_input,
    num_filters=256,
    kernel_size=3,
    dilation_rate=1,
    use_bias=False,
):
    x = layers.Conv2D(
        num_filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        padding="same",
        use_bias=use_bias,
        kernel_initializer=keras.initializers.HeNormal(),
    )(block_input)
    x = layers.BatchNormalization()(x)
    return ops.nn.relu(x)


def DilatedSpatialPyramidPooling(dspp_input):
    dims = dspp_input.shape
    x = layers.AveragePooling2D(pool_size=(dims[-3], dims[-2]))(dspp_input)
    x = convolution_block(x, kernel_size=1, use_bias=True)
    out_pool = layers.UpSampling2D(
        size=(dims[-3] // x.shape[1], dims[-2] // x.shape[2]),
        interpolation="bilinear",
    )(x)

    out_1 = convolution_block(dspp_input, kernel_size=1, dilation_rate=1)
    out_6 = convolution_block(dspp_input, kernel_size=3, dilation_rate=6)
    out_12 = convolution_block(dspp_input, kernel_size=3, dilation_rate=12)
    out_18 = convolution_block(dspp_input, kernel_size=3, dilation_rate=18)

    x = layers.Concatenate(axis=-1)([out_pool, out_1, out_6, out_12, out_18])
    output = convolution_block(x, kernel_size=1)
    return output


# In[4]:


def DeeplabV3Plus(image_size, num_classes):
    model_input = keras.Input(shape=(image_size, image_size, 3))
    preprocessed = keras.applications.resnet50.preprocess_input(model_input)
    resnet50 = keras.applications.ResNet50(
        weights="imagenet", include_top=False, input_tensor=preprocessed
    )
    x = resnet50.get_layer("conv4_block6_2_relu").output
    x = DilatedSpatialPyramidPooling(x)

    input_a = layers.UpSampling2D(
        size=(image_size // 4 // x.shape[1], image_size // 4 // x.shape[2]),
        interpolation="bilinear",
    )(x)
    input_b = resnet50.get_layer("conv2_block3_2_relu").output
    input_b = convolution_block(input_b, num_filters=48, kernel_size=1)

    x = layers.Concatenate(axis=-1)([input_a, input_b])
    x = convolution_block(x)
    x = convolution_block(x)
    x = layers.UpSampling2D(
        size=(image_size // x.shape[1], image_size // x.shape[2]),
        interpolation="bilinear",
    )(x)
    model_output = layers.Conv2D(num_classes, kernel_size=(1, 1), padding="same")(x)
    return keras.Model(inputs=model_input, outputs=model_output)


model = DeeplabV3Plus(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)
model.summary()


# In[5]:


loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=loss,
    metrics=["accuracy"],
)

history = model.fit(train_dataset, validation_data=val_dataset, epochs=10)

plt.plot(history.history["loss"])
plt.title("Training Loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.show()

plt.plot(history.history["accuracy"])
plt.title("Training Accuracy")
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.show()

plt.plot(history.history["val_loss"])
plt.title("Validation Loss")
plt.ylabel("val_loss")
plt.xlabel("epoch")
plt.show()

plt.plot(history.history["val_accuracy"])
plt.title("Validation Accuracy")
plt.ylabel("val_accuracy")
plt.xlabel("epoch")
plt.show()


# In[6]:


# Loading the Colormap
colormap = loadmat(
    "./1/instance-level_human_parsing/instance-level_human_parsing/human_colormap.mat"
)["colormap"]
colormap = colormap * 100
colormap = colormap.astype(np.uint8)


def infer(model, image_tensor):
    predictions = model.predict(np.expand_dims((image_tensor), axis=0))
    predictions = np.squeeze(predictions)
    predictions = np.argmax(predictions, axis=2)
    return predictions


def decode_segmentation_masks(mask, colormap, n_classes):
    r = np.zeros_like(mask).astype(np.uint8)
    g = np.zeros_like(mask).astype(np.uint8)
    b = np.zeros_like(mask).astype(np.uint8)
    for l in range(0, n_classes):
        idx = mask == l
        r[idx] = colormap[l, 0]
        g[idx] = colormap[l, 1]
        b[idx] = colormap[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb


def get_overlay(image, colored_mask):
    image = keras.utils.array_to_img(image)
    image = np.array(image).astype(np.uint8)
    overlay = cv2.addWeighted(image, 0.35, colored_mask, 0.65, 0)
    return overlay


def plot_samples_matplotlib(display_list, figsize=(5, 3)):
    _, axes = plt.subplots(nrows=1, ncols=len(display_list), figsize=figsize)
    for i in range(len(display_list)):
        if display_list[i].shape[-1] == 3:
            axes[i].imshow(keras.utils.array_to_img(display_list[i]))
        else:
            axes[i].imshow(display_list[i])
    plt.show()


def plot_predictions(images_list, colormap, model):
    for image_file in images_list:
        image_tensor = read_image(image_file)
        prediction_mask = infer(image_tensor=image_tensor, model=model)
        prediction_colormap = decode_segmentation_masks(prediction_mask, colormap, 20)
        overlay = get_overlay(image_tensor, prediction_colormap)
        plot_samples_matplotlib(
            [image_tensor, overlay, prediction_colormap], figsize=(18, 14)
        )


# In[7]:


plot_predictions(train_images[:4], colormap, model=model)


# ### Inference on Validation Images
#
# You can use the trained model hosted on [Hugging Face Hub](https://huggingface.co/keras-io/deeplabv3p-resnet50)
# and try the demo on [Hugging Face Spaces](https://huggingface.co/spaces/keras-io/Human-Part-Segmentation).

# In[8]:


plot_predictions(val_images[:4], colormap, model=model)


# In[12]:


get_ipython().system('pip install ipython')
get_ipython().system('mkdir -p saved_model_2')
model.save('saved_model_2/landslide.keras')


Even on the M1 the computation is very slow... more than 10 epochs would have been needed to reach the asymptote of the loss, but each epoch took about 25 minutes for roughly 1000 images.







In conclusion, the accuracy is clearly higher than with Unet, confirming what is reported in the literature.
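
To put a number on that claim, a minimal check, assuming the training session above is still open, is to evaluate the model on the validation split:

# Accuracy of the trained DeepLabV3+ on the validation split defined above
val_loss, val_acc = model.evaluate(val_dataset)
print(f"validation accuracy: {val_acc:.3f}")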












