Ho provato a vedere se riuscivo ad addestrare una rete neurale Unet a riconoscere gli strati geologici in una foto. Non avendo dei dati addestrati ho preso delle foto storiche https://www.bgs.ac.uk/geological-data/opengeoscience/ selezionando per semplicita' affioramenti di arenarie (dataset finale di 116 immagini)
Tramite Labelme installato via docker
xhost +local:docker # Allow Docker to use your X server
docker run -it \
--rm \
--env="DISPLAY" \
--env="QT_X11_NO_MITSHM=1" \
--volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
--volume="/home/luca:/home/user" \
wkentaro/labelme
ho disegnato delle linestrip
Tramite un programma in Python il file Json di Labelme e' stato convertito in una maschera per Unet aggiungendo un buffer di 10 attorno alla linestrip

import json
import numpy as np
import cv2
import os
from PIL import Image, ImageDraw
def labelme_json_to_mask(json_path, output_dir, line_thickness=3): # Removed label_to_id as it's not needed for binary
"""
Converts a LabelMe JSON file into a BINARY segmentation mask image.
All annotated objects will be treated as the foreground class (value 255).
Args:
json_path (str): Path to the LabelMe JSON file.
output_dir (str): Directory where the generated mask image will be saved.
line_thickness (int): For 'linestrip' shapes, defines the thickness
(in pixels) of the line to be drawn as a mask.
Set to 0 or None to ignore linestrips.
Returns:
numpy.ndarray: The generated binary mask image.
"""
with open(json_path, 'r', encoding='utf-8') as f:
data = json.load(f)
img_height = data['imageHeight']
img_width = data['imageWidth']
# Initialize a blank mask with zeros (background)
mask = np.zeros((img_height, img_width), dtype=np.uint8)
# All annotated objects will be assigned this value
foreground_value = 255
# Iterate through shapes and draw them on the mask
for shape in data['shapes']:
label = shape['label'] # Even though we have one class, the label is still in JSON
points = shape['points']
shape_type = shape['shape_type']
# Create a temporary single-object mask using PIL for precise polygon filling
temp_mask_pil = Image.new('L', (img_width, img_height), 0) # 'L' for 8-bit pixels, black and white
draw = ImageDraw.Draw(temp_mask_pil)
if shape_type == 'polygon':
flat_points = [tuple(p) for p in points]
draw.polygon(flat_points, fill=1)
elif shape_type == 'rectangle':
x1, y1 = points[0]
x2, y2 = points[1]
draw.rectangle([x1, y1, x2, y2], fill=1)
elif shape_type == 'circle':
center_x, center_y = points[0]
radius_x, radius_y = points[1]
radius = np.sqrt((radius_x - center_x)**2 + (radius_y - center_y)**2)
bbox = [center_x - radius, center_y - radius, center_x + radius, center_y + radius]
draw.ellipse(bbox, fill=1)
elif shape_type == 'linestrip' and line_thickness > 0:
line_points = [tuple(map(int, p)) for p in points]
draw.line(line_points, fill=1, width=line_thickness)
elif shape_type == 'point':
print(f"Warning: Skipping shape type '{shape_type}' for label '{label}' in {json_path}")
continue
elif shape_type == 'mask':
print(f"Warning: Direct 'mask' shape type detected. This script currently handles polygon/rectangle/circle/linestrip by drawing. If your 'mask' type contains encoded mask data, you'll need to add decoding logic.")
continue
else:
print(f"Warning: Unsupported shape type '{shape_type}' for label '{label}' in {json_path}. Skipping.")
continue
temp_mask_np = np.array(temp_mask_pil, dtype=np.uint8) # Convert PIL mask to NumPy array
# Assign the foreground_value to pixels covered by the current shape
mask[temp_mask_np == 1] = foreground_value
# Construct output file path
base_filename = os.path.splitext(os.path.basename(json_path))[0]
output_filename = f"{base_filename}.png"
output_path = os.path.join(output_dir, output_filename)
# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)
# Save the mask
cv2.imwrite(output_path, mask) # OpenCV is good for saving grayscale PNGs
print(f"Generated mask for {os.path.basename(json_path)} saved to {output_path}")
return mask
if __name__ == "__main__":
masks_output_dir = "masks"
for root, _, files in os.walk("./"):
for file in files:
if file.endswith(".json"):
binary_mask = labelme_json_to_mask(file, masks_output_dir, line_thickness=5l)
infine la rete e' stata fatta girare su Colab
# -*- coding: utf-8 -*-
"""UNET.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/17TgxQ2MrR-xbeWzsHO86Cu4hOieY5o1P
"""
!pip install opencv-python-headless scikit-learn matplotlib
def augment(image, mask):
# Random flip
if tf.random.uniform(()) > 0.5:
image = tf.image.flip_left_right(image)
mask = tf.image.flip_left_right(mask)
# Random rotation (0°, 90°, 180°, 270°)
k = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
image = tf.image.rot90(image, k)
mask = tf.image.rot90(mask, k)
# Random brightness
image = tf.image.random_brightness(image, max_delta=0.2)
return image, mask
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import glob
import os
IMAGE_SIZE = 256
def load_images(path, size=IMAGE_SIZE):
images = []
for file in sorted(glob.glob(os.path.join(path, '*.png'))):
img = cv2.imread(file)
img = cv2.resize(img, (size, size))
images.append(img)
return np.array(images) / 255.0
def load_masks(path, size=IMAGE_SIZE):
masks = []
for file in sorted(glob.glob(os.path.join(path, '*.png'))):
mask = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
mask = cv2.resize(mask, (size, size))
mask = (mask > 127).astype(np.float32)
masks.append(mask[..., np.newaxis])
return np.array(masks)
from google.colab import drive
drive.mount('/content/drive')
drive_dataset_path = '/content/drive/MyDrive/unet/dataset'
images = load_images(os.path.join(drive_dataset_path, 'images'))
masks = load_masks(os.path.join(drive_dataset_path, 'masks_5'))
print("Images shape:", images.shape)
print("Masks shape:", masks.shape)
x_train, x_val, y_train, y_val = train_test_split(images, masks, test_size=0.2, random_state=42)
# @title Testo del titolo predefinito
import tensorflow as tf
from tensorflow.keras import layers, models
def conv_block(x, filters):
x = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
x = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
return x
def encoder_block(x, filters):
f = conv_block(x, filters)
p = layers.MaxPooling2D((2, 2))(f)
return f, p
def decoder_block(x, skip, filters):
x = layers.Conv2DTranspose(filters, 2, strides=2, padding='same')(x)
x = layers.Concatenate()([x, skip])
x = conv_block(x, filters)
return x
def build_unet(input_shape=(256, 256, 3)):
inputs = layers.Input(input_shape)
s1, p1 = encoder_block(inputs, 64)
s2, p2 = encoder_block(p1, 128)
s3, p3 = encoder_block(p2, 256)
s4, p4 = encoder_block(p3, 512)
b = conv_block(p4, 1024)
d1 = decoder_block(b, s4, 512)
d2 = decoder_block(d1, s3, 256)
d3 = decoder_block(d2, s2, 128)
d4 = decoder_block(d3, s1, 64)
outputs = layers.Conv2D(1, 1, activation='sigmoid')(d4)
return models.Model(inputs, outputs)
BATCH_SIZE = 16
BUFFER_SIZE = 100
# Convert to tf.data.Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.map(lambda x, y: (tf.cast(x, tf.float32), tf.cast(y, tf.float32)))
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.map(lambda x, y: (tf.cast(x, tf.float32), tf.cast(y, tf.float32)))
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
model = build_unet()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(
train_dataset,
validation_data=val_dataset,
epochs=10
)
import matplotlib.pyplot as plt
def show_sample(idx):
image = x_val[idx]
true_mask = y_val[idx].squeeze()
pred_mask = model.predict(image[np.newaxis, ...])[0].squeeze() > 0.04
fig, axs = plt.subplots(1, 3, figsize=(12, 4))
axs[0].imshow(image)
axs[0].set_title("Image")
axs[1].imshow(true_mask, cmap='gray')
axs[1].set_title("True Mask")
axs[2].imshow(pred_mask, cmap='gray')
axs[2].set_title("Predicted Mask")
plt.show()
show_sample(9)
from google.colab import files
model.save("lineation_unet.h5")
files.download("lineation_unet.h5")
Questi sono alcuni risultati....pensavo di riuscire a fare qualcosa di meglio