
Sunday, February 15, 2026

MARIDA dataset

Kikaki K, Kakogeorgiou I, Mikeli P, Raitsos DE, Karantzalos K (2022) MARIDA: A benchmark for Marine Debris detection from Sentinel-2 remote sensing data. PLoS ONE 17(1): e0262247. https://doi.org/10.1371/journal.pone.0262247

///////////////////////////////////////////  

MARIDA is a freely downloadable dataset of 1381 classified Sentinel-2 images that can be used to test algorithms.

 

It can be downloaded from https://www.kaggle.com/datasets/weinima/marida or https://zenodo.org/records/5151941

The patches folder contains 11-band GeoTIFFs of 256x256 pixels; each patch has a companion file with the same name ending in *_cl.tif that holds the per-pixel classification of the multispectral patch.
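As a quick sanity check, a patch and its classification mask can be opened with rasterio (a minimal sketch; the patch name is just a placeholder):

import rasterio
import numpy as np

# hypothetical patch name: any *.tif in the patches folder
with rasterio.open("patches/example_patch.tif") as src:
    bands = src.read()          # shape (11, 256, 256)
    print(bands.shape, src.dtypes[0])

# the companion *_cl.tif holds the per-pixel class codes
with rasterio.open("patches/example_patch_cl.tif") as src:
    mask = src.read(1)          # shape (256, 256)
    print(np.unique(mask))      # class codes present in this patch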

Classes

1: Marine Debris
2: Dense Sargassum
3: Sparse Sargassum
4: Natural Organic Material
5: Ship
6: Clouds
7: Marine Water
8: Sediment-Laden Water
9: Foam
10: Turbid Water
11: Shallow Water
12: Waves
13: Cloud Shadows
14: Wakes
15: Mixed Water
 

 


As can be seen, the number of samples per class is extremely uneven.


 


To train a random forest I built a CSV file in which each row holds the values of the 11 bands plus the class label.

import rasterio
from pathlib import Path
import os
import sys

# count rows per class in the resulting CSV:
# awk -F, '{counts[$NF]++} END {for (val in counts) print val, counts[val]}' yourfile.csv | sort -n

folder_path = './train/'
files = [f for f in os.listdir(folder_path)
         if f.endswith('.tif') and not f.endswith('_cl.tif')]

for filename in files:
    base_name = Path(filename).stem
    print(base_name, file=sys.stderr)  # progress on stderr, CSV rows on stdout

    # open the multispectral patch and its category mask once per patch,
    # not once per pixel
    with rasterio.open(folder_path + base_name + ".tif") as src:
        data = src.read()                 # all 11 bands, shape (11, 256, 256)
    with rasterio.open(folder_path + base_name + "_cl.tif") as src:
        cat = src.read(1)                 # class mask, shape (256, 256)

    rows, cols = cat.shape
    for r in range(rows):
        for c in range(cols):
            pixel_values = data[:, r, c]
            csv_string = ",".join(map(str, pixel_values))
            csv_string = csv_string + "," + str(int(cat[r, c]))
            print(csv_string)


One of the problems is that class 0 alone accounts for 99% of the rows:

0 740486
1 73
2 40
3 42
5 200
6 16
7 223
8 13828
9 2
10 120
11 67
13 27
14 254
15 5
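The same count can be reproduced in pandas instead of awk (a minimal sketch, assuming the CSV has no header row and the label is in the last column):

import pandas as pd

# assumption: no header row, class label in the last column
df = pd.read_csv('spettri3.csv', header=None)
print(df[df.columns[-1]].value_counts().sort_index())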

Let's try using the CSV file to train a random forest (run on Google Colab).

 

from google.colab import drive
drive.mount('/content/drive')

# Install the matching pair for stability
!pip install tensorflow-decision-forests wurlitzer

import os
# Force Keras 2 usage for TF-DF compatibility
# (must be set BEFORE importing tensorflow)
os.environ['TF_USE_LEGACY_KERAS'] = '1'

import pandas as pd
import tensorflow as tf
import tensorflow_decision_forests as tfdf
from sklearn.model_selection import train_test_split

# Verify the versions
print(f"TensorFlow version: {tf.__version__}")
print(f"TF-DF version: {tfdf.__version__}")

# 1. Load Data
df = pd.read_csv('/content/drive/My Drive/spettri3.csv')

# the category is in the last column
LABEL = df.columns[-1]

# 2. Split Data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# 3. Convert to TF Dataset
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label=LABEL)
test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_df, label=LABEL)

# 4. Create and Train Random Forest
model = tfdf.keras.RandomForestModel(task=tfdf.keras.Task.CLASSIFICATION)
model.compile(metrics=["accuracy"])

print("Starting Training...")
model.fit(train_ds)

# 5. Summary and Evaluation
print("\n--- Model Summary ---")
model.summary()

evaluation = model.evaluate(test_ds, return_dict=True)
print(f"\nTest Accuracy: {evaluation['accuracy']:.4f}")

# 6. Save the Model
model.save("exported_model")
print("Model saved to exported_model")

import numpy as np
from sklearn.metrics import classification_report

# Get predictions on the test dataset
# The predict method returns probabilities for each class
predictions_prob = model.predict(test_ds)

# Convert probabilities to predicted class labels
# (argmax returns the index of the probability column; see the caveat
# right after this script about mapping indices back to class codes)
predicted_labels = np.argmax(predictions_prob, axis=1)

# Extract true labels from test_df (LABEL is the last column of df)
true_labels = test_df[LABEL].values

# Get unique class labels (categories)
class_labels = np.unique(true_labels)

# Generate a classification report with precision, recall and F1-score per class
print("\n--- Classification Report per Category ---")
print(classification_report(true_labels, predicted_labels, target_names=[str(c) for c in class_labels]))

# You can also compute accuracy per class manually
print("\n--- Accuracy per Category ---")
for cls in class_labels:
    idx = (true_labels == cls)
    correct_predictions_for_cls = (predicted_labels[idx] == cls).sum()
    total_predictions_for_cls = idx.sum()
    if total_predictions_for_cls > 0:
        accuracy_for_cls = correct_predictions_for_cls / total_predictions_for_cls
        print(f"Category {cls}: Accuracy = {accuracy_for_cls:.4f}")
    else:
        print(f"Category {cls}: No true instances in test set.")
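One caveat on the block above: some class codes are absent from the data (4 and 12), so the column index returned by argmax does not necessarily coincide with the original class code. A sketch of how the mapping could be recovered through the TF-DF model inspector (assuming the usual TF-DF API):

# sketch: map probability-column indices back to the original class codes
inspector = model.make_inspector()
classes = inspector.label_classes()   # label values in prediction-column order
predicted_labels = np.array([int(classes[i])
                             for i in np.argmax(predictions_prob, axis=1)])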
 
 

  

--- Model Summary ---
Model: "random_forest_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
=================================================================
Total params: 1 (1.00 Byte)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 1 (1.00 Byte)
_________________________________________________________________
Type: "RANDOM_FOREST"
Task: CLASSIFICATION
Label: "__LABEL"

Input Features (11):
	0.0111599155
	0.01483216
	0.01739901
	0.018406885
	0.018967822
	0.02049054
	0.020988442
	0.021692224
	0.026393838
	0.033805072
	0.036814593

No weights

Variable Importance: INV_MEAN_MIN_DEPTH:
    1.  "0.021692224"  0.278557 ################
    2.  "0.036814593"  0.164558 #####
    3.  "0.026393838"  0.146458 ###
    4.   "0.01483216"  0.134337 ##
    5.   "0.02049054"  0.125129 #
    6.  "0.018406885"  0.124548 #
    7.   "0.01739901"  0.119835 #
    8.  "0.033805072"  0.116388 #
    9.  "0.018967822"  0.114600 
   10.  "0.020988442"  0.105245 
   11. "0.0111599155"  0.105237 

Variable Importance: NUM_AS_ROOT:
    1. "0.021692224" 138.000000 ################
    2. "0.026393838" 82.000000 #########
    3. "0.033805072" 52.000000 #####
    4. "0.018406885" 17.000000 #
    5. "0.018967822" 10.000000 #
    6. "0.020988442"  1.000000 

Variable Importance: NUM_NODES:
    1.  "0.036814593" 33033.000000 ################
    2.   "0.01483216" 24259.000000 ######
    3.  "0.018406885" 23259.000000 #####
    4.  "0.020988442" 21752.000000 ###
    5.   "0.02049054" 21560.000000 ###
    6. "0.0111599155" 20757.000000 ##
    7.  "0.026393838" 20511.000000 #
    8.  "0.018967822" 20197.000000 #
    9.   "0.01739901" 19841.000000 #
   10.  "0.033805072" 18931.000000 
   11.  "0.021692224" 18752.000000 

Variable Importance: SUM_SCORE:
    1.  "0.021692224" 6680292.214150 ################
    2.  "0.026393838" 3547615.844493 ########
    3.  "0.033805072" 2038345.698294 ####
    4.  "0.036814593" 1304726.855687 ##
    5.  "0.018406885" 1110520.264613 #
    6.   "0.01483216" 836882.331232 #
    7.  "0.018967822" 783745.709187 
    8.   "0.02049054" 776627.158520 
    9.  "0.020988442" 530432.154355 
   10.   "0.01739901" 456566.598678 
   11. "0.0111599155" 401849.939701 



Winner takes all: true
Out-of-bag evaluation: accuracy:0.998054 logloss:0.0105165
Number of trees: 300
Total number of nodes: 486004

Number of nodes by tree:
Count: 300 Average: 1620.01 StdDev: 76.5775
Min: 1423 Max: 1799 Ignored: 0
----------------------------------------------
[ 1423, 1441)  2   0.67%   0.67%
[ 1441, 1460)  6   2.00%   2.67% #
[ 1460, 1479)  4   1.33%   4.00% #
[ 1479, 1498)  8   2.67%   6.67% ##
[ 1498, 1517)  9   3.00%   9.67% ##
[ 1517, 1536) 10   3.33%  13.00% ##
[ 1536, 1554) 16   5.33%  18.33% ####
[ 1554, 1573) 19   6.33%  24.67% #####
[ 1573, 1592) 26   8.67%  33.33% ######
[ 1592, 1611) 41  13.67%  47.00% ##########
[ 1611, 1630) 34  11.33%  58.33% ########
[ 1630, 1649) 24   8.00%  66.33% ######
[ 1649, 1668) 20   6.67%  73.00% #####
[ 1668, 1686) 19   6.33%  79.33% #####
[ 1686, 1705) 13   4.33%  83.67% ###
[ 1705, 1724) 18   6.00%  89.67% ####
[ 1724, 1743) 11   3.67%  93.33% ###
[ 1743, 1762) 12   4.00%  97.33% ###
[ 1762, 1781)  5   1.67%  99.00% #
[ 1781, 1799]  3   1.00% 100.00% #

Depth by leafs:
Count: 243152 Average: 12.0108 StdDev: 2.39733
Min: 3 Max: 15 Ignored: 0
----------------------------------------------
[  3,  4)   128   0.05%   0.05%
[  4,  5)   389   0.16%   0.21%
[  5,  6)  1084   0.45%   0.66%
[  6,  7)  2770   1.14%   1.80% #
[  7,  8)  6288   2.59%   4.38% #
[  8,  9) 11242   4.62%   9.01% ##
[  9, 10) 17797   7.32%  16.33% ####
[ 10, 11) 24395  10.03%  26.36% #####
[ 11, 12) 30327  12.47%  38.83% #######
[ 12, 13) 34392  14.14%  52.98% #######
[ 13, 14) 35111  14.44%  67.42% ########
[ 14, 15) 32791  13.49%  80.90% #######
[ 15, 15] 46438  19.10% 100.00% ##########

Number of training obs by leaf:
Count: 243152 Average: 745.589 StdDev: 6136.2
Min: 5 Max: 219057 Ignored: 0
----------------------------------------------
[      5,  10957) 239559  98.52%  98.52% ##########
[  10957,  21910)   1503   0.62%  99.14%
[  21910,  32862)    888   0.37%  99.51%
[  32862,  43815)    355   0.15%  99.65%
[  43815,  54768)    235   0.10%  99.75%
[  54768,  65720)    109   0.04%  99.79%
[  65720,  76673)     68   0.03%  99.82%
[  76673,  87626)     68   0.03%  99.85%
[  87626,  98578)     62   0.03%  99.87%
[  98578, 109531)    112   0.05%  99.92%
[ 109531, 120484)     56   0.02%  99.94%
[ 120484, 131436)     24   0.01%  99.95%
[ 131436, 142389)     32   0.01%  99.97%
[ 142389, 153342)     19   0.01%  99.97%
[ 153342, 164294)     18   0.01%  99.98%
[ 164294, 175247)     14   0.01%  99.99%
[ 175247, 186200)     20   0.01% 100.00%
[ 186200, 197152)      5   0.00% 100.00%
[ 197152, 208105)      2   0.00% 100.00%
[ 208105, 219057]      3   0.00% 100.00%

Attribute in nodes:
	33033 : 0.036814593 [NUMERICAL]
	24259 : 0.01483216 [NUMERICAL]
	23259 : 0.018406885 [NUMERICAL]
	21752 : 0.020988442 [NUMERICAL]
	21560 : 0.02049054 [NUMERICAL]
	20757 : 0.0111599155 [NUMERICAL]
	20511 : 0.026393838 [NUMERICAL]
	20197 : 0.018967822 [NUMERICAL]
	19841 : 0.01739901 [NUMERICAL]
	18931 : 0.033805072 [NUMERICAL]
	18752 : 0.021692224 [NUMERICAL]

Attribute in nodes with depth <= 0:
	138 : 0.021692224 [NUMERICAL]
	82 : 0.026393838 [NUMERICAL]
	52 : 0.033805072 [NUMERICAL]
	17 : 0.018406885 [NUMERICAL]
	10 : 0.018967822 [NUMERICAL]
	1 : 0.020988442 [NUMERICAL]

Attribute in nodes with depth <= 1:
	380 : 0.021692224 [NUMERICAL]
	122 : 0.026393838 [NUMERICAL]
	83 : 0.01739901 [NUMERICAL]
	72 : 0.033805072 [NUMERICAL]
	57 : 0.036814593 [NUMERICAL]
	54 : 0.018406885 [NUMERICAL]
	51 : 0.018967822 [NUMERICAL]
	38 : 0.02049054 [NUMERICAL]
	26 : 0.020988442 [NUMERICAL]
	10 : 0.01483216 [NUMERICAL]
	7 : 0.0111599155 [NUMERICAL]

Attribute in nodes with depth <= 2:
	581 : 0.021692224 [NUMERICAL]
	229 : 0.02049054 [NUMERICAL]
	219 : 0.01483216 [NUMERICAL]
	193 : 0.018406885 [NUMERICAL]
	186 : 0.01739901 [NUMERICAL]
	154 : 0.036814593 [NUMERICAL]
	150 : 0.026393838 [NUMERICAL]
	117 : 0.018967822 [NUMERICAL]
	100 : 0.020988442 [NUMERICAL]
	92 : 0.033805072 [NUMERICAL]
	79 : 0.0111599155 [NUMERICAL]

Attribute in nodes with depth <= 3:
	766 : 0.021692224 [NUMERICAL]
	633 : 0.036814593 [NUMERICAL]
	514 : 0.02049054 [NUMERICAL]
	503 : 0.01483216 [NUMERICAL]
	345 : 0.01739901 [NUMERICAL]
	300 : 0.020988442 [NUMERICAL]
	288 : 0.018406885 [NUMERICAL]
	284 : 0.018967822 [NUMERICAL]
	277 : 0.0111599155 [NUMERICAL]
	272 : 0.026393838 [NUMERICAL]
	190 : 0.033805072 [NUMERICAL]

Attribute in nodes with depth <= 5:
	2877 : 0.036814593 [NUMERICAL]
	1895 : 0.01483216 [NUMERICAL]
	1725 : 0.02049054 [NUMERICAL]
	1541 : 0.021692224 [NUMERICAL]
	1244 : 0.01739901 [NUMERICAL]
	1210 : 0.018406885 [NUMERICAL]
	1146 : 0.020988442 [NUMERICAL]
	1127 : 0.026393838 [NUMERICAL]
	1069 : 0.0111599155 [NUMERICAL]
	1041 : 0.018967822 [NUMERICAL]
	878 : 0.033805072 [NUMERICAL]

Condition type in nodes:
	242852 : HigherCondition
Condition type in nodes with depth <= 0:
	300 : HigherCondition
Condition type in nodes with depth <= 1:
	900 : HigherCondition
Condition type in nodes with depth <= 2:
	2100 : HigherCondition
Condition type in nodes with depth <= 3:
	4372 : HigherCondition
Condition type in nodes with depth <= 5:
	15753 : HigherCondition
Node format: NOT_SET

Training OOB:
	trees: 1, Out-of-bag evaluation: accuracy:0.996575 logloss:0.123458
	trees: 9, Out-of-bag evaluation: accuracy:0.997327 logloss:0.0479432
	trees: 19, Out-of-bag evaluation: accuracy:0.99767 logloss:0.0254875
	trees: 29, Out-of-bag evaluation: accuracy:0.997806 logloss:0.0193881
	trees: 39, Out-of-bag evaluation: accuracy:0.997889 logloss:0.0171792
	trees: 49, Out-of-bag evaluation: accuracy:0.997902 logloss:0.0153532
	trees: 59, Out-of-bag evaluation: accuracy:0.997955 logloss:0.0144014
	trees: 69, Out-of-bag evaluation: accuracy:0.998018 logloss:0.0137202
	trees: 79, Out-of-bag evaluation: accuracy:0.998029 logloss:0.0131981
	trees: 89, Out-of-bag evaluation: accuracy:0.998024 logloss:0.0128051
	trees: 99, Out-of-bag evaluation: accuracy:0.998036 logloss:0.0124228
	trees: 109, Out-of-bag evaluation: accuracy:0.998042 logloss:0.0120357
	trees: 119, Out-of-bag evaluation: accuracy:0.998049 logloss:0.0117484
	trees: 129, Out-of-bag evaluation: accuracy:0.998054 logloss:0.0116494
	trees: 139, Out-of-bag evaluation: accuracy:0.998069 logloss:0.0113382
	trees: 149, Out-of-bag evaluation: accuracy:0.998067 logloss:0.0112786
	trees: 159, Out-of-bag evaluation: accuracy:0.998066 logloss:0.0111747
	trees: 169, Out-of-bag evaluation: accuracy:0.998064 logloss:0.0111785
	trees: 179, Out-of-bag evaluation: accuracy:0.998071 logloss:0.0109639
	trees: 189, Out-of-bag evaluation: accuracy:0.998066 logloss:0.0108541
	trees: 199, Out-of-bag evaluation: accuracy:0.998052 logloss:0.0107001
	trees: 209, Out-of-bag evaluation: accuracy:0.998041 logloss:0.0106965
	trees: 219, Out-of-bag evaluation: accuracy:0.998056 logloss:0.0106957
	trees: 229, Out-of-bag evaluation: accuracy:0.998066 logloss:0.0106392
	trees: 239, Out-of-bag evaluation: accuracy:0.998075 logloss:0.010582
	trees: 249, Out-of-bag evaluation: accuracy:0.998064 logloss:0.0105818
	trees: 259, Out-of-bag evaluation: accuracy:0.998056 logloss:0.0105275
	trees: 269, Out-of-bag evaluation: accuracy:0.998072 logloss:0.0105194
	trees: 279, Out-of-bag evaluation: accuracy:0.998059 logloss:0.0105219
	trees: 289, Out-of-bag evaluation: accuracy:0.998056 logloss:0.0105255
	trees: 299, Out-of-bag evaluation: accuracy:0.998049 logloss:0.010518
	trees: 300, Out-of-bag evaluation: accuracy:0.998054 logloss:0.0105165

152/152 [==============================] - 10s 65ms/step - loss: 0.0000e+00 - accuracy: 0.9979

Test Accuracy: 0.9979
WARNING:absl:`0.0111599155` is not a valid tf.function parameter name. Sanitizing to `arg_0_0111599155`.
WARNING:absl:`0.01483216` is not a valid tf.function parameter name. Sanitizing to `arg_0_01483216`.
WARNING:absl:`0.01739901` is not a valid tf.function parameter name. Sanitizing to `arg_0_01739901`.
WARNING:absl:`0.018406885` is not a valid tf.function parameter name. Sanitizing to `arg_0_018406885`.
WARNING:absl:`0.018967822` is not a valid tf.function parameter name. Sanitizing to `arg_0_018967822`.
Model saved to /app/exported_model

---------------------------------------------------------------------

The overall accuracy is very good, but let's look in detail at how each class is predicted.

--------------------------------------------------------------------- 

 

--- Classification Report per Category ---
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    148127
           1       1.00      0.36      0.53        11
           2       0.75      0.86      0.80         7
           3       0.75      0.60      0.67        10
           5       1.00      0.63      0.78        41
           6       0.00      0.00      0.00         4
           7       1.00      0.09      0.16        35
           8       0.96      0.97      0.97      2744
          10       1.00      0.20      0.33        25
          11       0.90      0.64      0.75        14
          13       0.50      0.33      0.40         3
          14       0.93      0.24      0.38        55
          15       0.00      0.00      0.00         1

    accuracy                           1.00    151077
   macro avg       0.75      0.46      0.52    151077
weighted avg       1.00      1.00      1.00    151077

 

Monday, August 25, 2025

Borehole images segmentation

Continuing the adventure of neural networks applied to geology, I came across this article:




The segmentation and intelligent recognition of structural surfaces in borehole images based on the U2-Net network. Yu, Qingjun; Wang, Guannan; Cheng, Hai; Guo, Wenzhi; Liu, Yanbiao (2024). The authors published the dataset under a Creative Commons license in two parts (part 1, part 2).

The dataset contains 468 images in total (I split them into a training set and kept the remainder as a control dataset), but they are not annotated.

For this reason I annotated them myself using LabelMe.



The LabelMe JSON files were then converted into masks (black background, white for the class):


import json
import numpy as np
import cv2
import argparse

def create_mask_from_json(json_file_path, image_width, image_height, output_path=None, buffer_size=5):
    """
    Create a binary mask from a JSON label file.
    Args:
        json_file_path (str): Path to the JSON label file
        image_width (int): Width of the output mask
        image_height (int): Height of the output mask
        output_path (str, optional): Path to save the mask image
        buffer_size (int): Buffer size around lines (line thickness / 2)
    Returns:
        numpy.ndarray: Binary mask array
    """
    # Load JSON data
    with open(json_file_path, 'r') as f:
        data = json.load(f)
    # Create black mask (background)
    mask = np.zeros((image_height, image_width), dtype=np.uint8)
    # Process shapes/annotations in the JSON
    shapes = data.get('shapes', [])
    for shape in shapes:
        # Check if label equals 1
        label = shape.get('label', '')
        if label == '1' or label == 1:
            # Get shape type and points
            shape_type = shape.get('shape_type', '')
            points = shape.get('points', [])
            if shape_type == 'linestrip' and points:
                # Convert points to a numpy array with integer coordinates
                pts = np.array(points, dtype=np.int32)
                # Method 1: simple line thickness.
                # Draw the linestrip with the specified thickness
                for i in range(len(pts) - 1):
                    cv2.line(mask, tuple(pts[i]), tuple(pts[i + 1]), 255, thickness=buffer_size * 2 + 1)
                # Method 2: morphological dilation (uncomment to use instead).
                # Draw thin lines first, then dilate
                # temp_mask = np.zeros_like(mask)
                # for i in range(len(pts) - 1):
                #     cv2.line(temp_mask, tuple(pts[i]), tuple(pts[i + 1]), 255, thickness=1)
                # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (buffer_size*2+1, buffer_size*2+1))
                # temp_mask = cv2.dilate(temp_mask, kernel, iterations=1)
                # mask = cv2.bitwise_or(mask, temp_mask)
            elif shape_type == 'polygon' and points:
                # Handle polygon shapes if present
                pts = np.array(points, dtype=np.int32)
                cv2.fillPoly(mask, [pts], 255)
            elif shape_type == 'rectangle' and points:
                # Handle rectangle shapes if present
                if len(points) >= 2:
                    pt1 = tuple(map(int, points[0]))
                    pt2 = tuple(map(int, points[1]))
                    cv2.rectangle(mask, pt1, pt2, 255, -1)
    # Save the mask if an output path is provided
    if output_path:
        cv2.imwrite(output_path, mask)
        print(f"Mask saved to: {output_path}")
    return mask

def create_mask_from_labelme_json(json_file_path, output_path=None):
    """
    Create a mask from a LabelMe-format JSON file (image dimensions are read from the JSON).
    Args:
        json_file_path (str): Path to the JSON label file
        output_path (str, optional): Path to save the mask image
    Returns:
        numpy.ndarray: Binary mask array
    """
    # Load JSON data
    with open(json_file_path, 'r') as f:
        data = json.load(f)
    # Get image dimensions from the JSON
    image_width = data.get('imageWidth', 640)
    image_height = data.get('imageHeight', 480)
    return create_mask_from_json(json_file_path, image_width, image_height, output_path)

# Example usage
def example_usage():
    """
    Example of how to use the functions
    """
    # Example 1: if you know the image dimensions
    json_path = "labels.json"
    width, height = 1920, 1080
    mask = create_mask_from_json(json_path, width, height, "output_mask.png")
    # Example 2: if using LabelMe-format JSON (contains the image dimensions)
    # mask = create_mask_from_labelme_json("labelme_annotations.json", "mask.png")
    print(f"Created mask with shape: {mask.shape}")
    print(f"Mask contains {np.sum(mask == 255)} white pixels")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Create binary mask from JSON labels')
    parser.add_argument('json_file', help='Path to JSON label file')
    parser.add_argument('--width', type=int, help='Image width (required if not in JSON)')
    parser.add_argument('--height', type=int, help='Image height (required if not in JSON)')
    parser.add_argument('--output', '-o', help='Output mask file path')
    parser.add_argument('--labelme', action='store_true', help='Use LabelMe format (gets dimensions from JSON)')
    args = parser.parse_args()
    try:
        if args.labelme:
            mask = create_mask_from_labelme_json(args.json_file, args.output)
        else:
            if not args.width or not args.height:
                print("Error: Width and height are required when not using LabelMe format")
                exit(1)
            mask = create_mask_from_json(args.json_file, args.width, args.height, args.output)
        print(f"Successfully created mask with shape: {mask.shape}")
    except Exception as e:
        print(f"Error: {e}")
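Assuming the script above is saved as create_mask.py (a hypothetical name), a LabelMe annotation can be converted with:

python create_mask.py 0402.json --labelme -o 0402_mask.png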


The training script is the following; it is a close relative of the one used in a previous post.

"""
train_deeplabv3p.py

Requirements:
- tensorflow >= 2.8 (tested on TF 2.10+)
- matplotlib
- opencv-python (cv2) optional if you want to preview images locally

Dataset layout expected:
  dataset/images/<name>.png (RGB)
  dataset/masks/<name>.png (grayscale, 0 for background, 127 for the class)

Usage:
python train_deeplabv3p.py
"""

import os
import random
import glob
import math
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# ----------------------
# Config - tweak here
# ----------------------
IM_SIZE = (256, 256) # input size for training (height, width)
BATCH_SIZE = 8
EPOCHS = 30
AUTOTUNE = tf.data.AUTOTUNE
DATA_DIR = "./dataset"
IMAGE_DIR = os.path.join(DATA_DIR, "images")
MASK_DIR = os.path.join(DATA_DIR, "masks")
MODEL_SAVE = "coredrill_deeplabv3p.h5"
VAL_SPLIT = 0.15
SEED = 42
LEARNING_RATE = 1e-4
# ----------------------

class BinaryMeanIoU(tf.keras.metrics.MeanIoU):
    def __init__(self, name="iou"):
        super().__init__(num_classes=2, name=name)

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.cast(y_pred > 0.5, tf.int32)
        y_true = tf.cast(y_true, tf.int32)
        return super().update_state(y_true, y_pred, sample_weight)
# ----------------------
# Utility: DeepLabV3+ model (MobileNetV2 backbone)
# ----------------------
def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1):
    x = layers.SeparableConv2D(filters, kernel_size=kernel_size, strides=stride,
                               padding='same', dilation_rate=rate,
                               use_bias=False, name=prefix + '_sepconv')(x)
    x = layers.BatchNormalization(name=prefix + '_bn')(x)
    x = layers.ReLU(name=prefix + '_relu')(x)
    return x

def ASPP(x, out_channels=256):
    # Atrous Spatial Pyramid Pooling
    b0 = layers.Conv2D(out_channels, 1, padding='same', use_bias=False)(x)
    b0 = layers.BatchNormalization()(b0)
    b0 = layers.ReLU()(b0)

    b1 = layers.SeparableConv2D(out_channels, 3, padding='same', dilation_rate=6, use_bias=False)(x)
    b1 = layers.BatchNormalization()(b1)
    b1 = layers.ReLU()(b1)

    b2 = layers.SeparableConv2D(out_channels, 3, padding='same', dilation_rate=12, use_bias=False)(x)
    b2 = layers.BatchNormalization()(b2)
    b2 = layers.ReLU()(b2)

    b3 = layers.SeparableConv2D(out_channels, 3, padding='same', dilation_rate=18, use_bias=False)(x)
    b3 = layers.BatchNormalization()(b3)
    b3 = layers.ReLU()(b3)

    # Image pooling branch
    b4 = layers.GlobalAveragePooling2D()(x)
    b4 = layers.Reshape((1, 1, -1))(b4)
    b4 = layers.Conv2D(out_channels, 1, padding='same', use_bias=False)(b4)
    b4 = layers.BatchNormalization()(b4)
    b4 = layers.ReLU()(b4)
    # Instead of using tf.shape(x), upsample by a fixed scale factor
    b4 = layers.UpSampling2D(size=(x.shape[1], x.shape[2]), interpolation='bilinear')(b4)

    # Concatenate and project
    x = layers.Concatenate()([b0, b1, b2, b3, b4])
    x = layers.Conv2D(out_channels, 1, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    return x


def DeepLabV3Plus(input_shape=(256, 256, 3), num_classes=1, backbone='mobilenetv2'):
    # Encoder (MobileNetV2)
    base_model = tf.keras.applications.MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet')
    # Extract feature maps:
    # low-level feature for the decoder
    low_level = base_model.get_layer('block_3_expand_relu').output
    # high-level feature for ASPP
    high_level = base_model.get_layer('block_13_expand_relu').output

    # ASPP on high-level features
    x = ASPP(high_level, out_channels=256)
    x = layers.UpSampling2D(size=(4, 4), interpolation='bilinear')(x)  # scale to match the low-level features

    # Process low-level features
    low = layers.Conv2D(48, 1, padding='same', use_bias=False)(low_level)
    low = layers.BatchNormalization()(low)
    low = layers.ReLU()(low)

    # Concatenate
    x = layers.Concatenate()([x, low])
    x = SepConv_BN(x, 256, 'decoder_separable_conv0')
    x = SepConv_BN(x, 256, 'decoder_separable_conv1')

    # Upsample to input size
    x = layers.UpSampling2D(size=(4, 4), interpolation='bilinear')(x)
    # Final conv
    if num_classes == 1:
        activation = 'sigmoid'
        out_filters = 1
    else:
        activation = 'softmax'
        out_filters = num_classes

    x = layers.Conv2D(out_filters, 1, padding='same')(x)
    x = layers.Activation(activation)(x)

    model = tf.keras.Model(inputs=base_model.input, outputs=x)
    return model

# ----------------------
# Data pipeline
# ----------------------
def list_pairs(image_dir, mask_dir):
    # match image and mask by filename (without extension)
    images = sorted(glob.glob(os.path.join(image_dir, "*")))
    image_map = {os.path.splitext(os.path.basename(p))[0]: p for p in images}
    masks = sorted(glob.glob(os.path.join(mask_dir, "*")))
    mask_map = {os.path.splitext(os.path.basename(p))[0]: p for p in masks}
    common = sorted(set(image_map.keys()).intersection(mask_map.keys()))
    pairs = [(image_map[k], mask_map[k]) for k in common]
    return pairs

def decode_image(path, target_size=IM_SIZE):
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3)
    img.set_shape([None, None, 3])
    img = tf.image.resize(img, target_size)
    img = tf.cast(img, tf.float32) / 255.0
    return img

def decode_mask(path, target_size=IM_SIZE):
    m = tf.io.read_file(path)
    m = tf.image.decode_image(m, channels=1)  # single channel if possible
    m.set_shape([None, None, 1])
    m = tf.image.resize(m, target_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    m = tf.cast(m, tf.float32)

    # Normalize mask: works for either 0/127 or 0/255 style masks.
    # Any value > 64 becomes 1, otherwise 0
    m = tf.where(m > 64.0, 1.0, 0.0)
    return m

def load_pair(image_path, mask_path):
    image = decode_image(image_path)
    mask = decode_mask(mask_path)
    return image, mask

def augment(image, mask):
    # simple augmentation: random flips and random brightness
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        mask = tf.image.flip_left_right(mask)
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_up_down(image)
        mask = tf.image.flip_up_down(mask)
    if tf.random.uniform(()) > 0.5:
        image = tf.image.random_brightness(image, max_delta=0.1)
    return image, mask

def make_datasets(pairs, batch_size=BATCH_SIZE, val_split=VAL_SPLIT):
    random.seed(SEED)
    random.shuffle(pairs)
    n = len(pairs)
    n_val = max(1, int(n * val_split))
    val_pairs = pairs[:n_val]
    train_pairs = pairs[n_val:]

    def gen(pairs_list):
        for img_p, m_p in pairs_list:
            yield img_p, m_p

    train_ds = tf.data.Dataset.from_generator(lambda: gen(train_pairs), output_types=(tf.string, tf.string))
    val_ds = tf.data.Dataset.from_generator(lambda: gen(val_pairs), output_types=(tf.string, tf.string))

    train_ds = (train_ds
                .map(lambda i, m: tf.py_function(load_pair, [i, m], [tf.float32, tf.float32]),
                     num_parallel_calls=AUTOTUNE)
                .map(lambda i, m: (tf.ensure_shape(i, [*IM_SIZE, 3]), tf.ensure_shape(m, [*IM_SIZE, 1])),
                     num_parallel_calls=AUTOTUNE)
                .map(lambda i, m: augment(i, m), num_parallel_calls=AUTOTUNE)
                .shuffle(256)
                .batch(batch_size)
                .prefetch(AUTOTUNE)
                )

    val_ds = (val_ds
              .map(lambda i, m: tf.py_function(load_pair, [i, m], [tf.float32, tf.float32]),
                   num_parallel_calls=AUTOTUNE)
              .map(lambda i, m: (tf.ensure_shape(i, [*IM_SIZE, 3]), tf.ensure_shape(m, [*IM_SIZE, 1])),
                   num_parallel_calls=AUTOTUNE)
              .batch(batch_size)
              .prefetch(AUTOTUNE)
              )

    return train_ds, val_ds, train_pairs, val_pairs

# ----------------------
# Metrics and Loss
# ----------------------
def dice_coef(y_true, y_pred, smooth=1e-6):
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)

def dice_loss(y_true, y_pred):
    return 1.0 - dice_coef(y_true, y_pred)

def bce_dice_loss(y_true, y_pred):
    bce = tf.keras.losses.BinaryCrossentropy()(y_true, y_pred)
    return bce + dice_loss(y_true, y_pred)

# ----------------------
# Training routine
# ----------------------
def main():
    pairs = list_pairs(IMAGE_DIR, MASK_DIR)
    if len(pairs) == 0:
        raise RuntimeError(f"No matching image/mask pairs found in {IMAGE_DIR} and {MASK_DIR}.")
    print(f"Found {len(pairs)} pairs.")

    train_ds, val_ds, train_pairs, val_pairs = make_datasets(pairs)

    model = DeepLabV3Plus(input_shape=(*IM_SIZE, 3), num_classes=1)
    model.summary()

    # compile
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss=bce_dice_loss,
                  # metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy'),
                  #          tf.keras.metrics.MeanIoU(num_classes=2, name='iou'),
                  #          dice_coef])
                  metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy'),
                           BinaryMeanIoU(),
                           dice_coef])

    # Callbacks
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(MODEL_SAVE, save_best_only=True, monitor='val_loss')
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, verbose=1)
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

    history = model.fit(train_ds,
                        epochs=EPOCHS,
                        validation_data=val_ds,
                        callbacks=[checkpoint_cb, reduce_lr, early_stop])

    # Save final model
    model.save(MODEL_SAVE)
    print(f"Model saved to {MODEL_SAVE}")

    # Visual comparison on a few validation samples
    visualize_predictions(model, val_pairs, n=6)

def visualize_predictions(model, val_pairs, n=6):
    # pick up to n validation examples at random
    samples = random.sample(val_pairs, min(n, len(val_pairs)))
    fig_rows = len(samples)
    plt.figure(figsize=(10, 4 * fig_rows))
    for i, (img_p, mask_p) in enumerate(samples):
        img = tf.io.read_file(img_p)
        img = tf.image.decode_image(img, channels=3)
        img = tf.image.resize(img, IM_SIZE)
        img = tf.cast(img, tf.float32) / 255.0
        mask = tf.io.read_file(mask_p)
        mask = tf.image.decode_image(mask, channels=1)
        mask = tf.image.resize(mask, IM_SIZE, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        mask = tf.cast(mask, tf.float32)
        mask = (mask > 64.0).numpy().astype(np.uint8).squeeze()

        # Predict
        inp = tf.expand_dims(img, 0)
        pred = model.predict(inp)[0]
        pred_mask = (pred[..., 0] > 0.5).astype(np.uint8)

        ax = plt.subplot(fig_rows, 3, i*3 + 1)
        plt.imshow(img.numpy())
        plt.title("Image")
        plt.axis('off')

        ax = plt.subplot(fig_rows, 3, i*3 + 2)
        plt.imshow(mask, cmap='gray')
        plt.title("Ground Truth")
        plt.axis('off')

        ax = plt.subplot(fig_rows, 3, i*3 + 3)
        plt.imshow(pred_mask, cmap='gray')
        plt.title("Prediction")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    main()




The inference program is the following:

import tensorflow as tf
import numpy as np
import cv2
import os

# ---------------- CONFIG ----------------
MODEL_PATH = "coredrill_deeplabv3p.h5" # path to your trained model
IMG_PATH = "./dataset/controllo/images/0402.png" # input image for inference
OUT_PATH = "402.png" # output mask path
IM_SIZE = (256, 256) # must match training size
# ----------------------------------------


def preprocess_image(image_path, target_size=IM_SIZE):
    """Load and preprocess the input image"""
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, target_size)
    img = img.astype(np.float32) / 255.0
    return np.expand_dims(img, axis=0)  # shape (1, H, W, 3)


def postprocess_mask(mask, original_shape):
    """Convert the network output to a binary mask"""
    mask = (mask > 0.5).astype(np.uint8) * 255  # threshold
    mask = mask[0, :, :, 0]  # remove batch and channel dims
    mask = cv2.resize(mask, (original_shape[1], original_shape[0]), interpolation=cv2.INTER_NEAREST)
    return mask


def main():
    # Load the trained model
    print(f"Loading model from {MODEL_PATH}...")
    model = tf.keras.models.load_model(MODEL_PATH, compile=False)

    # Load the input image
    original = cv2.imread(IMG_PATH, cv2.IMREAD_COLOR)
    H, W = original.shape[:2]

    # Preprocess
    x = preprocess_image(IMG_PATH)

    # Predict
    print("Running inference...")
    pred = model.predict(x)

    # Postprocess
    mask = postprocess_mask(pred, (H, W))

    # Save results
    cv2.imwrite(OUT_PATH, mask)
    print(f"Saved mask at {OUT_PATH}")

    # Optional: save an overlay
    overlay = original.copy()
    overlay[mask > 127] = (0, 0, 255)  # red overlay for the class
    cv2.imwrite("overlay.png", overlay)
    print("Saved overlay at overlay.png")


if __name__ == "__main__":
    main()
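To repeat the check on every image of the control set, a minimal sketch reusing the functions above (the folder ./dataset/controllo/images/ comes from the config; the output naming is arbitrary):

import glob

# sketch: batch inference over the whole control folder
model = tf.keras.models.load_model(MODEL_PATH, compile=False)
for img_path in sorted(glob.glob("./dataset/controllo/images/*.png")):
    original = cv2.imread(img_path, cv2.IMREAD_COLOR)
    H, W = original.shape[:2]
    pred = model.predict(preprocess_image(img_path))
    mask = postprocess_mask(pred, (H, W))
    out_name = os.path.basename(img_path).replace(".png", "_mask.png")
    cv2.imwrite(out_name, mask)
    print(f"Saved {out_name}")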


A few checks on the control dataset:

RGB 402

Network inference, image 402

---------------------------

426 RGB

Network inference, image 426

Network inference, image 426

---------------------------

446 RGB

Network inference, image 446
Remote sensing of marine plastics

Bibliography: Biermann, L., Clewley, D., Martinez-Vicente, V. et al. Finding Plastic Patches in Coastal Waters using Optical Satellite Data. ...