lunedì 23 febbraio 2026

Segformer (Indian Pines, Pavia)

 Sulla base di quanto letto sul dataset SpectralWaste e del codice allegato ho voluto provare SegFormer su due dataset pubblici e classici per il test degli algoritmi, come Indian Pines e Pavia

Indian Pines ha una risoluzione spettrale di 220 bande, una risoluzione a terra di 30 m e 145x145 pixel. E' quindi un dataset molto piccolo che necessita di data augmentation; inoltre e' anche sbilanciato, in quanto la classe Background e' numericamente molto piu' numerosa delle altre classi

Pavia ha una risoluzione spettrale di 103 bande, una risoluzione a terra di 1.3 m per pixel e 610x340 pixel

I dataset possono essere scaricati da qui 

 

 Pavia

 


 


 

 

 

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import random
from scipy.io import loadmat
from torch.utils.data import Dataset, DataLoader
from transformers import SegformerForSemanticSegmentation
from sklearn.metrics import cohen_kappa_score, accuracy_score, classification_report, confusion_matrix
import seaborn as sns

# --- CONFIGURATION FOR INDIAN PINES ---
# Hyperparameters and file paths for the Indian Pines experiment.
CONFIG = dict(
    dataset="IndianPines",
    data_path="Indian_pines_corrected.mat",
    gt_path="Indian_pines_gt.mat",
    in_channels=200,   # standard band count for the corrected Indian Pines cube
    num_classes=17,    # 16 land-cover classes + 1 background (label 0)
    window_size=32,    # smaller window for the 145x145 image
    stride=4,          # smaller stride to increase patch count
    train_ratio=0.2,   # 20% training, 80% testing
    batch_size=16,
    epochs=80,         # increased epochs for convergence
    lr=1e-4,
)

# 1. DATASET WITH AUTOMATIC KEY DETECTION
class HSIDataset(Dataset):
    """Patch-based dataset over a single hyperspectral scene.

    Loads the data cube and ground truth from .mat files, min-max
    normalizes the cube, reproducibly splits the labeled pixels into a
    train or test mask, and extracts sliding-window patches that
    contain at least one labeled pixel of the requested split.
    """

    def __init__(self, cfg, is_train=True, augment=True):
        raw_data = loadmat(cfg["data_path"])
        raw_gt = loadmat(cfg["gt_path"])
        # Auto-detect keys (ignores metadata keys starting with __)
        data_key = [k for k in raw_data.keys() if not k.startswith('__')][0]
        gt_key = [k for k in raw_gt.keys() if not k.startswith('__')][0]
        data = raw_data[data_key].astype(np.float32)
        gt = raw_gt[gt_key].astype(np.int64)
        # Normalization (Min-Max) over the whole cube
        data = (data - np.min(data)) / (np.max(data) - np.min(data))
        self.data = np.transpose(data, (2, 0, 1))  # [C, H, W]
        # Train/Test split over labeled pixels only (gt > 0).
        # A local RandomState(42) reproduces exactly the shuffle of the
        # legacy np.random.seed(42)/np.random.shuffle pair without
        # mutating the global NumPy RNG state.
        labeled_rows, labeled_cols = np.where(gt > 0)
        num_labeled = len(labeled_rows)
        indices = np.arange(num_labeled)
        np.random.RandomState(42).shuffle(indices)
        train_count = int(num_labeled * cfg["train_ratio"])
        # Keep the subset for this split; everything else stays 0
        # (background) so loss and metrics ignore it.
        split_idx = indices[:train_count] if is_train else indices[train_count:]
        split_gt = np.zeros_like(gt)
        rows, cols = labeled_rows[split_idx], labeled_cols[split_idx]
        split_gt[rows, cols] = gt[rows, cols]
        self.gt = split_gt
        self.augment = augment and is_train
        # Patch generation: only keep windows with labels for this split
        self.patches, self.labels = [], []
        c, h, w = self.data.shape
        ws = cfg["window_size"]
        for i in range(0, h - ws + 1, cfg["stride"]):
            for j in range(0, w - ws + 1, cfg["stride"]):
                patch_gt = self.gt[i:i+ws, j:j+ws]
                if np.sum(patch_gt > 0) > 0:
                    self.patches.append(self.data[:, i:i+ws, j:j+ws])
                    self.labels.append(patch_gt)
        self.patches = np.array(self.patches)
        self.labels = np.array(self.labels)

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        """Return one (patch, label) tensor pair; random flips when augmenting."""
        patch, label = self.patches[idx], self.labels[idx]
        if self.augment:
            if random.random() > 0.5:  # horizontal flip
                patch = np.flip(patch, axis=2).copy()
                label = np.flip(label, axis=1).copy()
            if random.random() > 0.5:  # vertical flip
                patch = np.flip(patch, axis=1).copy()
                label = np.flip(label, axis=0).copy()
        return torch.from_numpy(patch), torch.from_numpy(label)

# 2. MODEL DEFINITION
class SegFormerHSI(nn.Module):
    """SegFormer (nvidia/mit-b0) wrapped for hyperspectral input.

    A 1x1 convolution first projects the spectral bands down to the
    3 channels the pretrained backbone expects; the output logits are
    then upsampled back to the input spatial resolution.
    """

    def __init__(self, in_ch, num_cl):
        super().__init__()
        # Learnable spectral projection: in_ch bands -> 3 pseudo-RGB channels
        self.reducer = nn.Conv2d(in_ch, 3, kernel_size=1)
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/mit-b0",
            num_labels=num_cl,
            ignore_mismatched_sizes=True
        )

    def forward(self, x):
        reduced = self.reducer(x)
        logits = self.model(reduced).logits
        # Resize the logits back to the spatial size of the input patch
        target_size = reduced.shape[-2:]
        return nn.functional.interpolate(
            logits, size=target_size, mode="bilinear", align_corners=False
        )

# 3. VISUALIZATION FUNCTIONS
def plot_results(train_gt, full_gt, pred_map):
    """Show training pixels, the full prediction map and a correctness map.

    Labeled pixels (full_gt > 0) are drawn green where the prediction
    matches the ground truth and red where it does not; background
    stays white.
    """
    labeled = full_gt > 0
    correctness = np.full(full_gt.shape + (3,), 1.0)           # white background
    correctness[labeled] = [1, 0, 0]                            # red = error
    correctness[labeled & (full_gt == pred_map)] = [0, 1, 0]    # green = correct

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    # Training mask: hide background (0) by mapping it to NaN
    shown_train = train_gt.astype(float)
    shown_train[shown_train == 0] = np.nan
    axes[0].imshow(shown_train, cmap='nipy_spectral')
    axes[0].set_title("Training Pixels (20%)")
    # Prediction over the whole scene
    axes[1].imshow(pred_map, cmap='nipy_spectral')
    axes[1].set_title("Full Prediction Map")
    # Correct/error overlay
    axes[2].imshow(correctness)
    axes[2].set_title("Error Map (Green=Correct)")
    for axis in axes:
        axis.axis('off')
    plt.show()

# 4. MAIN EXECUTION
def main():
    """Train SegFormer on Indian Pines patches and evaluate on the test split."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # The two instances share the fixed seed inside HSIDataset, so their
    # train/test pixel masks are complementary and consistent.
    train_ds = HSIDataset(CONFIG, is_train=True)
    test_ds = HSIDataset(CONFIG, is_train=False, augment=False)
    train_loader = DataLoader(train_ds, batch_size=CONFIG["batch_size"], shuffle=True)
    model = SegFormerHSI(CONFIG["in_channels"], CONFIG["num_classes"]).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG["lr"])
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore background class (0)

    print(f"Dataset: Indian Pines | Training Patches: {len(train_ds)}")

    # Training loop over patch batches
    model.train()
    for epoch in range(CONFIG["epochs"]):
        total_loss = 0
        for imgs, masks in train_loader:
            imgs, masks = imgs.to(device), masks.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if epoch % 10 == 0:
            print(f"Epoch {epoch} | Loss: {total_loss/len(train_loader):.4f}")

    # Evaluation: single forward pass over the entire scene
    model.eval()
    with torch.no_grad():
        full_img = torch.from_numpy(test_ds.data).unsqueeze(0).to(device)
        pred_map = torch.argmax(model(full_img), dim=1).squeeze(0).cpu().numpy()
    # Stats on Test Set: only pixels assigned to the test split (gt > 0)
    mask = test_ds.gt > 0
    y_true = test_ds.gt[mask]
    y_pred = pred_map[mask]
    print(f"\n--- INDIAN PINES RESULTS ---")
    print(f"Overall Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(f"Kappa: {cohen_kappa_score(y_true, y_pred):.4f}")
    # Final Visualizations: reload the complete ground truth for the error map
    raw_gt = loadmat(CONFIG["gt_path"])
    gt_key = [k for k in raw_gt.keys() if not k.startswith('__')][0]
    full_gt = raw_gt[gt_key].astype(np.int64)
    plot_results(train_ds.gt, full_gt, pred_map)


if __name__ == "__main__":
    main()


 

Indian Pines


 

 


 

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import random
from scipy.io import loadmat
from torch.utils.data import Dataset, DataLoader
from transformers import SegformerForSemanticSegmentation
from sklearn.metrics import cohen_kappa_score, accuracy_score, classification_report

from sklearn.metrics import confusion_matrix
import seaborn as sns

def plot_results_summary(train_gt, full_gt, pred_map, dataset_name):
    """Plot the training mask, the full prediction and a correct/error map.

    Parameters
    ----------
    train_gt : 2D array
        Ground truth of the training split (0 = background/unused).
    full_gt : 2D array
        Complete ground truth (0 = background).
    pred_map : 2D array
        Predicted class index per pixel.
    dataset_name : str
        Dataset label used in the figure titles.
    """
    # Error map: white background, Red = wrong, Green = correct.
    # Only pixels with ground truth (full_gt > 0) are evaluated.
    # (The previous unused intermediate `error_map` array was removed.)
    labeled = full_gt > 0
    error_display = np.full(full_gt.shape + (3,), 1.0)  # White background
    error_display[labeled] = [1, 0, 0]  # Default Red (Error)
    error_display[labeled & (full_gt == pred_map)] = [0, 1, 0]  # Green (Correct)

    plt.figure(figsize=(18, 6))
    # 1. Training Mask (background mapped to NaN so it renders blank)
    plt.subplot(1, 3, 1)
    train_view = train_gt.astype(float)
    train_view[train_view == 0] = np.nan
    plt.imshow(train_view, cmap='nipy_spectral')
    plt.title(f"{dataset_name}: Training Mask (Used pixels)")
    plt.axis('off')
    # 2. Prediction over the full scene
    plt.subplot(1, 3, 2)
    plt.imshow(pred_map, cmap='nipy_spectral')
    plt.title(f"{dataset_name}: Model Prediction (Full Map)")
    plt.axis('off')
    # 3. Error Map (Green = Correct, Red = Wrong)
    plt.subplot(1, 3, 3)
    plt.imshow(error_display)
    plt.title(f"{dataset_name}: Error Map (Green=Correct, Red=Error)")
    plt.axis('off')
    plt.tight_layout()
    plt.show()

def plot_train_vs_prediction(train_gt, pred_map, dataset_name):
    """Show the training pixels side by side with the full SegFormer prediction."""
    # Background (0) becomes NaN so it renders as empty/white
    train_display = train_gt.astype(float)
    train_display[train_display == 0] = np.nan
    pred_display = pred_map.astype(float)
    # Optional: mask prediction with where labels actually exist in reality
    # pred_display[dataset.gt == 0] = np.nan

    fig, axes = plt.subplots(1, 2, figsize=(14, 7))
    # Left: Training Mask
    axes[0].imshow(train_display, cmap='nipy_spectral')
    axes[0].set_title(f"{dataset_name}: Training Pixels (20%)")
    axes[0].axis('off')
    # Right: Model Prediction
    axes[1].imshow(pred_display, cmap='nipy_spectral')
    axes[1].set_title(f"{dataset_name}: SegFormer Full Prediction")
    axes[1].axis('off')
    plt.tight_layout()
    plt.show()

def plot_confusion_matrix(y_true, y_pred, dataset_name):
    """Plot a row-normalized confusion matrix as a heatmap.

    Each row is divided by its true-class support. Rows with zero
    support (a class present only in predictions) are left at 0
    instead of producing NaNs from a 0/0 division.
    """
    cm = confusion_matrix(y_true, y_pred)
    # Normalize by row (true labels); clamp empty rows to avoid divide-by-zero
    row_sums = cm.sum(axis=1, keepdims=True)
    cm_norm = cm.astype('float') / np.maximum(row_sums, 1)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_norm, annot=True, fmt=".2f", cmap="Blues")
    plt.title(f"Normalized Confusion Matrix: {dataset_name}")
    plt.ylabel("True Class")
    plt.xlabel("Predicted Class")
    plt.show()

# --- CONFIGURATION ---
# Hyperparameters and file paths for the Pavia University experiment.
CONFIG = dict(
    dataset="PaviaU",
    data_path="PaviaU.mat",
    gt_path="PaviaU_gt.mat",
    in_channels=103,
    num_classes=10,
    window_size=64,
    stride=8,
    train_ratio=0.2,  # Use 20% of pixels for training
    batch_size=16,
    epochs=60,
    lr=1e-4,
)

# 1. DATASET WITH SPATIAL SPLIT
class HSIDataset(Dataset):
    """Sliding-window patch dataset over one hyperspectral scene.

    The labeled pixels (gt > 0) are shuffled with a fixed seed and
    split into a train or test mask; patches are kept only when they
    contain at least one labeled pixel of the requested split.
    """

    def __init__(self, cfg, is_train=True, augment=True):
        raw_data = loadmat(cfg["data_path"])
        raw_gt = loadmat(cfg["gt_path"])
        # Hard-coded .mat keys for the two supported datasets
        if cfg["dataset"] == "PaviaU":
            data_key, gt_key = "paviaU", "paviaU_gt"
        else:
            data_key, gt_key = "indian_pines_corrected", "indian_pines_gt"
        cube = raw_data[data_key].astype(np.float32)
        gt = raw_gt[gt_key].astype(np.int64)
        # Min-max normalize over the whole cube
        cube = (cube - np.min(cube)) / (np.max(cube) - np.min(cube))
        self.data = np.transpose(cube, (2, 0, 1))  # [C, H, W]
        # Split only the labeled pixels (gt > 0) into train/test
        rows, cols = np.where(gt > 0)
        order = np.arange(len(rows))
        np.random.seed(42)
        np.random.shuffle(order)
        n_train = int(len(rows) * cfg["train_ratio"])
        chosen = order[:n_train] if is_train else order[n_train:]
        # Pixels outside this split stay 0 (background)
        split_gt = np.zeros_like(gt)
        split_gt[rows[chosen], cols[chosen]] = gt[rows[chosen], cols[chosen]]
        self.gt = split_gt
        self.augment = augment and is_train
        # Collect patches containing labeled pixels for this split
        self.patches, self.labels = [], []
        _, h, w = self.data.shape
        win, step = cfg["window_size"], cfg["stride"]
        for top in range(0, h - win + 1, step):
            for left in range(0, w - win + 1, step):
                window_gt = self.gt[top:top+win, left:left+win]
                if (window_gt > 0).any():
                    self.patches.append(self.data[:, top:top+win, left:left+win])
                    self.labels.append(window_gt)
        self.patches = np.array(self.patches)
        self.labels = np.array(self.labels)

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        """Return one (patch, label) tensor pair; random flips when augmenting."""
        patch, label = self.patches[idx], self.labels[idx]
        if self.augment:
            # Horizontal then vertical flip, each with probability 0.5
            if random.random() > 0.5:
                patch, label = np.flip(patch, axis=2).copy(), np.flip(label, axis=1).copy()
            if random.random() > 0.5:
                patch, label = np.flip(patch, axis=1).copy(), np.flip(label, axis=0).copy()
        return torch.from_numpy(patch), torch.from_numpy(label)

# 2. MODEL & METRICS (Same as before)
class SegFormerHSI(nn.Module):
    """SegFormer (nvidia/mit-b0) adapted to hyperspectral input.

    A 1x1 convolution projects the spectral bands down to the 3 channels
    the pretrained backbone expects; the output logits are upsampled
    back to the input spatial resolution.
    """

    def __init__(self, in_ch, num_cl):
        super().__init__()
        # Learnable spectral reduction: in_ch bands -> 3 pseudo-RGB channels
        self.reducer = nn.Conv2d(in_ch, 3, kernel_size=1)
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/mit-b0", num_labels=num_cl, ignore_mismatched_sizes=True
        )

    def forward(self, x):
        x = self.reducer(x)
        out = self.model(x)
        # Resize the logits back to the spatial size of the reduced input
        return nn.functional.interpolate(out.logits, size=x.shape[-2:], mode="bilinear", align_corners=False)

# 3. MAIN TRAINING & VALIDATION
def main():
    """Train SegFormer on Pavia University and report test-split metrics."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Datasets: the fixed seed inside HSIDataset keeps the two splits complementary
    train_ds = HSIDataset(CONFIG, is_train=True)
    test_ds = HSIDataset(CONFIG, is_train=False, augment=False)
    train_loader = DataLoader(train_ds, batch_size=CONFIG["batch_size"], shuffle=True)
    model = SegFormerHSI(CONFIG["in_channels"], CONFIG["num_classes"]).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG["lr"])
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # background (0) excluded from loss

    print(f"Train Patches: {len(train_ds)} | Test Patches: {len(test_ds)}")

    # Train Loop
    for epoch in range(CONFIG["epochs"]):
        model.train()
        for imgs, masks in train_loader:
            imgs, masks = imgs.to(device), masks.to(device)
            optimizer.zero_grad(); loss = criterion(model(imgs), masks); loss.backward(); optimizer.step()
        if epoch % 10 == 0: print(f"Epoch {epoch} complete.")

    # Evaluation on the TEST split only: one forward pass over the whole scene
    model.eval()
    with torch.no_grad():
        full_img = torch.from_numpy(test_ds.data).unsqueeze(0).to(device)
        pred_map = torch.argmax(model(full_img), dim=1).squeeze(0).cpu().numpy()
    # Mask to only evaluate pixels assigned to TEST split
    mask = test_ds.gt > 0
    oa = accuracy_score(test_ds.gt[mask], pred_map[mask])
    kappa = cohen_kappa_score(test_ds.gt[mask], pred_map[mask])
    print(f"\n--- TEST SET RESULTS ---")
    print(f"Overall Accuracy: {oa:.4f}")
    print(f"Kappa: {kappa:.4f}")
    print("\nClass-wise Report:")
    print(classification_report(test_ds.gt[mask], pred_map[mask]))
    plot_confusion_matrix(test_ds.gt[mask], pred_map[mask], CONFIG["dataset"])
    plot_train_vs_prediction(train_ds.gt, pred_map, CONFIG["dataset"])

    # Reload the complete ground truth so the summary plot covers every labeled pixel
    mat_gt = loadmat(CONFIG["gt_path"])
    gt_key = "paviaU_gt" if CONFIG["dataset"] == "PaviaU" else "indian_pines_gt"
    full_gt = mat_gt[gt_key].astype(np.int64)

    # 2. Now call the plot function with the newly defined full_gt
    print("Generating Results Plots...")
    plot_results_summary(
        train_gt=train_ds.gt,
        full_gt=full_gt,
        pred_map=pred_map,
        dataset_name=CONFIG["dataset"]
    )


if __name__ == "__main__":
    main()


venerdì 20 febbraio 2026

Elenco di dati iperspettrali per materiali plastici

 Un elenco di dati spettrali di plastiche con licenza permissiva  


  1. MArine Debris hyperspectral reference Library MADLib (VNIR+SWIR circa 25000 oggetti) 210 Mb CSV circa 25000 oggetti licenza CC 4.0 Ohall, Ashley; Bisson, Kelsey; Rivero-Calle, Sara (2025): MArine Debris hyperspectral reference Library collection (MADLib). Version 1. 4TU.ResearchData. dataset. https://doi.org/10.4121/059551d3-2383-4e20-af2d-011c9a59d3ac.v1
  2. Hyperspectral plastics dataset Data Repository 17 Gb plastiche vergini e rifiuti su argini licenza CC 4.0 Paolo Tasseron; van Emmerik, Tim; Louise Schreyers; Lauren Biermann; Joseph Peller (2021): Hyperspectral plastics dataset supplementary to the paper ‘Advancing floating plastic detection from space using hyperspectral imagery’. Version 3. 4TU.ResearchData. dataset. https://doi.org/10.4121/14518278.v3
  3. Marine Data Archive Spettri ASD con livelli di alterazione Marine Data Archive articolo Leone, Giulia & Catarino, Ana Isabel & De Keukelaere, Liesbeth & Bossaer, Mattias & Knaeps, Els & Everaert, Gert. (2023). Hyperspectral reflectance dataset of pristine, weathered, and biofouled plastics. Earth System Science Data. 15. 745-752. 10.5194/essd-15-745-2023. 
  4. Swir data cubes Balsi et al. 1.8 Gb 7 immagini 700-1900 nm licenza CC 4.0 Balsi, Marco (2025), “SWIR hyperspectral data cubes for plastics detection in the environment”, Mendeley Data, V1, doi: 10.17632/y8cvcs8tt5.1
  5. NIR/SWIR Library of Plastic-Substrate Mixtures Plastic Substrate Mixtures spettri ASD con plastica miscelata a suolo,cemento, polvere ed acqua 300 Mb licenza CC 4.0 Holt, Z. (2024). NIR/SWIR Spectral Library of Plastic-Substrate Mixtures [Data set]. Zenodo. https://doi.org/10.5281/zenodo.14419753 Non sembra essere stato pubblicato un articolo con questi dati
  6. SLoPP & SLoPP-E  Spettroscopia Raman Rochman Lab - Microplastic Libraries
  7. Cefas Submerged Plastic Dataset Submerged Tarpaulin Data  Dati da UAV 400-1000 nm 15.5 Gb Licenza Open Government 3.0 Arnott et al (2023). Hyperspectral data of a submerged tarpaulin at Whitlingham Broad, UK, 2021. Cefas, UK. V1. doi: https://doi.org/10.14466/CefasDataHub.145
  8. Riverine Floating Plastic Database 9.4 Gb  licenza CC 4.0  Riverine Plastic Database Dati Codice Calcolo Olyaei M, Ebtehaj A, Ellis CR. A Hyperspectral Reflectance Database of Plastic Debris with Different Fractional Abundance in River Systems. Sci Data. 2024 Nov 20;11(1):1253. doi: 10.1038/s41597-024-03974-x. PMID: 39567545; PMCID: PMC11579464.
  9. DeepHS Debris University of Memphis Collection dati in vidpak licenza CC 4.0 Watson, T. (2026). Hyperspectral data collections at University of Memphis 2025-07-23 (1.1.0) [Data set]. University of Memphis. https://doi.org/10.5281/zenodo.18378977
  10. Waste Classification Dataset Link 213 Mb 22500 jpg Licenza CC4.0 su Kaggle vi sono numerosi reti neurali Nnamoko, Nonso; Barrowclough, Joseph ; Procter, Jack (2022), “Waste Classification Dataset”, Mendeley Data, V2, doi: 10.17632/n3gtgm9jxj.2
  11. HyperPlastic (HP) Database Link 42.7 Gb Licenza CC 4.0 Gb VIS-NIR + NIR-SWIR hyperspectral images of 5 plastic types (PET, HDPE, LDPE, PS, PP) in aquatic simulation El bergui, A., Porebski, A., & Vandenbroucke, N. (2025). HyperPlastic datasets. Zenodo. https://doi.org/10.5281/zenodo.14773387
  12. Hyperspectral-Reflectance Dataset of Dry/Wet/Submerged Marine Litter (Knaeps et al., 2021) Dati_1 4Mb  Dati_2  3Mb Licenza CC0 1.0 Hyperspectral-reflectance dataset of dry, wet and submerged marine litter Els Knaeps, Sindy Sterckx, Gert Strackx, Johan Mijnendonckx, Mehrdad Moshtaghi, Shungudzemwoyo P. Garaba, and Dieter Meire
  13. Plastic Litter Project 2021 dataset (Sentinel-2 data + multispettrale + hyperspectral) Link > 100Gb Licenza CC 4.0 dati UAV RGB e dati iperspettrali, raccolti per campagne di monitoraggio di plastica galleggiante https://doi.org/10.5281/zenodo.7085112
  14.  MARIDA Link 1.2 Gb Licenza CC 4.0 Github Katerina Kikaki, Ioannis Kakogeorgiou, Paraskevi Mikeli, ‪Dionysios E. Raitsos, & Konstantinos Karantzalos. (2021). MARIDA: Marine Debris Archive (1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.5151941
  15. TransPose Materiali trasparenti Dati https://arxiv.org/abs/2307.05016
  16. SpectralWaste rifiuti in centro selezione Licenza CC 4.0 23Gb  https://sites.google.com/unizar.es/spectralwaste   Casao, S., Peña, F., Sabater, A., Castillón, R., Suárez, D., Montijano, E., Murillo, A. C. (2024). "SpectralWaste Dataset: Multimodal Data for Waste Sorting Automation," 2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), Abu Dhabi, United Arab Emirates, 2024, pp. 5852-5858, doi: 10.1109/IROS58592.2024.10801797.
    arXiv:2403.18033.
  17. Trashnet 3.5 Gb Licenza MIT  https://github.com/garythung/trashnet/tree/master/data Articolo 
  18. Trash Annotated 2.7 Gb Licenza CC 4.0 http://tacodataset.org/  https://arxiv.org/abs/2003.06975 https://github.com/pedropro/TACO https://zenodo.org/records/3587843
  •  
     

 

mercoledì 18 febbraio 2026

Spectral waste dataset

Questo interessante progetto (avrei voluto fare anche io una cosa praticamente identica) mette a disposizione una serie di immagini di rifiuti ripresi sia con camera RGB che iperspettrale a 224 bande e modelli gia' calcolati di segmentazione di alcune reti neurali 


 

 

Lo scopo e' di effettuare una segmentazione sulle seguenti classi

  • film
  • basket
  • videotape
  • filament
  • trashbag
  • cardboard 

lo scopo principale e' quindi quello di individuare il materiale che potrebbe bloccare i macchinari del trattamento rifiuti 

Il link Github e' il seguente https://github.com/ferpb/spectralwaste-segmentation/tree/main mentre la pagina del progetto e' https://sites.google.com/unizar.es/spectralwaste

L'articolo e' consultabile a questo link 

Per far funzionare il progetto e' necessario Python 3.9 (con Debian Trixie siamo a Python 3.13 e non compila con cython)

si deve quindi prima creare un ambiente idoneo 



curl https://pyenv.run | bash
pyenv install 3.9.19
pyenv shell 3.9.19
python -m venv my_39_env
git clone https://github.com/ferpb/spectralwaste-segmentation
pip install -e .


Le immagini raw subiscono un primo passaggio di riduzione della dimensionalita' tramite PCA o FastICA o FactorAnalysis (vedi dim_reduction.py)
Successivamente vengono testate le reti neurali Segformer, Segformer multimodale, Mininet, Mininet Multimodale, e CMX 
A questo punto si avranno i checkpoint dei modelli gia' addestrati a questo link. I files .pth sono divisi per modello e per tipo di pretrattamento delle immagini
I files pth possono essere utilizzati per fare inferenza utilizzando il notebook python nel folder del repository GitHub
 
Le immagini iperspettrali sono state acquisite con una Specim FX17 (900-1700 nm)
Il formato in cui le immagini sono incluse nel dataset e' un tiff multipagina che non e' immediato da gestire

 Per ottenere lo spettro di un punto a coordinate x,y mi sono fatto uno script
# Extract and plot the spectrum of a single pixel from a multipage TIFF,
# where each page is one spectral band.
import matplotlib.pyplot as plt
from PIL import Image

# Pixel coordinates whose spectrum we want to extract
x = 50
y = 100

pixel_values = []

# Read the value of pixel (x, y) from every page/band of the file
with Image.open('1.tiff') as img:
    for i in range(img.n_frames):
        img.seek(i)
        pixel = img.getpixel((x, y))
        pixel_values.append(pixel)

# Wavelength axis: bands start at 900 nm with a ~3.57 nm step
# (Specim FX17 covers 900-1700 nm)
x_values = [900 + (i * 3.57) for i in range(len(pixel_values))]

# Plotting
plt.figure(figsize=(10, 6))
plt.plot(x_values, pixel_values, color='blue', label='Intensity')

plt.title('Pixel Value')
plt.xlabel('Lambda (nm)')
plt.ylabel('Pixel Intensity')
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend()
plt.show()
 

 Le maschere di addestramento della rete sono in formato tiff e devono essere stretchate 
 Il valore del pixel corrisponde alla classe 
 

 per rendere la cosa piu' agevole l'immagine geotiff multipagina puo' essere convertita in formato ENVI
 In questo modo in ESA Snap si puo' usare lo strumento Spectrum View
 

 
 
# Convert a multipage TIFF (one page per spectral band) into an ENVI
# file with an explicit wavelength list, so ESA SNAP can show spectra
# with the Spectrum View tool.
import rasterio
from PIL import Image
import numpy as np

input_file = '1.tiff'
output_base = 'output_envi'
output_dat = f'{output_base}.dat'
output_hdr = f'{output_base}.hdr'
start_wavelength = 900.0  # first band (nm)
step = 3.57               # spectral sampling step (nm)

# First pass: read band count and image size from the TIFF
with Image.open(input_file) as img:
    n_bands = img.n_frames
    width, height = img.size

wavelength_values = [start_wavelength + (i * step) for i in range(n_bands)]

# Write each TIFF page as one float32 band of a BSQ ENVI file
with rasterio.open(
    output_dat, 'w',
    driver='ENVI',
    height=height, width=width,
    count=n_bands,
    dtype='float32'
) as dst:
    with Image.open(input_file) as img:
        for i in range(n_bands):
            img.seek(i)
            # Scale 16-bit integer values to [0, 1]
            band_data = np.array(img).astype('float32') / 65535.0
            dst.write(band_data, i + 1)

wavelength_str = ", ".join([f"{w:.2f}" for w in wavelength_values])

# Custom ENVI header including the wavelength list (data type 4 = float32)
hdr_content = f"""ENVI    
description = {{ Prodotto convertito per ESA SNAP }}
samples = {width}
lines = {height}
bands = {n_bands}
header offset = 0
file type = ENVI Standard
data type = 4
interleave = bsq
byte order = 0
wavelength units = nanometers
wavelength = {{
{wavelength_str}
}}
"""

with open(output_hdr, 'w') as f:
    f.write(hdr_content)

print(f"Conversione completata. Apri il file {output_hdr} in SNAP.")
 
 
 

 

lunedì 16 febbraio 2026

Riparazione calcolatrice meccanica SIGMA 51-SVR

Mi sono comprato per pochi euro questa calcolatrice meccanica SIGMA-51SVR stimata anni 50 e basata sul sistema Odhner

  


La calcolatrice costava cosi' poco perche' era bloccata 


 Aperta la copertura e lubrificato il tutto i meccanismi si sono piano piano sbloccati 


 

 

 


Errore NTFS su Debian

Montando un disco USB esterno che usiamo di scambio a lavoro (e quindi formattato NTFS per Windows) e' comparso il seguente errore    lu...