DHOB (IU5SGN)

giovedì 5 giugno 2025

Mandelbrot su CYD (Cheap Yellow Display) ESP32-2432S028

Ho provato ad usare ESP32 in CYD per generare Mandelbrot utilizzando entrambi i core

Il codice del progetto Platformio si trova a questo link

https://github.com/c1p81/mandelbrot_esp32_cyd

Preparazione dati Marida

ricampiona i file jp2 originali di Sentinel 2 in geotiff a 10 m

import os
import rasterio
import numpy as np
from rasterio.windows import Window
from pathlib import Path

def create_tiles_from_geotiffs(input_files, output_dir, tile_size_km=2.5):
    """
    Cut multiple geotiffs into tiles and stack them as bands in new geotiffs.
   
    Args:
        input_files: List of paths to input geotiffs (must cover same area)
        output_dir: Directory to save output tiles
        tile_size_km: Size of tiles in kilometers
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
   
    # Open first file to get metadata
    with rasterio.open(input_files[0]) as src:
        # Get spatial reference metadata
        crs = src.crs
        transform = src.transform
       
        # Calculate tile size in pixels
        # Get resolution in meters per pixel
        res_x = abs(transform.a)  # x resolution in map units/pixel
        res_y = abs(transform.e)  # y resolution in map units/pixel
       
        # Calculate tile size in pixels
        tile_size_x = int(tile_size_km * 1000 / res_x)
        tile_size_y = int(tile_size_km * 1000 / res_y)
       
        # Get image dimensions
        width = src.width
        height = src.height
       
        # Calculate number of tiles
        num_tiles_x = (width + tile_size_x - 1) // tile_size_x
        num_tiles_y = (height + tile_size_y - 1) // tile_size_y
       
        print(f"Input size: {width}x{height} pixels")
        print(f"Tile size: {tile_size_x}x{tile_size_y} pixels ({tile_size_km}km x {tile_size_km}km)")
        print(f"Creating {num_tiles_x * num_tiles_y} tiles ({num_tiles_x}x{num_tiles_y})")
   
    # Process each tile
    for tile_y in range(num_tiles_y):
        for tile_x in range(num_tiles_x):
            # Calculate window coordinates
            x_offset = tile_x * tile_size_x
            y_offset = tile_y * tile_size_y
           
            # Adjust for edge tiles
            actual_tile_width = min(tile_size_x, width - x_offset)
            actual_tile_height = min(tile_size_y, height - y_offset)
           
            # Skip if tile is empty or too small
            if actual_tile_width <= 0 or actual_tile_height <= 0:
                continue
           
            # Define the window to read
            window = Window(x_offset, y_offset, actual_tile_width, actual_tile_height)
           
            # Calculate new geotransform for the tile
            new_transform = rasterio.transform.from_origin(
                transform.c + transform.a * x_offset,  # upper left x
                transform.f + transform.e * y_offset,  # upper left y
                transform.a,  # pixel width
                transform.e   # pixel height
            )
           
            # Create empty array for the 11-band tile
            tile_data = np.zeros((len(input_files), actual_tile_height, actual_tile_width),
                                dtype=rasterio.float32)
           
            # Read data from each input file
            for band_idx, file_path in enumerate(input_files):
                with rasterio.open(file_path) as src:
                    tile_data[band_idx] = src.read(1, window=window)
           
            # Define output file path
            out_path = os.path.join(output_dir, f"tile_x{tile_x}_y{tile_y}.tif")
           
            # Write the tile to a new file
            with rasterio.open(
                out_path,
                'w',
                driver='GTiff',
                height=actual_tile_height,
                width=actual_tile_width,
                count=len(input_files),
                dtype=tile_data.dtype,
                crs=crs,
                transform=new_transform,
            ) as dst:
                dst.write(tile_data)
           
            print(f"Created tile: {out_path}")
           
            # Optional: Add metadata about original tile position
            with rasterio.open(out_path, 'r+') as dst:
                dst.update_tags(
                    tile_x=tile_x,
                    tile_y=tile_y,
                    original_extent=f"{width}x{height}",
                    tile_size_km=tile_size_km
                )

if __name__ == "__main__":
    # Directory containing input geotiffs
    input_dir = "output_10m"
   
    # List of input geotiff files (11 bands)
    input_files = [
        os.path.join(input_dir, f"B{i+1}.tif") for i in range(11)
    ]
   
    # Check if files exist
    for f in input_files:
        if not os.path.exists(f):
            print(f"Warning: File {f} does not exist!")
   
    # Directory to save output tiles
    output_dir = "output_tiles"
   
    # Create tiles
    create_tiles_from_geotiffs(input_files, output_dir, tile_size_km=2.56)

crea una tile di 2560x2560m 11 bande

import os
import rasterio
from rasterio.windows import Window
import numpy as np

# Parameters
input_folder = "./output_10m"  # folder with B1.tif to B11.tif
output_folder = "./output_tiles"
tile_size_m = 2560  # meters
pixel_size = 10  # meters per pixel
tile_size_px = tile_size_m // pixel_size

# Get list of sorted input files (assumes filenames like B1.tif, B2.tif, ..., B11.tif)
input_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".tif")])
input_paths = [os.path.join(input_folder, f) for f in input_files]

# Open all rasters
rasters = [rasterio.open(fp) for fp in input_paths]

# Check that all rasters have same dimensions
width, height = rasters[0].width, rasters[0].height
transform = rasters[0].transform
crs = rasters[0].crs

# Create output folder
os.makedirs(output_folder, exist_ok=True)

tile_id = 0
for y in range(0, height, tile_size_px):
    for x in range(0, width, tile_size_px):
        if x + tile_size_px > width or y + tile_size_px > height:
            continue  # Skip partial tiles

        # Read corresponding tile from each band
        tile_stack = []
        for r in rasters:
            tile = r.read(1, window=Window(x, y, tile_size_px, tile_size_px))
            tile_stack.append(tile)

        tile_stack = np.stack(tile_stack)  # shape: (bands, height, width)

        # Define transform for this tile
        tile_transform = rasterio.windows.transform(Window(x, y, tile_size_px, tile_size_px), transform)

        # Write output GeoTIFF with multiple bands
        out_path = os.path.join(output_folder, f"tile_{tile_id:04d}.tif")
        with rasterio.open(
            out_path,
            "w",
            driver="GTiff",
            height=tile_size_px,
            width=tile_size_px,
            count=len(rasters),
            dtype=tile_stack.dtype,
            crs=crs,
            transform=tile_transform,
        ) as dst:
            for i in range(len(rasters)):
                dst.write(tile_stack[i], i + 1)

        tile_id += 1

# Close all files
for r in rasters:
    r.close()

print(f"Done. Created {tile_id} multi-band tiles.")

venerdì 11 aprile 2025

Marida Marine Debris Archive

Sto provando ad utilizzare Marida, un archivio di immagini supervisionate di Sentinel 2, raccolte in area non mediterranea con 15 categorie

Le immagini del dataset si possono scaricare da questo link (1.13 Gb) e l'articolo di riferimento e' https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0262247

Il dataset e' costituito da patches (immagini geotiff con risoluzione spaziale di 10 m, dimensioni di 2560x2560 m, 256x256 pixels ed 11 bande). In questo senso si deve considerare che le bande a 20 e 60 m sono state ricampionate

Le immagini sono classificate tramite poligoni shapefile in un separatore folder

Su Github e' presente il codice PyTorch per addestrare due reti neurali (https://paperswithcode.com/paper/resattunet-detecting-marine-debris-using-an) ma si possono addestrare i modelli gia' addestrati da qui per Unet e Resnet

Per fare inferenza si puo' usare lo script sottostante

Esempio di inferenza classificata

import torch
import torch.nn as nn
import torchvision.models as models
import sys

class ResNet50Encoder(nn.Module):
    def __init__(self, input_bands=11, output_classes=11):
        super(ResNet50Encoder, self).__init__()

        resnet = models.resnet50(pretrained=False)

        self.encoder = nn.Sequential(
            nn.Conv2d(input_bands, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False),
            resnet.bn1,
            resnet.relu,
            resnet.maxpool,
            resnet.layer1,
            resnet.layer2,
            resnet.layer3,
            resnet.layer4,
            resnet.avgpool
        )

        self.fc = nn.Linear(2048, output_classes)

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)
        logits = self.fc(x)
        return logits







# Instantiate and load weights
model = ResNet50Encoder(input_bands=11, output_classes=11)
model.load_state_dict(torch.load("model_resnet.pth", map_location="cpu"))
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

import rasterio
import numpy as np

with rasterio.open("test4.tif") as src:
    image = src.read().astype(np.float32)  # (11, 256, 256)

# Normalize
image = (image - image.min()) / (image.max() - image.min())

import torch
image_tensor = torch.from_numpy(image).unsqueeze(0)  # (1, 11, 256, 256)
image_tensor = image_tensor.to(device)

with torch.no_grad():
    output = model(image_tensor)

# If it's a classifier:
predicted_class = torch.argmax(output, dim=1)
print("Predicted class:", predicted_class.item())







#classificazione a livello di pixel
output = model(image_tensor)  # [1, num_classes, 256, 256]
mask = torch.argmax(output, dim=1).squeeze(0)  # shape: [256, 256]



import torch
from torchvision.utils import save_image
from PIL import Image
import numpy as np

# Example: your predicted mask (dtype: torch.Tensor, shape: [H, W])
mask = torch.randint(0, 10, (256, 256), dtype=torch.uint8)  # demo mask

# Option 1: using PIL directly (recommended for class masks)
mask_np = mask.numpy().astype(np.uint8)
img = Image.fromarray(mask_np, mode="L")  # 'L' = grayscale (8-bit)
img.save("predicted_mask.png")

palette = [
    0, 0, 0,         # class 0 - black
    255, 0, 0,       # class 1 - red
    0, 255, 0,       # class 2 - green
    0, 0, 255,       # class 3 - blue
    255, 255, 0,     # class 4 - yellow
    255, 0, 255,     # class 5 - magenta
    0, 255, 255,     # class 6 - cyan
    128, 128, 128,   # class 7 - gray
    255, 128, 0,     # class 8 - orange
    128, 0, 255,     # class 9 - violet
    0, 128, 255,     # class 10 - sky blue
]

# Apply the palette
color_mask = Image.fromarray(mask_np, mode="P")
color_mask.putpalette(palette)
color_mask.save("colored_mask.png")

##########################################################################################
# Define the segmentation model
class ResNetSegmentationFromClassifier(nn.Module):
    def __init__(self, encoder_model):
        super().__init__()
        # Extract the encoder part (everything except avgpool and fc)
        resnet = models.resnet50(pretrained=False)
        
        # First re-use the custom first conv layer from the encoder model
        first_conv = encoder_model.encoder[0]
        
        # Build encoder using resnet blocks
        self.encoder = nn.Sequential(
            first_conv,
            resnet.bn1,
            resnet.relu,
            resnet.maxpool,
            resnet.layer1,
            resnet.layer2,
            resnet.layer3,
            resnet.layer4
        )
        
        # Load weights from the encoder model
        # Copy weights for the shared layers
        encoder_state_dict = encoder_model.state_dict()
        for name, param in self.encoder.named_parameters():
            if name in encoder_state_dict:
                param.data.copy_(encoder_state_dict[name])
        
        # Decoder to upsample to 256x256
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(2048, 512, 2, stride=2),  # 8x8 -> 16x16
            nn.ReLU(),
            nn.ConvTranspose2d(512, 256, 2, stride=2),   # 16x16 -> 32x32
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 2, stride=2),   # 32x32 -> 64x64
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 2, stride=2),    # 64x64 -> 128x128
            nn.ReLU(),
            nn.ConvTranspose2d(64, 11, 2, stride=2)      # 128x128 -> 256x256
        )
    
    def forward(self, x):
        x = self.encoder(x)        # [B, 2048, 8, 8]
        x = self.decoder(x)        # [B, 11, 256, 256]
        return x

# Load the classification model
model = ResNet50Encoder(input_bands=11, output_classes=11)
model.load_state_dict(torch.load("model_resnet.pth", map_location="cpu"))
model.eval()

# Create the segmentation model
segmentation_model = ResNetSegmentationFromClassifier(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
segmentation_model.to(device)
segmentation_model.eval()

# Load and preprocess the image
with rasterio.open("test4.tif") as src:
    image = src.read().astype(np.float32)  # (11, 256, 256)

# Normalize
image = (image - image.min()) / (image.max() - image.min())
image_tensor = torch.from_numpy(image).unsqueeze(0)  # (1, 11, 256, 256)
image_tensor = image_tensor.to(device)

# Perform pixel-based classification
with torch.no_grad():
    output = segmentation_model(image_tensor)  # [1, 11, 256, 256]

# Create class mask by taking argmax at each pixel
mask = torch.argmax(output, dim=1).squeeze(0)  # shape: [256, 256]
mask_np = mask.cpu().numpy().astype(np.uint8)

# Define color palette (11 classes)
palette = [
    0, 0, 0,         # class 0 - black
    255, 0, 0,       # class 1 - red
    0, 255, 0,       # class 2 - green
    0, 0, 255,       # class 3 - blue
    255, 255, 0,     # class 4 - yellow
    255, 0, 255,     # class 5 - magenta
    0, 255, 255,     # class 6 - cyan
    128, 128, 128,   # class 7 - gray
    255, 128, 0,     # class 8 - orange
    128, 0, 255,     # class 9 - violet
    0, 128, 255,     # class 10 - sky blue
]

# Save grayscale mask
img = Image.fromarray(mask_np, mode="L")
img.save("predicted_mask.png")

# Save colored mask
color_mask = Image.fromarray(mask_np, mode="P")
color_mask.putpalette(palette)
color_mask.save("colored_mask.png")

# Print class distribution
unique, counts = np.unique(mask_np, return_counts=True)
class_distribution = dict(zip(unique, counts))
print("Pixel class distribution:", class_distribution)