Visualizzazione post con etichetta Telerilevamento. Mostra tutti i post
Visualizzazione post con etichetta Telerilevamento. Mostra tutti i post

venerdì 23 gennaio 2026

Analisi MNF su spettri di riflettanza di plastica

Devo cerca di lavorare su spettri di riflettanza di plastica e la prima domanda e': quale sono le bande significative?

Sono partito dal dataset delle plastiche (36 in tutto) contenuti in USGS Spectral Library

Per estrarre le feature ho usato MNF Maximum Noise Fraction (al posto di PCA)  

Nel grafico sottostante sono plottate le prime 3 componenti MNF 

La terza componente (quella di solito in cui sono presenti i dati spettrali, la prima componente e' influenzata dalla ) e' stata utilizzata per addestrare un rete random tree per trovare quali siano le bande effettivamente significative




 Per finire un confronto tra gli spettri non elaborati e la selezione random forest 

 


 Le bande piu' significative estratte 

 ind           nm     peso

150          600     0.018671
1289        1739   0.018409
149          599     0.014763
143          593     0.013682
1998        2448   0.012601 

 

import numpy as np
import glob
import os
from scipy.signal import savgol_filter
from scipy.linalg import eigh

folder = "./plastica"
files = sorted(glob.glob(os.path.join(folder, "*.txt")))


arrays = []
for f in files:
    data = np.loadtxt(f, skiprows=1)
    if data.shape[0] > 2100:
        arrays.append(data[100:2100])
       

X = np.array(arrays)
X = np.where((X >= 0) & (X <= 1), X, 0)



print(f"Shape originale: {X.shape}")

# 1. Smoothing e calcolo rumore
X_smooth = savgol_filter(X, window_length=11, polyorder=2, axis=1)
noise_residue = X - X_smooth

# 2. Calcolo matrici di covarianza
noise_cov = np.cov(noise_residue, rowvar=False)
data_cov = np.cov(X, rowvar=False)

# Regolarizzazione (Ridge) alla diagonale della matrice di rumore.
# Questo garantisce che la matrice sia definita positiva.
eps = 1e-6 * np.trace(noise_cov) / noise_cov.shape[0]
noise_cov_reg = noise_cov + np.eye(noise_cov.shape[0]) * eps

# 3. Esecuzione MNF
eigenvalues, eigenvectors = eigh(data_cov, noise_cov_reg)

idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# 5. Proiezione
X_mnf = np.dot(X, eigenvectors)

print(f"Prime 5 componenti MNF (SNR più alto): {eigenvalues[:5]}")

import matplotlib.pyplot as plt

# L'indice 0 corrisponde alla prima colonna dopo l'ordinamento decrescente
first_mnf_loadings = eigenvectors[:, 0]

wavelengths = np.arange(350, 2500)
wl_tagliate = wavelengths[100:2100]

idx_max = np.argmax(np.abs(first_mnf_loadings))
print(f"La banda più importante è la numero {idx_max}, che corrisponde a {wl_tagliate[idx_max]} nm")

wls = np.arange(350, 2501)[100:2100]

fig, axs = plt.subplots(3, 1, figsize=(12, 15), sharex=True)
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']

for i in range(3):
    # Estraiamo i loadings per la componente i-esima
    loadings = eigenvectors[:, i]
   
    axs[i].plot(wls, loadings, color=colors[i], lw=1.5)
    axs[i].set_title(f"Loadings MNF Componente {i+1} (Autovalore: {eigenvalues[i]:.2e})")
    axs[i].set_ylabel("Peso")
    axs[i].grid(True, alpha=0.3)
   
    # Evidenziamo l'area diagnostica dei carbonati (2300-2350 nm)
    axs[i].axvspan(2300, 2350, color='red', alpha=0.1, label='Zona Carbonati')

axs[2].set_xlabel("Lunghezza d'onda (nm)")
plt.tight_layout()
plt.show()



from sklearn.ensemble import RandomForestRegressor
import pandas as pd


y = X_mnf[:, 2]

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X, y)

importances = rf.feature_importances_

wls = np.arange(350, 2501)[100:2100]
feature_results = pd.DataFrame({'Wavelength': wls, 'Importance': importances})

# Ordiniamo per importanza decrescente
top_features = feature_results.sort_values(by='Importance', ascending=False).head(10)

print("Le 10 lunghezze d'onda più influenti (Feature Importance):")
print(top_features)

# 5. Visualizzazione
plt.figure(figsize=(10, 5))
plt.plot(wls, importances, color='purple', label='Feature Importance')
plt.fill_between(wls, importances, color='purple', alpha=0.3)
plt.title("Rilevanza delle Bande Spettrali (Random Forest)")
plt.xlabel("Lunghezza d'onda (nm)")
plt.ylabel("Importanza Relativa")
plt.grid(True, alpha=0.3)
plt.show()

plt.figure(figsize=(12, 7))

plt.plot(wls, X.T, color='steelblue', alpha=0.3)

plt.title(f"Spettri di Riflettanza del Dataset ({X.shape[0]} campioni)")
plt.xlabel("Lunghezza d'onda (nm)")
plt.ylabel("Riflettanza (0-1)")
plt.grid(True, linestyle='--', alpha=0.5)

# Per evitare legende infinite, ne mettiamo una generica
plt.plot([], [], color='steelblue', label='Spettri campioni')
plt.legend()

plt.show()

# --- GRAFICO 1: SPETTRI DI RIFLETTANZA ---
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12), sharex=True)

ax1.plot(wls, X.T, color='steelblue', alpha=0.2)

ax1.plot(wls, np.mean(X, axis=0), color='red', lw=2, label='Spettro Medio')

ax1.set_title("Spettri di Riflettanza (Dati Originali Puliti)")
ax1.set_ylabel("Riflettanza")
ax1.grid(True, alpha=0.3)
ax1.legend()

# --- GRAFICO 2: RILEVANZA DELLE BANDE (Feature Importance) ---

ax2.plot(wls, importances, color='darkgreen', lw=1.5)
ax2.fill_between(wls, importances, color='darkgreen', alpha=0.2)

ax2.set_title("Rilevanza delle Bande Spettrali (Random Forest Importance)")
ax2.set_ylabel("Importanza Relativa")
ax2.set_xlabel("Lunghezza d'onda (nm)")
ax2.grid(True, alpha=0.3)


plt.tight_layout()
plt.show()

giovedì 5 giugno 2025

Preparazione dati Marida

 

 

 


 

ricampiona i file jp2 originali di Sentinel 2 in geotiff a 10 m

 

import os
import rasterio
import numpy as np
from rasterio.windows import Window
from pathlib import Path

def create_tiles_from_geotiffs(input_files, output_dir, tile_size_km=2.5):
"""
Cut multiple geotiffs into tiles and stack them as bands in new geotiffs.
Args:
input_files: List of paths to input geotiffs (must cover same area)
output_dir: Directory to save output tiles
tile_size_km: Size of tiles in kilometers
"""
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
# Open first file to get metadata
with rasterio.open(input_files[0]) as src:
# Get spatial reference metadata
crs = src.crs
transform = src.transform
# Calculate tile size in pixels
# Get resolution in meters per pixel
res_x = abs(transform.a) # x resolution in map units/pixel
res_y = abs(transform.e) # y resolution in map units/pixel
# Calculate tile size in pixels
tile_size_x = int(tile_size_km * 1000 / res_x)
tile_size_y = int(tile_size_km * 1000 / res_y)
# Get image dimensions
width = src.width
height = src.height
# Calculate number of tiles
num_tiles_x = (width + tile_size_x - 1) // tile_size_x
num_tiles_y = (height + tile_size_y - 1) // tile_size_y
print(f"Input size: {width}x{height} pixels")
print(f"Tile size: {tile_size_x}x{tile_size_y} pixels ({tile_size_km}km x {tile_size_km}km)")
print(f"Creating {num_tiles_x * num_tiles_y} tiles ({num_tiles_x}x{num_tiles_y})")
# Process each tile
for tile_y in range(num_tiles_y):
for tile_x in range(num_tiles_x):
# Calculate window coordinates
x_offset = tile_x * tile_size_x
y_offset = tile_y * tile_size_y
# Adjust for edge tiles
actual_tile_width = min(tile_size_x, width - x_offset)
actual_tile_height = min(tile_size_y, height - y_offset)
# Skip if tile is empty or too small
if actual_tile_width <= 0 or actual_tile_height <= 0:
continue
# Define the window to read
window = Window(x_offset, y_offset, actual_tile_width, actual_tile_height)
# Calculate new geotransform for the tile
new_transform = rasterio.transform.from_origin(
transform.c + transform.a * x_offset, # upper left x
transform.f + transform.e * y_offset, # upper left y
transform.a, # pixel width
transform.e # pixel height
)
# Create empty array for the 11-band tile
tile_data = np.zeros((len(input_files), actual_tile_height, actual_tile_width),
dtype=rasterio.float32)
# Read data from each input file
for band_idx, file_path in enumerate(input_files):
with rasterio.open(file_path) as src:
tile_data[band_idx] = src.read(1, window=window)
# Define output file path
out_path = os.path.join(output_dir, f"tile_x{tile_x}_y{tile_y}.tif")
# Write the tile to a new file
with rasterio.open(
out_path,
'w',
driver='GTiff',
height=actual_tile_height,
width=actual_tile_width,
count=len(input_files),
dtype=tile_data.dtype,
crs=crs,
transform=new_transform,
) as dst:
dst.write(tile_data)
print(f"Created tile: {out_path}")
# Optional: Add metadata about original tile position
with rasterio.open(out_path, 'r+') as dst:
dst.update_tags(
tile_x=tile_x,
tile_y=tile_y,
original_extent=f"{width}x{height}",
tile_size_km=tile_size_km
)

if __name__ == "__main__":
# Directory containing input geotiffs
input_dir = "output_10m"
# List of input geotiff files (11 bands)
input_files = [
os.path.join(input_dir, f"B{i+1}.tif") for i in range(11)
]
# Check if files exist
for f in input_files:
if not os.path.exists(f):
print(f"Warning: File {f} does not exist!")
# Directory to save output tiles
output_dir = "output_tiles"
# Create tiles
create_tiles_from_geotiffs(input_files, output_dir, tile_size_km=2.56)


 

 

crea una tile di 2560x2560m 11 bande

import os
import rasterio
from rasterio.windows import Window
import numpy as np

# Parameters
input_folder = "./output_10m" # folder with B1.tif to B11.tif
output_folder = "./output_tiles"
tile_size_m = 2560 # meters
pixel_size = 10 # meters per pixel
tile_size_px = tile_size_m // pixel_size

# Get list of sorted input files (assumes filenames like B1.tif, B2.tif, ..., B11.tif)
input_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".tif")])
input_paths = [os.path.join(input_folder, f) for f in input_files]

# Open all rasters
rasters = [rasterio.open(fp) for fp in input_paths]

# Check that all rasters have same dimensions
width, height = rasters[0].width, rasters[0].height
transform = rasters[0].transform
crs = rasters[0].crs

# Create output folder
os.makedirs(output_folder, exist_ok=True)

tile_id = 0
for y in range(0, height, tile_size_px):
for x in range(0, width, tile_size_px):
if x + tile_size_px > width or y + tile_size_px > height:
continue # Skip partial tiles

# Read corresponding tile from each band
tile_stack = []
for r in rasters:
tile = r.read(1, window=Window(x, y, tile_size_px, tile_size_px))
tile_stack.append(tile)

tile_stack = np.stack(tile_stack) # shape: (bands, height, width)

# Define transform for this tile
tile_transform = rasterio.windows.transform(Window(x, y, tile_size_px, tile_size_px), transform)

# Write output GeoTIFF with multiple bands
out_path = os.path.join(output_folder, f"tile_{tile_id:04d}.tif")
with rasterio.open(
out_path,
"w",
driver="GTiff",
height=tile_size_px,
width=tile_size_px,
count=len(rasters),
dtype=tile_stack.dtype,
crs=crs,
transform=tile_transform,
) as dst:
for i in range(len(rasters)):
dst.write(tile_stack[i], i + 1)

tile_id += 1

# Close all files
for r in rasters:
r.close()

print(f"Done. Created {tile_id} multi-band tiles.")


 

Analisi MNF su spettri di riflettanza di plastica

Devo cerca di lavorare su spettri di riflettanza di plastica e la prima domanda e': quale sono le bande significative? Sono partito dal ...