Sunday, August 24, 2025

Neural network for lithology recognition: The Drill Core Image Dataset (DCID)

Update: the script can be modified to run EfficientNetB4, but the training time gets considerably longer. On an M1, each epoch takes at least 25 minutes.
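The swap is essentially limited to the base-model constructor and the input resolution; a minimal sketch, assuming the rest of the training script below stays the same (here the ImageNet weights are loaded directly, whereas the script below downloads them manually):

from tensorflow import keras

# EfficientNetB4 was trained at 380x380, so the input size should be raised accordingly,
# which is a large part of why each epoch becomes much slower.
img_size = (380, 380)
base_model = keras.applications.EfficientNetB4(
    input_shape=img_size + (3,),
    include_top=False,
    weights="imagenet"
)
base_model.trainable = False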

=====================================

I found a dataset of high-quality 512x512 photographs of drill cores annotated with the corresponding lithology: https://github.com/JiayuLi1120/drill-core-image-dataset

Of course, studies of this kind already exist, for example https://www.sciencedirect.com/science/article/pii/S0920410520309888

There are actually two datasets: the first with 7 lithology categories, the second with 35. I used the second one, which provides 800 training images and 200 test photographs for each lithology.
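image_dataset_from_directory (used in the script below) expects one sub-folder per class; a quick, optional sanity check of the layout and counts, assuming the same paths as in the script:

import os

# Count images per lithology folder (expected: 800 in train, 200 in test for each class)
for split in ("train", "test"):
    split_dir = f"../drill_core/DCID-512-35/{split}"
    for cls in sorted(os.listdir(split_dir)):
        cls_dir = os.path.join(split_dir, cls)
        if os.path.isdir(cls_dir):
            n = sum(1 for f in os.listdir(cls_dir) if f.lower().endswith((".jpg", ".png")))
            print(f"{split}/{cls}: {n} images")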

Example of a training photo



The model uses transfer learning from EfficientNetB0 to speed up training.

From a purely neural-network point of view the model works, but when put to the test it often fails: for example, the two photos below should both fall into the Mudstone category, yet they are predicted differently (and the score of the second one is so low that the prediction should be discarded).
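Since the model outputs a softmax score for every class, one simple mitigation is to reject predictions whose top score falls below a threshold; a minimal sketch (the 0.6 cut-off is arbitrary, and `predictions` is the output of `model.predict` as in the inference script further down):

import numpy as np

def classify_with_rejection(predictions, class_names, threshold=0.6):
    """Return (class, score) for the top prediction, or (None, score) if it is too uncertain."""
    probs = predictions[0]          # first (and only) image in the batch
    idx = int(np.argmax(probs))
    score = float(probs[idx])
    if score < threshold:
        return None, score          # reject the prediction as too uncertain
    return class_names[idx], score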






import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import json
import os
import numpy as np

# Clear session
tf.keras.backend.clear_session()

# --- Paths ---
train_dir = "../drill_core/DCID-512-35/train"
val_dir = "../drill_core/DCID-512-35/test"

# --- Parameters ---
batch_size = 16
img_size = (224, 224)

# --- Verify data directories ---
for path in [train_dir, val_dir]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"Directory not found: {path}")

# --- Load datasets as RGB ---
print("Loading training data (RGB)...")
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=img_size,
    batch_size=batch_size,
    label_mode="int",
    color_mode="rgb",
    shuffle=True,
    seed=123
)

print("Loading validation data (RGB)...")
val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    image_size=img_size,
    batch_size=batch_size,
    label_mode="int",
    color_mode="rgb",
    shuffle=False
)

# Debug: Check one batch
for images, labels in train_ds.take(1):
    print(f"Sample batch shape: {images.shape}")
    print(f"Pixel range: {tf.reduce_min(images):.1f} to {tf.reduce_max(images):.1f}")
    break

# --- Save class names ---
class_names = train_ds.class_names
num_classes = len(class_names)
with open("class_names.json", "w") as f:
    json.dump(class_names, f)
print("Classes:", class_names)

# --- Performance optimization ---
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# --- Data Augmentation ---
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])

# --- STEP: Download EfficientNetB0 weights WITHOUT hash check ---
print("Downloading EfficientNetB0 weights (skipping hash check)...")
try:
    weights_path = keras.utils.get_file(
        fname="efficientnetb0_notop.h5",
        origin="https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5",
        cache_subdir="models",
        file_hash=None  # skip the hash check
    )
    print(f"✅ Weights downloaded at: {weights_path}")
except Exception as e:
    print(f"❌ Download failed: {e}")
    raise

# --- Build Model ---
print("Building model...")
base_model = keras.applications.EfficientNetB0(
    input_shape=img_size + (3,),
    include_top=False,
    weights=None  # Will load manually
)

# Load weights with skip_mismatch=True (robust)
base_model.load_weights(weights_path, by_name=True, skip_mismatch=True)
print("✅ Pretrained weights loaded (mismatches skipped)")

# Freeze base
base_model.trainable = False

# Add custom head
inputs = keras.Input(shape=img_size + (3,))
x = data_augmentation(inputs)
x = keras.applications.efficientnet.preprocess_input(x)
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)

model = keras.Model(inputs, outputs)

# Compile
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

print("\n✅ Model built successfully!")
model.summary()

# --- Callbacks ---
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

# --- Phase 1: Train head ---
print("\n๐Ÿš€ Phase 1: Training classifier head")
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[early_stop]
)

# --- Phase 2: Fine-tune top layers ---
print("\n๐Ÿš€ Phase 2: Fine-tuning last 20 layers")
base_model.trainable = True
fine_tune_at = len(base_model.layers) - 20
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

model.compile(
    optimizer=keras.optimizers.Adam(1e-5),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

history_fine = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[early_stop]
)

# --- Save model ---
model.save("drill_core_model_rgb.keras")
print("\n๐ŸŽ‰ Model saved as 'drill_core_model_rgb.keras'")

# --- Plot results ---
plt.figure(figsize=(10, 6))
plt.plot(history.history["accuracy"], label="Train Acc (Phase 1)")
plt.plot(history.history["val_accuracy"], label="Val Acc (Phase 1)")
if 'history_fine' in locals():
    plt.plot(range(len(history.history["accuracy"]),
                   len(history.history["accuracy"]) + len(history_fine.history["accuracy"])),
             history_fine.history["accuracy"], label="Train Acc (Fine-tune)")
    plt.plot(range(len(history.history["val_accuracy"]),
                   len(history.history["val_accuracy"]) + len(history_fine.history["val_accuracy"])),
             history_fine.history["val_accuracy"], label="Val Acc (Fine-tune)")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Training History")
plt.legend()
plt.grid(True)
plt.show()

# Final eval
loss, acc = model.evaluate(val_ds)
print(f"Final accuracy: {acc:.4f}")



✅ Model built successfully!
Model: "functional_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ input_layer_1 (InputLayer)           │ (None, 224, 224, 3)         │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ sequential (Sequential)              │ (None, 224, 224, 3)         │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ efficientnetb0 (Functional)          │ (None, 7, 7, 1280)          │       4,049,571 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ global_average_pooling2d             │ (None, 1280)                │               0 │
│ (GlobalAveragePooling2D)             │                             │                 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout (Dropout)                    │ (None, 1280)                │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 35)                  │          44,835 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 4,094,406 (15.62 MB)
 Trainable params: 44,835 (175.14 KB)
 Non-trainable params: 4,049,571 (15.45 MB)

🚀 Phase 1: Training classifier head
Epoch 1/20
2025-08-24 10:55:54.541328: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 357s 201ms/step - accuracy: 0.8292 - loss: 0.7887 - val_accuracy: 0.8367 - val_loss: 0.5065
Epoch 2/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 351s 201ms/step - accuracy: 0.9231 - loss: 0.3001 - val_accuracy: 0.8489 - val_loss: 0.4491
Epoch 3/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 354s 202ms/step - accuracy: 0.9369 - loss: 0.2314 - val_accuracy: 0.8680 - val_loss: 0.3988
Epoch 4/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 363s 207ms/step - accuracy: 0.9413 - loss: 0.2039 - val_accuracy: 0.8666 - val_loss: 0.3918
Epoch 5/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 401s 229ms/step - accuracy: 0.9484 - loss: 0.1757 - val_accuracy: 0.8661 - val_loss: 0.4093
Epoch 6/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 480s 274ms/step - accuracy: 0.9478 - loss: 0.1723 - val_accuracy: 0.8796 - val_loss: 0.3748
Epoch 7/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 382s 218ms/step - accuracy: 0.9498 - loss: 0.1622 - val_accuracy: 0.8844 - val_loss: 0.3458
Epoch 8/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 364s 208ms/step - accuracy: 0.9524 - loss: 0.1521 - val_accuracy: 0.8744 - val_loss: 0.3995
Epoch 9/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 361s 206ms/step - accuracy: 0.9527 - loss: 0.1473 - val_accuracy: 0.8894 - val_loss: 0.3432
Epoch 10/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 401s 229ms/step - accuracy: 0.9573 - loss: 0.1403 - val_accuracy: 0.8930 - val_loss: 0.3206
Epoch 11/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 403s 230ms/step - accuracy: 0.9549 - loss: 0.1396 - val_accuracy: 0.8853 - val_loss: 0.3568
Epoch 12/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 400s 229ms/step - accuracy: 0.9558 - loss: 0.1361 - val_accuracy: 0.8936 - val_loss: 0.3324
Epoch 13/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 399s 228ms/step - accuracy: 0.9565 - loss: 0.1348 - val_accuracy: 0.8954 - val_loss: 0.3220
Epoch 14/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 401s 229ms/step - accuracy: 0.9564 - loss: 0.1350 - val_accuracy: 0.9009 - val_loss: 0.2988
Epoch 15/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 400s 229ms/step - accuracy: 0.9575 - loss: 0.1335 - val_accuracy: 0.9059 - val_loss: 0.2784
Epoch 16/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 346s 198ms/step - accuracy: 0.9581 - loss: 0.1300 - val_accuracy: 0.8990 - val_loss: 0.3237
Epoch 17/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 342s 195ms/step - accuracy: 0.9590 - loss: 0.1260 - val_accuracy: 0.8816 - val_loss: 0.3762
Epoch 18/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 344s 196ms/step - accuracy: 0.9599 - loss: 0.1224 - val_accuracy: 0.8989 - val_loss: 0.3151
Epoch 19/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 343s 196ms/step - accuracy: 0.9590 - loss: 0.1257 - val_accuracy: 0.9044 - val_loss: 0.2896
Epoch 20/20
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 344s 196ms/step - accuracy: 0.9579 - loss: 0.1261 - val_accuracy: 0.9111 - val_loss: 0.2622

🚀 Phase 2: Fine-tuning last 20 layers
Epoch 1/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 397s 221ms/step - accuracy: 0.7774 - loss: 0.8018 - val_accuracy: 0.9043 - val_loss: 0.3364
Epoch 2/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 399s 228ms/step - accuracy: 0.8726 - loss: 0.4119 - val_accuracy: 0.9297 - val_loss: 0.2536
Epoch 3/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 385s 220ms/step - accuracy: 0.9009 - loss: 0.3149 - val_accuracy: 0.9349 - val_loss: 0.2383
Epoch 4/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 381s 218ms/step - accuracy: 0.9183 - loss: 0.2574 - val_accuracy: 0.9411 - val_loss: 0.2193
Epoch 5/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 384s 219ms/step - accuracy: 0.9282 - loss: 0.2232 - val_accuracy: 0.9423 - val_loss: 0.2088
Epoch 6/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 381s 218ms/step - accuracy: 0.9343 - loss: 0.2081 - val_accuracy: 0.9474 - val_loss: 0.1850
Epoch 7/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 383s 219ms/step - accuracy: 0.9419 - loss: 0.1797 - val_accuracy: 0.9473 - val_loss: 0.1850
Epoch 8/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 380s 217ms/step - accuracy: 0.9475 - loss: 0.1655 - val_accuracy: 0.9491 - val_loss: 0.1837
Epoch 9/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 382s 218ms/step - accuracy: 0.9490 - loss: 0.1581 - val_accuracy: 0.9516 - val_loss: 0.1717
Epoch 10/10
1750/1750 ━━━━━━━━━━━━━━━━━━━━ 381s 218ms/step - accuracy: 0.9530 - loss: 0.1425 - val_accuracy: 0.9500 - val_loss: 0.1753


import tensorflow as tf
import numpy as np
import json
import os
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt

# --- Paths ---
model_path = "drill_core_model_rgb.keras"
classes_path = "class_names.json"
image_path = "../drill_core/DCID-512-35/test/1.Red sandstone/1.Red sandstone.jpg" # ← Replace or pass as input
# Example: image_path = "../drill_core/DCID-512-35/test/class_name/sample.jpg"

# --- Load class names ---
if not os.path.exists(classes_path):
    raise FileNotFoundError(f"Class names file not found: {classes_path}")
with open(classes_path, "r") as f:
    class_names = json.load(f)
print("Loaded classes:", class_names)

# --- Load model ---
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")
print("Loading model...")
model = tf.keras.models.load_model(model_path)
print("✅ Model loaded successfully!")

# --- Image preprocessing function ---
def preprocess_image(img_path, target_size=(224, 224)):
    """Load and preprocess a single image for inference."""
    if not os.path.exists(img_path):
        raise FileNotFoundError(f"Image not found: {img_path}")
    # Load image
    img = image.load_img(img_path, target_size=target_size)  # Resize to 224x224
    img_array = image.img_to_array(img)  # To numpy array
    img_array = tf.expand_dims(img_array, 0)  # Add batch dim (1, 224, 224, 3)
    img_array = tf.keras.applications.efficientnet.preprocess_input(img_array)  # Preprocess
    return img, img_array

# --- Prediction function ---
def predict_image(img_path, model, class_names, show=True):
    """Run inference on a single image."""
    print(f"\n📌 Predicting: {os.path.basename(img_path)}")
    # Preprocess
    img, img_array = preprocess_image(img_path)
    # Predict
    predictions = model.predict(img_array, verbose=0)
    predicted_class_idx = np.argmax(predictions[0])
    predicted_class = class_names[predicted_class_idx]
    confidence = predictions[0][predicted_class_idx]
    # Get top-3 predictions
    top_indices = np.argsort(predictions[0])[::-1][:3]
    print("Top 3 predictions:")
    for i in top_indices:
        print(f"  {class_names[i]}: {predictions[0][i]:.4f}")
    # Show image and prediction
    if show:
        plt.figure(figsize=(6, 6))
        plt.imshow(img)
        plt.title(f"Predicted: {predicted_class}\nConfidence: {confidence:.4f}", fontsize=14)
        plt.axis("off")
        plt.tight_layout()
        plt.show()
    return predicted_class, confidence

# --- Batch prediction on a folder ---
def predict_folder(folder_path, model, class_names, max_images=10):
    """Run inference on all images in a folder."""
    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        return
    print(f"\n📁 Predicting images in: {folder_path}")
    count = 0
    for root, _, files in os.walk(folder_path):
        for f in files:
            if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                img_path = os.path.join(root, f)
                predict_image(img_path, model, class_names, show=True)
                count += 1
                if count >= max_images:
                    print(f"\n🛑 Stopped after {max_images} images.")
                    return

# --- Option 1: Predict single image ---
single_image_path = './immagini/arenaria-marina-roccia-sedimentaria-campione-by2dwg-592163397.jpg'

if os.path.exists(single_image_path):
    predict_image(single_image_path, model, class_names, show=True)
else:
    print(f"⚠️ Image not found: {single_image_path}. Update 'single_image_path'.")

# --- Option 2: Predict multiple images from a folder ---
# Uncomment to run batch prediction
# predict_folder("../drill_core/DCID-512-35/test", model, class_names, max_images=5)



Friday, August 22, 2025

Tensor persistence on point clouds

This post tests the method presented in the paper: Wang, X.; Lyu, H.; He, W.; Chen, Q. Line Structure Extraction from LiDAR Point Cloud Based on the Persistence of Tensor Feature. Appl. Sci. 2022, 12, 9190. https://doi.org/10.3390/app12189190

The method extracts lineations and planes from a point cloud (in practice a generalization of RANSAC, although in this case it is not possible to discriminate between planes with different orientations).
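For reference, the classification in the script below relies on three shape descriptors computed from the eigenvalues λ1 ≥ λ2 ≥ λ3 of the local covariance tensor of each point's neighborhood:

linearity  = (λ1 − λ2) / λ1
planarity  = (λ2 − λ3) / λ1
sphericity = λ3 / λ1

A point is labelled linear when linearity > 0.8, planar when planarity > 0.8, and spherical (noise or isolated points) otherwise; these are the heuristic thresholds used in the code.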

The algorithm was tested on the point cloud derived from this model.




The method's only tunable parameter is the size of the search neighborhood at line 55:

[k, idx, _] = pcd_tree.search_knn_vector_3d(pcd.points[i], 100)
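Note that the call above actually fixes the number of neighbors (k = 100) rather than a metric radius; Open3D's KDTreeFlann also offers a radius search if a true radius is preferred (the 0.05 value here is just an illustrative guess):

# Hypothetical alternative: neighbors within a fixed metric radius instead of a fixed k
[k, idx, _] = pcd_tree.search_radius_vector_3d(pcd.points[i], 0.05)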


Example on staircase data (a dataset included in the example files)


import open3d as o3d
import numpy as np

def extract_tensor_features(pcd_path, neighborhood_radius=0.05):
    """
    Extracts linear, planar, and spherical features from a point cloud using
    the Tensor Feature method.

    Args:
        pcd_path (str): The path to the input .ply file.
        neighborhood_radius (float): The radius for neighborhood search.
    """
    # Step 1: Load the point cloud
    try:
        pcd = o3d.io.read_point_cloud(pcd_path)
        if len(pcd.points) == 0:
            print(f"Error: The point cloud file at {pcd_path} is empty.")
            return
        print(f"Loaded point cloud with {len(pcd.points)} points from {pcd_path}")
    except Exception as e:
        print(f"Error loading point cloud: {e}")
        return

    # Step 2: Estimate normals, which is necessary for tensor encoding
    pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamRadius(radius=neighborhood_radius))
    # CRITICAL CHECK: Ensure normals were successfully created
    if not pcd.has_normals():
        print("\nNormal estimation failed. This could be due to:")
        print("- The point cloud is empty or contains only a few points.")
        print("- The 'neighborhood_radius' is too small for the point cloud's density.")
        print("Please check your input file or try increasing the 'neighborhood_radius' parameter.")
        return

    pcd.orient_normals_consistent_tangent_plane(k=10)
    print("Normals estimated and oriented.")

    # Create a KDTree for efficient neighborhood searches
    pcd_tree = o3d.geometry.KDTreeFlann(pcd)

    # Prepare arrays to store classification results
    points = np.asarray(pcd.points)
    num_points = len(points)
    classification_labels = np.zeros(num_points, dtype=int)
    colors = np.zeros((num_points, 3))

    # Define colors for visualization
    LINEAR_COLOR = np.array((1.0, 0.0, 0.0))     # Red for linear features (e.g., edges, lines)
    PLANAR_COLOR = np.array((0.0, 1.0, 0.0))     # Green for planar features (e.g., surfaces, walls)
    SPHERICAL_COLOR = np.array((0.0, 0.0, 1.0))  # Blue for spherical features (e.g., noise, isolated points)

# Step 3 & 4: Compute tensor, decompose, and classify
for i in range(num_points):
# Find neighbors within the specified radius
[k, idx, _] = pcd_tree.search_knn_vector_3d(pcd.points[i], 100)
# If not enough neighbors, skip the point
if len(idx) < 10:
continue
neighbors = points[idx, :]
# Compute the covariance tensor
center = np.mean(neighbors, axis=0)
cov_tensor = np.cov((neighbors - center).T)

# Eigen-decomposition
eigenvalues, _ = np.linalg.eig(cov_tensor)
eigenvalues = np.sort(eigenvalues)[::-1] # Sort descending

lambda1, lambda2, lambda3 = eigenvalues

# Calculate shape features based on eigenvalues
# Linear feature: one large eigenvalue
linearity = (lambda1 - lambda2) / lambda1 if lambda1 > 0 else 0
# Planar feature: two large eigenvalues
planarity = (lambda2 - lambda3) / lambda1 if lambda1 > 0 else 0
# Spherical feature: all eigenvalues are similar
sphericity = lambda3 / lambda1 if lambda1 > 0 else 0

# Classify the point based on the dominant feature
if linearity > 0.8: # Heuristic threshold for linearity
classification_labels[i] = 1
colors[i] = LINEAR_COLOR
elif planarity > 0.8: # Heuristic threshold for planarity
classification_labels[i] = 2
colors[i] = PLANAR_COLOR
else:
classification_labels[i] = 3
colors[i] = SPHERICAL_COLOR
# Step 5: Visualize the results
pcd.colors = o3d.utility.Vector3dVector(colors)
o3d.visualization.draw_geometries([pcd], window_name="Point Cloud Feature Extraction")
# Print a summary of the results
linear_points = np.sum(classification_labels == 1)
planar_points = np.sum(classification_labels == 2)
spherical_points = np.sum(classification_labels == 3)
print("\n--- Classification Summary ---")
print(f"Linear Features: {linear_points} points ({linear_points/num_points*100:.2f}%)")
print(f"Planar Features: {planar_points} points ({planar_points/num_points*100:.2f}%)")
print(f"Spherical Features: {spherical_points} points ({spherical_points/num_points*100:.2f}%)")

if __name__ == "__main__":
    file_path = "./output_depth/pointcloud.ply"
    extract_tensor_features(file_path)

DepthAnything Metric

DepthAnything is a model (or rather several models, depending on the training set) for monocular depth estimation. Its advantage over MiDaS is that it has an outdoor training set that yields metric depth values instead of relative ones.

The training dataset is KITTI, i.e. an urban environment specific to autonomous driving, but let's see how it behaves on natural rock outcrops (given the opportunity, DepthAnything could also be fine-tuned on one's own dataset).


Input RGB image
Depth map




from transformers import AutoImageProcessor, AutoModelForDepthEstimation
import torch
import numpy as np
from PIL import Image
import cv2
import os

# Configuration
model_name = "depth-anything/Depth-Anything-V2-Metric-Outdoor-Large-hf"
# Or: "depth-anything/Depth-Anything-V2-Large" (relative)
input_image_path = "./images/rgb_0001.png"
output_dir = "output_depth"
os.makedirs(output_dir, exist_ok=True)

# Load image
image = Image.open(input_image_path).convert("RGB")

# Load model and processor
image_processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModelForDepthEstimation.from_pretrained(model_name)

# Device
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Inference
with torch.no_grad():
    inputs = image_processor(images=image, return_tensors="pt").to(device)
    outputs = model(**inputs)

# Check which attribute exists and use the correct one
if hasattr(outputs, 'predicted_depth'):
    depth_tensor = outputs.predicted_depth
elif hasattr(outputs, 'depth'):
    depth_tensor = outputs.depth
else:
    # Fallback: get the first tensor from outputs
    depth_tensor = outputs.last_hidden_state if hasattr(outputs, 'last_hidden_state') else list(outputs.values())[0]

# Manual post-processing since the built-in method expects 'predicted_depth'
# Get the depth tensor and resize it to the original image size
depth_map = depth_tensor.squeeze().cpu()

# Resize depth map to original image size
original_size = (image.height, image.width)
if depth_map.shape != original_size:
    depth_map_np = depth_map.numpy()
    depth_map_np = cv2.resize(depth_map_np, (image.width, image.height), interpolation=cv2.INTER_LINEAR)
    depth_map = torch.from_numpy(depth_map_np)

# Put it in a list to match expected format
depth_maps = [depth_map]

# Get numpy depth map (already resized to original image size)
depth_map = depth_maps[0].numpy() # (H, W)

# Normalize for visualization
depth_vis = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
depth_vis = (depth_vis * 255).astype(np.uint8)
depth_color = cv2.applyColorMap(depth_vis, cv2.COLORMAP_INFERNO)

# Save outputs
cv2.imwrite(os.path.join(output_dir, "depth_color.png"), depth_color)
cv2.imwrite(os.path.join(output_dir, "depth_grayscale.png"), depth_vis)
np.save(os.path.join(output_dir, "depth_map.npy"), depth_map)

# Save original image for comparison
original_array = np.array(image)
cv2.imwrite(os.path.join(output_dir, "original.png"), cv2.cvtColor(original_array, cv2.COLOR_RGB2BGR))

print(f"✅ Depth map saved. Shape: {depth_map.shape}")
print(f"๐Ÿ“Š Depth range: {depth_map.min():.3f} to {depth_map.max():.3f}")
print(f"๐Ÿ’พ Files saved in: {output_dir}/")
print(f" - depth_color.png (colorized depth map)")
print(f" - depth_grayscale.png (grayscale depth map)")
print(f" - depth_map.npy (raw numpy array)")
print(f" - original.png (original image for comparison)")


Creating a point cloud with the specific parameters of my RealSense D415

(FOV, cx, cy, fx and fy)
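For reference, a sketch of how these intrinsics can be read directly from the camera with pyrealsense2 (assuming the library is installed and a D415 is connected; the stream resolution is just an example):

import pyrealsense2 as rs

# Read the color-stream intrinsics (fx, fy, cx=ppx, cy=ppy) from a connected RealSense camera
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
profile = pipeline.start(config)
try:
    intr = profile.get_stream(rs.stream.color).as_video_stream_profile().get_intrinsics()
    print(f"fx={intr.fx}, fy={intr.fy}, cx={intr.ppx}, cy={intr.ppy}")
finally:
    pipeline.stop()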

import numpy as np
from PIL import Image
import open3d as o3d
import os

def create_pointcloud_from_files(depth_path, rgb_path, output_path,
                                 fov_horizontal=60, max_depth=10.0):
    """
    Create a PLY point cloud from depth map and RGB image files
    Args:
        depth_path: path to .npy depth map file
        rgb_path: path to RGB image file
        output_path: path to save the .ply file
        fov_horizontal: horizontal field of view in degrees (reported only)
        max_depth: maximum depth to include in meters
    """
    # Load depth map
    print(f"Loading depth map from: {depth_path}")
    depth_map = np.load(depth_path)
    print(f"Depth map shape: {depth_map.shape}")
    print(f"Depth range: {depth_map.min():.3f} to {depth_map.max():.3f} meters")

    # Load RGB image
    print(f"Loading RGB image from: {rgb_path}")
    rgb_image = Image.open(rgb_path).convert('RGB')
    rgb_array = np.array(rgb_image)
    print(f"RGB image shape: {rgb_array.shape}")

    # Ensure RGB and depth have same dimensions
    if rgb_array.shape[:2] != depth_map.shape:
        print(f"Resizing RGB {rgb_array.shape[:2]} to match depth {depth_map.shape}")
        rgb_image = rgb_image.resize((depth_map.shape[1], depth_map.shape[0]))
        rgb_array = np.array(rgb_image)
    height, width = depth_map.shape

    # Camera intrinsics (hard-coded values for the RealSense D415 color stream;
    # the FOV argument is only printed for reference)
    fx = 616.6863403320312
    fy = 616.5963134765625
    cx = 318.4041748046875
    cy = 238.97064208984375
    print("Camera intrinsics:")
    print(f"  fx={fx:.1f}, fy={fy:.1f}")
    print(f"  cx={cx:.1f}, cy={cy:.1f}")
    print(f"  FOV: {fov_horizontal}°")

    # Create coordinate grids
    x, y = np.meshgrid(np.arange(width), np.arange(height))

    # Convert to 3D coordinates using the pinhole camera model
    z = depth_map
    x_3d = (x - cx) * z / fx
    y_3d = (y - cy) * z / fy

    # Filter valid depths
    valid_mask = (z > 0) & (z < max_depth) & np.isfinite(z)
    num_valid = np.sum(valid_mask)
    print(f"Valid points: {num_valid:,} / {depth_map.size:,} ({100*num_valid/depth_map.size:.1f}%)")

    # Extract valid points and colors
    points_3d = np.stack([
        x_3d[valid_mask],
        -y_3d[valid_mask],  # Flip Y for correct orientation
        z[valid_mask]
    ], axis=1)
    colors = rgb_array[valid_mask] / 255.0  # Normalize to [0, 1]

    # Create Open3D point cloud
    print("Creating point cloud...")
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points_3d)
    pcd.colors = o3d.utility.Vector3dVector(colors)

    # Optional: Remove statistical outliers
    # Note: remove_statistical_outlier returns (filtered cloud, indices of the points kept)
    print("Removing outliers...")
    pcd_filtered, inlier_indices = pcd.remove_statistical_outlier(
        nb_neighbors=20, std_ratio=2.0
    )
    print(f"Removed {len(pcd.points) - len(inlier_indices):,} outliers")
    print(f"Final point cloud: {len(pcd_filtered.points):,} points")

    # Save point cloud
    print(f"Saving point cloud to: {output_path}")
    success = o3d.io.write_point_cloud(output_path, pcd_filtered)
    if success:
        print("✅ Point cloud saved successfully!")
        # Print file size
        file_size = os.path.getsize(output_path) / (1024 * 1024)  # MB
        print(f"📁 File size: {file_size:.2f} MB")
        return pcd_filtered
    else:
        print("❌ Failed to save point cloud")
        return None

def visualize_pointcloud(pcd):
    """Visualize the point cloud"""
    try:
        print("\n🔍 Visualizing point cloud...")
        print("  - Use mouse to rotate, scroll to zoom")
        print("  - Close the window when done")
        o3d.visualization.draw_geometries([pcd])
    except Exception as e:
        print(f"⚠️ Visualization failed: {e}")
        print("  You can open the .ply file in MeshLab, CloudCompare, or Blender")

if __name__ == "__main__":
    # File paths
    depth_file = "output_depth/depth_map.npy"
    rgb_file = "output_depth/original.png"
    output_file = "output_depth/pointcloud.ply"

    # Check if files exist
    if not os.path.exists(depth_file):
        print(f"❌ Depth file not found: {depth_file}")
        exit(1)
    if not os.path.exists(rgb_file):
        print(f"❌ RGB file not found: {rgb_file}")
        exit(1)

    # Create point cloud
    pcd = create_pointcloud_from_files(
        depth_path=depth_file,
        rgb_path=rgb_file,
        output_path=output_file,
        fov_horizontal=69.4,  # Adjust this based on your camera
        max_depth=15.0        # Adjust this to filter distant points
    )
    if pcd is not None:
        # Optional: visualize
        visualize_pointcloud(pcd)


MNF analysis on reflectance spectra of plastics

Devo cerca di lavorare su spettri di riflettanza di plastica e la prima domanda e': quale sono le bande significative? Sono partito dal ...