lunedì 18 agosto 2025

Preparazione dati Realsense D415 per monocular depth estimation

Volevo provare a creare la mia rete neurale per Monocular Depth Estimation ma non ho dati da fornire alla rete... mi sono quindi armato di un Realsense D415 e sono andato alla mia palestra (cava di Maiano, vicino a Firenze) a fare qualche acquisizione.

Il problema con i Realsense è che il sensore di profondità è molto rumoroso, quindi il programma di acquisizione effettua più scansioni e poi ne calcola la media (mediana, per pixel).

 


import pyrealsense2 as rs
import numpy as np
from PIL import Image
import os

# -------------------
# Config
# -------------------
output_rgb_dir = "rgb"
output_depth_dir = "depth"
os.makedirs(output_rgb_dir, exist_ok=True)
os.makedirs(output_depth_dir, exist_ok=True)

num_frames = 30  # number of frames to accumulate before taking the per-pixel median

# -------------------
# RealSense pipeline
# -------------------
pipeline = rs.pipeline()
config = rs.config()

config.enable_stream(rs.stream.color, 640, 480, rs.format.rgb8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)

profile = pipeline.start(config)

# Depth scale: multiplier converting raw z16 depth units to meters
depth_sensor = profile.get_device().first_depth_sensor()
depth_scale = depth_sensor.get_depth_scale()
print("Depth scale:", depth_scale)

# Align depth frames to the color stream so both images share pixel coordinates
align_to = rs.stream.color
align = rs.align(align_to)

try:
    print(f"Capturing {num_frames} frames...")
    depth_frames_list = []
    rgb_frame_to_save = None

    for _ in range(num_frames):
        frames = pipeline.wait_for_frames()
        aligned_frames = align.process(frames)

        color_frame = aligned_frames.get_color_frame()
        depth_frame = aligned_frames.get_depth_frame()

        # Skip iterations where either stream dropped a frame
        if not color_frame or not depth_frame:
            continue

        # Convert frames to numpy
        color_image = np.asanyarray(color_frame.get_data())  # RGB, uint8
        depth_image = np.asanyarray(depth_frame.get_data()) * depth_scale  # meters

        depth_frames_list.append(depth_image)

        # Keep the last valid RGB frame (one could store them all and median those too)
        rgb_frame_to_save = color_image

    if not depth_frames_list:
        # np.stack would raise an opaque ValueError on an empty list;
        # fail with a clear message instead.
        raise RuntimeError("No valid frames captured; check the camera connection.")

    # Per-pixel median across all captured frames (robust against depth-sensor noise)
    depth_median = np.median(np.stack(depth_frames_list, axis=0), axis=0).astype(np.float32)

    # Save RGB
    rgb_filename = os.path.join(output_rgb_dir, "rgb_median.png")
    Image.fromarray(rgb_frame_to_save).save(rgb_filename)

    # Save depth as NPY (float32, meters)
    depth_npy_filename = os.path.join(output_depth_dir, "depth_median.npy")
    np.save(depth_npy_filename, depth_median)

    # Save depth as 16-bit PNG in millimeters for quick visualization
    depth_png_filename = os.path.join(output_depth_dir, "depth_median.png")
    depth_mm = (depth_median * 1000).astype(np.uint16)  # convert meters to mm
    Image.fromarray(depth_mm).save(depth_png_filename)

    print(f"Saved median RGB to {rgb_filename}")
    print(f"Saved median depth to {depth_npy_filename} and {depth_png_filename}")

except KeyboardInterrupt:
    print("Stopped by user.")

finally:
    # Always release the camera, even on error or Ctrl-C
    pipeline.stop()

 

I dati di profondità presentano dei buchi dove non è stato possibile acquisire il dato. Il prossimo script ovvia al problema riempiendo i buchi con il valore valido più vicino (SciPy) e applicando poi un filtro bilaterale di OpenCV (solo dati npy).

import os
import sys

import cv2
import numpy as np
from scipy import ndimage

def nearest_fill(depth):
    """Fill zero-valued (missing) pixels with the nearest valid depth value.

    Uses a Euclidean distance transform on the hole mask: for every pixel it
    yields the indices of the closest non-hole pixel, and the depth is gathered
    from those indices. Valid pixels index themselves, so only holes change.

    Args:
        depth: 2-D numpy array where 0 marks a missing measurement.

    Returns:
        A new array of the same shape with every zero replaced by the
        nearest non-zero value (unchanged if there are no holes).
    """
    mask = depth == 0  # True where the sensor returned no data
    nearest_indices = ndimage.distance_transform_edt(
        mask,
        return_distances=False,
        return_indices=True,
    )
    return depth[tuple(nearest_indices)]


def fill_depth_holes(depth):
    """Return *depth* with holes filled and edge-preserving smoothing applied.

    The bilateral filter smooths the blocky nearest-neighbor artifacts while
    keeping genuine depth discontinuities sharp.
    """
    depth_filled = nearest_fill(depth)
    return cv2.bilateralFilter(depth_filled.astype(np.float32), 9, 75, 75)


def main():
    """CLI entry point: load an .npy depth map, fill its holes, save a copy."""
    if len(sys.argv) != 2:
        print(f"Usage: python {sys.argv[0]} depth_file.npy")
        sys.exit(1)

    filename = sys.argv[1]

    # Load depth map
    depth = np.load(filename)
    depth_filtered = fill_depth_holes(depth)

    # Derive the output name with splitext so an input whose name lacks a
    # ".npy" suffix cannot silently overwrite itself (str.replace would).
    root, _ = os.path.splitext(filename)
    output_filename = root + "_filled.npy"
    np.save(output_filename, depth_filtered)

    print(f"Saved filled depth map to: {output_filename}")


if __name__ == "__main__":
    main()

 

 

 

 

Nessun commento:

Posta un commento

Chiavetta ALIA

Sono maledettamente distratto e stavo cercando di vedere se riesco a replicare la chiavetta dei cassonetti di Firenze senza dover per forza ...