I wanted to try building my own neural network for Monocular Depth Estimation, but I had no data to feed it... so I grabbed my RealSense D415 and went to my usual climbing spot (Cava di Maiano, near Florence) to do some acquisitions.
The problem with RealSense cameras is that the depth sensor is quite noisy, so the acquisition program captures several frames and then computes a per-pixel median.
import pyrealsense2 as rs
import numpy as np
from PIL import Image
import os
# -------------------
# Config
# -------------------
output_rgb_dir = "rgb"
output_depth_dir = "depth"
os.makedirs(output_rgb_dir, exist_ok=True)
os.makedirs(output_depth_dir, exist_ok=True)
num_frames = 30 # Number of frames to accumulate
# -------------------
# RealSense pipeline
# -------------------
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.rgb8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
profile = pipeline.start(config)
# Depth scale
depth_sensor = profile.get_device().first_depth_sensor()
depth_scale = depth_sensor.get_depth_scale()
print("Depth scale:", depth_scale)
# Align depth to color
align_to = rs.stream.color
align = rs.align(align_to)
try:
    print(f"Capturing {num_frames} frames...")
    depth_frames_list = []
    rgb_frame_to_save = None

    for i in range(num_frames):
        frames = pipeline.wait_for_frames()
        aligned_frames = align.process(frames)
        color_frame = aligned_frames.get_color_frame()
        depth_frame = aligned_frames.get_depth_frame()
        if not color_frame or not depth_frame:
            continue

        # Convert frames to numpy
        color_image = np.asanyarray(color_frame.get_data())  # RGB
        depth_image = np.asanyarray(depth_frame.get_data()) * depth_scale  # meters
        depth_frames_list.append(depth_image)

        # Keep the last RGB frame (or you could store them all and take the median too)
        rgb_frame_to_save = color_image

    # Compute median depth per pixel
    depth_median = np.median(np.stack(depth_frames_list, axis=0), axis=0).astype(np.float32)

    # Save RGB
    rgb_filename = os.path.join(output_rgb_dir, "rgb_median.png")
    Image.fromarray(rgb_frame_to_save).save(rgb_filename)

    # Save depth as NPY
    depth_npy_filename = os.path.join(output_depth_dir, "depth_median.npy")
    np.save(depth_npy_filename, depth_median)

    # Save depth as PNG (scaled to 16-bit for visualization)
    depth_png_filename = os.path.join(output_depth_dir, "depth_median.png")
    depth_mm = (depth_median * 1000).astype(np.uint16)  # convert meters to mm
    Image.fromarray(depth_mm).save(depth_png_filename)

    print(f"Saved median RGB to {rgb_filename}")
    print(f"Saved median depth to {depth_npy_filename} and {depth_png_filename}")

except KeyboardInterrupt:
    print("Stopped by user.")
finally:
    pipeline.stop()
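To sanity-check what ends up on disk, here is a minimal sketch (assuming the filenames produced by the script above) that reloads both formats and verifies the 16-bit PNG agrees with the float NPY to within 1 mm:

import numpy as np
from PIL import Image

# Depth in meters (float32) and its 16-bit millimeter copy
depth_m = np.load("depth/depth_median.npy")
depth_mm = np.array(Image.open("depth/depth_median.png"), dtype=np.uint16)

# The PNG is a quantized copy: converted back to meters it should agree to within 1 mm
print("max abs difference (m):", np.abs(depth_m - depth_mm / 1000.0).max())
print("missing pixels (depth == 0):", int((depth_m == 0).sum()), "of", depth_m.size)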
The depth data contains holes where no measurement could be acquired. The next script fills them with a nearest-neighbour fill (SciPy) and then smooths the result with an edge-preserving bilateral filter from OpenCV (NPY files only).
import sys
import numpy as np
import cv2
from scipy import ndimage
if len(sys.argv) != 2:
    print(f"Usage: python {sys.argv[0]} depth_file.npy")
    sys.exit(1)

filename = sys.argv[1]

# Load depth map (float32, meters); zeros mark pixels with no measurement
depth = np.load(filename)
mask = depth == 0

# Fill holes with the value of the nearest valid pixel
nearest_filled = ndimage.distance_transform_edt(
    mask,
    return_distances=False,
    return_indices=True
)
depth_filled = depth[tuple(nearest_filled)]
# Smooth with bilateral filter to preserve edges
depth_filtered = cv2.bilateralFilter(depth_filled.astype(np.float32), 9, 75, 75)
# Save with "_filled" suffix
output_filename = filename.replace(".npy", "_filled.npy")
np.save(output_filename, depth_filtered)
print(f"Saved filled depth map to: {output_filename}")