martedì 15 luglio 2025

Confronto AprilTag vs ArucoTag

Ho provato a modificare il programma opencv_demo.cpp per avere un confronto diretto delle prestazioni tra AprilTag e ArucoTag

L'idea e' quella di acquisire un fotogramma da una camera realsense nel quale sono visualizzati contemporaneamente Aruco ed April tag con le stesse condizioni di illuminazione e risoluzione per vedere quale delle due famiglie e' migliore

Alla fine l'errore percentuale std_dev/media (150 fotogrammi) risulta essere

Arucotag = 0.0033%

AprilTag = 0.002%

 

/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
All rights reserved.
This software was developed in the APRIL Robotics Lab under the
direction of Edwin Olson, ebolson@umich.edu. This software may be
available under alternative licensing terms; contact the address above.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the Regents of The University of Michigan.
*/

// v4l2-ctl -A
// v4l2-ctl --device=/dev/video1 --all

#include <cmath>    // sqrt, atan2, pow, M_PI
#include <cstring>  // strcmp
#include <fstream>
#include <iomanip>  // std::fixed, std::setprecision
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <librealsense2/rs.hpp>
#include <opencv2/aruco.hpp>
#include <opencv2/objdetect/aruco_detector.hpp>
#include "opencv2/opencv.hpp"


extern "C" {
#include "apriltag.h"
#include "tag36h11.h"
#include "tag25h9.h"
#include "tag16h5.h"
#include "tagCircle21h7.h"
#include "tagCircle49h12.h"
#include "tagCustom48h12.h"
#include "tagStandard41h12.h"
#include "tagStandard52h13.h"
#include "apriltag_pose.h"

#include "common/getopt.h"
}

using namespace std;
using namespace cv;


int main(int argc, char *argv[])
{
std::ofstream dati_april("dati_april.csv");
std::ofstream dati_aruco("dati_aruco.csv");

cv::aruco::Dictionary dictionary;
cv::aruco::DetectorParameters detector_params;
cv::aruco::ArucoDetector detector;

double fx = 671.29320421; // focal length x
double fy = 672.01634326; // focal length y
double cx = 640;
double cy = 360;
double tagsize = 0.180; // in meters
float tagsize_aruco = 0.150; // in meters

std::vector<float> x_vec;
std::vector<float> y_vec;
std::vector<float> z_vec;
std::vector<float> id_vec;


std::vector<cv::Vec3d> rvecs, tvecs;

double ref_x = 0.0;
double ref_y = 0.0;
double ref_z = 0.0;

double aruco_ref_x = 0.0;
double aruco_ref_y = 0.0;
double aruco_ref_z = 0.0;


int fontface = FONT_HERSHEY_SCRIPT_SIMPLEX;
double fontscale = 1.0;
String text;

getopt_t *getopt = getopt_create();

getopt_add_bool(getopt, 'h', "help", 0, "Show this help");
getopt_add_bool(getopt, 'd', "debug", 1, "Enable debugging output (slow)");
getopt_add_bool(getopt, 'q', "quiet", 0, "Reduce output");
getopt_add_string(getopt, 'f', "family", "tag36h11", "Tag family to use");
getopt_add_int(getopt, 't', "threads", "1", "Use this many CPU threads");
getopt_add_double(getopt, 'x', "decimate", "2.0", "Decimate input image by this factor");
getopt_add_double(getopt, 'b', "blur", "0.0", "Apply low-pass blur to input");
getopt_add_int(getopt, 'c', "camera", "1", "Select camera");

getopt_add_bool(getopt, '0', "refine-edges", 1, "Spend more time trying to align edges of tags");

if (!getopt_parse(getopt, argc, argv, 1) ||
getopt_get_bool(getopt, "help")) {
printf("Usage: %s [options]\n", argv[0]);
getopt_do_usage(getopt);
exit(0);
}
//VideoCapture inputVideo;
rs2::pipeline pipe;
rs2::config cfg;
cfg.enable_stream(RS2_STREAM_COLOR, 1280, 720, RS2_FORMAT_BGR8, 30);

try {
pipe.start(cfg);
cout << "RealSense D415 started at 1280x720 @ 30fps" << endl;
} catch (const rs2::error &e) {
cerr << "RealSense error: " << e.what() << endl;
return -1;
}


dictionary = cv::aruco::getPredefinedDictionary(cv::aruco::DICT_4X4_50);
detector = cv::aruco::ArucoDetector(dictionary);


// Camera intrinsics for ArUco (same as AprilTag)
cv::Mat camMatrix = (cv::Mat_<double>(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
cv::Mat distCoeffs = cv::Mat::zeros(1, 5, CV_64F); // Assuming no distortion



// Initialize tag detector with options
apriltag_family_t *tf = NULL;
const char *famname = getopt_get_string(getopt, "family");
if (!strcmp(famname, "tag36h11")) {
tf = tag36h11_create();
} else if (!strcmp(famname, "tag25h9")) {
tf = tag25h9_create();
} else if (!strcmp(famname, "tag16h5")) {
tf = tag16h5_create();
} else if (!strcmp(famname, "tagCircle21h7")) {
tf = tagCircle21h7_create();
} else if (!strcmp(famname, "tagCircle49h12")) {
tf = tagCircle49h12_create();
} else if (!strcmp(famname, "tagStandard41h12")) {
tf = tagStandard41h12_create();
} else if (!strcmp(famname, "tagStandard52h13")) {
tf = tagStandard52h13_create();
} else if (!strcmp(famname, "tagCustom48h12")) {
tf = tagCustom48h12_create();
} else {
printf("Unrecognized tag family name. Use e.g. \"tag36h11\".\n");
exit(-1);
}


apriltag_detector_t *td = apriltag_detector_create();
apriltag_detector_add_family(td, tf);
td->quad_decimate = getopt_get_double(getopt, "decimate");
td->quad_sigma = getopt_get_double(getopt, "blur");
td->nthreads = getopt_get_int(getopt, "threads");
td->debug = getopt_get_bool(getopt, "debug");
td->refine_edges = getopt_get_bool(getopt, "refine-edges");

Mat frame, gray;

vector<Mat> allImages;
Size imageSize;
while (true) {
rs2::frameset frames = pipe.wait_for_frames();
rs2::video_frame color_frame = frames.get_color_frame();
if (!color_frame) continue;

// Convert to cv::Mat
Mat image(Size(1280, 720), CV_8UC3, (void*)color_frame.get_data(), Mat::AUTO_STEP);
Mat imageCopy;
cvtColor(image, gray, COLOR_BGR2GRAY);

// Make an image_u8_t header for the Mat data
image_u8_t im = { .width = gray.cols,
.height = gray.rows,
.stride = gray.cols,
.buf = gray.data
};

std::vector<std::vector<cv::Point2f>> corners;
std::vector<int> ids;
std::vector<std::vector<cv::Point2f>> rejected;
detector.detectMarkers(image, corners, ids, rejected);

int test = 0;

if (!ids.empty()) {
std::cout << "Aruco trovato" << endl;
cv::aruco::drawDetectedMarkers(image, corners, ids);
cv::aruco::estimatePoseSingleMarkers(corners, 0.15, camMatrix, distCoeffs, rvecs, tvecs);
for (size_t i = 0; i < ids.size(); ++i) {
std::cout << "Id " << ids[i] << " Pos: (" << std::fixed << std::setprecision(6) << tvecs[i][0] << ", " << tvecs[i][1] << ", " << tvecs[i][2] << ")" << endl;
if (ids[i] == 0)
{
aruco_ref_x = tvecs[i][0] ;
aruco_ref_y = tvecs[i][1] ;
aruco_ref_z = tvecs[i][2] ;
test =1;
}
if (ids[i] == 1)
{
double distanza_aruco = std::sqrt(std::pow(tvecs[i][0] - aruco_ref_x, 2) + std::pow(tvecs[i][0] - aruco_ref_y, 2) + std::pow(tvecs[i][0] - aruco_ref_z, 2));
putText(image, std::to_string(distanza_aruco), Point(750,100),fontface, fontscale, Scalar(0xff, 0x0, 0), 2);
dati_aruco << distanza_aruco << endl;

}

}


}

zarray_t *detections = apriltag_detector_detect(td, &im);
cout << zarray_size(detections) << " Apriltags detected" << endl;

// Draw detection outlines
for (int i = 0; i < zarray_size(detections); i++) {
apriltag_detection_t *det;

zarray_get(detections, i, &det);

apriltag_detection_info_t info;
info.det = det;
info.tagsize = tagsize;
info.fx = fx;
info.fy = fy;
info.cx = cx;
info.cy = cy;

apriltag_pose_t pose;
estimate_tag_pose(&info, &pose);

// --- Translation ---
double x = pose.t->data[0];
double y = pose.t->data[1];
double z = pose.t->data[2];

std::cout << det->id << ";" << x << ";" << y << ";" << z;

// --- Convert rotation matrix to yaw-pitch-roll ---
double r00 = pose.R->data[0], r01 = pose.R->data[1], r02 = pose.R->data[2];
double r10 = pose.R->data[3], r11 = pose.R->data[4], r12 = pose.R->data[5];
double r20 = pose.R->data[6], r21 = pose.R->data[7], r22 = pose.R->data[8];

double yaw = atan2(r10, r00);
double pitch = atan2(-r20, sqrt(r21 * r21 + r22 * r22));
double roll = atan2(r21, r22);

std::cout << ";" << yaw * 180.0 / M_PI
<< ";" << pitch * 180.0 / M_PI
<< ";" << roll * 180.0 / M_PI << "\n";
if (det->id == 0){
ref_x = x;
ref_y = y;
ref_z = z;
}
else
{
x_vec.push_back(x);
y_vec.push_back(y);
z_vec.push_back(z);
id_vec.push_back(det->id);
}

matd_destroy(pose.R);
matd_destroy(pose.t);

line(image , Point(det->p[0][0], det->p[0][1]),
Point(det->p[1][0], det->p[1][1]),
Scalar(0, 0xff, 0), 2);
line(image, Point(det->p[0][0], det->p[0][1]),
Point(det->p[3][0], det->p[3][1]),
Scalar(0, 0, 0xff), 2);
line(image, Point(det->p[1][0], det->p[1][1]),
Point(det->p[2][0], det->p[2][1]),
Scalar(0xff, 0, 0), 2);
line(image, Point(det->p[2][0], det->p[2][1]),
Point(det->p[3][0], det->p[3][1]),
Scalar(0xff, 0, 0), 2);

stringstream ss;
ss << det->id;
text = ss.str();
int baseline;
Size textsize = getTextSize(text, fontface, fontscale, 2,&baseline);
putText(image, text, Point(det->c[0]-textsize.width/2,det->c[1]+textsize.height/2),fontface, fontscale, Scalar(0xff, 0x99, 0), 2);

}

std::cout << "\n";
for (int t=0;t < x_vec.size();t++)
{
double distanza = std::sqrt(std::pow(x_vec[t] - ref_x, 2) + std::pow(y_vec[t] - ref_y, 2) + std::pow(z_vec[t] - ref_z, 2));
std::cout << id_vec[t] << ";" << distanza << "\n";
switch ((int)id_vec[t])
{
case 1:
putText(image, std::to_string(distanza), Point(50,100),fontface, fontscale, Scalar(0xff, 0x99, 0), 2);
dati_april << distanza << endl;
break;
case 2:
putText(image, std::to_string(distanza), Point(50,200),fontface, fontscale, Scalar(0xff, 0x99, 0), 2);
break;
default:
break;
}
}
x_vec.clear();
y_vec.clear();
z_vec.clear();
id_vec.clear();


apriltag_detections_destroy(detections);

imshow("Tag Detections", image);
if (waitKey(30) >= 0)
break;
}

apriltag_detector_destroy(td);

if (!strcmp(famname, "tag36h11")) {
tag36h11_destroy(tf);
} else if (!strcmp(famname, "tag25h9")) {
tag25h9_destroy(tf);
} else if (!strcmp(famname, "tag16h5")) {
tag16h5_destroy(tf);
} else if (!strcmp(famname, "tagCircle21h7")) {
tagCircle21h7_destroy(tf);
} else if (!strcmp(famname, "tagCircle49h12")) {
tagCircle49h12_destroy(tf);
} else if (!strcmp(famname, "tagStandard41h12")) {
tagStandard41h12_destroy(tf);
} else if (!strcmp(famname, "tagStandard52h13")) {
tagStandard52h13_destroy(tf);
} else if (!strcmp(famname, "tagCustom48h12")) {
tagCustom48h12_destroy(tf);
}


getopt_destroy(getopt);
dati_april.close();
dati_aruco.close();

return 0;
}


Per compilare si modifica CMakeLists.txt per includere librealsense

# opencv_demo
if(OpenCV_FOUND)
find_package(realsense2 REQUIRED) # Add this line
add_executable(opencv_demo example/opencv_demo.cc)
target_include_directories(opencv_demo PRIVATE ${realsense2_INCLUDE_DIR})
target_link_libraries(opencv_demo
apriltag
${OpenCV_LIBRARIES}
realsense2 # Link RealSense2
)

set_target_properties(opencv_demo PROPERTIES CXX_STANDARD 11)
install(TARGETS opencv_demo RUNTIME DESTINATION bin)

endif(OpenCV_FOUND)

sabato 12 luglio 2025

Apriltag 3 e pose estimation

 Con la versione 3 di Apriltag e' possibile avere la pose estimation del tag (ho usato 41h12)

il repository di riferimento e' https://github.com/zhenzhenxiang/apriltag3

(ve ne sono altri ma ho avuto problemi di compilazione all'interno di Debian Testing)

 

Si clona di github il progetto (al momento 3.4.3) e si crea un folder ap3 dove inserire il seguente Makefile e il successivo main.cpp

# Adjust these paths if needed
APRILTAG_DIR = ../
CXX = g++
CXXFLAGS = -Wall -O2 -std=c++11

# Paths for AprilTag
INCLUDES = -I$(APRILTAG_DIR) -I$(APRILTAG_DIR)/common
LIBS = -L$(APRILTAG_DIR) -lapriltag

# Use OpenCV4 pkg-config flags
OPENCV_CFLAGS := $(shell pkg-config --cflags opencv4)
OPENCV_LIBS := $(shell pkg-config --libs opencv4)

# Source file
SRC = main.cpp
OUT = pos_tag

all: $(OUT)

$(OUT): $(SRC)
$(CXX) $(CXXFLAGS) $(SRC) $(INCLUDES) $(LIBS) $(OPENCV_CFLAGS) $(OPENCV_LIBS) -o $(OUT)

clean:
rm -f $(OUT)


#include <opencv2/opencv.hpp>
#include <iostream>

extern "C" {
#include "apriltag.h"
#include "tagStandard41h12.h"
#include "common/getopt.h"
#include "apriltag_pose.h"
}

int main(int argc, char* argv[]) {
// --- Load image ---
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <image_path>" << std::endl;
return 1;
}

std::string imagePath = argv[1];
cv::Mat image = cv::imread(imagePath, cv::IMREAD_GRAYSCALE);
if (image.empty()) {
std::cerr << "Failed to load image\n";
return -1;
}

// --- Setup detector ---
apriltag_family_t *tf = tagStandard41h12_create();
apriltag_detector_t *td = apriltag_detector_create();
apriltag_detector_add_family(td, tf);
td->quad_decimate = 1.0;
td->quad_sigma = 0.0;
td->nthreads = 4;
td->debug = false;
td->refine_edges = true;

// --- Convert OpenCV image to Apriltag image ---
image_u8_t im = {
.width = image.cols,
.height = image.rows,
.stride = image.cols,
.buf = image.data
};

// --- Detect tags ---
zarray_t *detections = apriltag_detector_detect(td, &im);
//std::cout << "Detected " << zarray_size(detections) << " tags.\n";

// --- Camera parameters (replace with calibrated values) ---
double fx = 663.64233754; // focal length x
double fy = 664.86615495; // focal length y
double cx = image.cols / 2.0;
double cy = image.rows / 2.0;
double tagsize = 0.150; // in meters

for (int i = 0; i < zarray_size(detections); i++) {
apriltag_detection_t *det;
zarray_get(detections, i, &det);

std::cout << det->id;

apriltag_detection_info_t info;
info.det = det;
info.tagsize = tagsize;
info.fx = fx;
info.fy = fy;
info.cx = cx;
info.cy = cy;

apriltag_pose_t pose;
estimate_tag_pose(&info, &pose);

// --- Translation ---
double x = pose.t->data[0];
double y = pose.t->data[1];
double z = pose.t->data[2];

std::cout << ";" << x << ";" << y << ";" << z;

// --- Convert rotation matrix to yaw-pitch-roll ---
double r00 = pose.R->data[0], r01 = pose.R->data[1], r02 = pose.R->data[2];
double r10 = pose.R->data[3], r11 = pose.R->data[4], r12 = pose.R->data[5];
double r20 = pose.R->data[6], r21 = pose.R->data[7], r22 = pose.R->data[8];

double yaw = atan2(r10, r00);
double pitch = atan2(-r20, sqrt(r21 * r21 + r22 * r22));
double roll = atan2(r21, r22);

std::cout << ";" << yaw * 180.0 / M_PI
<< ";" << pitch * 180.0 / M_PI
<< ";" << roll * 180.0 / M_PI << "\n";

matd_destroy(pose.R);
matd_destroy(pose.t);
}

apriltag_detections_destroy(detections);
apriltag_detector_destroy(td);
tagStandard41h12_destroy(tf);

return 0;
}


 

 

 

giovedì 10 luglio 2025

Calibrazione PiCam HQ 12 Mpx

 Ho provato a calibrare la PiCam HQ 12.3 Mpx con sensore Sony IMX477

In modalita' foto la risoluzione e' 4056x3040 mentre in modalita' video parte da 640x480 30 fps


Ho usato sia una serie di foto sia un video usando lo script a link sottostante


 https://github.com/yumashino/Camera-Calibration-with-ChArUco-Board/tree/main

 


 

rispetto alla documentazione non e' piu' necessario usare libcam ma per scattare le foto si usa

 

rpicam-still --encoding png -o img_01.png

rpicam-still --raw -o 01.jpg (salva anche in DNG)

per i video

rpicam-vid  -t 10s --width 2028 --height 1080 -o test.mp4 (salva 10 secondi in h264)

 

calibration_time: "Thu 10 Jul 2025 07:32:59 CEST"
image_width: 4032
image_height: 3024
flags: 0
camera_matrix: !!opencv-matrix
   rows: 3
   cols: 3
   dt: d
   data: [ 3377.8915344697502, 0., 2032.4285452943598, 0.,
       3394.7675726553693, 1455.5140917783713, 0., 0., 1. ]
distortion_coefficients: !!opencv-matrix
   rows: 1
   cols: 5
   dt: d
   data: [ 0.24433970441928685, -1.371442972691743,
       0.0033488938962143514, -0.00060884641206734576,
       2.3798752100481146 ]
avg_reprojection_error: 2.668497308654219



mercoledì 9 luglio 2025

Charuco camera calibration

Attenzione : la tavola di calibrazione e' cambiata nelle versioni recenti di Opencv (da 4.6 in poi....in pratica nelle versioni vecchie il quadrato in alto a sinistra e' occupato da un aruco tag mentre in quelle moderne inizia con un quadrato nero....per fare le calibrazioni con le tavole vecchie si deve usare board.setLegacyPattern(true)

 

vecchia versione

nuova versione

Ho trovato che nel pacchetto opencv e precisamente in samples/cpp/tutorial_code/objectDetection e' gia' presente un programma che permette di fare la calibrazione della camera partendo da immagini o inquadrando in modalita' video una tavola di calibrazione

Per la tavola di calibrazione ho usato questo link che genera una charuco board 8x11
 


 

 nel caso si usino immagini gia' pronte con nome tipo img_01.jpg. img_02.jpg

  ./calibrate_camera_charuco -w=11 -h=8 --sl=0.03 --ml=0.022 -d=0 -v=/home/luca/iphone/img_%02d.png

Nel caso del video 

 ./calibrate_camera_charuco -w=11 -h=8 --sl=0.03 --ml=0.022 -d=0 --ci=0

 i risultati sono salvati nel file cam.yml

Calibrazione colore DYI

Volevo provare a farmi una calibrazione colore in casa senza passare dallo spendere un centinaio di euro per le tavole colore (esempio quella di XRite)

Mi sono scaricato il pdf di questa immagine da Wikipedia

 

Per ogni mattonella sono riportati i valori rgb di riferimento

Ho poi fotografato con un IPhone una immagine ed ho messo a confronto i valori rgb di ogni mattonella della foto scattata con i valori rgb della tavola di riferimento

E' stato usato Gimp per selezionare un quadrato all'interno della mattonella colore e ed avere il valore RGB medio della selezione



Come si vede la relazione e' di tipo lineare ma e' ben lontana da y=x

Mi sono scritto questo semplice codice che trasforma i valori dei pixel 

import numpy as np


def apply_color_correction(image):
    """Apply the per-channel linear color correction fitted against the
    reference RGB values of the DIY color-checker chart.

    Parameters
    ----------
    image : numpy.ndarray
        BGR image as an (H, W, 3) uint8 array (OpenCV channel order).

    Returns
    -------
    numpy.ndarray
        Corrected BGR image, same shape, dtype uint8.
    """
    # Work in float so the scaling cannot wrap around in uint8.
    image_float = image.astype(np.float32)

    # Split channels (OpenCV images are BGR by default).
    B = image_float[:, :, 0]
    G = image_float[:, :, 1]
    R = image_float[:, :, 2]

    # Linear fit (gain, offset) per channel, measured with Gimp on the
    # photographed chart vs the reference tile values.
    R = (R * 1.047) - 21.3
    G = (G * 1.206) - 27.3
    B = (B * 1.23) - 22.2

    # Merge back the channels.
    result = np.dstack((B, G, R))

    # Clip values to the valid range [0, 255] and convert back to uint8.
    return np.clip(result, 0, 255).astype(np.uint8)


if __name__ == "__main__":
    # cv2 is only needed for file I/O; the correction itself is pure numpy.
    import cv2

    # Read the input image
    image = cv2.imread('1.jpg')  # Change to your image filename
    if image is None:
        raise FileNotFoundError("Image not found. Make sure the path is correct.")

    # Save the output image
    cv2.imwrite('output1.jpg', apply_color_correction(image))



l'immagine corretta risulta essere questa


Come si osserva dal bianco della carta l'immagine corretta ha colori piu' fedeli alla realta'. Non dico che siamo a livello delle tavole di colore da 100 euro e dei software professionali ma e' decisamente un passo avanti










 

 

 

 

Convertire HEIC su Linux

 Usando un Iphone come telefono mi sono trovato a dover usare il formato HEIC per le foto. 

Per la conversione da HEIC in jpg (heic e' non e' lossless quindi e' inutile usare png) si installa

apt install libheif-examples

e si converte tramite

heif-convert IMG.HEIC IMG.JPG

Analisi MNF su spettri di riflettanza di plastica

Devo cercare di lavorare su spettri di riflettanza di plastica e la prima domanda e': quali sono le bande significative? Sono partito dal ...