DHOB (IU5SGN): Distanza e Pose Estimation da single aruco marker con opencv

martedì 23 agosto 2022

Distanza e Pose Estimation da single aruco marker con opencv

Volevo vedere se potevo creare un distanziametro usando una webcam economica e gli aruco tag

Per le prove ho usato

1) Realsense D435 ottico (1920x1080 2 MPx)
2) Logitech C920 (2560x1470 3.7 Mpx)
3) webcam cinese senza marca (3624 x 2448 8.8 Mpx)
4) DJI Tello video (1280x720 1Mpx)

Per la calibrazione ho utilizzato 5x5 e 9x6 su fogli A4

l'errore stimato di calibrazione e' risultato
Realsense : 0.0371
Logitech : 0.052
Cinese : 0.047
Tello video : 0.027

In generale maggiore e' la risoluzione della camera e maggiore e' la dimensione del tag piu' distante il tag viene riconosciuto. Con un tag da 25 cm 4x4 e la camera a maggiore risoluzione opencv riesce a vederlo fino a 30 m. Con le altre camere non sono andato oltre i 20 m di distanza

Usando un telemetro laser ho calcolato la distanza del tag dalla camera e con Opencv ho calcolato la distanza. Come si vede dal grafico OpenCV riesce a risolvere la distanza .. da una prima stima c'e' un errore di 3-4 cm tra due posizioni vicine

prendendo solo i primi 10 metri la precisione aumenta

Ho provato anche a mettere a confronto gli Apriltag con gli Arucotag ma non ho notato sensibili vantaggi nell'usare AprilTags

Ho trovato invece miglioramenti nel raffinamento subpixel tra i parametri di OpenCV

elaborazione singole immagini

----------------------------------------------------------------

'''
Sample Command:-
python3 detect_aruco_images.py --image foto/103.png --type DICT_5X5_100 -a 0.1 -k ./calibration_matrix.npy -d ./distortion_coefficients.npy
'''
from turtle import width
import numpy as np
from utils import ARUCO_DICT, aruco_display
import argparse
import cv2
import sys
import math


ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to input image containing ArUCo tag")
ap.add_argument("-t", "--type", type=str, default="DICT_ARUCO_ORIGINAL", help="type of ArUCo tag to detect")
ap.add_argument("-k", "--K_Matrix", required=True, help="Path to calibration matrix (numpy file)")
ap.add_argument("-d", "--D_Coeff", required=True, help="Path to distortion coefficients (numpy file)")
ap.add_argument("-a", "--aruco_dim", required=True, help="ArUco tag dimension")
#la dimensione del tag e' quella dello spigolo esterno del quadrato nero esterno, non i singoli quadrati interni

args = vars(ap.parse_args())

image = cv2.imread(args["image"])
print("Loading image "+ args["image"])
height,width,_ = image.shape
# don't resize for better perfomance
#h,w,_ = image.shape
#width=600
#height = int(width*(h/w))
#image = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)


# verify that the supplied ArUCo tag exists and is supported by OpenCV
if ARUCO_DICT.get(args["type"], None) is None:
    print(f"ArUCo tag type '{args['type']}' is not supported")
    sys.exit(0)

# load the ArUCo dictionary, grab the ArUCo parameters, and detect
# the markers
#print("Detecting '{}' tags....".format(args["type"]))
arucoDict = cv2.aruco.Dictionary_get(ARUCO_DICT[args["type"]])

calibration_matrix_path = args["K_Matrix"]
distortion_coefficients_path = args["D_Coeff"]
k = np.load(calibration_matrix_path)
d = np.load(distortion_coefficients_path)

image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

arucoParams = cv2.aruco.DetectorParameters_create()

#--- 180 deg rotation matrix around the x axis
R_flip  = np.zeros((3,3), dtype=np.float32)
R_flip[0,0] = 1.0
R_flip[1,1] =-1.0
R_flip[2,2] =-1.0

corners, ids, rejected = cv2.aruco.detectMarkers(image, arucoDict,parameters=arucoParams,cameraMatrix=k,distCoeff=d)

if len(corners) > 0:
        #print(corners) #posizione degli angoli del marker
        for i in range(0, len(ids)):
            rvec, tvec, markerPoints = cv2.aruco.estimatePoseSingleMarkers(corners[i], float(args["aruco_dim"]), k,d)
            #print(rvec)
            dist = math.sqrt((tvec[0][0][0]*tvec[0][0][0])+(tvec[0][0][1]*tvec[0][0][1])+(tvec[0][0][2]*tvec[0][0][2]))
            #dist = (dist*1.060672)+10.1674
            print("Marker "+str(ids[i])+"="+str(dist))

-----------------------------------------------------------------

elaborazione video

-----------------------------------------------------------------

'''
Sample Command:-
python detect_aruco_video.py --type DICT_5X5_100 --camera True
python detect_aruco_video.py --type DICT_5X5_100 --camera False --video test_video.mp4 -a 25 -k ./calibration_matrix.npy -d ./distortion_coefficients.npy
'''

from turtle import delay
import numpy as np
from utils import ARUCO_DICT, aruco_display
import argparse
import time
import cv2
import sys
import math
import time

def isRotationMatrix(R):
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype=R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6

def rotationMatrixToEulerAngles(R):
    assert (isRotationMatrix(R))

    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])

    singular = sy < 1e-6

    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0

    return np.array([x, y, z])

R_flip  = np.zeros((3,3), dtype=np.float32)
R_flip[0,0] = 1.0
R_flip[1,1] =-1.0
R_flip[2,2] =-1.0

ap = argparse.ArgumentParser()
ap.add_argument("-i", "--camera", required=True, help="Set to True if using webcam")
ap.add_argument("-v", "--video", help="Path to the video file")
ap.add_argument("-t", "--type", type=str, default="DICT_ARUCO_ORIGINAL", help="Type of ArUCo tag to detect")
ap.add_argument("-k", "--K_Matrix", required=True, help="Path to calibration matrix (numpy file)")
ap.add_argument("-d", "--D_Coeff", required=True, help="Path to distortion coefficients (numpy file)")
ap.add_argument("-a", "--aruco_dim", required=True, help="ArUco tag dimension")
args = vars(ap.parse_args())

if args["camera"].lower() == "true":
    video = cv2.VideoCapture('udp://127.0.0.1:5000',cv2.CAP_FFMPEG)
    time.sleep(2.0)
    
else:
    if args["video"] is None:
        print("[Error] Video file location is not provided")
        sys.exit(1)

    video = cv2.VideoCapture(args["video"])

if ARUCO_DICT.get(args["type"], None) is None:
    print(f"ArUCo tag type '{args['type']}' is not supported")
    sys.exit(0)

arucoDict = cv2.aruco.Dictionary_get(ARUCO_DICT[args["type"]])
calibration_matrix_path = args["K_Matrix"]
distortion_coefficients_path = args["D_Coeff"]
k = np.load(calibration_matrix_path)
d = np.load(distortion_coefficients_path)
arucoParams = cv2.aruco.DetectorParameters_create()

while True:
    ret, frame = video.read()
    time.sleep(0.1)
    
    if ret is False:
        break


    h, w, _ = frame.shape

    width=1000
    height = int(width*(h/w))
    frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_CUBIC)
    corners, ids, rejected = cv2.aruco.detectMarkers(frame, arucoDict, parameters=arucoParams)

    detected_markers = aruco_display(corners, ids, rejected, frame)
    if len(corners) > 0:
        #print(corners) #posizione degli angoli del marker
        for i in range(0, len(ids)):
            rvec, tvec, markerPoints = cv2.aruco.estimatePoseSingleMarkers(corners[i], float(args["aruco_dim"]), k,d)
            dist = math.sqrt((tvec[0][0][0]*tvec[0][0][0])+(tvec[0][0][1]*tvec[0][0][1])+(tvec[0][0][2]*tvec[0][0][2]))
            distanza = "Distanza=%4.0f"%(dist)
            cv2.putText(frame, distanza,(50, 50),cv2.FONT_HERSHEY_SIMPLEX, 1,(0, 0, 255), 2, cv2.LINE_4)
            R_ct = np.matrix(cv2.Rodrigues(rvec)[0])
            R_ct = R_ct.T
            roll_marker, pitch_marker, yaw_marker = rotationMatrixToEulerAngles(R_flip*R_ct)
            str_roll = "Roll=%4.0f"%(math.degrees(roll_marker))
            str_pitch = "Pitch=%4.0f"%(math.degrees(pitch_marker))
            str_yaw= "Yaw=%4.0f"%(math.degrees(yaw_marker))

            cv2.putText(frame, str_roll,(50, 100),cv2.FONT_HERSHEY_SIMPLEX, 1,(0, 0, 255), 2, cv2.LINE_4)
            cv2.putText(frame, str_pitch,(50, 125),cv2.FONT_HERSHEY_SIMPLEX, 1,(0, 0, 255), 2, cv2.LINE_4)
            cv2.putText(frame, str_yaw,(50, 150),cv2.FONT_HERSHEY_SIMPLEX, 1,(0, 0, 255), 2, cv2.LINE_4)
    cv2.imshow("Image", detected_markers)
    key = cv2.waitKey(1) & 0xFF
    if key == ord("q"):
        break

cv2.destroyAllWindows()
video.release()