├── release.sh ├── aniposelib ├── __init__.py ├── utils.py ├── boards.py └── cameras.py ├── README.md ├── setup.py ├── LICENSE └── .gitignore /release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | rm -rv dist build *.egg-info 4 | python3 setup.py sdist bdist_wheel 5 | twine upload dist/* 6 | -------------------------------------------------------------------------------- /aniposelib/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | __version__ = '0.7.2' 4 | VERSION = __version__ 5 | 6 | from . import boards, cameras, utils 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Anipose lib 2 | 3 | [![License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause) 4 | 5 | An easy-to-use library for calibrating cameras and triangulation in Python. 6 | 7 | This is the backend library for the [Anipose](https://github.com/lambdaloop/anipose) package. 8 | 9 | The [documentation](https://anipose.readthedocs.io/en/latest/aniposelib-tutorial.html) is located in Anipose repository. 10 | 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="aniposelib", 8 | version="0.7.2", 9 | author="Lili Karashchuk", 10 | author_email="lili.karashchuk@gmail.com", 11 | description="An easy-to-use library for calibrating cameras in python, made for Anipose", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/lambdaloop/aniposelib", 15 | packages=setuptools.find_packages(), 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: BSD License", 19 | "Intended Audience :: Science/Research", 20 | "Topic :: Scientific/Engineering", 21 | "Topic :: Scientific/Engineering :: Image Recognition" 22 | ], 23 | install_requires=[ 24 | 'opencv-contrib-python>=4.7.0.68', 25 | 'numba', 'pandas', 26 | 'numpy', 'scipy', 'toml', 'tqdm' 27 | ], 28 | extras_require={ 29 | } 30 | ) 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019-2023, Lili Karashchuk 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Tensorflow checkpoints 2 | *.ckpt 3 | snapshot-* 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | .venv 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # PyCharm project settings 99 | .idea 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | .DS_Store 111 | -------------------------------------------------------------------------------- /aniposelib/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from scipy.cluster.hierarchy import linkage, fcluster 4 | from scipy.cluster.vq import whiten 5 | from scipy.linalg import inv 6 | from collections import defaultdict, Counter 7 | import queue 8 | import pandas as pd 9 | 10 | def make_M(rvec, tvec): 11 | out = np.zeros((4,4)) 12 | rotmat, _ = cv2.Rodrigues(rvec) 13 | out[:3,:3] = rotmat 14 | out[:3, 3] = tvec.flatten() 15 | out[3, 3] = 1 16 | return out 17 | 18 | def get_rtvec(M): 19 | rvec = cv2.Rodrigues(M[:3, :3])[0].flatten() 20 | tvec = M[:3, 3].flatten() 21 | return rvec, tvec 22 | 23 | def get_most_common(vals): 24 | Z = linkage(whiten(vals), 'ward') 25 | n_clust = max(len(vals)/10, 3) 26 | clusts = fcluster(Z, t=n_clust, criterion='maxclust') 27 | cc = Counter(clusts[clusts >= 0]) 28 | most = cc.most_common(n=1) 29 | top = most[0][0] 30 | good = clusts == top 31 | return good 32 | 33 | def select_matrices(Ms): 34 | Ms = np.array(Ms) 35 | rvecs = [cv2.Rodrigues(M[:3,:3])[0][:, 0] for M in Ms] 36 | tvecs = np.array([M[:3, 3] for M in Ms]) 37 | best = get_most_common(np.hstack([rvecs, tvecs])) 38 | Ms_best = Ms[best] 39 | return Ms_best 40 | 41 | 42 | def mean_transform(M_list): 43 | rvecs = [cv2.Rodrigues(M[:3,:3])[0][:, 0] for M in M_list] 44 | tvecs = [M[:3, 3] for M in M_list] 45 | 46 | rvec = np.mean(rvecs, axis=0) 47 | tvec = np.mean(tvecs, axis=0) 48 | 49 | return make_M(rvec, tvec) 50 | 51 | def mean_transform_robust(M_list, approx=None, error=0.3): 52 | if approx is None: 53 | M_list_robust = M_list 54 | else: 55 | M_list_robust = [] 56 | for M in M_list: 57 | rot_error = (M - approx)[:3,:3] 58 | m = np.max(np.abs(rot_error)) 59 | if m < error: 60 | M_list_robust.append(M) 61 | return mean_transform(M_list_robust) 62 | 63 | 64 | def get_transform(rtvecs, left, right): 65 | L = [] 66 | for dix in range(rtvecs.shape[1]): 67 | d = rtvecs[:, dix] 68 | good = ~np.isnan(d[:, 0]) 69 | 70 | if good[left] and good[right]: 71 | M_left = make_M(d[left, 0:3], d[left, 3:6]) 72 | M_right = make_M(d[right, 0:3], d[right, 3:6]) 73 | M = np.matmul(M_left, inv(M_right)) 74 | L.append(M) 75 | L_best = select_matrices(L) 76 | M_mean = mean_transform(L_best) 77 | M_mean = mean_transform_robust(L, M_mean, error=0.5) 78 | # M_mean = mean_transform_robust(L, M_mean, error=0.2) 79 | # M_mean = mean_transform_robust(L, M_mean, error=0.1) 80 | return M_mean 81 | 82 | 83 | def get_connections(xs, cam_names=None, both=True): 84 | n_cams = xs.shape[0] 85 | n_points = xs.shape[1] 86 | 87 | if cam_names is None: 88 | cam_names = np.arange(n_cams) 89 | 90 | connections = defaultdict(int) 91 | 92 | for rnum in range(n_points): 93 | ixs = np.where(~np.isnan(xs[:, rnum, 0]))[0] 94 | keys = [cam_names[ix] for ix in ixs] 95 | for i in range(len(keys)): 96 | for j in range(i+1, len(keys)): 97 | a = keys[i] 98 | b = keys[j] 99 | connections[(a,b)] += 1 100 | if both: 101 | connections[(b,a)] += 1 102 | 103 | return connections 104 | 105 | 106 | def get_calibration_graph(rtvecs, cam_names=None): 107 | n_cams = rtvecs.shape[0] 108 | n_points = rtvecs.shape[1] 109 | 110 | if cam_names is None: 111 | cam_names = np.arange(n_cams) 112 | 113 | connections = get_connections(rtvecs, np.arange(n_cams)) 114 | 115 | components = dict(zip(np.arange(n_cams), range(n_cams))) 116 | edges = set(connections.items()) 117 | 118 | graph = defaultdict(list) 119 | 120 | for edgenum in range(n_cams-1): 121 | if len(edges) == 0: 122 | component_names = dict() 123 | for k,v in list(components.items()): 124 | component_names[cam_names[k]] = v 125 | raise ValueError(""" 126 | Could not build calibration graph. 127 | Some group of cameras could not be paired by simultaneous calibration board detections. 128 | Check which cameras have different group numbers below to see the missing edges. 129 | {}""".format(component_names)) 130 | 131 | (a, b), weight = max(edges, key=lambda x: x[1]) 132 | graph[a].append(b) 133 | graph[b].append(a) 134 | 135 | match = components[a] 136 | replace = components[b] 137 | for k, v in components.items(): 138 | if match == v: 139 | components[k] = replace 140 | 141 | for e in edges.copy(): 142 | (a,b), w = e 143 | if components[a] == components[b]: 144 | edges.remove(e) 145 | 146 | return graph 147 | 148 | def find_calibration_pairs(graph, source=None): 149 | pairs = [] 150 | explored = set() 151 | 152 | if source is None: 153 | source = sorted(graph.keys())[0] 154 | 155 | q = queue.deque() 156 | q.append(source) 157 | 158 | while len(q) > 0: 159 | item = q.pop() 160 | explored.add(item) 161 | 162 | for new in graph[item]: 163 | if new not in explored: 164 | q.append(new) 165 | pairs.append( (item, new) ) 166 | return pairs 167 | 168 | def compute_camera_matrices(rtvecs, pairs): 169 | extrinsics = dict() 170 | source = pairs[0][0] 171 | extrinsics[source] = np.identity(4) 172 | for (a,b) in pairs: 173 | ext = get_transform(rtvecs, b, a) 174 | extrinsics[b] = np.matmul(ext, extrinsics[a]) 175 | return extrinsics 176 | 177 | def get_initial_extrinsics(rtvecs, cam_names=None): 178 | graph = get_calibration_graph(rtvecs, cam_names) 179 | pairs = find_calibration_pairs(graph, source=0) 180 | extrinsics = compute_camera_matrices(rtvecs, pairs) 181 | 182 | n_cams = rtvecs.shape[0] 183 | rvecs = [] 184 | tvecs = [] 185 | for cnum in range(n_cams): 186 | rvec, tvec = get_rtvec(extrinsics[cnum]) 187 | rvecs.append(rvec) 188 | tvecs.append(tvec) 189 | rvecs = np.array(rvecs) 190 | tvecs = np.array(tvecs) 191 | return rvecs, tvecs 192 | 193 | 194 | ## convenience function to load a set of DeepLabCut pose-2d files 195 | def load_pose2d_fnames(fname_dict, offsets_dict=None, cam_names=None): 196 | if cam_names is None: 197 | cam_names = sorted(fname_dict.keys()) 198 | pose_names = [fname_dict[cname] for cname in cam_names] 199 | 200 | if offsets_dict is None: 201 | offsets_dict = dict([(cname, (0,0)) for cname in cam_names]) 202 | 203 | datas = [] 204 | for ix_cam, (cam_name, pose_name) in \ 205 | enumerate(zip(cam_names, pose_names)): 206 | dlabs = pd.read_hdf(pose_name) 207 | if len(dlabs.columns.levels) > 2: 208 | scorer = dlabs.columns.levels[0][0] 209 | dlabs = dlabs.loc[:, scorer] 210 | 211 | bp_index = dlabs.columns.names.index('bodyparts') 212 | joint_names = list(dlabs.columns.get_level_values(bp_index).unique()) 213 | dx = offsets_dict[cam_name][0] 214 | dy = offsets_dict[cam_name][1] 215 | 216 | for joint in joint_names: 217 | dlabs.loc[:, (joint, 'x')] += dx 218 | dlabs.loc[:, (joint, 'y')] += dy 219 | 220 | datas.append(dlabs) 221 | 222 | n_cams = len(cam_names) 223 | n_joints = len(joint_names) 224 | n_frames = min([d.shape[0] for d in datas]) 225 | 226 | # frame, camera, bodypart, xy 227 | points = np.full((n_cams, n_frames, n_joints, 2), np.nan, 'float') 228 | scores = np.full((n_cams, n_frames, n_joints), np.zeros(1), 'float') 229 | 230 | for cam_ix, dlabs in enumerate(datas): 231 | for joint_ix, joint_name in enumerate(joint_names): 232 | try: 233 | points[cam_ix, :, joint_ix] = np.array(dlabs.loc[:, (joint_name, ('x', 'y'))])[:n_frames] 234 | scores[cam_ix, :, joint_ix] = np.array(dlabs.loc[:, (joint_name, ('likelihood'))])[:n_frames].ravel() 235 | except KeyError: 236 | pass 237 | 238 | return { 239 | 'cam_names': cam_names, 240 | 'points': points, 241 | 'scores': scores, 242 | 'bodyparts': joint_names 243 | } 244 | -------------------------------------------------------------------------------- /aniposelib/boards.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from abc import ABC, abstractmethod 4 | from tqdm import trange 5 | from collections import defaultdict 6 | 7 | 8 | def get_video_params_cap(cap): 9 | params = dict() 10 | params['width'] = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 11 | params['height'] = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 12 | params['nframes'] = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 13 | params['fps'] = cap.get(cv2.CAP_PROP_FPS) 14 | return params 15 | 16 | 17 | def get_video_params(fname): 18 | cap = cv2.VideoCapture(fname) 19 | if not cap.isOpened(): 20 | raise FileNotFoundError(f'missing file "{fname}"') 21 | params = get_video_params_cap(cap) 22 | cap.release() 23 | return params 24 | 25 | 26 | def fix_rvec(rvec, tvec): 27 | # https://github.com/opencv/opencv/issues/8813 28 | T = tvec.ravel()[0] 29 | R = cv2.Rodrigues(rvec)[0] 30 | 31 | # Unrelated -- makes Y the up axis, Z forward 32 | R = R @ np.array([ 33 | [1, 0, 0], 34 | [0, 0, 1], 35 | [0, -1, 0], 36 | ]) 37 | if 0 < R[1, 1] < 1: 38 | # If it gets here, the pose is flipped. 39 | 40 | # Flip the axes. E.g., Y axis becomes [-y0, -y1, y2]. 41 | R *= np.array([ 42 | [1, -1, 1], 43 | [1, -1, 1], 44 | [-1, 1, -1], 45 | ]) 46 | 47 | # Fixup: rotate along the plane spanned by camera's forward (Z) axis and vector to marker's position 48 | forward = np.array([0, 0, 1]) 49 | tnorm = T / np.linalg.norm(T) 50 | axis = np.cross(tnorm, forward) 51 | angle = -2 * np.arccos(tnorm @ forward) 52 | R = cv2.Rodrigues(angle * axis)[0] @ R 53 | 54 | return cv2.Rodrigues(R)[0] 55 | 56 | 57 | def merge_rows(all_rows, cam_names=None): 58 | """Takes a list of rows returned from detect_images or detect_videos. 59 | Returns a merged version of the rows, wherein rows from different videos/images with same framenum are grouped. 60 | Optionally takes a list of cam_names, which determines what the keys are for each row. 61 | """ 62 | 63 | assert cam_names is None or len(all_rows) == len(cam_names), \ 64 | "number of rows does not match the number of camera names" 65 | 66 | if cam_names is None: 67 | cam_names = range(len(all_rows)) 68 | 69 | rows_dict = defaultdict(dict) 70 | framenums = set() 71 | 72 | for cname, rows in zip(cam_names, all_rows): 73 | for r in rows: 74 | num = r['framenum'] 75 | rows_dict[cname][num] = r 76 | framenums.add(num) 77 | 78 | framenums = sorted(framenums) 79 | merged = [] 80 | 81 | for num in framenums: 82 | d = dict() 83 | for cname in cam_names: 84 | if num in rows_dict[cname]: 85 | d[cname] = rows_dict[cname][num] 86 | merged.append(d) 87 | 88 | return merged 89 | 90 | 91 | def extract_points(merged, 92 | board, 93 | cam_names=None, 94 | min_cameras=1, 95 | min_points=4, 96 | check_rtvecs=True): 97 | """Takes a list of merged rows (output of merge_rows) and a board object. 98 | Returns an array of object points and another array of image points, both of size CxNx2, 99 | where C is the number of cameras, N is the number of points. 100 | Optionally takes a list of cam_names, which determines what the keys are for each row. If cam_names are not given, then it is automatically determined from the rows, used in sorted order. 101 | It also takes a parameter min_cameras, which specifies how many cameras must see a point in order to keep it. 102 | """ 103 | 104 | if cam_names is None: 105 | s = set.union(*[set(r.keys()) for r in merged]) 106 | cam_names = sorted(s) 107 | 108 | test = board.get_empty_detection().reshape(-1, 2) 109 | n_cams = len(cam_names) 110 | n_points_per_detect = test.shape[0] 111 | n_detects = len(merged) 112 | 113 | objp_template = board.get_object_points().reshape(-1, 3) 114 | 115 | imgp = np.full((n_cams, n_detects, n_points_per_detect, 2), 116 | np.nan, dtype='float64') 117 | 118 | rvecs = np.full((n_cams, n_detects, n_points_per_detect, 3), 119 | np.nan, dtype='float64') 120 | 121 | tvecs = np.full((n_cams, n_detects, n_points_per_detect, 3), 122 | np.nan, dtype='float64') 123 | 124 | objp = np.empty((n_detects, n_points_per_detect, 3), 125 | dtype='float64') 126 | 127 | board_ids = np.empty((n_detects, n_points_per_detect), 128 | dtype='int32') 129 | 130 | for rix, row in enumerate(merged): 131 | objp[rix] = np.copy(objp_template) 132 | board_ids[rix] = rix 133 | 134 | for cix, cname in enumerate(cam_names): 135 | if cname in row: 136 | filled = row[cname]['filled'].reshape(-1, 2) 137 | bad = np.any(np.isnan(filled), axis=1) 138 | num_good = np.sum(~bad) 139 | if num_good < min_points: 140 | continue 141 | 142 | if row[cname].get('rvec', None) is None or \ 143 | row[cname].get('tvec', None) is None: 144 | if check_rtvecs: 145 | continue 146 | else: 147 | row[cname]['rvec'] = np.full(3, np.nan, dtype='float64') 148 | row[cname]['tvec'] = np.full(3, np.nan, dtype='float64') 149 | 150 | imgp[cix, rix] = filled 151 | 152 | rvecs[cix, rix, ~bad] = row[cname]['rvec'].ravel() 153 | tvecs[cix, rix, ~bad] = row[cname]['tvec'].ravel() 154 | 155 | imgp = np.reshape(imgp, (n_cams, -1, 2)) 156 | rvecs = np.reshape(rvecs, (n_cams, -1, 3)) 157 | tvecs = np.reshape(tvecs, (n_cams, -1, 3)) 158 | objp = np.reshape(objp, (-1, 3)) 159 | board_ids = np.reshape(board_ids, (-1)) 160 | 161 | num_good = np.sum(~np.isnan(imgp), axis=0)[:, 0] 162 | good = num_good >= min_cameras 163 | 164 | imgp = imgp[:, good] 165 | rvecs = rvecs[:, good] 166 | tvecs = tvecs[:, good] 167 | objp = objp[good] 168 | board_ids = board_ids[good] 169 | 170 | extra = { 171 | 'objp': objp, 172 | 'ids': board_ids, 173 | 'rvecs': rvecs, 174 | 'tvecs': tvecs 175 | } 176 | 177 | return imgp, extra 178 | 179 | 180 | def extract_rtvecs(merged, 181 | cam_names=None, 182 | min_cameras=1, 183 | board=None, 184 | cameras=None): 185 | """Takes a list of merged rows (output of merge_rows) and a board object. 186 | Returns an array of rvecs and tvecs appended together, of size CxNx6, 187 | where C is the number of cameras, N is the number of detections. 188 | Optionally takes a list of cam_names, which determines what the keys are for each row. If cam_names are not given, then it is automatically determined from the rows, used in sorted order. 189 | It also takes a parameter min_cameras, which specifies how many cameras must see a point in order to keep it. 190 | 191 | board.estimate_pose_rows should have been run on the rows before merging. 192 | If not, the board and cameras must be passed as arguments. 193 | """ 194 | 195 | if cam_names is None: 196 | s = set.union(*[set(r.keys()) for r in merged]) 197 | cam_names = sorted(s) 198 | 199 | n_cams = len(cam_names) 200 | n_detects = len(merged) 201 | 202 | rtvecs = np.empty((n_cams, n_detects, 6), dtype='float64') 203 | rtvecs[:] = np.nan 204 | 205 | for rix, row in enumerate(merged): 206 | for cix, cname in enumerate(cam_names): 207 | if cname in row: 208 | r = row[cname] 209 | if 'rvec' not in r or 'tvec' not in r: 210 | if board is None: 211 | raise ValueError( 212 | 'rvec or tvec not found in rows. ' 213 | 'board.estimate_pose_rows should have been run on ' 214 | 'the rows before merging.' 215 | 'If not, the board and cameras must be passed as arguments.' 216 | ) 217 | else: 218 | rvec, tvec = board.estimate_pose_points( 219 | cameras[cix], r['corners'], r['ids']) 220 | r['rvec'] = rvec 221 | r['tvec'] = tvec 222 | 223 | if r['rvec'] is None or r['tvec'] is None: 224 | continue 225 | 226 | rvec = r['rvec'].ravel() 227 | tvec = r['tvec'].ravel() 228 | 229 | rtvec = np.hstack([rvec, tvec]) 230 | rtvecs[cix, rix] = rtvec 231 | 232 | num_good = np.sum(~np.isnan(rtvecs), axis=0)[:, 0] 233 | rtvecs = rtvecs[:, num_good >= min_cameras] 234 | 235 | return rtvecs 236 | 237 | 238 | class CalibrationObject(ABC): 239 | @abstractmethod 240 | def draw(self, size): 241 | pass 242 | 243 | @abstractmethod 244 | def detect_image(self, image): 245 | pass 246 | 247 | @abstractmethod 248 | def manually_verify_board_detection(self, image, corners): 249 | pass 250 | 251 | @abstractmethod 252 | def get_object_points(self): 253 | pass 254 | 255 | @abstractmethod 256 | def estimate_pose_points(self, camera, corners, ids): 257 | pass 258 | 259 | @abstractmethod 260 | def fill_points(self, corners, ids): 261 | pass 262 | 263 | @abstractmethod 264 | def get_empty_detection(self): 265 | pass 266 | 267 | def estimate_pose_image(self, camera, image): 268 | corners, ids = self.detect_image(image) 269 | return self.estimate_pose_points(camera, corners, ids) 270 | 271 | def detect_images(self, images, progress=False, prefix=None): 272 | length = len(images) 273 | rows = [] 274 | 275 | if progress: 276 | it = trange(length, ncols=70) 277 | else: 278 | it = range(length) 279 | 280 | for framenum in it: 281 | imname = images[framenum] 282 | frame = cv2.imread(imname) 283 | 284 | corners, ids = self.detect_image(frame) 285 | 286 | if corners is not None: 287 | 288 | if prefix is None: 289 | key = framenum 290 | else: 291 | key = (prefix, framenum) 292 | 293 | row = { 294 | 'framenum': key, 295 | 'corners': corners, 296 | 'ids': ids, 297 | 'fname': imname 298 | } 299 | 300 | rows.append(row) 301 | 302 | rows = self.fill_points_rows(rows) 303 | 304 | return rows 305 | 306 | def detect_video(self, vidname, prefix=None, skip=20, progress=False): 307 | cap = cv2.VideoCapture(vidname) 308 | if not cap.isOpened(): 309 | raise FileNotFoundError(f'missing video file "{vidname}"') 310 | length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 311 | if length < 10: 312 | length = int(1e9) 313 | progress = False 314 | rows = [] 315 | 316 | go = int(skip / 2) 317 | 318 | if progress: 319 | it = trange(length, ncols=70) 320 | else: 321 | it = range(length) 322 | 323 | for framenum in it: 324 | ret, frame = cap.read() 325 | if not ret: 326 | break 327 | if framenum % skip != 0 and go <= 0: 328 | continue 329 | 330 | corners, ids = self.detect_image(frame) 331 | 332 | if corners is not None and len(corners) > 0: 333 | if prefix is None: 334 | key = framenum 335 | else: 336 | key = (prefix, framenum) 337 | go = int(skip / 2) 338 | row = {'framenum': key, 'corners': corners, 'ids': ids} 339 | rows.append(row) 340 | 341 | go = max(0, go - 1) 342 | 343 | cap.release() 344 | 345 | rows = self.fill_points_rows(rows) 346 | 347 | return rows 348 | 349 | def estimate_pose_rows(self, camera, rows): 350 | for row in rows: 351 | rvec, tvec = self.estimate_pose_points(camera, 352 | row['corners'], 353 | row['ids']) 354 | row['rvec'] = rvec 355 | row['tvec'] = tvec 356 | return rows 357 | 358 | def fill_points_rows(self, rows): 359 | for row in rows: 360 | row['filled'] = self.fill_points(row['corners'], row['ids']) 361 | return rows 362 | 363 | def get_all_calibration_points(self, rows, min_points=5): 364 | rows = self.fill_points_rows(rows) 365 | 366 | objpoints = self.get_object_points() 367 | objpoints = objpoints.reshape(-1, 3) 368 | 369 | all_obj = [] 370 | all_img = [] 371 | 372 | for row in rows: 373 | filled_test = row['filled'].reshape(-1, 2) 374 | good = np.all(~np.isnan(filled_test), axis=1) 375 | filled_app = row['filled'].reshape(-1, 2) 376 | objp = np.copy(objpoints) 377 | if np.sum(good) >= min_points: 378 | all_obj.append(np.float32(objp[good])) 379 | all_img.append(np.float32(filled_app[good])) 380 | 381 | # all_obj = np.vstack(all_obj) 382 | # all_img = np.vstack(all_img) 383 | 384 | # all_obj = np.array(all_obj, dtype='float64') 385 | # all_img = np.array(all_img, dtype='float64') 386 | 387 | return all_obj, all_img 388 | 389 | 390 | class Checkerboard(CalibrationObject): 391 | DETECT_PARAMS = \ 392 | cv2.CALIB_CB_NORMALIZE_IMAGE + \ 393 | cv2.CALIB_CB_ADAPTIVE_THRESH + \ 394 | cv2.CALIB_CB_FAST_CHECK 395 | 396 | SUBPIX_CRITERIA = (cv2.TERM_CRITERIA_EPS + 397 | cv2.TERM_CRITERIA_MAX_ITER, 398 | 30, 0.01) 399 | 400 | def __init__(self, squaresX, squaresY, square_length=1, manually_verify=False): 401 | self.squaresX = squaresX 402 | self.squaresY = squaresY 403 | self.square_length = square_length 404 | self.manually_verify = manually_verify 405 | 406 | total_size = squaresX * squaresY 407 | 408 | objp = np.zeros((total_size, 3), np.float64) 409 | objp[:, :2] = np.mgrid[0:squaresX, 0:squaresY].T.reshape(-1, 2) 410 | objp *= square_length 411 | self.objPoints = objp 412 | 413 | self.ids = np.arange(total_size) 414 | 415 | self.empty_detection = np.zeros((total_size, 1, 2)) * np.nan 416 | 417 | def get_size(self): 418 | size = (self.squaresX, self.squaresY) 419 | return size 420 | 421 | def get_empty_detection(self): 422 | return np.copy(self.empty_detection) 423 | 424 | def get_square_length(self): 425 | return self.square_length 426 | 427 | # TODO: implement checkerboard draw function 428 | def draw(self, size): 429 | pass 430 | 431 | def get_empty(self): 432 | return np.copy(self.empty_detection) 433 | 434 | def fill_points(self, corners, ids=None): 435 | out = self.get_empty_detection() 436 | if corners is None or len(corners) == 0: 437 | return out 438 | if ids is None: 439 | return corners 440 | else: 441 | ids = ids.ravel() 442 | for i, cxs in zip(ids, corners): 443 | out[i] = cxs 444 | return out 445 | 446 | def detect_image(self, image, subpix=True): 447 | 448 | if len(image.shape) == 3: 449 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 450 | else: 451 | gray = image 452 | 453 | size = self.get_size() 454 | pattern_was_found, corners = cv2.findChessboardCorners(gray, size, self.DETECT_PARAMS) 455 | 456 | if corners is not None: 457 | 458 | if subpix: 459 | corners = cv2.cornerSubPix(gray, corners, (3, 3), (-1, -1), self.SUBPIX_CRITERIA) 460 | 461 | if corners is not None \ 462 | and self.manually_verify \ 463 | and not self.manually_verify_board_detection(gray, corners): 464 | corners = None 465 | 466 | if corners is None: 467 | ids = None 468 | else: 469 | ids = self.ids 470 | 471 | return corners, ids 472 | 473 | def manually_verify_board_detection(self, image, corners): 474 | 475 | height, width = image.shape[:2] 476 | image = cv2.drawChessboardCorners(image, self.get_size(), corners, 1) 477 | cv2.putText(image, '(a) Accept (d) Reject', (int(width/1.35), int(height/16)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 255, 1, cv2.LINE_AA) 478 | cv2.imshow('verify_detection', image) 479 | while 1: 480 | key = cv2.waitKey(0) & 0xFF 481 | if key == ord('a'): 482 | cv2.putText(image, 'Accepted!', (int(width/2.5), int(height/1.05)), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2, cv2.LINE_AA) 483 | cv2.imshow('verify_detection', image) 484 | cv2.waitKey(100) 485 | return True 486 | elif key == ord('d'): 487 | cv2.putText(image, 'Rejected!', (int(width/2.5), int(height/1.05)), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2, cv2.LINE_AA) 488 | cv2.imshow('verify_detection', image) 489 | cv2.waitKey(100) 490 | return False 491 | 492 | def get_object_points(self): 493 | return self.objPoints 494 | 495 | def estimate_pose_points(self, camera, points, ids=None): 496 | ngood = np.sum(~np.isnan(points)) // 2 497 | if points is None or ngood < 7: 498 | return None, None 499 | 500 | n_points = points.size // 2 501 | points = np.reshape(points, (n_points, 1, 2)) 502 | 503 | K = camera.get_camera_matrix() 504 | D = camera.get_distortions() 505 | obj_points = self.get_object_points() 506 | 507 | if points.shape[0] != obj_points.shape[0]: 508 | return None, None 509 | 510 | try: 511 | retval, rvec, tvec, inliers = cv2.solvePnPRansac(obj_points, 512 | points, 513 | K, 514 | D, 515 | confidence=0.9, 516 | reprojectionError=30) 517 | return rvec, tvec 518 | 519 | except: 520 | print("W: failed to find checkerboard pose in image") 521 | return None, None 522 | 523 | 524 | 525 | 526 | class CharucoBoard(CalibrationObject): 527 | def __init__(self, 528 | squaresX, 529 | squaresY, 530 | square_length, 531 | marker_length, 532 | marker_bits=4, 533 | dict_size=50, 534 | aruco_dict=None, 535 | manually_verify=False): 536 | self.squaresX = squaresX 537 | self.squaresY = squaresY 538 | self.square_length = square_length 539 | self.marker_length = marker_length 540 | self.manually_verify = manually_verify 541 | 542 | ARUCO_DICTS = { 543 | (4, 50): cv2.aruco.DICT_4X4_50, 544 | (5, 50): cv2.aruco.DICT_5X5_50, 545 | (6, 50): cv2.aruco.DICT_6X6_50, 546 | (7, 50): cv2.aruco.DICT_7X7_50, 547 | (4, 100): cv2.aruco.DICT_4X4_100, 548 | (5, 100): cv2.aruco.DICT_5X5_100, 549 | (6, 100): cv2.aruco.DICT_6X6_100, 550 | (7, 100): cv2.aruco.DICT_7X7_100, 551 | (4, 250): cv2.aruco.DICT_4X4_250, 552 | (5, 250): cv2.aruco.DICT_5X5_250, 553 | (6, 250): cv2.aruco.DICT_6X6_250, 554 | (7, 250): cv2.aruco.DICT_7X7_250, 555 | (4, 1000): cv2.aruco.DICT_4X4_1000, 556 | (5, 1000): cv2.aruco.DICT_5X5_1000, 557 | (6, 1000): cv2.aruco.DICT_6X6_1000, 558 | (7, 1000): cv2.aruco.DICT_7X7_1000 559 | } 560 | 561 | dkey = (marker_bits, dict_size) 562 | self.dictionary = cv2.aruco.getPredefinedDictionary(ARUCO_DICTS[dkey]) 563 | 564 | self.board = cv2.aruco.CharucoBoard([squaresX, squaresY], 565 | square_length, marker_length, 566 | self.dictionary) 567 | 568 | total_size = (squaresX - 1) * (squaresY - 1) 569 | 570 | objp = np.zeros((total_size, 3), np.float64) 571 | objp[:, :2] = np.mgrid[0:(squaresX - 1), 0:(squaresY - 1)].T.reshape( 572 | -1, 2) 573 | objp *= square_length 574 | self.objPoints = objp 575 | 576 | self.empty_detection = np.zeros((total_size, 1, 2)) * np.nan 577 | self.total_size = total_size 578 | 579 | def get_size(self): 580 | size = (self.squaresX, self.squaresY) 581 | return size 582 | 583 | def get_square_length(self): 584 | return self.square_length 585 | 586 | def get_empty_detection(self): 587 | return np.copy(self.empty_detection) 588 | 589 | def draw(self, size): 590 | return self.board.draw(size) 591 | 592 | def fill_points(self, corners, ids): 593 | out = self.get_empty_detection() 594 | if corners is None or len(corners) == 0: 595 | return out 596 | ids = ids.ravel() 597 | for i, cxs in zip(ids, corners): 598 | out[i] = cxs 599 | return out 600 | 601 | def detect_markers(self, image, camera=None, refine=True): 602 | if len(image.shape) == 3: 603 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 604 | else: 605 | gray = image 606 | 607 | params = cv2.aruco.DetectorParameters() 608 | params.cornerRefinementMethod = cv2.aruco.CORNER_REFINE_CONTOUR 609 | params.adaptiveThreshWinSizeMin = 50 610 | params.adaptiveThreshWinSizeMax = 700 611 | params.adaptiveThreshWinSizeStep = 50 612 | params.adaptiveThreshConstant = 0 613 | 614 | try: 615 | corners, ids, rejectedImgPoints = cv2.aruco.detectMarkers( 616 | gray, self.dictionary, parameters=params) 617 | except Exception: 618 | ids = None 619 | 620 | 621 | if ids is None: 622 | return [], [] 623 | 624 | if camera is None: 625 | K = D = None 626 | else: 627 | K = camera.get_camera_matrix() 628 | D = camera.get_distortions() 629 | 630 | if refine: 631 | detectedCorners, detectedIds, rejectedCorners, recoveredIdxs = \ 632 | cv2.aruco.refineDetectedMarkers(gray, self.board, corners, ids, 633 | rejectedImgPoints, 634 | K, D, 635 | parameters=params) 636 | else: 637 | detectedCorners, detectedIds = corners, ids 638 | 639 | return detectedCorners, detectedIds 640 | 641 | def detect_image(self, image, camera=None): 642 | 643 | if len(image.shape) == 3: 644 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 645 | else: 646 | gray = image 647 | 648 | corners, ids = self.detect_markers(image, camera, refine=True) 649 | if len(corners) > 0: 650 | ret, detectedCorners, detectedIds = cv2.aruco.interpolateCornersCharuco( 651 | corners, ids, gray, self.board) 652 | if detectedIds is None: 653 | detectedCorners = detectedIds = np.float64([]) 654 | else: 655 | detectedCorners = detectedIds = np.float64([]) 656 | 657 | if len(detectedCorners) > 0 \ 658 | and self.manually_verify \ 659 | and not self.manually_verify_board_detection(gray, detectedCorners, detectedIds): 660 | detectedCorners = detectedIds = np.float64([]) 661 | 662 | return detectedCorners, detectedIds 663 | 664 | 665 | def manually_verify_board_detection(self, image, corners, ids=None): 666 | 667 | height, width = image.shape[:2] 668 | image = cv2.aruco.drawDetectedCornersCharuco(image, corners, ids) 669 | cv2.putText(image, '(a) Accept (d) Reject', (int(width/1.35), int(height/16)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 255, 1, cv2.LINE_AA) 670 | cv2.imshow('verify_detection', image) 671 | while 1: 672 | key = cv2.waitKey(0) & 0xFF 673 | if key == ord('a'): 674 | cv2.putText(image, 'Accepted!', (int(width/2.5), int(height/1.05)), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2, cv2.LINE_AA) 675 | cv2.imshow('verify_detection', image) 676 | cv2.waitKey(100) 677 | return True 678 | elif key == ord('d'): 679 | cv2.putText(image, 'Rejected!', (int(width/2.5), int(height/1.05)), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2, cv2.LINE_AA) 680 | cv2.imshow('verify_detection', image) 681 | cv2.waitKey(100) 682 | return False 683 | 684 | def get_object_points(self): 685 | return self.objPoints 686 | 687 | def estimate_pose_points(self, camera, corners, ids): 688 | if corners is None or ids is None or len(corners) < 7: 689 | return None, None 690 | 691 | n_corners = corners.size // 2 692 | corners = np.reshape(corners, (n_corners, 1, 2)) 693 | 694 | K = camera.get_camera_matrix() 695 | D = camera.get_distortions() 696 | 697 | ret, rvec, tvec = cv2.aruco.estimatePoseCharucoBoard( 698 | corners, ids, self.board, K, D, None, None) 699 | 700 | return rvec, tvec 701 | -------------------------------------------------------------------------------- /aniposelib/cameras.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from copy import copy 4 | from scipy.sparse import lil_matrix, dok_matrix 5 | from scipy.linalg import inv 6 | from scipy import optimize 7 | from scipy import signal 8 | from numba import jit 9 | from collections import defaultdict, Counter 10 | import toml 11 | import itertools 12 | from tqdm import trange 13 | from pprint import pprint 14 | import time 15 | 16 | from .boards import merge_rows, extract_points, \ 17 | extract_rtvecs, get_video_params 18 | from .utils import get_initial_extrinsics, make_M, get_rtvec, \ 19 | get_connections 20 | 21 | @jit(nopython=True, parallel=True) 22 | def triangulate_simple(points, camera_mats): 23 | num_cams = len(camera_mats) 24 | A = np.zeros((num_cams * 2, 4)) 25 | for i in range(num_cams): 26 | x, y = points[i] 27 | mat = camera_mats[i] 28 | A[(i * 2):(i * 2 + 1)] = x * mat[2] - mat[0] 29 | A[(i * 2 + 1):(i * 2 + 2)] = y * mat[2] - mat[1] 30 | u, s, vh = np.linalg.svd(A, full_matrices=True) 31 | p3d = vh[-1] 32 | p3d = p3d[:3] / p3d[3] 33 | return p3d 34 | 35 | 36 | def get_error_dict(errors_full, min_points=10): 37 | n_cams = errors_full.shape[0] 38 | errors_norm = np.linalg.norm(errors_full, axis=2) 39 | 40 | good = ~np.isnan(errors_full[:, :, 0]) 41 | 42 | error_dict = dict() 43 | 44 | for i in range(n_cams): 45 | for j in range(i+1, n_cams): 46 | subset = good[i] & good[j] 47 | err_subset = errors_norm[:, subset][[i, j]] 48 | err_subset_mean = np.mean(err_subset, axis=0) 49 | if np.sum(subset) > min_points: 50 | percents = np.percentile(err_subset_mean, [15, 75]) 51 | # percents = np.percentile(err_subset, [25, 75]) 52 | error_dict[(i, j)] = (err_subset.shape[1], percents) 53 | return error_dict 54 | 55 | def check_errors(cgroup, imgp): 56 | p3ds = cgroup.triangulate(imgp) 57 | errors_full = cgroup.reprojection_error(p3ds, imgp, mean=False) 58 | return get_error_dict(errors_full) 59 | 60 | def subset_extra(extra, ixs): 61 | if extra is None: 62 | return None 63 | 64 | new_extra = { 65 | 'objp': extra['objp'][ixs], 66 | 'ids': extra['ids'][ixs], 67 | 'rvecs': extra['rvecs'][:, ixs], 68 | 'tvecs': extra['tvecs'][:, ixs] 69 | } 70 | return new_extra 71 | 72 | def resample_points_extra(imgp, extra, n_samp=25): 73 | n_cams, n_points, _ = imgp.shape 74 | ids = remap_ids(extra['ids']) 75 | n_ids = np.max(ids)+1 76 | good = ~np.isnan(imgp[:, :, 0]) 77 | ixs = np.arange(n_points) 78 | 79 | cam_counts = np.zeros((n_ids, n_cams), dtype='int64') 80 | for idnum in range(n_ids): 81 | cam_counts[idnum] = np.sum(good[:, ids == idnum], axis=1) 82 | cam_counts_random = cam_counts + np.random.random(size=cam_counts.shape) 83 | best_boards = np.argsort(-cam_counts_random, axis=0) 84 | 85 | cam_totals = np.zeros(n_cams, dtype='int64') 86 | 87 | include = set() 88 | for cam_num in range(n_cams): 89 | for board_id in best_boards[:, cam_num]: 90 | include.update(ixs[ids == board_id]) 91 | cam_totals += cam_counts[board_id] 92 | if cam_totals[cam_num] >= n_samp or \ 93 | cam_counts_random[board_id, cam_num] < 1: 94 | break 95 | 96 | final_ixs = sorted(include) 97 | newp = imgp[:, final_ixs] 98 | extra = subset_extra(extra, final_ixs) 99 | return newp, extra 100 | 101 | def resample_points(imgp, extra=None, n_samp=25): 102 | # if extra is not None: 103 | # return resample_points_extra(imgp, extra, n_samp) 104 | 105 | n_cams = imgp.shape[0] 106 | good = ~np.isnan(imgp[:, :, 0]) 107 | ixs = np.arange(imgp.shape[1]) 108 | 109 | num_cams = np.sum(~np.isnan(imgp[:, :, 0]), axis=0) 110 | 111 | include = set() 112 | 113 | for i in range(n_cams): 114 | for j in range(i+1, n_cams): 115 | subset = good[i] & good[j] 116 | n_good = np.sum(subset) 117 | if n_good > 0: 118 | ## pick points, prioritizing points seen by more cameras 119 | arr = np.copy(num_cams[subset]).astype('float64') 120 | arr += np.random.random(size=arr.shape) 121 | picked_ix = np.argsort(-arr)[:n_samp] 122 | picked = ixs[subset][picked_ix] 123 | include.update(picked) 124 | 125 | final_ixs = sorted(include) 126 | newp = imgp[:, final_ixs] 127 | extra = subset_extra(extra, final_ixs) 128 | return newp, extra 129 | 130 | def medfilt_data(values, size=15): 131 | padsize = size+5 132 | vpad = np.pad(values, (padsize, padsize), mode='reflect') 133 | vpadf = signal.medfilt(vpad, kernel_size=size) 134 | return vpadf[padsize:-padsize] 135 | 136 | def nan_helper(y): 137 | return np.isnan(y), lambda z: z.nonzero()[0] 138 | 139 | def interpolate_data(vals): 140 | nans, ix = nan_helper(vals) 141 | out = np.copy(vals) 142 | try: 143 | out[nans] = np.interp(ix(nans), ix(~nans), vals[~nans]) 144 | except ValueError: 145 | out[:] = 0 146 | return out 147 | 148 | def remap_ids(ids): 149 | unique_ids = np.unique(ids) 150 | ids_out = np.copy(ids) 151 | for i, num in enumerate(unique_ids): 152 | ids_out[ids == num] = i 153 | return ids_out 154 | 155 | def transform_points(points, rvecs, tvecs): 156 | """Rotate points by given rotation vectors and translate. 157 | Rodrigues' rotation formula is used. 158 | """ 159 | theta = np.linalg.norm(rvecs, axis=1)[:, np.newaxis] 160 | with np.errstate(invalid='ignore'): 161 | v = rvecs / theta 162 | v = np.nan_to_num(v) 163 | dot = np.sum(points * v, axis=1)[:, np.newaxis] 164 | cos_theta = np.cos(theta) 165 | sin_theta = np.sin(theta) 166 | 167 | rotated = cos_theta * points + \ 168 | sin_theta * np.cross(v, points) + \ 169 | dot * (1 - cos_theta) * v 170 | 171 | return rotated + tvecs 172 | 173 | 174 | class Camera: 175 | def __init__(self, 176 | matrix=np.eye(3), 177 | dist=np.zeros(5), 178 | size=None, 179 | rvec=np.zeros(3), 180 | tvec=np.zeros(3), 181 | name=None, 182 | extra_dist=False): 183 | 184 | self.set_camera_matrix(matrix) 185 | self.set_distortions(dist) 186 | self.set_size(size) 187 | self.set_rotation(rvec) 188 | self.set_translation(tvec) 189 | self.set_name(name) 190 | self.extra_dist = extra_dist 191 | 192 | def get_dict(self): 193 | return { 194 | 'name': self.get_name(), 195 | 'size': list(self.get_size()), 196 | 'matrix': self.get_camera_matrix().tolist(), 197 | 'distortions': self.get_distortions().tolist(), 198 | 'rotation': self.get_rotation().tolist(), 199 | 'translation': self.get_translation().tolist(), 200 | } 201 | 202 | def load_dict(self, d): 203 | self.set_camera_matrix(d['matrix']) 204 | self.set_rotation(d['rotation']) 205 | self.set_translation(d['translation']) 206 | self.set_distortions(d['distortions']) 207 | self.set_name(d['name']) 208 | self.set_size(d['size']) 209 | 210 | def from_dict(d): 211 | cam = Camera() 212 | cam.load_dict(d) 213 | return cam 214 | 215 | def get_camera_matrix(self): 216 | return self.matrix 217 | 218 | def get_distortions(self): 219 | return self.dist 220 | 221 | def set_camera_matrix(self, matrix): 222 | self.matrix = np.array(matrix, dtype='float64') 223 | 224 | def set_focal_length(self, fx, fy=None): 225 | if fy is None: 226 | fy = fx 227 | self.matrix[0, 0] = fx 228 | self.matrix[1, 1] = fy 229 | 230 | def get_focal_length(self, both=False): 231 | fx = self.matrix[0, 0] 232 | fy = self.matrix[1, 1] 233 | if both: 234 | return (fx, fy) 235 | else: 236 | return (fx + fy) / 2.0 237 | 238 | def set_distortions(self, dist): 239 | self.dist = np.array(dist, dtype='float64').ravel() 240 | 241 | def zero_distortions(self): 242 | self.dist = self.dist * 0 243 | 244 | def set_rotation(self, rvec): 245 | self.rvec = np.array(rvec, dtype='float64').ravel() 246 | 247 | def get_rotation(self): 248 | return self.rvec 249 | 250 | def set_translation(self, tvec): 251 | self.tvec = np.array(tvec, dtype='float64').ravel() 252 | 253 | def get_translation(self): 254 | return self.tvec 255 | 256 | def get_extrinsics_mat(self): 257 | return make_M(self.rvec, self.tvec) 258 | 259 | def get_name(self): 260 | return self.name 261 | 262 | def set_name(self, name): 263 | self.name = str(name) 264 | 265 | def set_size(self, size): 266 | """set size as (width, height)""" 267 | self.size = size 268 | 269 | def get_size(self): 270 | """get size as (width, height)""" 271 | return self.size 272 | 273 | def resize_camera(self, scale): 274 | """resize the camera by scale factor, updating intrinsics to match""" 275 | size = self.get_size() 276 | new_size = size[0] * scale, size[1] * scale 277 | matrix = self.get_camera_matrix() 278 | new_matrix = matrix * scale 279 | new_matrix[2, 2] = 1 280 | self.set_size(new_size) 281 | self.set_camera_matrix(new_matrix) 282 | 283 | def get_params(self, only_extrinsics=False): 284 | if only_extrinsics: 285 | params = np.zeros(6, dtype='float64') 286 | else: 287 | params = np.zeros(8 + self.extra_dist, dtype='float64') 288 | params[0:3] = self.get_rotation() 289 | params[3:6] = self.get_translation() 290 | if only_extrinsics: 291 | return params 292 | params[6] = self.get_focal_length() 293 | dist = self.get_distortions() 294 | params[7] = dist[0] 295 | if self.extra_dist: 296 | params[8] = dist[1] 297 | return params 298 | 299 | def set_params(self, params, only_extrinsics=False): 300 | self.set_rotation(params[0:3]) 301 | self.set_translation(params[3:6]) 302 | if only_extrinsics: 303 | return 304 | 305 | self.set_focal_length(params[6]) 306 | 307 | dist = np.zeros(5, dtype='float64') 308 | dist[0] = params[7] 309 | if self.extra_dist: 310 | dist[1] = params[8] 311 | self.set_distortions(dist) 312 | 313 | def distort_points(self, points): 314 | shape = points.shape 315 | points = points.reshape(-1, 1, 2) 316 | new_points = np.dstack([points, np.ones((points.shape[0], 1, 1))]) 317 | out, _ = cv2.projectPoints(new_points, np.zeros(3), np.zeros(3), 318 | self.matrix.astype('float64'), 319 | self.dist.astype('float64')) 320 | return out.reshape(shape) 321 | 322 | def undistort_points(self, points): 323 | shape = points.shape 324 | points = points.reshape(-1, 1, 2) 325 | out = cv2.undistortPoints(points, 326 | self.matrix.astype('float64'), 327 | self.dist.astype('float64')) 328 | return out.reshape(shape) 329 | 330 | def project(self, points): 331 | points = points.reshape(-1, 1, 3) 332 | out, _ = cv2.projectPoints(points, self.rvec, self.tvec, 333 | self.matrix.astype('float64'), 334 | self.dist.astype('float64')) 335 | return out 336 | 337 | def reprojection_error(self, p3d, p2d): 338 | proj = self.project(p3d).reshape(p2d.shape) 339 | return p2d - proj 340 | 341 | def copy(self): 342 | return \ 343 | Camera(matrix=self.get_camera_matrix().copy(), 344 | dist=self.get_distortions().copy(), 345 | size=self.get_size(), 346 | rvec=self.get_rotation().copy(), 347 | tvec=self.get_translation().copy(), 348 | name=self.get_name(), 349 | extra_dist=self.extra_dist) 350 | 351 | class FisheyeCamera(Camera): 352 | def __init__(self, 353 | matrix=np.eye(3), 354 | dist=np.zeros(4), 355 | size=None, 356 | rvec=np.zeros(3), 357 | tvec=np.zeros(3), 358 | name=None, 359 | extra_dist=False): 360 | self.set_camera_matrix(matrix) 361 | self.set_distortions(dist) 362 | self.set_size(size) 363 | self.set_rotation(rvec) 364 | self.set_translation(tvec) 365 | self.set_name(name) 366 | self.extra_dist = extra_dist 367 | 368 | def from_dict(d): 369 | cam = FisheyeCamera() 370 | cam.load_dict(d) 371 | return cam 372 | 373 | def get_dict(self): 374 | d = super().get_dict() 375 | d['fisheye'] = True 376 | return d 377 | 378 | def distort_points(self, points): 379 | shape = points.shape 380 | points = points.reshape(-1, 1, 2) 381 | new_points = np.dstack([points, np.ones((points.shape[0], 1, 1))]) 382 | out, _ = cv2.fisheye.projectPoints(new_points, 383 | np.zeros(3), np.zeros(3), 384 | self.matrix.astype('float64'), 385 | self.dist.astype('float64')) 386 | return out.reshape(shape) 387 | 388 | def undistort_points(self, points): 389 | shape = points.shape 390 | points = points.reshape(-1, 1, 2) 391 | out = cv2.fisheye.undistortPoints(points.astype('float64'), 392 | self.matrix.astype('float64'), 393 | self.dist.astype('float64')) 394 | return out.reshape(shape) 395 | 396 | def project(self, points): 397 | points = points.reshape(-1, 1, 3) 398 | out, _ = cv2.fisheye.projectPoints(points, 399 | self.rvec, self.tvec, 400 | self.matrix.astype('float64'), 401 | self.dist.astype('float64')) 402 | return out 403 | 404 | def set_params(self, params, only_extrinsics): 405 | self.set_rotation(params[0:3]) 406 | self.set_translation(params[3:6]) 407 | 408 | if only_extrinsics: 409 | return 410 | 411 | self.set_focal_length(params[6]) 412 | 413 | dist = np.zeros(4, dtype='float64') 414 | dist[0] = params[7] 415 | if self.extra_dist: 416 | dist[1] = params[8] 417 | # dist[2] = params[9] 418 | # dist[3] = params[10] 419 | self.set_distortions(dist) 420 | 421 | def get_params(self, only_extrinsics=False): 422 | if only_extrinsics: 423 | params = np.zeros(6, dtype='float64') 424 | else: 425 | params = np.zeros(8+self.extra_dist, dtype='float64') 426 | params[0:3] = self.get_rotation() 427 | params[3:6] = self.get_translation() 428 | if only_extrinsics: 429 | return params 430 | params[6] = self.get_focal_length() 431 | dist = self.get_distortions() 432 | params[7] = dist[0] 433 | if self.extra_dist: 434 | params[8] = dist[1] 435 | # params[9] = dist[2] 436 | # params[10] = dist[3] 437 | return params 438 | 439 | def copy(self): 440 | return FisheyeCamera( 441 | matrix=self.get_camera_matrix().copy(), 442 | dist=self.get_distortions().copy(), 443 | size=self.get_size(), 444 | rvec=self.get_rotation().copy(), 445 | tvec=self.get_translation().copy(), 446 | name=self.get_name(), 447 | extra_dist=self.extra_dist) 448 | 449 | class CameraGroup: 450 | def __init__(self, cameras, metadata={}): 451 | self.cameras = cameras 452 | self.metadata = metadata 453 | 454 | def subset_cameras(self, indices): 455 | cams = [self.cameras[ix].copy() for ix in indices] 456 | return CameraGroup(cams, self.metadata) 457 | 458 | def subset_cameras_names(self, names): 459 | cur_names = self.get_names() 460 | cur_names_dict = dict(zip(cur_names, range(len(cur_names)))) 461 | indices = [] 462 | for name in names: 463 | if name not in cur_names_dict: 464 | raise IndexError( 465 | "name {} not part of camera names: {}".format( 466 | name, cur_names 467 | )) 468 | indices.append(cur_names_dict[name]) 469 | return self.subset_cameras(indices) 470 | 471 | def project(self, points): 472 | """Given an Nx3 array of points, this returns an CxNx2 array of 2D points, 473 | where C is the number of cameras""" 474 | points = points.reshape(-1, 1, 3) 475 | n_points = points.shape[0] 476 | n_cams = len(self.cameras) 477 | 478 | out = np.empty((n_cams, n_points, 2), dtype='float64') 479 | for cnum, cam in enumerate(self.cameras): 480 | out[cnum] = cam.project(points).reshape(n_points, 2) 481 | 482 | return out 483 | 484 | def triangulate(self, points, undistort=True, progress=False, fast=False): 485 | """Given an CxNx2 array, this returns an Nx3 array of points, 486 | where N is the number of points and C is the number of cameras""" 487 | 488 | assert points.shape[0] == len(self.cameras), \ 489 | "Invalid points shape, first dim should be equal to" \ 490 | " number of cameras ({}), but shape is {}".format( 491 | len(self.cameras), points.shape 492 | ) 493 | 494 | one_point = False 495 | if len(points.shape) == 2: 496 | points = points.reshape(-1, 1, 2) 497 | one_point = True 498 | 499 | if undistort: 500 | new_points = np.empty(points.shape) 501 | for cnum, cam in enumerate(self.cameras): 502 | # must copy in order to satisfy opencv underneath 503 | sub = np.copy(points[cnum]) 504 | new_points[cnum] = cam.undistort_points(sub) 505 | points = new_points 506 | 507 | n_cams, n_points, _ = points.shape 508 | 509 | 510 | if fast: 511 | cam_Rt_mats = np.array([cam.get_extrinsics_mat()[:3] for cam in self.cameras]) 512 | 513 | p3d_allview_withnan = [] 514 | for j1, j2 in itertools.combinations(range(n_cams), 2): 515 | pts1, pts2 = points[j1], points[j2] 516 | Rt1, Rt2 = cam_Rt_mats[j1], cam_Rt_mats[j2] 517 | tri = cv2.triangulatePoints(Rt1, Rt2, pts1.T, pts2.T) 518 | tri = tri[:3]/tri[3] 519 | p3d_allview_withnan.append(tri.T) 520 | p3d_allview_withnan = np.array(p3d_allview_withnan) 521 | out = np.nanmedian(p3d_allview_withnan, axis=0) 522 | 523 | else: 524 | out = np.empty((n_points, 3)) 525 | out[:] = np.nan 526 | 527 | cam_mats = np.array([cam.get_extrinsics_mat() for cam in self.cameras]) 528 | 529 | if progress: 530 | iterator = trange(n_points, ncols=70) 531 | else: 532 | iterator = range(n_points) 533 | 534 | for ip in iterator: 535 | subp = points[:, ip, :] 536 | good = ~np.isnan(subp[:, 0]) 537 | if np.sum(good) >= 2: 538 | out[ip] = triangulate_simple(subp[good], cam_mats[good]) 539 | 540 | if one_point: 541 | out = out[0] 542 | 543 | return out 544 | 545 | def triangulate_possible(self, points, undistort=True, 546 | min_cams=2, progress=False, threshold=0.5): 547 | """Given an CxNxPx2 array, this returns an Nx3 array of points 548 | by triangulating all possible points and picking the ones with 549 | best reprojection error 550 | where: 551 | C: number of cameras 552 | N: number of points 553 | P: number of possible options per point 554 | """ 555 | 556 | assert points.shape[0] == len(self.cameras), \ 557 | "Invalid points shape, first dim should be equal to" \ 558 | " number of cameras ({}), but shape is {}".format( 559 | len(self.cameras), points.shape 560 | ) 561 | 562 | n_cams, n_points, n_possible, _ = points.shape 563 | 564 | cam_nums, point_nums, possible_nums = np.where( 565 | ~np.isnan(points[:, :, :, 0])) 566 | 567 | all_iters = defaultdict(dict) 568 | 569 | for cam_num, point_num, possible_num in zip(cam_nums, point_nums, 570 | possible_nums): 571 | if cam_num not in all_iters[point_num]: 572 | all_iters[point_num][cam_num] = [] 573 | all_iters[point_num][cam_num].append((cam_num, possible_num)) 574 | 575 | for point_num in all_iters.keys(): 576 | for cam_num in all_iters[point_num].keys(): 577 | all_iters[point_num][cam_num].append(None) 578 | 579 | out = np.full((n_points, 3), np.nan, dtype='float64') 580 | picked_vals = np.zeros((n_cams, n_points, n_possible), dtype='bool') 581 | errors = np.zeros(n_points, dtype='float64') 582 | points_2d = np.full((n_cams, n_points, 2), np.nan, dtype='float64') 583 | 584 | if progress: 585 | iterator = trange(n_points, ncols=70) 586 | else: 587 | iterator = range(n_points) 588 | 589 | for point_ix in iterator: 590 | best_point = None 591 | best_error = 200 592 | 593 | n_cams_max = len(all_iters[point_ix]) 594 | 595 | for picked in itertools.product(*all_iters[point_ix].values()): 596 | picked = [p for p in picked if p is not None] 597 | if len(picked) < min_cams and len(picked) != n_cams_max: 598 | continue 599 | 600 | cnums = [p[0] for p in picked] 601 | xnums = [p[1] for p in picked] 602 | 603 | pts = points[cnums, point_ix, xnums] 604 | cc = self.subset_cameras(cnums) 605 | 606 | p3d = cc.triangulate(pts, undistort=undistort) 607 | err = cc.reprojection_error(p3d, pts, mean=True) 608 | 609 | if err < best_error: 610 | best_point = { 611 | 'error': err, 612 | 'point': p3d[:3], 613 | 'points': pts, 614 | 'picked': picked, 615 | 'joint_ix': point_ix 616 | } 617 | best_error = err 618 | if best_error < threshold: 619 | break 620 | 621 | if best_point is not None: 622 | out[point_ix] = best_point['point'] 623 | picked = best_point['picked'] 624 | cnums = [p[0] for p in picked] 625 | xnums = [p[1] for p in picked] 626 | picked_vals[cnums, point_ix, xnums] = True 627 | errors[point_ix] = best_point['error'] 628 | points_2d[cnums, point_ix] = best_point['points'] 629 | 630 | return out, picked_vals, points_2d, errors 631 | 632 | def triangulate_ransac(self, points, undistort=True, min_cams=2, progress=False): 633 | """Given an CxNx2 array, this returns an Nx3 array of points, 634 | where N is the number of points and C is the number of cameras""" 635 | 636 | assert points.shape[0] == len(self.cameras), \ 637 | "Invalid points shape, first dim should be equal to" \ 638 | " number of cameras ({}), but shape is {}".format( 639 | len(self.cameras), points.shape 640 | ) 641 | 642 | n_cams, n_points, _ = points.shape 643 | 644 | points_ransac = points.reshape(n_cams, n_points, 1, 2) 645 | 646 | return self.triangulate_possible(points_ransac, 647 | undistort=undistort, 648 | min_cams=min_cams, 649 | progress=progress) 650 | 651 | 652 | @jit(parallel=True, forceobj=True) 653 | def reprojection_error(self, p3ds, p2ds, mean=False): 654 | """Given an Nx3 array of 3D points and an CxNx2 array of 2D points, 655 | where N is the number of points and C is the number of cameras, 656 | this returns an CxNx2 array of errors. 657 | Optionally mean=True, this averages the errors and returns array of length N of errors""" 658 | 659 | one_point = False 660 | if len(p3ds.shape) == 1 and len(p2ds.shape) == 2: 661 | p3ds = p3ds.reshape(1, 3) 662 | p2ds = p2ds.reshape(-1, 1, 2) 663 | one_point = True 664 | 665 | n_cams, n_points, _ = p2ds.shape 666 | assert p3ds.shape == (n_points, 3), \ 667 | "shapes of 2D and 3D points are not consistent: " \ 668 | "2D={}, 3D={}".format(p2ds.shape, p3ds.shape) 669 | 670 | errors = np.empty((n_cams, n_points, 2)) 671 | 672 | for cnum, cam in enumerate(self.cameras): 673 | errors[cnum] = cam.reprojection_error(p3ds, p2ds[cnum]) 674 | 675 | if mean: 676 | errors_norm = np.linalg.norm(errors, axis=2) 677 | good = ~np.isnan(errors_norm) 678 | errors_norm[~good] = 0 679 | denom = np.sum(good, axis=0).astype('float64') 680 | denom[denom < 1.5] = np.nan 681 | errors = np.sum(errors_norm, axis=0) / denom 682 | 683 | if one_point: 684 | if mean: 685 | errors = float(errors[0]) 686 | else: 687 | errors = errors.reshape(-1, 2) 688 | 689 | return errors 690 | 691 | 692 | def bundle_adjust_iter(self, p2ds, extra=None, 693 | n_iters=6, start_mu=15, end_mu=1, 694 | max_nfev=200, ftol=1e-4, 695 | n_samp_iter=200, n_samp_full=1000, 696 | error_threshold=0.3, only_extrinsics=False, 697 | verbose=False): 698 | """Given an CxNx2 array of 2D points, 699 | where N is the number of points and C is the number of cameras, 700 | this performs iterative bundle adjustsment to fine-tune the parameters of the cameras. 701 | That is, it performs bundle adjustment multiple times, adjusting the weights given to points 702 | to reduce the influence of outliers. 703 | This is inspired by the algorithm for Fast Global Registration by Zhou, Park, and Koltun 704 | """ 705 | 706 | assert p2ds.shape[0] == len(self.cameras), \ 707 | "Invalid points shape, first dim should be equal to" \ 708 | " number of cameras ({}), but shape is {}".format( 709 | len(self.cameras), p2ds.shape 710 | ) 711 | 712 | p2ds_full = p2ds 713 | extra_full = extra 714 | 715 | p2ds, extra = resample_points(p2ds_full, extra_full, 716 | n_samp=n_samp_full) 717 | error = self.average_error(p2ds, median=True) 718 | 719 | if verbose: 720 | print('error: ', error) 721 | 722 | mus = np.exp(np.linspace(np.log(start_mu), np.log(end_mu), num=n_iters)) 723 | 724 | if verbose: 725 | print('n_samples: {}'.format(n_samp_iter)) 726 | 727 | for i in range(n_iters): 728 | p2ds, extra = resample_points(p2ds_full, extra_full, 729 | n_samp=n_samp_full) 730 | p3ds = self.triangulate(p2ds) 731 | errors_full = self.reprojection_error(p3ds, p2ds, mean=False) 732 | errors_norm = self.reprojection_error(p3ds, p2ds, mean=True) 733 | 734 | error_dict = get_error_dict(errors_full) 735 | max_error = 0 736 | min_error = 0 737 | for k, v in error_dict.items(): 738 | num, percents = v 739 | max_error = max(percents[-1], max_error) 740 | min_error = max(percents[0], min_error) 741 | mu = max(min(max_error, mus[i]), min_error) 742 | 743 | good = errors_norm < mu 744 | extra_good = subset_extra(extra, good) 745 | p2ds_samp, extra_samp = resample_points( 746 | p2ds[:, good], extra_good, n_samp=n_samp_iter) 747 | 748 | error = np.median(errors_norm) 749 | 750 | if error < error_threshold: 751 | break 752 | 753 | if verbose: 754 | pprint(error_dict) 755 | print('error: {:.2f}, mu: {:.1f}, ratio: {:.3f}'.format(error, mu, np.mean(good))) 756 | 757 | self.bundle_adjust(p2ds_samp, extra_samp, 758 | loss='linear', ftol=ftol, 759 | max_nfev=max_nfev, only_extrinsics=only_extrinsics, 760 | verbose=verbose) 761 | 762 | 763 | p2ds, extra = resample_points(p2ds_full, extra_full, 764 | n_samp=n_samp_full) 765 | p3ds = self.triangulate(p2ds) 766 | errors_full = self.reprojection_error(p3ds, p2ds, mean=False) 767 | errors_norm = self.reprojection_error(p3ds, p2ds, mean=True) 768 | error_dict = get_error_dict(errors_full) 769 | if verbose: 770 | pprint(error_dict) 771 | 772 | max_error = 0 773 | min_error = 0 774 | for k, v in error_dict.items(): 775 | num, percents = v 776 | max_error = max(percents[-1], max_error) 777 | min_error = max(percents[0], min_error) 778 | mu = max(max(max_error, end_mu), min_error) 779 | 780 | good = errors_norm < mu 781 | extra_good = subset_extra(extra, good) 782 | self.bundle_adjust(p2ds[:, good], extra_good, 783 | loss='linear', 784 | ftol=ftol, max_nfev=max(200, max_nfev), 785 | only_extrinsics=only_extrinsics, 786 | verbose=verbose) 787 | 788 | error = self.average_error(p2ds, median=True) 789 | 790 | p3ds = self.triangulate(p2ds) 791 | errors_full = self.reprojection_error(p3ds, p2ds, mean=False) 792 | error_dict = get_error_dict(errors_full) 793 | if verbose: 794 | pprint(error_dict) 795 | 796 | if verbose: 797 | print('error: ', error) 798 | 799 | return error 800 | 801 | def bundle_adjust(self, p2ds, extra=None, 802 | loss='linear', 803 | threshold=50, 804 | ftol=1e-4, 805 | max_nfev=1000, 806 | weights=None, 807 | start_params=None, 808 | only_extrinsics=False, 809 | verbose=True): 810 | """Given an CxNx2 array of 2D points, 811 | where N is the number of points and C is the number of cameras, 812 | this performs bundle adjustsment to fine-tune the parameters of the cameras""" 813 | 814 | assert p2ds.shape[0] == len(self.cameras), \ 815 | "Invalid points shape, first dim should be equal to" \ 816 | " number of cameras ({}), but shape is {}".format( 817 | len(self.cameras), p2ds.shape 818 | ) 819 | 820 | if extra is not None: 821 | extra['ids_map'] = remap_ids(extra['ids']) 822 | 823 | x0, n_cam_params = self._initialize_params_bundle(p2ds, extra, only_extrinsics) 824 | 825 | if start_params is not None: 826 | x0 = start_params 827 | # n_cam_params = len(self.cameras[0].get_params(only_extrinsics)) 828 | 829 | error_fun = self._error_fun_bundle 830 | 831 | jac_sparse = self._jac_sparsity_bundle(p2ds, n_cam_params, extra) 832 | 833 | f_scale = threshold 834 | opt = optimize.least_squares(error_fun, 835 | x0, 836 | jac_sparsity=jac_sparse, 837 | f_scale=f_scale, 838 | x_scale='jac', 839 | loss=loss, 840 | ftol=ftol, 841 | method='trf', 842 | tr_solver='lsmr', 843 | verbose=2 * verbose, 844 | max_nfev=max_nfev, 845 | args=(p2ds, n_cam_params, extra, only_extrinsics)) 846 | best_params = opt.x 847 | 848 | for i, cam in enumerate(self.cameras): 849 | a = i * n_cam_params 850 | b = (i + 1) * n_cam_params 851 | cam.set_params(best_params[a:b], only_extrinsics) 852 | 853 | error = self.average_error(p2ds) 854 | return error 855 | 856 | @jit(parallel=True, forceobj=True) 857 | def _error_fun_bundle(self, params, p2ds, n_cam_params, extra, only_extrinsics): 858 | """Error function for bundle adjustment""" 859 | good = ~np.isnan(p2ds) 860 | n_cams = len(self.cameras) 861 | 862 | for i in range(n_cams): 863 | cam = self.cameras[i] 864 | a = i * n_cam_params 865 | b = (i + 1) * n_cam_params 866 | cam.set_params(params[a:b], only_extrinsics) 867 | 868 | n_cams = len(self.cameras) 869 | sub = n_cam_params * n_cams 870 | n3d = p2ds.shape[1] * 3 871 | p3ds_test = params[sub:sub+n3d].reshape(-1, 3) 872 | errors = self.reprojection_error(p3ds_test, p2ds) 873 | errors_reproj = errors[good] 874 | 875 | if extra is not None: 876 | ids = extra['ids_map'] 877 | objp = extra['objp'] 878 | min_scale = np.min(objp[objp > 0]) 879 | n_boards = int(np.max(ids)) + 1 880 | a = sub+n3d 881 | rvecs = params[a:a+n_boards*3].reshape(-1, 3) 882 | tvecs = params[a+n_boards*3:a+n_boards*6].reshape(-1, 3) 883 | expected = transform_points(objp, rvecs[ids], tvecs[ids]) 884 | errors_obj = 2 * (p3ds_test - expected).ravel() / min_scale 885 | else: 886 | errors_obj = np.array([]) 887 | 888 | return np.hstack([errors_reproj, errors_obj]) 889 | 890 | 891 | def _jac_sparsity_bundle(self, p2ds, n_cam_params, extra): 892 | """Given an CxNx2 array of 2D points, 893 | where N is the number of points and C is the number of cameras, 894 | compute the sparsity structure of the jacobian for bundle adjustment""" 895 | 896 | point_indices = np.zeros(p2ds.shape, dtype='int32') 897 | cam_indices = np.zeros(p2ds.shape, dtype='int32') 898 | 899 | for i in range(p2ds.shape[1]): 900 | point_indices[:, i] = i 901 | 902 | for j in range(p2ds.shape[0]): 903 | cam_indices[j] = j 904 | 905 | good = ~np.isnan(p2ds) 906 | 907 | if extra is not None: 908 | ids = extra['ids_map'] 909 | n_boards = int(np.max(ids)) + 1 910 | total_board_params = n_boards * (3 + 3) # rvecs + tvecs 911 | else: 912 | n_boards = 0 913 | total_board_params = 0 914 | 915 | n_cams = p2ds.shape[0] 916 | n_points = p2ds.shape[1] 917 | total_params_reproj = n_cams * n_cam_params + n_points * 3 918 | n_params = total_params_reproj + total_board_params 919 | 920 | n_good_values = np.sum(good) 921 | if extra is not None: 922 | n_errors = n_good_values + n_points * 3 923 | else: 924 | n_errors = n_good_values 925 | 926 | A_sparse = dok_matrix((n_errors, n_params), dtype='int16') 927 | 928 | cam_indices_good = cam_indices[good] 929 | point_indices_good = point_indices[good] 930 | 931 | # -- reprojection error -- 932 | ix = np.arange(n_good_values) 933 | 934 | ## update camera params based on point error 935 | for i in range(n_cam_params): 936 | A_sparse[ix, cam_indices_good * n_cam_params + i] = 1 937 | 938 | ## update point position based on point error 939 | for i in range(3): 940 | A_sparse[ix, n_cams * n_cam_params + point_indices_good * 3 + i] = 1 941 | 942 | # -- match for the object points-- 943 | if extra is not None: 944 | point_ix = np.arange(n_points) 945 | 946 | ## update all the camera parameters 947 | # A_sparse[n_good_values:n_good_values+n_points*3, 948 | # 0:n_cams*n_cam_params] = 1 949 | 950 | ## update board rotation and translation based on error from expected 951 | for i in range(3): 952 | for j in range(3): 953 | A_sparse[n_good_values + point_ix*3 + i, 954 | total_params_reproj + ids*3 + j] = 1 955 | A_sparse[n_good_values + point_ix*3 + i, 956 | total_params_reproj + n_boards*3 + ids*3 + j] = 1 957 | 958 | 959 | ## update point position based on error from expected 960 | for i in range(3): 961 | A_sparse[n_good_values + point_ix*3 + i, 962 | n_cams*n_cam_params + point_ix*3 + i] = 1 963 | 964 | 965 | return A_sparse 966 | 967 | def _initialize_params_bundle(self, p2ds, extra, only_extrinsics): 968 | """Given an CxNx2 array of 2D points, 969 | where N is the number of points and C is the number of cameras, 970 | initializes the parameters for bundle adjustment""" 971 | 972 | cam_params = np.hstack([cam.get_params(only_extrinsics) for cam in self.cameras]) 973 | n_cam_params = len(cam_params) // len(self.cameras) 974 | 975 | total_cam_params = len(cam_params) 976 | 977 | n_cams, n_points, _ = p2ds.shape 978 | assert n_cams == len(self.cameras), \ 979 | "number of cameras in CameraGroup does not " \ 980 | "match number of cameras in 2D points given" 981 | 982 | p3ds = self.triangulate(p2ds) 983 | 984 | if extra is not None: 985 | ids = extra['ids_map'] 986 | n_boards = int(np.max(ids[~np.isnan(ids)])) + 1 987 | total_board_params = n_boards * (3 + 3) # rvecs + tvecs 988 | 989 | # initialize to 0 990 | rvecs = np.zeros((n_boards, 3), dtype='float64') 991 | tvecs = np.zeros((n_boards, 3), dtype='float64') 992 | 993 | if 'rvecs' in extra and 'tvecs' in extra: 994 | rvecs_all = extra['rvecs'] 995 | tvecs_all = extra['tvecs'] 996 | for board_num in range(n_boards): 997 | point_id = np.where(ids == board_num)[0][0] 998 | cam_ids_possible = np.where(~np.isnan(p2ds[:, point_id, 0]))[0] 999 | cam_id = np.random.choice(cam_ids_possible) 1000 | M_cam = self.cameras[cam_id].get_extrinsics_mat() 1001 | M_board_cam = make_M(rvecs_all[cam_id, point_id], 1002 | tvecs_all[cam_id, point_id]) 1003 | M_board = np.matmul(inv(M_cam), M_board_cam) 1004 | rvec, tvec = get_rtvec(M_board) 1005 | rvecs[board_num] = rvec 1006 | tvecs[board_num] = tvec 1007 | 1008 | 1009 | else: 1010 | total_board_params = 0 1011 | 1012 | x0 = np.zeros(total_cam_params + p3ds.size + total_board_params) 1013 | x0[:total_cam_params] = cam_params 1014 | x0[total_cam_params:total_cam_params+p3ds.size] = p3ds.ravel() 1015 | 1016 | if extra is not None: 1017 | start_board = total_cam_params+p3ds.size 1018 | x0[start_board:start_board + n_boards*3] = rvecs.ravel() 1019 | x0[start_board + n_boards*3:start_board + n_boards*6] = \ 1020 | tvecs.ravel() 1021 | 1022 | return x0, n_cam_params 1023 | 1024 | def optim_points(self, points, p3ds, 1025 | constraints=[], 1026 | constraints_weak=[], 1027 | scale_smooth=4, 1028 | scale_length=2, scale_length_weak=0.5, 1029 | reproj_error_threshold=15, reproj_loss='soft_l1', 1030 | n_deriv_smooth=1, scores=None, verbose=False, 1031 | n_fixed=0): 1032 | """ 1033 | Take in an array of 2D points of shape CxNxJx2, 1034 | an array of 3D points of shape NxJx3, 1035 | and an array of constraints of shape Kx2, where 1036 | C: number of camera 1037 | N: number of frames 1038 | J: number of joints 1039 | K: number of constraints 1040 | 1041 | This function creates an optimized array of 3D points of shape NxJx3. 1042 | 1043 | Example constraints: 1044 | constraints = [[0, 1], [1, 2], [2, 3]] 1045 | (meaning that lengths of segments 0->1, 1->2, 2->3 are all constant) 1046 | 1047 | """ 1048 | assert points.shape[0] == len(self.cameras), \ 1049 | "Invalid points shape, first dim should be equal to" \ 1050 | " number of cameras ({}), but shape is {}".format( 1051 | len(self.cameras), points.shape 1052 | ) 1053 | 1054 | n_cams, n_frames, n_joints, _ = points.shape 1055 | constraints = np.array(constraints) 1056 | constraints_weak = np.array(constraints_weak) 1057 | 1058 | p3ds_intp = np.apply_along_axis(interpolate_data, 0, p3ds) 1059 | 1060 | p3ds_med = np.apply_along_axis(medfilt_data, 0, p3ds_intp, size=7) 1061 | 1062 | default_smooth = 1.0/np.mean(np.abs(np.diff(p3ds_med, axis=0))) 1063 | scale_smooth_full = scale_smooth * default_smooth 1064 | 1065 | t1 = time.time() 1066 | 1067 | x0 = self._initialize_params_triangulation( 1068 | p3ds_intp, constraints, constraints_weak) 1069 | 1070 | x0[~np.isfinite(x0)] = 0 1071 | 1072 | if n_fixed > 0: 1073 | p3ds_fixed = p3ds_intp[:n_fixed] 1074 | else: 1075 | p3ds_fixed = None 1076 | 1077 | jac = self._jac_sparsity_triangulation( 1078 | points, constraints, constraints_weak, n_deriv_smooth) 1079 | 1080 | opt2 = optimize.least_squares(self._error_fun_triangulation, 1081 | x0=x0, jac_sparsity=jac, 1082 | loss='linear', 1083 | ftol=1e-3, 1084 | verbose=2*verbose, 1085 | args=(points, 1086 | constraints, 1087 | constraints_weak, 1088 | scores, 1089 | scale_smooth_full, 1090 | scale_length, 1091 | scale_length_weak, 1092 | reproj_error_threshold, 1093 | reproj_loss, 1094 | n_deriv_smooth, 1095 | p3ds_fixed)) 1096 | 1097 | p3ds_new2 = opt2.x[:p3ds.size].reshape(p3ds.shape) 1098 | 1099 | if n_fixed > 0: 1100 | p3ds_new2 = np.vstack([p3ds_fixed, p3ds_new2[n_fixed:]]) 1101 | 1102 | t2 = time.time() 1103 | 1104 | if verbose: 1105 | print('optimization took {:.2f} seconds'.format(t2 - t1)) 1106 | 1107 | return p3ds_new2 1108 | 1109 | 1110 | def optim_points_possible(self, points, p3ds, 1111 | constraints=[], 1112 | constraints_weak=[], 1113 | scale_smooth=4, 1114 | scale_length=2, scale_length_weak=0.5, 1115 | reproj_error_threshold=15, reproj_loss='soft_l1', 1116 | n_deriv_smooth=1, scores=None, verbose=False): 1117 | """ 1118 | Take in an array of 2D points of shape CxNxJxPx2, 1119 | an array of 3D points of shape NxJx3, 1120 | and an array of constraints of shape Kx2, where 1121 | C: number of camera 1122 | N: number of frames 1123 | J: number of joints 1124 | P: number of possible options per point 1125 | K: number of constraints 1126 | 1127 | This function creates an optimized array of 3D points of shape NxJx3. 1128 | 1129 | Example constraints: 1130 | constraints = [[0, 1], [1, 2], [2, 3]] 1131 | (meaning that lengths of segments 0->1, 1->2, 2->3 are all constant) 1132 | 1133 | """ 1134 | assert points.shape[0] == len(self.cameras), \ 1135 | "Invalid points shape, first dim should be equal to" \ 1136 | " number of cameras ({}), but shape is {}".format( 1137 | len(self.cameras), points.shape 1138 | ) 1139 | 1140 | n_cams, n_frames, n_joints, n_possible, _ = points.shape 1141 | constraints = np.array(constraints) 1142 | constraints_weak = np.array(constraints_weak) 1143 | 1144 | p3ds_intp = np.apply_along_axis(interpolate_data, 0, p3ds) 1145 | 1146 | p3ds_med = np.apply_along_axis(medfilt_data, 0, p3ds_intp, size=7) 1147 | 1148 | default_smooth = 1.0/np.mean(np.abs(np.diff(p3ds_med, axis=0))) 1149 | scale_smooth_full = scale_smooth * default_smooth 1150 | 1151 | t1 = time.time() 1152 | 1153 | x0 = self._initialize_params_triangulation_possible( 1154 | p3ds_intp, points, constraints=constraints, constraints_weak=constraints_weak) 1155 | 1156 | print('getting jacobian...') 1157 | jac = self._jac_sparsity_triangulation_possible( 1158 | points, 1159 | constraints=constraints, 1160 | constraints_weak=constraints_weak, 1161 | n_deriv_smooth=n_deriv_smooth) 1162 | 1163 | beta = 5 1164 | 1165 | print('starting optimization...') 1166 | opt2 = optimize.least_squares(self._error_fun_triangulation_possible, 1167 | x0=x0, jac_sparsity=jac, 1168 | loss='linear', 1169 | ftol=1e-3, 1170 | verbose=2*verbose, 1171 | args=(points, 1172 | beta, 1173 | constraints, 1174 | constraints_weak, 1175 | scores, 1176 | scale_smooth_full, 1177 | scale_length, 1178 | scale_length_weak, 1179 | reproj_error_threshold, 1180 | reproj_loss, 1181 | n_deriv_smooth)) 1182 | params = opt2.x 1183 | 1184 | p3ds_new2 = params[:p3ds.size].reshape(p3ds.shape) 1185 | 1186 | bad = np.isnan(points[:, :, :, :, 0]) 1187 | all_bad = np.all(bad, axis=3) 1188 | 1189 | n_params_norm = p3ds.size + len(constraints) + len(constraints_weak) 1190 | 1191 | alphas = np.zeros((n_cams, n_frames, n_joints, n_possible), dtype='float64') 1192 | alphas[~bad] = params[n_params_norm:] 1193 | 1194 | alphas_exp = np.exp(beta * alphas) 1195 | alphas_exp[bad] = 0 1196 | alphas_sum = np.sum(alphas_exp, axis=3) 1197 | alphas_sum[all_bad] = 1 1198 | alphas_norm = alphas_exp / alphas_sum[:, :, :, None] 1199 | alphas_norm[bad] = np.nan 1200 | 1201 | t2 = time.time() 1202 | 1203 | if verbose: 1204 | print('optimization took {:.2f} seconds'.format(t2 - t1)) 1205 | 1206 | return p3ds_new2, alphas_norm 1207 | 1208 | 1209 | def triangulate_optim(self, points, init_ransac=False, init_progress=False, 1210 | **kwargs): 1211 | """ 1212 | Take in an array of 2D points of shape CxNxJx2, and an array of constraints of shape Kx2, where 1213 | C: number of camera 1214 | N: number of frames 1215 | J: number of joints 1216 | K: number of constraints 1217 | 1218 | This function creates an optimized array of 3D points of shape NxJx3. 1219 | 1220 | Example constraints: 1221 | constraints = [[0, 1], [1, 2], [2, 3]] 1222 | (meaning that lengths of segments 0->1, 1->2, 2->3 are all constant) 1223 | 1224 | """ 1225 | 1226 | assert points.shape[0] == len(self.cameras), \ 1227 | "Invalid points shape, first dim should be equal to" \ 1228 | " number of cameras ({}), but shape is {}".format( 1229 | len(self.cameras), points.shape 1230 | ) 1231 | 1232 | n_cams, n_frames, n_joints, _ = points.shape 1233 | # constraints = np.array(constraints) 1234 | # constraints_weak = np.array(constraints_weak) 1235 | 1236 | points_shaped = points.reshape(n_cams, n_frames*n_joints, 2) 1237 | if init_ransac: 1238 | p3ds, picked, p2ds, errors = self.triangulate_ransac(points_shaped, progress=init_progress) 1239 | points = p2ds.reshape(points.shape) 1240 | else: 1241 | p3ds = self.triangulate(points_shaped, progress=init_progress) 1242 | p3ds = p3ds.reshape((n_frames, n_joints, 3)) 1243 | 1244 | c = np.isfinite(p3ds[:, :, 0]) 1245 | if np.sum(c) < 20: 1246 | print("warning: not enough 3D points to run optimization") 1247 | return p3ds 1248 | 1249 | return self.optim_points(points, p3ds, **kwargs) 1250 | 1251 | 1252 | 1253 | @jit(forceobj=True, parallel=True) 1254 | def _error_fun_triangulation(self, params, p2ds, 1255 | constraints=[], 1256 | constraints_weak=[], 1257 | scores=None, 1258 | scale_smooth=10000, 1259 | scale_length=1, 1260 | scale_length_weak=0.2, 1261 | reproj_error_threshold=100, 1262 | reproj_loss='soft_l1', 1263 | n_deriv_smooth=1, 1264 | p3ds_fixed=None): 1265 | n_cams, n_frames, n_joints, _ = p2ds.shape 1266 | 1267 | n_3d = n_frames * n_joints * 3 1268 | n_constraints = len(constraints) 1269 | n_constraints_weak = len(constraints_weak) 1270 | 1271 | # load params 1272 | p3ds = params[:n_3d].reshape((n_frames, n_joints, 3)) 1273 | joint_lengths = np.array(params[n_3d:n_3d+n_constraints]) 1274 | joint_lengths_weak = np.array(params[n_3d+n_constraints:]) 1275 | 1276 | ## if fixed points, first n_fixed parameter points are ignored 1277 | ## and replacement points are put in 1278 | ## this way we can keep rest of code the same, especially _jac_sparsity_triangulation 1279 | if p3ds_fixed is not None: 1280 | n_fixed = p3ds_fixed.shape[0] 1281 | p3ds = np.vstack([p3ds_fixed, p3ds[n_fixed:]]) 1282 | 1283 | # reprojection errors 1284 | p3ds_flat = p3ds.reshape(-1, 3) 1285 | p2ds_flat = p2ds.reshape((n_cams, -1, 2)) 1286 | errors = self.reprojection_error(p3ds_flat, p2ds_flat) 1287 | if scores is not None: 1288 | scores_flat = scores.reshape((n_cams, -1)) 1289 | errors = errors * scores_flat[:, :, None] 1290 | errors_reproj = errors[~np.isnan(p2ds_flat)] 1291 | 1292 | rp = reproj_error_threshold 1293 | errors_reproj = np.abs(errors_reproj) 1294 | if reproj_loss == 'huber': 1295 | bad = errors_reproj > rp 1296 | errors_reproj[bad] = rp*(2*np.sqrt(errors_reproj[bad]/rp) - 1) 1297 | elif reproj_loss == 'linear': 1298 | pass 1299 | elif reproj_loss == 'soft_l1': 1300 | errors_reproj = rp*2*(np.sqrt(1+errors_reproj/rp)-1) 1301 | 1302 | # temporal constraint 1303 | errors_smooth = np.diff(p3ds, n=n_deriv_smooth, axis=0).ravel() * scale_smooth 1304 | 1305 | # joint length constraint 1306 | errors_lengths = np.empty((n_constraints, n_frames), dtype='float64') 1307 | for cix, (a, b) in enumerate(constraints): 1308 | lengths = np.linalg.norm(p3ds[:, a] - p3ds[:, b], axis=1) 1309 | expected = joint_lengths[cix] 1310 | errors_lengths[cix] = 100*(lengths - expected)/expected 1311 | errors_lengths = errors_lengths.ravel() * scale_length 1312 | 1313 | errors_lengths_weak = np.empty((n_constraints_weak, n_frames), dtype='float64') 1314 | for cix, (a, b) in enumerate(constraints_weak): 1315 | lengths = np.linalg.norm(p3ds[:, a] - p3ds[:, b], axis=1) 1316 | expected = joint_lengths_weak[cix] 1317 | errors_lengths_weak[cix] = 100*(lengths - expected)/expected 1318 | errors_lengths_weak = errors_lengths_weak.ravel() * scale_length_weak 1319 | 1320 | return np.hstack([errors_reproj, errors_smooth, 1321 | errors_lengths, errors_lengths_weak]) 1322 | 1323 | def _error_fun_triangulation_possible(self, params, p2ds, 1324 | beta=2, 1325 | constraints=[], 1326 | constraints_weak=[], 1327 | *args): 1328 | # extract alphas from end of params 1329 | # soft argmax for picking the appropriate points from p2ds 1330 | # pass the points to error_fun_triangulate_possible for residuals 1331 | # add errors to keep the alphas in check 1332 | # return all the errors 1333 | 1334 | n_cams, n_frames, n_joints, n_possible, _ = p2ds.shape 1335 | 1336 | n_3d = n_frames*n_joints*3 1337 | n_constraints = len(constraints) 1338 | n_constraints_weak = len(constraints_weak) 1339 | n_params_norm = n_3d+n_constraints+n_constraints_weak 1340 | 1341 | # load params 1342 | bad = np.isnan(p2ds[:, :, :, :, 0]) 1343 | all_bad = np.all(bad, axis=3) 1344 | 1345 | alphas = np.zeros((n_cams, n_frames, n_joints, n_possible), dtype='float64') 1346 | alphas[~bad] = params[n_params_norm:] 1347 | params_rest = np.array(params[:n_params_norm]) 1348 | 1349 | # get normalized alphas 1350 | alphas_exp = np.exp(beta * alphas) 1351 | alphas_exp[bad] = 0 1352 | alphas_sum = np.sum(alphas_exp, axis=3) 1353 | alphas_sum[all_bad] = 1 1354 | alphas_norm = alphas_exp / alphas_sum[:, :, :, None] 1355 | 1356 | # extract the 2D points using soft argmax 1357 | p2ds_test = np.copy(p2ds) 1358 | p2ds_test[bad] = 0 1359 | p2ds_adj = np.sum(alphas_norm[:, :, :, :, None] * p2ds_test, axis=3) 1360 | p2ds_adj[all_bad] = np.nan 1361 | 1362 | errors = self._error_fun_triangulation(params_rest, p2ds_adj, 1363 | constraints, constraints_weak, *args) 1364 | 1365 | alphas_test = alphas_norm[~all_bad] 1366 | errors_alphas = (1 - np.std(alphas_test, axis=1)) * 10 1367 | 1368 | return np.hstack([errors, errors_alphas]) 1369 | 1370 | 1371 | def _initialize_params_triangulation(self, p3ds, 1372 | constraints=[], 1373 | constraints_weak=[]): 1374 | joint_lengths = np.empty(len(constraints), dtype='float64') 1375 | joint_lengths_weak = np.empty(len(constraints_weak), dtype='float64') 1376 | 1377 | for cix, (a, b) in enumerate(constraints): 1378 | lengths = np.linalg.norm(p3ds[:, a] - p3ds[:, b], axis=1) 1379 | joint_lengths[cix] = np.median(lengths) 1380 | 1381 | 1382 | for cix, (a, b) in enumerate(constraints_weak): 1383 | lengths = np.linalg.norm(p3ds[:, a] - p3ds[:, b], axis=1) 1384 | joint_lengths_weak[cix] = np.median(lengths) 1385 | 1386 | all_lengths = np.hstack([joint_lengths, joint_lengths_weak]) 1387 | med = np.median(all_lengths) 1388 | if med == 0: 1389 | med = 1e-3 1390 | 1391 | mad = np.median(np.abs(all_lengths - med)) 1392 | 1393 | joint_lengths[joint_lengths == 0] = med 1394 | joint_lengths_weak[joint_lengths_weak == 0] = med 1395 | joint_lengths[joint_lengths > med+mad*5] = med 1396 | joint_lengths_weak[joint_lengths_weak > med+mad*5] = med 1397 | 1398 | return np.hstack([p3ds.ravel(), joint_lengths, joint_lengths_weak]) 1399 | 1400 | def _initialize_params_triangulation_possible(self, p3ds, p2ds, **kwargs): 1401 | # initialize params using above function 1402 | # initialize alphas to 1 for first one and 0 for other possible 1403 | 1404 | n_cams, n_frames, n_joints, n_possible, _ = p2ds.shape 1405 | good = ~np.isnan(p2ds[:, :, :, :, 0]) 1406 | 1407 | alphas = np.zeros((n_cams, n_frames, n_joints, n_possible), dtype='float64') 1408 | alphas[:, :, :, 0] = 0 1409 | 1410 | params = self._initialize_params_triangulation(p3ds, **kwargs) 1411 | params_full = np.hstack([params, alphas[good]]) 1412 | 1413 | return params_full 1414 | 1415 | def _jac_sparsity_triangulation(self, p2ds, 1416 | constraints=[], 1417 | constraints_weak=[], 1418 | n_deriv_smooth=1): 1419 | n_cams, n_frames, n_joints, _ = p2ds.shape 1420 | n_constraints = len(constraints) 1421 | n_constraints_weak = len(constraints_weak) 1422 | 1423 | p2ds_flat = p2ds.reshape((n_cams, -1, 2)) 1424 | 1425 | point_indices = np.zeros(p2ds_flat.shape, dtype='int32') 1426 | for i in range(p2ds_flat.shape[1]): 1427 | point_indices[:, i] = i 1428 | 1429 | point_indices_3d = np.arange(n_frames*n_joints)\ 1430 | .reshape((n_frames, n_joints)) 1431 | 1432 | good = ~np.isnan(p2ds_flat) 1433 | n_errors_reproj = np.sum(good) 1434 | n_errors_smooth = (n_frames-n_deriv_smooth) * n_joints * 3 1435 | n_errors_lengths = n_constraints * n_frames 1436 | n_errors_lengths_weak = n_constraints_weak * n_frames 1437 | 1438 | n_errors = n_errors_reproj + n_errors_smooth + \ 1439 | n_errors_lengths + n_errors_lengths_weak 1440 | 1441 | n_3d = n_frames*n_joints*3 1442 | n_params = n_3d + n_constraints + n_constraints_weak 1443 | 1444 | point_indices_good = point_indices[good] 1445 | 1446 | A_sparse = dok_matrix((n_errors, n_params), dtype='int16') 1447 | 1448 | # constraints for reprojection errors 1449 | ix_reproj = np.arange(n_errors_reproj) 1450 | for k in range(3): 1451 | A_sparse[ix_reproj, point_indices_good * 3 + k] = 1 1452 | 1453 | # sparse constraints for smoothness in time 1454 | frames = np.arange(n_frames-n_deriv_smooth) 1455 | for j in range(n_joints): 1456 | for n in range(n_deriv_smooth+1): 1457 | pa = point_indices_3d[frames, j] 1458 | pb = point_indices_3d[frames+n, j] 1459 | for k in range(3): 1460 | A_sparse[n_errors_reproj + pa*3 + k, pb*3 + k] = 1 1461 | 1462 | ## -- strong constraints -- 1463 | # joint lengths should change with joint lengths errors 1464 | start = n_errors_reproj + n_errors_smooth 1465 | frames = np.arange(n_frames) 1466 | for cix, (a, b) in enumerate(constraints): 1467 | A_sparse[start + cix*n_frames + frames, n_3d+cix] = 1 1468 | 1469 | # points should change accordingly to match joint lengths too 1470 | frames = np.arange(n_frames) 1471 | for cix, (a, b) in enumerate(constraints): 1472 | pa = point_indices_3d[frames, a] 1473 | pb = point_indices_3d[frames, b] 1474 | for k in range(3): 1475 | A_sparse[start + cix*n_frames + frames, pa*3 + k] = 1 1476 | A_sparse[start + cix*n_frames + frames, pb*3 + k] = 1 1477 | 1478 | ## -- weak constraints -- 1479 | # joint lengths should change with joint lengths errors 1480 | start = n_errors_reproj + n_errors_smooth + n_errors_lengths 1481 | frames = np.arange(n_frames) 1482 | for cix, (a, b) in enumerate(constraints_weak): 1483 | A_sparse[start + cix*n_frames + frames, n_3d + n_constraints + cix] = 1 1484 | 1485 | # points should change accordingly to match joint lengths too 1486 | frames = np.arange(n_frames) 1487 | for cix, (a, b) in enumerate(constraints_weak): 1488 | pa = point_indices_3d[frames, a] 1489 | pb = point_indices_3d[frames, b] 1490 | for k in range(3): 1491 | A_sparse[start + cix*n_frames + frames, pa*3 + k] = 1 1492 | A_sparse[start + cix*n_frames + frames, pb*3 + k] = 1 1493 | 1494 | return A_sparse 1495 | 1496 | def _jac_sparsity_triangulation_possible(self, p2ds_full, **kwargs): 1497 | # initialize sparse jacobian using above function 1498 | # extend to include alphas from parameters 1499 | ## TODO: this initialization is really slow for some reason 1500 | 1501 | n_cams, n_frames, n_joints, n_possible, _ = p2ds_full.shape 1502 | good_full = ~np.isnan(p2ds_full[:, :, :, :, 0]) 1503 | any_good = np.any(good_full, axis=3) 1504 | 1505 | n_alphas = np.sum(good_full) 1506 | n_errors_alphas = np.sum(any_good) 1507 | 1508 | p2ds = p2ds_full[:, :, :, 0] 1509 | A_sparse = self._jac_sparsity_triangulation(p2ds, **kwargs) 1510 | 1511 | n_errors, n_params = A_sparse.shape 1512 | 1513 | B_sparse = dok_matrix((n_errors + n_errors_alphas, n_params + n_alphas), dtype='int16') 1514 | for r, c in zip(*A_sparse.nonzero()): 1515 | B_sparse[r, c] = A_sparse[r, c] 1516 | 1517 | point_indices_2d = np.arange(n_cams*n_frames*n_joints)\ 1518 | .reshape(n_cams, n_frames, n_joints) 1519 | point_indices_2d_rep = np.repeat(point_indices_2d[:, :, :, None], 2, axis=3) 1520 | point_indices_2d_good = point_indices_2d_rep[~np.isnan(p2ds)] 1521 | point_indices_good = point_indices_2d[any_good] 1522 | 1523 | alpha_indices = np.zeros((n_cams, n_frames, n_joints, n_possible), dtype='int64') 1524 | for pnum in range(n_possible): 1525 | alpha_indices[:, :, :, pnum] = point_indices_2d 1526 | 1527 | alpha_indices_good = alpha_indices[good_full] 1528 | 1529 | # alphas should change according to the reprojection error for each corresponding point 1530 | point_indices_2d_good_find = defaultdict(list) 1531 | for ix, p in enumerate(point_indices_2d_good): 1532 | point_indices_2d_good_find[p].append(ix) 1533 | 1534 | for ix, alpha_index in enumerate(alpha_indices_good): 1535 | B_sparse[point_indices_2d_good_find[alpha_index], 1536 | n_params + ix] = 1 1537 | 1538 | # alphas should change according to the alpha errors 1539 | point_indices_good_find = dict() 1540 | for ix, p in enumerate(point_indices_good): 1541 | point_indices_good_find[p] = ix 1542 | 1543 | for ix, alpha_index in enumerate(alpha_indices_good): 1544 | if alpha_index in point_indices_good_find: 1545 | err_ix = n_errors + point_indices_good_find[alpha_index] 1546 | B_sparse[err_ix, n_params + ix] = 1 1547 | 1548 | return B_sparse 1549 | 1550 | def copy(self): 1551 | cameras = [cam.copy() for cam in self.cameras] 1552 | metadata = copy(self.metadata) 1553 | return CameraGroup(cameras, metadata) 1554 | 1555 | def set_rotations(self, rvecs): 1556 | for cam, rvec in zip(self.cameras, rvecs): 1557 | cam.set_rotation(rvec) 1558 | 1559 | def set_translations(self, tvecs): 1560 | for cam, tvec in zip(self.cameras, tvecs): 1561 | cam.set_translation(tvec) 1562 | 1563 | def get_rotations(self): 1564 | rvecs = [] 1565 | for cam in self.cameras: 1566 | rvec = cam.get_rotation() 1567 | rvecs.append(rvec) 1568 | return np.array(rvecs) 1569 | 1570 | def get_translations(self): 1571 | tvecs = [] 1572 | for cam in self.cameras: 1573 | tvec = cam.get_translation() 1574 | tvecs.append(tvec) 1575 | return np.array(tvecs) 1576 | 1577 | def get_names(self): 1578 | return [cam.get_name() for cam in self.cameras] 1579 | 1580 | def set_names(self, names): 1581 | for cam, name in zip(self.cameras, names): 1582 | cam.set_name(name) 1583 | 1584 | def average_error(self, p2ds, median=False): 1585 | p3ds = self.triangulate(p2ds) 1586 | errors = self.reprojection_error(p3ds, p2ds, mean=True) 1587 | if median: 1588 | return np.median(errors) 1589 | else: 1590 | return np.mean(errors) 1591 | 1592 | def calibrate_rows(self, all_rows, board, 1593 | init_intrinsics=True, init_extrinsics=True, verbose=True, 1594 | **kwargs): 1595 | assert len(all_rows) == len(self.cameras), \ 1596 | "Number of camera detections does not match number of cameras" 1597 | 1598 | for rows, camera in zip(all_rows, self.cameras): 1599 | size = camera.get_size() 1600 | 1601 | assert size is not None, \ 1602 | "Camera with name {} has no specified frame size".format(camera.get_name()) 1603 | 1604 | if init_intrinsics: 1605 | objp, imgp = board.get_all_calibration_points(rows) 1606 | mixed = [(o, i) for (o, i) in zip(objp, imgp) if len(o) >= 9] 1607 | objp, imgp = zip(*mixed) 1608 | matrix = cv2.initCameraMatrix2D(objp, imgp, tuple(size)) 1609 | camera.set_camera_matrix(matrix.copy()) 1610 | camera.zero_distortions() 1611 | 1612 | print(self.get_dicts()) 1613 | 1614 | for i, (row, cam) in enumerate(zip(all_rows, self.cameras)): 1615 | all_rows[i] = board.estimate_pose_rows(cam, row) 1616 | 1617 | new_rows = [[r for r in rows if r['ids'].size >= 8] for rows in all_rows] 1618 | merged = merge_rows(new_rows) 1619 | imgp, extra = extract_points(merged, board, min_cameras=2) 1620 | 1621 | if init_extrinsics: 1622 | rtvecs = extract_rtvecs(merged) 1623 | if verbose: 1624 | pprint(get_connections(rtvecs, self.get_names())) 1625 | rvecs, tvecs = get_initial_extrinsics(rtvecs, self.get_names()) 1626 | self.set_rotations(rvecs) 1627 | self.set_translations(tvecs) 1628 | 1629 | error = self.bundle_adjust_iter(imgp, extra, verbose=verbose, **kwargs) 1630 | 1631 | return error 1632 | 1633 | def get_rows_videos(self, videos, board, verbose=True): 1634 | all_rows = [] 1635 | 1636 | for cix, (cam, cam_videos) in enumerate(zip(self.cameras, videos)): 1637 | rows_cam = [] 1638 | for vnum, vidname in enumerate(cam_videos): 1639 | if verbose: print(vidname) 1640 | try: 1641 | rows = board.detect_video(vidname, prefix=vnum, progress=verbose) 1642 | except Exception as e: 1643 | print("WARNING: board detection failed for video {}".format(vidname)) 1644 | print(e) 1645 | rows = [] 1646 | if verbose: print("{} boards detected".format(len(rows))) 1647 | rows_cam.extend(rows) 1648 | all_rows.append(rows_cam) 1649 | 1650 | return all_rows 1651 | 1652 | def set_camera_sizes_videos(self, videos): 1653 | for cix, (cam, cam_videos) in enumerate(zip(self.cameras, videos)): 1654 | rows_cam = [] 1655 | for vnum, vidname in enumerate(cam_videos): 1656 | try: 1657 | params = get_video_params(vidname) 1658 | size = (params['width'], params['height']) 1659 | cam.set_size(size) 1660 | except Exception as e: 1661 | print("WARNING: camera size detection failed for video {}".format(vidname)) 1662 | print(e) 1663 | 1664 | 1665 | def calibrate_videos(self, videos, board, 1666 | init_intrinsics=True, init_extrinsics=True, verbose=True, 1667 | **kwargs): 1668 | """Takes as input a list of list of video filenames, one list of each camera. 1669 | Also takes a board which specifies what should be detected in the videos""" 1670 | 1671 | all_rows = self.get_rows_videos(videos, board, verbose=verbose) 1672 | if init_extrinsics: 1673 | self.set_camera_sizes_videos(videos) 1674 | 1675 | error = self.calibrate_rows(all_rows, board, 1676 | init_intrinsics=init_intrinsics, 1677 | init_extrinsics=init_extrinsics, 1678 | verbose=verbose, **kwargs) 1679 | return error, all_rows 1680 | 1681 | def get_dicts(self): 1682 | out = [] 1683 | for cam in self.cameras: 1684 | out.append(cam.get_dict()) 1685 | return out 1686 | 1687 | def from_dicts(arr): 1688 | cameras = [] 1689 | for d in arr: 1690 | if 'fisheye' in d and d['fisheye']: 1691 | cam = FisheyeCamera.from_dict(d) 1692 | else: 1693 | cam = Camera.from_dict(d) 1694 | cameras.append(cam) 1695 | return CameraGroup(cameras) 1696 | 1697 | def from_names(names, fisheye=False): 1698 | cameras = [] 1699 | for name in names: 1700 | if fisheye: 1701 | cam = FisheyeCamera(name=name) 1702 | else: 1703 | cam = Camera(name=name) 1704 | cameras.append(cam) 1705 | return CameraGroup(cameras) 1706 | 1707 | def load_dicts(self, arr): 1708 | for cam, d in zip(self.cameras, arr): 1709 | cam.load_dict(d) 1710 | 1711 | def dump(self, fname): 1712 | dicts = self.get_dicts() 1713 | names = ['cam_{}'.format(i) for i in range(len(dicts))] 1714 | master_dict = dict(zip(names, dicts)) 1715 | master_dict['metadata'] = self.metadata 1716 | with open(fname, 'w') as f: 1717 | toml.dump(master_dict, f) 1718 | 1719 | def load(fname): 1720 | master_dict = toml.load(fname) 1721 | keys = sorted(master_dict.keys()) 1722 | items = [master_dict[k] for k in keys if k != 'metadata'] 1723 | cgroup = CameraGroup.from_dicts(items) 1724 | if 'metadata' in master_dict: 1725 | cgroup.metadata = master_dict['metadata'] 1726 | return cgroup 1727 | 1728 | def resize_cameras(self, scale): 1729 | for cam in self.cameras: 1730 | cam.resize_camera(scale) 1731 | --------------------------------------------------------------------------------