├── .github └── workflows │ └── venv-cache.yml ├── .gitignore ├── README.md ├── amber_videos ├── traffic-overhead.mp4 ├── traffic-scene-shorter.mp4 └── traffic-scene.mp4 ├── apperception ├── apperception_benchmark.py ├── layers.py ├── lens.py ├── metadata.py ├── metadata_context.py ├── metadata_context_executor.py ├── metadata_tests.py ├── metadata_util.py ├── mono_depth_estimator.py ├── object_tracker.py ├── point.py ├── tracker.py ├── video_context.py ├── video_context_executor.py ├── video_util.py ├── world.py └── world_executor.py ├── apperception_example.ipynb ├── config.py ├── docker-compose.yml ├── h264_videos ├── BirdsInCage_h264.mp4 ├── CrowdRun_h264.mp4 ├── ElFuente1_h264.mp4 ├── ElFuente2_h264.mp4 ├── OldTownCross_h264.mp4 ├── Seeking_h264.mp4 └── Tennis_h264.mp4 ├── pg_extender └── overlap.sql ├── poetry.lock ├── pyproject.toml ├── requirements.txt └── setup.sh /.github/workflows/venv-cache.yml: -------------------------------------------------------------------------------- 1 | name: Cache Poetry Virtual Environment 2 | on: 3 | push: 4 | branches: 'main' 5 | 6 | jobs: 7 | cache-venv: 8 | name: Cache Poetry Virtual Environment 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | with: 13 | token: ${{ secrets.GH_PAT || github.token }} 14 | 15 | - name: Set up Python 3.8 16 | uses: actions/setup-python@v3 17 | with: 18 | python-version: 3.8 19 | 20 | - name: Install and configure Poetry 21 | uses: snok/install-poetry@v1 22 | with: 23 | virtualenvs-create: true 24 | virtualenvs-in-project: true 25 | installer-parallel: true 26 | 27 | - name: Cache Poetry virtualenv 28 | uses: actions/cache@v2 29 | id: cached-poetry-dependencies 30 | with: 31 | path: .venv 32 | key: poetry-venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} 33 | 34 | - name: Install Dependencies 35 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 36 | run: poetry install --no-interaction -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __MACOSX 2 | __pycache__ 3 | .DS_Store 4 | .ipynb_checkpoints 5 | 6 | yolov4-deepsort 7 | yolov5-deepsort 8 | .mypy_cache 9 | .pytest_cache 10 | .idea 11 | output 12 | .apperception_cache 13 | env 14 | .venv 15 | .coverage 16 | coverage.xml 17 | 18 | *.csv 19 | *.pickle 20 | maps 21 | samples 22 | sweeps 23 | v1.0-mini 24 | *.pickle 25 | 26 | *.json 27 | data/nuscenes 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apperception: a database management system optimized for multi-video applications 2 | 3 | Apperception ingests video data from many perspectives and makes them queryable as a single multidimensional visual object. It incorporates new techniques for optimizing, executing, and storing multi-perspective video data. 
4 | 
5 | ## How to Set Up the Apperception Repo 
6 | ### Install dependencies: 
7 | ``` 
8 | apt-get update && apt-get install -y postgresql python3-opencv 
9 | ``` 
10 | ### Clone the Apperception repo 
11 | For SSH: 
12 | ``` 
13 | git clone git@github.com:apperception-db/apperception.git 
14 | cd apperception 
15 | ``` 
16 | For HTTPS: 
17 | ``` 
18 | git clone https://github.com/apperception-db/apperception.git 
19 | cd apperception 
20 | ``` 
21 | ### Download the official pre-trained YOLOv4 weights 
22 | 
23 | Copy yolov4.weights from your downloads folder into this repository. For the demo, we use yolov4-tiny.weights. 
24 | 
25 | If you want to use yolov4-tiny.weights, a smaller model that runs detections faster but is less accurate, download the file here: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights 
26 | 
27 | Our object tracker uses YOLOv4 to make the object detections, which Deep SORT then uses to track. An official pre-trained YOLOv4 object detector model is available that can detect 80 classes. For easy demo purposes we use these pre-trained weights for our tracker. Download the pre-trained yolov4.weights file: https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT 
28 | 
29 | Download the YOLOv4 model and unzip it in the current repo: 
30 | https://drive.google.com/file/d/1g5D0pU-PKoe7uTHI7cRjFlGRm-xRQ1ZL/view?usp=sharing 
31 | 
32 | ### Then set up the repo 
33 | ``` 
34 | chmod u+x ./setup.sh 
35 | chmod 733 ./setup.sh 
36 | ./setup.sh 
37 | ``` 
38 | ## Trying the Apperception Demo without TASM 
39 | TASM requires nvidia-docker/nvidia-docker2 (https://www.ibm.com/docs/en/maximo-vi/8.2.0?topic=planning-installing-docker-nvidia-docker2) at runtime, as well as a machine with an encode-capable GPU (https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new). To try out Apperception's features without TASM, run the following: 
40 | ### Start the Apperception metadata store, MobilityDB (https://github.com/MobilityDB/MobilityDB) 
41 | ``` 
42 | docker volume create mobilitydb_data 
43 | docker run --name "mobilitydb" -d -p 25432:5432 -v mobilitydb_data:/var/lib/postgresql mobilitydb/mobilitydb 
44 | ``` 
45 | We then need to set up MobilityDB with customized functions: 
46 | ``` 
47 | cd pg_extender 
48 | psql -h localhost -p 25432 -d mobilitydb -U docker 
49 | Enter "docker" as the default password 
50 | \i overlap.sql; 
51 | \q 
52 | ``` 
53 | 
54 | ### Try the demo 
55 | In the apperception repo, run: 
56 | `jupyter notebook` or `python3 -m notebook` 
57 | 
58 | The demo notebook first constructs the world. Then it queries for the trajectories of the cars that appeared in an area of interest within some time interval. 
59 | 
60 | ## To fully activate Apperception with TASM: 
61 | ``` 
62 | docker-compose up 
63 | cd pg_extender 
64 | psql -h 172.19.0.3 -d mobilitydb -U docker 
65 | Enter "docker" as the default password 
66 | \i overlap.sql 
67 | \q 
68 | docker ps 
69 | ``` 
70 | After fetching the CONTAINER_ID of apperceptiontasm/tasm:latest, run: 
71 | ``` 
72 | docker exec -it {CONTAINER_ID of apperceptiontasm/tasm:latest} /bin/bash 
73 | ``` 
74 | Now we are inside the TASM environment: 
75 | ``` 
76 | cd /apperception/ 
77 | pip3 install -r requirements.txt 
78 | ``` 
79 | ### Try the demo 
80 | Inside the Docker container, run: 
81 | `jupyter notebook --ip 172.19.0.2 --port 8890 --allow-root &` 
82 | Then open the Jupyter URL directly. 
83 | The demo notebook first constructs the world. Then it queries for the trajectories and videos of the cars that appeared in an area of interest within some time interval.
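
The sketch below shows roughly what that query looks like in code. It is a minimal example adapted from `apperception/apperception_benchmark.py` in this repo, not the notebook itself; the lens parameters, video path, and 3-second interval are just the example values used there, and it assumes the `apperception/` package directory is on the Python path.
``` 
# Sketch of the demo query flow, adapted from apperception/apperception_benchmark.py.
# The camera attributes and video file below are illustrative example values.
import lens
import point
from world import World

fps = 30

# Construct the world and attach a camera (pinhole lens + camera position).
traffic_world = World(name="traffic_scene", units="metrics")
cam_lens = lens.PinholeLens([1280, 720], (0, 0, 0), 120, 0)
location = point.Point("p1", "cam1", 0, 0, 0, None, "pos")
traffic_world = traffic_world.camera(
    cam_id="cam1", location=location, ratio=0.5,
    video_file="./amber_videos/traffic-scene-shorter.mp4",
    metadata_identifier="traffic_scene_cam1", lens=cam_lens)

# Run recognition once so detections and tracks are saved to the metadata store.
traffic_world.recognize("cam1").execute()

# Query: cars inside a region of interest during the first 3 seconds.
volume = traffic_world.select_intersection_of_interest_or_use_default(cam_id="cam1")
filtered_world = (traffic_world
                  .predicate(lambda obj: obj.object_type == "car")
                  .predicate(lambda obj: obj.location in volume, {"volume": volume})
                  .interval([0, fps * 3]))
filtered_ids = filtered_world.selectkey(distinct=True).execute()
id_array = [e[0] for e in filtered_ids]

# Fetch the trajectories of the matching cars.
trajectory = (traffic_world
              .predicate(lambda obj: obj.object_id in id_array, {"id_array": id_array})
              .get_trajectory(distinct=True)
              .execute())
``` 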
84 | 85 | 86 | -------------------------------------------------------------------------------- /amber_videos/traffic-overhead.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/amber_videos/traffic-overhead.mp4 -------------------------------------------------------------------------------- /amber_videos/traffic-scene-shorter.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/amber_videos/traffic-scene-shorter.mp4 -------------------------------------------------------------------------------- /amber_videos/traffic-scene.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/amber_videos/traffic-scene.mp4 -------------------------------------------------------------------------------- /apperception/apperception_benchmark.py: -------------------------------------------------------------------------------- 1 | ### IMPORTS 2 | import cv2 3 | 4 | from world import * 5 | from world_executor import * 6 | from video_util import * 7 | from metadata_util import * 8 | import lens 9 | import point 10 | 11 | #import tasm 12 | 13 | ### Let's define some attribute for constructing the world first 14 | name = 'traffic_scene' # world name 15 | units = 'metrics' # world units 16 | video_file = './amber_videos/traffic-scene-shorter.mp4' #example video file 17 | lens_attrs = {'fov': 120, 18 | 'cam_origin': (0, 0, 0), 19 | 'skew_factor': 0} 20 | point_attrs = {'p_id': 'p1', 21 | 'cam_id': 'cam1', 22 | 'x': 0, 23 | 'y': 0, 24 | 'z': 0, 25 | 'time': None, 26 | 'type':'pos'} 27 | camera_attrs = {'ratio': 0.5} 28 | fps = 30 29 | 30 | ### First we define a world 31 | traffic_world = World(name=name, units=units) 32 | 33 | ### Secondly we construct the camera 34 | fov, res, cam_origin, skew_factor = lens_attrs['fov'], [1280, 720], lens_attrs['cam_origin'], lens_attrs['skew_factor'] 35 | cam_lens = lens.PinholeLens(res, cam_origin, fov, skew_factor) 36 | 37 | pt_id, cam_id, x, y, z, time, pt_type = point_attrs['p_id'], point_attrs['cam_id'], point_attrs['x'], point_attrs['y'], point_attrs['z'], point_attrs['time'], point_attrs['type'] 38 | location = point.Point(pt_id, cam_id, x, y, z, time, pt_type) 39 | 40 | ratio = camera_attrs['ratio'] 41 | 42 | ### Ingest the camera to the world 43 | traffic_world = traffic_world.camera(cam_id=cam_id, 44 | location=location, 45 | ratio=ratio, 46 | video_file=video_file, 47 | metadata_identifier=name+"_"+cam_id, 48 | lens=cam_lens) 49 | 50 | ### Call execute on the world to run the detection algorithm and save the real data to the database 51 | recognized_world = traffic_world.recognize(cam_id) 52 | recognized_world.execute() 53 | 54 | volume = traffic_world.select_intersection_of_interest_or_use_default(cam_id=cam_id) 55 | filtered_world = traffic_world.predicate(lambda obj:obj.object_type == "car").predicate(lambda obj:obj.location in volume, {"volume":volume}) 56 | filtered_world = filtered_world.interval([0,fps*3]) 57 | 58 | ### to get the trajectory and the video over the entire trajectory(amber case) 59 | filtered_ids = filtered_world.selectkey(distinct = True).execute() 60 | print("filtered_ids are", filtered_ids) 61 | print(len(filtered_ids)) 62 | if 
len(filtered_ids)>0: 63 | id_array = [e[0] for e in filtered_ids] 64 | ### Fetch the trajectory of these items 65 | trajectory = traffic_world.predicate(lambda obj: obj.object_id in id_array, {"id_array":id_array}).get_trajectory(distinct=True).execute() 66 | traffic_world.overlay_trajectory(cam_id, trajectory) 67 | ### Get the videos of these items 68 | # entire_video = traffic_world.predicate(lambda obj: obj.object_id in id_array, {"id_array":id_array}).get_video() 69 | # entire_video.execute() 70 | 71 | -------------------------------------------------------------------------------- /apperception/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | def disp_to_depth(disp, min_depth, max_depth): 17 | """Convert network's sigmoid output into depth prediction 18 | The formula for this conversion is given in the 'additional considerations' 19 | section of the paper. 20 | """ 21 | min_disp = 1 / max_depth 22 | max_disp = 1 / min_depth 23 | scaled_disp = min_disp + (max_disp - min_disp) * disp 24 | depth = 1 / scaled_disp 25 | return scaled_disp, depth 26 | 27 | 28 | def transformation_from_parameters(axisangle, translation, invert=False): 29 | """Convert the network's (axisangle, translation) output into a 4x4 matrix 30 | """ 31 | R = rot_from_axisangle(axisangle) 32 | t = translation.clone() 33 | 34 | if invert: 35 | R = R.transpose(1, 2) 36 | t *= -1 37 | 38 | T = get_translation_matrix(t) 39 | 40 | if invert: 41 | M = torch.matmul(R, T) 42 | else: 43 | M = torch.matmul(T, R) 44 | 45 | return M 46 | 47 | 48 | def get_translation_matrix(translation_vector): 49 | """Convert a translation vector into a 4x4 transformation matrix 50 | """ 51 | T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device) 52 | 53 | t = translation_vector.contiguous().view(-1, 3, 1) 54 | 55 | T[:, 0, 0] = 1 56 | T[:, 1, 1] = 1 57 | T[:, 2, 2] = 1 58 | T[:, 3, 3] = 1 59 | T[:, :3, 3, None] = t 60 | 61 | return T 62 | 63 | 64 | def rot_from_axisangle(vec): 65 | """Convert an axisangle rotation into a 4x4 transformation matrix 66 | (adapted from https://github.com/Wallacoloo/printipi) 67 | Input 'vec' has to be Bx1x3 68 | """ 69 | angle = torch.norm(vec, 2, 2, True) 70 | axis = vec / (angle + 1e-7) 71 | 72 | ca = torch.cos(angle) 73 | sa = torch.sin(angle) 74 | C = 1 - ca 75 | 76 | x = axis[..., 0].unsqueeze(1) 77 | y = axis[..., 1].unsqueeze(1) 78 | z = axis[..., 2].unsqueeze(1) 79 | 80 | xs = x * sa 81 | ys = y * sa 82 | zs = z * sa 83 | xC = x * C 84 | yC = y * C 85 | zC = z * C 86 | xyC = x * yC 87 | yzC = y * zC 88 | zxC = z * xC 89 | 90 | rot = torch.zeros((vec.shape[0], 4, 4)).to(device=vec.device) 91 | 92 | rot[:, 0, 0] = torch.squeeze(x * xC + ca) 93 | rot[:, 0, 1] = torch.squeeze(xyC - zs) 94 | rot[:, 0, 2] = torch.squeeze(zxC + ys) 95 | rot[:, 1, 0] = torch.squeeze(xyC + zs) 96 | rot[:, 1, 1] = torch.squeeze(y * yC + ca) 97 | rot[:, 1, 2] = torch.squeeze(yzC - xs) 98 | rot[:, 2, 0] = torch.squeeze(zxC - ys) 99 | rot[:, 2, 1] = torch.squeeze(yzC + xs) 100 | rot[:, 2, 2] = 
torch.squeeze(z * zC + ca) 101 | rot[:, 3, 3] = 1 102 | 103 | return rot 104 | 105 | 106 | class ConvBlock(nn.Module): 107 | """Layer to perform a convolution followed by ELU 108 | """ 109 | def __init__(self, in_channels, out_channels): 110 | super(ConvBlock, self).__init__() 111 | 112 | self.conv = Conv3x3(in_channels, out_channels) 113 | self.nonlin = nn.ELU(inplace=True) 114 | 115 | def forward(self, x): 116 | out = self.conv(x) 117 | out = self.nonlin(out) 118 | return out 119 | 120 | 121 | class Conv3x3(nn.Module): 122 | """Layer to pad and convolve input 123 | """ 124 | def __init__(self, in_channels, out_channels, use_refl=True): 125 | super(Conv3x3, self).__init__() 126 | 127 | if use_refl: 128 | self.pad = nn.ReflectionPad2d(1) 129 | else: 130 | self.pad = nn.ZeroPad2d(1) 131 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 132 | 133 | def forward(self, x): 134 | out = self.pad(x) 135 | out = self.conv(out) 136 | return out 137 | 138 | 139 | class BackprojectDepth(nn.Module): 140 | """Layer to transform a depth image into a point cloud 141 | """ 142 | def __init__(self, batch_size, height, width): 143 | super(BackprojectDepth, self).__init__() 144 | 145 | self.batch_size = batch_size 146 | self.height = height 147 | self.width = width 148 | 149 | meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy') 150 | self.id_coords = np.stack(meshgrid, axis=0).astype(np.float32) 151 | self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords), 152 | requires_grad=False) 153 | 154 | self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width), 155 | requires_grad=False) 156 | 157 | self.pix_coords = torch.unsqueeze(torch.stack( 158 | [self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0) 159 | self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1) 160 | self.pix_coords = nn.Parameter(torch.cat([self.pix_coords, self.ones], 1), 161 | requires_grad=False) 162 | 163 | def forward(self, depth, inv_K): 164 | cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords) 165 | cam_points = depth.view(self.batch_size, 1, -1) * cam_points 166 | cam_points = torch.cat([cam_points, self.ones], 1) 167 | 168 | return cam_points 169 | 170 | 171 | class Project3D(nn.Module): 172 | """Layer which projects 3D points into a camera with intrinsics K and at position T 173 | """ 174 | def __init__(self, batch_size, height, width, eps=1e-7): 175 | super(Project3D, self).__init__() 176 | 177 | self.batch_size = batch_size 178 | self.height = height 179 | self.width = width 180 | self.eps = eps 181 | 182 | def forward(self, points, K, T): 183 | P = torch.matmul(K, T)[:, :3, :] 184 | 185 | cam_points = torch.matmul(P, points) 186 | 187 | pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps) 188 | pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width) 189 | pix_coords = pix_coords.permute(0, 2, 3, 1) 190 | pix_coords[..., 0] /= self.width - 1 191 | pix_coords[..., 1] /= self.height - 1 192 | pix_coords = (pix_coords - 0.5) * 2 193 | return pix_coords 194 | 195 | 196 | def upsample(x): 197 | """Upsample input tensor by a factor of 2 198 | """ 199 | return F.interpolate(x, scale_factor=2, mode="nearest") 200 | 201 | 202 | def get_smooth_loss(disp, img): 203 | """Computes the smoothness loss for a disparity image 204 | The color image is used for edge-aware smoothness 205 | """ 206 | grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:]) 207 | grad_disp_y = torch.abs(disp[:, :, :-1, :] - 
disp[:, :, 1:, :]) 208 | 209 | grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True) 210 | grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True) 211 | 212 | grad_disp_x *= torch.exp(-grad_img_x) 213 | grad_disp_y *= torch.exp(-grad_img_y) 214 | 215 | return grad_disp_x.mean() + grad_disp_y.mean() 216 | 217 | 218 | class SSIM(nn.Module): 219 | """Layer to compute the SSIM loss between a pair of images 220 | """ 221 | def __init__(self): 222 | super(SSIM, self).__init__() 223 | self.mu_x_pool = nn.AvgPool2d(3, 1) 224 | self.mu_y_pool = nn.AvgPool2d(3, 1) 225 | self.sig_x_pool = nn.AvgPool2d(3, 1) 226 | self.sig_y_pool = nn.AvgPool2d(3, 1) 227 | self.sig_xy_pool = nn.AvgPool2d(3, 1) 228 | 229 | self.refl = nn.ReflectionPad2d(1) 230 | 231 | self.C1 = 0.01 ** 2 232 | self.C2 = 0.03 ** 2 233 | 234 | def forward(self, x, y): 235 | x = self.refl(x) 236 | y = self.refl(y) 237 | 238 | mu_x = self.mu_x_pool(x) 239 | mu_y = self.mu_y_pool(y) 240 | 241 | sigma_x = self.sig_x_pool(x ** 2) - mu_x ** 2 242 | sigma_y = self.sig_y_pool(y ** 2) - mu_y ** 2 243 | sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y 244 | 245 | SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2) 246 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2) 247 | 248 | return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 0, 1) 249 | 250 | 251 | def compute_depth_errors(gt, pred): 252 | """Computation of error metrics between predicted and ground truth depths 253 | """ 254 | thresh = torch.max((gt / pred), (pred / gt)) 255 | a1 = (thresh < 1.25 ).float().mean() 256 | a2 = (thresh < 1.25 ** 2).float().mean() 257 | a3 = (thresh < 1.25 ** 3).float().mean() 258 | 259 | rmse = (gt - pred) ** 2 260 | rmse = torch.sqrt(rmse.mean()) 261 | 262 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 263 | rmse_log = torch.sqrt(rmse_log.mean()) 264 | 265 | abs_rel = torch.mean(torch.abs(gt - pred) / gt) 266 | 267 | sq_rel = torch.mean((gt - pred) ** 2 / gt) 268 | 269 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 270 | -------------------------------------------------------------------------------- /apperception/lens.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import radians 3 | 4 | class Lens: 5 | def __init__(self, resolution, cam_origin): 6 | """ 7 | Construct a lens for the camera that translates to 3D world coordinates. 8 | 9 | Args: 10 | field_of_view: Angle of field of view of camera 11 | resolution: Tuple of video resolution 12 | cam_origin: Points of where camera is located in the world 13 | skew_factor: (Optional) Float factor to correct shearness of camera 14 | """ 15 | x, y = resolution 16 | self.cam_origin = cam_origin 17 | cam_x, cam_y = cam_origin 18 | 19 | def pixel_to_world(self, pixel_coord, depth): 20 | """ 21 | Translate pixel coordinates to world coordinates. 22 | """ 23 | return None 24 | 25 | def world_to_pixel(self, world_coord, depth): 26 | """ 27 | Translate world coordinates to pixel coordinates 28 | """ 29 | return None 30 | 31 | class VRLens(Lens): 32 | def __init__(self, resolution, cam_origin, yaw, roll, pitch): 33 | """ 34 | Construct a lens for the camera that translates to 3D world, spherical 35 | coordinates. 
36 | 37 | Args: 38 | field_of_view: Angle of field of view of camera 39 | resolution: Tuple of video resolution 40 | cam_origin: Points of where camera is located in the world 41 | skew_factor: (Optional) Float factor to correct shearness of camera 42 | """ 43 | x, y = resolution 44 | self.cam_origin = cam_origin 45 | cam_x, cam_y, cam_z = cam_origin 46 | 47 | yaw, pitch, roll = np.deg2rad(yaw), np.deg2rad(pitch), np.deg2rad(roll) 48 | # Transformation 1 49 | # X_1, X_2, X_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), -np.sin(pitch) 50 | 51 | # Y_1 = np.cos(yaw)*np.sin(pitch)*np.sin(roll) - np.sin(yaw)*np.cos(roll) 52 | # Y_2 = np.sin(yaw)*np.sin(pitch)*np.sin(roll) + np.cos(yaw)*np.cos(roll) 53 | # Y_3 = np.cos(pitch)*np.sin(roll) 54 | 55 | # Z_1 = np.cos(yaw)*np.sin(pitch)*np.cos(roll) + np.sin(yaw)*np.sin(roll) 56 | # Z_2 = np.sin(yaw)*np.sin(pitch)*np.cos(roll) - np.cos(yaw)*np.sin(roll) 57 | # Z_3 = np.cos(pitch)*np.cos(roll) 58 | 59 | # self.transform = np.matrix([[X_1, Y_1, Z_1, cam_x], 60 | # [X_2, Y_2, Z_2, cam_y], 61 | # [X_3, Y_3, Z_3, cam_z], 62 | # [0, 0, 0, 1] 63 | # ]) 64 | 65 | # Transformation 2 66 | # z = yaw, y = pitch, x = roll 67 | # R_1, R_2, R_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), np.sin(pitch) 68 | # R_4 = np.sin(roll)*np.sin(pitch)*np.cos(yaw) - np.cos(roll)*np.sin(yaw) 69 | # R_5 = np.sin(roll)*np.sin(pitch)*np.sin(yaw) + np.cos(roll)*np.cos(yaw) 70 | # R_6 = np.sin(roll)*np.cos(pitch) 71 | # R_7 = np.cos(roll)*np.sin(pitch)*np.cos(yaw) - np.sin(roll)*np.sin(yaw) 72 | # R_8 = np.sin(roll)*np.cos(yaw) + np.cos(roll)*np.sin(pitch)*np.sin(yaw) 73 | # R_9 = np.cos(roll)*np.cos(pitch) 74 | 75 | # self.transform = np.matrix([[R_1, R_2, R_3, cam_x], 76 | # [R_4, R_5, R_6, cam_y], 77 | # [R_7, R_8, R_9, cam_z], 78 | # [0, 0, 0, 1] 79 | # ]) 80 | 81 | # Transformation 3 82 | # z = yaw, y = pitch, x = roll 83 | # R_1, R_2, R_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), np.sin(pitch) 84 | # R_4 = np.sin(roll)*np.sin(pitch)*np.cos(yaw) - np.cos(roll)*np.sin(yaw) 85 | # R_5 = np.sin(roll)*np.sin(pitch)*np.sin(yaw) + np.cos(roll)*np.cos(yaw) 86 | # R_6 = np.sin(roll)*-np.cos(pitch) 87 | # R_7 = -np.cos(roll)*np.sin(pitch)*np.cos(yaw) - np.sin(roll)*np.sin(yaw) 88 | # R_8 = np.sin(roll)*np.cos(yaw) - np.cos(roll)*np.sin(pitch)*np.sin(yaw) 89 | # R_9 = np.cos(roll)*np.cos(pitch) 90 | 91 | # rotation_mat = np.matrix([[R_1, R_2, R_3], 92 | # [R_4, R_5, R_6], 93 | # [R_7, R_8, R_9]]) 94 | 95 | # cam_org_vec = np.matrix([[cam_x], [cam_y], [cam_z]]) 96 | # self.col_vec = np.ravel(rotation_mat @ cam_org_vec) 97 | # col_x, col_y, col_z = self.col_vec 98 | # self.transform = np.matrix([[R_1, R_2, R_3, -col_x], 99 | # [R_4, R_5, R_6, -col_y], 100 | # [R_7, R_8, R_9, -col_z], 101 | # [0, 0, 0, 1] 102 | # ]) 103 | 104 | # Transformation 4 105 | # X_1, X_2, X_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), -np.sin(pitch) 106 | 107 | # Y_1 = np.cos(yaw)*np.sin(pitch)*np.sin(roll) - np.sin(yaw)*np.cos(roll) 108 | # Y_2 = np.sin(yaw)*np.sin(pitch)*np.sin(roll) + np.cos(yaw)*np.cos(roll) 109 | # Y_3 = np.cos(pitch)*np.sin(roll) 110 | 111 | # Z_1 = np.cos(yaw)*np.sin(pitch)*np.cos(roll) + np.sin(yaw)*np.sin(roll) 112 | # Z_2 = np.sin(yaw)*np.sin(pitch)*np.cos(roll) - np.cos(yaw)*np.sin(roll) 113 | # Z_3 = np.cos(pitch)*np.cos(roll) 114 | 115 | # rotation_mat = np.matrix([[X_1, Y_1, Z_1], 116 | # [X_2, Y_2, Z_2], 117 | # [X_3, Y_3, Z_3]]) 118 | # cam_org_vec = np.matrix([[cam_x], [cam_y], [cam_z]]) 119 | # self.col_vec = np.ravel(rotation_mat 
@ cam_org_vec) 120 | # col_x, col_y, col_z = self.col_vec 121 | # self.transform = np.matrix([[X_1, Y_1, Z_1, col_x], 122 | # [X_2, Y_2, Z_2, col_y], 123 | # [X_3, Y_3, Z_3, col_z], 124 | # [0, 0, 0, 1] 125 | # ]) 126 | 127 | # Transformation 5 -- Lefthanded rotation matrix 128 | R_1, R_2, R_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), -np.sin(pitch) 129 | R_4 = np.sin(roll)*np.sin(pitch)*np.cos(yaw) - np.cos(roll)*np.sin(yaw) 130 | R_5 = np.sin(roll)*np.sin(pitch)*np.sin(yaw) + np.cos(roll)*np.cos(yaw) 131 | R_6 = np.sin(roll)*np.cos(pitch) 132 | 133 | R_7 = np.cos(roll)*np.sin(pitch)*np.cos(yaw) + np.sin(roll)*np.sin(yaw) 134 | R_8 = np.cos(roll)*np.sin(pitch)*np.sin(yaw) - np.sin(roll)*np.cos(yaw) 135 | R_9 = np.cos(roll)*np.cos(pitch) 136 | 137 | rotation_mat = np.matrix([[R_1, R_2, R_3], 138 | [R_4, R_5, R_6], 139 | [R_7, R_8, R_9]]) 140 | cam_org_vec = np.matrix([[cam_x], [cam_y], [cam_z]]) 141 | self.col_vec = np.ravel(rotation_mat @ cam_org_vec) 142 | col_x, col_y, col_z = self.col_vec 143 | self.transform = np.matrix([[R_1, R_2, R_3, -col_x], 144 | [R_4, R_5, R_6, -col_y], 145 | [R_7, R_8, R_9, -col_z], 146 | [0, 0, 0, 1] 147 | ]) 148 | 149 | self.inv_transform = np.linalg.inv(self.transform) 150 | 151 | def pixel_to_world(self, pixel_coord, depth): 152 | """ 153 | Translate pixel coordinates to world coordinates. 154 | """ 155 | x, y = pixel_coord 156 | pixel = np.matrix([[x], [y], [depth], [0]]) 157 | return self.transform @ pixel 158 | 159 | def pixels_to_world(self, pixel_coords, depths): 160 | """ 161 | Translate multiple pixel coordinates to world coordinates. 162 | """ 163 | x, y = pixel_coords 164 | pixels = np.matrix([x, y, depths, np.ones(len(depths))]) 165 | print(pixels) 166 | return self.transform @ pixels 167 | 168 | def world_to_pixel(self, world_coord): 169 | """ 170 | Translate world coordinates to pixel coordinates 171 | """ 172 | x, y, z, w = world_coord 173 | world_pixel = np.matrix([[x], [y], [z], [w]]) 174 | return self.inv_transform @ world_pixel 175 | 176 | def world_to_pixels(self, world_coords): 177 | """ 178 | Translate world coordinates to pixel coordinates 179 | """ 180 | x, y, z = world_coords 181 | world_pixel = np.matrix([x, y, z, np.zeros(len(x))]) 182 | return self.inv_transform @ world_pixel 183 | 184 | 185 | class PinholeLens(Lens): 186 | # TODO: (@Vanessa) change all the places where pinhole lens appears and change arguments 187 | def __init__(self, resolution, cam_origin, field_of_view, skew_factor): 188 | """ 189 | Construct a lens for the camera that translates to 3D world coordinates. 
190 | 191 | Args: 192 | field_of_view: Angle of field of view of camera 193 | resolution: Tuple of video resolution 194 | cam_origin: Points of where camera is located in the world 195 | skew_factor: (Optional) Float factor to correct shearness of camera 196 | depth: Float of depth of view from the camera 197 | """ 198 | self.fov = field_of_view 199 | x, y = resolution 200 | self.focal_x = (x/2)/np.tan(radians(field_of_view/2)) 201 | self.focal_y = (y/2)/np.tan(radians(field_of_view/2)) 202 | self.cam_origin = cam_origin 203 | cam_x, cam_y, cam_z = cam_origin 204 | self.alpha = skew_factor 205 | self.inv_transform = np.linalg.inv(np.matrix([[self.focal_x, self.alpha, cam_x], 206 | [0, self.focal_y, cam_y], 207 | [0, 0, 1] 208 | ])) 209 | self.transform = np.matrix([[self.focal_x, self.alpha, cam_x, 0], 210 | [0, self.focal_y, cam_y, 0], 211 | [0, 0, 1, 0] 212 | ]) 213 | 214 | def pixel_to_world(self, pixel_coord, depth): 215 | """ 216 | Translate pixel coordinates to world coordinates. 217 | """ 218 | x, y = pixel_coord 219 | pixel = np.matrix([[x], [y], [depth]]) 220 | return (self.inv_transform @ pixel).flatten().tolist()[0] 221 | 222 | def pixels_to_world(self, pixel_coords, depths): 223 | """ 224 | Translate multiple pixel coordinates to world coordinates. 225 | """ 226 | x, y = pixel_coords 227 | pixels = np.matrix([x, y, depths]) 228 | return self.inv_transform @ pixels 229 | 230 | def world_to_pixel(self, world_coord): 231 | """ 232 | Translate world coordinates to pixel coordinates 233 | """ 234 | x, y, z = world_coord 235 | world_pixel = np.matrix([[x], [y], [z], [1]]) 236 | return self.transform @ world_pixel 237 | 238 | def world_to_pixels(self, world_coords): 239 | """ 240 | Translate world coordinates to pixel coordinates 241 | """ 242 | x, y, z = world_coords 243 | world_pixel = np.matrix([x, y, z, np.ones(len(x))]) 244 | return self.transform @ world_pixel -------------------------------------------------------------------------------- /apperception/metadata.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | class View: 3 | def __init__(self, view_name): 4 | self.view_name = view_name 5 | self.default = False 6 | 7 | def from_context(self, context): 8 | self.context = context 9 | 10 | def resolve_key(self, column_key): 11 | if column_key in self.__class__.__dict__: 12 | return self.__class__.__dict__[column_key] 13 | else: 14 | return None 15 | 16 | def contain(self, column_key): 17 | return column_key in self.__dict__.keys() 18 | 19 | class TrajectoryView(View): 20 | object_id = "itemId" 21 | object_type = "objectType" 22 | color = "color" 23 | trajectory = "trajCentroids" 24 | table_name = "Item_General_Trajectory" 25 | def __init__(self): 26 | super().__init__(self.table_name) 27 | self.default = True 28 | 29 | class LocationView(View): 30 | location = "trajBbox" 31 | timestamp = "timestamp" 32 | table_name = "General_Bbox" 33 | def __init__(self): 34 | super().__init__(self.table_name) 35 | self.default = True 36 | 37 | class MetadataView(View): 38 | view_name = "metadata_view" 39 | object_id = TrajectoryView.object_id 40 | object_type = TrajectoryView.object_type 41 | color = TrajectoryView.color 42 | trajectory = TrajectoryView.trajectory 43 | location = LocationView.location 44 | timestamp = LocationView.timestamp 45 | view_map = {object_id:TrajectoryView, 46 | object_type:TrajectoryView, 47 | color:TrajectoryView, 48 | trajectory:TrajectoryView, 49 | location:LocationView} 50 | def __init__(self): 51 | 
super().__init__(self.view_name) 52 | self.default = True 53 | self.trajectory_view = TrajectoryView() 54 | self.location_view = LocationView() 55 | 56 | def map_view(self, column_key): 57 | if self.view_map[column_key] == TrajectoryView: 58 | return self.trajectory_view 59 | else: 60 | return self.location_view 61 | 62 | def resolve_key(self, column_key): 63 | return self.trajectory_view.resolve_key(column_key) or self.location_view.resolve_key(column_key) 64 | metadata_view = MetadataView() -------------------------------------------------------------------------------- /apperception/metadata_context.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import inspect 3 | import os 4 | import copy 5 | from typing import Callable 6 | import uncompyle6 7 | import psycopg2 8 | from metadata_util import * 9 | from metadata import * 10 | 11 | # TODO: Add checks for names 12 | # Select Node (contains Column Nodes and Aggregate Nodes 13 | # within Column Nodes) 14 | class Project: 15 | def __init__(self, root): 16 | self.root = root 17 | self.distinct = False 18 | self.column_nodes = [] 19 | 20 | def append(self, column_node): 21 | self.column_nodes.append(column_node) 22 | 23 | def find(self, column_name): 24 | for column_node in self.column_nodes: 25 | if column_node.column_name == column_name: 26 | return column_node 27 | return None 28 | 29 | def remove(self, column_name): 30 | column_node = self.find(column_name) 31 | self.column_nodes.remove(column_node) 32 | 33 | def is_empty(self): 34 | return len(self.column_nodes) == 0 35 | 36 | class Column: 37 | 38 | def __init__(self, column_name): 39 | self.column_name = column_name 40 | self.aggr_nodes = [] 41 | 42 | def aggregate(self, func_name:str, parameters:list=[], special_args=[]): 43 | if func_name in common_aggregation: 44 | if len(special_args) > 0: 45 | agg_node = eval(func_name)(func_name, parameters, special_args) 46 | else: 47 | agg_node = eval(func_name)(func_name, parameters) 48 | else: 49 | agg_node = Aggregate(func_name, parameters) 50 | self.aggr_nodes.append(agg_node) 51 | return self 52 | 53 | def get_coordinates(self): 54 | self.aggregate("asMFJSON", special_args=["coordinates"]) 55 | 56 | def interval(self, starttime, endtime): 57 | self.aggregate("atPeriodSet", parameters=["\'{[%s, %s)}\'"%(starttime, endtime)]) 58 | 59 | class Aggregate: 60 | 61 | def __init__(self, func_name:str, parameters:list=[]): 62 | self.func_name = func_name 63 | self.parameters = parameters 64 | 65 | class asMFJSON(Aggregate): 66 | 67 | def __init__(self, func_name="asMFJSON", parameters:list=[], interesting_fields = [""]): 68 | super().__init__(func_name, parameters) 69 | self.interesting_fields = interesting_fields 70 | # def function_map(self): 71 | 72 | class Scan: 73 | def __init__(self, root): 74 | self.view = None 75 | self.root = root 76 | 77 | def add_view(self, view): 78 | self.view = view 79 | 80 | class Filter: 81 | def __init__(self, root): 82 | self.predicates = [] 83 | self.root = root 84 | 85 | def append(self, predicate): 86 | self.predicates.append(predicate) 87 | predicate.root = self 88 | predicate.decompile() 89 | return self.root.view(use_view=predicate.view_context) 90 | 91 | def is_empty(self): 92 | return len(self.predicates) == 0 93 | 94 | def get_view(self): 95 | return self.root.scan.view 96 | 97 | class Predicate: 98 | 99 | def __init__(self, predicate: Callable[[int], bool], evaluated_var={}): 100 | self.predicate = predicate 101 | s = 
uncompyle6.deparse_code2str(self.predicate.__code__, out=open(os.devnull, "w")) 102 | self.t = ast.parse(s) 103 | self.evaluated_var = evaluated_var 104 | self.root = None 105 | 106 | def decompile(self): 107 | assert self.root 108 | self.attribute, self.operation, self.comparator, self.bool_ops, self.cast_types, self.view_context = decompile_filter(self.t, self.evaluated_var, self.root.get_view()) 109 | def get_compile(self): 110 | return self.attribute, self.operation, self.comparator, self.bool_ops, self.cast_types 111 | 112 | class Group: 113 | def __init__(self, root): 114 | self.group = None 115 | 116 | # Context Root Node 117 | class MetadataContext: 118 | def __init__(self, single_mode = True): 119 | # Initialize the root, which is itself 120 | self.root = self 121 | self.start_time = None 122 | self.project = Project(self.root) 123 | self.scan = Scan(self.root) 124 | self.filter = Filter(self.root) 125 | self.groupby = None 126 | self.single_mode = single_mode 127 | # self.orderby_nodes = [orderby_node1, orderby_node2...] # we dont need these for now 128 | 129 | # Select a specific column 130 | def select_column (self, column_key): 131 | mapped_view = metadata_view.map_view(column_key) 132 | if self.scan.view == None: 133 | self.scan.view = mapped_view 134 | elif self.scan.view.default and mapped_view.default and self.scan.view.view_name != mapped_view.view_name: 135 | self.scan.view = metadata_view 136 | 137 | view_name = mapped_view.view_name 138 | column_node = Column(view_name+"."+column_key) 139 | self.project.append(column_node) 140 | return column_node 141 | 142 | # Remove column in column nodes in question 143 | def delete_column(self, column_name): 144 | self.project.remove(column_name) 145 | 146 | # Restart a context from scratch 147 | def clear(self): 148 | self.project = Project(self.root) 149 | self.scan = Scan(self.root) 150 | self.filter = Filter(self.root) 151 | 152 | def get_columns(self, *argv, distinct=False): 153 | if not self.single_mode: 154 | self.project.distinct = distinct 155 | for arg in argv: 156 | arg(self) 157 | return self 158 | else: 159 | new_context = copy.deepcopy(self) 160 | new_context.project.distinct = distinct 161 | for arg in argv: 162 | new_context = arg(new_context) 163 | return new_context 164 | 165 | ### The following functions would be Apperception commands 166 | def predicate (self, p, evaluated_var = {}): 167 | if not self.single_mode: 168 | new_predicate = Predicate(p, evaluated_var) 169 | self.filter.append(new_predicate) 170 | return self 171 | else: 172 | ### make a copy of self first 173 | new_context = copy.deepcopy(self) 174 | 175 | new_predicate = Predicate(p, evaluated_var) 176 | new_context = new_context.filter.append(new_predicate) 177 | return new_context 178 | 179 | def selectkey(self, distinct = False): 180 | if not self.single_mode: 181 | self.project.distinct = distinct 182 | #self.select_column(MetadataView.camera_id) 183 | self.select_column(MetadataView.object_id) 184 | return self 185 | else: 186 | ### make a copy of self first 187 | new_context = copy.deepcopy(self) 188 | new_context.project.distinct = distinct 189 | 190 | # new_context.select_column(MetadataView.camera_id) 191 | new_context.select_column(MetadataView.object_id) 192 | return new_context 193 | 194 | def get_object_type(self, distinct = False): 195 | if not self.single_mode: 196 | self.project.distinct = distinct 197 | #self.select_column(MetadataView.camera_id) 198 | self.select_column(MetadataView.object_type) 199 | return self 200 | else: 201 | 
### make a copy of self first 202 | new_context = copy.deepcopy(self) 203 | new_context.project.distinct = distinct 204 | 205 | # new_context.select_column(MetadataView.camera_id) 206 | new_context.select_column(MetadataView.object_type) 207 | return new_context 208 | 209 | ### TODO: return a proxy type 210 | def get_trajectory(self, time_interval = [], distinct = False): 211 | if not self.single_mode: 212 | self.project.distinct = distinct 213 | traj_column = self.select_column(MetadataView.trajectory) 214 | starttime, endtime = convert_time(self.start_time, time_interval) 215 | traj_column.interval(starttime, endtime) 216 | traj_column.get_coordinates() 217 | return self 218 | else: 219 | ### make a copy of self first 220 | new_context = copy.deepcopy(self) 221 | new_context.project.distinct = distinct 222 | traj_column = new_context.select_column(MetadataView.trajectory) 223 | starttime, endtime = convert_time(self.start_time, time_interval) 224 | traj_column.interval(starttime, endtime) 225 | traj_column.get_coordinates() 226 | return new_context 227 | 228 | ### TODO: return a proxy type 229 | def get_geo(self, time_interval=[], distinct = False): 230 | if not self.single_mode: 231 | self.project.distinct = distinct 232 | for geo_func in common_geo: 233 | new_trajColumn = self.select_column(MetadataView.location) 234 | new_trajColumn.aggregate(geo_func) 235 | 236 | self.interval(time_interval) 237 | return self 238 | else: 239 | ### make a copy of self first 240 | new_context = copy.deepcopy(self) 241 | new_context.project.distinct = distinct 242 | for geo_func in common_geo: 243 | new_trajColumn = new_context.select_column(MetadataView.location) 244 | new_trajColumn.aggregate(geo_func) 245 | 246 | 247 | new_context.interval(time_interval) 248 | return new_context 249 | 250 | ### TODO: return a proxy type 251 | def interval(self,time_interval): 252 | start, end = convert_time(self.start_time, time_interval) 253 | if not self.single_mode: 254 | self.predicate(lambda obj: Tmin(obj.location) >= start, {"start":"\'"+start+"\'"}) 255 | self.predicate(lambda obj: Tmax(obj.location) < end, {"end":"\'"+end+"\'"}) 256 | return self 257 | else: 258 | new_context = self.predicate(lambda obj: Tmin(obj.location) >= start, {"start":"\'"+start+"\'"}).predicate(lambda obj: Tmax(obj.location) < end, {"end":"\'"+end+"\'"}) 259 | return new_context 260 | 261 | 262 | ### TODO: return a proxy type 263 | def get_time(self, distinct = False): 264 | if not self.single_mode: 265 | self.project.distinct = distinct 266 | new_trajColumn = self.select_column(MetadataView.location) 267 | new_trajColumn.aggregate("Tmin") 268 | return self 269 | else: 270 | ### make a copy of self first 271 | new_context = copy.deepcopy(self) 272 | new_context.project.distinct = distinct 273 | new_trajColumn = new_context.select_column(MetadataView.location) 274 | new_trajColumn.aggregate("Tmin") 275 | return new_context 276 | 277 | ### TODO: return a proxy type 278 | def get_distance(self, time_interval = [], distinct = False): 279 | if not self.single_mode: 280 | self.project.distinct = distinct 281 | traj_column = self.select_column(MetadataView.trajectory) 282 | starttime, endtime = convert_time(self.start_time, time_interval) 283 | traj_column.interval(starttime, endtime) 284 | traj_column.aggregate("cumulativeLength") 285 | return self 286 | else: 287 | ### make a copy of self first 288 | new_context = copy.deepcopy(self) 289 | new_context.project.distinct = distinct 290 | starttime, endtime = convert_time(self.start_time, 
time_interval) 291 | traj_column.interval(starttime, endtime) 292 | traj_column.aggregate("cumulativeLength") 293 | return new_context 294 | 295 | ### TODO: return a proxy type 296 | def get_speed(self, time_interval = [], distinct = False): 297 | if not self.single_mode: 298 | self.project.distinct = distinct 299 | traj_column = self.select_column(MetadataView.trajectory) 300 | starttime, endtime = convert_time(self.start_time, time_interval) 301 | traj_column.interval(starttime, endtime) 302 | traj_column.aggregate("speed") 303 | return self 304 | else: 305 | ### make a copy of self first 306 | new_context = copy.deepcopy(self) 307 | new_context.project.distinct = distinct 308 | traj_column = new_context.select_column(MetadataView.trajectory) 309 | starttime, endtime = convert_time(self.start_time, time_interval) 310 | traj_column.interval(starttime, endtime) 311 | traj_column.aggregate("speed") 312 | return new_context 313 | 314 | 315 | def count(self, key): 316 | ### make a copy of self first 317 | new_context = copy.deepcopy(self) 318 | 319 | count_map = {MetadataContext.get_trajectory:"trajCentroids", 320 | MetadataContext.get_time:"Tmin(trajBbox)", 321 | MetadataContext.selectkey:"distinct(cameraId, itemId)"} 322 | traj_column = new_context.select_column(count_map[key]) 323 | traj_column.aggregate(COUNT) 324 | return new_context 325 | 326 | def group(self, key): 327 | ### make a copy of self first 328 | new_context = copy.deepcopy(self) 329 | new_context.groupby = Group(key) 330 | 331 | ### TODO:Not fully functioned yet 332 | def view(self, view_name="", use_view=None): 333 | if not self.single_mode: 334 | if use_view: 335 | self.scan.add_view(use_view) 336 | else: 337 | temp_view = View(view_name) 338 | temp_view.context = self 339 | self.scan.add_view(temp_view) 340 | return self 341 | else: 342 | ### make a copy of self first 343 | new_context = copy.deepcopy(self) 344 | if use_view: 345 | new_context.scan.add_view(use_view) 346 | else: 347 | temp_view = View(view_name) 348 | temp_view.context = self 349 | new_context.scan.add_view(temp_view) 350 | ### need to figure out the return value of the view command; 351 | return new_context 352 | 353 | def join(self, join_view, join_type = "", join_condition=""): 354 | ### make a copy of self first 355 | new_context = copy.deepcopy(self) 356 | 357 | if join_view.view_name == metadata_view.view_name: 358 | new_context.scan.join(metadata_view.trajectory_view) 359 | new_context.scan.join(metadata_view.location_view) 360 | else: 361 | new_context.scan.join(join_view) 362 | 363 | return new_context 364 | 365 | primarykey = MetadataContext.selectkey 366 | trajectory = MetadataContext.get_trajectory 367 | distance = MetadataContext.get_distance 368 | speed = MetadataContext.get_speed 369 | geometry = MetadataContext.get_geo 370 | object_type = MetadataContext.get_object_type 371 | time = MetadataContext.get_time -------------------------------------------------------------------------------- /apperception/metadata_context_executor.py: -------------------------------------------------------------------------------- 1 | from metadata_context import * 2 | from metadata_util import * 3 | import numpy as np 4 | 5 | # Executor class to execute the context input 6 | # Essentially translates the context to a SQL query that 7 | # the backend and interpret 8 | class MetadataContextExecutor: 9 | def __init__(self, conn, new_context:MetadataContext=None): 10 | if new_context: 11 | self.context(new_context) 12 | self.conn = conn 13 | 14 | # Connect to the 
database 15 | def connect_db(self, 16 | host='localhost', 17 | user=None, 18 | password=None, 19 | port=25432, 20 | database_name=None): 21 | self.conn = psycopg2.connect( 22 | database=database_name, user=user, password=password, host=host, port=port 23 | ) 24 | 25 | def context(self, new_context:MetadataContext): 26 | self.current_context = new_context 27 | return self 28 | 29 | def visit(self, create_view, view_name): 30 | select_query = self.visit_project(self.current_context.project) 31 | from_query = self.visit_scan(self.current_context.scan) 32 | where_query = self.visit_filter(self.current_context.filter) 33 | if create_view: 34 | db_query = "CREATE VIEW "+view_name+" AS "+select_query + from_query + where_query + ";" 35 | print(db_query + "\n") 36 | return "SELECT * FROM " + view_name + ";" 37 | else: 38 | db_query = select_query + from_query + where_query + ";" 39 | print(db_query + "\n") 40 | return db_query 41 | 42 | def visit_project(self, project_node:Project): 43 | select_query = "SELECT " 44 | if project_node.distinct: 45 | select_query += "distinct on(itemId) " 46 | if project_node.is_empty(): 47 | return select_query + "* " 48 | for column_node in project_node.column_nodes: 49 | select_query += self.visit_column(column_node) 50 | select_query += ", " 51 | select_query = select_query[:-2] 52 | return select_query 53 | 54 | def visit_scan(self, scan_node:Scan): 55 | from_query = " From " 56 | if scan_node.view: 57 | if scan_node.view.default: 58 | if scan_node.view == metadata_view: 59 | from_query += metadata_view.trajectory_view.view_name + " INNER JOIN " + metadata_view.location_view.view_name \ 60 | + " USING(itemId) " 61 | else: 62 | from_query = from_query + scan_node.view.view_name + " " 63 | # for view_node in scan_node.views: 64 | # from_query += self.visit_table(view_node) 65 | # from_query += ", " 66 | # from_query = from_query[:-2] 67 | return from_query 68 | 69 | def visit_filter(self, filter_node:Filter): 70 | where_query = " Where " 71 | if filter_node.is_empty(): 72 | return "" 73 | for predicate_node in filter_node.predicates: 74 | where_query += self.visit_predicate(predicate_node) 75 | where_query += " AND " 76 | where_query = where_query[:-5] 77 | return where_query 78 | 79 | def visit_column(self, column_node:Column): 80 | aggregated = column_node.column_name 81 | for aggr_node in column_node.aggr_nodes: 82 | aggregated = translate_aggregation(aggr_node, aggregated) 83 | print(aggregated) 84 | return aggregated 85 | 86 | def visit_table(self, view_node:View): 87 | return view_node.view_name 88 | 89 | def visit_predicate(self, predicate_node:Predicate): 90 | attribute, operation, comparator, bool_ops, cast_types = predicate_node.get_compile() 91 | #assert(len(attribute) == len(operation) == len(comparator) == len(bool_ops) == len(cast_types)) 92 | predicate_query = "" 93 | for i in range(len(attribute)): 94 | attr = attribute[i] 95 | op = operation[i] 96 | comp = comparator[i] 97 | bool_op = bool_ops[i] 98 | #cast_type = cast_types[i] 99 | #cast_str = "::" + cast_type if cast_type != "" else "" 100 | #predicate_query += bool_op + attr + cast_str + op + comp + cast_str 101 | predicate_query += bool_op + attr + op + comp 102 | return predicate_query 103 | 104 | def execute(self, create_view = False, view_name=""): 105 | self.cursor = self.conn.cursor() 106 | self.cursor.execute(self.visit(create_view=create_view, view_name=view_name)) 107 | return np.asarray(self.cursor.fetchall()) 108 | 109 | 110 | def translate_aggregation(aggr_node, aggregated): 111 | 
aggregated = aggr_node.func_name + "(" + aggregated 112 | for param in aggr_node.parameters: 113 | aggregated = aggregated + "," + param 114 | aggregated += ")" 115 | if aggr_node.func_name in common_aggregation: 116 | if isinstance(aggr_node, asMFJSON): 117 | if len(aggr_node.interesting_fields) > 0: 118 | interesting_field = aggr_node.interesting_fields[0] 119 | aggregated = aggregated + "::json->" + "\'"+interesting_field+"\'" 120 | 121 | return aggregated -------------------------------------------------------------------------------- /apperception/metadata_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from metadata_context import * 3 | from metadata_context_executor import * 4 | from metadata import * 5 | 6 | from metadata_util import * 7 | import json 8 | 9 | test_context = MetadataContext() 10 | 11 | conn = psycopg2.connect( 12 | database="mobilitydb", user="docker", password="docker", host="localhost", port=5432) 13 | 14 | test_executor = MetadataContextExecutor(conn) 15 | # test_executor.connect_db(user="postgres", password="postgres", database_name="postgres") 16 | # Test simple queries using Context class 17 | class TestStringMethods(unittest.TestCase): 18 | 19 | def test_commands(self): 20 | test_executor.context(test_context.selectkey()) 21 | print(test_executor.execute()) 22 | print("------------------------------------") 23 | 24 | test_executor.context(test_context.get_trajectory()) 25 | print(test_executor.execute()) 26 | print("------------------------------------") 27 | 28 | test_executor.context(test_context.get_geo().interval('0001-01-01 00:00:00','9999-12-31 23:59:59.999999')) 29 | print(test_executor.execute()) 30 | print("------------------------------------") 31 | 32 | test_executor.context(test_context.get_geo()) 33 | print(test_executor.execute()) 34 | print("------------------------------------") 35 | 36 | test_executor.context(test_context.get_time()) 37 | print(test_executor.execute()) 38 | print("------------------------------------") 39 | 40 | test_executor.context(test_context.get_speed()) 41 | print(test_executor.execute()) 42 | print("------------------------------------") 43 | 44 | test_executor.context(test_context.get_distance()) 45 | print(test_executor.execute()) 46 | print("------------------------------------") 47 | 48 | test_executor.context(test_context.get_columns(primarykey, geometry, time)) 49 | print("###### bboxes and times are: ", test_executor.execute()) 50 | print("------------------------------------") 51 | 52 | # test_executor.context(test_context.count(MetadataContext.selectkey)) 53 | # print(test_executor.execute()) 54 | # print("------------------------------------") 55 | 56 | def test_usecases(self): 57 | # test_executor.context(test_context.predicate(lambda obj:obj.object_id == "Item_1").get_geo()) 58 | # print(test_executor.execute()) 59 | # print("------------------------------------") 60 | 61 | ### This query could be confusing since the user may understand it as getting the trajectory of the objects when they are at the intersection 62 | ### but the trajectory is actually an attribute, so it's always the entire trajectory 63 | ### If the user really wants to get a certain period of trajectory they have to filter out the timestamps 64 | volume = "stbox \'STBOX Z((1.81788543, 2.17411856, 0),(2.79369985, 3.51919659, 2))\'" 65 | filtered_world = test_context.predicate(lambda obj:obj.object_type == "car").predicate(lambda obj:obj.location in volume, 
{"volume":volume}) 66 | trajectory = filtered_world.get_trajectory(distinct=True) 67 | test_executor.context(trajectory) 68 | print(test_executor.execute()) 69 | print("------------------------------------") 70 | 71 | ## to get the video over the entire trajectory(amber case) 72 | test_executor.context(filtered_world.selectkey(distinct=True)) 73 | filtered_ids = test_executor.execute() 74 | print("filtered_IDS are *****:", filtered_ids) 75 | 76 | id_array = [filtered_id[0] for filtered_id in filtered_ids] 77 | entire_video = test_context.predicate(lambda obj: obj.object_id in id_array, {"id_array":id_array}).get_columns(primarykey, geometry, time) 78 | test_executor.context(entire_video) 79 | print(test_executor.execute()) 80 | print("------------------------------------") 81 | 82 | # test_executor.context(test_context.predicate(lambda obj:obj.color == "red").group(get_time).predicate(lambda obj:count(obj) >= 3).get_time()) 83 | # print(test_executor.execute()) 84 | # print("------------------------------------") 85 | 86 | # def test_table_join(self): 87 | # ### Inner Join 88 | # new_meta_context = test_context.selectkey().get_distance().get_speed().view().join(metadata_view) ### create a temporary view without reference 89 | # test_executor.context(new_meta_context.predicate(lambda obj:obj.object_type == 'car')) 90 | # car_newmeta = test_executor.execute() 91 | # print(car_newmeta) 92 | # print("------------------------------------") 93 | 94 | # test_executor.context(new_meta_context.predicate(lambda obj:obj.object_type == 'car').view(view_name="car_view")) 95 | # car_newmeta_view = test_executor.execute() 96 | # print(car_newmeta_view) ### this should be the same result as previous execution 97 | # print("------------------------------------") 98 | 99 | # ### Query from new view 100 | # test_executor.context(test_context.view(use_view = car_newmeta_view).selectkey().get_trajectory().get_speed()) 101 | # print(test_executor.execute()) 102 | # print("------------------------------------") 103 | 104 | 105 | # def test_mix(self): 106 | # stbox = "stbox \'STBOX Z((1.81788543, 2.17411856, 0),(2.79369985, 3.51919659, 2))\'" 107 | # proposal_context = test_context.get_trajectory().predicate(lambda obj:obj.object_type == 'car').predicate(lambda obj:obj.location in volume, {"volume":stbox}) 108 | # test_executor.context(proposal_context) 109 | # print(test_executor.execute()) 110 | # print("------------------------------------") 111 | 112 | # test_executor.context(test_context.count(key=MetadataContext.selectkey).predicate(lambda obj: obj.color == "red").group(lambda obj: obj.color)) 113 | # print(test_executor.execute()) 114 | # print("------------------------------------") 115 | 116 | # test_executor.context(test_context.get_time().predicate(lambda obj:obj.color == "red" and obj.location in volume and count(obj.id), {"volume":stbox}).group(lambda obj: obj.color)) 117 | # print(test_executor.execute()) 118 | # print("------------------------------------") 119 | # def test_usecases(self): 120 | # TODO: Define use cases here 121 | 122 | if __name__ == '__main__': 123 | unittest.main() 124 | -------------------------------------------------------------------------------- /apperception/metadata_util.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import datetime 3 | from metadata import * 4 | 5 | common_geo = ["Xmin", "Ymin", "Zmin", "Xmax", "Ymax", "Zmax"] 6 | common_aggregation = ["asMFJSON", common_geo] 7 | 8 | 9 | # Map to translate ast 
comparators to SQL comparators 10 | comparator_map = { 11 | ast.Eq: "=", 12 | ast.NotEq: ">=", 13 | ast.Lt: "<", 14 | ast.LtE: "<=", 15 | ast.Gt: ">", 16 | ast.GtE: ">=" 17 | } 18 | 19 | # Map to translate ast propositions to SQL propositions 20 | propositional_map = { 21 | ast.And: "AND", 22 | ast.Or: "OR" 23 | } 24 | 25 | 26 | def decompile_comparator(comparator, evaluated_var, view): 27 | # print(evaluated_var) 28 | # print(ast.dump(comparator)) 29 | result_comparator = "" 30 | view_context = view 31 | if isinstance(comparator, ast.Call): 32 | func_name = comparator.func.id 33 | result_comparator = func_name + "(" 34 | args = comparator.args 35 | for arg in args: 36 | if isinstance(arg, ast.Attribute): 37 | table_name = arg.value.id 38 | table_attr= arg.attr 39 | view_context, table_name, column_name = resolve_default_view(table_attr, view) 40 | # TODO: else not default 41 | result_comparator += table_name +"." + column_name 42 | elif isinstance(arg, ast.Str): 43 | result_comparator += arg.s 44 | elif isinstance(arg, ast.Name): 45 | if arg.id in evaluated_var: 46 | result_comparator += evaluated_var[arg.id] 47 | else: 48 | result_comparator += arg.id 49 | result_comparator += "," 50 | result_comparator = result_comparator[:-1]+")" 51 | elif isinstance(comparator, ast.Attribute): 52 | table_name = comparator.value.id 53 | table_attr= comparator.attr 54 | #TODO: if view == None: 55 | ### TODO: unresolved, dynamically determine the scan views based on both predicates and select 56 | view_context, table_name, column_name = resolve_default_view(table_attr, view) 57 | result_comparator = table_name+"."+ column_name 58 | elif isinstance(comparator, ast.Str): 59 | result_comparator = "\'"+comparator.s+"\'" 60 | elif isinstance(comparator, ast.Name): 61 | if comparator.id in evaluated_var: 62 | evaluated_variable = evaluated_var[comparator.id] 63 | else: 64 | evaluated_variable = comparator.id 65 | result_comparator = evaluated_variable 66 | else: 67 | print(comparator) 68 | 69 | return result_comparator, view_context 70 | 71 | def resolve_default_view(attr_name, view): 72 | view_context = view 73 | if view == None: 74 | column_name = metadata_view.trajectory_view.resolve_key(attr_name) 75 | if column_name: 76 | view_context = metadata_view.trajectory_view 77 | else: 78 | column_name = metadata_view.location_view.resolve_key(attr_name) 79 | view_context = metadata_view.location_view 80 | table_name = view_context.view_name 81 | elif view.default: 82 | if view.view_name == "metadata_view": 83 | column_name = view.resolve_key(attr_name) 84 | table_name = view.map_view(column_name).view_name 85 | else: 86 | column_name = view.resolve_key(attr_name) 87 | if not column_name: 88 | view_context = metadata_view 89 | column_name = metadata_view.resolve_key(attr_name) 90 | table_name = metadata_view.map_view(column_name).view_name 91 | else: 92 | table_name = view.view_name 93 | 94 | return view_context, table_name, column_name 95 | 96 | def decompile_filter(ast_tree, evaluated_var, view): 97 | print(ast.dump(ast_tree)) 98 | attributes = [] 99 | operations = [] 100 | comparators = [] 101 | bool_ops = [""] 102 | cast_types = [] 103 | result_view = view 104 | for ast_node in ast.walk(ast_tree): 105 | module_body = ast_node.body[0] 106 | if isinstance(module_body, ast.Return): 107 | value = module_body.value 108 | # if isinstance(value, ast.BoolOp) 109 | # case where we allow multiple constraints in a single filter, usually for OR 110 | if isinstance(value, ast.Compare): 111 | left = value.left 112 | 
attribute, left_comebine_view = decompile_comparator(left, evaluated_var, view) 113 | right = value.comparators[0] 114 | comparator, right_combine_view = decompile_comparator(right, evaluated_var, view) 115 | 116 | op = value.ops[0] 117 | if type(op) in comparator_map: 118 | operation = comparator_map[type(op)] 119 | elif type(op) == ast.In: 120 | if isinstance(comparator, list): 121 | operation = " IN " 122 | elif isinstance(comparator, str): 123 | operation = "overlap" 124 | 125 | 126 | if operation == "overlap": 127 | attribute = "overlap(%s, %s)"%(attribute, comparator) 128 | operation = "=" 129 | comparator = "true" 130 | elif operation == " IN ": 131 | comparator = list_to_str(comparator) 132 | 133 | attributes.append(attribute) 134 | operations.append(operation) 135 | comparators.append(comparator) 136 | 137 | return attributes, operations, comparators, bool_ops, cast_types, left_comebine_view or right_combine_view 138 | 139 | def list_to_str(lst): 140 | result = "(" 141 | for s in lst: 142 | result = result + "\'" + s + "\'" + "," 143 | result = result[:-1] + ")" 144 | return result 145 | 146 | def convert_time(start, interval=[]): 147 | if len(interval) == 0: 148 | starttime = str(datetime.datetime.min) 149 | endtime = str(datetime.datetime.max) 150 | else: 151 | starttime = str(start + datetime.timedelta(seconds=interval[0])) 152 | endtime = str(start + datetime.timedelta(seconds=interval[1])) 153 | return starttime, endtime 154 | 155 | 156 | 157 | # Translate the overlap function to psql overlap function 158 | def overlap(stbox1, stbox2): 159 | return "Overlap(%s, %s)"%(stbox1, stbox2) 160 | 161 | 162 | # Translate the Tmin function to psql Tmin function 163 | def Tmin(stbox): 164 | return "Tmin" 165 | 166 | # Translate the Tmax function to psql Tmax function 167 | def Tmax(stbox): 168 | return "Tmax" 169 | 170 | # SQL Count 171 | def COUNT(key): 172 | return "COUNT(%s)"%key 173 | 174 | -------------------------------------------------------------------------------- /apperception/mono_depth_estimator.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import numpy as np 5 | import PIL.Image as pil 6 | import matplotlib.pyplot as plt 7 | 8 | import torch 9 | from torchvision import transforms 10 | 11 | import monodepth2.networks 12 | from layers import disp_to_depth 13 | from monodepth2.utils import download_model_if_doesnt_exist 14 | 15 | # Create depth frames for each frame from a video. 16 | def create_depth_frames(video_byte_array, model_name="mono+stereo_640x192"): 17 | """Function to predict for a video. 
18 | """ 19 | assert model_name is not None, \ 20 | "You must specify the --model_name parameter; see README.md for an example" 21 | 22 | if torch.cuda.is_available() and not args.no_cuda: 23 | device = torch.device("cuda") 24 | else: 25 | device = torch.device("cpu") 26 | 27 | download_model_if_doesnt_exist(model_name) 28 | model_path = os.path.join("models", model_name) 29 | print("-> Loading model from ", model_path) 30 | encoder_path = os.path.join(model_path, "encoder.pth") 31 | depth_decoder_path = os.path.join(model_path, "depth.pth") 32 | 33 | # LOADING PRETRAINED MODEL 34 | print(" Loading pretrained encoder") 35 | encoder = monodepth2.networks.ResnetEncoder(18, False) 36 | loaded_dict_enc = torch.load(encoder_path, map_location=device) 37 | 38 | # extract the height and width of image that this model was trained with 39 | feed_height = loaded_dict_enc['height'] 40 | feed_width = loaded_dict_enc['width'] 41 | filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()} 42 | encoder.load_state_dict(filtered_dict_enc) 43 | encoder.to(device) 44 | encoder.eval() 45 | 46 | print(" Loading pretrained decoder") 47 | depth_decoder = monodepth2.networks.DepthDecoder( 48 | num_ch_enc=encoder.num_ch_enc, scales=range(4)) 49 | 50 | loaded_dict = torch.load(depth_decoder_path, map_location=device) 51 | depth_decoder.load_state_dict(loaded_dict) 52 | 53 | depth_decoder.to(device) 54 | depth_decoder.eval() 55 | 56 | num_frames, original_height, original_width, _ = video_byte_array.shape 57 | disp_map = np.zeros((num_frames, original_height, original_width)) 58 | 59 | # Go through each frame and predict the depth map 60 | for i in range(num_frames): 61 | input_image = pil.fromarray(np.uint8(video_byte_array[i])).convert('RGB') 62 | input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS) 63 | input_image = transforms.ToTensor()(input_image).unsqueeze(0) 64 | 65 | # PREDICTION 66 | input_image = input_image.to(device) 67 | features = encoder(input_image) 68 | outputs = depth_decoder(features) 69 | 70 | disp = outputs[("disp", 0)] 71 | disp_resized = torch.nn.functional.interpolate( 72 | disp, (original_height, original_width), mode="bilinear", align_corners=False) 73 | 74 | # Saving numpy file 75 | # Save the resized disp instead 76 | scaled_disp, _ = disp_to_depth(disp_resized.squeeze(), 0.1, 100) 77 | disp_map[i] = scaled_disp.cpu().detach().numpy() 78 | return disp_map -------------------------------------------------------------------------------- /apperception/object_tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | # comment out below line to enable tensorflow logging outputs 3 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 4 | import sys 5 | sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)),"../yolov4-deepsort")) 6 | import time 7 | import tensorflow as tf 8 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 9 | if len(physical_devices) > 0: 10 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 11 | # from absl import app, flags, logging 12 | # from absl.flags import FLAGS 13 | import core.utils as utils 14 | from core.yolov4 import filter_boxes 15 | from tensorflow.python.saved_model import tag_constants 16 | from core.config import cfg 17 | from PIL import Image 18 | import cv2 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | from tensorflow.compat.v1 import ConfigProto 22 | from tensorflow.compat.v1 
import InteractiveSession 23 | # deep sort imports 24 | from deep_sort import preprocessing, nn_matching 25 | from deep_sort.detection import Detection 26 | from deep_sort.tracker import Tracker 27 | from tools import generate_detections as gdet 28 | 29 | from collections import namedtuple 30 | FLAGS = namedtuple('Flags', ['framework', 'weights', 'size', 'tiny', 31 | 'model', 'iou', 'score', 'dont_show', 'info', 'count'])\ 32 | (framework='tf', 33 | weights=os.path.join(os.path.dirname(os.path.realpath(__file__)),'../yolov4-deepsort/checkpoints/yolov4-416'), 34 | size=416, tiny=True, model='yolov4', 35 | iou=0.45, score=0.50, dont_show=True, info=False, count=False) 36 | 37 | # flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt') 38 | # flags.DEFINE_string('weights', './checkpoints/yolov4-416', 39 | # 'path to weights file') 40 | # flags.DEFINE_integer('size', 416, 'resize images to') 41 | # flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny') 42 | # flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4') 43 | # flags.DEFINE_float('iou', 0.45, 'iou threshold') 44 | # flags.DEFINE_float('score', 0.50, 'score threshold') 45 | # flags.DEFINE_boolean('dont_show', False, 'dont show video output') 46 | # flags.DEFINE_boolean('info', False, 'show detailed info of tracked objects') 47 | # flags.DEFINE_boolean('count', False, 'count objects being tracked on screen') 48 | 49 | def yolov4_deepsort_video_track(video_file): 50 | # Definition of the parameters 51 | max_cosine_distance = 0.4 52 | nn_budget = None 53 | nms_max_overlap = 1.0 54 | 55 | # initialize deep sort 56 | 57 | model_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), 58 | '../yolov4-deepsort/model_data/mars-small128.pb') 59 | encoder = gdet.create_box_encoder(model_filename, batch_size=1) 60 | # calculate cosine distance metric 61 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 62 | # initialize tracker 63 | tracker = Tracker(metric) 64 | 65 | # load configuration for object detector 66 | config = ConfigProto() 67 | config.gpu_options.allow_growth = True 68 | session = InteractiveSession(config=config) 69 | STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) 70 | input_size = 416 71 | 72 | # load standard tensorflow saved model 73 | saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) 74 | infer = saved_model_loaded.signatures['serving_default'] 75 | 76 | formatted_result = {} 77 | cap = cv2.VideoCapture(video_file) 78 | frame_num = 0 79 | # while video is running 80 | while(cap.isOpened()): 81 | # Capture frame-by-frame 82 | ret, frame = cap.read() 83 | if ret == True: 84 | image = Image.fromarray(frame) 85 | frame_num +=1 86 | # print('Frame #: ', frame_num) 87 | frame_size = frame.shape[:2] 88 | image_data = cv2.resize(frame, (input_size, input_size)) 89 | image_data = image_data / 255. 
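# Preprocessing for YOLOv4: the frame was resized to the 416x416 network input and scaled to [0, 1] above; the next line adds a batch dimension and casts to float32 before running inference.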
90 | image_data = image_data[np.newaxis, ...].astype(np.float32) 91 | start_time = time.time() 92 | 93 | 94 | batch_data = tf.constant(image_data) 95 | pred_bbox = infer(batch_data) 96 | for key, value in pred_bbox.items(): 97 | boxes = value[:, :, 0:4] 98 | pred_conf = value[:, :, 4:] 99 | 100 | boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( 101 | boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), 102 | scores=tf.reshape( 103 | pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), 104 | max_output_size_per_class=50, 105 | max_total_size=50, 106 | iou_threshold=FLAGS.iou, 107 | score_threshold=FLAGS.score 108 | ) 109 | 110 | # convert data to numpy arrays and slice out unused elements 111 | num_objects = valid_detections.numpy()[0] 112 | bboxes = boxes.numpy()[0] 113 | bboxes = bboxes[0:int(num_objects)] 114 | scores = scores.numpy()[0] 115 | scores = scores[0:int(num_objects)] 116 | classes = classes.numpy()[0] 117 | classes = classes[0:int(num_objects)] 118 | 119 | # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height 120 | original_h, original_w, _ = frame.shape 121 | bboxes = utils.format_boxes(bboxes, original_h, original_w) 122 | 123 | # store all predictions in one parameter for simplicity when calling functions 124 | pred_bbox = [bboxes, scores, classes, num_objects] 125 | 126 | # read in all class names from config 127 | class_names = utils.read_class_names(cfg.YOLO.CLASSES) 128 | 129 | # by default allow all classes in .names file 130 | allowed_classes = list(class_names.values()) 131 | 132 | # custom allowed classes (uncomment line below to customize tracker for only people) 133 | #allowed_classes = ['person'] 134 | 135 | # loop through objects and use class index to get class name, allow only classes in allowed_classes list 136 | names = [] 137 | deleted_indx = [] 138 | for i in range(num_objects): 139 | class_indx = int(classes[i]) 140 | class_name = class_names[class_indx] 141 | if class_name not in allowed_classes: 142 | deleted_indx.append(i) 143 | else: 144 | names.append(class_name) 145 | names = np.array(names) 146 | if FLAGS.count: 147 | cv2.putText(frame, "Objects being tracked: {}".format(len(names)), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) 148 | print("Objects being tracked: {}".format(len(names))) 149 | # delete detections that are not in allowed_classes 150 | bboxes = np.delete(bboxes, deleted_indx, axis=0) 151 | scores = np.delete(scores, deleted_indx, axis=0) 152 | 153 | # encode yolo detections and feed to tracker 154 | features = encoder(frame, bboxes) 155 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)] 156 | 157 | #initialize color map 158 | cmap = plt.get_cmap('tab20b') 159 | colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] 160 | 161 | # run non-maxima supression 162 | boxs = np.array([d.tlwh for d in detections]) 163 | scores = np.array([d.confidence for d in detections]) 164 | classes = np.array([d.class_name for d in detections]) 165 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) 166 | detections = [detections[i] for i in indices] 167 | 168 | # Call the tracker 169 | tracker.predict() 170 | tracker.update(detections) 171 | 172 | # update tracks 173 | # current_bboxes = [] 174 | # current_labels = [] 175 | 176 | for track in tracker.tracks: 177 | if not track.is_confirmed() or track.time_since_update > 1: 178 | 
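# Skip tracks that Deep SORT has not confirmed yet, or that were not matched to a detection in the current frame (time_since_update > 1).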
continue 179 | bbox = track.to_tlbr() 180 | class_name = track.get_class() 181 | # current_bboxes.append([[int(bbox[0]), int(bbox[1])], [int(bbox[2]), int(bbox[3])]]) 182 | # current_labels.append(class_name) 183 | item_id = class_name+"-"+str(track.track_id) 184 | if item_id in formatted_result: 185 | formatted_result[item_id]["bboxes"].append([[int(bbox[0]), int(bbox[1])], [int(bbox[2]), int(bbox[3])]]) 186 | formatted_result[item_id]["tracked_cnt"].append(frame_num) 187 | else: 188 | formatted_result[item_id]={"object_type": class_name, 189 | "bboxes":[[[int(bbox[0]), int(bbox[1])], [int(bbox[2]), int(bbox[3])]]], 190 | "tracked_cnt":[frame_num]} 191 | else: 192 | break 193 | print("# of tracked items:", len(formatted_result)) 194 | return formatted_result 195 | 196 | -------------------------------------------------------------------------------- /apperception/point.py: -------------------------------------------------------------------------------- 1 | class Point: 2 | 3 | def __init__(self, point_id: str, object_id: str, x: float, 4 | y: float, z: float, time: float, point_type: str): 5 | ''' 6 | Initializes a Point given coordinates, time, type and associated point ID 7 | and object ID. 8 | ''' 9 | self.point_id = point_id 10 | self.object_id = object_id 11 | self.coordinate = (x, y, z) 12 | self.time = time 13 | self.point_type = point_type 14 | 15 | 16 | -------------------------------------------------------------------------------- /apperception/tracker.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | import numpy as np 4 | 5 | class Tracker: 6 | def __init__(self, tracker_type = "default", customized_tracker = None): 7 | """ 8 | Constructs a Tracker. 9 | Args: 10 | tracker_type: indicates whether a customized tracker is used 11 | customized_tracker: user specified tracker algorithm 12 | """ 13 | self.tracker_type = tracker_type 14 | self.customized_tracker = customized_tracker 15 | 16 | def video_track(self, video_data, bboxes, first_frame): 17 | self.video_data = video_data 18 | if self.tracker_type == "default": 19 | self.tracker = SingleObjectTracker() 20 | return self.tracker.video_track(video_data, bboxes[0], first_frame) 21 | elif self.tracker_type == "multi": 22 | self.tracker = MultiObjectsTracker() 23 | print("boxes at tracker", bboxes) 24 | return self.tracker.video_track(video_data, bboxes, first_frame) 25 | else: 26 | self.tracker = self.customized_tracker() 27 | return self.tracker.video_track(video_data, bboxes, first_frame) 28 | 29 | def __iter__(self): 30 | return iter(self.tracker) 31 | 32 | def __next__(self): 33 | return next(self.tracker) 34 | 35 | class SingleObjectTracker(Tracker): 36 | """ 37 | OpenCV Single Object Tracker 38 | https://www.pyimagesearch.com/2018/07/30/opencv-object-tracking/ 39 | """ 40 | def __init__(self, tracker_type="CSRT"): 41 | """ 42 | Constructs a Tracker. 
43 | Args: 44 | tracker_type: type of the opencv tracker, default to be "CSRT" 45 | """ 46 | self.tracker = cv2.TrackerCSRT_create() 47 | 48 | def video_track(self, video_data, bbox, first_frame): 49 | self.video_data = video_data 50 | if(self.tracker.init(first_frame, bbox)): 51 | return iter(self) 52 | else: 53 | return None 54 | 55 | def __iter__(self): 56 | self.video_iter = iter(self.video_data) 57 | self.framect = 0 58 | return self 59 | 60 | def __next__(self): 61 | frame = next(self.video_iter) 62 | self.framect += 1 63 | ok, bbox = self.tracker.update(frame) 64 | if ok: 65 | p1 = [int(bbox[0]), int(bbox[1])] 66 | p2 = [int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])] 67 | cv2.rectangle(frame, p1, p2, (255,255,255), 2, 2) 68 | else: 69 | # Tracking failure 70 | cv2.putText(frame, "Tracking failure detected", (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,0,255),2) 71 | 72 | # Return the new bounding box and frameidx 73 | return frame, [[p1,p2]], self.framect 74 | 75 | class MultiObjectsTracker(Tracker): 76 | """ 77 | OpenCV Multi Object Tracker 78 | https://www.pyimagesearch.com/2018/08/06/tracking-multiple-objects-with-opencv/ """ 79 | def __init__(self, tracker_type="Multi"): 80 | """ 81 | Constructs a Tracker. 82 | Args: 83 | tracker_type: type of the opencv tracker, default to be "CSRT" 84 | """ 85 | self.trackers = [] 86 | 87 | def video_track(self, video_data, bboxes, first_frame): 88 | # print(bboxes) 89 | self.video_data = video_data 90 | for bbox in bboxes: 91 | tracker = cv2.TrackerCSRT_create() 92 | tracker.init(first_frame, bbox) 93 | self.trackers.append(tracker) 94 | return iter(self) 95 | 96 | def __iter__(self): 97 | self.video_iter = iter(self.video_data) 98 | self.framect = 0 99 | return self 100 | 101 | def __next__(self): 102 | frame = next(self.video_iter) 103 | self.framect += 1 104 | tracker_boxes = np.zeros((len(self.trackers), 2, 2)) 105 | for i in range(len(self.trackers)): 106 | current_tracker = self.trackers[i] 107 | ok, bbox = current_tracker.update(frame) 108 | if ok: 109 | p1 = [int(bbox[0]), int(bbox[1])] 110 | p2 = [int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])] 111 | tracker_boxes[i] = np.array([p1, p2]) 112 | # tracker_boxes.append([p1,p2]) 113 | cv2.rectangle(frame, tuple(p1), tuple(p2), (255,255,255), 2, 2) 114 | else: 115 | # Tracking failure 116 | cv2.putText(frame, "Tracking failure detected, Tracker %d" % i, (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,0,255),2) 117 | 118 | return frame, tracker_boxes, self.framect 119 | -------------------------------------------------------------------------------- /apperception/video_context.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import inspect 3 | import os 4 | from typing import Callable 5 | import uncompyle6 6 | import psycopg2 7 | from video_util import * 8 | import datetime 9 | 10 | 11 | # Camera node 12 | class Camera: 13 | def __init__(self, cam_id, point, ratio, video_file, metadata_id, lens): 14 | self.cam_id = cam_id 15 | self.ratio = ratio 16 | self.video_file = video_file 17 | self.metadata_id = metadata_id 18 | self.properties = [] 19 | 20 | # Contain objects that still have yet to be added to the backend 21 | # If user calls recognize, those items will have already been 22 | # stored in the backend. These are reserved for objects that users 23 | # have not added to the camera. 
24 | self.items = [] 25 | self.object_recognition = None 26 | self.point = point 27 | self.lens = lens 28 | 29 | def add_item(self, item): 30 | # Add item 31 | self.items.append(item) 32 | 33 | def add_property(self, properties, property_type, new_prop): 34 | # Add property 35 | self.properties[property_type].append(new_prop) 36 | 37 | def add_lens(self, lens): 38 | # Add lens 39 | self.lens = lens 40 | 41 | # Add a default add_recog_obj = True 42 | def recognize(self, algo = 'Yolo', tracker_type = 'multi', tracker = None): 43 | # Create object recognition node 44 | object_rec_node = ObjectRecognition(algo, tracker_type, tracker=None) 45 | self.object_recognition = object_rec_node 46 | return object_rec_node 47 | 48 | # Item node 49 | class Item: 50 | def __init__(self, item_id, item_type, location): 51 | self.item_id = item_id 52 | self.item_type = item_type 53 | self.location = location 54 | self.properties = {} 55 | 56 | 57 | # Object Recognition node 58 | class ObjectRecognition: 59 | def __init__(self, algo, tracker_type, tracker = None): 60 | self.algo = algo 61 | self.tracker_type = tracker_type 62 | self.tracker = tracker 63 | # bounding boxes from object recognition 64 | self.bboxes = [] 65 | self.labels = None 66 | self.tracked_cnt = None 67 | 68 | def add_properties(self, properties): 69 | self.properties = properties 70 | 71 | class VideoContext: 72 | def __init__(self, name, units): 73 | self.root = self 74 | self.name = name 75 | self.units = units 76 | self.camera_nodes = {} 77 | self.start_time = datetime.datetime(2021, 6, 8, 7, 10, 28) 78 | 79 | # Connect to the database 80 | def connect_db(self, host='localhost', 81 | user=None, 82 | password=None, 83 | port=5432, 84 | database_name=None): 85 | self.conn = psycopg2.connect(database=database_name, user=user, 86 | password=password, host=host, port=port) 87 | 88 | def get_name(self): 89 | return self.name 90 | 91 | def get_units(self): 92 | return self.units 93 | 94 | # Establish camera 95 | def camera(self, cam_id, point, ratio, video_file, metadata_id, lens): 96 | camera_node = self.__get_camera(cam_id) 97 | if not camera_node: 98 | camera_node = Camera(cam_id, point, ratio, video_file, metadata_id, lens) 99 | self.__add_camera(cam_id, camera_node) 100 | return camera_node 101 | 102 | def properties(self, cam_id, properties, property_type): 103 | camera_node = self.__get_camera(cam_id) 104 | if not camera_node: 105 | return None 106 | 107 | camera_node.add_properties(properties, property_type) 108 | # Display error 109 | 110 | def get_camera(self, cam_id): 111 | return self.__get_camera(cam_id) 112 | 113 | # Get camera 114 | def __get_camera(self, cam_id): 115 | if cam_id in self.camera_nodes.keys(): 116 | return self.camera_nodes[cam_id] 117 | return None 118 | 119 | # Add camera 120 | def __add_camera(self, cam_id, camera_node): 121 | self.camera_nodes[cam_id] = camera_node 122 | 123 | # Remove camera 124 | def remove_camera(self, cam_id): 125 | camera_node = self.__get_camera(cam_id) 126 | self.camera_nodes.remove(camera_node) 127 | 128 | # Clear 129 | def clear(self): 130 | self.camera_nodes = [] 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /apperception/video_context_executor.py: -------------------------------------------------------------------------------- 1 | from video_context import * 2 | from video_util import * 3 | 4 | import json 5 | 6 | # TODO: Add checks for Nones 7 | class VideoContextExecutor: 8 | def __init__(self, conn, 
new_video_context:VideoContext=None, tasm=None): 9 | if new_video_context: 10 | self.context(new_video_context) 11 | self.conn = conn 12 | self.tasm = tasm 13 | 14 | def context(self, video_context:VideoContext): 15 | self.current_context = video_context 16 | return self 17 | 18 | def visit(self): 19 | video_query = self.visit_world() 20 | return video_query 21 | 22 | def visit_world(self): 23 | # Query to store world in database 24 | name, units = self.current_context.name, self.current_context.units 25 | world_sql = create_or_insert_world_table(self.conn, name, units) 26 | 27 | all_sqls = [] 28 | cameras = self.current_context.camera_nodes 29 | if len(cameras) != 0: 30 | for c in cameras.values(): 31 | camera_sql = self.visit_camera(c) 32 | all_sqls.append(camera_sql) 33 | return all_sqls 34 | 35 | def visit_camera(self, camera_node): 36 | world_name = self.current_context.name 37 | camera_sql = create_or_insert_camera_table(self.conn, world_name, camera_node) 38 | if camera_node.object_recognition is not None: 39 | self.visit_obj_rec(camera_node, camera_node.object_recognition) 40 | if self.tasm: 41 | video_data_to_tasm(camera_node.video_file, camera_node.metadata_id, self.tasm) 42 | return camera_sql 43 | 44 | def visit_obj_rec(self, camera_node, object_rec_node): 45 | cam_id = camera_node.cam_id 46 | lens = camera_node.lens 47 | video_file = camera_node.video_file 48 | 49 | start_time = self.current_context.start_time 50 | 51 | tracker = object_rec_node.tracker 52 | tracker_type = object_rec_node.tracker_type 53 | algo = object_rec_node.algo 54 | 55 | 56 | tracking_results = recognize(video_file, algo, tracker_type, tracker) 57 | add_recognized_objs(self.conn, lens, tracking_results, start_time) 58 | if self.tasm: 59 | metadata_to_tasm(tracking_results, camera_node.metadata_id, self.tasm) 60 | 61 | def execute(self): 62 | query = self.visit() 63 | 64 | -------------------------------------------------------------------------------- /apperception/video_util.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import psycopg2 3 | import numpy as np 4 | import datetime 5 | import cv2 6 | from object_tracker import yolov4_deepsort_video_track 7 | 8 | 9 | def video_data_to_tasm(video_file, metadata_id, t): 10 | t.store(video_file, metadata_id) 11 | 12 | def metadata_to_tasm(formatted_result, metadata_id, t): 13 | import tasm 14 | metadata_info = [] 15 | bound_width = lambda x : min(max(0, x), 3840) 16 | bound_height = lambda y: min(max(0, y), 2160) 17 | for obj, info in formatted_result.items(): 18 | object_type = info['object_type'] 19 | for bbox, frame in zip(info['bboxes'], info['tracked_cnt']): 20 | x1 = bound_width(bbox[0][0]) 21 | y1 = bound_height(bbox[0][1]) 22 | x2 = bound_width(bbox[1][0]) 23 | y2 = bound_height(bbox[1][1]) 24 | if frame < 0 or x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0: 25 | import pdb; pdb.set_trace() 26 | metadata_info.append(tasm.MetadataInfo(metadata_id, object_type, frame, x1, y1, x2, y2)) 27 | metadata_info.append(tasm.MetadataInfo(metadata_id, obj, frame, x1, y1, x2, y2)) 28 | 29 | t.add_bulk_metadata(metadata_info) 30 | 31 | def convert_datetime_to_frame_num(start_time, date_times): 32 | 33 | return [(t.replace(tzinfo = None) - start_time).total_seconds() for t in date_times] 34 | 35 | def get_video_roi(file_name, cam_video_file, rois, times): 36 | """ 37 | Get the region of interest from the video, based on bounding box points in 38 | video coordinates. 
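Each cropped region is padded to a common width and height (taken from the largest bounding box) so that every frame can be written to a single fixed-size output video.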
39 | 40 | Args: 41 | file_name: String of file name to save video as 42 | rois: A list of bounding boxes 43 | time_intervals: A list of time intervals of which frames 44 | """ 45 | 46 | rois = np.array(rois).T 47 | print(rois.shape) 48 | len_x, len_y = np.max(rois.T[2] - rois.T[0]), np.max(rois.T[3] - rois.T[1]) 49 | # len_x, len_y = np.max(rois.T[0][1] - rois.T[0][0]), np.max(rois.T[1][1] - rois.T[1][0]) 50 | 51 | len_x = int(round(len_x)) 52 | len_y = int(round(len_y)) 53 | # print(len_x) 54 | # print(len_y) 55 | vid_writer = cv2.VideoWriter(file_name, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, (len_x, len_y)) 56 | # print("rois") 57 | # print(rois) 58 | start_time = int(times[0]) 59 | cap = cv2.VideoCapture(cam_video_file) 60 | frame_cnt = 0 61 | while(cap.isOpened()): 62 | # Capture frame-by-frame 63 | ret, frame = cap.read() 64 | if frame_cnt in times and ret: 65 | i = frame_cnt - start_time 66 | if i >= len(rois): 67 | print("incorrect length:", len(rois)) 68 | break 69 | current_roi = rois[i] 70 | 71 | b_x, b_y, e_x, e_y = current_roi 72 | b_x, b_y = max(0, b_x), max(0, b_y) 73 | # e_x, e_y = current_roi[1] 74 | e_x, e_y = max(0, e_x), max(0, e_y) 75 | diff_y, diff_x =int(abs(e_y - b_y)), int(abs(e_x - b_x)) 76 | pad_y = int((len_y - diff_y) // 2) 77 | pad_x = int((len_x - diff_x) // 2) 78 | 79 | # print("padding") 80 | # print(pad_y) 81 | # print(pad_x) 82 | roi_byte = frame[int(b_y):int(e_y), int(b_x): int(e_x), :] 83 | 84 | 85 | roi_byte = np.pad(roi_byte, pad_width = [(pad_y, len_y - diff_y - pad_y), (pad_x, len_x - diff_x - pad_x), (0, 0)]) 86 | frame = cv2.cvtColor(roi_byte, cv2.COLOR_RGB2BGR) 87 | 88 | 89 | vid_writer.write(roi_byte) 90 | frame_cnt += 1 91 | if not ret: 92 | break 93 | 94 | vid_writer.release() 95 | 96 | def create_or_insert_world_table(conn, name, units): 97 | #Creating a cursor object using the cursor() method 98 | cursor = conn.cursor() 99 | ''' 100 | Create and Populate A world table with the given world object. 101 | ''' 102 | #Doping Worlds table if already exists. TODO: For testing purpose only 103 | cursor.execute("DROP TABLE IF EXISTS Worlds;") 104 | #Creating table with the first world 105 | sql = '''CREATE TABLE IF NOT EXISTS Worlds( 106 | worldId TEXT PRIMARY KEY, 107 | units TEXT 108 | );''' 109 | cursor.execute(sql) 110 | print("Worlds Table created successfully........") 111 | insert_world(conn, name, units) 112 | return sql 113 | 114 | # Helper function to insert the world 115 | def insert_world(conn, name, units): 116 | #Creating a cursor object using the cursor() method 117 | cursor = conn.cursor() 118 | cursor.execute('''INSERT INTO Worlds (worldId, units) '''+ \ 119 | '''VALUES (\'%s\', \'%s\');''' \ 120 | %(name, units)) 121 | print("New world inserted successfully........") 122 | #Insert the existing cameras of the current world into the camera table 123 | conn.commit() 124 | 125 | # Create a camera table 126 | def create_or_insert_camera_table(conn, world_name, camera): 127 | #Creating a cursor object using the cursor() method 128 | cursor = conn.cursor() 129 | ''' 130 | Create and Populate A camera table with the given camera object. 131 | ''' 132 | #Doping Cameras table if already exists. 
TODO: For testing purpose only 133 | cursor.execute("DROP TABLE IF EXISTS Cameras") 134 | #Creating table with the first camera 135 | sql = '''CREATE TABLE IF NOT EXISTS Cameras( 136 | cameraId TEXT, 137 | worldId TEXT, 138 | ratio real, 139 | origin geometry, 140 | focalpoints geometry, 141 | fov INTEGER, 142 | skev_factor real 143 | );''' 144 | cursor.execute(sql) 145 | print("Camera Table created successfully........") 146 | insert_camera(conn, world_name, camera) 147 | return sql 148 | 149 | # Helper function to insert the camera 150 | def insert_camera(conn, world_name, camera_node): 151 | #Creating a cursor object using the cursor() method 152 | cursor = conn.cursor() 153 | lens = camera_node.lens 154 | focal_x = str(lens.focal_x) 155 | focal_y = str(lens.focal_y) 156 | cam_x, cam_y, cam_z = str(lens.cam_origin[0]), str(lens.cam_origin[1]), str(lens.cam_origin[2]) 157 | cursor.execute('''INSERT INTO Cameras (cameraId, worldId, ratio, origin, focalpoints, fov, skev_factor) '''+ \ 158 | '''VALUES (\'%s\', \'%s\', %f, \'POINT Z (%s %s %s)\', \'POINT(%s %s)\', %s, %f);''' \ 159 | %(camera_node.cam_id, world_name, camera_node.ratio, cam_x, cam_y, cam_z, focal_x, focal_y, lens.fov, lens.alpha)) 160 | print("New camera inserted successfully.........") 161 | conn.commit() 162 | 163 | # Default object recognition (YOLOv3) 164 | def recognize(video_file, recog_algo = "", tracker_type = "default", customized_tracker = None): 165 | # recognition = item.ItemRecognition(recog_algo = recog_algo, tracker_type = tracker_type, customized_tracker = customized_tracker) 166 | # return recognition.video_item_recognize(video.byte_array) 167 | return yolov4_deepsort_video_track(video_file) 168 | 169 | 170 | def add_recognized_objs(conn, lens, formatted_result, start_time, properties={'color':{}}, default_depth=True): 171 | clean_tables(conn) 172 | for item_id in formatted_result: 173 | object_type = formatted_result[item_id]["object_type"] 174 | recognized_bboxes = np.array(formatted_result[item_id]["bboxes"]) 175 | tracked_cnt = formatted_result[item_id]["tracked_cnt"] 176 | top_left = np.vstack((recognized_bboxes[:,0,0], recognized_bboxes[:,0,1])) 177 | if default_depth: 178 | top_left_depths = np.ones(len(recognized_bboxes)) 179 | else: 180 | top_left_depths = self.__get_depths_of_points(recognized_bboxes[:,0,0], recognized_bboxes[:,0,1]) 181 | top_left = lens.pixels_to_world(top_left, top_left_depths) 182 | 183 | # Convert bottom right coordinates to world coordinates 184 | bottom_right = np.vstack((recognized_bboxes[:,1,0], recognized_bboxes[:,1,1])) 185 | if default_depth: 186 | bottom_right_depths = np.ones(len(tracked_cnt)) 187 | else: 188 | bottom_right_depths = self.__get_depths_of_points(recognized_bboxes[:,1,0], recognized_bboxes[:,1,1]) 189 | bottom_right = lens.pixels_to_world(bottom_right, bottom_right_depths) 190 | 191 | top_left = np.array(top_left.T) 192 | bottom_right = np.array(bottom_right.T) 193 | obj_traj = [] 194 | for i in range(len(top_left)): 195 | current_tl = top_left[i] 196 | current_br = bottom_right[i] 197 | obj_traj.append([current_tl.tolist(), current_br.tolist()]) 198 | 199 | bbox_to_postgres(conn, item_id, object_type, "default_color" if item_id not in properties['color'] else properties['color'][item_id], start_time, tracked_cnt, obj_traj, type="yolov4") 200 | # bbox_to_tasm() 201 | 202 | # Helper function to convert the timestam to the timestamp formula pg-trajectory uses 203 | def convert_timestamps(start_time, timestamps): 204 | return [str(start_time + 
datetime.timedelta(seconds=t)) for t in timestamps] 205 | 206 | # Helper function to convert trajectory to centroids 207 | def bbox_to_data3d(bbox): 208 | ''' 209 | Compute the center, x, y, z delta of the bbox 210 | ''' 211 | tl, br = bbox 212 | x_delta = (br[0] - tl[0])/2 213 | y_delta = (br[1] - tl[1])/2 214 | z_delta = (br[2] - tl[2])/2 215 | center = (tl[0] + x_delta, tl[1] + y_delta, tl[2] + z_delta) 216 | 217 | return center, x_delta, y_delta, z_delta 218 | 219 | # Insert bboxes to postgres 220 | def bbox_to_postgres(conn, item_id, object_type, color, start_time, timestamps, bboxes, type='yolov3'): 221 | if type == 'yolov3': 222 | timestamps = range(timestamps) 223 | 224 | converted_bboxes = [bbox_to_data3d(bbox) for bbox in bboxes] 225 | pairs = [] 226 | deltas = [] 227 | for meta_box in converted_bboxes: 228 | pairs.append(meta_box[0]) 229 | deltas.append(meta_box[1:]) 230 | postgres_timestamps = convert_timestamps(start_time, timestamps) 231 | create_or_insert_general_trajectory(conn, item_id, object_type, color, postgres_timestamps, bboxes, pairs) 232 | print(f"{item_id} saved successfully") 233 | 234 | def clean_tables(conn): 235 | cursor = conn.cursor() 236 | cursor.execute("DROP TABLE IF EXISTS General_Bbox;") 237 | cursor.execute("DROP TABLE IF EXISTS Item_General_Trajectory;") 238 | conn.commit() 239 | 240 | # Create general trajectory table 241 | def create_or_insert_general_trajectory(conn, item_id, object_type, color, postgres_timestamps, bboxes, pairs): 242 | #Creating a cursor object using the cursor() method 243 | cursor = conn.cursor() 244 | ''' 245 | Create and Populate A Trajectory table using mobilityDB. 246 | Now the timestamp matches, the starting time should be the meta data of the world 247 | Then the timestamp should be the timestamp regarding the world starting time 248 | ''' 249 | 250 | #Creating table with the first item 251 | create_itemtraj_sql ='''CREATE TABLE IF NOT EXISTS Item_General_Trajectory( 252 | itemId TEXT, 253 | objectType TEXT, 254 | color TEXT, 255 | trajCentroids tgeompoint, 256 | largestBbox stbox, 257 | PRIMARY KEY (itemId) 258 | );''' 259 | cursor.execute(create_itemtraj_sql) 260 | cursor.execute("CREATE INDEX IF NOT EXISTS traj_idx ON Item_General_Trajectory USING GiST(trajCentroids);") 261 | conn.commit() 262 | #Creating table with the first item 263 | create_bboxes_sql ='''CREATE TABLE IF NOT EXISTS General_Bbox( 264 | itemId TEXT, 265 | trajBbox stbox, 266 | FOREIGN KEY(itemId) 267 | REFERENCES Item_General_Trajectory(itemId) 268 | );''' 269 | cursor.execute(create_bboxes_sql) 270 | cursor.execute("CREATE INDEX IF NOT EXISTS item_idx ON General_Bbox(itemId);") 271 | cursor.execute("CREATE INDEX IF NOT EXISTS traj_bbox_idx ON General_Bbox USING GiST(trajBbox);") 272 | conn.commit() 273 | #Insert the trajectory of the first item 274 | insert_general_trajectory(conn, item_id, object_type, color, postgres_timestamps, bboxes, pairs) 275 | 276 | 277 | # Insert general trajectory 278 | def insert_general_trajectory(conn, item_id, object_type, color, postgres_timestamps, bboxes, pairs): 279 | #Creating a cursor object using the cursor() method 280 | cursor = conn.cursor() 281 | #Inserting bboxes into Bbox table 282 | insert_bbox_trajectory = "" 283 | insert_format = "INSERT INTO General_Bbox (itemId, trajBbox) "+ \ 284 | "VALUES (\'%s\'," % (item_id) 285 | # Insert the item_trajectory separately 286 | insert_trajectory = "INSERT INTO Item_General_Trajectory (itemId, objectType, color, trajCentroids, largestBbox) "+ \ 287 | "VALUES (\'%s\', 
\'%s\', \'%s\', " % (item_id, object_type, color) 288 | traj_centroids = "\'{" 289 | min_ltx, min_lty, min_ltz, max_brx, max_bry, max_brz = float('inf'), float('inf'), float('inf'), float('-inf'), float('-inf'), float('-inf') 290 | # max_ltx, max_lty, max_ltz, min_brx, min_bry, min_brz = float('-inf'), float('-inf'), float('-inf'), float('inf'), float('inf'), float('inf') 291 | for i in range(len(postgres_timestamps)): 292 | postgres_timestamp = postgres_timestamps[i] 293 | ### Insert bbox 294 | # print(bboxes[i]) 295 | tl, br = bboxes[i] 296 | min_ltx, min_lty, min_ltz, max_brx, max_bry, max_brz = min(tl[0], min_ltx), min(tl[1], min_lty), min(tl[2], min_ltz),\ 297 | max(br[0], max_brx), max(br[1], max_bry), max(br[2], max_brz) 298 | # max_ltx, max_lty, max_ltz, min_brx, min_bry, min_brz = max(tl[0], max_ltx), max(tl[1], max_lty), max(tl[2], max_ltz),\ 299 | # min(br[0], min_brx), min(br[1], min_bry), min(br[2], min_brz) 300 | current_bbox_sql = "stbox \'STBOX ZT((%s, %s, %s, %s), (%s, %s, %s, %s))\');" \ 301 | %(tl[0], tl[1], tl[2], postgres_timestamp, br[0], br[1], br[2], postgres_timestamp) 302 | insert_bbox_trajectory += insert_format + current_bbox_sql 303 | ### Construct trajectory 304 | current_point = pairs[i] 305 | tg_pair_centroid = "POINT Z (%s %s %s)@%s," \ 306 | %(str(current_point[0]), str(current_point[1]), str(current_point[2]), postgres_timestamp) 307 | traj_centroids += tg_pair_centroid 308 | traj_centroids = traj_centroids[:-1] 309 | traj_centroids += "}\', " 310 | insert_trajectory += traj_centroids 311 | insert_trajectory += "stbox \'STBOX Z((%s, %s, %s),"%(min_ltx, min_lty, min_ltz)\ 312 | +"(%s, %s, %s))\'); "%(max_brx, max_bry, max_brz) 313 | cursor.execute(insert_trajectory) 314 | cursor.execute(insert_bbox_trajectory) 315 | # Commit your changes in the database 316 | conn.commit() 317 | 318 | def merge_trajectory(item_id, new_postgres_timestamps, new_bboxes, new_pairs): 319 | ### Fetch the timestamps of the current trajectory from the database 320 | ### Filter out the already had timestamp from the new timestamps 321 | ### Construct the adding trajectory 322 | ### Calling the merge function of mobilitydb 323 | ### do the same thing for the bboxes 324 | return 325 | 326 | def fetch_camera(conn, world_id, cam_id = []): 327 | cursor = conn.cursor() 328 | 329 | if cam_id == []: 330 | query = '''SELECT cameraId, ratio, ST_X(origin), ST_Y(origin), ST_Z(origin), ST_X(focalpoints), ST_Y(focalpoints), fov, skev_factor ''' \ 331 | + '''FROM Cameras WHERE worldId = \'%s\';''' %world_id 332 | else: 333 | query = '''SELECT cameraId, ratio, ST_X(origin), ST_Y(origin), ST_Z(origin), ST_X(focalpoints), ST_Y(focalpoints), fov, skev_factor ''' \ 334 | + '''FROM Cameras WHERE cameraId IN (\'%s\') AND worldId = \'%s\';''' %(','.join(cam_id), world_id) 335 | cursor.execute(query) 336 | return cursor.fetchall() -------------------------------------------------------------------------------- /apperception/world.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | import psycopg2 3 | from metadata_context import * 4 | from video_context import * 5 | import copy 6 | from world_executor import WorldExecutor 7 | import matplotlib.pyplot as plt 8 | 9 | BASE_VOLUME_QUERY_TEXT = "stbox \'STBOX Z(({x1}, {y1}, {z1}),({x2}, {y2}, {z2}))\'" 10 | world_executor = WorldExecutor() 11 | class World: 12 | 13 | def __init__(self, name, units, enable_tasm=False): 14 | self.VideoContext = VideoContext(name, units) 15 | 
self.MetadataContext = MetadataContext(single_mode=False) 16 | self.MetadataContext.start_time = self.VideoContext.start_time 17 | self.GetVideo = False 18 | self.enable_tasm = enable_tasm 19 | # self.AccessedVideoContext = False 20 | 21 | def get_camera(self, cam_id=[]): 22 | # Change depending if you're on docker or not 23 | if self.enable_tasm: 24 | world_executor.connect_db(port=5432, user="docker", password="docker", database_name="mobilitydb") 25 | else: 26 | world_executor.connect_db(user="docker", password="docker", database_name="mobilitydb") 27 | return world_executor.get_camera(cam_id) 28 | 29 | ######################### 30 | ### Video Context #### 31 | ######################### 32 | # TODO(@Vanessa): Add a helper function 33 | def get_lens(self, cam_id=""): 34 | return self.get_camera(cam_id).lens 35 | 36 | def get_name(self): 37 | return self.VideoContext.get_name() 38 | 39 | def get_units(self): 40 | return self.VideoContext.get_units() 41 | 42 | def item(self, item_id, cam_id, item_type, location): 43 | new_context = copy.deepcopy(self) 44 | new_context.VideoContext.item(item_id, cam_id, item_type, location) 45 | return new_context 46 | 47 | def camera(self, cam_id, location, ratio, video_file, metadata_identifier, lens): 48 | new_context = copy.deepcopy(self) 49 | new_context.VideoContext.camera(cam_id, location, ratio, video_file, metadata_identifier, lens) 50 | return new_context 51 | 52 | def add_properties(self, cam_id, properties, property_type): 53 | new_context = copy.deepcopy(self) 54 | new_context.VideoContext.properties(cam_id, properties, property_type) 55 | return new_context 56 | 57 | def recognize(self, cam_id, algo ='Yolo', tracker_type = 'multi', tracker = None): 58 | new_context = copy.deepcopy(self) 59 | new_context.VideoContext.camera_nodes[cam_id].recognize(algo, tracker_type, tracker) 60 | return new_context 61 | 62 | ######################### 63 | ### Metadata Context #### 64 | ######################### 65 | 66 | def get_columns(self, *argv, distinct = False): 67 | new_context = copy.deepcopy(self) 68 | new_context.MetadataContext.get_columns(argv, distinct) 69 | return new_context 70 | 71 | def predicate(self, p, evaluated_var = {}): 72 | new_context = copy.deepcopy(self) 73 | new_context.MetadataContext.predicate(p, evaluated_var) 74 | return new_context 75 | 76 | def selectkey(self, distinct = False): 77 | new_context = copy.deepcopy(self) 78 | new_context.MetadataContext.selectkey(distinct) 79 | return new_context 80 | 81 | def get_trajectory(self, interval = [], distinct = False): 82 | new_context = copy.deepcopy(self) 83 | new_context.MetadataContext.get_trajectory(interval, distinct) 84 | return new_context 85 | 86 | def get_geo(self, interval = [], distinct = False): 87 | new_context = copy.deepcopy(self) 88 | new_context.MetadataContext.get_geo(interval, distinct) 89 | return new_context 90 | 91 | def get_time(self, distinct = False): 92 | new_context = copy.deepcopy(self) 93 | new_context.MetadataContext.get_time(distinct) 94 | return new_context 95 | 96 | def get_distance(self, interval = [], distinct = False): 97 | new_context = copy.deepcopy(self) 98 | new_context.MetadataContext.distance(interval, distinct) 99 | return new_context 100 | 101 | def get_speed(self, interval = [], distinct = False): 102 | new_context = copy.deepcopy(self) 103 | new_context.MetadataContext.get_speed(interval, distinct) 104 | return new_context 105 | 106 | def get_video(self, cam_id=[]): 107 | # Go through all the cameras in 'filtered' world and obtain videos 
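# Note: this call only marks the context for video retrieval; the actual clipping is done by WorldExecutor.get_video() / tasm_get_video() when execute() runs.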
108 | new_context = copy.deepcopy(self) 109 | new_context.GetVideo = True 110 | ## get camera gives the direct results from the data base 111 | new_context.get_video_cams = self.get_camera(cam_id) 112 | return new_context 113 | 114 | def interval(self, time_interval): 115 | new_context = copy.deepcopy(self) 116 | new_context.MetadataContext.interval(time_interval) 117 | return new_context 118 | 119 | def execute(self): 120 | world_executor.create_world(self) 121 | if self.enable_tasm: 122 | world_executor.enable_tasm() 123 | print("successfully enable tasm during execution time") 124 | # Change depending if you're on docker or not 125 | world_executor.connect_db(port=5432, user="docker", password="docker", database_name="mobilitydb") 126 | else: 127 | world_executor.connect_db(user="docker", password="docker", database_name="mobilitydb") 128 | return world_executor.execute() 129 | 130 | def select_intersection_of_interest_or_use_default(self, cam_id, default=True): 131 | print(self.VideoContext.camera_nodes) 132 | camera = self.VideoContext.camera_nodes[cam_id] 133 | video_file = camera.video_file 134 | if default: 135 | x1, y1, z1 = 0.01082532, 2.59647246, 0 136 | x2, y2, z2 = 3.01034039, 3.35985782, 2 137 | else: 138 | vs = cv2.VideoCapture(video_file) 139 | frame = vs.read() 140 | frame = frame[1] 141 | cv2.namedWindow("Frame", cv2.WINDOW_NORMAL) 142 | cv2.resizeWindow('Frame', 384, 216) 143 | initBB = cv2.selectROI("Frame", frame, fromCenter=False) 144 | print(initBB) 145 | cv2.destroyAllWindows() 146 | print("world coordinate #1") 147 | tl = camera.lens.pixel_to_world(initBB[:2], 1) 148 | print(tl) 149 | x1, y1, z1 = tl 150 | print("world coordinate #2") 151 | br = camera.lens.pixel_to_world((initBB[0]+initBB[2], initBB[1]+initBB[3]), 1) 152 | print(br) 153 | x2, y2, z2 = br 154 | return BASE_VOLUME_QUERY_TEXT.format(x1=x1, y1=y1, z1=0, x2=x2, y2=y2, z2=2) 155 | 156 | def overlay_trajectory(self, cam_id, trajectory): 157 | camera = self.VideoContext.get_camera(cam_id) 158 | video_file = camera.video_file 159 | for traj in trajectory: 160 | current_trajectory = np.asarray(traj[0]) 161 | frame_points = camera.lens.world_to_pixels(current_trajectory.T).T 162 | vs = cv2.VideoCapture(video_file) 163 | frame = vs.read() 164 | frame = cv2.cvtColor(frame[1], cv2.COLOR_BGR2RGB) 165 | for point in frame_points.tolist(): 166 | cv2.circle(frame,tuple([int(point[0]), int(point[1])]),3,(255,0,0)) 167 | plt.figure() 168 | plt.imshow(frame) 169 | plt.show() 170 | -------------------------------------------------------------------------------- /apperception/world_executor.py: -------------------------------------------------------------------------------- 1 | from metadata_context_executor import * 2 | from metadata_context import * 3 | from video_context_executor import * 4 | from video_util import * 5 | import numpy as np 6 | 7 | class WorldExecutor: 8 | def __init__(self, world=None): 9 | if world: 10 | self.create_world(world) 11 | self.tasm = None 12 | 13 | def connect_db(self, 14 | host='localhost', 15 | user=None, 16 | password=None, 17 | port=25432, 18 | database_name=None): 19 | 20 | self.conn = psycopg2.connect(database=database_name, user=user, password=password, host=host, port=port) 21 | 22 | def create_world(self, world): 23 | self.curr_world = world 24 | return self 25 | 26 | def enable_tasm(self): 27 | import tasm 28 | if not self.tasm: 29 | self.tasm = tasm.TASM() 30 | 31 | def get_camera(self, cam_id = []): 32 | assert self.curr_world, self.conn 33 | cameras = fetch_camera(self.conn, 
self.curr_world.get_name(), cam_id) 34 | ### each camera record appears as: 35 | ### (cameraId, ratio, origin3d, focalpoints2d, fov, skev_factor) 36 | 37 | return cameras 38 | 39 | def tasm_get_video(self, metadata_results): 40 | ### Get the query text from the metadata context and hand it to TASM's get_video call; 41 | ### TASM executes the query to get the ids, bboxes and timestamps, 42 | ### then uses them to tile the video and return the requested regions 43 | cam_nodes = self.curr_world.get_video_cams 44 | tasm = self.curr_world.fetch_tasm() 45 | for cam_node in cam_nodes: 46 | current_metadata_identifier = cam_node.metadata_id 47 | current_video_file = cam_node.video_file 48 | tasm.activate_regret_based_tiling(current_video_file, current_metadata_identifier) 49 | for label, timestamps in metadata_results.items(): 50 | tasm.get_video_roi( 51 | f'./output/{label}.mp4', # output path 52 | current_video_file, # name in TASM 53 | current_metadata_identifier, # metadata identifier in TASM 54 | label, # label name 55 | timestamps[0], # first frame inclusive 56 | timestamps[-1] # last frame exclusive 57 | ) 58 | tasm.retile_based_on_regret(current_video_file, current_metadata_identifier) 59 | 60 | 61 | def get_video(self, metadata_results): 62 | start_time = self.curr_world.VideoContext.start_time 63 | # print("Start time is", start_time) 64 | ### The cam nodes are raw data from the database 65 | ### TODO: I forget why we used the data from the db instead of directly fetch 66 | ### from the world 67 | cam_nodes = self.curr_world.get_video_cams 68 | video_files = [] 69 | for i in range(len(cam_nodes)): 70 | cam_id, ratio, cam_x, cam_y, cam_z, focal_x, focal_y, fov, skew_factor = cam_nodes[i] 71 | cam_video_file = self.curr_world.VideoContext.camera_nodes[cam_id].video_file 72 | 73 | transform_matrix = create_transform_matrix(focal_x, focal_y, cam_x, cam_y, skew_factor) 74 | 75 | for item_id, vals in metadata_results.items(): 76 | world_coords, timestamps = vals 77 | # print("timestamps are", timestamps) 78 | world_coords = reformat_fetched_world_coords(world_coords) 79 | 80 | cam_coords = world_to_pixel(world_coords, transform_matrix) 81 | 82 | vid_times = convert_datetime_to_frame_num(start_time, timestamps) 83 | # print(vid_times) 84 | 85 | vid_fname = './output/'+self.curr_world.VideoContext.camera_nodes[cam_id].metadata_id + item_id + '.mp4' 86 | # print(vid_fname) 87 | get_video_roi(vid_fname, cam_video_file, cam_coords, vid_times) 88 | video_files.append(vid_fname) 89 | print("output video files", ','.join(video_files)) 90 | return video_files 91 | 92 | def execute(self): 93 | # Edit logic for execution here through checks of whether VideoContext or MetadataContext is being used 94 | video_executor = VideoContextExecutor(self.conn, self.curr_world.VideoContext, self.tasm) 95 | video_executor.execute() 96 | 97 | if self.curr_world.MetadataContext.scan.view == None: 98 | return 99 | 100 | if self.curr_world.GetVideo: 101 | if self.tasm: 102 | metadata_executor = MetadataContextExecutor(self.conn, self.curr_world.MetadataContext.get_columns(primarykey, time)) 103 | metadata_results = video_fetch_reformat_tasm(metadata_executor.execute()) 104 | return self.tasm_get_video(metadata_results) 105 | else: 106 | metadata_executor = MetadataContextExecutor(self.conn, self.curr_world.MetadataContext.get_columns(primarykey, geometry, time)) 107 | metadata_results = video_fetch_reformat(metadata_executor.execute()) 108 | return self.get_video(metadata_results) 109 | 110 | metadata_executor = 
MetadataContextExecutor(self.conn, self.curr_world.MetadataContext) 111 | return metadata_executor.execute() 112 | 113 | def create_transform_matrix(focal_x, focal_y, cam_x, cam_y, skew_factor): 114 | alpha = skew_factor 115 | 116 | transform = np.array([[focal_x, alpha, cam_x, 0], 117 | [0, focal_y, cam_y, 0], 118 | [0, 0, 1, 0] 119 | ]) 120 | 121 | return transform 122 | 123 | def reformat_fetched_world_coords(world_coords): 124 | return np.array(world_coords) 125 | 126 | def world_to_pixel(world_coords, transform): 127 | tl_x, tl_y, tl_z, br_x, br_y, br_z = world_coords.T 128 | 129 | tl_world_pixels = np.array([tl_x, tl_y, tl_z, np.ones(len(tl_x))]) 130 | tl_vid_coords = transform @ tl_world_pixels 131 | 132 | br_world_pixels = np.array([br_x, br_y, br_z, np.ones(len(br_x))]) 133 | br_vid_coords = transform @ br_world_pixels 134 | 135 | return np.stack((tl_vid_coords[0], tl_vid_coords[1], br_vid_coords[0], br_vid_coords[1]), axis=0) 136 | 137 | def video_fetch_reformat_tasm(fetched_meta): 138 | result = {} 139 | for meta in fetched_meta: 140 | item_id, timestamp = meta[0], meta[1] 141 | if item_id in result: 142 | result[item_id]['tracked_cnt'].append(timestamp) 143 | else: 144 | result[item_id] = {'tracked_cnt':[timestamp]} 145 | 146 | return result 147 | 148 | 149 | def video_fetch_reformat(fetched_meta): 150 | result = {} 151 | for meta in fetched_meta: 152 | item_id, coordinates, timestamp = meta[0], meta[1:-1], meta[-1] 153 | if item_id in result: 154 | result[item_id][0].append(coordinates) 155 | result[item_id][1].append(timestamp) 156 | else: 157 | result[item_id] = [[coordinates],[timestamp]] 158 | 159 | return result -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # coding=utf-8 3 | from easydict import EasyDict as edict 4 | import os 5 | 6 | __C = edict() 7 | # Consumers can get config by: from config import cfg 8 | 9 | cfg = __C 10 | 11 | # YOLO options 12 | __C.YOLO = edict() 13 | 14 | __C.YOLO.CLASSES = os.path.join(os.path.dirname(os.path.realpath(__file__)),"../data/classes/coco.names") 15 | __C.YOLO.ANCHORS = [12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401] 16 | __C.YOLO.ANCHORS_V3 = [10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326] 17 | __C.YOLO.ANCHORS_TINY = [23,27, 37,58, 81,82, 81,82, 135,169, 344,319] 18 | __C.YOLO.STRIDES = [8, 16, 32] 19 | __C.YOLO.STRIDES_TINY = [16, 32] 20 | __C.YOLO.XYSCALE = [1.2, 1.1, 1.05] 21 | __C.YOLO.XYSCALE_TINY = [1.05, 1.05] 22 | __C.YOLO.ANCHOR_PER_SCALE = 3 23 | __C.YOLO.IOU_LOSS_THRESH = 0.5 24 | 25 | 26 | # Train options 27 | __C.TRAIN = edict() 28 | 29 | __C.TRAIN.ANNOT_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),"../data/dataset/val2017.txt") 30 | __C.TRAIN.BATCH_SIZE = 2 31 | # __C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] 32 | __C.TRAIN.INPUT_SIZE = 416 33 | __C.TRAIN.DATA_AUG = True 34 | __C.TRAIN.LR_INIT = 1e-3 35 | __C.TRAIN.LR_END = 1e-6 36 | __C.TRAIN.WARMUP_EPOCHS = 2 37 | __C.TRAIN.FISRT_STAGE_EPOCHS = 20 38 | __C.TRAIN.SECOND_STAGE_EPOCHS = 30 39 | 40 | 41 | 42 | # TEST options 43 | __C.TEST = edict() 44 | 45 | __C.TEST.ANNOT_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),"../data/dataset/val2017.txt") 46 | __C.TEST.BATCH_SIZE = 2 47 | __C.TEST.INPUT_SIZE = 416 48 | __C.TEST.DATA_AUG = False 49 | __C.TEST.DECTECTED_IMAGE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),"../data/detection/") 50 | __C.TEST.SCORE_THRESHOLD = 0.25 51 | __C.TEST.IOU_THRESHOLD = 0.5 52 | 53 | 54 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.3' 2 | services: 3 | tasm: 4 | image: apperceptiontasm/tasm:latest 5 | runtime: nvidia 6 | environment: 7 | - NVIDIA_VISIBLE_DEVICES=all 8 | ports: 9 | - "8890:8890" 10 | # Mount volume 11 | volumes: 12 | - "./:/apperception" 13 | entrypoint: /bin/sh 14 | stdin_open: true 15 | tty: true 16 | links: 17 | - "mobilitydb:localhost" 18 | networks: 19 | vpcbr: 20 | ipv4_address: 172.19.0.2 21 | 22 | mobilitydb: 23 | 24 | # image to fetch from docker hub 25 | image: mobilitydb/mobilitydb 26 | ports: 27 | - "25432:5432" 28 | volumes: 29 | - "mobilitydb_data:/var/lib/postgresql" 30 | networks: 31 | vpcbr: 32 | ipv4_address: 172.19.0.3 33 | 34 | volumes: 35 | mobilitydb_data: 36 | 37 | networks: 38 | vpcbr: 39 | driver: bridge 40 | ipam: 41 | config: 42 | - subnet: 172.19.0.0/16 43 | gateway: 172.19.0.1 44 | -------------------------------------------------------------------------------- /h264_videos/BirdsInCage_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/h264_videos/BirdsInCage_h264.mp4 -------------------------------------------------------------------------------- /h264_videos/CrowdRun_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/h264_videos/CrowdRun_h264.mp4 
-------------------------------------------------------------------------------- /h264_videos/ElFuente1_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/h264_videos/ElFuente1_h264.mp4 -------------------------------------------------------------------------------- /h264_videos/ElFuente2_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/h264_videos/ElFuente2_h264.mp4 -------------------------------------------------------------------------------- /h264_videos/OldTownCross_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/h264_videos/OldTownCross_h264.mp4 -------------------------------------------------------------------------------- /h264_videos/Seeking_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/h264_videos/Seeking_h264.mp4 -------------------------------------------------------------------------------- /h264_videos/Tennis_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apperception-db/apperception/490371af3492c2ed03d98ccaec71c66b10c79e0b/h264_videos/Tennis_h264.mp4 -------------------------------------------------------------------------------- /pg_extender/overlap.sql: -------------------------------------------------------------------------------- 1 | DROP FUNCTION IF EXISTS overlap(stbox, stbox); 2 | CREATE OR REPLACE FUNCTION overlap(bbox1 stbox, bbox2 stbox) RETURNS boolean AS 3 | $BODY$ 4 | BEGIN 5 | RETURN bbox1 && bbox2; 6 | END 7 | $BODY$ 8 | LANGUAGE 'plpgsql' ; -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "apperception" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Your Name "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.8" 9 | PyPika = "^0.48.9" 10 | pyquaternion = "^0.9.9" 11 | numpy = "^1.22.4" 12 | matplotlib = "^3.5.2" 13 | opencv-python = "^4.6.0" 14 | pandas = "^1.4.2" 15 | decompyle3 = "^3.9.0" 16 | psycopg2-binary = "^2.9.3" 17 | 18 | [tool.poetry.dev-dependencies] 19 | jupyterlab = "^3.4.3" 20 | flake8 = "^4.0.1" 21 | mypy = "^0.961" 22 | types-psycopg2 = "^2.9.16" 23 | pyright = "^1.1.253" 24 | pytest = "^7.1.2" 25 | astpretty = "^3.0.0" 26 | coverage = "^6.4.1" 27 | pytest-cov = "^3.0.0" 28 | 29 | [build-system] 30 | requires = ["poetry-core>=1.0.0"] 31 | build-backend = "poetry.core.masonry.api" 32 | 33 | [tool.black] 34 | line-length = 100 35 | verbose = true 36 | 37 | [tool.autopep8] 38 | in-place = true 39 | recursive = true 40 | aggressive = 3 41 | verbose = 2 42 | 43 | [tool.mypy] 44 | exclude = [ 45 | 'trackers/object_tracker_yolov4_deepsort\.py', 46 | 'trackers/object_tracker_yolov5_deepsort\.py', 47 | 'legacy/.*', 48 | 'video_util\.py', 49 | 'scenic_util\.py', 50 | ] 51 | 52 | [[tool.mypy.overrides]] 53 | module = [ 54 | 'pandas', 55 | 'pypika', 56 | 'pypika.dialects', 57 | 'pypika.functions', 58 | 'cv2', 59 | 
'pyquaternion', 60 | 'pyquaternion.quaternion', 61 | 'uncompyle6', 62 | 'decompyle3', 63 | ] 64 | ignore_missing_imports = true 65 | 66 | [tool.pyright] 67 | ignore = [ 68 | 'apperception/legacy/*', 69 | 'apperception/video_util.py', 70 | 'apperception/trackers/object_tracker_yolov4_deepsort.py', 71 | 'apperception/trackers/object_tracker_yolov5_deepsort.py', 72 | ] 73 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | uncompyle6 2 | psycopg2 3 | ffmpeg-python 4 | ffprobe-python 5 | easydict 6 | scipy 7 | opencv-python 8 | lxml 9 | tqdm 10 | tensorflow 11 | absl-py 12 | easydict 13 | matplotlib 14 | pillow 15 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | git clone https://github.com/theAIGuysCode/yolov4-deepsort.git 3 | mv ./yolov4-tiny.weights ./yolov4-deepsort/data 4 | mv ./yolov4.weights ./yolov4-deepsort/data 5 | python3 -m venv env 6 | source env/bin/activate 7 | pip3 install -r requirements.txt 8 | mv checkpoint/ ./yolov4-deepsort 9 | pip install -r requirements.txt 10 | cd yolov4-deepsort 11 | python3 save_model.py --model yolov4 12 | cd .. 13 | mv ./config.py ./yolov4-deepsort/core 14 | mkdir output 15 | --------------------------------------------------------------------------------
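To tie the files above together, here is a minimal usage sketch of the fluent `World` API defined in `apperception/world.py`: build a world, register a camera, run recognition, then chain predicates and fetch trajectories or clipped videos. This is an illustrative sketch rather than code from the repository: the world name, units string, camera id, location, ratio, video path, metadata identifier, and time interval are hypothetical placeholders, and the lens construction is elided because the lens interface in `apperception/lens.py` is not shown in this listing.

```python
# Illustrative sketch only; every value marked "hypothetical" is a placeholder.
from world import World

lens = ...  # build a lens object using apperception/lens.py (interface not shown here)

w = (
    World(name="demo_world", units="metric")        # hypothetical name/units
    .camera(
        cam_id="cam_0",                             # hypothetical camera id
        location=(0, 0, 0),                         # hypothetical camera location
        ratio=0.5,                                  # hypothetical ratio
        video_file="./demo.mp4",                    # hypothetical video path
        metadata_identifier="demo_world_cam_0",     # hypothetical metadata id
        lens=lens,
    )
    .recognize("cam_0")  # YOLOv4 + Deep SORT tracking, stored via add_recognized_objs
)

# Region of interest as a MobilityDB stbox string, then chained filters.
volume = w.select_intersection_of_interest_or_use_default(cam_id="cam_0")
cars = (
    w.predicate(lambda obj: obj.object_type == "car")
     .predicate(lambda obj: obj.location in volume, {"volume": volume})
     .interval([0, 60])                             # hypothetical 60-second window
)

# execute() connects to the MobilityDB store using the credentials hard-coded in
# world.py / world_executor.py, so the database container must already be running.
trajectories = cars.get_trajectory(distinct=True).execute()
videos = cars.get_video(cam_id=["cam_0"]).execute()
```

Each chained call returns a deep copy of the `World`, so intermediate contexts such as `w` and `cars` can be reused or branched without mutating one another.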