├── LDPTrace
│   ├── code
│   │   ├── logger
│   │   │   ├── __init__.py
│   │   │   ├── logger_config.json
│   │   │   └── logger.py
│   │   ├── parse.py
│   │   ├── dataset.py
│   │   ├── trajectory.py
│   │   ├── utils.py
│   │   ├── map_func.py
│   │   ├── ldp.py
│   │   ├── grid.py
│   │   ├── experiment.py
│   │   └── main.py
│   └── data
│       ├── campus
│       │   └── readme.txt
│       ├── porto
│       │   └── readme.txt
│       └── oldenburg
│           └── readme.txt
├── fig
│   └── framework.jpg
├── .gitignore
├── README.md
└── LICENSE
/LDPTrace/code/logger/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fig/framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zealscott/LDPTrace/HEAD/fig/framework.jpg -------------------------------------------------------------------------------- /LDPTrace/data/campus/readme.txt: -------------------------------------------------------------------------------- 1 | This directory contains the corresponding dataset synthesized by LDPTrace. -------------------------------------------------------------------------------- /LDPTrace/data/porto/readme.txt: -------------------------------------------------------------------------------- 1 | This directory contains the corresponding dataset synthesized by LDPTrace. -------------------------------------------------------------------------------- /LDPTrace/data/oldenburg/readme.txt: -------------------------------------------------------------------------------- 1 | This directory contains the corresponding dataset synthesized by LDPTrace. -------------------------------------------------------------------------------- /LDPTrace/code/parse.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | parser = argparse.ArgumentParser() 4 | 5 | parser.add_argument('--epsilon', type=float, default=1.0, 6 | help='Privacy budget') 7 | parser.add_argument('--grid_num', type=int, default=6, 8 | help='Grid granularity: the map is decomposed into n x n grids') 9 | parser.add_argument('--query_num', type=int, default=200, 10 | help='Number of experiment queries') 11 | parser.add_argument('--dataset', type=str, default='oldenburg') 12 | parser.add_argument('--re_syn', action='store_true', 13 | help='Re-synthesize the database instead of using an existing file') 14 | parser.add_argument('--max_len', type=float, default=0.9, 15 | help='Quantile of estimated max length') 16 | parser.add_argument('--size_factor', type=float, default=9.0, 17 | help='Reciprocal of query size (1/r)') 18 | parser.add_argument('--multiprocessing', action='store_true') 19 | 20 | 21 | args = parser.parse_args() 22 | -------------------------------------------------------------------------------- /LDPTrace/code/logger/logger_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "disable_existing_loggers": false, 4 | "formatters": { 5 | "simple": {"format": "%(asctime)s %(message)s"}, 6 | "datetime": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"} 7 | }, 8 | "handlers": { 9 | "console": { 10 | "class": "logging.StreamHandler", 11 | "level": "DEBUG", 12 | "formatter": "simple", 13 | "stream": "ext://sys.stdout" 14 | }, 15 | "info_file_handler": { 16 | "class": "logging.handlers.RotatingFileHandler", 17 | "level": "INFO", 18 | "formatter": "datetime", 19 | "filename": "info.log", 20 | "maxBytes": 10485760, 21 | "backupCount": 20, "encoding": "utf8" 22 | } 23 | }, 24 | "root": { 25 | "level": "INFO", 26 |
"handlers": [ 27 | "console", 28 | "info_file_handler" 29 | ] 30 | } 31 | } -------------------------------------------------------------------------------- /LDPTrace/code/dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | import numpy as np 3 | import json 4 | import pickle 5 | import trajectory 6 | 7 | 8 | def read_brinkhoff(dataset='brinkhoff'): 9 | """ 10 | Brinkhoff dataset: 11 | #n: 12 | >0: x1,y1;x2,y2;...: 13 | """ 14 | db = [] 15 | file_name = f'../data/{dataset}.dat' 16 | with open(file_name, 'r') as f: 17 | row = f.readline() 18 | while row: 19 | if row[0] == '#': 20 | row = f.readline() 21 | continue 22 | if not row[0] == '>': 23 | print(row) 24 | exit() 25 | # Skip '>0:' and ';\n' in the end 26 | row = row[3:-2].split(';') # row: ['x1,y1', 'x2,y2', ...] 27 | 28 | t = [x.split(',') for x in row] # t: [['x1','y1'], ['x2','y2'], ...] 29 | 30 | t = [(eval(x[0]), eval(x[1])) for x in t] # t: [(x1,y1), (x2,y2), ...] 31 | 32 | db.append(t) 33 | row = f.readline() 34 | 35 | return db 36 | 37 | 38 | def dataset_stats(db: List[List[Tuple[float, float]]], db_name: str): 39 | lengths = np.asarray([len(t) for t in db]) 40 | 41 | xs = [[p[0] for p in t] for t in db] 42 | ys = [[p[1] for p in t] for t in db] 43 | 44 | min_xs = [min(x) for x in xs] 45 | min_ys = [min(y) for y in ys] 46 | max_xs = [max(x) for x in xs] 47 | max_ys = [max(y) for y in ys] 48 | 49 | stats = { 50 | 'num': len(db), 51 | 'min_len': int(min(lengths)), 52 | 'max_len': int(max(lengths)), 53 | 'mean_len': float(np.mean(lengths)), 54 | 'min_x': min(min_xs), 55 | 'min_y': min(min_ys), 56 | 'max_x': max(max_xs), 57 | 'max_y': max(max_ys) 58 | } 59 | 60 | print(stats) 61 | 62 | with open(db_name, 'w') as f: 63 | json.dump(stats, f) 64 | 65 | return stats -------------------------------------------------------------------------------- /LDPTrace/code/logger/logger.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import logging 3 | import logging.config 4 | from pathlib import Path 5 | import json 6 | from datetime import datetime 7 | 8 | 9 | def read_json(fname): 10 | fname = Path(fname) 11 | with fname.open('rt') as handle: 12 | return json.load(handle, object_hook=OrderedDict) 13 | 14 | 15 | def setup_logging(save_dir, log_config='logger/logger_config.json', default_level=logging.INFO): 16 | """ 17 | Setup logging configuration 18 | """ 19 | log_config = Path(log_config) 20 | if log_config.is_file(): 21 | config = read_json(log_config) 22 | # modify logging paths based on run config 23 | for _, handler in config['handlers'].items(): 24 | if 'filename' in handler: 25 | handler['filename'] = str(save_dir / handler['filename']) 26 | 27 | logging.config.dictConfig(config) 28 | else: 29 | print("Warning: logging configuration file is not found in {}.".format(log_config)) 30 | logging.basicConfig(level=default_level) 31 | 32 | 33 | class ConfigParser: 34 | def __init__(self, name, save_dir): 35 | self.exper_name = name 36 | run_id = datetime.now().strftime(r'%m%d_%H%M%S') 37 | self.log_dir = Path(save_dir) / 'log' / self.exper_name / run_id 38 | 39 | self.log_dir.mkdir(parents=True) 40 | 41 | # configure logging module 42 | setup_logging(self.log_dir) 43 | self.log_levels = { 44 | 0: logging.WARNING, 45 | 1: logging.INFO, 46 | 2: logging.DEBUG 47 | } 48 | 49 | def get_logger(self, name, verbosity=2): 50 | msg_verbosity = 'verbosity option {} is invalid. 
Valid options are {}.'.format(verbosity, 51 | self.log_levels.keys()) 52 | assert verbosity in self.log_levels, msg_verbosity 53 | logger = logging.getLogger(name) 54 | logger.setLevel(self.log_levels[verbosity]) 55 | return logger 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LDPTrace/code/trajectory.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from grid import GridMap, Grid 3 | import grid 4 | import utils 5 | import map_func 6 | import numpy as np 7 | 8 | 9 | def trajectory_point2grid(t: List[Tuple[float, float]], g: GridMap, interp=True): 10 | """ 11 | Convert trajectory from raw points to grids 12 | :param t: raw trajectory 13 | :param g: grid map 14 | :param interp: whether to interpolate 15 | :return: grid trajectory 16 | """ 17 | grid_map = g.map 18 | grid_t = list() 19 | 20 | for p in range(len(t)): 21 | point = t[p] 22 | found = False 23 | # Find which grid the point belongs to 24 | for i in range(len(grid_map)): 25 | for j in range(len(grid_map[i])): 26 | if grid_map[i][j].in_cell(point): 27 | grid_t.append(grid_map[i][j]) 28 | found = True 29 | break 30 | if found: 31 | break 32 | 33 | # Remove duplicates 34 | grid_t_new = [grid_t[0]] 35 | for i in range(1, len(grid_t)): 36 | if not grid_t[i].index == grid_t_new[-1].index: 37 | grid_t_new.append(grid_t[i]) 38 | 39 | # Interpolation 40 | if interp: 41 | grid_t_final = list() 42 | for i in range(len(grid_t_new)-1): 43 | current_grid = grid_t_new[i] 44 | next_grid = grid_t_new[i+1] 45 | # Adjacent, no need to interpolate 46 | if grid.is_adjacent_grids(current_grid, next_grid): 47 | grid_t_final.append(current_grid) 48 | else: 49 | # Result of find_shortest_path() doesn't include the end point 50 | grid_t_final.extend(g.find_shortest_path(current_grid, next_grid)) 51 | 52 | grid_t_final.append(grid_t_new[-1]) 53 | return grid_t_final 54 | 55 | return grid_t_new 56 | 57 | 58 | def trajectory_grid2points(g_t: List[Grid]): 59 | if len(g_t) == 1: 60 | return [g_t[0].sample_point() for _ in range(2)] 61 | return [g.sample_point() for g in g_t] 62 | 63 | 64 | def pass_through(t: List[Grid], g: Grid): 65 | for t_g in t: 66 | if t_g.index == g.index: 67 | return True 68 | 69 | return False 70 | 71 | 72 | def get_diameter(t: List[Tuple[float, float]]): 73 | max_d = 0 74 | for i in range(len(t)): 75 | for j in range(i+1, len(t)): 76 | max_d = max(max_d, utils.euclidean_distance(t[i], t[j])) 77 | 78 | return max_d 79 | 80 | 81 | def get_travel_distance(t: List[Tuple[float, float]]): 82 | dist = 0 83 | for i in range(len(t) - 1): 84 | curr_p = t[i] 85 | next_p = t[i+1] 86 | dist += utils.euclidean_distance(curr_p, next_p) 87 | 88 | return dist 89 | 90 | 91 | def get_real_markov(grid_db: List[List[Grid]], grid_map: GridMap): 92 | markov_vec = np.zeros(grid_map.size * 8) 93 | for t in grid_db: 94 | for i in range(len(t) - 1): 95 | curr_grid = t[i] 96 | next_grid = t[i + 1] 97 | map_id = map_func.adjacent_pair_grid_map_func((curr_grid, next_grid), grid_map) 98 | markov_vec[map_id] += 1 99 | 100 | return markov_vec 101 | 
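A minimal usage sketch for the helpers in `trajectory.py` above (not a file in the repository): the 6 x 6 grid and the toy coordinates are made-up values, and it assumes the snippet is run from `LDPTrace/code/` so that `grid` and `trajectory` are importable.

```python
# Hypothetical sketch illustrating trajectory.py; coordinates and grid size are made up.
from grid import GridMap
import trajectory

# A toy raw trajectory: List[Tuple[float, float]], as expected throughout the code
raw_t = [(0.1, 0.2), (0.35, 0.4), (0.9, 0.95)]

# 6 x 6 grid map over the bounding box [0, 1] x [0, 1]
grid_map = GridMap(6, 0.0, 0.0, 1.0, 1.0)

# Raw points -> grid cells; interp=True fills gaps so consecutive cells are adjacent
grid_t = trajectory.trajectory_point2grid(raw_t, grid_map, interp=True)
print([g.index for g in grid_t])  # (i, j) indices of the visited cells

# Grid cells -> synthetic points (one uniformly sampled point per cell)
restored_t = trajectory.trajectory_grid2points(grid_t)
print(trajectory.get_travel_distance(restored_t))  # total travel distance of the sampled trajectory
```

Sampling one point per cell is the same step that `convert_grid_to_raw` in `main.py` relies on when it turns synthesized grid trajectories back into point trajectories.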
-------------------------------------------------------------------------------- /LDPTrace/code/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from typing import Tuple, List 4 | from math import sqrt 5 | 6 | 7 | def precompute_markov(one_level_mat: np.ndarray, max_level: int): 8 | """ 9 | Estimate N-level markov probabilities using one-level prob 10 | :return: list of markov matrices 11 | """ 12 | # Use 1-level matrix as a placeholder for 0-level matrix 13 | markov_mats = [one_level_mat, one_level_mat] 14 | 15 | for i in range(2, max_level + 1): 16 | prev_level_mat = markov_mats[i - 1] 17 | 18 | # Use matrix multiply to calculate next-level matrix 19 | markov_mats.append(np.matmul(prev_level_mat, one_level_mat)) 20 | 21 | return markov_mats 22 | 23 | 24 | # @jit(nopython=True, fastmath=True) 25 | def euclidean_distance(p1: Tuple[float, float], p2: Tuple[float, float]): 26 | return sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) 27 | 28 | 29 | # @jit(nopython=True) 30 | def dtw_distance(t0: List[Tuple[float, float]], t1: List[Tuple[float, float]]): 31 | """ 32 | Usage 33 | ----- 34 | The Dynamic-Time Warping distance between trajectory t0 and t1. 35 | Parameters 36 | ---------- 37 | param t0 : List[Tuple[float,float]] 38 | param t1 : List[Tuple[float,float]] 39 | Returns 40 | ------- 41 | dtw : float 42 | The Dynamic-Time Warping distance between trajectory t0 and t1 43 | """ 44 | 45 | n0 = len(t0) 46 | n1 = len(t1) 47 | C = np.zeros((n0 + 1, n1 + 1)) 48 | C[1:, 0] = np.inf 49 | C[0, 1:] = np.inf 50 | for i in range(1, n0 + 1): 51 | for j in range(1, n1 + 1): 52 | C[i, j] = euclidean_distance(t0[i - 1], t1[j - 1]) + min(C[i, j - 1], C[i - 1, j - 1], C[i - 1, j]) 53 | dtw = C[n0, n1] 54 | return dtw 55 | 56 | 57 | def point_to_line_distance(p0: Tuple[float, float], p1: Tuple[float, float], p2: Tuple[float, float]): 58 | """ 59 | Euclidean distance between p0 to p1p2 60 | """ 61 | # A = y2 - y1 62 | A = p2[1] - p1[1] 63 | # B = x1 - x2 64 | B = p1[0] - p2[0] 65 | # C = x1(y1-y2) + y1(x2-x1) 66 | C = p1[0] * (p1[1] - p2[1]) + p1[1] * (p2[0] - p1[0]) 67 | 68 | return np.abs(A * p0[0] + B * p0[1] + C) / (np.sqrt(A ** 2 + B ** 2)) 69 | 70 | 71 | def kl_divergence(prob1, prob2): 72 | prob1 = np.asarray(prob1) 73 | prob2 = np.asarray(prob2) 74 | 75 | kl = np.log((prob1 + 1e-8) / (prob2 + 1e-8)) * prob1 76 | 77 | return np.sum(kl) 78 | 79 | 80 | def jensen_shannon_distance(prob1, prob2): 81 | prob1 = np.asarray(prob1) 82 | prob2 = np.asarray(prob2) 83 | 84 | avg_prob = (prob1 + prob2) / 2 85 | 86 | return 0.5 * kl_divergence(prob1, avg_prob) + 0.5 * kl_divergence(prob2, avg_prob) 87 | 88 | 89 | def lonlat2meters(lon: float, lat: float): 90 | """ 91 | return 2 `float` x and y 92 | """ 93 | semi_major_axis = 6378137.0 94 | east = lon * 0.017453292519943295 95 | north = lat * 0.017453292519943295 96 | t = math.sin(north) 97 | return semi_major_axis * east, 3189068.5 * math.log((1 + t) / (1 - t)) 98 | 99 | 100 | def meters2lonlat(x: float, y: float): 101 | """ 102 | return 2 `float` lon and lat 103 | """ 104 | semi_major_axis = 6378137.0 105 | lon = x / semi_major_axis / 0.017453292519943295 106 | t = math.exp(y / 3189068.5) 107 | lat = math.asin((t - 1) / (t + 1)) / 0.017453292519943295 108 | return lon, lat 109 | 110 | 111 | def get_length_buckets(max_len): 112 | step = 1 113 | while max_len // step > 5: 114 | step = step + 1 115 | 116 | start = 0 117 | end = start + step 118 | 119 | buckets = [] 120 | while start 
< max_len: 121 | buckets.append((start, end)) 122 | start += step 123 | end = start + step 124 | 125 | return buckets 126 | 127 | def grid_num(n, l, eps, fre, lam=2.5): 128 | E = math.exp(eps/(fre * l)) 129 | return lam * math.pow(n*l*math.pow(E-1,2)/E, 0.25) 130 | -------------------------------------------------------------------------------- /LDPTrace/code/map_func.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from grid import GridMap, Grid 3 | 4 | 5 | # =========================== MAP FUNCTIONS ========================== # 6 | def grid_index_map_func(g: Grid, grid_map: GridMap): 7 | """ 8 | Map a grid to its index: (i, j) => int 9 | return: i*|column|+j 10 | """ 11 | i, j = g.index 12 | return i * len(grid_map.map[0]) + j 13 | 14 | 15 | def pair_grid_index_map_func(grid_pair: Tuple[Grid, Grid], grid_map: GridMap): 16 | """ 17 | Map a pair of grid to index: (g1, g2) => (i1, i2) => int 18 | Firstly map (g1, g2) to a matrix of [N x N], where N is 19 | the total number of grids 20 | return: i1 * N + i2 21 | """ 22 | g1, g2 = grid_pair 23 | index1 = grid_index_map_func(g1, grid_map) 24 | index2 = grid_index_map_func(g2, grid_map) 25 | 26 | return index1 * grid_map.size + index2 27 | 28 | 29 | def adjacent_pair_grid_map_func(grid_pair: Tuple[Grid, Grid], grid_map: GridMap): 30 | """ 31 | Map a pair of adjacent grid to index: (g1, g2) => (j1, j2) => int 32 | Firstly map (g1, g2) to a matrix of [N x 8], where N is 33 | the total number of grids 34 | |0|1|2| 35 | |3|-|4| 36 | |5|6|7| 37 | return: j1 * 8 + j2 38 | """ 39 | g1, g2 = grid_pair 40 | if not grid_map.is_adjacent_grids(g1, g2): 41 | return -1 42 | 43 | index1 = grid_index_map_func(g1, grid_map) 44 | i1, j1 = g1.index 45 | i2, j2 = g2.index 46 | 47 | if j2 == j1 + 1: 48 | index2 = i2 - i1 + 1 49 | elif j2 == j1: 50 | index2 = 3 if i2 == i1 - 1 else 4 51 | else: 52 | index2 = i2 - i1 + 6 53 | 54 | return index1 * 8 + index2 55 | 56 | 57 | def grid_index_inv_func(index: int, grid_map: GridMap): 58 | """ 59 | Inverse function of grid_index_map_func 60 | """ 61 | i = index // len(grid_map.map[0]) 62 | j = index % len(grid_map.map[0]) 63 | return grid_map.map[i][j] 64 | 65 | 66 | def pair_grid_index_inv_func(index: int, grid_map: GridMap): 67 | """ 68 | Inverse function of pair_grid_index_map_func 69 | """ 70 | index1 = index // grid_map.size 71 | index2 = index % grid_map.size 72 | return grid_index_inv_func(index1, grid_map), grid_index_inv_func(index2, grid_map) 73 | 74 | 75 | def adjacent_pair_grid_inv_func(index: int, grid_map: GridMap): 76 | """ 77 | Inverse function of adjacent_pair_grid_map_func 78 | """ 79 | index1 = index // 8 80 | g1 = grid_index_inv_func(index1, grid_map) 81 | i1, j1 = g1.index 82 | index2 = index % 8 83 | 84 | if 0 <= index2 <= 2: 85 | j2 = j1 + 1 86 | i2 = index2 + i1 - 1 87 | elif 3 <= index2 <= 4: 88 | j2 = j1 89 | i2 = i1 - 1 if index2 == 3 else i1 + 1 90 | else: 91 | j2 = j1 - 1 92 | i2 = index2 + i1 - 6 93 | 94 | # Out of bound 95 | if not (0 <= i2 < len(grid_map.map) and 0 <= j2 < len(grid_map.map[0])): 96 | return g1, None 97 | g2 = grid_map.map[i2][j2] 98 | return g1, g2 99 | 100 | 101 | def pair_grid_no_dir_map_func(grid_pair: Tuple[Grid, Grid], grid_map: GridMap): 102 | """ 103 | No direction: A->B == B->A. 
O(n^4/2) 104 | """ 105 | g1, g2 = grid_pair 106 | indexes = (grid_index_map_func(g1, grid_map), grid_index_map_func(g2, grid_map)) 107 | index1 = min(indexes) 108 | index2 = max(indexes) 109 | 110 | # If the row is at lower half of the matrix, need to move it to the upper half 111 | if index1 >= grid_map.size - grid_map.size // 2: 112 | # Central symmetry 113 | index1 = grid_map.size - index1 114 | index2 = grid_map.size - index2 - 1 115 | 116 | return index1 * grid_map.size + index2 117 | 118 | 119 | def pair_grid_no_dir_inv_func(index: int, grid_map: GridMap): 120 | index1 = index // grid_map.size 121 | index2 = index % grid_map.size 122 | 123 | if index1 > index2: 124 | index1 = grid_map.size - index1 125 | index2 = grid_map.size - index2 - 1 126 | 127 | return grid_index_inv_func(index1, grid_map), grid_index_inv_func(index2, grid_map) 128 | 129 | 130 | def trip_length_map_func(trip_len: Tuple[Grid, Grid, int], grid_map: GridMap, buckets): 131 | """ 132 | ((start, end), length) -> index) 133 | """ 134 | trip = (trip_len[0], trip_len[1]) 135 | length = trip_len[2] 136 | trip_index = pair_grid_no_dir_map_func(trip, grid_map) 137 | 138 | length_index = -1 139 | for bucket_id, (start, end) in enumerate(buckets): 140 | if length > start: 141 | if length < 0: 142 | # infinity 143 | length_index = bucket_id 144 | break 145 | if end > 0 and length <= end: 146 | length_index = bucket_id 147 | break 148 | 149 | return trip_index * len(buckets) + length_index 150 | 151 | 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LDPTrace 2 | 3 |
4 | ![LDPTrace framework](fig/framework.jpg) 5 |
6 | 7 | This is our Python implementation for the paper: 8 | 9 | > Yuntao Du, Yujia Hu, Zhikun Zhang, Ziquan Fang, Lu Chen, Baihua Zheng and Yunjun Gao (2023). LDPTrace: Locally Differentially Private Trajectory Synthesis. Paper in [arXiv](https://arxiv.org/abs/2302.06180) or [PVLDB](https://www.vldb.org/pvldb/vol16/p1897-gao.pdf). In VLDB'23, Vancouver, Canada, August 28 to September 1, 2023. 10 | 11 | See our [blog](https://research.zealscott.com/blog/2023/04/22/LDPTrace/) for an introduction to this work. 12 | 13 | ## Environment Requirements 14 | 15 | - Ubuntu OS 16 | - Python >= 3.8 (Anaconda3 is recommended) 17 | - numpy == 1.21.4 18 | 19 | ## Dataset 20 | 21 | ### Dataset Statistics 22 | 23 | We conduct our experiments on four benchmark trajectory datasets. The overall statistics are listed below: 24 | 25 | | Dataset | Size | Average Length | Sampling Interval | 26 | | --------- | --------- | -------------- | ----------------- | 27 | | Oldenburg | 500,000 | 69.75 | 15.6 sec | 28 | | Porto | 361,591 | 34.13 | 15 sec | 29 | | Hangzhou | 348,144 | 125.02 | 5 sec | 30 | | Campus | 1,000,000 | 35.98 | 25 sec | 31 | 32 | The Oldenburg dataset is provided for testing. 33 | 34 | ### Oldenburg 35 | 36 | * Oldenburg is a synthetic dataset simulated by Brinkhoff's network-based moving objects generator. It is based on the map of Oldenburg city, Germany. 37 | 38 | * For the Oldenburg dataset, please refer to http://iapg.jade-hs.de/personen/brinkhoff/generator/ to generate the dataset. The parameter settings we used are as follows: 39 | * obj./time 0 0 40 | * maximum time: 1000 41 | * classes: 1 0 42 | * max. speed div: 50 43 | 44 | * After obtaining the raw dataset, it needs to be transformed into the standard input format: 45 | 46 | ``` 47 | #0: 48 | >0: x_0,y_0; x_1,y_1;... 49 | #1: 50 | >0: x_0,y_0; x_1,y_1;... 51 | #2: 52 | >0:... 53 | ... 54 | ``` 55 | '>0' is a fixed string denoting the start of a trajectory. 56 | 57 | A different format can also work as long as the variable `db` in the code is of type `List[List[Tuple[float, float]]]`. 58 | * Place the dataset in the `./LDPTrace/data/` directory. 59 | 60 | 61 | ## Reproducibility & Run 62 | 63 | Please make sure the data file is in the ``./LDPTrace/data/`` directory. 64 | 65 | Here's an example of running LDPTrace: 66 | 67 | ```bash 68 | python main.py --dataset oldenburg --grid_num 6 --max_len 0.9 --epsilon 1.0 --re_syn --multiprocessing 69 | ``` 70 | 71 | LDPTrace will save the synthesized database in ``./LDPTrace/data/DATASET_NAME/`` and output the evaluation metrics. 72 | 73 | ## Configurations 74 | 75 | The running parameters include: 76 | 77 | + --dataset: 78 | + 'oldenburg': for the Oldenburg dataset 79 | + 'porto': for the Porto dataset 80 | + 'campus': for the Campus dataset 81 | + --epsilon: privacy budget 82 | + --grid_num: grid granularity `N`; the spatial map will be decomposed into `N x N` grids. Based on the theoretical analysis in our paper, we recommend `N=6` for the Oldenburg, Porto and Campus datasets. 83 | + --max_len: quantile of the estimated max length; the default setting is 0.9 84 | + --size_factor: reciprocal of query size `r` (i.e., `1/r`); the default setting is 9 85 | + --query_num: the number of range queries; LDPTrace will output the average query error. The default setting is 200 86 | + --re_syn: whether to re-synthesize the database. If this parameter is not set, LDPTrace will try to read a saved database that was synthesized before.
87 | + --multiprocessing: whether to use multiprocessing in experiments to improve efficiency. 88 | 89 | ## Acknowledgement 90 | 91 | Any scientific publications that use our datasets/codes or mention our work should cite the following paper as the reference: 92 | 93 | ``` 94 | @inproceedings{LDPTrace, 95 | author = {Yuntao Du and 96 | Yujia Hu and 97 | Zhikun Zhang and 98 | Ziquan Fang and 99 | Lu Chen and 100 | Baihua Zheng and 101 | Yunjun Gao}, 102 | title = {{LDPTrace}: Locally Differentially Private Trajectory Synthesis}, 103 | booktitle = {{PVLDB}}, 104 | pages = {1897--1909}, 105 | year = {2023} 106 | } 107 | ``` 108 | 109 | 110 | Nobody guarantees the correctness of the data, its suitability for any particular purpose, or the validity of results based on the use of the data set. The data set may be used for any research purposes under the following conditions: 111 | 112 | * The user must acknowledge the use of the data set in publications resulting from the use of the data set. 113 | * The user may not redistribute the data without separate permission. 114 | * The user may not try to deanonymise the data. 115 | * The user may not use this information for any commercial or revenue-bearing purposes without first obtaining permission from us. 116 | -------------------------------------------------------------------------------- /LDPTrace/code/ldp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import math 4 | 5 | 6 | class LDPServer: 7 | def __init__(self, epsilon, d, map_func=None): 8 | """ 9 | General class of server side 10 | :param epsilon: privacy budget 11 | :param d: domain size 12 | :param map_func: index mapping function 13 | """ 14 | self.epsilon = epsilon 15 | self.d = d 16 | self.map_func = lambda x: x if map_func is None else map_func 17 | 18 | # Sum of updated data 19 | self.aggregated_data = np.zeros(self.d) 20 | # Adjusted from aggregated data 21 | self.adjusted_data = np.zeros(self.d) 22 | 23 | # Number of users 24 | self.n = 0 25 | 26 | def aggregate(self, data): 27 | """ 28 | Aggregate users' updated data items 29 | :param data: real data item updated by user 30 | """ 31 | raise NotImplementedError('Aggregation on sever not implemented!') 32 | 33 | def adjust(self): 34 | """ 35 | Adjust aggregated data to get unbiased estimation 36 | """ 37 | raise NotImplementedError('Adjust on sever not implemented!') 38 | 39 | def initialize(self, epsilon, d, map_func=None): 40 | self.epsilon = epsilon 41 | self.d = d 42 | #self.map_func = lambda x: x if (map_func is None) else map_func 43 | self.map_func = map_func 44 | 45 | # Sum of updated data 46 | self.aggregated_data = np.zeros(self.d) 47 | # Adjusted from aggregated data 48 | self.adjusted_data = np.zeros(self.d) 49 | 50 | # Number of users 51 | self.n = 0 52 | 53 | 54 | class LDPClient: 55 | def __init__(self, epsilon, d, map_func=None): 56 | """ 57 | General class of client side 58 | :param epsilon: privacy budget 59 | :param d: domain size 60 | :param map_func: index mapping function 61 | """ 62 | self.epsilon = epsilon 63 | self.d = d 64 | #self.map_func = lambda x: x if (map_func is None) else map_func 65 | self.map_func = map_func 66 | 67 | def _perturb(self, index): 68 | """ 69 | Internal method for perturbing real data 70 | :param index: index of real data item 71 | """ 72 | raise NotImplementedError('Perturb on client not implemented!') 73 | 74 | def privatise(self, data): 75 | """ 76 | Public method for privatising real data 77 | 
:param data: data item 78 | """ 79 | raise NotImplementedError('Privatise on sever not implemented!') 80 | 81 | def initialize(self, epsilon, d, map_func=None): 82 | self.epsilon = epsilon 83 | self.d = d 84 | self.map_func = lambda x: x if map_func is None else map_func 85 | 86 | 87 | class OUEServer(LDPServer): 88 | def __init__(self, epsilon, d, map_func=None): 89 | """ 90 | Optimal Unary Encoding of server side 91 | """ 92 | super(OUEServer, self).__init__(epsilon, d, map_func) 93 | 94 | # Probability of 1=>1 95 | self.p = 0.5 96 | # Probability of 0=>1 97 | self.q = 1 / (math.pow(math.e, self.epsilon) + 1) 98 | # self.q = 0 99 | 100 | def aggregate(self, data): 101 | self.aggregated_data += data 102 | self.n += 1 103 | 104 | def adjust(self) -> np.ndarray: 105 | # Real data, don't adjust 106 | if self.epsilon < 0: 107 | self.adjusted_data = self.aggregated_data 108 | return self.adjusted_data 109 | 110 | self.adjusted_data = (self.aggregated_data - self.n * self.q) / (self.p - self.q) 111 | return self.adjusted_data 112 | 113 | def estimate(self, data) -> float: 114 | """ 115 | Estimate frequency of a specific data item 116 | :param data: data item 117 | :return: estimated frequency 118 | """ 119 | index = self.map_func(data) 120 | return self.adjusted_data[index] 121 | 122 | 123 | class OUEClient(LDPClient): 124 | def __init__(self, epsilon, d, map_func=None): 125 | """ 126 | Optimal Unary Encoding of server side 127 | """ 128 | super(OUEClient, self).__init__(epsilon, d, map_func) 129 | 130 | # Probability of 1=>1 131 | self.p = 0.5 132 | # Probability of 1=>1 133 | self.q = 1 / (math.pow(math.e, self.epsilon) + 1) 134 | # self.q = 0 135 | 136 | def _perturb(self, index): 137 | # Remember that p is the probability for 1=>1; 138 | # And q is the probability for 0=>1 139 | 140 | # Update real data 141 | if self.epsilon < 0: 142 | perturbed_data = np.zeros(self.d) 143 | perturbed_data[index] = 1 144 | return perturbed_data 145 | 146 | # If y=0, Prob(y'=1)=q, Prob(y'=0)=1-q 147 | perturbed_data = np.random.choice([1, 0], size=self.d, p=[self.q, 1-self.q]) 148 | 149 | # If y=1, Prob(y'=0)=p 150 | if random.random() < self.p: 151 | perturbed_data[index] = 1 152 | else: 153 | perturbed_data[index] = 0 154 | 155 | return perturbed_data 156 | 157 | def privatise(self, data): 158 | index = self.map_func(data) 159 | return self._perturb(index) 160 | -------------------------------------------------------------------------------- /LDPTrace/code/grid.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List 2 | import random 3 | 4 | 5 | class Grid: 6 | def __init__(self, 7 | min_x: float, 8 | min_y: float, 9 | step_x: float, 10 | step_y: float, 11 | index: Tuple[int, int]): 12 | """ 13 | Attributes: 14 | min_x, min_y, max_x, max_y: boundary of current grid 15 | index = (i, j): grid index in the matrix 16 | """ 17 | self.min_x = min_x 18 | self.min_y = min_y 19 | self.max_x = min_x + step_x 20 | self.max_y = min_y + step_y 21 | self.index = index 22 | 23 | def in_cell(self, p: Tuple[float, float]): 24 | if self.min_x <= p[0] <= self.max_x and self.min_y <= p[1] <= self.max_y: 25 | return True 26 | else: 27 | return False 28 | 29 | def sample_point(self): 30 | x = self.min_x + random.random() * (self.max_x - self.min_x) 31 | y = self.min_y + random.random() * (self.max_y - self.min_y) 32 | 33 | return x, y 34 | 35 | def equal(self, other): 36 | return self.index == other.index 37 | 38 | 39 | class GridMap: 40 | def __init__(self, 41 | 
n: int, 42 | min_x: float, 43 | min_y: float, 44 | max_x: float, 45 | max_y: float): 46 | """ 47 | Geographical map after griding 48 | Parameters: 49 | n: cell count 50 | min_x, min_y, max_x, max_y: boundary of the map 51 | """ 52 | min_x -= 1e-6 53 | min_y -= 1e-6 54 | max_x += 1e-6 55 | max_y += 1e-6 56 | self.min_x = min_x 57 | self.min_y = min_y 58 | self.max_x = max_x 59 | self.max_y = max_y 60 | step_x = (max_x - min_x) / n 61 | step_y = (max_y - min_y) / n 62 | self.step_x = step_x 63 | self.step_y = step_y 64 | 65 | # Spatial map, n x n matrix of grids 66 | self.map: List[List[Grid]] = list() 67 | for i in range(n): 68 | self.map.append(list()) 69 | for j in range(n): 70 | self.map[i].append(Grid(min_x + step_x * i, min_y + step_y * j, step_x, step_y, (i, j))) 71 | 72 | def find_shortest_path(self, start: Grid, end: Grid): 73 | start_i, start_j = start.index 74 | end_i, end_j = end.index 75 | 76 | shortest_path = list() 77 | current_i, current_j = start_i, start_j 78 | 79 | while True: 80 | # NOTICE: shortest path doesn't include the end grid 81 | 82 | shortest_path.append(self.map[current_i][current_j]) 83 | if end_i > current_i: 84 | current_i += 1 85 | elif end_i < current_i: 86 | current_i -= 1 87 | if end_j > current_j: 88 | current_j += 1 89 | elif end_j < current_j: 90 | current_j -= 1 91 | 92 | if end_i == current_i and end_j == current_j: 93 | break 94 | 95 | return shortest_path 96 | 97 | def get_adjacent(self, g: Grid) -> List[Tuple[int, int]]: 98 | """ 99 | Get 8 adjacent grids of g 100 | """ 101 | i, j = g.index 102 | adjacent_index = [(i - 1, j - 1), (i - 1, j), (i - 1, j + 1), (i, j + 1), 103 | (i, j - 1), (i + 1, j + 1), (i + 1, j), (i + 1, j - 1)] 104 | adjacent_index_new = [] 105 | # Remove grids out of bound 106 | for index in adjacent_index: 107 | if len(self.map) > index[0] >= 0 and len(self.map[0]) > index[1] >= 0: 108 | adjacent_index_new.append(index) 109 | return adjacent_index_new 110 | 111 | def is_adjacent_grids(self, g1: Grid, g2: Grid): 112 | return True if g2.index in self.get_adjacent(g1) else False 113 | 114 | def bounding_box(self, g1: Grid, g2: Grid): 115 | """ 116 | Return all grids in the rectangular bounding box EXCEPT g1 and g2 117 | """ 118 | start_i = min(g1.index[0], g2.index[0]) 119 | start_j = min(g1.index[1], g2.index[1]) 120 | end_i = max(g1.index[0], g2.index[0]) 121 | end_j = max(g1.index[1], g2.index[1]) 122 | 123 | box = [] 124 | for i in range(start_i, end_i + 1): 125 | for j in range(start_j, end_j + 1): 126 | g = self.map[i][j] 127 | if not (g.index == g1.index or g.index == g2.index): 128 | box.append(g) 129 | 130 | return box 131 | 132 | def get_list_map(self): 133 | list_map = [] 134 | for li in self.map: 135 | list_map.extend(li) 136 | return list_map 137 | 138 | @property 139 | def size(self): 140 | return len(self.map) * len(self.map[0]) 141 | 142 | 143 | def is_adjacent_grids(g1: Grid, g2: Grid): 144 | """ 145 | Doesn't consider the boundary of the map. 146 | Only use this function when there's no global grid_map. 
147 | """ 148 | i1, j1 = g1.index 149 | i2, j2 = g2.index 150 | # East, Northeast, Southeast 151 | if i2 == i1 + 1 and (j2 == j1 or j2 == j1 + 1 or j2 == j1 - 1): 152 | return True 153 | # West, Northwest, Southwest 154 | if i2 == i1 - 1 and (j2 == j1 or j2 == j1 + 1 or j2 == j1 - 1): 155 | return True 156 | # North, South 157 | if i2 == i1 and (j2 == j1 + 1 or j2 == j1 - 1): 158 | return True 159 | return False 160 | 161 | -------------------------------------------------------------------------------- /LDPTrace/code/experiment.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List, Tuple 3 | import utils 4 | import numpy as np 5 | from grid import Grid, GridMap 6 | import trajectory 7 | import map_func 8 | import multiprocessing 9 | import math 10 | 11 | CORES = multiprocessing.cpu_count() // 2 12 | 13 | 14 | class Query: 15 | def __init__(self): 16 | pass 17 | 18 | def point_query(self, db): 19 | raise NotImplementedError 20 | 21 | class SquareQuery(Query): 22 | def __init__(self, 23 | min_x: float, 24 | min_y: float, 25 | max_x: float, 26 | max_y: float, 27 | size_factor=9.0): 28 | super().__init__() 29 | # Randomly select center 30 | center_x = random.random() * (max_x - min_x) + min_x 31 | center_y = random.random() * (max_y - min_y) + min_y 32 | self.center = (center_x, center_y) 33 | 34 | self.edge = math.sqrt((max_x-min_x)*(max_y-min_y)/size_factor) 35 | self.left_x = center_x - self.edge / 2 36 | self.up_y = center_y + self.edge / 2 37 | self.right_x = center_x + self.edge / 2 38 | self.down_y = center_y - self.edge / 2 39 | 40 | def in_square(self, point: Tuple[float, float]): 41 | return self.left_x <= point[0] <= self.right_x and self.down_y <= point[1] <= self.up_y 42 | 43 | 44 | def point_query(self, db: List[List[Tuple[float, float]]]): 45 | count = 0 46 | for t in db: 47 | for p in t: 48 | if self.in_square(p): 49 | count += 1 50 | 51 | return count 52 | 53 | 54 | class Pattern: 55 | def __init__(self, grids: List[Grid]): 56 | self.grids = grids 57 | 58 | @property 59 | def size(self): 60 | return len(self.grids) 61 | 62 | def __eq__(self, other): 63 | if other is None: 64 | return False 65 | if not type(other) == Pattern: 66 | return False 67 | if not other.size == self.size: 68 | return False 69 | 70 | for i in range(self.size): 71 | if not self.grids[i].index == other.grids[i].index: 72 | return False 73 | 74 | return True 75 | 76 | def __hash__(self): 77 | prime = 31 78 | result = 1 79 | for g in self.grids: 80 | result = result * prime + g.__hash__() 81 | 82 | return result 83 | 84 | 85 | def calculate_point_query(orig_db, 86 | syn_db, 87 | queries: List[Query], 88 | sanity_bound=0.01): 89 | actual_ans = list() 90 | syn_ans = list() 91 | 92 | total_points = np.sum([len(t) for t in orig_db]) 93 | 94 | for q in queries: 95 | actual_ans.append(q.point_query(orig_db)) 96 | syn_ans.append(q.point_query(syn_db)) 97 | 98 | actual_ans = np.asarray(actual_ans) 99 | syn_ans = np.asarray(syn_ans) 100 | 101 | # Error = |actual-syn| / max{actual, 1% * len(db)} 102 | numerator = np.abs(actual_ans - syn_ans) 103 | # numerator = syn_ans - actual_ans 104 | denominator = np.asarray([max(actual_ans[i], total_points * sanity_bound) for i in range(len(actual_ans))]) 105 | # denominator = actual_ans 106 | 107 | error = numerator / denominator 108 | 109 | return np.mean(error) 110 | 111 | 112 | def calculate_coverage_kendall_tau(orig_db: List[List[Grid]], 113 | syn_db: List[List[Grid]], 114 | grid_map: GridMap): 115 
| actual_counts = np.zeros(grid_map.size) 116 | syn_counts = np.zeros(grid_map.size) 117 | 118 | # For each grid, find how many trajectories pass through it 119 | for i in range(len(grid_map.map)): 120 | for j in range(len(grid_map.map[0])): 121 | g = grid_map.map[i][j] 122 | index = map_func.grid_index_map_func(g, grid_map) 123 | for t in orig_db: 124 | actual_counts[index] += trajectory.pass_through(t, g) 125 | for t in syn_db: 126 | syn_counts[index] += trajectory.pass_through(t, g) 127 | 128 | concordant_pairs = 0 129 | reversed_pairs = 0 130 | for i in range(grid_map.size): 131 | for j in range(i + 1, grid_map.size): 132 | if actual_counts[i] > actual_counts[j]: 133 | if syn_counts[i] > syn_counts[j]: 134 | concordant_pairs += 1 135 | else: 136 | reversed_pairs += 1 137 | if actual_counts[i] < actual_counts[j]: 138 | if syn_counts[i] < syn_counts[j]: 139 | concordant_pairs += 1 140 | else: 141 | reversed_pairs += 1 142 | 143 | denominator = grid_map.size * (grid_map.size - 1) / 2 144 | return (concordant_pairs - reversed_pairs) / denominator 145 | 146 | 147 | def calculate_diameter_error(orig_db: List[List[Tuple[float, float]]], 148 | syn_db: List[List[Tuple[float, float]]], 149 | bucket_num=20, multi=False): 150 | if multi: 151 | pool = multiprocessing.Pool(CORES) 152 | orig_diameter = pool.map(trajectory.get_diameter, orig_db) 153 | pool.close() 154 | pool = multiprocessing.Pool(CORES) 155 | syn_diameter = pool.map(trajectory.get_diameter, syn_db) 156 | pool.close() 157 | else: 158 | orig_diameter = [trajectory.get_diameter(t) for t in orig_db] 159 | syn_diameter = [trajectory.get_diameter(t) for t in syn_db] 160 | 161 | bucket_size = (max(orig_diameter) - min(orig_diameter)) / bucket_num 162 | 163 | orig_count = np.zeros(bucket_num) 164 | syn_count = np.zeros(bucket_num) 165 | for i in range(bucket_num): 166 | start = i * bucket_size 167 | end = start + bucket_size 168 | 169 | for d in orig_diameter: 170 | if start <= d <= end: 171 | orig_count[i] += 1 172 | for d in syn_diameter: 173 | if start <= d <= end: 174 | syn_count[i] += 1 175 | 176 | # Normalization 177 | orig_count /= np.sum(orig_count) 178 | syn_count /= np.sum(syn_count) 179 | 180 | return utils.jensen_shannon_distance(orig_count, syn_count) 181 | 182 | 183 | def calculate_length_error(orig_db: List[List[Tuple[float, float]]], 184 | syn_db: List[List[Tuple[float, float]]], 185 | bucket_num=20): 186 | orig_length = [trajectory.get_travel_distance(t) for t in orig_db] 187 | syn_length = [trajectory.get_travel_distance(t) for t in syn_db] 188 | 189 | bucket_size = (max(orig_length) - min(orig_length)) / bucket_num 190 | 191 | orig_count = np.zeros(bucket_num) 192 | syn_count = np.zeros(bucket_num) 193 | for i in range(bucket_num): 194 | start = i * bucket_size 195 | end = start + bucket_size 196 | 197 | for d in orig_length: 198 | if start <= d <= end: 199 | orig_count[i] += 1 200 | for d in syn_length: 201 | if start <= d <= end: 202 | syn_count[i] += 1 203 | 204 | # Normalization 205 | orig_count /= np.sum(orig_count) 206 | syn_count /= np.sum(syn_count) 207 | 208 | return utils.jensen_shannon_distance(orig_count, syn_count) 209 | 210 | 211 | def mine_patterns(db: List[List[Grid]], min_size=2, max_size=8): 212 | """ 213 | Find all patterns of size between min_size and max_size 214 | :return: Dict[Pattern, int]: count of each pattern 215 | """ 216 | pattern_dict = {} 217 | for curr_size in range(min_size, max_size + 1): 218 | for t in db: 219 | for i in range(0, len(t) - curr_size + 1): 220 | p = Pattern(t[i: i + 
curr_size]) 221 | try: 222 | pattern_dict[p] += 1 223 | except KeyError: 224 | pattern_dict[p] = 1 225 | 226 | return pattern_dict 227 | 228 | 229 | def calculate_pattern_f1_error(orig_pattern, 230 | syn_pattern, 231 | k=100): 232 | sorted_orig = sorted(orig_pattern.items(), key=lambda x: x[1], reverse=True) 233 | sorted_syn = sorted(syn_pattern.items(), key=lambda x: x[1], reverse=True) 234 | 235 | orig_top_k = [x[0] for x in sorted_orig][:k] 236 | syn_top_k = [x[0] for x in sorted_syn][:k] 237 | 238 | count = 0 239 | for p1 in syn_top_k: 240 | if p1 in orig_top_k: 241 | count += 1 242 | 243 | precision = count / k 244 | recall = count / k 245 | 246 | return 2 * precision * recall / (precision + recall) 247 | 248 | 249 | def calculate_hotspot_ndcg(orig_density, syn_density, k=5): 250 | sorted_orig = sorted(enumerate(orig_density), key=lambda x: x[1], reverse=True) 251 | sorted_syn = sorted(enumerate(syn_density), key=lambda x: x[1], reverse=True) 252 | 253 | orig_top_k = [x[0] for x in sorted_orig][:k] 254 | syn_top_k = [x[0] for x in sorted_syn][:k] 255 | 256 | r = np.zeros(k) 257 | 258 | for i,p1 in enumerate(syn_top_k): 259 | if p1 in orig_top_k: 260 | r[i] = 1 / (orig_top_k.index(p1) + 1) 261 | 262 | idcg = np.sum( (np.ones(k)/np.arange(1, k+1)) * 1./np.log2(np.arange(2, k + 2))) 263 | dcg = np.sum(r * 1./np.log2(np.arange(2, k + 2))) 264 | 265 | return dcg / idcg if idcg else 0 266 | 267 | def calculate_pattern_support(orig_pattern, syn_pattern, k=100): 268 | sorted_orig = sorted(orig_pattern.items(), key=lambda x: x[1], reverse=True) 269 | orig_top_k = [x[0] for x in sorted_orig][:k] 270 | 271 | error = 0 272 | for i in range(len(orig_top_k)): 273 | p: Pattern = orig_top_k[i] 274 | orig_support = orig_pattern[p] 275 | try: 276 | syn_support = syn_pattern[p] 277 | except KeyError: 278 | syn_support = 0 279 | error += np.abs(orig_support-syn_support)/orig_support 280 | 281 | return error / k 282 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /LDPTrace/code/main.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Dict 2 | 3 | import numpy as np 4 | 5 | import trajectory 6 | import ldp 7 | from grid import GridMap, Grid 8 | import map_func 9 | import utils 10 | import experiment 11 | from experiment import SquareQuery 12 | from parse import args 13 | import dataset 14 | import pickle 15 | import random 16 | import lzma 17 | 18 | from logger.logger import ConfigParser 19 | import multiprocessing 20 | np.random.seed(2022) 21 | random.seed(2022) 22 | CORES = multiprocessing.cpu_count() // 2 23 | 24 | config = ConfigParser(name='LDPTrace', save_dir='./') 25 | logger = config.get_logger(config.exper_name) 26 | 27 | logger.info(f'Parameters: {args}') 28 | 29 | 30 | # ======================= CONVERTING FUNCTIONS ======================= # 31 | 32 | 33 | def convert_raw_to_grid(raw_trajectories: List[List[Tuple[float, float]]], 34 | interp=True): 35 | # Convert raw trajectories to grid trajectories 36 | grid_db = [trajectory.trajectory_point2grid(t, grid_map, interp) 37 | for t in raw_trajectories] 38 | return grid_db 39 | 40 | 41 | def convert_grid_to_raw(grid_db: List[List[Grid]]): 42 | raw_trajectories = [trajectory.trajectory_grid2points(g_t) for g_t in grid_db] 43 | 44 | return raw_trajectories 45 | 46 | 47 | # =============================== END ================================ # 48 | 49 | 50 | # ======================= LDP UPDATE FUNCTIONS ======================= # 51 | 52 | def estimate_max_length(grid_db: List[List[Grid]], epsilon): 53 | """ 54 | Return 90% quantile of lengths 55 | """ 56 | ldp_server = ldp.OUEServer(epsilon, grid_map.size, lambda x: x - 1) 57 | ldp_client = ldp.OUEClient(epsilon, grid_map.size, lambda x: x - 1) 58 | 59 | for t in grid_db: 60 | if len(t) > grid_map.size: 61 | binary_vec = ldp_client.privatise(grid_map.size) 62 | else: 63 | binary_vec = ldp_client.privatise(len(t)) 64 | ldp_server.aggregate(binary_vec) 65 | 66 | ldp_server.adjust() 67 | sum_count = np.sum(ldp_server.adjusted_data) 68 | count = 0 69 | quantile = len(ldp_server.adjusted_data) 70 | for i in range(len(ldp_server.adjusted_data)): 71 | count += ldp_server.adjusted_data[i] 72 | if count >= args.max_len * sum_count: 73 | quantile = i + 1 74 | break 75 | 76 | return ldp_server, quantile 77 | 78 | 79 | def update_markov_prob(grid_db: List[List[Grid]], epsilon, max_len=36): 80 | ldp_server = ldp.OUEServer(epsilon / (max_len+1), grid_map.size * 8, 81 | lambda x: x) 82 | ldp_client = ldp.OUEClient(epsilon / (max_len+1), grid_map.size * 8, 83 | lambda x: x) 84 | start_server = ldp.OUEServer(epsilon / (max_len+1), grid_map.size, 85 | lambda x: map_func.grid_index_map_func(x, grid_map)) 86 | start_client = ldp.OUEClient(epsilon / (max_len+1), grid_map.size, 87 | lambda x: map_func.grid_index_map_func(x, grid_map)) 88 | end_server = ldp.OUEServer(epsilon / (max_len+1), grid_map.size, 89 | lambda x: map_func.grid_index_map_func(x, grid_map)) 90 | end_client = ldp.OUEClient(epsilon / (max_len+1), grid_map.size, 91 | lambda x: map_func.grid_index_map_func(x, grid_map)) 92 | 93 | for t in grid_db: 94 | length = min(len(t), max_len) 95 | # Start point 96 | start = t[0] 97 | binary_vec = start_client.privatise(start) 98 | start_server.aggregate(binary_vec) 99 | for i in range(length - 1): 100 | curr_grid = t[i] 101 | next_grid = t[i + 1] 102 | if grid_map.is_adjacent_grids(curr_grid, next_grid): 103 
104 |                 binary_vec = ldp_client.privatise(map_id)
105 |                 ldp_server.aggregate(binary_vec)
106 |             else:
107 |                 logger.info('Trajectory has non-adjacent moves; use the non-adjacent map function!')
108 |         end = t[length - 1]
109 |         binary_vec = end_client.privatise(end)
110 |         end_server.aggregate(binary_vec)
111 | 
112 |     ldp_server.adjust()
113 |     start_server.adjust()
114 |     end_server.adjust()
115 |     return ldp_server, start_server, end_server
116 | 
117 | 
118 | # =============================== END ================================ #
119 | 
120 | 
121 | # ======================== AGGREGATE FUNCTIONS ======================= #
122 | 
123 | def generate_markov_matrix(markov_vec: np.ndarray, start_vec, end_vec):
124 |     """
125 |     Convert extracted Markov counts to a probability matrix.
126 |     :param markov_vec: [1 x 8n^2] numpy array
127 |     :param start_vec: [1 x n^2] numpy array
128 |     :param end_vec: [1 x n^2] numpy array
129 |     :return: [n^2+1 x n^2+1] Markov probability matrix
130 |              row n^2+1: start -> other
131 |              column n^2+1: other -> end
132 |     """
133 |     n = grid_map.size + 1  # with virtual start and end point
134 |     markov_mat = np.zeros((n, n), dtype=float)
135 |     for k in range(8 * grid_map.size):
136 |         if markov_vec[k] <= 0:
137 |             continue
138 | 
139 |         # Find index in matrix (convert k => (i, j))
140 |         g1, g2 = map_func.adjacent_pair_grid_inv_func(k, grid_map)
141 | 
142 |         # g2 out of bound
143 |         if g2 is None:
144 |             continue
145 | 
146 |         i = map_func.grid_index_map_func(g1, grid_map)
147 |         j = map_func.grid_index_map_func(g2, grid_map)
148 | 
149 |         markov_mat[i][j] = markov_vec[k]
150 | 
151 |     for i in range(len(start_vec)):
152 |         if start_vec[i] < 0:
153 |             start_vec[i] = 0
154 |         if end_vec[i] < 0:
155 |             end_vec[i] = 0
156 |     # Start -> other, row n^2+1
157 |     markov_mat[-1, :-1] = start_vec
158 |     # Other -> end, column n^2+1
159 |     markov_mat[:-1, -1] = end_vec
160 | 
161 |     # Normalize probabilities by each ROW
162 |     markov_mat = markov_mat / (markov_mat.sum(axis=1).reshape((-1, 1)) + 1e-8)
163 |     return markov_mat
164 | 
165 | 
166 | # =============================== END ================================ #
167 | 
168 | 
169 | # ======================== SAMPLING FUNCTIONS ======================== #
170 | 
171 | def sample_start_point(markov_mat: np.ndarray):
172 |     """
173 |     Row n^2+1: virtual start -> other
174 |     """
175 |     prob = markov_mat[-1]
176 | 
177 |     sample_id = np.random.choice(np.arange(grid_map.size), p=prob[:-1])
178 | 
179 |     return map_func.grid_index_inv_func(sample_id, grid_map)
180 | 
181 | 
182 | def sample_length(length_dis: np.ndarray):
183 |     prob = length_dis / np.sum(length_dis)
184 | 
185 |     length = np.random.choice(np.arange(len(length_dis)), p=prob)
186 | 
187 |     return length + 1
188 | 
189 | 
190 | def sample_markov_next(one_level_mat: np.ndarray,
191 |                        prev_grid: Grid,
192 |                        length: int) -> Grid:
193 |     """
194 |     Sample the next grid based on Markov probability
195 |     :param one_level_mat: 1-level Markov matrix
196 |     :param prev_grid: previous grid
197 |     :return: next grid
198 |     """
199 |     candidates = grid_map.get_adjacent(prev_grid)
200 | 
201 |     candidate_probabilities = np.zeros(len(candidates) + 1, dtype=float)
202 | 
203 |     for k, (i, j) in enumerate(candidates):
204 |         # Calculate P(Candidate|T[0 ~ k-1]) using 1-level matrix
205 |         row = map_func.grid_index_map_func(prev_grid, grid_map)
206 |         col = map_func.grid_index_map_func(grid_map.map[i][j], grid_map)
207 |         prob1 = one_level_mat[row][col]
208 | 
209 |         if np.isnan(prob1):
210 |             candidate_probabilities[k] = 0
211 |         else:
212 |             candidate_probabilities[k] = prob1
213 | 
214 |     # Virtual end point
215 |     row = map_func.grid_index_map_func(prev_grid, grid_map)
216 |     col = -1
217 |     prob1 = one_level_mat[row][col]
218 | 
219 |     prob1 *= min(1.0, 0.3 + (length - 1) * 0.2)
220 | 
221 |     candidate_probabilities[-1] = prob1
222 | 
223 |     if candidate_probabilities.sum() < 0.00001:
224 |         return prev_grid
225 | 
226 |     candidate_probabilities = candidate_probabilities / candidate_probabilities.sum()
227 | 
228 |     sample_id = np.random.choice(np.arange(len(candidate_probabilities)), p=candidate_probabilities)
229 | 
230 |     # End
231 |     if sample_id == len(candidate_probabilities) - 1:
232 |         return prev_grid
233 | 
234 |     i, j = candidates[sample_id]
235 |     return grid_map.map[i][j]
236 | 
237 | 
238 | # =============================== END ================================ #
239 | 
240 | 
241 | def generate_synthetic_database(length_dis: np.ndarray,
242 |                                 markov_mat: np.ndarray,
243 |                                 size: int):
244 |     """
245 |     Generate synthetic trajectories
246 |     :param length_dis: estimated length distribution, 1-D numpy array of counts
247 |     :param markov_mat: Markov matrix
248 |     :param size: size of synthetic database
249 |     """
250 | 
251 |     for i in range(len(length_dis)):
252 |         if length_dis[i] < 0:
253 |             length_dis[i] = 0
254 | 
255 |     synthetic_db = list()
256 |     for i in range(size):
257 |         # Sample start point
258 |         start_grid = sample_start_point(markov_mat)
259 | 
260 |         # Sample length
261 |         length = sample_length(length_dis)
262 |         syn_trajectory = [start_grid]
263 |         for j in range(1, length):
264 |             prev_grid = syn_trajectory[j - 1]
265 |             # Sample next grid based on Markov probability
266 |             next_grid = sample_markov_next(markov_mat,
267 |                                            prev_grid, len(syn_trajectory))
268 |             # Virtual end point
269 |             if next_grid.equal(prev_grid):
270 |                 break
271 | 
272 |             syn_trajectory.append(next_grid)
273 |         synthetic_db.append(syn_trajectory)
274 | 
275 |     return synthetic_db
276 | 
277 | 
278 | def get_start_end_dist(grid_db: List[List[Grid]]):
279 |     dist = np.zeros(grid_map.size * grid_map.size)
280 |     start_dist = np.zeros(grid_map.size)
281 |     end_dist = np.zeros(grid_map.size)
282 | 
283 |     for g_t in grid_db:
284 |         start = g_t[0]
285 |         end = g_t[-1]
286 |         index = map_func.pair_grid_index_map_func((start, end), grid_map)
287 |         dist[index] += 1
288 |         start_index = map_func.grid_index_map_func(start, grid_map)
289 |         start_dist[start_index] += 1
290 |         end_index = map_func.grid_index_map_func(end, grid_map)
291 |         end_dist[end_index] += 1
292 | 
293 |     return dist, start_dist, end_dist
294 | 
295 | 
296 | def get_real_density(grid_db: List[List[Grid]]):
297 |     real_dens = np.zeros(grid_map.size)
298 | 
299 |     for t in grid_db:
300 |         for g in t:
301 |             index = map_func.grid_index_map_func(g, grid_map)
302 |             real_dens[index] += 1
303 | 
304 |     return real_dens
305 | 
306 | 
307 | logger.info(f'Reading {args.dataset} dataset...')
308 | if args.dataset == 'oldenburg':
309 |     db = dataset.read_brinkhoff(args.dataset)
310 | elif args.dataset == 'porto':
311 |     with lzma.open('../data/porto.xz', 'rb') as f:
312 |         db = pickle.load(f)
313 | elif args.dataset == 'campus':
314 |     with lzma.open('../data/campus.xz', 'rb') as f:
315 |         db = pickle.load(f)
316 | else:
317 |     logger.info(f'Invalid dataset: {args.dataset}')
318 |     db = None
319 |     exit()
320 | 
321 | random.shuffle(db)
322 | 
323 | stats = dataset.dataset_stats(db, f'../data/{args.dataset}_stats.json')
324 | 
325 | grid_map = GridMap(args.grid_num,
326 |                    stats['min_x'],
327 |                    stats['min_y'],
328 |                    stats['max_x'],
329 |                    stats['max_y'])
330 | 
331 | logger.info('Converting raw trajectories to grids...')
332 | grid_trajectories = convert_raw_to_grid(db)
333 | 
334 | if args.re_syn:
335 |     length_server, quantile = estimate_max_length(grid_trajectories, args.epsilon / 10)
336 |     logger.info(f'Quantile: {quantile}')
337 | 
338 |     logger.info('Updating Markov prob...')
339 |     markov_servers = update_markov_prob(grid_trajectories, 9 * args.epsilon / 10, max_len=quantile)
340 | 
341 |     logger.info('Aggregating...')
342 | 
343 |     one_level_mat = generate_markov_matrix(markov_servers[0].adjusted_data,
344 |                                            markov_servers[1].adjusted_data,
345 |                                            markov_servers[2].adjusted_data)
346 | 
347 |     logger.info('Synthesizing...')
348 |     synthetic_database = generate_synthetic_database(length_server.adjusted_data,
349 |                                                      one_level_mat,
350 |                                                      len(db))
351 | 
352 |     synthetic_trajectories = convert_grid_to_raw(synthetic_database)
353 | 
354 |     with open(f'../data/{args.dataset}/syn_{args.dataset}_eps_{args.epsilon}_max_{args.max_len}_grid_{args.grid_num}.pkl', 'wb') as f:
355 |         pickle.dump(synthetic_trajectories, f)
356 | 
357 |     synthetic_grid_trajectories = synthetic_database
358 | 
359 | else:
360 |     try:
361 |         logger.info('Reading saved synthetic database...')
362 |         with open(f'../data/{args.dataset}/syn_{args.dataset}_eps_{args.epsilon}_max_{args.max_len}_grid_{args.grid_num}.pkl',
363 |                   'rb') as f:
364 |             synthetic_trajectories = pickle.load(f)
365 |         synthetic_grid_trajectories = convert_raw_to_grid(synthetic_trajectories)
366 |     except FileNotFoundError:
367 |         logger.info('Synthesized file not found! Run with --re_syn to generate it.')
368 |         exit()
369 | 
370 | orig_trajectories = db
371 | orig_grid_trajectories = grid_trajectories
372 | orig_sampled_trajectories = convert_grid_to_raw(orig_grid_trajectories)
373 | 
374 | # ============================ EXPERIMENTS =========================== #
375 | np.random.seed(2022)
376 | random.seed(2022)
377 | logger.info('Experiment: Density Error...')
378 | orig_density = get_real_density(orig_grid_trajectories)
379 | syn_density = get_real_density(synthetic_grid_trajectories)
380 | orig_density /= np.sum(orig_density)
381 | syn_density /= np.sum(syn_density)
382 | density_error = utils.jensen_shannon_distance(orig_density, syn_density)
383 | logger.info(f'Density Error: {density_error}')
384 | 
385 | logger.info('Experiment: Hotspot Query Error...')
386 | hotspot_ndcg = experiment.calculate_hotspot_ndcg(orig_density, syn_density)
387 | logger.info(f'Hotspot Query Error: {1-hotspot_ndcg}')
388 | # Query AvRE
389 | logger.info('Experiment: Query AvRE...')
390 | 
391 | queries = [SquareQuery(grid_map.min_x, grid_map.min_y, grid_map.max_x, grid_map.max_y, size_factor=args.size_factor) for _ in range(args.query_num)]
392 | 
393 | query_error = experiment.calculate_point_query(orig_sampled_trajectories,
394 |                                                synthetic_trajectories,
395 |                                                queries)
396 | logger.info(f'Point Query AvRE: {query_error}')
397 | 
398 | # Location coverage Kendall-tau
399 | logger.info('Experiment: Kendall-tau...')
400 | kendall_tau = experiment.calculate_coverage_kendall_tau(orig_grid_trajectories,
401 |                                                          synthetic_grid_trajectories,
402 |                                                          grid_map)
403 | logger.info(f'Kendall-tau: {kendall_tau}')
404 | 
405 | # Trip error
406 | logger.info('Experiment: Trip error...')
407 | orig_trip_dist, _, _ = get_start_end_dist(orig_grid_trajectories)
408 | syn_trip_dist, _, _ = get_start_end_dist(synthetic_grid_trajectories)
409 | 
410 | orig_trip_dist = np.asarray(orig_trip_dist) / np.sum(orig_trip_dist)
411 | syn_trip_dist = np.asarray(syn_trip_dist) / np.sum(syn_trip_dist)
412 | trip_error = utils.jensen_shannon_distance(orig_trip_dist, syn_trip_dist)
413 | logger.info(f'Trip error: {trip_error}')
414 | 
415 | # Diameter error
416 | logger.info('Experiment: Diameter error...')
417 | diameter_error = experiment.calculate_diameter_error(orig_trajectories, synthetic_trajectories,
418 |                                                       multi=args.multiprocessing)
419 | logger.info(f'Diameter error: {diameter_error}')
420 | 
421 | # Length error
422 | logger.info('Experiment: Length error...')
423 | length_error = experiment.calculate_length_error(orig_trajectories, synthetic_trajectories)
424 | logger.info(f'Length error: {length_error}')
425 | 
426 | # Pattern mining errors
427 | logger.info('Experiment: Pattern mining errors...')
428 | orig_pattern = experiment.mine_patterns(orig_grid_trajectories)
429 | syn_pattern = experiment.mine_patterns(synthetic_grid_trajectories)
430 | 
431 | pattern_f1_error = experiment.calculate_pattern_f1_error(orig_pattern, syn_pattern)
432 | pattern_support_error = experiment.calculate_pattern_support(orig_pattern, syn_pattern)
433 | 
434 | logger.info(f'Pattern F1 error: {pattern_f1_error}')
435 | logger.info(f'Pattern support error: {pattern_support_error}')
436 | 
437 | 
--------------------------------------------------------------------------------
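The OUEClient/OUEServer objects used throughout main.py are imported from ldp.py, which is not reproduced above. As a rough reference only, a minimal Optimized Unary Encoding (OUE) pair could look like the sketch below: the class names and the privatise/aggregate/adjust/adjusted_data interface mirror how they are called in main.py, but the internals are an assumption, not the repository's actual implementation.

import numpy as np

class OUEClient:
    """Minimal OUE client sketch: one-hot encode a value, then randomize every bit."""
    def __init__(self, epsilon: float, domain: int, map_func):
        self.p = 0.5                               # probability of keeping the true 1-bit
        self.q = 1.0 / (np.exp(epsilon) + 1.0)     # probability of flipping a 0-bit to 1
        self.domain = domain
        self.map_func = map_func                   # maps a raw value to an index in [0, domain)

    def privatise(self, value) -> np.ndarray:
        index = self.map_func(value)
        vec = (np.random.rand(self.domain) < self.q).astype(int)
        vec[index] = int(np.random.rand() < self.p)
        return vec

class OUEServer:
    """Minimal OUE server sketch: sum the reports, then debias the counts."""
    def __init__(self, epsilon: float, domain: int, map_func):
        self.p = 0.5
        self.q = 1.0 / (np.exp(epsilon) + 1.0)
        self.map_func = map_func                   # kept for symmetry with the client
        self.n = 0
        self.aggregated_data = np.zeros(domain)
        self.adjusted_data = np.zeros(domain)

    def aggregate(self, binary_vec: np.ndarray):
        self.aggregated_data += binary_vec
        self.n += 1

    def adjust(self):
        # Unbiased frequency estimate: (C_i - n*q) / (p - q)
        self.adjusted_data = (self.aggregated_data - self.n * self.q) / (self.p - self.q)

In main.py, the adjusted_data arrays produced by such servers are exactly what feed generate_markov_matrix and generate_synthetic_database.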
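A note on the privacy budget split in main.py: estimate_max_length is called with args.epsilon / 10, update_markov_prob with 9 * args.epsilon / 10, and the latter divides its share across max_len + 1 OUE reports per user (start cell, end cell, and up to max_len - 1 transitions). Under sequential composition the per-user budget therefore sums back to args.epsilon; the small check below illustrates this with illustrative numbers (not values taken from any run).

# Worked example of the budget split used in main.py (numbers are illustrative only)
eps = 1.0           # args.epsilon
max_len = 36        # quantile returned by estimate_max_length
eps_length = eps / 10                                    # spent in estimate_max_length
eps_per_report = (9 * eps / 10) / (max_len + 1)          # per OUE report in update_markov_prob
per_user_total = eps_length + (max_len + 1) * eps_per_report
assert abs(per_user_total - eps) < 1e-12                 # sequential composition recovers eps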
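utils.jensen_shannon_distance, used above for the density and trip errors, lives in utils.py, which is not shown here. Presumably it computes the standard Jensen-Shannon distance between two discrete distributions; a self-contained sketch under that assumption (base-2 logarithm, so the result lies in [0, 1]) is:

import numpy as np

def jensen_shannon_distance(p: np.ndarray, q: np.ndarray) -> float:
    """Square root of the base-2 Jensen-Shannon divergence between two distributions."""
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    p = p / p.sum()
    q = q / q.sum()
    m = 0.5 * (p + q)

    def kl(a, b):
        mask = a > 0                      # treat 0 * log(0 / x) as 0
        return np.sum(a[mask] * np.log2(a[mask] / b[mask]))

    return float(np.sqrt(0.5 * kl(p, m) + 0.5 * kl(q, m)))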