├── README.md
├── config.py
├── dataset.py
├── train.py
├── preprocess.py
├── ipynb
│   ├── gowalla_term.ipynb
│   ├── merge_data.ipynb
│   ├── total_prepro2.ipynb
│   ├── total_prepro.ipynb
│   └── preprocess2.ipynb
└── models.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # POI2Vec
2 | POI2Vec: Geographical Latent Representation for Predicting Future Visitors
3 | Shanshan Feng, Gao Cong, Bo An, Yeow Meng Chee
4 | Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | # Parameters
4 | # ==================================================
5 | ltype = torch.cuda.LongTensor
6 | ftype = torch.cuda.FloatTensor
7 | 
8 | # Model Hyperparameters
9 | feat_dim = 200
10 | route_depth = 16
11 | route_count = 4
12 | context_len = 32
13 | 
14 | # Weight init
15 | weight_m = 0
16 | weight_v = 0.1
17 | 
18 | # Training Parameters
19 | batch_size = 128
20 | num_epochs = 30
21 | learning_rate = 0.005
22 | momentum = 0.0
23 | evaluate_every = 3
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | 
4 | class Data():
5 |     def __init__(self):
6 | 
7 |         self.id2route = None
8 |         self.id2lr = None
9 |         self.id2prob = None
10 | 
11 |         self.user_train = None
12 |         self.context_train = None
13 |         self.target_train = None
14 |         self.user_valid = None
15 |         self.context_valid = None
16 |         self.target_valid = None
17 |         self.user_test = None
18 |         self.context_test = None
19 |         self.target_test = None
20 |         self.maxlen_context = 32
21 | 
22 |     def load(self):
23 |         print("Loading data...")
24 |         poi_list = np.load("./npy/id2poi.npy")
25 |         user_list = np.load("./npy/id2user.npy")
26 |         self.id2route = np.load("./npy/id2route.npy")
27 |         self.id2lr = np.load("./npy/id2lr.npy")
28 |         self.id2prob = np.load("./npy/id2prob.npy")
29 | 
30 |         self.user_train = np.load("./npy/train_user.npy")
31 |         self.context_train = np.load("./npy/train_context.npy")
32 |         self.target_train = np.load("./npy/train_target.npy")
33 |         self.user_valid = np.load("./npy/valid_user.npy")
34 |         self.context_valid = np.load("./npy/valid_context.npy")
35 |         self.target_valid = np.load("./npy/valid_target.npy")
36 |         self.user_test = np.load("./npy/test_user.npy")
37 |         self.context_test = np.load("./npy/test_context.npy")
38 |         self.target_test = np.load("./npy/test_target.npy")
39 |         print("Train/Valid/Test/POI/User: {:d}/{:d}/{:d}/{:d}/{:d}".format(len(self.user_train), len(self.user_valid), len(self.user_test), len(poi_list), len(user_list)))
40 |         print("==================================================================================")
41 | 
42 |         return len(poi_list), len(user_list)
43 | 
44 |     def train_batch_iter(self, batch_size):
45 |         data = list(zip(self.user_train, self.context_train, self.target_train))
46 |         random.shuffle(data)
47 |         return self.batch_iter(data, batch_size)
48 | 
49 |     def valid_batch_iter(self, batch_size):
50 |         data = list(zip(self.user_valid, self.context_valid, self.target_valid))
51 |         return self.batch_iter(data, batch_size)
52 | 
53 |     def test_batch_iter(self, batch_size):
54 |         data = list(zip(self.user_test, self.context_test, self.target_test))
55 |         return self.batch_iter(data, batch_size)
56 | 
57 |     def batch_iter(self, data, batch_size):
58 |         data_size = float(len(data))
59 |         num_batches = int(np.ceil(data_size / batch_size))
60 |         for batch_num in xrange(num_batches):
61 |             start_index = int(batch_num * batch_size)
62 |             end_index = min(int((batch_num + 1) * batch_size), int(data_size))
63 |             yield data[start_index:end_index]
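A minimal sketch of how Data.batch_iter slices a dataset — toy arrays stand in for the .npy files that load() expects, and the shapes below are assumptions for illustration only:

import numpy as np
import dataset

data = dataset.Data()
data.user_train = np.arange(10)                      # ten fake user ids
data.context_train = np.zeros((10, 32), dtype=int)   # zero-padded context windows
data.target_train = np.arange(10)                    # fake target POI ids

for batch in data.train_batch_iter(batch_size=4):
    users, contexts, targets = zip(*batch)
    print(len(users))                                # 4, 4, 2 -- the last batch is shorter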
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | 
3 | import gc
4 | import time
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | from torch.autograd import Variable
10 | import dataset
11 | import models
12 | import config
13 | 
14 | # Type Parameters
15 | ltype = config.ltype
16 | ftype = config.ftype
17 | # Training Parameters
18 | learning_rate = config.learning_rate
19 | 
20 | def parameters(*argv):
21 |     params = []
22 |     for model in argv:
23 |         params += list(model.parameters())
24 | 
25 |     return params
26 | 
27 | def print_score(batches, step):
28 |     batch_loss = 0.  # accumulated loss over the batches
29 |     for i, batch in enumerate(batches):
30 |         user_batch, context_batch, target_batch = zip(*batch)
31 |         batch_loss += run(user_batch, context_batch, target_batch, step=step)
32 |     print("Validation Error :", batch_loss/(i+1), time.ctime())
33 | 
34 | ##############################################################################################
35 | def run(user, context, target, step):
36 | 
37 |     optimizer.zero_grad()
38 | 
39 |     user = Variable(torch.from_numpy(np.asarray(user)).type(ltype))
40 |     context = Variable(torch.from_numpy(np.asarray(context)).type(ltype))
41 | 
42 |     # POI2VEC
43 |     loss = p2v_model(user, context, target)
44 | 
45 |     # update the weights only in the training phase (step 1);
46 |     # validation/test passes (step 2) just report the loss
47 |     if step == 1:
48 |         loss.backward()
49 |         optimizer.step()
50 |     gc.collect()
51 | 
52 |     return loss.data.cpu().numpy()[0]
53 | 
54 | ##############################################################################################
55 | ##############################################################################################
56 | if __name__ == "__main__":
57 | 
58 |     # Data Preparation
59 |     data = dataset.Data()
60 |     poi_cnt, user_cnt = data.load()
61 | 
62 |     # Model Preparation
63 |     p2v_model = models.POI2VEC(poi_cnt, user_cnt, data.id2route, data.id2lr, data.id2prob).cuda()
64 |     loss_model = nn.CrossEntropyLoss().cuda()
65 |     optimizer = torch.optim.SGD(parameters(p2v_model), lr=config.learning_rate, momentum=config.momentum)
66 | 
67 |     for i in xrange(config.num_epochs):
68 |         # Training
69 |         batch_loss = 0.
70 |         train_batches = data.train_batch_iter(config.batch_size)
71 |         for j, train_batch in enumerate(train_batches):
72 |             user_batch, context_batch, target_batch = zip(*train_batch)
73 |             batch_loss += run(user_batch, context_batch, target_batch, step=1)
74 |             if (j+1) % 1000 == 0:
75 |                 print("batch #{:d}: ".format(j+1), "batch_loss :", batch_loss/(j+1), time.ctime())
76 | 
77 |         # Validation
78 |         if (i+1) % config.evaluate_every == 0:
79 |             print("==================================================================================")
80 |             print("Evaluation at epoch #{:d}: ".format(i+1))
81 |             p2v_model.eval()
82 |             valid_batches = data.valid_batch_iter(config.batch_size)
83 |             print_score(valid_batches, step=2)
84 |             p2v_model.train()
85 | 
86 |     # Test
87 |     print("==================================================================================")
88 |     print("Testing")
89 |     p2v_model.eval()
90 |     test_batches = data.test_batch_iter(config.batch_size)
91 |     print_score(test_batches, step=2)
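train.py only reports the averaged loss. For the paper's actual task — ranking likely future visitors — a helper along the lines below could reuse the same user·POI inner product that POI2VEC.forward uses for pr_user; topk_pois is a hypothetical addition, not part of the repository:

import torch

def topk_pois(p2v_model, user_var, k=10):
    # score every POI against each user embedding, then keep the k best POI ids
    user_vec = p2v_model.user_weight(user_var)                    # batch x feat_dim
    scores = torch.mm(user_vec, p2v_model.poi_weight.weight.t())  # batch x poi_cnt
    _, top_idx = scores.topk(k, dim=1)
    return top_idx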
--------------------------------------------------------------------------------
/preprocess.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import Process
2 | import numpy as np
3 | import pandas as pd
4 | import tqdm
5 | import os
6 | from models import Node, Rec
7 | 
8 | checkin_file = "../dataset/loc-gowalla_totalCheckins.txt"
9 | df = pd.read_csv(checkin_file, sep='\t', header=None)
10 | df.columns = ["user", "time", "latitude", "longitude", "poi"]
11 | print "total visit :", len(df),
12 | df = df.drop_duplicates(subset=['poi'])
13 | print "/ total poi :", len(df)
14 | poi2pos = df.loc[:, ['latitude', 'longitude', 'poi']].set_index('poi').T.to_dict('list')
15 | 
16 | proc_n = 20
17 | 
18 | poi2id = {'unk':0}
19 | id2poi = ['unk']
20 | for poi in df['poi']:
21 |     if poi2id.get(poi) == None:
22 |         poi2id[poi] = len(id2poi)
23 |         id2poi.append(poi)
24 | np.save("./npy/poi2id.npy", poi2id)
25 | np.save("./npy/id2poi.npy", id2poi)
26 | 
27 | # build a binary tree of regions over the bounding box of all POIs
28 | tree = Node(df['latitude'].min(), df['latitude'].max(), df['longitude'].max(), df['longitude'].min(), 0)
29 | tree.build()
30 | print "total node of tree :", Node.count
31 | theta = Node.theta
32 | 
33 | def main(id2poi_batch, proc_i):
34 |     id2route = []
35 |     id2lr = []
36 |     id2prob = []
37 | 
38 |     # build the route / left-right choice / probability lists for each POI
39 |     for poi in tqdm.tqdm(id2poi_batch):
40 |         # each POI owns a square cell of side theta; p_n holds its four corners
41 |         p_n = [(poi2pos[poi][0] - 0.5*theta, poi2pos[poi][1] - 0.5*theta)\
42 |               ,(poi2pos[poi][0] - 0.5*theta, poi2pos[poi][1] + 0.5*theta)\
43 |               ,(poi2pos[poi][0] + 0.5*theta, poi2pos[poi][1] - 0.5*theta)\
44 |               ,(poi2pos[poi][0] + 0.5*theta, poi2pos[poi][1] + 0.5*theta)]
45 |         # the cell itself, as a rectangle
46 |         poi_area = Rec((poi2pos[poi][1]+0.5*theta, poi2pos[poi][1]-0.5*theta\
47 |                        ,poi2pos[poi][0]-0.5*theta, poi2pos[poi][0]+0.5*theta))
48 | 
49 |         route_list = []
50 |         lr_list = []
51 |         area_list = []
52 |         # find the leaf region that contains each corner
53 |         for p in p_n:
54 |             route, lr = tree.find_route(p)
55 |             route_list.append(route)
56 |             lr_list.append(lr)
57 | 
58 |         # remove duplicate routes
59 |         route_set = []
60 |         for route in route_list:
61 |             if route not in route_set:
62 |                 route_set.append(route)
63 |         lr_set = []
64 |         for lr in lr_list:
65 |             if lr not in lr_set:
66 |                 lr_set.append(lr)
67 | 
68 |         # weight each leaf by how much of the POI's cell it overlaps
69 |         for route in route_set:
70 |             leaf_area = Rec(tree.find_idx(route[0]))
71 |             area_list.append(leaf_area.overlap(poi_area))
72 |         area_list = np.divide(area_list, sum(area_list))
73 | 
74 |         id2route.append(route_set)
75 |         id2lr.append(lr_set)
76 |         id2prob.append(area_list)
77 | 
78 |     np.save("./npy/splited_file/id2route_%02d.npy" % proc_i, id2route)
79 |     np.save("./npy/splited_file/id2lr_%02d.npy" % proc_i, id2lr)
80 |     np.save("./npy/splited_file/id2prob_%02d.npy" % proc_i, id2prob)
81 | 
82 | if __name__ == '__main__':
83 |     procs = []
84 |     batch_size = len(id2poi)/proc_n
85 |     for i in xrange(proc_n+1):  # one extra process picks up the remainder of the integer division
86 |         print "process #%02d running..."%(i+1)
87 |         proc = Process(target=main, args=(id2poi[i*batch_size+1:(i+1)*batch_size+1], i+1))
88 |         procs.append(proc)
89 |         proc.start()
90 | 
91 |     for proc in procs:
92 |         proc.join()
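A self-contained toy run of the Node/Rec machinery from models.py that preprocess.py relies on — the bounding box and POI position below are made up purely to show what find_route returns for one corner of a POI cell:

from models import Node, Rec

Node.count = 0     # reset class-level state in case a tree was built before
Node.leaves = []
tree = Node(0.0, 4.0, 4.0, 0.0, 0)   # west, east, north, south, level
tree.build()

theta = Node.theta
lat, lon = 1.2, 2.7                   # a fake POI position
corner = (lat - 0.5*theta, lon - 0.5*theta)
route, lr = tree.find_route(corner)
print(route)   # node ids from the containing leaf up to the root
print(lr)      # the 0/1 turns taken at each inner node on the way down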
--------------------------------------------------------------------------------
/ipynb/gowalla_term.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import tqdm\n",
10 |     "import numpy as np\n",
11 |     "import pandas as pd\n",
12 |     "from datetime import datetime\n",
13 |     "\n",
14 |     "checkin_file = \"../dataset/loc-gowalla_totalCheckins.txt\"\n",
15 |     "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n",
16 |     "df.columns = [\"user\", \"time\", \"latitude\", \"longitude\", \"poi\"]\n",
17 |     "df = df[['user', 'time']]"
18 |    ]
19 |   },
20 |   {
21 |    "cell_type": "code",
22 |    "execution_count": null,
23 |    "metadata": {},
24 |    "outputs": [],
25 |    "source": [
26 |     "# a bare datetime has no total_seconds(); subtract a reference date to get a timedelta first\n",
27 |     "df['time'] = df['time'].apply(lambda x: (datetime.strptime(x, \"%Y-%m-%dT%H:%M:%SZ\")-datetime(2009,1,1))\\\n",
28 |     "                              .total_seconds()/3600) # hours"
29 |    ]
30 |   },
31 |   {
32 |    "cell_type": "code",
33 |    "execution_count": null,
34 |    "metadata": {},
35 |    "outputs": [],
36 |    "source": [
37 |     "total_term = 0.\n",
38 |     "total_tran = 0\n",
39 |     "prev_user, prev_time = df.iloc[0]\n",
40 |     "print prev_user, prev_time"
41 |    ]
42 |   },
43 |   {
44 |    "cell_type": "code",
45 |    "execution_count": null,
46 |    "metadata": {},
47 |    "outputs": [],
48 |    "source": [
49 |     "for target_idx in tqdm.tqdm(xrange(1, len(df))):\n",
50 |     "    (user, time) = df.iloc[target_idx]\n",
51 |     "    if prev_user != user:\n",
52 |     "        prev_user = user\n",
53 |     "        prev_time = time\n",
54 |     "    else:\n",
55 |     "        total_tran += 1\n",
56 |     "        total_term += (prev_time - time)\n",
57 |     "        prev_time = time"
58 |    ]
59 |   },
60 |   {
61 |    "cell_type": "code",
62 |    "execution_count": null,
63 |    "metadata": {},
64 |    "outputs": [],
65 |    "source": [
66 |     "total_term/total_tran"
67 |    ]
68 |   }
69 |  ],
70 |  "metadata": {
71 |   "kernelspec": {
72 |    "display_name": "Python 2",
73 |    "language": "python",
74 |    "name": "python2"
75 |   },
76 |   "language_info": {
77 |    "codemirror_mode": {
78 |     "name": "ipython",
79 |     "version": 2
80 |    },
81 |    "file_extension": ".py",
82 |    "mimetype": "text/x-python",
83 |    "name": "python",
84 |    "nbconvert_exporter": "python",
85 |    "pygments_lexer": "ipython2",
86 |    "version": "2.7.12"
87 |   }
88 |  },
89 |  "nbformat": 4,
90 |  "nbformat_minor": 2
91 | }
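For reference, the statistic the loop above accumulates — the mean gap in hours between a user's consecutive check-ins — can also be computed with a groupby. The tiny frame here is fabricated; rows are newest-first per user, as in the Gowalla file:

import pandas as pd

df = pd.DataFrame({'user': [0, 0, 0, 1, 1],
                   'time': [10.0, 7.0, 1.0, 5.0, 2.0]})   # hours, newest first
gaps = df.groupby('user')['time'].diff(-1)   # prev_time - time per transition
print(gaps.dropna().mean())                  # same average term as the notebook loop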
--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | 
3 | import os
4 | import datetime
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | from torch.autograd import Variable
10 | import config
11 | 
12 | class POI2VEC(nn.Module):
13 |     def __init__(self, poi_cnt, user_cnt, id2route, id2lr, id2prob):
14 |         super(POI2VEC, self).__init__()
15 | 
16 |         # attributes
17 |         route_cnt = np.power(2, config.route_depth)-1  # inner nodes of a full binary tree of this depth
18 |         self.id2route = id2route
19 |         self.id2lr = np.array(id2lr)
20 |         self.id2prob = np.array(id2prob)
21 | 
22 |         # models
23 |         self.poi_weight = nn.Embedding(poi_cnt, config.feat_dim, padding_idx=0)
24 |         self.poi_weight.weight.data.normal_(config.weight_m, config.weight_v)
25 |         self.user_weight = nn.Embedding(user_cnt, config.feat_dim, padding_idx=0)
26 |         self.user_weight.weight.data.normal_(config.weight_m, config.weight_v)
27 |         self.route_weight = nn.Embedding(route_cnt, config.feat_dim, padding_idx=0)
28 |         self.route_weight.weight.data.normal_(config.weight_m, config.weight_v)
29 |         self.sigmoid = nn.Sigmoid()
30 | 
31 |     def forward(self, user, context, target):
32 |         target = map(int, target)
33 |         route = Variable(torch.from_numpy(self.id2route[target]))\
34 |                 .contiguous().view(-1, config.route_count*config.route_depth).type(config.ltype)
35 |                 # batch x (route_count(4) x route_depth(16))
36 |         lr = Variable(torch.from_numpy(self.id2lr[target]))\
37 |                 .view(-1, config.route_count*(config.route_depth)).type(config.ftype)
38 |                 # batch x (route_count(4) x route_depth(16))
39 |         prob = Variable(torch.from_numpy(self.id2prob[target]))\
40 |                 .view(-1, config.route_count).type(config.ftype) # batch x route_count(4)
41 | 
42 |         context = self.poi_weight(context) # batch x context_len(32) x feat_dim(200)
43 |         route = self.route_weight(route)   # batch x (route_count(4) x route_depth(16)) x feat_dim(200)
44 |         user = self.user_weight(user)      # batch x feat_dim(200)
45 |         target = Variable(torch.from_numpy(np.asarray(target)).type(config.ltype))
46 |         target = self.poi_weight(target)
47 | 
48 |         phi_context = torch.sum(context, dim=1, keepdim=True).permute(0,2,1) # batch x feat_dim x 1
49 |         psi_context = torch.bmm(route, phi_context) # batch x (route_count x route_depth) x 1
50 |         psi_context = self.sigmoid(psi_context).view(-1, config.route_count*config.route_depth)
51 | 
52 |         # lr=1 keeps sigmoid(x); lr=0 flips it to 1-sigmoid(x): (2s)**1 - s = s, (2s)**0 - s = 1 - s
53 |         psi_context = (torch.pow(torch.mul(psi_context, 2), lr) - psi_context)\
54 |                 .view(-1, config.route_count, config.route_depth)
55 | 
56 |         pr_path = 1
57 |         for i in xrange(config.route_depth):
58 |             pr_path = torch.mul(psi_context[:,:,i], pr_path)
59 |         pr_path = torch.sum(torch.mul(pr_path, prob), 1)
60 | 
61 |         pr_user = torch.mm(user, self.poi_weight.weight.t())
62 |         pr_user = torch.sum(torch.exp(pr_user), 1)
63 |         pr_user = torch.div(torch.exp(torch.sum(torch.mul(target, user), 1)), pr_user)
64 |         pr_ult = 1.0-torch.sum(torch.mul(pr_user, pr_path))
65 | 
66 |         return pr_ult
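A scalar check of the (2*psi)**lr - psi trick in forward(): it evaluates to sigmoid(x) for a right turn (lr = 1) and to 1 - sigmoid(x) for a left turn (lr = 0), so multiplying over the depth gives the probability of the whole root-to-leaf route. The scores below are arbitrary:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

psi = sigmoid(0.3)
for lr in (0.0, 1.0):
    branch = (2.0 * psi) ** lr - psi
    expected = psi if lr == 1.0 else 1.0 - psi
    assert abs(branch - expected) < 1e-12

turns = [(0.3, 1), (-0.2, 0), (0.9, 1)]   # (inner-node score, lr) per level
pr_path = np.prod([(2.0*sigmoid(s))**lr - sigmoid(s) for s, lr in turns])
print(pr_path)   # probability of this depth-3 route given the context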
67 | class Rec:
68 |     # rectangle, used to compute overlapped areas
69 |     def __init__(self, (top, down, left, right)):
70 |         self.top = top
71 |         self.down = down
72 |         self.left = left
73 |         self.right = right
74 | 
75 |     def overlap(self, a):
76 |         dx = min(self.top, a.top) - max(self.down, a.down)
77 |         dy = min(self.right, a.right) - max(self.left, a.left)
78 |         if (dx>=0) and (dy>=0):
79 |             return dx*dy
80 |         else:
81 |             # no overlap
82 |             return -1
83 | 
84 | class Node:
85 |     # region-tree node
86 |     theta = 0.5
87 |     count = 0
88 |     leaves = []
89 | 
90 |     def __init__(self, west, east, north, south, level):
91 |         self.left = None
92 |         self.right = None
93 |         self.west = west
94 |         self.east = east
95 |         self.north = north
96 |         self.south = south
97 |         self.level = level
98 |         Node.count += 1
99 |         self.count = Node.count
100 | 
101 |     def build(self):
102 |         # even levels split east-west, odd levels split north-south
103 |         if self.level%2 == 0:
104 |             if (self.east - (self.west+self.east)/2) > 2*Node.theta:
105 |                 self.left = Node(self.west, (self.west+self.east)/2, self.north, self.south, self.level+1)
106 |                 self.right = Node((self.west+self.east)/2, self.east, self.north, self.south, self.level+1)
107 |                 self.left.build()
108 |                 self.right.build()
109 |             else:
110 |                 Node.leaves.append(self)
111 |         else:
112 |             if (self.north - (self.north+self.south)/2) > 2*Node.theta:
113 |                 self.left = Node(self.west, self.east, self.north, (self.north+self.south)/2, self.level+1)
114 |                 self.right = Node(self.west, self.east, (self.north+self.south)/2, self.south, self.level+1)
115 |                 self.left.build()
116 |                 self.right.build()
117 |             else:
118 |                 Node.leaves.append(self)
119 | 
120 |     def find_route(self, (latitude, longitude)):
121 |         if self.left == None:
122 |             prev_route = [self.count]
123 |             prev_lr = []
124 |             return prev_route, prev_lr
125 | 
126 |         # left : 0, right : 1
127 |         if self.level%2 == 0:
128 |             if self.left.east < latitude:
129 |                 prev_route, prev_lr = self.right.find_route((latitude, longitude))
130 |                 prev_lr.append(1)
131 |             else:
132 |                 prev_route, prev_lr = self.left.find_route((latitude, longitude))
133 |                 prev_lr.append(0)
134 |         else:
135 |             if self.left.south < longitude:
136 |                 prev_route, prev_lr = self.left.find_route((latitude, longitude))
137 |                 prev_lr.append(0)
138 |             else:
139 |                 prev_route, prev_lr = self.right.find_route((latitude, longitude))
140 |                 prev_lr.append(1)
141 |         prev_route.append(self.count)
142 |         return prev_route, prev_lr
143 | 
144 |     def find_idx(self, idx):
145 |         # look the leaf up by its node id and return its bounds
146 |         for leaf in Node.leaves:
147 |             if leaf.count == idx:
148 |                 return leaf.north, leaf.south, leaf.west, leaf.east
--------------------------------------------------------------------------------
/ipynb/merge_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 146,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import os\n",
10 |     "import tqdm\n",
11 |     "import numpy as np\n",
12 |     "\n",
13 |     "route_files = []\n",
14 |     "lr_files = []\n",
15 |     "prob_files = []\n",
16 |     "\n",
17 |     "for (path, dir, files) in
os.walk(\"./npy/splited_file/\"):\n", 18 | " for filename in files:\n", 19 | " if 'id2route' in filename:\n", 20 | " route_files.append('./npy/splited_file/'+filename)\n", 21 | " if 'id2lr' in filename:\n", 22 | " lr_files.append('./npy/splited_file/'+filename)\n", 23 | " if 'id2prob' in filename:\n", 24 | " prob_files.append('./npy/splited_file/'+filename)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 147, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "route_files.sort()\n", 34 | "lr_files.sort()\n", 35 | "prob_files.sort()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 148, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "['./npy/splited_file/id2route_01.npy', './npy/splited_file/id2route_02.npy', './npy/splited_file/id2route_03.npy', './npy/splited_file/id2route_04.npy', './npy/splited_file/id2route_05.npy', './npy/splited_file/id2route_06.npy', './npy/splited_file/id2route_07.npy', './npy/splited_file/id2route_08.npy', './npy/splited_file/id2route_09.npy', './npy/splited_file/id2route_10.npy', './npy/splited_file/id2route_11.npy', './npy/splited_file/id2route_12.npy', './npy/splited_file/id2route_13.npy', './npy/splited_file/id2route_14.npy', './npy/splited_file/id2route_15.npy', './npy/splited_file/id2route_16.npy', './npy/splited_file/id2route_17.npy', './npy/splited_file/id2route_18.npy', './npy/splited_file/id2route_19.npy', './npy/splited_file/id2route_20.npy', './npy/splited_file/id2route_21.npy']\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "print route_files" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 149, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stderr", 62 | "output_type": "stream", 63 | "text": [ 64 | "100%|██████████| 21/21 [00:00<00:00, 36.92it/s]\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "pad = [0]*17\n", 70 | "id2route = [[pad[1:], pad[1:], pad[1:], pad[1:]]]\n", 71 | "id2route_cnt = [0]\n", 72 | "max_route_cnt = 4\n", 73 | "max_node_idx = 0\n", 74 | "\n", 75 | "for filename in tqdm.tqdm(route_files):\n", 76 | " routes_list = np.load(filename)\n", 77 | " for routes in routes_list:\n", 78 | " id2route_cnt.append(len(routes))\n", 79 | " \n", 80 | " batch_max = np.max([node for route in routes\n", 81 | " for node in route[1:]])\n", 82 | " if batch_max > max_node_idx:\n", 83 | " max_node_idx = batch_max\n", 84 | " \n", 85 | " if len(routes) < max_route_cnt:\n", 86 | " for _ in xrange(max_route_cnt - len(routes)):\n", 87 | " routes.append(pad)\n", 88 | " \n", 89 | " routes = np.asarray([l[1:] for l in routes])\n", 90 | " id2route.append(routes)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 150, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "(15234, 4, 16)\n", 103 | "(15234,)\n", 104 | "58258\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "print np.asarray(id2route).shape\n", 110 | "print np.asarray(id2route_cnt).shape\n", 111 | "print max_node_idx" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 151, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stderr", 121 | "output_type": "stream", 122 | "text": [ 123 | "100%|██████████| 21/21 [00:00<00:00, 117.84it/s]\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "pad = [0]*16\n", 129 | "id2lr = [[pad, pad, pad, pad]]\n", 130 | "\n", 131 | "for filename in tqdm.tqdm(lr_files):\n", 132 | " 
lrs_list = np.load(filename)\n", 133 | " for lrs in lrs_list:\n", 134 | " if len(lrs) < max_route_cnt:\n", 135 | " for _ in xrange(max_route_cnt - len(lrs)):\n", 136 | " lrs.append(pad)\n", 137 | " id2lr.append(lrs)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 152, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "(15234, 4, 16)\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "print np.asarray(id2lr).shape" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 153, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stderr", 164 | "output_type": "stream", 165 | "text": [ 166 | "100%|██████████| 21/21 [00:00<00:00, 173.03it/s]\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "pad = 0\n", 172 | "id2prob = [[0,0,0,0]]\n", 173 | "\n", 174 | "for filename in tqdm.tqdm(prob_files):\n", 175 | " probs_list = np.load(filename)\n", 176 | " for probs in probs_list:\n", 177 | " probs = list(probs)\n", 178 | " if len(probs) < max_route_cnt:\n", 179 | " for _ in xrange(max_route_cnt - len(probs)):\n", 180 | " probs.append(pad)\n", 181 | " id2prob.append(probs) " 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 154, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "name": "stdout", 191 | "output_type": "stream", 192 | "text": [ 193 | "(15234, 4)\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "print np.asarray(id2prob).shape" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 155, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "np.save('./npy/id2route.npy', np.asarray(id2route))\n", 208 | "np.save('./npy/id2route_cnt.npy', np.asarray(id2route_cnt))\n", 209 | "np.save('./npy/id2lr.npy', np.asarray(id2lr))\n", 210 | "np.save('./npy/id2prob.npy', np.asarray(id2prob))" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 158, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "[[46455 46447 46444 46413 46349 46346 46091 46088 45065 45062 40967 32775\n", 223 | " 32772 5 2 1]\n", 224 | " [46468 46461 46445 46413 46349 46346 46091 46088 45065 45062 40967 32775\n", 225 | " 32772 5 2 1]\n", 226 | " [ 0 0 0 0 0 0 0 0 0 0 0 0\n", 227 | " 0 0 0 0]\n", 228 | " [ 0 0 0 0 0 0 0 0 0 0 0 0\n", 229 | " 0 0 0 0]]\n", 230 | "[0.11276673535000725, 0.88723326464999275, 0, 0]\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "print id2route[1]\n", 236 | "print id2prob[1]" 237 | ] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": "Python 2", 243 | "language": "python", 244 | "name": "python2" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 2 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython2", 256 | "version": "2.7.12" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 2 261 | } 262 | -------------------------------------------------------------------------------- /ipynb/total_prepro2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 91, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from datetime 
import datetime\n", 12 | "import tqdm\n", 13 | "\n", 14 | "checkin_file = \"../dataset/test_total.txt\"\n", 15 | "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n", 16 | "df.columns = [\"user\", \"poi\", \"time\"]" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 92, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "425648" 28 | ] 29 | }, 30 | "execution_count": 92, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "len(df)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 93, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "prev_cnt = 0\n", 46 | "curr_cnt = len(df)\n", 47 | "while prev_cnt != curr_cnt:\n", 48 | " prev_cnt = curr_cnt\n", 49 | " df = df[df.groupby('user').user.transform(len) > 5]\n", 50 | " df = df[df.groupby('poi').poi.transform(len) > 5]\n", 51 | " curr_cnt = len(df)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 94, 57 | "metadata": { 58 | "scrolled": false 59 | }, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "242686" 65 | ] 66 | }, 67 | "execution_count": 94, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "len(df)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 95, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "poi2id = np.load(\"./npy/poi2id.npy\").item()\n", 83 | "df['poi'] = df['poi'].apply(lambda x: poi2id[x] if poi2id.get(x) != None else 13187)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 96, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "df['time'] = df['time'].apply(lambda x: (datetime.strptime(x, \"%Y-%m-%d %H:%M:%S\")-datetime(2009,1,1))\\\n", 93 | " .total_seconds()/360) # hour" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 97, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "13005" 105 | ] 106 | }, 107 | "execution_count": 97, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "np.max(df.poi)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 98, 119 | "metadata": { 120 | "scrolled": true 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "'''\n", 125 | "user2id = {'unk':0}\n", 126 | "id2user = [0]\n", 127 | "for target_idx in tqdm.tqdm(xrange(len(df))):\n", 128 | " (user, poi, time) = df.iloc[target_idx]\n", 129 | " if user2id.get(user) == None:\n", 130 | " user2id[user] = len(id2user)\n", 131 | " id2user.append(user)\n", 132 | "'''\n", 133 | "user2id = np.load('./npy/user2id.npy').item()\n", 134 | "id2user = np.load('./npy/id2user.npy')\n", 135 | "\n", 136 | "df['user'] = df['user'].apply(lambda x: user2id[x] if user2id.get(x) != None else 0)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 99, 142 | "metadata": { 143 | "scrolled": true 144 | }, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "(18712, 242686)" 150 | ] 151 | }, 152 | "execution_count": 99, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "len(df.groupby('user')),len(df)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 100, 164 | "metadata": { 165 | "scrolled": true 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stderr", 170 | "output_type": "stream", 171 | "text": [ 172 | 
"100%|██████████| 242686/242686 [01:24<00:00, 2883.45it/s]\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "train_user = []\n", 178 | "train_context = []\n", 179 | "train_target = []\n", 180 | "\n", 181 | "tow = 6\n", 182 | "prev_user = df.iloc[0]['user']\n", 183 | "user_user = []\n", 184 | "user_context = []\n", 185 | "user_target = []\n", 186 | "for target_idx in tqdm.tqdm(xrange(len(df))):\n", 187 | " (user, poi, time) = df.iloc[target_idx]\n", 188 | " if prev_user != user:\n", 189 | " prev_user = user\n", 190 | " train_user += user_user\n", 191 | " train_context += user_context\n", 192 | " train_target += user_target\n", 193 | "\n", 194 | " user_user = []\n", 195 | " user_context = []\n", 196 | " user_target = []\n", 197 | " #print train_user, train_context, train_target\n", 198 | " \n", 199 | " context = []\n", 200 | " for context_idx in xrange(target_idx+1, len(df)):\n", 201 | " (c_user, c_poi, c_time) = df.iloc[context_idx]\n", 202 | " if user == c_user and (time+tow) > c_time:\n", 203 | " context.append(c_poi)\n", 204 | " else:\n", 205 | " break\n", 206 | " if context:\n", 207 | " user_user.append(user)\n", 208 | " user_context.append(context)\n", 209 | " user_target.append(poi)\n", 210 | " \n", 211 | "train_user += user_user\n", 212 | "train_context += user_context\n", 213 | "train_target += user_target" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 101, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "(44189, 44189, 44189)" 225 | ] 226 | }, 227 | "execution_count": 101, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "len(train_user), len(train_context), len(train_target)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 102, 239 | "metadata": { 240 | "scrolled": false 241 | }, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "41 2.94349272443 2.0 1\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "len_context = []\n", 253 | "for i, context in enumerate(train_context):\n", 254 | " len_context.append(len(context))\n", 255 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 103, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "44189\n", 268 | "21\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "len_context.sort()\n", 274 | "print len(len_context)\n", 275 | "print len_context[int(len(len_context)*0.99)]" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 104, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "maxlen_context = 16\n", 285 | "for i, context in enumerate(train_context):\n", 286 | " if len(context) < maxlen_context:\n", 287 | " train_context[i] += ([0]*(maxlen_context-len(context)))\n", 288 | " elif len(context) > maxlen_context:\n", 289 | " train_context[i] = context[:maxlen_context]" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 105, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "16 16.0 16.0 16\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "len_context = []\n", 307 | "for context in train_context:\n", 308 | " len_context.append(len(context))\n", 309 | "print np.max(len_context), 
np.mean(len_context), np.median(len_context), np.min(len_context)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 106, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "np.save('./npy/test_context.npy', train_context)\n", 319 | "np.save('./npy/test_user.npy', train_user)\n", 320 | "np.save('./npy/test_target.npy', train_target)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 63, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "np.save('./npy/user2id.npy', user2id)\n", 330 | "np.save('./npy/id2user.npy', id2user)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 101, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/plain": [ 341 | "4627" 342 | ] 343 | }, 344 | "execution_count": 101, 345 | "metadata": {}, 346 | "output_type": "execute_result" 347 | } 348 | ], 349 | "source": [ 350 | "len(id2user)" 351 | ] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 2", 357 | "language": "python", 358 | "name": "python2" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 2 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython2", 370 | "version": "2.7.12" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 2 375 | } 376 | -------------------------------------------------------------------------------- /ipynb/total_prepro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 75, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "total poi : 13187\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from multiprocessing import Process\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import tqdm\n", 21 | "import os\n", 22 | "from models import Node, Rec \n", 23 | "\n", 24 | "checkin_file = \"../dataset/poi_info.txt\"\n", 25 | "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n", 26 | "df.columns = [\"id\", \"poi\", \"latitude\", \"longitude\"]\n", 27 | "print \"total poi :\", len(df)\n", 28 | "poi2id = {}\n", 29 | "id2poi = {}\n", 30 | "for i in xrange(len(df)):\n", 31 | " poi2id[df['poi'][i]] = df['id'][i]\n", 32 | " id2poi[df['id'][i]] = df['poi'][i]\n", 33 | "id2poi = id2poi.values()\n", 34 | "id2pos = df.loc[:, ['latitude', 'longitude', 'poi']].set_index('poi').T.to_dict('list')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 76, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "unk = 'u*n*k'\n", 44 | "poi2id[unk] = len(id2poi)\n", 45 | "id2poi.append(unk)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 77, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "np.save(\"./npy/poi2id.npy\", poi2id)\n", 55 | "np.save(\"./npy/id2poi.npy\", id2poi)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 78, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "13187\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "print poi2id.get(unk)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 79, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": 
"stream", 83 | "text": [ 84 | "total node of tree : 40955\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "# build a tree of area\n", 90 | "tree = Node(df['latitude'].min(), df['latitude'].max(),df['longitude'].max(), df['longitude'].min(), 0)\n", 91 | "tree.build()\n", 92 | "print \"total node of tree :\", Node.count\n", 93 | "theta = Node.theta" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 80, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "def main(id2poi_batch):\n", 103 | " id2route = []\n", 104 | " id2lr = []\n", 105 | " id2prob = []\n", 106 | "\n", 107 | " # make route/left_right_choice/probability list of each poi\n", 108 | " for poi in tqdm.tqdm(id2poi_batch):\n", 109 | " # each poi, they have a area. p_n is each corner\n", 110 | " p_n = [(id2pos[poi][0] - 0.5*theta, id2pos[poi][1] - 0.5*theta)\\\n", 111 | " ,(id2pos[poi][0] - 0.5*theta, id2pos[poi][1] + 0.5*theta)\\\n", 112 | " ,(id2pos[poi][0] + 0.5*theta, id2pos[poi][1] - 0.5*theta)\\\n", 113 | " ,(id2pos[poi][0] + 0.5*theta, id2pos[poi][1] + 0.5*theta)]\n", 114 | " # that area\n", 115 | " poi_area = Rec((id2pos[poi][1]+0.5*theta, id2pos[poi][1]-0.5*theta\\\n", 116 | " ,id2pos[poi][0]-0.5*theta, id2pos[poi][0]+0.5*theta))\n", 117 | "\n", 118 | " route_list = []\n", 119 | " lr_list = []\n", 120 | " area_list = []\n", 121 | " # each corner, where they are contained in\n", 122 | " for p in p_n:\n", 123 | " route, lr = tree.find_route(p)\n", 124 | " route_list.append(route)\n", 125 | " lr_list.append(lr)\n", 126 | "\n", 127 | " # remove duplicate\n", 128 | " route_set = []\n", 129 | " for route in route_list:\n", 130 | " if route not in route_set:\n", 131 | " route_set.append(route)\n", 132 | " lr_set = []\n", 133 | " for lr in lr_list:\n", 134 | " if lr not in lr_set:\n", 135 | " lr_set.append(lr)\n", 136 | "\n", 137 | " # each leaf, how much they are overlaped\n", 138 | " for route in route_set:\n", 139 | " leaf_area = Rec(tree.find_idx(route[0]))\n", 140 | " area_list.append(leaf_area.overlap(poi_area))\n", 141 | " area_list = np.divide(area_list, sum(area_list))\n", 142 | "\n", 143 | " id2route.append(route_set)\n", 144 | " id2lr.append(lr_set)\n", 145 | " id2prob.append(area_list)\n", 146 | " \n", 147 | " return id2route, id2lr, id2prob" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 81, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stderr", 157 | "output_type": "stream", 158 | "text": [ 159 | "100%|██████████| 13187/13187 [00:23<00:00, 551.16it/s]\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "if __name__ == '__main__':\n", 165 | " id2route, id2lr, id2prob = main(id2poi[:-1])" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 85, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "13\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "max_path = len(id2route[0][0])\n", 183 | "print max_path" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 86, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stderr", 193 | "output_type": "stream", 194 | "text": [ 195 | "100%|██████████| 13187/13187 [00:00<00:00, 81040.12it/s]\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "pad = [0]*max_path\n", 201 | "max_route_cnt = 4\n", 202 | "id2route_cnt = []\n", 203 | "\n", 204 | "for idx, routes in enumerate(tqdm.tqdm(id2route)):\n", 205 | " id2route_cnt.append(len(routes))\n", 206 | " \n", 207 | " if 
len(routes) < max_route_cnt:\n", 208 | " for _ in xrange(max_route_cnt - len(routes)):\n", 209 | " routes.append(pad)\n", 210 | " \n", 211 | " routes = np.asarray([l[1:] for l in routes])\n", 212 | " id2route[idx] = routes\n", 213 | "\n", 214 | "id2route.append([pad[1:], pad[1:], pad[1:], pad[1:]])\n", 215 | "id2route_cnt.append(0)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 87, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "(13188, 4, 12)\n", 228 | "(13188,)\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "print np.asarray(id2route).shape\n", 234 | "print np.asarray(id2route_cnt).shape" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 89, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stderr", 244 | "output_type": "stream", 245 | "text": [ 246 | "100%|██████████| 13187/13187 [00:00<00:00, 592942.69it/s]\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "pad = [0]*(max_path-1)\n", 252 | "\n", 253 | "for idx, lrs in enumerate(tqdm.tqdm(id2lr)):\n", 254 | " if len(lrs) < max_route_cnt:\n", 255 | " for _ in xrange(max_route_cnt - len(lrs)):\n", 256 | " lrs.append(pad)\n", 257 | " \n", 258 | " id2lr[idx] = lrs\n", 259 | " \n", 260 | "id2lr.append([pad, pad, pad, pad])" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 90, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "(13188, 4, 12)\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "print np.asarray(id2lr).shape" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 91, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stderr", 287 | "output_type": "stream", 288 | "text": [ 289 | "100%|██████████| 13187/13187 [00:00<00:00, 162874.68it/s]\n" 290 | ] 291 | } 292 | ], 293 | "source": [ 294 | "pad = 0\n", 295 | "\n", 296 | "for idx, probs in enumerate(tqdm.tqdm(id2prob)):\n", 297 | " probs = list(probs)\n", 298 | " if len(probs) < max_route_cnt:\n", 299 | " for _ in xrange(max_route_cnt - len(probs)):\n", 300 | " probs.append(pad)\n", 301 | " \n", 302 | " id2prob[idx] = probs\n", 303 | " \n", 304 | "id2prob.append([0,0,0,0])" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 92, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "name": "stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "(13188, 4)\n" 317 | ] 318 | } 319 | ], 320 | "source": [ 321 | "print np.asarray(id2prob).shape" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 94, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "np.save(\"./npy/id2route.npy\", id2route)\n", 331 | "np.save(\"./npy/id2lr.npy\", id2lr)\n", 332 | "np.save(\"./npy/id2prob.npy\", id2prob)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 93, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": [ 343 | "40683" 344 | ] 345 | }, 346 | "execution_count": 93, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "np.max(id2route)" 353 | ] 354 | } 355 | ], 356 | "metadata": { 357 | "kernelspec": { 358 | "display_name": "Python 2", 359 | "language": "python", 360 | "name": "python2" 361 | }, 362 | "language_info": { 363 | "codemirror_mode": { 364 | "name": "ipython", 365 | "version": 2 366 | }, 367 | "file_extension": ".py", 
368 | "mimetype": "text/x-python", 369 | "name": "python", 370 | "nbconvert_exporter": "python", 371 | "pygments_lexer": "ipython2", 372 | "version": "2.7.12" 373 | } 374 | }, 375 | "nbformat": 4, 376 | "nbformat_minor": 2 377 | } 378 | -------------------------------------------------------------------------------- /ipynb/preprocess2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 181, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from datetime import datetime\n", 12 | "import tqdm\n", 13 | "\n", 14 | "checkin_file = \"../dataset/loc-gowalla_totalCheckins.txt\"\n", 15 | "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n", 16 | "df.columns = [\"user\", \"time\", \"latitude\", \"longitude\", \"poi\"]\n", 17 | "df = df[['user', 'time', 'poi']]" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 182, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "6442892" 29 | ] 30 | }, 31 | "execution_count": 182, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": [ 37 | "len(df)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 183, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "prev_cnt = 0\n", 47 | "curr_cnt = len(df)\n", 48 | "while prev_cnt != curr_cnt:\n", 49 | " prev_cnt = curr_cnt\n", 50 | " df = df[df.groupby('user').user.transform(len) > 5]\n", 51 | " df = df[df.groupby('poi').poi.transform(len) > 5]\n", 52 | " curr_cnt = len(df)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 184, 58 | "metadata": { 59 | "scrolled": false 60 | }, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "4293047" 66 | ] 67 | }, 68 | "execution_count": 184, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "len(df)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 186, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "poi2id = np.load(\"./npy/poi2id.npy\").item()\n", 84 | "df['poi'] = df['poi'].apply(lambda x: poi2id[x] if poi2id.get(x) != None else 0)\n", 85 | "df = df[df['poi'] != 0]\n", 86 | "df['time'] = df['time'].apply(lambda x: (datetime.strptime(x, \"%Y-%m-%dT%H:%M:%SZ\")-datetime(2009,1,1)).total_seconds()\\\n", 87 | " /360) # hour" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 187, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "15233" 99 | ] 100 | }, 101 | "execution_count": 187, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "np.max(df.poi)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 188, 113 | "metadata": { 114 | "scrolled": true 115 | }, 116 | "outputs": [ 117 | { 118 | "name": "stderr", 119 | "output_type": "stream", 120 | "text": [ 121 | " 43%|████▎ | 242208/561673 [00:31<00:41, 7721.47it/s]" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "user2id = {'unk':0}\n", 127 | "id2user = [0]\n", 128 | "for target_idx in tqdm.tqdm(xrange(len(df))):\n", 129 | " (user, time, poi) = df.iloc[target_idx]\n", 130 | " if user2id.get(user) == None:\n", 131 | " user2id[user] = len(id2user)\n", 132 | " id2user.append(user)\n", 133 | " if len(id2user) == 4627:\n", 134 | " break\n", 135 | "df['user'] = df['user'].apply(lambda 
x: user2id[x] if user2id.get(x) != None else 0)\n", 136 | "df = df[df['user'] != 0]" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 189, 142 | "metadata": { 143 | "scrolled": true 144 | }, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "(4626, 242873)" 150 | ] 151 | }, 152 | "execution_count": 189, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "len(df.groupby('user')),len(df)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 190, 164 | "metadata": { 165 | "scrolled": true 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stderr", 170 | "output_type": "stream", 171 | "text": [ 172 | "\n", 173 | " 0%| | 0/242873 [00:00 20:\n", 1021 | " train_thr = int(len(user_user)*0.9)\n", 1022 | " valid_thr = int(len(user_user)*0.95)\n", 1023 | " train_user += user_user[:train_thr]\n", 1024 | " train_context += user_context[:train_thr]\n", 1025 | " train_target += user_target[:train_thr]\n", 1026 | " valid_user += user_user[train_thr:valid_thr]\n", 1027 | " valid_context += user_context[train_thr:valid_thr]\n", 1028 | " valid_target += user_target[train_thr:valid_thr]\n", 1029 | " test_user += user_user[valid_thr:]\n", 1030 | " test_context += user_context[valid_thr:]\n", 1031 | " test_target += user_target[valid_thr:]\n", 1032 | " elif len(user_user) > 0:\n", 1033 | " train_user += user_user\n", 1034 | " train_context += user_context\n", 1035 | " train_target += user_target \n", 1036 | " user_user = []\n", 1037 | " user_context = []\n", 1038 | " user_target = []\n", 1039 | " #print train_user, train_context, train_target\n", 1040 | " \n", 1041 | " context = []\n", 1042 | " for context_idx in xrange(target_idx+1, len(df)):\n", 1043 | " (c_user, c_time, c_poi) = df.iloc[context_idx]\n", 1044 | " if user == c_user and (time-tow) < c_time:\n", 1045 | " context.append(c_poi)\n", 1046 | " else:\n", 1047 | " break\n", 1048 | " if context:\n", 1049 | " user_user.append(user)\n", 1050 | " user_context.append(context)\n", 1051 | " user_target.append(poi)\n", 1052 | " \n", 1053 | "if len(user_user) > 20:\n", 1054 | " train_thr = int(len(user_user)*0.9)\n", 1055 | " valid_thr = int(len(user_user)*0.95)\n", 1056 | " train_user += user_user[:train_thr]\n", 1057 | " train_context += user_context[:train_thr]\n", 1058 | " train_target += user_target[:train_thr]\n", 1059 | " valid_user += user_user[train_thr:valid_thr]\n", 1060 | " valid_context += user_context[train_thr:valid_thr]\n", 1061 | " valid_target += user_target[train_thr:valid_thr]\n", 1062 | " test_user += user_user[valid_thr:]\n", 1063 | " test_context += user_context[valid_thr:]\n", 1064 | " test_target += user_target[valid_thr:]\n", 1065 | "elif len(user_user) > 0:\n", 1066 | " train_user += user_user\n", 1067 | " train_context += user_context\n", 1068 | " train_target += user_target" 1069 | ] 1070 | }, 1071 | { 1072 | "cell_type": "code", 1073 | "execution_count": 191, 1074 | "metadata": {}, 1075 | "outputs": [ 1076 | { 1077 | "data": { 1078 | "text/plain": [ 1079 | "(53359, 53359, 53359, 2336, 2336, 2336, 2677, 2677, 2677)" 1080 | ] 1081 | }, 1082 | "execution_count": 191, 1083 | "metadata": {}, 1084 | "output_type": "execute_result" 1085 | } 1086 | ], 1087 | "source": [ 1088 | "len(train_user), len(train_context), len(train_target), len(valid_user), len(valid_context), len(valid_target), len(test_user), len(test_context), len(test_target)" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "code", 1093 | 
"execution_count": 192, 1094 | "metadata": { 1095 | "scrolled": false 1096 | }, 1097 | "outputs": [ 1098 | { 1099 | "name": "stdout", 1100 | "output_type": "stream", 1101 | "text": [ 1102 | "64 4.00256751438 2.0 1\n", 1103 | "64 3.94891386281 2.0 1\n" 1104 | ] 1105 | } 1106 | ], 1107 | "source": [ 1108 | "len_context = []\n", 1109 | "for i, context in enumerate(train_context):\n", 1110 | " len_context.append(len(context))\n", 1111 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)\n", 1112 | "for i, context in enumerate(valid_context):\n", 1113 | " len_context.append(len(context))\n", 1114 | "for i, context in enumerate(test_context):\n", 1115 | " len_context.append(len(context))\n", 1116 | "len_context.sort()\n", 1117 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)" 1118 | ] 1119 | }, 1120 | { 1121 | "cell_type": "code", 1122 | "execution_count": 193, 1123 | "metadata": {}, 1124 | "outputs": [ 1125 | { 1126 | "name": "stdout", 1127 | "output_type": "stream", 1128 | "text": [ 1129 | "58372\n", 1130 | "26\n" 1131 | ] 1132 | } 1133 | ], 1134 | "source": [ 1135 | "print len(len_context)\n", 1136 | "print len_context[int(len(len_context)*0.99)]" 1137 | ] 1138 | }, 1139 | { 1140 | "cell_type": "code", 1141 | "execution_count": 194, 1142 | "metadata": {}, 1143 | "outputs": [], 1144 | "source": [ 1145 | "maxlen_context = 32\n", 1146 | "for i, context in enumerate(train_context):\n", 1147 | " if len(context) < maxlen_context:\n", 1148 | " train_context[i] += ([0]*(maxlen_context-len(context)))\n", 1149 | " elif len(context) > maxlen_context:\n", 1150 | " train_context[i] = context[:maxlen_context]\n", 1151 | "for i, context in enumerate(valid_context):\n", 1152 | " if len(context) < maxlen_context:\n", 1153 | " valid_context[i] += ([0]*(maxlen_context-len(context)))\n", 1154 | " elif len(context) > maxlen_context:\n", 1155 | " valid_context[i] = context[:maxlen_context]\n", 1156 | "for i, context in enumerate(test_context):\n", 1157 | " if len(context) < maxlen_context:\n", 1158 | " test_context[i] += ([0]*(maxlen_context-len(context)))\n", 1159 | " elif len(context) > maxlen_context:\n", 1160 | " test_context[i] = context[:maxlen_context]" 1161 | ] 1162 | }, 1163 | { 1164 | "cell_type": "code", 1165 | "execution_count": 195, 1166 | "metadata": {}, 1167 | "outputs": [ 1168 | { 1169 | "name": "stdout", 1170 | "output_type": "stream", 1171 | "text": [ 1172 | "32 32.0 32.0 32\n" 1173 | ] 1174 | } 1175 | ], 1176 | "source": [ 1177 | "len_context = []\n", 1178 | "for context in test_context:\n", 1179 | " len_context.append(len(context))\n", 1180 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)" 1181 | ] 1182 | }, 1183 | { 1184 | "cell_type": "code", 1185 | "execution_count": 196, 1186 | "metadata": {}, 1187 | "outputs": [], 1188 | "source": [ 1189 | "np.save('./npy/train_context.npy', train_context)\n", 1190 | "np.save('./npy/valid_context.npy', valid_context)\n", 1191 | "np.save('./npy/test_context.npy', test_context)\n", 1192 | "np.save('./npy/user2id.npy', user2id)\n", 1193 | "np.save('./npy/id2user.npy', id2user)" 1194 | ] 1195 | }, 1196 | { 1197 | "cell_type": "code", 1198 | "execution_count": 197, 1199 | "metadata": {}, 1200 | "outputs": [], 1201 | "source": [ 1202 | "np.save('./npy/train_user.npy', train_user)\n", 1203 | "np.save('./npy/valid_user.npy', valid_user)\n", 1204 | "np.save('./npy/test_user.npy', test_user)\n", 1205 | 
"np.save('./npy/train_target.npy', train_target)\n", 1206 | "np.save('./npy/valid_target.npy', valid_target)\n", 1207 | "np.save('./npy/test_target.npy', test_target)" 1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "execution_count": 101, 1213 | "metadata": {}, 1214 | "outputs": [ 1215 | { 1216 | "data": { 1217 | "text/plain": [ 1218 | "4627" 1219 | ] 1220 | }, 1221 | "execution_count": 101, 1222 | "metadata": {}, 1223 | "output_type": "execute_result" 1224 | } 1225 | ], 1226 | "source": [ 1227 | "len(id2user)" 1228 | ] 1229 | } 1230 | ], 1231 | "metadata": { 1232 | "kernelspec": { 1233 | "display_name": "Python 2", 1234 | "language": "python", 1235 | "name": "python2" 1236 | }, 1237 | "language_info": { 1238 | "codemirror_mode": { 1239 | "name": "ipython", 1240 | "version": 2 1241 | }, 1242 | "file_extension": ".py", 1243 | "mimetype": "text/x-python", 1244 | "name": "python", 1245 | "nbconvert_exporter": "python", 1246 | "pygments_lexer": "ipython2", 1247 | "version": "2.7.12" 1248 | } 1249 | }, 1250 | "nbformat": 4, 1251 | "nbformat_minor": 2 1252 | } 1253 | --------------------------------------------------------------------------------