├── README.md
├── config.py
├── dataset.py
├── train.py
├── preprocess.py
├── ipynb
│   ├── gowalla_term.ipynb
│   ├── merge_data.ipynb
│   ├── total_prepro2.ipynb
│   ├── total_prepro.ipynb
│   └── preprocess2.ipynb
└── models.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # POI2Vec
2 | POI2Vec: Geographical Latent Representation for Predicting Future Visitors
3 | Shanshan Feng, Gao Cong, Bo An, Yeow Meng Chee
4 | Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | # Parameters
4 | # ==================================================
5 | ltype = torch.cuda.LongTensor
6 | ftype = torch.cuda.FloatTensor
7 | 
8 | # Model Hyperparameters
9 | feat_dim = 200
10 | route_depth = 16
11 | route_count = 4
12 | context_len = 32
13 | 
14 | # Weight init
15 | weight_m = 0
16 | weight_v = 0.1
17 | 
18 | # Training Parameters
19 | batch_size = 128
20 | num_epochs = 30
21 | learning_rate = 0.005
22 | momentum = 0.0
23 | evaluate_every = 3
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | 
4 | class Data():
5 |     def __init__(self):
6 | 
7 |         self.id2route = None
8 |         self.id2lr = None
9 |         self.id2prob = None
10 | 
11 |         self.user_train = None
12 |         self.context_train = None
13 |         self.target_train = None
14 |         self.user_valid = None
15 |         self.context_valid = None
16 |         self.target_valid = None
17 |         self.user_test = None
18 |         self.context_test = None
19 |         self.target_test = None
20 |         self.maxlen_context = 32
21 | 
22 |     def load(self):
23 |         print("Loading data...")
24 |         poi_list = np.load("./npy/id2poi.npy")
25 |         user_list = np.load("./npy/id2user.npy")
26 |         self.id2route = np.load("./npy/id2route.npy")
27 |         self.id2lr = np.load("./npy/id2lr.npy")
28 |         self.id2prob = np.load("./npy/id2prob.npy")
29 | 
30 |         self.user_train = np.load("./npy/train_user.npy")
31 |         self.context_train = np.load("./npy/train_context.npy")
32 |         self.target_train = np.load("./npy/train_target.npy")
33 |         self.user_valid = np.load("./npy/valid_user.npy")
34 |         self.context_valid = np.load("./npy/valid_context.npy")
35 |         self.target_valid = np.load("./npy/valid_target.npy")
36 |         self.user_test = np.load("./npy/test_user.npy")
37 |         self.context_test = np.load("./npy/test_context.npy")
38 |         self.target_test = np.load("./npy/test_target.npy")
39 |         print("Train/Valid/Test/POI/User: {:d}/{:d}/{:d}/{:d}/{:d}".format(len(self.user_train), len(self.user_valid), len(self.user_test), len(poi_list), len(user_list)))
40 |         print("==================================================================================")
41 | 
42 |         return len(poi_list), len(user_list)
43 | 
44 |     def train_batch_iter(self, batch_size):
45 |         data = list(zip(self.user_train, self.context_train, self.target_train))
46 |         random.shuffle(data)
47 |         return self.batch_iter(data, batch_size)
48 | 
49 |     def valid_batch_iter(self, batch_size):
50 |         data = list(zip(self.user_valid, self.context_valid, self.target_valid))
51 |         return self.batch_iter(data, batch_size)
52 | 
53 |     def test_batch_iter(self, batch_size):
54 |         data = list(zip(self.user_test, self.context_test, self.target_test))
55 |         return self.batch_iter(data, batch_size)
56 | 
57 |     def batch_iter(self, data, batch_size):
58 |         data_size = float(len(data))
59 |         num_batches = int(np.ceil(data_size / batch_size))
60 |         for batch_num in xrange(num_batches):
61 |             start_index = int(batch_num * batch_size)
62 |             end_index = min(int((batch_num + 1) * batch_size), int(data_size))
63 |             yield data[start_index:end_index]
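A minimal sketch of how Data.batch_iter slices a dataset — toy arrays stand in for the .npy files that load() expects, and the shapes below are assumptions for illustration only:

import numpy as np
import dataset

data = dataset.Data()
data.user_train = np.arange(10)                      # ten fake user ids
data.context_train = np.zeros((10, 32), dtype=int)   # zero-padded context windows
data.target_train = np.arange(10)                    # fake target POI ids

for batch in data.train_batch_iter(batch_size=4):
    users, contexts, targets = zip(*batch)
    print(len(users))                                # 4, 4, 2 -- the last batch is shorter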
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | 
3 | import gc
4 | import time
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | from torch.autograd import Variable
10 | import dataset
11 | import models
12 | import config
13 | 
14 | # Type Parameters
15 | ltype = config.ltype
16 | ftype = config.ftype
17 | # Training Parameters
18 | learning_rate = config.learning_rate
19 | 
20 | def parameters(*argv):
21 |     params = []
22 |     for model in argv:
23 |         params += list(model.parameters())
24 | 
25 |     return params
26 | 
27 | def print_score(batches, step):
28 |     batch_loss = 0.  # accumulated loss over the batches
29 |     for i, batch in enumerate(batches):
30 |         user_batch, context_batch, target_batch = zip(*batch)
31 |         batch_loss += run(user_batch, context_batch, target_batch, step=step)
32 |     print("Validation Error :", batch_loss/(i+1), time.ctime())
33 | 
34 | ##############################################################################################
35 | def run(user, context, target, step):
36 | 
37 |     optimizer.zero_grad()
38 | 
39 |     user = Variable(torch.from_numpy(np.asarray(user)).type(ltype))
40 |     context = Variable(torch.from_numpy(np.asarray(context)).type(ltype))
41 | 
42 |     # POI2VEC
43 |     loss = p2v_model(user, context, target)
44 | 
45 |     # update the weights only in the training phase (step 1);
46 |     # validation/test passes (step 2) just report the loss
47 |     if step == 1:
48 |         loss.backward()
49 |         optimizer.step()
50 |     gc.collect()
51 | 
52 |     return loss.data.cpu().numpy()[0]
53 | 
54 | ##############################################################################################
55 | ##############################################################################################
56 | if __name__ == "__main__":
57 | 
58 |     # Data Preparation
59 |     data = dataset.Data()
60 |     poi_cnt, user_cnt = data.load()
61 | 
62 |     # Model Preparation
63 |     p2v_model = models.POI2VEC(poi_cnt, user_cnt, data.id2route, data.id2lr, data.id2prob).cuda()
64 |     loss_model = nn.CrossEntropyLoss().cuda()
65 |     optimizer = torch.optim.SGD(parameters(p2v_model), lr=config.learning_rate, momentum=config.momentum)
66 | 
67 |     for i in xrange(config.num_epochs):
68 |         # Training
69 |         batch_loss = 0.
70 |         train_batches = data.train_batch_iter(config.batch_size)
71 |         for j, train_batch in enumerate(train_batches):
72 |             user_batch, context_batch, target_batch = zip(*train_batch)
73 |             batch_loss += run(user_batch, context_batch, target_batch, step=1)
74 |             if (j+1) % 1000 == 0:
75 |                 print("batch #{:d}: ".format(j+1), "batch_loss :", batch_loss/(j+1), time.ctime())
76 | 
77 |         # Validation
78 |         if (i+1) % config.evaluate_every == 0:
79 |             print("==================================================================================")
80 |             print("Evaluation at epoch #{:d}: ".format(i+1))
81 |             p2v_model.eval()
82 |             valid_batches = data.valid_batch_iter(config.batch_size)
83 |             print_score(valid_batches, step=2)
84 |             p2v_model.train()
85 | 
86 |     # Test
87 |     print("==================================================================================")
88 |     print("Testing")
89 |     p2v_model.eval()
90 |     test_batches = data.test_batch_iter(config.batch_size)
91 |     print_score(test_batches, step=2)
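train.py only reports the averaged loss. For the paper's actual task — ranking likely future visitors — a helper along the lines below could reuse the same user·POI inner product that POI2VEC.forward uses for pr_user; topk_pois is a hypothetical addition, not part of the repository:

import torch

def topk_pois(p2v_model, user_var, k=10):
    # score every POI against each user embedding, then keep the k best POI ids
    user_vec = p2v_model.user_weight(user_var)                    # batch x feat_dim
    scores = torch.mm(user_vec, p2v_model.poi_weight.weight.t())  # batch x poi_cnt
    _, top_idx = scores.topk(k, dim=1)
    return top_idx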
--------------------------------------------------------------------------------
/preprocess.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import Process
2 | import numpy as np
3 | import pandas as pd
4 | import tqdm
5 | import os
6 | from models import Node, Rec
7 | 
8 | checkin_file = "../dataset/loc-gowalla_totalCheckins.txt"
9 | df = pd.read_csv(checkin_file, sep='\t', header=None)
10 | df.columns = ["user", "time", "latitude", "longitude", "poi"]
11 | print "total visit :", len(df),
12 | df = df.drop_duplicates(subset=['poi'])
13 | print "/ total poi :", len(df)
14 | poi2pos = df.loc[:, ['latitude', 'longitude', 'poi']].set_index('poi').T.to_dict('list')
15 | 
16 | proc_n = 20
17 | 
18 | poi2id = {'unk':0}
19 | id2poi = ['unk']
20 | for poi in df['poi']:
21 |     if poi2id.get(poi) == None:
22 |         poi2id[poi] = len(id2poi)
23 |         id2poi.append(poi)
24 | np.save("./npy/poi2id.npy", poi2id)
25 | np.save("./npy/id2poi.npy", id2poi)
26 | 
27 | # build a binary tree of regions over the bounding box of all POIs
28 | tree = Node(df['latitude'].min(), df['latitude'].max(), df['longitude'].max(), df['longitude'].min(), 0)
29 | tree.build()
30 | print "total node of tree :", Node.count
31 | theta = Node.theta
32 | 
33 | def main(id2poi_batch, proc_i):
34 |     id2route = []
35 |     id2lr = []
36 |     id2prob = []
37 | 
38 |     # build the route / left-right choice / probability lists for each POI
39 |     for poi in tqdm.tqdm(id2poi_batch):
40 |         # each POI owns a square cell of side theta; p_n holds its four corners
41 |         p_n = [(poi2pos[poi][0] - 0.5*theta, poi2pos[poi][1] - 0.5*theta)\
42 |               ,(poi2pos[poi][0] - 0.5*theta, poi2pos[poi][1] + 0.5*theta)\
43 |               ,(poi2pos[poi][0] + 0.5*theta, poi2pos[poi][1] - 0.5*theta)\
44 |               ,(poi2pos[poi][0] + 0.5*theta, poi2pos[poi][1] + 0.5*theta)]
45 |         # the cell itself, as a rectangle
46 |         poi_area = Rec((poi2pos[poi][1]+0.5*theta, poi2pos[poi][1]-0.5*theta\
47 |                        ,poi2pos[poi][0]-0.5*theta, poi2pos[poi][0]+0.5*theta))
48 | 
49 |         route_list = []
50 |         lr_list = []
51 |         area_list = []
52 |         # find the leaf region that contains each corner
53 |         for p in p_n:
54 |             route, lr = tree.find_route(p)
55 |             route_list.append(route)
56 |             lr_list.append(lr)
57 | 
58 |         # remove duplicate routes
59 |         route_set = []
60 |         for route in route_list:
61 |             if route not in route_set:
62 |                 route_set.append(route)
63 |         lr_set = []
64 |         for lr in lr_list:
65 |             if lr not in lr_set:
66 |                 lr_set.append(lr)
67 | 
68 |         # weight each leaf by how much of the POI's cell it overlaps
69 |         for route in route_set:
70 |             leaf_area = Rec(tree.find_idx(route[0]))
71 |             area_list.append(leaf_area.overlap(poi_area))
72 |         area_list = np.divide(area_list, sum(area_list))
73 | 
74 |         id2route.append(route_set)
75 |         id2lr.append(lr_set)
76 |         id2prob.append(area_list)
77 | 
78 |     np.save("./npy/splited_file/id2route_%02d.npy" % proc_i, id2route)
79 |     np.save("./npy/splited_file/id2lr_%02d.npy" % proc_i, id2lr)
80 |     np.save("./npy/splited_file/id2prob_%02d.npy" % proc_i, id2prob)
81 | 
82 | if __name__ == '__main__':
83 |     procs = []
84 |     batch_size = len(id2poi)/proc_n
85 |     for i in xrange(proc_n+1):  # one extra process picks up the remainder of the integer division
86 |         print "process #%02d running..."%(i+1)
87 |         proc = Process(target=main, args=(id2poi[i*batch_size+1:(i+1)*batch_size+1], i+1))
88 |         procs.append(proc)
89 |         proc.start()
90 | 
91 |     for proc in procs:
92 |         proc.join()
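A self-contained toy run of the Node/Rec machinery from models.py that preprocess.py relies on — the bounding box and POI position below are made up purely to show what find_route returns for one corner of a POI cell:

from models import Node, Rec

Node.count = 0     # reset class-level state in case a tree was built before
Node.leaves = []
tree = Node(0.0, 4.0, 4.0, 0.0, 0)   # west, east, north, south, level
tree.build()

theta = Node.theta
lat, lon = 1.2, 2.7                   # a fake POI position
corner = (lat - 0.5*theta, lon - 0.5*theta)
route, lr = tree.find_route(corner)
print(route)   # node ids from the containing leaf up to the root
print(lr)      # the 0/1 turns taken at each inner node on the way down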
--------------------------------------------------------------------------------
/ipynb/gowalla_term.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import tqdm\n",
10 |     "import numpy as np\n",
11 |     "import pandas as pd\n",
12 |     "from datetime import datetime\n",
13 |     "\n",
14 |     "checkin_file = \"../dataset/loc-gowalla_totalCheckins.txt\"\n",
15 |     "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n",
16 |     "df.columns = [\"user\", \"time\", \"latitude\", \"longitude\", \"poi\"]\n",
17 |     "df = df[['user', 'time']]"
18 |    ]
19 |   },
20 |   {
21 |    "cell_type": "code",
22 |    "execution_count": null,
23 |    "metadata": {},
24 |    "outputs": [],
25 |    "source": [
26 |     "# a bare datetime has no total_seconds(); subtract a reference date to get a timedelta first\n",
27 |     "df['time'] = df['time'].apply(lambda x: (datetime.strptime(x, \"%Y-%m-%dT%H:%M:%SZ\")-datetime(2009,1,1))\\\n",
28 |     "                              .total_seconds()/3600) # hours"
29 |    ]
30 |   },
31 |   {
32 |    "cell_type": "code",
33 |    "execution_count": null,
34 |    "metadata": {},
35 |    "outputs": [],
36 |    "source": [
37 |     "total_term = 0.\n",
38 |     "total_tran = 0\n",
39 |     "prev_user, prev_time = df.iloc[0]\n",
40 |     "print prev_user, prev_time"
41 |    ]
42 |   },
43 |   {
44 |    "cell_type": "code",
45 |    "execution_count": null,
46 |    "metadata": {},
47 |    "outputs": [],
48 |    "source": [
49 |     "for target_idx in tqdm.tqdm(xrange(1, len(df))):\n",
50 |     "    (user, time) = df.iloc[target_idx]\n",
51 |     "    if prev_user != user:\n",
52 |     "        prev_user = user\n",
53 |     "        prev_time = time\n",
54 |     "    else:\n",
55 |     "        total_tran += 1\n",
56 |     "        total_term += (prev_time - time)\n",
57 |     "        prev_time = time"
58 |    ]
59 |   },
60 |   {
61 |    "cell_type": "code",
62 |    "execution_count": null,
63 |    "metadata": {},
64 |    "outputs": [],
65 |    "source": [
66 |     "total_term/total_tran"
67 |    ]
68 |   }
69 |  ],
70 |  "metadata": {
71 |   "kernelspec": {
72 |    "display_name": "Python 2",
73 |    "language": "python",
74 |    "name": "python2"
75 |   },
76 |   "language_info": {
77 |    "codemirror_mode": {
78 |     "name": "ipython",
79 |     "version": 2
80 |    },
81 |    "file_extension": ".py",
82 |    "mimetype": "text/x-python",
83 |    "name": "python",
84 |    "nbconvert_exporter": "python",
85 |    "pygments_lexer": "ipython2",
86 |    "version": "2.7.12"
87 |   }
88 |  },
89 |  "nbformat": 4,
90 |  "nbformat_minor": 2
91 | }
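For reference, the statistic the loop above accumulates — the mean gap in hours between a user's consecutive check-ins — can also be computed with a groupby. The tiny frame here is fabricated; rows are newest-first per user, as in the Gowalla file:

import pandas as pd

df = pd.DataFrame({'user': [0, 0, 0, 1, 1],
                   'time': [10.0, 7.0, 1.0, 5.0, 2.0]})   # hours, newest first
gaps = df.groupby('user')['time'].diff(-1)   # prev_time - time per transition
print(gaps.dropna().mean())                  # same average term as the notebook loop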
--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | 
3 | import os
4 | import datetime
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | from torch.autograd import Variable
10 | import config
11 | 
12 | class POI2VEC(nn.Module):
13 |     def __init__(self, poi_cnt, user_cnt, id2route, id2lr, id2prob):
14 |         super(POI2VEC, self).__init__()
15 | 
16 |         # attributes
17 |         route_cnt = np.power(2, config.route_depth)-1  # inner nodes of a full binary tree of this depth
18 |         self.id2route = id2route
19 |         self.id2lr = np.array(id2lr)
20 |         self.id2prob = np.array(id2prob)
21 | 
22 |         # models
23 |         self.poi_weight = nn.Embedding(poi_cnt, config.feat_dim, padding_idx=0)
24 |         self.poi_weight.weight.data.normal_(config.weight_m, config.weight_v)
25 |         self.user_weight = nn.Embedding(user_cnt, config.feat_dim, padding_idx=0)
26 |         self.user_weight.weight.data.normal_(config.weight_m, config.weight_v)
27 |         self.route_weight = nn.Embedding(route_cnt, config.feat_dim, padding_idx=0)
28 |         self.route_weight.weight.data.normal_(config.weight_m, config.weight_v)
29 |         self.sigmoid = nn.Sigmoid()
30 | 
31 |     def forward(self, user, context, target):
32 |         target = map(int, target)
33 |         route = Variable(torch.from_numpy(self.id2route[target]))\
34 |                 .contiguous().view(-1, config.route_count*config.route_depth).type(config.ltype)
35 |                 # batch x (route_count(4) x route_depth(16))
36 |         lr = Variable(torch.from_numpy(self.id2lr[target]))\
37 |                 .view(-1, config.route_count*(config.route_depth)).type(config.ftype)
38 |                 # batch x (route_count(4) x route_depth(16))
39 |         prob = Variable(torch.from_numpy(self.id2prob[target]))\
40 |                 .view(-1, config.route_count).type(config.ftype) # batch x route_count(4)
41 | 
42 |         context = self.poi_weight(context) # batch x context_len(32) x feat_dim(200)
43 |         route = self.route_weight(route)   # batch x (route_count(4) x route_depth(16)) x feat_dim(200)
44 |         user = self.user_weight(user)      # batch x feat_dim(200)
45 |         target = Variable(torch.from_numpy(np.asarray(target)).type(config.ltype))
46 |         target = self.poi_weight(target)
47 | 
48 |         phi_context = torch.sum(context, dim=1, keepdim=True).permute(0,2,1) # batch x feat_dim x 1
49 |         psi_context = torch.bmm(route, phi_context) # batch x (route_count x route_depth) x 1
50 |         psi_context = self.sigmoid(psi_context).view(-1, config.route_count*config.route_depth)
51 | 
52 |         # lr=1 keeps sigmoid(x); lr=0 flips it to 1-sigmoid(x): (2s)**1 - s = s, (2s)**0 - s = 1 - s
53 |         psi_context = (torch.pow(torch.mul(psi_context, 2), lr) - psi_context)\
54 |                 .view(-1, config.route_count, config.route_depth)
55 | 
56 |         pr_path = 1
57 |         for i in xrange(config.route_depth):
58 |             pr_path = torch.mul(psi_context[:,:,i], pr_path)
59 |         pr_path = torch.sum(torch.mul(pr_path, prob), 1)
60 | 
61 |         pr_user = torch.mm(user, self.poi_weight.weight.t())
62 |         pr_user = torch.sum(torch.exp(pr_user), 1)
63 |         pr_user = torch.div(torch.exp(torch.sum(torch.mul(target, user), 1)), pr_user)
64 |         pr_ult = 1.0-torch.sum(torch.mul(pr_user, pr_path))
65 | 
66 |         return pr_ult
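A scalar check of the (2*psi)**lr - psi trick in forward(): it evaluates to sigmoid(x) for a right turn (lr = 1) and to 1 - sigmoid(x) for a left turn (lr = 0), so multiplying over the depth gives the probability of the whole root-to-leaf route. The scores below are arbitrary:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

psi = sigmoid(0.3)
for lr in (0.0, 1.0):
    branch = (2.0 * psi) ** lr - psi
    expected = psi if lr == 1.0 else 1.0 - psi
    assert abs(branch - expected) < 1e-12

turns = [(0.3, 1), (-0.2, 0), (0.9, 1)]   # (inner-node score, lr) per level
pr_path = np.prod([(2.0*sigmoid(s))**lr - sigmoid(s) for s, lr in turns])
print(pr_path)   # probability of this depth-3 route given the context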
67 | class Rec:
68 |     # rectangle, used to compute overlapped areas
69 |     def __init__(self, (top, down, left, right)):
70 |         self.top = top
71 |         self.down = down
72 |         self.left = left
73 |         self.right = right
74 | 
75 |     def overlap(self, a):
76 |         dx = min(self.top, a.top) - max(self.down, a.down)
77 |         dy = min(self.right, a.right) - max(self.left, a.left)
78 |         if (dx>=0) and (dy>=0):
79 |             return dx*dy
80 |         else:
81 |             # no overlap
82 |             return -1
83 | 
84 | class Node:
85 |     # region-tree node
86 |     theta = 0.5
87 |     count = 0
88 |     leaves = []
89 | 
90 |     def __init__(self, west, east, north, south, level):
91 |         self.left = None
92 |         self.right = None
93 |         self.west = west
94 |         self.east = east
95 |         self.north = north
96 |         self.south = south
97 |         self.level = level
98 |         Node.count += 1
99 |         self.count = Node.count
100 | 
101 |     def build(self):
102 |         # even levels split east-west, odd levels split north-south
103 |         if self.level%2 == 0:
104 |             if (self.east - (self.west+self.east)/2) > 2*Node.theta:
105 |                 self.left = Node(self.west, (self.west+self.east)/2, self.north, self.south, self.level+1)
106 |                 self.right = Node((self.west+self.east)/2, self.east, self.north, self.south, self.level+1)
107 |                 self.left.build()
108 |                 self.right.build()
109 |             else:
110 |                 Node.leaves.append(self)
111 |         else:
112 |             if (self.north - (self.north+self.south)/2) > 2*Node.theta:
113 |                 self.left = Node(self.west, self.east, self.north, (self.north+self.south)/2, self.level+1)
114 |                 self.right = Node(self.west, self.east, (self.north+self.south)/2, self.south, self.level+1)
115 |                 self.left.build()
116 |                 self.right.build()
117 |             else:
118 |                 Node.leaves.append(self)
119 | 
120 |     def find_route(self, (latitude, longitude)):
121 |         if self.left == None:
122 |             prev_route = [self.count]
123 |             prev_lr = []
124 |             return prev_route, prev_lr
125 | 
126 |         # left : 0, right : 1
127 |         if self.level%2 == 0:
128 |             if self.left.east < latitude:
129 |                 prev_route, prev_lr = self.right.find_route((latitude, longitude))
130 |                 prev_lr.append(1)
131 |             else:
132 |                 prev_route, prev_lr = self.left.find_route((latitude, longitude))
133 |                 prev_lr.append(0)
134 |         else:
135 |             if self.left.south < longitude:
136 |                 prev_route, prev_lr = self.left.find_route((latitude, longitude))
137 |                 prev_lr.append(0)
138 |             else:
139 |                 prev_route, prev_lr = self.right.find_route((latitude, longitude))
140 |                 prev_lr.append(1)
141 |         prev_route.append(self.count)
142 |         return prev_route, prev_lr
143 | 
144 |     def find_idx(self, idx):
145 |         # look the leaf up by its node id and return its bounds
146 |         for leaf in Node.leaves:
147 |             if leaf.count == idx:
148 |                 return leaf.north, leaf.south, leaf.west, leaf.east
--------------------------------------------------------------------------------
/ipynb/merge_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 146,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import os\n",
10 |     "import tqdm\n",
11 |     "import numpy as np\n",
12 |     "\n",
13 |     "route_files = []\n",
14 |     "lr_files = []\n",
15 |     "prob_files = []\n",
16 |     "\n",
17 |     "for (path, dir, files) in
os.walk(\"./npy/splited_file/\"):\n", 18 | " for filename in files:\n", 19 | " if 'id2route' in filename:\n", 20 | " route_files.append('./npy/splited_file/'+filename)\n", 21 | " if 'id2lr' in filename:\n", 22 | " lr_files.append('./npy/splited_file/'+filename)\n", 23 | " if 'id2prob' in filename:\n", 24 | " prob_files.append('./npy/splited_file/'+filename)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 147, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "route_files.sort()\n", 34 | "lr_files.sort()\n", 35 | "prob_files.sort()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 148, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "['./npy/splited_file/id2route_01.npy', './npy/splited_file/id2route_02.npy', './npy/splited_file/id2route_03.npy', './npy/splited_file/id2route_04.npy', './npy/splited_file/id2route_05.npy', './npy/splited_file/id2route_06.npy', './npy/splited_file/id2route_07.npy', './npy/splited_file/id2route_08.npy', './npy/splited_file/id2route_09.npy', './npy/splited_file/id2route_10.npy', './npy/splited_file/id2route_11.npy', './npy/splited_file/id2route_12.npy', './npy/splited_file/id2route_13.npy', './npy/splited_file/id2route_14.npy', './npy/splited_file/id2route_15.npy', './npy/splited_file/id2route_16.npy', './npy/splited_file/id2route_17.npy', './npy/splited_file/id2route_18.npy', './npy/splited_file/id2route_19.npy', './npy/splited_file/id2route_20.npy', './npy/splited_file/id2route_21.npy']\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "print route_files" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 149, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stderr", 62 | "output_type": "stream", 63 | "text": [ 64 | "100%|██████████| 21/21 [00:00<00:00, 36.92it/s]\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "pad = [0]*17\n", 70 | "id2route = [[pad[1:], pad[1:], pad[1:], pad[1:]]]\n", 71 | "id2route_cnt = [0]\n", 72 | "max_route_cnt = 4\n", 73 | "max_node_idx = 0\n", 74 | "\n", 75 | "for filename in tqdm.tqdm(route_files):\n", 76 | " routes_list = np.load(filename)\n", 77 | " for routes in routes_list:\n", 78 | " id2route_cnt.append(len(routes))\n", 79 | " \n", 80 | " batch_max = np.max([node for route in routes\n", 81 | " for node in route[1:]])\n", 82 | " if batch_max > max_node_idx:\n", 83 | " max_node_idx = batch_max\n", 84 | " \n", 85 | " if len(routes) < max_route_cnt:\n", 86 | " for _ in xrange(max_route_cnt - len(routes)):\n", 87 | " routes.append(pad)\n", 88 | " \n", 89 | " routes = np.asarray([l[1:] for l in routes])\n", 90 | " id2route.append(routes)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 150, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "(15234, 4, 16)\n", 103 | "(15234,)\n", 104 | "58258\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "print np.asarray(id2route).shape\n", 110 | "print np.asarray(id2route_cnt).shape\n", 111 | "print max_node_idx" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 151, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stderr", 121 | "output_type": "stream", 122 | "text": [ 123 | "100%|██████████| 21/21 [00:00<00:00, 117.84it/s]\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "pad = [0]*16\n", 129 | "id2lr = [[pad, pad, pad, pad]]\n", 130 | "\n", 131 | "for filename in tqdm.tqdm(lr_files):\n", 132 | " 
lrs_list = np.load(filename)\n", 133 | " for lrs in lrs_list:\n", 134 | " if len(lrs) < max_route_cnt:\n", 135 | " for _ in xrange(max_route_cnt - len(lrs)):\n", 136 | " lrs.append(pad)\n", 137 | " id2lr.append(lrs)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 152, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "(15234, 4, 16)\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "print np.asarray(id2lr).shape" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 153, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stderr", 164 | "output_type": "stream", 165 | "text": [ 166 | "100%|██████████| 21/21 [00:00<00:00, 173.03it/s]\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "pad = 0\n", 172 | "id2prob = [[0,0,0,0]]\n", 173 | "\n", 174 | "for filename in tqdm.tqdm(prob_files):\n", 175 | " probs_list = np.load(filename)\n", 176 | " for probs in probs_list:\n", 177 | " probs = list(probs)\n", 178 | " if len(probs) < max_route_cnt:\n", 179 | " for _ in xrange(max_route_cnt - len(probs)):\n", 180 | " probs.append(pad)\n", 181 | " id2prob.append(probs) " 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 154, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "name": "stdout", 191 | "output_type": "stream", 192 | "text": [ 193 | "(15234, 4)\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "print np.asarray(id2prob).shape" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 155, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "np.save('./npy/id2route.npy', np.asarray(id2route))\n", 208 | "np.save('./npy/id2route_cnt.npy', np.asarray(id2route_cnt))\n", 209 | "np.save('./npy/id2lr.npy', np.asarray(id2lr))\n", 210 | "np.save('./npy/id2prob.npy', np.asarray(id2prob))" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 158, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "[[46455 46447 46444 46413 46349 46346 46091 46088 45065 45062 40967 32775\n", 223 | " 32772 5 2 1]\n", 224 | " [46468 46461 46445 46413 46349 46346 46091 46088 45065 45062 40967 32775\n", 225 | " 32772 5 2 1]\n", 226 | " [ 0 0 0 0 0 0 0 0 0 0 0 0\n", 227 | " 0 0 0 0]\n", 228 | " [ 0 0 0 0 0 0 0 0 0 0 0 0\n", 229 | " 0 0 0 0]]\n", 230 | "[0.11276673535000725, 0.88723326464999275, 0, 0]\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "print id2route[1]\n", 236 | "print id2prob[1]" 237 | ] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": "Python 2", 243 | "language": "python", 244 | "name": "python2" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 2 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython2", 256 | "version": "2.7.12" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 2 261 | } 262 | -------------------------------------------------------------------------------- /ipynb/total_prepro2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 91, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from datetime 
import datetime\n", 12 | "import tqdm\n", 13 | "\n", 14 | "checkin_file = \"../dataset/test_total.txt\"\n", 15 | "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n", 16 | "df.columns = [\"user\", \"poi\", \"time\"]" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 92, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "425648" 28 | ] 29 | }, 30 | "execution_count": 92, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "len(df)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 93, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "prev_cnt = 0\n", 46 | "curr_cnt = len(df)\n", 47 | "while prev_cnt != curr_cnt:\n", 48 | " prev_cnt = curr_cnt\n", 49 | " df = df[df.groupby('user').user.transform(len) > 5]\n", 50 | " df = df[df.groupby('poi').poi.transform(len) > 5]\n", 51 | " curr_cnt = len(df)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 94, 57 | "metadata": { 58 | "scrolled": false 59 | }, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "242686" 65 | ] 66 | }, 67 | "execution_count": 94, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "len(df)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 95, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "poi2id = np.load(\"./npy/poi2id.npy\").item()\n", 83 | "df['poi'] = df['poi'].apply(lambda x: poi2id[x] if poi2id.get(x) != None else 13187)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 96, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "df['time'] = df['time'].apply(lambda x: (datetime.strptime(x, \"%Y-%m-%d %H:%M:%S\")-datetime(2009,1,1))\\\n", 93 | " .total_seconds()/360) # hour" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 97, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "13005" 105 | ] 106 | }, 107 | "execution_count": 97, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "np.max(df.poi)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 98, 119 | "metadata": { 120 | "scrolled": true 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "'''\n", 125 | "user2id = {'unk':0}\n", 126 | "id2user = [0]\n", 127 | "for target_idx in tqdm.tqdm(xrange(len(df))):\n", 128 | " (user, poi, time) = df.iloc[target_idx]\n", 129 | " if user2id.get(user) == None:\n", 130 | " user2id[user] = len(id2user)\n", 131 | " id2user.append(user)\n", 132 | "'''\n", 133 | "user2id = np.load('./npy/user2id.npy').item()\n", 134 | "id2user = np.load('./npy/id2user.npy')\n", 135 | "\n", 136 | "df['user'] = df['user'].apply(lambda x: user2id[x] if user2id.get(x) != None else 0)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 99, 142 | "metadata": { 143 | "scrolled": true 144 | }, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "(18712, 242686)" 150 | ] 151 | }, 152 | "execution_count": 99, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "len(df.groupby('user')),len(df)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 100, 164 | "metadata": { 165 | "scrolled": true 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stderr", 170 | "output_type": "stream", 171 | "text": [ 172 | 
"100%|██████████| 242686/242686 [01:24<00:00, 2883.45it/s]\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "train_user = []\n", 178 | "train_context = []\n", 179 | "train_target = []\n", 180 | "\n", 181 | "tow = 6\n", 182 | "prev_user = df.iloc[0]['user']\n", 183 | "user_user = []\n", 184 | "user_context = []\n", 185 | "user_target = []\n", 186 | "for target_idx in tqdm.tqdm(xrange(len(df))):\n", 187 | " (user, poi, time) = df.iloc[target_idx]\n", 188 | " if prev_user != user:\n", 189 | " prev_user = user\n", 190 | " train_user += user_user\n", 191 | " train_context += user_context\n", 192 | " train_target += user_target\n", 193 | "\n", 194 | " user_user = []\n", 195 | " user_context = []\n", 196 | " user_target = []\n", 197 | " #print train_user, train_context, train_target\n", 198 | " \n", 199 | " context = []\n", 200 | " for context_idx in xrange(target_idx+1, len(df)):\n", 201 | " (c_user, c_poi, c_time) = df.iloc[context_idx]\n", 202 | " if user == c_user and (time+tow) > c_time:\n", 203 | " context.append(c_poi)\n", 204 | " else:\n", 205 | " break\n", 206 | " if context:\n", 207 | " user_user.append(user)\n", 208 | " user_context.append(context)\n", 209 | " user_target.append(poi)\n", 210 | " \n", 211 | "train_user += user_user\n", 212 | "train_context += user_context\n", 213 | "train_target += user_target" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 101, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "(44189, 44189, 44189)" 225 | ] 226 | }, 227 | "execution_count": 101, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "len(train_user), len(train_context), len(train_target)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 102, 239 | "metadata": { 240 | "scrolled": false 241 | }, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "41 2.94349272443 2.0 1\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "len_context = []\n", 253 | "for i, context in enumerate(train_context):\n", 254 | " len_context.append(len(context))\n", 255 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 103, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "44189\n", 268 | "21\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "len_context.sort()\n", 274 | "print len(len_context)\n", 275 | "print len_context[int(len(len_context)*0.99)]" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 104, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "maxlen_context = 16\n", 285 | "for i, context in enumerate(train_context):\n", 286 | " if len(context) < maxlen_context:\n", 287 | " train_context[i] += ([0]*(maxlen_context-len(context)))\n", 288 | " elif len(context) > maxlen_context:\n", 289 | " train_context[i] = context[:maxlen_context]" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 105, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "16 16.0 16.0 16\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "len_context = []\n", 307 | "for context in train_context:\n", 308 | " len_context.append(len(context))\n", 309 | "print np.max(len_context), 
np.mean(len_context), np.median(len_context), np.min(len_context)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 106, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "np.save('./npy/test_context.npy', train_context)\n", 319 | "np.save('./npy/test_user.npy', train_user)\n", 320 | "np.save('./npy/test_target.npy', train_target)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 63, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "np.save('./npy/user2id.npy', user2id)\n", 330 | "np.save('./npy/id2user.npy', id2user)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 101, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/plain": [ 341 | "4627" 342 | ] 343 | }, 344 | "execution_count": 101, 345 | "metadata": {}, 346 | "output_type": "execute_result" 347 | } 348 | ], 349 | "source": [ 350 | "len(id2user)" 351 | ] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 2", 357 | "language": "python", 358 | "name": "python2" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 2 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython2", 370 | "version": "2.7.12" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 2 375 | } 376 | -------------------------------------------------------------------------------- /ipynb/total_prepro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 75, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "total poi : 13187\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from multiprocessing import Process\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import tqdm\n", 21 | "import os\n", 22 | "from models import Node, Rec \n", 23 | "\n", 24 | "checkin_file = \"../dataset/poi_info.txt\"\n", 25 | "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n", 26 | "df.columns = [\"id\", \"poi\", \"latitude\", \"longitude\"]\n", 27 | "print \"total poi :\", len(df)\n", 28 | "poi2id = {}\n", 29 | "id2poi = {}\n", 30 | "for i in xrange(len(df)):\n", 31 | " poi2id[df['poi'][i]] = df['id'][i]\n", 32 | " id2poi[df['id'][i]] = df['poi'][i]\n", 33 | "id2poi = id2poi.values()\n", 34 | "id2pos = df.loc[:, ['latitude', 'longitude', 'poi']].set_index('poi').T.to_dict('list')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 76, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "unk = 'u*n*k'\n", 44 | "poi2id[unk] = len(id2poi)\n", 45 | "id2poi.append(unk)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 77, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "np.save(\"./npy/poi2id.npy\", poi2id)\n", 55 | "np.save(\"./npy/id2poi.npy\", id2poi)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 78, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "13187\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "print poi2id.get(unk)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 79, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": 
"stream", 83 | "text": [ 84 | "total node of tree : 40955\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "# build a tree of area\n", 90 | "tree = Node(df['latitude'].min(), df['latitude'].max(),df['longitude'].max(), df['longitude'].min(), 0)\n", 91 | "tree.build()\n", 92 | "print \"total node of tree :\", Node.count\n", 93 | "theta = Node.theta" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 80, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "def main(id2poi_batch):\n", 103 | " id2route = []\n", 104 | " id2lr = []\n", 105 | " id2prob = []\n", 106 | "\n", 107 | " # make route/left_right_choice/probability list of each poi\n", 108 | " for poi in tqdm.tqdm(id2poi_batch):\n", 109 | " # each poi, they have a area. p_n is each corner\n", 110 | " p_n = [(id2pos[poi][0] - 0.5*theta, id2pos[poi][1] - 0.5*theta)\\\n", 111 | " ,(id2pos[poi][0] - 0.5*theta, id2pos[poi][1] + 0.5*theta)\\\n", 112 | " ,(id2pos[poi][0] + 0.5*theta, id2pos[poi][1] - 0.5*theta)\\\n", 113 | " ,(id2pos[poi][0] + 0.5*theta, id2pos[poi][1] + 0.5*theta)]\n", 114 | " # that area\n", 115 | " poi_area = Rec((id2pos[poi][1]+0.5*theta, id2pos[poi][1]-0.5*theta\\\n", 116 | " ,id2pos[poi][0]-0.5*theta, id2pos[poi][0]+0.5*theta))\n", 117 | "\n", 118 | " route_list = []\n", 119 | " lr_list = []\n", 120 | " area_list = []\n", 121 | " # each corner, where they are contained in\n", 122 | " for p in p_n:\n", 123 | " route, lr = tree.find_route(p)\n", 124 | " route_list.append(route)\n", 125 | " lr_list.append(lr)\n", 126 | "\n", 127 | " # remove duplicate\n", 128 | " route_set = []\n", 129 | " for route in route_list:\n", 130 | " if route not in route_set:\n", 131 | " route_set.append(route)\n", 132 | " lr_set = []\n", 133 | " for lr in lr_list:\n", 134 | " if lr not in lr_set:\n", 135 | " lr_set.append(lr)\n", 136 | "\n", 137 | " # each leaf, how much they are overlaped\n", 138 | " for route in route_set:\n", 139 | " leaf_area = Rec(tree.find_idx(route[0]))\n", 140 | " area_list.append(leaf_area.overlap(poi_area))\n", 141 | " area_list = np.divide(area_list, sum(area_list))\n", 142 | "\n", 143 | " id2route.append(route_set)\n", 144 | " id2lr.append(lr_set)\n", 145 | " id2prob.append(area_list)\n", 146 | " \n", 147 | " return id2route, id2lr, id2prob" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 81, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stderr", 157 | "output_type": "stream", 158 | "text": [ 159 | "100%|██████████| 13187/13187 [00:23<00:00, 551.16it/s]\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "if __name__ == '__main__':\n", 165 | " id2route, id2lr, id2prob = main(id2poi[:-1])" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 85, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "13\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "max_path = len(id2route[0][0])\n", 183 | "print max_path" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 86, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stderr", 193 | "output_type": "stream", 194 | "text": [ 195 | "100%|██████████| 13187/13187 [00:00<00:00, 81040.12it/s]\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "pad = [0]*max_path\n", 201 | "max_route_cnt = 4\n", 202 | "id2route_cnt = []\n", 203 | "\n", 204 | "for idx, routes in enumerate(tqdm.tqdm(id2route)):\n", 205 | " id2route_cnt.append(len(routes))\n", 206 | " \n", 207 | " if 
len(routes) < max_route_cnt:\n", 208 | " for _ in xrange(max_route_cnt - len(routes)):\n", 209 | " routes.append(pad)\n", 210 | " \n", 211 | " routes = np.asarray([l[1:] for l in routes])\n", 212 | " id2route[idx] = routes\n", 213 | "\n", 214 | "id2route.append([pad[1:], pad[1:], pad[1:], pad[1:]])\n", 215 | "id2route_cnt.append(0)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 87, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "(13188, 4, 12)\n", 228 | "(13188,)\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "print np.asarray(id2route).shape\n", 234 | "print np.asarray(id2route_cnt).shape" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 89, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stderr", 244 | "output_type": "stream", 245 | "text": [ 246 | "100%|██████████| 13187/13187 [00:00<00:00, 592942.69it/s]\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "pad = [0]*(max_path-1)\n", 252 | "\n", 253 | "for idx, lrs in enumerate(tqdm.tqdm(id2lr)):\n", 254 | " if len(lrs) < max_route_cnt:\n", 255 | " for _ in xrange(max_route_cnt - len(lrs)):\n", 256 | " lrs.append(pad)\n", 257 | " \n", 258 | " id2lr[idx] = lrs\n", 259 | " \n", 260 | "id2lr.append([pad, pad, pad, pad])" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 90, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "(13188, 4, 12)\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "print np.asarray(id2lr).shape" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 91, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stderr", 287 | "output_type": "stream", 288 | "text": [ 289 | "100%|██████████| 13187/13187 [00:00<00:00, 162874.68it/s]\n" 290 | ] 291 | } 292 | ], 293 | "source": [ 294 | "pad = 0\n", 295 | "\n", 296 | "for idx, probs in enumerate(tqdm.tqdm(id2prob)):\n", 297 | " probs = list(probs)\n", 298 | " if len(probs) < max_route_cnt:\n", 299 | " for _ in xrange(max_route_cnt - len(probs)):\n", 300 | " probs.append(pad)\n", 301 | " \n", 302 | " id2prob[idx] = probs\n", 303 | " \n", 304 | "id2prob.append([0,0,0,0])" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 92, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "name": "stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "(13188, 4)\n" 317 | ] 318 | } 319 | ], 320 | "source": [ 321 | "print np.asarray(id2prob).shape" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 94, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "np.save(\"./npy/id2route.npy\", id2route)\n", 331 | "np.save(\"./npy/id2lr.npy\", id2lr)\n", 332 | "np.save(\"./npy/id2prob.npy\", id2prob)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 93, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": [ 343 | "40683" 344 | ] 345 | }, 346 | "execution_count": 93, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "np.max(id2route)" 353 | ] 354 | } 355 | ], 356 | "metadata": { 357 | "kernelspec": { 358 | "display_name": "Python 2", 359 | "language": "python", 360 | "name": "python2" 361 | }, 362 | "language_info": { 363 | "codemirror_mode": { 364 | "name": "ipython", 365 | "version": 2 366 | }, 367 | "file_extension": ".py", 
368 | "mimetype": "text/x-python", 369 | "name": "python", 370 | "nbconvert_exporter": "python", 371 | "pygments_lexer": "ipython2", 372 | "version": "2.7.12" 373 | } 374 | }, 375 | "nbformat": 4, 376 | "nbformat_minor": 2 377 | } 378 | -------------------------------------------------------------------------------- /ipynb/preprocess2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 181, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from datetime import datetime\n", 12 | "import tqdm\n", 13 | "\n", 14 | "checkin_file = \"../dataset/loc-gowalla_totalCheckins.txt\"\n", 15 | "df = pd.read_csv(checkin_file, sep='\\t', header=None)\n", 16 | "df.columns = [\"user\", \"time\", \"latitude\", \"longitude\", \"poi\"]\n", 17 | "df = df[['user', 'time', 'poi']]" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 182, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "6442892" 29 | ] 30 | }, 31 | "execution_count": 182, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": [ 37 | "len(df)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 183, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "prev_cnt = 0\n", 47 | "curr_cnt = len(df)\n", 48 | "while prev_cnt != curr_cnt:\n", 49 | " prev_cnt = curr_cnt\n", 50 | " df = df[df.groupby('user').user.transform(len) > 5]\n", 51 | " df = df[df.groupby('poi').poi.transform(len) > 5]\n", 52 | " curr_cnt = len(df)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 184, 58 | "metadata": { 59 | "scrolled": false 60 | }, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "4293047" 66 | ] 67 | }, 68 | "execution_count": 184, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "len(df)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 186, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "poi2id = np.load(\"./npy/poi2id.npy\").item()\n", 84 | "df['poi'] = df['poi'].apply(lambda x: poi2id[x] if poi2id.get(x) != None else 0)\n", 85 | "df = df[df['poi'] != 0]\n", 86 | "df['time'] = df['time'].apply(lambda x: (datetime.strptime(x, \"%Y-%m-%dT%H:%M:%SZ\")-datetime(2009,1,1)).total_seconds()\\\n", 87 | " /360) # hour" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 187, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "15233" 99 | ] 100 | }, 101 | "execution_count": 187, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "np.max(df.poi)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 188, 113 | "metadata": { 114 | "scrolled": true 115 | }, 116 | "outputs": [ 117 | { 118 | "name": "stderr", 119 | "output_type": "stream", 120 | "text": [ 121 | " 43%|████▎ | 242208/561673 [00:31<00:41, 7721.47it/s]" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "user2id = {'unk':0}\n", 127 | "id2user = [0]\n", 128 | "for target_idx in tqdm.tqdm(xrange(len(df))):\n", 129 | " (user, time, poi) = df.iloc[target_idx]\n", 130 | " if user2id.get(user) == None:\n", 131 | " user2id[user] = len(id2user)\n", 132 | " id2user.append(user)\n", 133 | " if len(id2user) == 4627:\n", 134 | " break\n", 135 | "df['user'] = df['user'].apply(lambda 
x: user2id[x] if user2id.get(x) != None else 0)\n", 136 | "df = df[df['user'] != 0]" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 189, 142 | "metadata": { 143 | "scrolled": true 144 | }, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "(4626, 242873)" 150 | ] 151 | }, 152 | "execution_count": 189, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "len(df.groupby('user')),len(df)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 190, 164 | "metadata": { 165 | "scrolled": true 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stderr", 170 | "output_type": "stream", 171 | "text": [ 172 | "\n", 173 | " 0%| | 0/242873 [00:00 20:\n", 1021 | " train_thr = int(len(user_user)*0.9)\n", 1022 | " valid_thr = int(len(user_user)*0.95)\n", 1023 | " train_user += user_user[:train_thr]\n", 1024 | " train_context += user_context[:train_thr]\n", 1025 | " train_target += user_target[:train_thr]\n", 1026 | " valid_user += user_user[train_thr:valid_thr]\n", 1027 | " valid_context += user_context[train_thr:valid_thr]\n", 1028 | " valid_target += user_target[train_thr:valid_thr]\n", 1029 | " test_user += user_user[valid_thr:]\n", 1030 | " test_context += user_context[valid_thr:]\n", 1031 | " test_target += user_target[valid_thr:]\n", 1032 | " elif len(user_user) > 0:\n", 1033 | " train_user += user_user\n", 1034 | " train_context += user_context\n", 1035 | " train_target += user_target \n", 1036 | " user_user = []\n", 1037 | " user_context = []\n", 1038 | " user_target = []\n", 1039 | " #print train_user, train_context, train_target\n", 1040 | " \n", 1041 | " context = []\n", 1042 | " for context_idx in xrange(target_idx+1, len(df)):\n", 1043 | " (c_user, c_time, c_poi) = df.iloc[context_idx]\n", 1044 | " if user == c_user and (time-tow) < c_time:\n", 1045 | " context.append(c_poi)\n", 1046 | " else:\n", 1047 | " break\n", 1048 | " if context:\n", 1049 | " user_user.append(user)\n", 1050 | " user_context.append(context)\n", 1051 | " user_target.append(poi)\n", 1052 | " \n", 1053 | "if len(user_user) > 20:\n", 1054 | " train_thr = int(len(user_user)*0.9)\n", 1055 | " valid_thr = int(len(user_user)*0.95)\n", 1056 | " train_user += user_user[:train_thr]\n", 1057 | " train_context += user_context[:train_thr]\n", 1058 | " train_target += user_target[:train_thr]\n", 1059 | " valid_user += user_user[train_thr:valid_thr]\n", 1060 | " valid_context += user_context[train_thr:valid_thr]\n", 1061 | " valid_target += user_target[train_thr:valid_thr]\n", 1062 | " test_user += user_user[valid_thr:]\n", 1063 | " test_context += user_context[valid_thr:]\n", 1064 | " test_target += user_target[valid_thr:]\n", 1065 | "elif len(user_user) > 0:\n", 1066 | " train_user += user_user\n", 1067 | " train_context += user_context\n", 1068 | " train_target += user_target" 1069 | ] 1070 | }, 1071 | { 1072 | "cell_type": "code", 1073 | "execution_count": 191, 1074 | "metadata": {}, 1075 | "outputs": [ 1076 | { 1077 | "data": { 1078 | "text/plain": [ 1079 | "(53359, 53359, 53359, 2336, 2336, 2336, 2677, 2677, 2677)" 1080 | ] 1081 | }, 1082 | "execution_count": 191, 1083 | "metadata": {}, 1084 | "output_type": "execute_result" 1085 | } 1086 | ], 1087 | "source": [ 1088 | "len(train_user), len(train_context), len(train_target), len(valid_user), len(valid_context), len(valid_target), len(test_user), len(test_context), len(test_target)" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "code", 1093 | 
"execution_count": 192, 1094 | "metadata": { 1095 | "scrolled": false 1096 | }, 1097 | "outputs": [ 1098 | { 1099 | "name": "stdout", 1100 | "output_type": "stream", 1101 | "text": [ 1102 | "64 4.00256751438 2.0 1\n", 1103 | "64 3.94891386281 2.0 1\n" 1104 | ] 1105 | } 1106 | ], 1107 | "source": [ 1108 | "len_context = []\n", 1109 | "for i, context in enumerate(train_context):\n", 1110 | " len_context.append(len(context))\n", 1111 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)\n", 1112 | "for i, context in enumerate(valid_context):\n", 1113 | " len_context.append(len(context))\n", 1114 | "for i, context in enumerate(test_context):\n", 1115 | " len_context.append(len(context))\n", 1116 | "len_context.sort()\n", 1117 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)" 1118 | ] 1119 | }, 1120 | { 1121 | "cell_type": "code", 1122 | "execution_count": 193, 1123 | "metadata": {}, 1124 | "outputs": [ 1125 | { 1126 | "name": "stdout", 1127 | "output_type": "stream", 1128 | "text": [ 1129 | "58372\n", 1130 | "26\n" 1131 | ] 1132 | } 1133 | ], 1134 | "source": [ 1135 | "print len(len_context)\n", 1136 | "print len_context[int(len(len_context)*0.99)]" 1137 | ] 1138 | }, 1139 | { 1140 | "cell_type": "code", 1141 | "execution_count": 194, 1142 | "metadata": {}, 1143 | "outputs": [], 1144 | "source": [ 1145 | "maxlen_context = 32\n", 1146 | "for i, context in enumerate(train_context):\n", 1147 | " if len(context) < maxlen_context:\n", 1148 | " train_context[i] += ([0]*(maxlen_context-len(context)))\n", 1149 | " elif len(context) > maxlen_context:\n", 1150 | " train_context[i] = context[:maxlen_context]\n", 1151 | "for i, context in enumerate(valid_context):\n", 1152 | " if len(context) < maxlen_context:\n", 1153 | " valid_context[i] += ([0]*(maxlen_context-len(context)))\n", 1154 | " elif len(context) > maxlen_context:\n", 1155 | " valid_context[i] = context[:maxlen_context]\n", 1156 | "for i, context in enumerate(test_context):\n", 1157 | " if len(context) < maxlen_context:\n", 1158 | " test_context[i] += ([0]*(maxlen_context-len(context)))\n", 1159 | " elif len(context) > maxlen_context:\n", 1160 | " test_context[i] = context[:maxlen_context]" 1161 | ] 1162 | }, 1163 | { 1164 | "cell_type": "code", 1165 | "execution_count": 195, 1166 | "metadata": {}, 1167 | "outputs": [ 1168 | { 1169 | "name": "stdout", 1170 | "output_type": "stream", 1171 | "text": [ 1172 | "32 32.0 32.0 32\n" 1173 | ] 1174 | } 1175 | ], 1176 | "source": [ 1177 | "len_context = []\n", 1178 | "for context in test_context:\n", 1179 | " len_context.append(len(context))\n", 1180 | "print np.max(len_context), np.mean(len_context), np.median(len_context), np.min(len_context)" 1181 | ] 1182 | }, 1183 | { 1184 | "cell_type": "code", 1185 | "execution_count": 196, 1186 | "metadata": {}, 1187 | "outputs": [], 1188 | "source": [ 1189 | "np.save('./npy/train_context.npy', train_context)\n", 1190 | "np.save('./npy/valid_context.npy', valid_context)\n", 1191 | "np.save('./npy/test_context.npy', test_context)\n", 1192 | "np.save('./npy/user2id.npy', user2id)\n", 1193 | "np.save('./npy/id2user.npy', id2user)" 1194 | ] 1195 | }, 1196 | { 1197 | "cell_type": "code", 1198 | "execution_count": 197, 1199 | "metadata": {}, 1200 | "outputs": [], 1201 | "source": [ 1202 | "np.save('./npy/train_user.npy', train_user)\n", 1203 | "np.save('./npy/valid_user.npy', valid_user)\n", 1204 | "np.save('./npy/test_user.npy', test_user)\n", 1205 | 
"np.save('./npy/train_target.npy', train_target)\n", 1206 | "np.save('./npy/valid_target.npy', valid_target)\n", 1207 | "np.save('./npy/test_target.npy', test_target)" 1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "execution_count": 101, 1213 | "metadata": {}, 1214 | "outputs": [ 1215 | { 1216 | "data": { 1217 | "text/plain": [ 1218 | "4627" 1219 | ] 1220 | }, 1221 | "execution_count": 101, 1222 | "metadata": {}, 1223 | "output_type": "execute_result" 1224 | } 1225 | ], 1226 | "source": [ 1227 | "len(id2user)" 1228 | ] 1229 | } 1230 | ], 1231 | "metadata": { 1232 | "kernelspec": { 1233 | "display_name": "Python 2", 1234 | "language": "python", 1235 | "name": "python2" 1236 | }, 1237 | "language_info": { 1238 | "codemirror_mode": { 1239 | "name": "ipython", 1240 | "version": 2 1241 | }, 1242 | "file_extension": ".py", 1243 | "mimetype": "text/x-python", 1244 | "name": "python", 1245 | "nbconvert_exporter": "python", 1246 | "pygments_lexer": "ipython2", 1247 | "version": "2.7.12" 1248 | } 1249 | }, 1250 | "nbformat": 4, 1251 | "nbformat_minor": 2 1252 | } 1253 | --------------------------------------------------------------------------------