├── .gitignore
├── bin
│   └── datasets
├── readme.md
├── recommender_datasets
│   ├── __init__.py
│   ├── _common.py
│   ├── amazon.py
│   ├── amazon_jmcauley.py
│   ├── gowalla.py
│   ├── movielens.py
│   ├── output.py
│   ├── verification.py
│   └── yoochoose.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
*~
*.pyc
*egg*
*#*
--------------------------------------------------------------------------------
/bin/datasets:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import numpy as np

from recommender_datasets import (amazon,
                                  amazon_jmcauley,
                                  gowalla,
                                  movielens,
                                  output,
                                  verification,
                                  yoochoose)


def download_movielens():

    params = (
        ('movielens_100K',
         movielens.read_movielens_100K),
        ('movielens_1M',
         movielens.read_movielens_1M),
        ('movielens_10M',
         movielens.read_movielens_10M),
        ('movielens_20M',
         movielens.read_movielens_20M),
    )

    for out_file_path, data_fnc in params:
        print('Processing {}'.format(out_file_path))
        output.write_csv_data(out_file_path,
                              data_fnc())
        output.write_hdf5_data(out_file_path,
                               data_fnc())

        print('Verifying {}'.format(out_file_path))
        verification.verify(out_file_path)


def download_gowalla():

    header = ('user_id',
              'timestamp',
              'latitude',
              'longitude',
              'item_id')

    output.write_csv_data('gowalla',
                          gowalla.read_gowalla(),
                          header=header)
    output.write_hdf5_data('gowalla',
                           gowalla.read_gowalla(),
                           header=header,
                           dtype=(np.int32,
                                  np.int32,
                                  np.float32,
                                  np.float32,
                                  np.int32))
    verification.verify('gowalla', columns=header)


def download_amazon():

    header = ('user_id',
              'item_id',
              'rating',
              'timestamp',
              'features_item_id',
              'features_feature_id')

    output.write_hdf5_data('amazon_co_purchasing',
                           amazon.read_amazon_co_purchasing(),
                           header=header,
                           dtype=(np.int32,
                                  np.int32,
                                  np.float32,
                                  np.float32,
                                  np.int32,
                                  np.int32))


def download_yoochoose():

    header = ('user_id', 'item_id', 'timestamp')

    for variant in ('buys', 'clicks'):
        data = yoochoose.read_yoochoose(variant)
        output.write_hdf5_data('yoochoose_{}'.format(variant),
                               data,
                               header=header,
                               dtype=(np.int32,
                                      np.int32,
                                      np.float32))


def download_amazon_jmcauley():

    header = ('user_id', 'item_id', 'rating', 'timestamp')

    for variant in amazon_jmcauley.VARIANTS:
        data = amazon_jmcauley.read_amazon(variant)
        output.write_hdf5_data('amazon_jmcauley_{}'.format(variant),
                               data,
                               header=header,
                               dtype=(np.int32,
                                      np.int32,
                                      np.float32,
                                      np.float32))


if __name__ == '__main__':
    download_amazon_jmcauley()
    download_yoochoose()
    download_movielens()
    download_amazon()
    download_gowalla()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Recommender datasets

Parses and packages popular recommender datasets as easy-to-use CSV and HDF5 files. See the releases page for download links.
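A downloaded HDF5 file can be read back with `h5py`. A minimal sketch (the file name is a placeholder for whichever release asset you downloaded; the column names match those written by `output.write_hdf5_data`):

```python
import h5py

with h5py.File('movielens_20M.hdf5', 'r') as data:
    user_ids = data['/user_id'][:]
    item_ids = data['/item_id'][:]
    ratings = data['/rating'][:]
    timestamps = data['/timestamp'][:]

print(user_ids.shape, ratings.mean())
```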
--------------------------------------------------------------------------------
/recommender_datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejkula/recommender_datasets/bbca3e736f885c95ff1c38dd0b100a0e081adf31/recommender_datasets/__init__.py
--------------------------------------------------------------------------------
/recommender_datasets/_common.py:
--------------------------------------------------------------------------------
import os

import requests


def get_data_home():

    return os.path.join(os.path.expanduser('~'),
                        '.recommender_datasets')


def create_data_dir(path):

    if not os.path.isdir(path):
        os.makedirs(path)


def download(url, dest_path):

    req = requests.get(url, stream=True)
    req.raise_for_status()

    with open(dest_path, 'wb') as fd:
        for chunk in req.iter_content(chunk_size=2**20):
            fd.write(chunk)


def get_data(url, dest_subdir, dest_filename, download_if_missing=True):

    data_home = get_data_home()
    data_dir = os.path.join(os.path.abspath(data_home), dest_subdir)

    create_data_dir(data_dir)

    dest_path = os.path.join(data_dir, dest_filename)

    if not os.path.isfile(dest_path):
        if download_if_missing:
            download(url, dest_path)
        else:
            raise IOError('Dataset missing.')

    return dest_path
--------------------------------------------------------------------------------
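The download helper above caches files under ~/.recommender_datasets/<dest_subdir>/ and only hits the network when the file is missing. A minimal usage sketch (the URL and file names are hypothetical):

from recommender_datasets import _common

path = _common.get_data('https://example.com/interactions.csv.gz',
                        'example_dataset',
                        'interactions.csv.gz')

# A second call returns the cached copy without downloading again.
assert path == _common.get_data('https://example.com/interactions.csv.gz',
                                'example_dataset',
                                'interactions.csv.gz',
                                download_if_missing=False)
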
/recommender_datasets/amazon.py:
--------------------------------------------------------------------------------
import array
import datetime
import gzip
from itertools import islice
import time

import numpy as np

from recommender_datasets import _common


def _read_blocks(path):

    with gzip.open(path, 'r') as source_file:

        block = []

        for line in islice(source_file, 3, None):
            line = line.decode('utf-8')
            if line.startswith('Id:'):
                if block:
                    yield block
                block = [line.replace('\r\n', '')]
                continue
            else:
                block.append(line.replace('\r\n', ''))

        # Yield the final block as well, so the last product is not dropped.
        if block:
            yield block


def _parse_category(cat_string):

    start = cat_string.find('[')
    stop = cat_string.find(']')

    return int(cat_string[start + 1:stop])


def _parse_categories(lines):

    num_categories = int(lines[0].split(':')[-1].strip())

    categories = []

    for category_line in lines[1:1 + num_categories]:

        cat_strings = category_line.strip().split('|')
        cat_ids = [_parse_category(x) for x in cat_strings if x]

        categories += cat_ids

    return categories


def _parse_reviews(lines):

    customer_ids = []
    ratings = []
    dates = []

    for line in lines[1:]:

        if not line:
            continue

        # 'cutomer' is not a typo here: the SNAP dump spells it that way.
        date_stop = line.find('cutomer')
        year, month, day = line[:date_stop].split('-')
        dates.append(datetime.date(year=int(year),
                                   month=int(month),
                                   day=int(day)))

        rating_start = line.find('rating')
        rating_stop = line.find('votes')
        rating = int(line[rating_start:rating_stop].split(':')[1])
        ratings.append(rating)

        customer_ids.append(line[date_stop:rating_start].split(':')[1].strip())

    return customer_ids, ratings, dates


def _parse_block(lines):

    ITEM_ID_LINE = 0

    item_id = int(lines[ITEM_ID_LINE].split(':')[1]) + 1
    categories = []
    user_ids = []
    ratings = []
    dates = []

    for line_num, line in enumerate(lines):
        if 'categories' in line:
            categories = _parse_categories(lines[line_num:])

        if 'reviews:' in line:
            (user_ids,
             ratings,
             dates) = _parse_reviews(lines[line_num:])

    return (item_id,
            categories,
            user_ids,
            ratings,
            dates)


def read_amazon_co_purchasing():

    path = _common.get_data('https://snap.stanford.edu/data/bigdata/amazon/amazon-meta.txt.gz',
                            'amazon',
                            'amazon_co_purchasing.gz')

    user_dict = {}
    feature_dict = {}

    interaction_user_ids = array.array('i')
    interaction_item_ids = array.array('i')
    interaction_ratings = array.array('f')
    interaction_timestamps = array.array('f')

    feature_item_ids = array.array('i')
    feature_ids = array.array('i')

    failed_parses = []
    total_parses = 0

    for block in _read_blocks(path):
        total_parses += 1
        try:
            (item_id,
             categories,
             user_ids,
             ratings,
             dates) = _parse_block(block)
        except Exception as e:
            print('Parse failed')
            failed_parses.append((e, block))
            # Skip the block: its variables would otherwise be stale or unset.
            continue

        user_ids = [user_dict.setdefault(x, len(user_dict))
                    for x in user_ids]

        interaction_user_ids.extend(user_ids)
        interaction_item_ids.extend([item_id] * len(user_ids))
        interaction_ratings.extend(ratings)
        interaction_timestamps.extend([int(time.mktime(x.timetuple()))
                                       for x in dates])

        categories = [feature_dict.setdefault(x, len(feature_dict))
                      for x in categories]

        feature_item_ids.extend([item_id] * len(categories))
        feature_ids.extend(categories)

    print('Num of failed parses: {} (out of {})'.format(len(failed_parses),
                                                        total_parses))

    return (np.array(interaction_user_ids),
            np.array(interaction_item_ids),
            np.array(interaction_ratings),
            np.array(interaction_timestamps),
            np.array(feature_item_ids),
            np.array(feature_ids))
--------------------------------------------------------------------------------
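As a sanity check on the review parser above, this is what `_parse_reviews` extracts from one made-up review line written in the format of the SNAP metadata dump (the element at index 0 is the reviews header and is skipped):

from recommender_datasets import amazon

lines = ['  reviews: total: 1  downloaded: 1  avg rating: 5',
         '    2000-7-28  cutomer: A2ABCDEFGHIJKL  rating: 5  votes:  10  helpful:   9']

customer_ids, ratings, dates = amazon._parse_reviews(lines)
# customer_ids == ['A2ABCDEFGHIJKL']
# ratings      == [5]
# dates        == [datetime.date(2000, 7, 28)]
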
/recommender_datasets/amazon_jmcauley.py:
--------------------------------------------------------------------------------
import array

import numpy as np

from recommender_datasets import _common


BASE_URL = 'http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/'

VARIANTS = ('books', 'electronics')


def _to_numpy(data):

    uid_map = {}
    iid_map = {}

    uids = array.array('i')
    iids = array.array('i')
    ratings = array.array('f')
    timestamps = array.array('f')

    for uid, iid, rating, timestamp in data:
        uid = uid_map.setdefault(uid, len(uid_map) + 1)
        iid = iid_map.setdefault(iid, len(iid_map) + 1)

        uids.append(uid)
        iids.append(iid)
        ratings.append(rating)
        timestamps.append(timestamp)

    return (np.array(uids, dtype=np.int32),
            np.array(iids, dtype=np.int32),
            np.array(ratings, dtype=np.float32),
            np.array(timestamps, dtype=np.float32))


def _read_data(variant):

    file_path = _common.get_data(BASE_URL +
                                 'ratings_{}.csv'.format(variant.title()),
                                 'amazon',
                                 'ratings_{}.csv'.format(variant))

    with open(file_path, 'r') as datafile:
        for line in datafile:
            uid, iid, rating, timestamp = line.split(',')

            yield uid, iid, float(rating), float(timestamp)


def read_amazon(variant):

    data = _read_data(variant)

    uids, iids, ratings, timestamps = _to_numpy(data)

    return uids, iids, ratings, timestamps
--------------------------------------------------------------------------------
/recommender_datasets/gowalla.py:
--------------------------------------------------------------------------------
import datetime
import gzip


from recommender_datasets import _common

URL = 'https://snap.stanford.edu/data/loc-gowalla_totalCheckins.txt.gz'


def _read_data(path):

    with gzip.GzipFile(path) as datafile:
        for line in datafile:
            (user_id, time, lat,
             lon, location_id) = line.decode('utf-8').split('\t')

            yield (int(user_id),
                   int(datetime.datetime.strptime(time,
                                                  "%Y-%m-%dT%H:%M:%SZ")
                       .timestamp()),
                   float(lat),
                   float(lon),
                   int(location_id))


def read_gowalla():

    zip_path = _common.get_data(URL,
                                'gowalla',
                                'gowalla.txt.gz')

    for line in _read_data(zip_path):
        yield line
--------------------------------------------------------------------------------
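Each line of the Gowalla check-in file is tab-separated as user, ISO-8601 time, latitude, longitude, location, and gowalla's `_read_data` turns it into a purely numeric tuple. A sketch with made-up values (note that `strptime` yields a naive datetime, so the exact epoch value depends on the local timezone):

import datetime

line = '0\t2010-10-19T23:55:27Z\t30.2359\t-97.7951\t22847\n'
user_id, time_str, lat, lon, location_id = line.split('\t')

checkin = (int(user_id),
           int(datetime.datetime.strptime(time_str,
                                          '%Y-%m-%dT%H:%M:%SZ').timestamp()),
           float(lat),
           float(lon),
           int(location_id))
# e.g. (0, 1287532527, 30.2359, -97.7951, 22847) when the local timezone is UTC
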
/recommender_datasets/movielens.py:
--------------------------------------------------------------------------------
import itertools
import os
import zipfile

from recommender_datasets import _common


URL_PREFIX = 'http://files.grouplens.org/datasets/movielens/'
URL_100K = 'ml-100k.zip'
URL_1M = 'ml-1m.zip'
URL_10M = 'ml-10m.zip'
URL_20M = 'ml-20m.zip'


def _read_data(path, archive_path):

    with zipfile.ZipFile(path) as archive:
        with archive.open(archive_path) as datafile:
            for line in datafile:
                yield line.decode('utf-8')


def _parse_line(line, separator='::'):

    uid, iid, rating, timestamp = line.split(separator)

    return (int(uid), int(iid), float(rating), int(timestamp))


def _make_contiguous(data, separator):

    user_map = {}
    item_map = {}

    for line in data:
        uid, iid, rating, timestamp = _parse_line(line, separator=separator)

        uid = user_map.setdefault(uid, len(user_map) + 1)
        iid = item_map.setdefault(iid, len(item_map) + 1)

        yield uid, iid, rating, timestamp


def read_movielens_100K():

    zip_path = _common.get_data(URL_PREFIX + URL_100K,
                                'movielens',
                                'movielens_100k.zip')

    archive_path = os.path.join('ml-100k', 'u.data')

    for line in _read_data(zip_path, archive_path):
        yield _parse_line(line, separator='\t')


def read_movielens_1M():

    zip_path = _common.get_data(URL_PREFIX + URL_1M,
                                'movielens',
                                'movielens_1M.zip')

    archive_path = os.path.join('ml-1m', 'ratings.dat')

    data = _read_data(zip_path, archive_path)

    for line in _make_contiguous(data, separator='::'):
        yield line


def read_movielens_10M():

    zip_path = _common.get_data(URL_PREFIX + URL_10M,
                                'movielens',
                                'movielens_10M.zip')

    archive_path = os.path.join('ml-10M100K', 'ratings.dat')

    data = _read_data(zip_path, archive_path)

    for line in _make_contiguous(data, separator='::'):
        yield line


def read_movielens_20M():

    zip_path = _common.get_data(URL_PREFIX + URL_20M,
                                'movielens',
                                'movielens_20M.zip')

    archive_path = os.path.join('ml-20m', 'ratings.csv')

    data = itertools.islice(_read_data(zip_path, archive_path), 1, None)

    for line in _make_contiguous(data, separator=','):
        yield line
--------------------------------------------------------------------------------
/recommender_datasets/output.py:
--------------------------------------------------------------------------------
import array
import os
import zipfile

import h5py
import numpy as np

from recommender_datasets import _common


SEPARATOR = ','


def _array_from_dtype(dtype):

    if dtype == np.float32:
        return array.array('f')
    else:
        return array.array('i')


def _to_numpy(data, dtypes):

    arrays = tuple(_array_from_dtype(x)
                   for x in dtypes)

    for row in data:
        for (arr, elem) in zip(arrays, row):
            arr.append(elem)

    return tuple(np.array(arr, dtype=dtype)
                 for (arr, dtype) in
                 zip(arrays, dtypes))


def _serialize(row):

    row = [str(x) for x in row]

    return (SEPARATOR.join(row) + '\n').encode('utf-8')


def write_csv_data(filename, data,
                   header=('user_id', 'item_id', 'rating', 'timestamp')):

    output_dir = os.path.join(_common.get_data_home(), 'output')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    path = os.path.join(output_dir, filename) + '.zip'

    with zipfile.ZipFile(path, mode='w') as archive:
        with archive.open('data.csv', mode='w') as output_file:
            output_file.write(_serialize(header))

            for row in data:
                output_file.write(_serialize(row))


def write_hdf5_data(filename, data,
                    header=('user_id',
                            'item_id',
                            'rating',
                            'timestamp'),
                    dtype=(np.int32,
                           np.int32,
                           np.float32,
                           np.int32)):

    # Readers hand over either a tuple of numpy arrays or a generator of rows;
    # only the former can be written directly, the latter is converted first.
    if isinstance(data, tuple) and isinstance(data[0], np.ndarray):
        arrays = data
    else:
        arrays = _to_numpy(data, dtype)

    output_dir = os.path.join(_common.get_data_home(), 'output')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    path = os.path.join(output_dir, filename + '.hdf5')

    with h5py.File(path, "w") as archive:
        for (arr, name) in zip(arrays, header):
            archive.create_dataset(name, data=arr, compression='gzip')
--------------------------------------------------------------------------------
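A minimal round-trip sketch for the writers above (it assumes write access to ~/.recommender_datasets/output/; the toy rows are made up):

import os

import h5py

from recommender_datasets import _common, output

rows = [(1, 10, 5.0, 100), (2, 20, 3.0, 200)]

output.write_csv_data('toy_dataset', rows)
output.write_hdf5_data('toy_dataset', rows)

path = os.path.join(_common.get_data_home(), 'output', 'toy_dataset.hdf5')

with h5py.File(path, 'r') as archive:
    assert archive['/user_id'][:].tolist() == [1, 2]
    assert archive['/rating'][:].tolist() == [5.0, 3.0]
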
/recommender_datasets/verification.py:
--------------------------------------------------------------------------------
import os
import zipfile

import h5py
import numpy as np
import pandas as pd


from recommender_datasets import _common


def _read_csv(fname, columns=('user_id', 'item_id', 'rating', 'timestamp')):

    with zipfile.ZipFile(fname, mode='r') as archive:
        with archive.open('data.csv') as datafile:
            df = pd.read_csv(datafile)
            data = df[list(columns)]

    return data


def _read_hdf5(fname, columns=('user_id', 'item_id', 'rating', 'timestamp')):

    with h5py.File(fname, 'r') as data:
        return {column: data['/{}'.format(column)][:]
                for column in columns}


def verify(path, columns=('user_id', 'item_id', 'rating', 'timestamp')):

    output_dir = os.path.join(_common.get_data_home(), 'output')
    path = os.path.join(output_dir, path)

    csv_data = _read_csv(path + '.zip', columns)
    hdf5_data = _read_hdf5(path + '.hdf5', columns)

    return all(
        np.all(csv_data[column] == hdf5_data[column])
        for column in columns
    )
--------------------------------------------------------------------------------
/recommender_datasets/yoochoose.py:
--------------------------------------------------------------------------------
import array
import datetime
import os
import subprocess
import time

import numpy as np

from recommender_datasets import _common


def _to_numpy(data):

    uid_map = {}
    iid_map = {}

    uids = array.array('i')
    iids = array.array('i')
    timestamps = array.array('f')

    for uid, iid, timestamp in data:
        uid = uid_map.setdefault(uid, len(uid_map) + 1)
        iid = iid_map.setdefault(iid, len(iid_map) + 1)

        uids.append(uid)
        iids.append(iid)
        timestamps.append(timestamp)

    return (np.array(uids, dtype=np.int32),
            np.array(iids, dtype=np.int32),
            np.array(timestamps, dtype=np.float32))


def _read_data(variant):

    zip_path = _common.get_data('https://s3-eu-west-1.amazonaws.com/'
                                'yc-rdata/yoochoose-data.7z',
                                'yoochoose',
                                'yoochoose.7z')

    dest_dir = os.path.dirname(zip_path)

    for suffix in ('buys', 'clicks'):
        if not os.path.exists(os.path.join(
                dest_dir, 'yoochoose-{}.dat'.format(suffix))):
            subprocess.check_call(['7z',
                                   '-o{}'.format(dest_dir),
                                   'x',
                                   zip_path])

    fname = os.path.join(dest_dir, 'yoochoose-{}.dat'.format(variant))
    with open(fname, 'r') as datafile:
        for line in datafile:
            uid, timestamp, iid = line.split(',')[:3]

            timestamp = time.mktime(
                datetime.datetime.strptime(timestamp,
                                           '%Y-%m-%dT%H:%M:%S.%fZ')
                .timetuple())

            yield int(uid), int(iid), timestamp


def read_yoochoose(variant):

    data = _read_data(variant)

    uids, iids, timestamps = _to_numpy(data)

    return uids, iids, timestamps
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup


setup(
    name='recommender_datasets',
    version='0.1.0',
    install_requires=['click', 'numpy', 'scipy', 'requests', 'h5py', 'pandas'],
    packages=['recommender_datasets'],
    license='MIT',
    classifiers=['Development Status :: 3 - Alpha',
                 'License :: OSI Approved :: MIT License',
                 'Topic :: Scientific/Engineering :: Artificial Intelligence'],
)
--------------------------------------------------------------------------------
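A single dataset can also be packaged without going through bin/datasets. A sketch (it downloads MovieLens 100K into ~/.recommender_datasets/ on first use and writes the packaged files to ~/.recommender_datasets/output/):

from recommender_datasets import movielens, output, verification

output.write_csv_data('movielens_100K', movielens.read_movielens_100K())
output.write_hdf5_data('movielens_100K', movielens.read_movielens_100K())

assert verification.verify('movielens_100K')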