├── .gitignore ├── LICENSE ├── Readme.md ├── __init__.py ├── api ├── action.py ├── catalog.py ├── item.py ├── items_ranking_request.py ├── sequential_dataset.py └── user.py ├── datasets ├── __init__.py ├── amazon.py ├── beauty.py ├── bert4rec_datasets.py ├── booking.py ├── dataset_stats.py ├── dataset_utils.py ├── datasets_register.py ├── download_file.py ├── gowalla.py ├── movielens100k.py ├── movielens20m.py ├── movielens25m.py ├── movies_dataset.py ├── mts_kion.py ├── netflix.py └── yelp.py ├── docker ├── .bashrc ├── .gitignore ├── .vimrc ├── Dockerfile ├── cuda-archive-keyring.gpg ├── requirements.txt └── sources.list ├── evaluation ├── .gitignore ├── __init__.py ├── analyze_experiment_in_progress.py ├── analyze_results.py ├── clean_output.cpp ├── conf_intervals.py ├── configs │ ├── ML1M-bpr-example.py │ ├── __init__.py │ └── gsasrec │ │ ├── common_benchmark_config.py │ │ ├── gowalla_benchmark.py │ │ ├── ml1m_benchmark.py │ │ └── steam_benchmark.py ├── dataset_by_config.py ├── evaluate_recommender.py ├── evaluation_utils.py ├── experiment_to_excel.py ├── filter_cold_start.py ├── metrics │ ├── __init__.py │ ├── average_popularity_rank.py │ ├── entropy.py │ ├── highest_score.py │ ├── hit.py │ ├── map.py │ ├── metric.py │ ├── model_confidence.py │ ├── mrr.py │ ├── ndcg.py │ ├── pairwise_cos_sim.py │ ├── precision.py │ ├── recall.py │ └── sampled_proxy_metric.py ├── n_actions_for_user.py ├── run_all_b4rec_originals.sh ├── run_experiment.py ├── run_n_experiments.sh ├── samplers │ ├── pop_sampler.py │ ├── random_sampler.py │ └── sampler.py ├── split_actions.py ├── statistical_signifficance_test.py └── two_predictions_signficance_test.py ├── losses ├── __init__.py ├── bce.py ├── bpr.py ├── climf.py ├── get_loss.py ├── items_masking_loss_proxy.py ├── lambda_gamma_rank.py ├── logit_norm.py ├── loss.py ├── loss_utils.py ├── mean_ypred_loss.py ├── softmax_crossentropy.py ├── top1.py └── xendcg.py ├── recommenders ├── BERT4rec │ ├── LICENSE │ ├── Readme.md │ ├── __init__.py │ ├── gen_data_fin.py │ ├── modeling.py │ ├── optimization.py │ ├── run.py │ ├── util.py │ └── vocab.py ├── __init__.py ├── conditional_top_recommender.py ├── constant_recommender.py ├── deep_mf.py ├── duorec │ └── duorec.py ├── featurizer.py ├── filter_seen_recommender.py ├── first_order_mc.py ├── item_item.py ├── kion_challenge_featurizer.py ├── lambdamart_ensemble_recommender.py ├── lightfm.py ├── matrix_factorization.py ├── metrics │ ├── __init__.py │ ├── ndcg.py │ └── success.py ├── mlp.py ├── mlp_historical.py ├── random_recommender.py ├── recommender.py ├── sequential │ ├── __init__.py │ ├── data_generator │ │ └── data_generator.py │ ├── featurizers │ │ ├── __init__.py │ │ └── hashing_featurizer.py │ ├── history_vectorizers │ │ ├── add_mask_history_vectorizer.py │ │ ├── default_history_vectorizer.py │ │ └── history_vectorizer.py │ ├── model_trainier.py │ ├── models │ │ ├── __init__.py │ │ ├── bert4rec │ │ │ ├── __init__.py │ │ │ ├── bert4recft.py │ │ │ ├── full_bert.py │ │ │ └── special_items.py │ │ ├── caser.py │ │ ├── gru4rec.py │ │ ├── positional_encodings.py │ │ ├── sasrec │ │ │ ├── __init__.py │ │ │ ├── sasrec.py │ │ │ └── sasrec_multihead_attention.py │ │ ├── sequential_recsys_model.py │ │ └── vit4rec.py │ ├── samplers │ │ ├── __init__.py │ │ ├── idf_sampler.py │ │ ├── popularity_sampler.py │ │ ├── random_sampler.py │ │ └── sampler.py │ ├── sequential_recommender.py │ ├── sequential_recommender_config.py │ ├── target_builders │ │ ├── full_matrix_targets_builder.py │ │ ├── items_masking_target_builder.py │ │ ├── 
negative_per_positive_target.py │ │ ├── positives_only_targets_builder.py │ │ ├── positives_sequence_target_builder.py │ │ ├── sampled_matrix_target_builder.py │ │ └── target_builders.py │ └── targetsplitters │ │ ├── fair_item_masking.py │ │ ├── items_masking.py │ │ ├── last_item_splitter.py │ │ ├── random_fraction_splitter.py │ │ ├── random_splitter.py │ │ ├── recency_sequence_sampling.py │ │ ├── shifted_sequence_splitter.py │ │ └── targetsplitter.py ├── svd.py ├── top_recommender.py ├── transition_chain_recommender.py └── vanilla_bert4rec.py ├── tests ├── .gitignore ├── __init__.py ├── datasets │ ├── __init__.py │ ├── booking_dataset_reference_actions.json │ ├── mts_kion_reference_actions.json │ ├── test_beauty_dataset.py │ ├── test_bert4rec_datasets.py │ ├── test_booking_dataset.py │ ├── test_datasets_register.py │ ├── test_filter_cold_users.py │ ├── test_get_movielens.py │ ├── test_gowalla_dataset.py │ ├── test_mts_kion_dataset.py │ ├── test_netflix.py │ └── test_yelp_dataset.py ├── generate_actions.py ├── lossess │ ├── __init__.py │ ├── bce_bad_sample.py │ ├── test_bce_loss.py │ ├── test_bpr_loss.py │ ├── test_climf_loss.py │ ├── test_items_masking_proxy_loss.py │ ├── test_lambdarank_loss.py │ ├── test_lambdarank_time.py │ ├── test_logit_norm.py │ ├── test_softmax_crossentropy.py │ ├── test_top1loss.py │ └── test_xendcg_loss.py ├── metrics │ ├── __init__.py │ ├── test_map.py │ ├── test_mrr.py │ ├── test_ndcg.py │ ├── test_pairwise_cos_sim.py │ ├── test_precision.py │ └── test_proxy_metric.py ├── misc │ ├── __init__.py │ ├── test_configs.py │ ├── test_evaluate_recommender.py │ ├── test_item_id.py │ ├── test_keras_ndcg.py │ ├── test_kion_challenge_featurizer.py │ ├── test_n_actions_for_user.py │ ├── test_recommender_evaluator.py │ └── test_split_actions.py ├── ml_sequences.py └── recommenders │ ├── __init__.py │ ├── baselines │ ├── __init__.py │ ├── test_conditional_top_recommender.py │ ├── test_constnat_recommender.py │ ├── test_deepmf.py │ ├── test_filter_seen_recommender.py │ ├── test_first_order_mc_recommender.py │ ├── test_item_item_recommender.py │ ├── test_lightfm_recommender.py │ ├── test_matrix_factorization_recommender.py │ ├── test_mlp_historical.py │ ├── test_mlp_recommender.py │ ├── test_svd_recommender.py │ ├── test_top_recommender.py │ └── test_transition_chain_recommender.py │ ├── sequential │ ├── __init__.py │ ├── bert4rec │ │ ├── __init__.py │ │ ├── test_bert4rec_ft.py │ │ └── test_full_bert.py │ ├── sasrec │ │ ├── __init__.py │ │ ├── test_positional_encoding.py │ │ ├── test_sasrec_attention_map.py │ │ ├── test_sasrec_full_target.py │ │ ├── test_sasrec_model.py │ │ ├── test_sasrec_no_embedding_reuse.py │ │ ├── test_sasrec_rss.py │ │ ├── test_sasrec_save_model.py │ │ └── test_vanilla_sasrec.py │ ├── test_add_mask_vectorizer.py │ ├── test_caser_no_uid.py │ ├── test_gru_model.py │ ├── test_items_masking_target_builder.py │ └── test_target_splitters.py │ ├── test_lambdamart_ensemble_recommender.py │ └── test_vanilla_bert4rec.py ├── ui ├── config.py ├── server.py └── static │ ├── app.js │ ├── index.html │ ├── typeahead.css │ └── typeahead.js └── utils ├── generator_limit.py ├── item_id.py └── os_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | data 3 | .idea/ 4 | evaluation/results/ 5 | evaluation/results 6 | *__pycache__/ 7 | *.ipynb_checkpoints/ 8 | *.DS_Store 9 | -------------------------------------------------------------------------------- /__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/__init__.py -------------------------------------------------------------------------------- /api/action.py: -------------------------------------------------------------------------------- 1 | import json 2 | class Action(object): 3 | def __init__(self, user_id, item_id, timestamp, data=None): 4 | if data is None: 5 | data = dict() 6 | self.user_id = user_id 7 | self.item_id = item_id 8 | self.data = data 9 | self.timestamp = timestamp 10 | 11 | def to_str(self): 12 | result = "Action(uid={}, item={}, ts={}".format( 13 | self.user_id, 14 | self.item_id, 15 | self.timestamp) 16 | if self.data != {}: 17 | result += ", data={}".format(str(self.data)) 18 | result += ")" 19 | return result 20 | 21 | def to_json(self): 22 | try: 23 | #check if data is json serializable 24 | json.dumps(self.data) 25 | data = self.data 26 | 27 | except (TypeError, ValueError): 28 | #fallback to just string representation 29 | #TODO: restore may work incorrectly with some datasets 30 | data = str(self.data) 31 | 32 | return json.dumps({ 33 | "user_id": self.user_id, 34 | "item_id": self.item_id, 35 | "data": data, 36 | "timestamp": self.timestamp 37 | }) 38 | 39 | @staticmethod 40 | def from_json(action_str): 41 | doc = json.loads(action_str) 42 | return Action(doc["user_id"], doc["item_id"], doc["timestamp"], doc["data"]) 43 | 44 | def __str__(self): 45 | return self.to_str() 46 | 47 | def __repr__(self): 48 | return self.to_str() 49 | 50 | -------------------------------------------------------------------------------- /api/catalog.py: -------------------------------------------------------------------------------- 1 | def relevancy(keyword, string): 2 | if keyword.lower() == string.lower(): 3 | return -1 4 | return keyword.lower().find(string.lower()) 5 | 6 | class Catalog(object): 7 | def __init__(self): 8 | self.items = {} 9 | 10 | def add_item(self, item): 11 | self.items[item.item_id] = item 12 | 13 | def get_item(self, item_id): 14 | return self.items[item_id] 15 | 16 | def search(self, keyword): 17 | result = [] 18 | for item in self.items.values(): 19 | if keyword.lower() in item.title.lower(): 20 | result.append(item) 21 | result.sort(key=lambda value: relevancy(keyword, value.title)) 22 | return result 23 | 24 | -------------------------------------------------------------------------------- /api/item.py: -------------------------------------------------------------------------------- 1 | class Item(object): 2 | tags = None 3 | title = None 4 | 5 | def __init__(self, item_id, cat_features=None, real_features=None): 6 | if real_features is None: 7 | real_features = [] 8 | 9 | if cat_features is None: 10 | cat_features = [] 11 | 12 | self.item_id = item_id 13 | self.cat_features = cat_features 14 | self.real_features = real_features 15 | 16 | 17 | def with_tags(self, tags): 18 | self.tags = tags 19 | return self 20 | 21 | def with_title(self, title): 22 | self.title = title 23 | return self 24 | 25 | def __str__(self): 26 | return "item id={} title={} tags={}".format(self.item_id, self.title, self.tags) 27 | 28 | def __repr__(self): 29 | return self.__str__() 30 |
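A quick round-trip check for the JSON helpers in action.py — an illustrative sketch, not a file from the repository; it assumes the aprec package root is on PYTHONPATH:

    from aprec.api.action import Action

    # serialize an action together with its extra payload, then restore it
    action = Action(user_id="u1", item_id="i42", timestamp=1700000000, data={"rating": 5.0})
    restored = Action.from_json(action.to_json())
    assert restored.item_id == "i42" and restored.timestamp == 1700000000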
-------------------------------------------------------------------------------- /api/items_ranking_request.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | class ItemsRankingRequest(object): 5 | def __init__(self, user_id, item_ids: List): 6 | self.user_id = user_id 7 | self.item_ids = item_ids 8 | 9 | def __str__(self): 10 | return f"user_id={self.user_id} item_ids=[{','.join(self.item_ids)}]" 11 | 12 | def __repr__(self): 13 | return self.__str__() 14 | -------------------------------------------------------------------------------- /api/sequential_dataset.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from pathlib import PosixPath 3 | from typing import List 4 | 5 | import numpy as np 6 | from aprec.api.action import Action 7 | from aprec.utils.item_id import ItemId 8 | from aprec.utils.os_utils import mkdir_p 9 | 10 | 11 | class MapedSequences(object): 12 | ALL_SEQUENCES = 'all_sequences.mmap' 13 | BORDERS = 'borders.mmap' 14 | 15 | def __init__(self, directory, n_users, n_items): 16 | self.directory = directory 17 | self.is_maped = False 18 | 19 | self.sequences = None 20 | self.borders = None 21 | 22 | @staticmethod 23 | def build(user_actions, n_users, n_items, directory:PosixPath): 24 | all_sequences = [] 25 | borders = [] 26 | for i in range(n_users): 27 | user_sequence = [] 28 | for action in user_actions[i]: 29 | item = action[0] 30 | user_sequence.append(item) 31 | all_sequences += user_sequence 32 | borders.append(len(all_sequences)) 33 | all_sequences = np.array(all_sequences, dtype='int32') 34 | sequences_map = np.memmap(directory/MapedSequences.ALL_SEQUENCES, shape=all_sequences.shape, dtype='int32', mode="write") 35 | sequences_map[:] = all_sequences[:] 36 | sequences_map.flush() 37 | borders = np.array(borders, dtype='int32') 38 | borders_map = np.memmap(directory/MapedSequences.BORDERS, shape=borders.shape, dtype='int32', mode="write") 39 | borders_map[:] = borders[:] 40 | borders_map.flush() 41 | 42 | class SequentialDataset(object): 43 | def __init__(self): 44 | self.user_mapping = ItemId() 45 | self.item_mapping = ItemId() 46 | self.user_actions = defaultdict(list) 47 | self.is_sorted = True 48 | 49 | def add_action(self, action): 50 | user_id = self.user_mapping.get_id(action.user_id) 51 | item_id = self.item_mapping.get_id(action.item_id) 52 | 53 | #user_actions stores (item_id, timestamp) tuples 54 | if len(self.user_actions[user_id]) > 0 and self.user_actions[user_id][-1][1] > action.timestamp: 55 | self.is_sorted = False 56 | 57 | self.user_actions[user_id].append((item_id, action.timestamp)) 58 | 59 | def sort(self): 60 | if not self.is_sorted: 61 | for user in self.user_actions: 62 | self.user_actions[user].sort(key=lambda a: a[1]) 63 | self.is_sorted = True 64 | 65 | 66 | -------------------------------------------------------------------------------- /api/user.py: -------------------------------------------------------------------------------- 1 | class User(object): 2 | def __init__(self, user_id, cat_features=None, real_features=None): 3 | if real_features is None: 4 | real_features = dict() 5 | 6 | if cat_features is None: 7 | cat_features = dict() 8 | 9 | self.user_id = user_id 10 | self.cat_features = cat_features 11 | self.real_features = real_features 12 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/amazon.py: -------------------------------------------------------------------------------- 1 | from aprec.api.action import
Action 2 | from aprec.datasets.download_file import download_file 3 | 4 | 5 | URLS = {"books": "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Books.csv"} 6 | DATA_DIR = "data/amazon" 7 | 8 | def download(category): 9 | filename = download_file(URLS[category], f"{category}.csv", DATA_DIR) 10 | return filename 11 | 12 | def get_amazon_actions(category): 13 | filename = download(category) 14 | result = [] 15 | for line in open(filename): 16 | user_id, item_id, rating, timestamp = line.strip().split(",") 17 | rating = float(rating) 18 | timestamp = int(timestamp) 19 | result.append(Action(user_id, item_id, timestamp, {"rating": rating})) 20 | return result -------------------------------------------------------------------------------- /datasets/beauty.py: -------------------------------------------------------------------------------- 1 | from aprec.api.action import Action 2 | from aprec.datasets.download_file import download_file 3 | 4 | dataset_url="http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Beauty.csv" 5 | dataset = "ratings.csv" 6 | dir = "data/beauty" 7 | 8 | def get_beauty_dataset(): 9 | dataset_filename = download_file(dataset_url, dataset, dir) 10 | actions = [] 11 | with open(dataset_filename) as input: 12 | for line in input: 13 | user, item, rating, timestamp = line.strip().split(",") 14 | timestamp = int(timestamp) 15 | actions.append(Action(user, item, timestamp)) 16 | return actions -------------------------------------------------------------------------------- /datasets/bert4rec_datasets.py: -------------------------------------------------------------------------------- 1 | from aprec.api.action import Action 2 | from aprec.datasets.download_file import download_file 3 | 4 | BERT4REC_DATASET_URL="https://raw.githubusercontent.com/asash/BERT4rec_py3_tf2/master/BERT4rec/data/{}.txt" 5 | BERT4REC_DIR = "data/bert4rec" 6 | VALID_DATASETS={"beauty", "ml-1m", "steam"} 7 | 8 | def get_bert4rec_dataset(dataset): 9 | if dataset not in VALID_DATASETS: 10 | raise ValueError(f"unknown bert4rec dataset {dataset}") 11 | dataset_filename = download_file(BERT4REC_DATASET_URL.format(dataset), dataset + ".txt", BERT4REC_DIR) 12 | actions = [] 13 | prev_user = None 14 | current_timestamp = 0 15 | with open(dataset_filename) as input: 16 | for line in input: 17 | user, item = [str(id) for id in line.strip().split()] 18 | if user != prev_user: 19 | current_timestamp = 0 20 | prev_user = user 21 | current_timestamp += 1 22 | actions.append(Action(user, item, current_timestamp)) 23 | return actions -------------------------------------------------------------------------------- /datasets/dataset_utils.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import gzip 3 | import logging 4 | import os 5 | import mmh3 6 | 7 | from aprec.utils.os_utils import get_dir, mkdir_p, shell 8 | 9 | 10 | 11 | def filter_popular_items(actions_generator, max_actions): 12 | actions = [] 13 | items_counter = Counter() 14 | for action in actions_generator: 15 | actions.append(action) 16 | items_counter[action.item_id] += 1 17 | popular_items = set([item_id for (item_id, cnt) in items_counter.most_common(max_actions)]) 18 | return filter(lambda action: action.item_id in popular_items, actions) 19 | 20 | def filter_cold_users(actions_generator, min_actions_per_user = 0): 21 | actions = [] 22 | user_counter = Counter() 23 | for action in actions_generator: 24 | actions.append(action) 25 | 
user_counter[action.user_id] += 1 26 | return filter(lambda action: user_counter[action.user_id] >= min_actions_per_user, actions) 27 | 28 | def take_user_fraction(actions_generator, fraction): 29 | return filter(lambda action: abs(mmh3.hash(action.user_id) / 2**31) < fraction, actions_generator) 30 | 31 | def unzip(zipped_file, unzip_dir): 32 | full_dir_name = os.path.join(get_dir(), unzip_dir) 33 | if os.path.isdir(full_dir_name): 34 | logging.info(f"{unzip_dir} already exists, skipping") 35 | else: 36 | mkdir_p(full_dir_name) 37 | shell(f"unzip -o {zipped_file} -d {full_dir_name}") 38 | return full_dir_name 39 | 40 | def gunzip(gzip_file): 41 | full_file_name = os.path.abspath(gzip_file) 42 | if not(gzip_file.endswith(".gz")): 43 | raise Exception(f"{gzip_file} is not a gzip file") 44 | unzipped_file_name = full_file_name[:-3] 45 | if os.path.isfile(unzipped_file_name): 46 | logging.info(f"{unzipped_file_name} already exists, skipping") 47 | return unzipped_file_name 48 | 49 | with gzip.open(full_file_name) as input: 50 | data = input.read() 51 | with open(unzipped_file_name, 'wb') as output: 52 | output.write(data) 53 | return unzipped_file_name 54 |
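The helpers above are plain generator filters, so they compose; a minimal sketch (not a repository file, thresholds chosen arbitrarily) using the beauty dataset loader defined earlier:

    from aprec.datasets.beauty import get_beauty_dataset
    from aprec.datasets.dataset_utils import filter_cold_users, take_user_fraction

    # keep a deterministic 10% sample of users, then drop users with fewer than 5 actions
    actions = take_user_fraction(get_beauty_dataset(), 0.1)
    warm_actions = list(filter_cold_users(actions, min_actions_per_user=5))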
-------------------------------------------------------------------------------- /datasets/download_file.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import requests 5 | from tqdm import tqdm 6 | 7 | from aprec.utils.os_utils import mkdir_p_local, get_dir 8 | def download_file(url, filename, data_dir): 9 | mkdir_p_local(data_dir) 10 | full_filename = os.path.join(get_dir(), data_dir, filename) 11 | if not os.path.isfile(full_filename): 12 | logging.info(f"downloading {filename} file") 13 | response = requests.get(url, stream=True) 14 | with open(full_filename, 'wb') as out_file: 15 | expected_length = int(response.headers.get('content-length', 0)) 16 | downloaded_bytes = 0 17 | with tqdm(total=expected_length, ascii=True) as pbar: 18 | for chunk in response.iter_content(chunk_size=1024): 19 | out_file.write(chunk) 20 | out_file.flush() 21 | pbar.update(len(chunk)) 22 | logging.info(f"{filename} dataset downloaded") 23 | else: 24 | logging.info(f"{filename} file already exists, skipping") 25 | return full_filename -------------------------------------------------------------------------------- /datasets/gowalla.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | import dateutil.parser 4 | 5 | from aprec.datasets.dataset_utils import gunzip 6 | from aprec.datasets.download_file import download_file 7 | from aprec.api.action import Action 8 | 9 | 10 | GOWALLA_DATASET_URL='https://snap.stanford.edu/data/loc-gowalla_totalCheckins.txt.gz' 11 | DIR="data/gowalla" 12 | GOWALLA_GZIPPED="gowalla.txt.gz" 13 | 14 | def prepare_data(): 15 | gowalla_file_zipped = download_file(GOWALLA_DATASET_URL,GOWALLA_GZIPPED, DIR) 16 | unzipped_gowalla_file = gunzip(gowalla_file_zipped) 17 | return unzipped_gowalla_file 18 | 19 | def parse_line(line): 20 | user_id, timestamp, lat, lon, item_id = line.split("\t") 21 | timestamp = time.mktime(dateutil.parser.isoparse(timestamp).timetuple()) 22 | return Action(user_id, item_id, timestamp) 23 | 24 | def get_gowalla_dataset(max_actions=None): 25 | dataset_file = prepare_data() 26 | actions = [] 27 | for line in open(dataset_file): 28 | actions.append(parse_line(line.strip())) 29 | if max_actions is not None and len(actions) >= max_actions: 30 | break 31 | return actions -------------------------------------------------------------------------------- /datasets/movielens100k.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | from aprec.utils.os_utils import get_dir, console_logging, shell 5 | from aprec.api.action import Action 6 | from aprec.datasets.download_file import download_file 7 | from requests.exceptions import ConnectionError 8 | 9 | DATASET_NAME = 'ml-100k' 10 | MOVIELENS_URL = "http://files.grouplens.org/datasets/movielens/{}.zip".format(DATASET_NAME) 11 | MOVIELENS_BACKUP_URL = "https://web.archive.org/web/20220128015818/https://files.grouplens.org/datasets/movielens/ml-100k.zip" 12 | MOVIELENS_DIR = "data/movielens100k" 13 | MOVIELENS_FILE = "movielens.zip" 14 | MOVIELENS_FILE_ABSPATH = os.path.join(get_dir(), MOVIELENS_DIR, MOVIELENS_FILE) 15 | MOVIELENS_DIR_ABSPATH = os.path.join(get_dir(), MOVIELENS_DIR) 16 | RATINGS_FILE = os.path.join(MOVIELENS_DIR_ABSPATH, 'u.data') 17 | 18 | 19 | def extract_movielens_dataset(): 20 | if os.path.isfile(RATINGS_FILE): 21 | logging.info("movielens dataset is already extracted") 22 | return 23 | shell("unzip -o {} -d {}".format(MOVIELENS_FILE_ABSPATH, MOVIELENS_DIR_ABSPATH)) 24 | dataset_dir = os.path.join(MOVIELENS_DIR_ABSPATH, DATASET_NAME) 25 | for filename in os.listdir(dataset_dir): 26 | shell("mv {} {}".format(os.path.join(dataset_dir, filename), MOVIELENS_DIR_ABSPATH)) 27 | shell("rm -rf {}".format(dataset_dir)) 28 | 29 | 30 | def prepare_data(): 31 | try: 32 | download_file(MOVIELENS_URL, MOVIELENS_FILE, MOVIELENS_DIR) 33 | except ConnectionError: 34 | download_file(MOVIELENS_BACKUP_URL, MOVIELENS_FILE, MOVIELENS_DIR) 35 | 36 | extract_movielens_dataset() 37 | 38 | 39 | def get_movielens100k_actions(min_rating=4.0): 40 | prepare_data() 41 | with open(RATINGS_FILE, 'r') as data_file: 42 | for line in data_file: 43 | user_id, movie_id, rating_str, timestamp_str = line.strip().split('\t') 44 | rating = float(rating_str) 45 | timestamp = int(timestamp_str) 46 | if rating >= min_rating: 47 | yield Action(user_id, movie_id, timestamp, {"rating": rating}) 48 | 49 | 50 | if __name__ == "__main__": 51 | console_logging() 52 | prepare_data() 53 | -------------------------------------------------------------------------------- /docker/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /docker/.vimrc: -------------------------------------------------------------------------------- 1 | set nu 2 | colorscheme elflord 3 | 4 | filetype plugin on 5 | set expandtab 6 | set tabstop=4 7 | set smarttab 8 | set shiftwidth=4 9 | set smartindent 10 | set nocompatible 11 | syntax on 12 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | from ubuntu:jammy 2 | run unset https_proxy 3 | add ./sources.list /etc/apt/sources.list 4 | add ./cuda-archive-keyring.gpg /usr/share/keyrings/cuda-archive-keyring.gpg 5 | run apt-get update 6 | ARG DEBIAN_FRONTEND=noninteractive 7 | run apt-get install -y apt-utils 8 | run apt-get install -y ca-certificates 9 | run apt-get install -y vim 10 | 11 | run echo "deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" > 
/etc/apt/sources.list.d/cuda-ubuntu2204-x86_64.list 12 | run apt-get update 13 | 14 | 15 | run apt-get install -y wget curl vim gcc git openssl\ 16 | htop atop screen locales tmux mc sudo 17 | 18 | run apt-get install -y cuda-toolkit-11.7 19 | run apt-get install -y libcudnn8=8.5.0.96-1+cuda11.7 20 | run apt-mark hold libcudnn8 21 | 22 | #run apt-get install -y nvidia-utils-520 23 | run apt-get install -y pip 24 | run apt-get install -y expect 25 | run apt-get install -y zsh 26 | run apt-get install -y curl 27 | 28 | run locale-gen "en_US.UTF-8" 29 | run dpkg-reconfigure locales 30 | run update-locale LANG=en_US.UTF-8 LANGUAGE=en.UTF-8 31 | run mkdir -p /home/aprec/Projects/aprec 32 | run useradd aprec 33 | run chown -R aprec:aprec /home/aprec/ 34 | run usermod -aG sudo aprec 35 | run usermod --password $(openssl passwd -1 lambdarank) aprec 36 | run echo "aprec ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers 37 | user aprec 38 | 39 | SHELL ["/bin/zsh", "-c"] 40 | RUN sh -c "$(wget -O- https://github.com/deluan/zsh-in-docker/releases/download/v1.1.4/zsh-in-docker.sh)" -- \ 41 | -t robbyrussell 42 | 43 | 44 | ENV SHELL=/bin/zsh 45 | ENV NVIDIA_VISIBLE_DEVICES all 46 | ENV NVIDIA_DRIVER_CAPABILITIES compute,utility 47 | add .vimrc /home/aprec/.vimrc 48 | add .gitconfig /home/aprec/.gitconfig 49 | # 50 | ##make /bin/sh symlink to zsh instead of dash: 51 | user root 52 | RUN echo "dash dash/sh boolean false" | debconf-set-selections 53 | RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash 54 | user aprec 55 | ENV ENV=/home/aprec/.profile 56 | ENV PATH=$PATH:/home/aprec/.local/bin 57 | 58 | workdir /home/aprec/Projects/ 59 | ENV PYTHONPATH=/home/aprec/Projects 60 | 61 | add requirements.txt /tmp/aprec_requirements.txt 62 | run pip3 install -r /tmp/aprec_requirements.txt 63 | 64 | cmd zsh 65 | -------------------------------------------------------------------------------- /docker/cuda-archive-keyring.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/docker/cuda-archive-keyring.gpg -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter>=1.0.0 2 | tqdm>=4.62.3 3 | requests>=2.26.0 4 | pandas>=1.5.2 5 | scipy>=1.6.0 6 | tornado>=6.1 7 | scikit-learn>=1.0 8 | lightgbm>=3.3.0 9 | mmh3>=3.0.0 10 | matplotlib>=3.6.2 11 | seaborn>=0.12.1 12 | jupyterlab>=3.2.2 13 | transformers>=4.25.1 14 | wget>=3.2 15 | pytest>=7.1.2 16 | pytest-forked>=1.4.0 17 | multiprocessing_on_dill>=3.5.0a4 18 | ujson>=5.5.0 19 | faiss-gpu>=1.7.2 20 | tensorflow-gpu>=2.11.0 21 | tensorflow-probability>=0.18.0 22 | git+https://github.com/asash/lightfm.git@main 23 | -------------------------------------------------------------------------------- /docker/sources.list: -------------------------------------------------------------------------------- 1 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy main restricted 2 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy-updates main restricted 3 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy universe 4 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy-updates universe 5 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy multiverse 6 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy-updates multiverse 7 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy-backports main restricted universe multiverse 8 | 9 | deb 
mirror://mirrors.ubuntu.com/mirrors.txt jammy-security main restricted 10 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy-security universe 11 | deb mirror://mirrors.ubuntu.com/mirrors.txt jammy-security multiverse 12 | #http://security.ubuntu.com/ubuntu/ 13 | -------------------------------------------------------------------------------- /evaluation/.gitignore: -------------------------------------------------------------------------------- 1 | booking_data/ 2 | saved/ 3 | .ipynb_checkpoints/ 4 | a.out 5 | log_tensorboard 6 | .DS_Store 7 | ._.DS_Store 8 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/evaluation/__init__.py -------------------------------------------------------------------------------- /evaluation/analyze_results.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import os 4 | from copy import deepcopy 5 | import pandas as pd 6 | 7 | pd.set_option("display.max_rows", None, "display.max_columns", None) 8 | pd.set_option('display.expand_frame_repr', False) 9 | 10 | experiment_file = sys.argv[1] 11 | data = json.load(open(experiment_file)) 12 | 13 | commit_filename = os.path.join(os.path.dirname(experiment_file), "commit") 14 | if os.path.isfile(commit_filename): 15 | with open(commit_filename) as commit_file: 16 | print(commit_file.read()) 17 | 18 | i = 0 19 | for split_fraction in data: 20 | print("="*40) 21 | i += 1 22 | doc = deepcopy(split_fraction) 23 | recommenders = doc['recommenders'] 24 | del(doc['recommenders']) 25 | print("experiment_{}".format(i)) 26 | print(pd.DataFrame([doc]).T) 27 | print("\n") 28 | 29 | experiment_docs = [] 30 | for recommender_name in recommenders: 31 | recommender = recommenders[recommender_name] 32 | recommender['name'] = recommender_name 33 | del(recommender['model_metadata']) 34 | experiment_docs.append(recommender) 35 | 36 | df = pd.DataFrame(experiment_docs) 37 | df = df.sort_values("ndcg@40") 38 | df = df.set_index('name') 39 | print(df) 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /evaluation/clean_output.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #define BUFFERSIZE 10240 5 | 6 | void print(int * buf, int pos) { 7 | int i ; 8 | for (i = 0; i < pos; ++i) 9 | putchar(buf[i]) ; 10 | putchar('\n') ; 11 | } 12 | 13 | int main(int argc, char* argv[]) 14 | { 15 | int c ; 16 | int buf[BUFFERSIZE] ; 17 | int pos = 0 ; 18 | while((c = getchar()) != EOF) 19 | { 20 | switch (c) 21 | { 22 | case '\b': 23 | { 24 | if (pos > 0) 25 | pos-- ; 26 | break ; 27 | } 28 | case '\n': 29 | { 30 | print(buf, pos); 31 | pos = 0 ; 32 | break ; 33 | } 34 | 35 | case '\r': 36 | { 37 | print(buf, pos); 38 | pos = 0 ; 39 | break ; 40 | } 41 | 42 | default: 43 | { 44 | buf[pos++] = c ; 45 | break ; 46 | } 47 | } 48 | } 49 | return 0 ; 50 | } 51 | -------------------------------------------------------------------------------- /evaluation/conf_intervals.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import gzip 3 | import json 4 | import pandas as pd 5 | import numpy as np 6 | import scipy.stats 7 | 8 | def mean_confidence_interval(data, confidence=0.95): 9 | a = 1.0 * 
np.array(data) 10 | n = len(a) 11 | m, se = np.mean(a), scipy.stats.sem(a) 12 | h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1) 13 | return m, m-h, m+h 14 | 15 | prediction_file = sys.argv[1] 16 | data = json.load(gzip.open(prediction_file)) 17 | docs = [] 18 | for doc in data: 19 | docs.append(doc['metrics']) 20 | df = pd.DataFrame(docs) 21 | 22 | metrics = [] 23 | for metric in (df.columns): 24 | mean, conf_min, conf_max = mean_confidence_interval(df[metric]) 25 | metric_doc = {'name': metric, 'mean': mean, 'conf_min': conf_min, 'conf_max': conf_max} 26 | metrics.append(metric_doc) 27 | 28 | print(pd.DataFrame(metrics)) 29 | 30 | -------------------------------------------------------------------------------- /evaluation/configs/ML1M-bpr-example.py: -------------------------------------------------------------------------------- 1 | from aprec.recommenders.top_recommender import TopRecommender 2 | from aprec.recommenders.lightfm import LightFMRecommender 3 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 4 | from aprec.evaluation.samplers.pop_sampler import PopTargetItemsSampler 5 | from aprec.evaluation.metrics.mrr import MRR 6 | from aprec.evaluation.metrics.ndcg import NDCG 7 | from aprec.evaluation.split_actions import LeaveOneOut 8 | import numpy as np 9 | 10 | 11 | DATASET = "BERT4rec.ml-1m" 12 | 13 | USERS_FRACTIONS = [1] 14 | 15 | def top_recommender(): 16 | return FilterSeenRecommender(TopRecommender()) 17 | 18 | def lightfm_recommender(k, loss): 19 | return FilterSeenRecommender(LightFMRecommender(k, loss)) 20 | 21 | RECOMMENDERS = { 22 | "top_recommender": top_recommender, 23 | "MF-BPR": lambda: lightfm_recommender(30, 'bpr'), 24 | } 25 | 26 | MAX_TEST_USERS=6040 27 | 28 | METRICS = [NDCG(10), MRR()] 29 | TARGET_ITEMS_SAMPLER = PopTargetItemsSampler(101) 30 | 31 | RECOMMENDATIONS_LIMIT = 100 32 | SPLIT_STRATEGY = LeaveOneOut(MAX_TEST_USERS) 33 | -------------------------------------------------------------------------------- /evaluation/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/evaluation/configs/__init__.py -------------------------------------------------------------------------------- /evaluation/configs/gsasrec/gowalla_benchmark.py: -------------------------------------------------------------------------------- 1 | 2 | from aprec.evaluation.split_actions import LeaveOneOut 3 | from aprec.evaluation.configs.gsasrec.common_benchmark_config import * 4 | 5 | DATASET = "gowalla_warm5" 6 | N_VAL_USERS=512 7 | MAX_TEST_USERS=86168 8 | SPLIT_STRATEGY = LeaveOneOut(MAX_TEST_USERS) 9 | RECOMMENDERS = get_recommenders(filter_seen=False) 10 | 11 | if __name__ == "__main__": 12 | 13 | from aprec.tests.misc.test_configs import TestConfigs 14 | TestConfigs().validate_config(__file__) 15 | -------------------------------------------------------------------------------- /evaluation/configs/gsasrec/ml1m_benchmark.py: -------------------------------------------------------------------------------- 1 | from aprec.evaluation.split_actions import LeaveOneOut 2 | from aprec.evaluation.configs.gsasrec.common_benchmark_config import * 3 | 4 | DATASET = "BERT4rec.ml-1m" 5 | N_VAL_USERS=512 6 | MAX_TEST_USERS=6040 7 | SPLIT_STRATEGY = LeaveOneOut(MAX_TEST_USERS) 8 | RECOMMENDERS = get_recommenders(filter_seen=True) 9 | 10 | if __name__ == "__main__": 11 | 12 | from aprec.tests.misc.test_configs import TestConfigs 13 
| TestConfigs().validate_config(__file__) 14 | -------------------------------------------------------------------------------- /evaluation/configs/gsasrec/steam_benchmark.py: -------------------------------------------------------------------------------- 1 | from aprec.evaluation.split_actions import LeaveOneOut 2 | from aprec.evaluation.configs.gsasrec.common_benchmark_config import * 3 | 4 | DATASET = "BERT4rec.steam" 5 | N_VAL_USERS=512 6 | MAX_TEST_USERS=281428 7 | SPLIT_STRATEGY = LeaveOneOut(MAX_TEST_USERS) 8 | RECOMMENDERS = get_recommenders(filter_seen=True) 9 | 10 | if __name__ == "__main__": 11 | from aprec.tests.misc.test_configs import TestConfigs 12 | TestConfigs().validate_config(__file__) 13 | -------------------------------------------------------------------------------- /evaluation/dataset_by_config.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import sys 3 | 4 | spec = importlib.util.spec_from_file_location("config", sys.argv[1]) 5 | config = importlib.util.module_from_spec(spec) 6 | spec.loader.exec_module(config) 7 | sys.stdout.write(config.DATASET) -------------------------------------------------------------------------------- /evaluation/evaluation_utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | def group_by_user(actions): 5 | result = defaultdict(list) 6 | for action in actions: 7 | result[action.user_id].append(action) 8 | return result -------------------------------------------------------------------------------- /evaluation/filter_cold_start.py: -------------------------------------------------------------------------------- 1 | def filter_cold_start(train_actions, test_actions): 2 | train_user_ids = set() 3 | cold_start_set = set() 4 | non_cold_start = set() 5 | result = [] 6 | 7 | for action in train_actions: 8 | train_user_ids.add(action.user_id) 9 | 10 | for action in test_actions: 11 | if action.user_id in train_user_ids: 12 | non_cold_start.add(action.user_id) 13 | result.append(action) 14 | else: 15 | cold_start_set.add(action.user_id) 16 | print("number of cold start users filtered: {}".format(len(cold_start_set))) 17 | print("number of users in test set: {}".format(len(non_cold_start))) 18 | return result 19 | 20 | -------------------------------------------------------------------------------- /evaluation/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/evaluation/metrics/__init__.py -------------------------------------------------------------------------------- /evaluation/metrics/average_popularity_rank.py: -------------------------------------------------------------------------------- 1 | from .metric import Metric 2 | from collections import Counter 3 | 4 | class AveragePopularityRank(Metric): 5 | def __init__(self, k, actions): 6 | self.name = "apr@{}".format(k) 7 | self.k = k 8 | cnt = Counter() 9 | for action in actions: 10 | cnt[action.item_id] += 1 11 | 12 | self.pop_rank = {} 13 | rank = 0 14 | for item, _ in cnt.most_common(): 15 | rank += 1 16 | self.pop_rank[item] = rank 17 | 18 | 19 | def __call__(self, recommendations, actual_actions): 20 | cnt = 0 21 | s = 0 22 | for recommendation in recommendations[:self.k]: 23 | item_id = recommendation[0] 24 | if item_id in self.pop_rank: 25 | s += self.pop_rank[item_id] 26 | cnt += 1 27 | if cnt
== 0: 28 | return 0 29 | return s/cnt 30 | -------------------------------------------------------------------------------- /evaluation/metrics/entropy.py: -------------------------------------------------------------------------------- 1 | from aprec.evaluation.metrics.metric import Metric 2 | from scipy.special import softmax 3 | from scipy.stats import entropy 4 | import numpy as np 5 | 6 | def sigmoid(x): 7 | return np.exp(-np.logaddexp(0, -x)) 8 | 9 | class Entropy(Metric): 10 | def __init__(self, activation, k): 11 | self.name = f"{activation}Entropy@{k}" 12 | if activation == 'Softmax': 13 | self.activation = softmax 14 | elif activation == 'Sigmoid': 15 | self.activation = sigmoid 16 | else: 17 | raise Exception(f"unknown activation {activation}") 18 | self.k = k 19 | 20 | 21 | def __call__(self, recommendations, actual_actions): 22 | if len(recommendations) == 0: 23 | return 0 24 | scores = self.activation(np.array([rec[1] for rec in recommendations[:self.k]])) 25 | scores = scores/np.sum(scores) #normalize, so that we can treat them as probs 26 | return entropy(scores, base=2) / len(scores) -------------------------------------------------------------------------------- /evaluation/metrics/highest_score.py: -------------------------------------------------------------------------------- 1 | from aprec.evaluation.metrics.metric import Metric 2 | 3 | 4 | class HighestScore(Metric): 5 | def __init__(self): 6 | self.name = "HighestScore" 7 | 8 | def __call__(self, recommendations, actual_actions): 9 | if len(recommendations) == 0: 10 | return 0 11 | return recommendations[0][1] -------------------------------------------------------------------------------- /evaluation/metrics/hit.py: -------------------------------------------------------------------------------- 1 | from .metric import Metric 2 | 3 | class HIT(Metric): 4 | """ 5 | Short-Term Prediction Success 6 | Equals 1 if recommender system was able to predict next item in sequence, 0 otherwise. 
7 | """ 8 | def __init__(self, k): 9 | self.name = "HIT@{}".format(k) 10 | self.k = k 11 | 12 | def __call__(self, recommendations, actual_actions): 13 | if(len(recommendations) == 0): 14 | return 0 15 | action_to_check = actual_actions[0] 16 | for action in actual_actions[1:]: 17 | if action.timestamp < action_to_check.timestamp: 18 | action_to_check = action 19 | recommended = set([recommendation[0] for recommendation in recommendations[:self.k]]) 20 | return 1 if action_to_check.item_id in recommended else 0 21 | -------------------------------------------------------------------------------- /evaluation/metrics/map.py: -------------------------------------------------------------------------------- 1 | import math 2 | from .metric import Metric 3 | 4 | class MAP(Metric): 5 | def __init__(self, k): 6 | self.name = f"MAP@{k}" 7 | self.k = k 8 | 9 | def __call__(self, recommendations, actual_actions): 10 | if(len(recommendations) == 0 or len(actual_actions) == 0): 11 | return 0 12 | actual_set = set([action.item_id for action in actual_actions]) 13 | correct_predictions = 0 14 | running_sum = 0 15 | for i in range(len(recommendations[:self.k])): 16 | pos = i + 1 17 | predicted = recommendations[i][0] 18 | if predicted in actual_set: 19 | correct_predictions += 1 20 | running_sum += correct_predictions/pos 21 | pass 22 | return running_sum / len(actual_actions) 23 | 24 | 25 | -------------------------------------------------------------------------------- /evaluation/metrics/metric.py: -------------------------------------------------------------------------------- 1 | class Metric(object): 2 | less_is_better = False 3 | def __init__(self): 4 | self.name == "undefined" 5 | 6 | def get_name(self) -> str: 7 | return self.name 8 | 9 | def __call__(self, recommendations, actual): 10 | raise NotImplementedError 11 | -------------------------------------------------------------------------------- /evaluation/metrics/model_confidence.py: -------------------------------------------------------------------------------- 1 | from aprec.evaluation.metrics.metric import Metric 2 | from scipy.special import softmax 3 | import numpy as np 4 | 5 | def sigmoid(x): 6 | return np.exp(-np.logaddexp(0, -x)) 7 | 8 | class Confidence(Metric): 9 | def __init__(self, activation): 10 | self.name = f"{activation}Confidence" 11 | if activation == 'Softmax': 12 | self.activation = softmax 13 | elif activation == 'Sigmoid': 14 | self.activation = sigmoid 15 | else: 16 | raise Exception(f"unknown activation {activation}") 17 | 18 | 19 | def __call__(self, recommendations, actual_actions): 20 | if len(recommendations) == 0: 21 | return 0 22 | scores = np.array([rec[1] for rec in recommendations]) 23 | return self.activation(scores)[0] -------------------------------------------------------------------------------- /evaluation/metrics/mrr.py: -------------------------------------------------------------------------------- 1 | import math 2 | from .metric import Metric 3 | 4 | class MRR(Metric): 5 | def __init__(self): 6 | self.name = "MRR" 7 | 8 | def __call__(self, recommendations, actual_actions): 9 | if(len(recommendations) == 0): 10 | return 0 11 | actual_set = set([action.item_id for action in actual_actions]) 12 | for i in range(len(recommendations)): 13 | if recommendations[i][0] in actual_set: 14 | return 1/(i + 1) 15 | return 0 16 | -------------------------------------------------------------------------------- /evaluation/metrics/ndcg.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | from .metric import Metric 3 | 4 | class NDCG(Metric): 5 | def __init__(self, k): 6 | self.name = "ndcg@{}".format(k) 7 | self.k = k 8 | 9 | def __call__(self, recommendations, actual_actions): 10 | if(len(recommendations) == 0): 11 | return 0 12 | actual_set = set([action.item_id for action in actual_actions]) 13 | recommended = [recommendation[0] for recommendation in recommendations[:self.k]] 14 | cool = set(recommended).intersection(actual_set) 15 | if len(cool) == 0: 16 | return 0 17 | ideal_rec = sorted(recommended, key = lambda x: not(x in actual_set)) 18 | return NDCG.dcg(recommended, actual_set)/NDCG.dcg(ideal_rec, actual_set) 19 | 20 | 21 | @staticmethod 22 | def dcg(id_list, relevant_id_set): 23 | result = 0.0 24 | for idx in range(len(id_list)): 25 | i = idx + 1 26 | if (id_list[idx]) in relevant_id_set: 27 | result += 1 / math.log2(i+1) 28 | return result 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /evaluation/metrics/pairwise_cos_sim.py: -------------------------------------------------------------------------------- 1 | from collections import Counter, defaultdict 2 | import random 3 | 4 | from aprec.evaluation.metrics.metric import Metric 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | 9 | class PairwiseCosSim(Metric): 10 | def __init__(self, actions, k): 11 | print("init pairwise_cos_sim...") 12 | self.name = "pairwise_cos_sim@{}".format(k) 13 | self.k = k 14 | self.max_actions_per_user = 500 15 | self.max_users = 500 16 | self.item_cnt = Counter() 17 | self.pair_cnt = Counter() 18 | 19 | user_sets = defaultdict(list) 20 | 21 | for action in actions: 22 | user_sets[action.user_id].append(action.item_id) 23 | 24 | for user_id in np.random.choice(list(user_sets.keys()), min(self.max_users, len(user_sets)), replace=False): 25 | random.shuffle(user_sets[user_id]) 26 | for item1 in user_sets[user_id][:self.max_actions_per_user]: 27 | self.item_cnt[item1] += 1 28 | for item2 in user_sets[user_id][:self.max_actions_per_user]: 29 | if item1 != item2: 30 | self.pair_cnt[(item1, item2)] += 1 31 | self.item_cnt = dict(self.item_cnt) 32 | self.pair_cnt = dict(self.pair_cnt) 33 | print("init done...") 34 | 35 | def __call__(self, recommendations, actual_actions): 36 | items = [recommendation[0] for recommendation in recommendations[:self.k]] 37 | pairs = 0 38 | s = 0 39 | for item1 in items: 40 | for item2 in items: 41 | if (item1 != item2): 42 | pairs += 1 43 | if (item1, item2) in self.pair_cnt: 44 | s += self.pair_cnt[(item1, item2)] ** 2 / (self.item_cnt[item1] * self.item_cnt[(item2)]) 45 | if pairs == 0: return 0 46 | return s/pairs 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /evaluation/metrics/precision.py: -------------------------------------------------------------------------------- 1 | from .metric import Metric 2 | 3 | class Precision(Metric): 4 | def __init__(self, k): 5 | self.name = "precision@{}".format(k) 6 | self.k = k 7 | 8 | def __call__(self, recommendations, actual_actions): 9 | if len(recommendations) == 0: 10 | return 0 11 | actual_set = set([action.item_id for action in actual_actions]) 12 | recommended = set([recommendation[0] for recommendation in recommendations[:self.k]]) 13 | cool = recommended.intersection(actual_set) 14 | return len(cool) / len(recommended) 15 | 
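All of these metrics share one call convention: recommendations are (item_id, score) pairs sorted by descending score, and the ground truth is a list of Action objects. A toy sketch (illustrative only, not repository code) showing it:

    from aprec.api.action import Action
    from aprec.evaluation.metrics.ndcg import NDCG
    from aprec.evaluation.metrics.precision import Precision

    recommendations = [("a", 0.9), ("b", 0.5), ("c", 0.1)]
    actual = [Action("u1", "a", timestamp=1), Action("u1", "c", timestamp=2)]
    print(NDCG(3)(recommendations, actual))      # ~0.92: relevant "c" is ranked below irrelevant "b"
    print(Precision(2)(recommendations, actual)) # 0.5: one relevant item in the top 2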
-------------------------------------------------------------------------------- /evaluation/metrics/recall.py: -------------------------------------------------------------------------------- 1 | from .metric import Metric 2 | 3 | class Recall(Metric): 4 | def __init__(self, k): 5 | self.name = "recall@{}".format(k) 6 | self.k = k 7 | 8 | def __call__(self, recommendations, actual_actions): 9 | if len(recommendations) == 0: 10 | return 0 11 | actual_set = set([action.item_id for action in actual_actions]) 12 | recommended = set([recommendation[0] for recommendation in recommendations[:self.k]]) 13 | cool = recommended.intersection(actual_set) 14 | return len(cool) / len(actual_set) 15 | -------------------------------------------------------------------------------- /evaluation/metrics/sampled_proxy_metric.py: -------------------------------------------------------------------------------- 1 | import random 2 | from collections import Counter 3 | 4 | import numpy as np 5 | 6 | from .metric import Metric 7 | 8 | # This proxy mirrors the BERT4rec evaluation strategy, so that our models can be 9 | # compared against the numbers reported in the paper: their code randomly samples 10 | # 100 items from the full item list, adds the relevant items, and computes metrics 11 | # on that subset. For items outside the returned recommendations we assume the 12 | # score equals the minimum score of the recommended items minus a random value. 13 | 14 | 15 | class SampledProxy(Metric): 16 | def __init__(self, item_ids, probs, n_negatives, metric): 17 | self.item_ids = list(item_ids) 18 | self.n_negatives = n_negatives 19 | self.metric = metric 20 | self.name = f"{metric.name}_sampled@{self.n_negatives}" 21 | self.probs = probs 22 | 23 | def __call__(self, recommendations, actual_actions): 24 | rec_dict = {} 25 | min_score = float('inf') 26 | for item, score in recommendations: 27 | rec_dict[item] = score 28 | min_score = min(min_score, score) 29 | 30 | recs = [] 31 | recommended = set() 32 | for action in actual_actions: 33 | recs.append((action.item_id, self.get_item_score(action.item_id, min_score, rec_dict))) 34 | recommended.add(action.item_id) 35 | 36 | target_size = len(actual_actions) + self.n_negatives 37 | while(len(recommended) < target_size): 38 | item_ids = np.random.choice(self.item_ids, target_size - len(recommended), p=self.probs, replace=False) 39 | for item_id in item_ids: 40 | if item_id not in recommended: 41 | recs.append((item_id, self.get_item_score(item_id, min_score, rec_dict))) 42 | recommended.add(item_id) 43 | recs.sort(key=lambda x: -x[1]) 44 | return self.metric(recs, actual_actions) 45 | 46 | @staticmethod 47 | def all_item_ids_probs(actions): 48 | counter = Counter() 49 | cnt = 0 50 | for action in actions: 51 | counter[action.item_id] += 1 52 | cnt += 1 53 | 54 | items, probs = [], [] 55 | for item, item_cnt in counter.most_common(): 56 | items.append(item) 57 | probs.append(item_cnt / cnt) 58 | return items, probs 59 | 60 | 61 | 62 | @staticmethod 63 | def get_item_score(item_id, min_score, rec_dict): 64 | if item_id not in rec_dict: 65 | return min_score - random.random() 66 | else: 67 | return rec_dict[item_id] 68 |
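SampledProxy wraps any base metric; a sketch of typical wiring (illustrative only — here train_actions is an assumed list of Action objects, not repository code):

    from aprec.evaluation.metrics.hit import HIT
    from aprec.evaluation.metrics.sampled_proxy_metric import SampledProxy

    # train_actions: a list of Action objects (assumed to exist)
    # popularity-weighted negative sampling, matching the BERT4rec protocol
    items, probs = SampledProxy.all_item_ids_probs(train_actions)
    sampled_hit_at_10 = SampledProxy(items, probs, n_negatives=100, metric=HIT(10))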
-------------------------------------------------------------------------------- /evaluation/n_actions_for_user.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | def n_actions_for_user(actions, n): 3 | """keep only the first n actions for each user""" 4 | user_actions = defaultdict(lambda: []) 5 | for action in actions: 6 | user_actions[action.user_id].append(action) 7 | 8 | result = [] 9 | for user_id in user_actions: 10 | result += sorted(user_actions[user_id], key = lambda action: action.timestamp)[:n] 11 | 12 | return result 13 | 14 | 15 | -------------------------------------------------------------------------------- /evaluation/run_all_b4rec_originals.sh: -------------------------------------------------------------------------------- 1 | sh run_n_experiments.sh configs/bert4rec_repro_paper/ml_1m_original_200000steps.py 2 | sh run_n_experiments.sh configs/bert4rec_repro_paper/ml_1m_original_400000steps.py 3 | sh run_n_experiments.sh configs/bert4rec_repro_paper/ml_1m_original_800000steps.py 4 | sh run_n_experiments.sh configs/bert4rec_repro_paper/ml_1m_original_1600000steps.py 5 | sh run_n_experiments.sh configs/bert4rec_repro_paper/ml_1m_original_3200000steps.py 6 | sh run_n_experiments.sh configs/bert4rec_repro_paper/ml_1m_original_6400000steps.py 7 | sh run_n_experiments.sh configs/bert4rec_repro_paper/ml_1m_original_12800000steps.py 8 | -------------------------------------------------------------------------------- /evaluation/run_n_experiments.sh: -------------------------------------------------------------------------------- 1 | config=$1 2 | N=$2 3 | 4 | if [ "$CHECK_COMMIT_STATUS" != "false" ]; then 5 | if [ -n "$(git status --porcelain)" ]; then 6 | echo "There are changes in the repo. Please commit the code in order to maintain traceability of the experiments"; 7 | exit 1 8 | fi 9 | fi 10 | 11 | config_filename=$(basename -- "$config") 12 | config_id="${config_filename%.*}" 13 | 14 | 15 | 16 | date=`date +%Y_%m_%dT%H_%M_%S` 17 | experiment_id="${config_id}_${date}" 18 | dataset_id=`python3 dataset_by_config.py ${config}` 19 | echo "running experiments on dataset ${dataset_id}" 20 | root_dir=./results/$dataset_id/$experiment_id 21 | experiment_stdout=$root_dir/stdout 22 | experiment_stderr=$root_dir/stderr 23 | experiment_commit=$root_dir/commit 24 | 25 | mkdir -p $root_dir 26 | 27 | latest_experiment_link=./results/latest_experiment 28 | 29 | rm -f $latest_experiment_link 30 | ln -s `pwd`/$root_dir $latest_experiment_link 31 | 32 | echo experiment results are saved at $root_dir 33 | 34 | cp $config $root_dir 35 | 36 | for i in `seq 1 $N`; 37 | do 38 | experiment_result=$root_dir/experiment_${i}.json 39 | echo "experiment_stdout: ${experiment_stdout}" 40 | echo "experiment_stderr: ${experiment_stderr}" 41 | echo "experiment_result: ${experiment_result}" 42 | echo "experiment_commit: ${experiment_commit}" 43 | git log -1 > $experiment_commit 44 | unbuffer python3 run_experiment.py $config $experiment_result > $experiment_stdout 2> $experiment_stderr; 45 | done; 46 | -------------------------------------------------------------------------------- /evaluation/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from aprec.api.items_ranking_request import ItemsRankingRequest 4 | from aprec.evaluation.evaluation_utils import group_by_user 5 | from aprec.evaluation.samplers.sampler import TargetItemSampler 6 | 7 | 8 | class RandomTargetItemSampler(TargetItemSampler): 9 | def get_sampled_ranking_requests(self): 10 | all_items = set() 11 | for action in self.actions: 12 | all_items.add(action.item_id) 13 | items = list(all_items) 14 | by_user_test = group_by_user(self.test) 15 | result = [] 16 | for user_id in by_user_test: 17 | target_items = 
set(action.item_id for action in by_user_test[user_id]) 18 | while(len(target_items) < self.target_size): 19 | item_ids = np.random.choice(items, 20 | self.target_size - len(target_items),replace=False) 21 | for item_id in item_ids: 22 | if item_id not in target_items: 23 | target_items.add(item_id) 24 | result.append(ItemsRankingRequest(user_id=user_id, item_ids=list(target_items))) 25 | return result -------------------------------------------------------------------------------- /evaluation/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | class TargetItemSampler(object): 2 | def __init__(self, target_size) -> None: 3 | super().__init__() 4 | self.target_size = target_size 5 | 6 | def set_actions(self, all_actions, test_actions): 7 | self.actions = all_actions 8 | self.test = test_actions 9 | 10 | 11 | def get_sampled_ranking_requests(self): 12 | raise NotImplementedError() -------------------------------------------------------------------------------- /evaluation/statistical_signifficance_test.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import json 3 | import os 4 | from argparse import ArgumentParser 5 | from collections import defaultdict 6 | from scipy.stats import ttest_ind 7 | 8 | def get_arguments(): 9 | parser = ArgumentParser() 10 | parser.add_argument("--predictions-dir", required=True) 11 | parser.add_argument("--output-file", required=True) 12 | return parser.parse_args() 13 | 14 | 15 | def process(arguments): 16 | metrics = defaultdict(lambda: defaultdict(list)) 17 | for filename in os.listdir(arguments.predictions_dir): 18 | if filename.endswith(".json.gz"): 19 | in_file = gzip.open(os.path.join(arguments.predictions_dir, filename)) 20 | recommender_name = ".".join(filename.split(".")[:-2]) 21 | elif filename.endswith(".json"): 22 | in_file = open(os.path.join(arguments.predictions_dir, filename)) 23 | recommender_name = ".".join(filename.split(".")[:-1]) 24 | else: 25 | continue 26 | for line in in_file: 27 | user_doc = json.loads(line) 28 | for metric in user_doc["metrics"]: 29 | metrics[metric][recommender_name].append(user_doc["metrics"][metric]) 30 | result = defaultdict(lambda: defaultdict(dict)) 31 | for metric in metrics: 32 | for recommender_name_1 in metrics[metric]: 33 | rec_1_sample = metrics[metric][recommender_name_1] 34 | for recommender_name_2 in metrics[metric]: 35 | rec_2_sample = metrics[metric][recommender_name_2] 36 | t, p_value = ttest_ind(rec_1_sample, rec_2_sample) 37 | result[recommender_name_1][metric][recommender_name_2] = p_value 38 | with open(arguments.output_file, 'w') as output: 39 | output.write(json.dumps(result, indent=4)) 40 | 41 | 42 | def main(): 43 | arguments = get_arguments() 44 | process(arguments) 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 |
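Both significance scripts reduce to an unpaired t-test over per-user metric samples, with a Bonferroni correction when several metrics are compared at once; a toy sketch with fabricated numbers (not repository code):

    import numpy as np
    from scipy.stats import ttest_ind

    # per-user ndcg@10 samples for two recommenders (made up for illustration)
    model_a = np.array([0.21, 0.35, 0.10, 0.44, 0.27])
    model_b = np.array([0.25, 0.41, 0.15, 0.40, 0.33])
    t, p_value = ttest_ind(model_a, model_b)
    # comparing m metrics at once inflates false positives, so scale the p-value
    m = 4
    p_value_bonferroni = min(1.0, p_value * m)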
predictions_file_1 = args.first 16 | predictions_file_2 = args.second 17 | first_name = os.path.basename(predictions_file_1).removesuffix(".json.gz") # removesuffix, not rstrip: rstrip strips matching characters, not a suffix 18 | second_name = os.path.basename(predictions_file_2).removesuffix(".json.gz") 19 | 20 | def get_metrics(doc): 21 | result = doc['metrics'] 22 | if 'sampled_metrics' in doc: 23 | for key in doc['sampled_metrics']: 24 | result[f"sampled_{key}"] = doc['sampled_metrics'][key] 25 | return result 26 | 27 | def read_data(filename): 28 | result = [] 29 | data = json.load(gzip.open(filename)) 30 | for doc in data: 31 | metrics = get_metrics(doc) 32 | result.append(metrics) 33 | return pd.DataFrame(result) 34 | 35 | df1 = read_data(predictions_file_1) 36 | df2 = read_data(predictions_file_2) 37 | 38 | overlap_columns = set(df1.columns).intersection(set(df2.columns)) 39 | 40 | if args.metrics is not None: 41 | overlap_columns = overlap_columns.intersection(set(args.metrics.split(","))) 42 | 43 | 44 | docs = [] 45 | 46 | for column_name in overlap_columns: 47 | df1_series = df1[column_name] 48 | df2_series = df2[column_name] 49 | 50 | mean1 = df1_series.mean() 51 | mean2 = df2_series.mean() 52 | doc = {} 53 | doc["metric_name"] = column_name 54 | doc[first_name] = mean1 55 | doc[second_name] = mean2 56 | doc["difference"] = mean2 - mean1 57 | doc["difference_pct"] = (mean2 - mean1) * 100 / mean1 58 | t, pval = ttest_ind(df1_series, df2_series) 59 | doc["p_value"] = pval 60 | doc["p_value_bonferroni"] = pval * len(overlap_columns) # Bonferroni correction for testing multiple metrics 61 | docs.append(doc) 62 | 63 | result = pd.DataFrame(docs) 64 | result['significant_0.05'] = result["p_value_bonferroni"] < 0.05 65 | result['significant_0.01'] = result["p_value_bonferroni"] < 0.01 66 | result['significant_0.001'] = result["p_value_bonferroni"] < 0.001 67 | result['significant_0.0001'] = result["p_value_bonferroni"] < 0.0001 68 | 69 | with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.expand_frame_repr', False): # more options can be specified also 70 | print(result) 71 | 72 | -------------------------------------------------------------------------------- /losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/losses/__init__.py -------------------------------------------------------------------------------- /losses/bce.py: -------------------------------------------------------------------------------- 1 | from aprec.losses.loss import ListWiseLoss 2 | import tensorflow as tf 3 | 4 | 5 | class BCELoss(ListWiseLoss): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(**kwargs) 8 | self.__name__ = "BCE" 9 | self.less_is_better = True 10 | 11 | def calc_per_list(self, y_true_raw, y_pred): 12 | eps = tf.constant(1e-8, y_pred.dtype) 13 | y_true = tf.cast(y_true_raw, y_pred.dtype) 14 | is_target = tf.cast((y_true >= -eps), y_pred.dtype) 15 | trues = y_true*is_target 16 | pos = trues*tf.math.softplus(-y_pred) * is_target 17 | neg = (1.0 - trues)*tf.math.softplus(y_pred) * is_target 18 | num_targets = tf.reduce_sum(is_target, axis=1) 19 | ce_sum = tf.reduce_sum(pos + neg, axis=1) 20 | res_sum = tf.math.divide_no_nan(ce_sum, num_targets) 21 | return res_sum 22 | 23 | def __call__(self, y_true_raw, y_pred): 24 | y_true = tf.cast(y_true_raw, y_pred.dtype) 25 | eps = tf.constant(1e-8, y_pred.dtype) 26 | is_target = tf.cast((y_true >= -eps), y_pred.dtype) 27 | trues = y_true*is_target 28 | pos = trues*tf.math.softplus(-y_pred) * is_target 29 | neg = (1.0 - 
trues)*tf.math.softplus(y_pred) * is_target 30 | num_targets = tf.reduce_sum(is_target) 31 | ce_sum = tf.reduce_sum(pos + neg) 32 | res_sum = tf.math.divide_no_nan(ce_sum, num_targets) 33 | return res_sum -------------------------------------------------------------------------------- /losses/bpr.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from aprec.losses.loss_utils import get_pairwise_diff_batch, get_truncated, masked_softmax 4 | from aprec.losses.loss import ListWiseLoss, Loss 5 | 6 | 7 | #BPR loss as described in the original paper. 8 | #https://arxiv.org/abs/1205.2618 9 | #This loss doesn't include the regularization term: in TensorFlow it should be done on the model side (e.g. include l2 regularization in the embeddings) 10 | #Setting softmax_weighted to True turns this loss into the BPR-max loss, as described in the GRU4Rec+ paper 11 | #https://dl.acm.org/doi/abs/10.1145/3269206.3271761 12 | class BPRLoss(ListWiseLoss): 13 | def __init__(self, num_items=None, batch_size=None, max_positives=10, pred_truncate=None, softmax_weighted=False): 14 | super().__init__(num_items, batch_size) 15 | self.max_positives = max_positives 16 | self.softmax_weighted = softmax_weighted 17 | self.pred_truncate = pred_truncate 18 | 19 | def calc_per_list(self, y_true, y_pred): 20 | top_true = tf.math.top_k(y_true, self.max_positives) 21 | pred_ordered_by_true = tf.gather(y_pred, top_true.indices, batch_dims=1) 22 | 23 | pred, true_ordered_by_pred = get_truncated(y_true, y_pred, self.pred_truncate) 24 | pred_size = tf.shape(pred)[-1] 25 | 26 | mask = tf.cast((get_pairwise_diff_batch(top_true.values, true_ordered_by_pred, self.max_positives, pred_size) > 0), tf.float32) 27 | values = get_pairwise_diff_batch(pred_ordered_by_true, pred, self.max_positives, pred_size) 28 | sigmoid = -tf.math.log_sigmoid(values) 29 | sigmoid = sigmoid * mask 30 | if self.softmax_weighted: 31 | pred_tile = tf.tile(tf.expand_dims(pred, 1), [1, self.max_positives, 1]) 32 | mask_transposed = tf.transpose(mask, perm=[0, 2, 1]) 33 | pred_softmax = tf.transpose(masked_softmax(pred_tile, mask_transposed), perm=[0, 2, 1]) 34 | sigmoid *= pred_softmax 35 | result = tf.reduce_sum(sigmoid, axis=[1, 2]) / tf.reduce_sum(mask, axis=[1, 2]) 36 | return result 37 | 38 | def __call__(self, y_true, y_pred): 39 | result = self.calc_per_list(y_true, y_pred) 40 | return tf.reduce_mean(result) 41 | 42 | -------------------------------------------------------------------------------- /losses/climf.py: -------------------------------------------------------------------------------- 1 | #CLIMF Loss Implementation 2 | #See paper: 3 | #https://dl.acm.org/doi/10.1145/2365952.2365981 4 | 5 | import tensorflow as tf 6 | 7 | from aprec.losses.loss_utils import my_map 8 | from aprec.losses.loss import Loss 9 | 10 | 11 | class CLIMFLoss(Loss): 12 | def __init__(self, num_items=None, batch_size=None, max_positives=10): 13 | super().__init__(num_items, batch_size) 14 | self.max_positives = max_positives 15 | 16 | 17 | def get_pairwise_diffs_matrix(self, x, y): 18 | a, b = tf.meshgrid(tf.transpose(y), x) 19 | return tf.subtract(b, a) 20 | 21 | def get_pairwise_diffs_matrices(self, a, b): 22 | result = my_map(self.get_pairwise_diffs_matrix, (a, b)) 23 | return result 24 | 25 | #equation (9) from the paper 26 | def __call__(self, y_true, y_pred): 27 | EPS = 1e-6 28 | top_true = tf.math.top_k(y_true, self.max_positives) 29 | true_values = top_true.values 30 | pred_ordered = 
tf.gather(y_pred, top_true.indices, batch_dims=1) 31 | values = self.get_pairwise_diffs_matrices(pred_ordered, y_pred) 32 | values_sigmoid = tf.math.sigmoid(values) 33 | tiled_values = tf.tile(true_values, [1, y_pred.shape[-1]]) 34 | mask = tf.reshape(tiled_values, (self.batch_size, self.num_items, true_values.shape[1])) 35 | mask = tf.transpose(mask, perm=[0, 2, 1]) 36 | second_climf_term = tf.math.reduce_sum(tf.math.log(1 - mask*values_sigmoid + EPS), axis=1) 37 | first_climf_term = tf.math.log_sigmoid(y_pred) 38 | result = -tf.reduce_sum(y_true*(second_climf_term + first_climf_term)) 39 | return result -------------------------------------------------------------------------------- /losses/get_loss.py: -------------------------------------------------------------------------------- 1 | from aprec.losses.bpr import BPRLoss 2 | from aprec.losses.bce import BCELoss 3 | from aprec.losses.climf import CLIMFLoss 4 | from aprec.losses.loss import ListWiseLoss, Loss 5 | from aprec.losses.softmax_crossentropy import SoftmaxCrossEntropy 6 | from aprec.losses.top1 import TOP1Loss 7 | from aprec.losses.lambda_gamma_rank import LambdaGammaRankLoss 8 | from aprec.losses.xendcg import XENDCGLoss 9 | import tensorflow as tf 10 | 11 | losses = { 12 | 'xendcg': XENDCGLoss, 13 | 'bpr': BPRLoss, 14 | 'climf': CLIMFLoss, 15 | 'bce': BCELoss, 16 | 'top1': TOP1Loss, 17 | 'lambdarank': LambdaGammaRankLoss, 18 | 'softmax_ce': SoftmaxCrossEntropy, 19 | } 20 | 21 | def get_loss(loss_name, items_num, batch_size, max_positives=40, 22 | internal_dtype=tf.float32, lambda_normalization=True, 23 | lambdarank_pred_truncate=None, 24 | lambdarank_bce_weight=0.0, 25 | ): 26 | if loss_name == 'lambdarank': 27 | return LambdaGammaRankLoss(num_items=items_num, batch_size=batch_size, ndcg_at=max_positives, 28 | dtype=internal_dtype, 29 | lambda_normalization=lambda_normalization, 30 | pred_truncate_at=lambdarank_pred_truncate, 31 | bce_grad_weight=lambdarank_bce_weight) 32 | return losses[loss_name](num_items=items_num, batch_size=batch_size) 33 | 34 | 35 | listwise_losses = { 36 | 'softmax_ce': SoftmaxCrossEntropy, 37 | 'lambdarank': LambdaGammaRankLoss, 38 | 'bce': BCELoss, 39 | } 40 | 41 | def listwise_loss_from_config(loss_name, loss_params) -> ListWiseLoss: 42 | return listwise_losses[loss_name](**loss_params) # look up in the listwise-only registry 43 | 44 | -------------------------------------------------------------------------------- /losses/items_masking_loss_proxy.py: -------------------------------------------------------------------------------- 1 | from aprec.losses.loss import ListWiseLoss, Loss 2 | import tensorflow as tf 3 | 4 | 5 | class ItemsMaksingLossProxy(Loss): 6 | def __init__(self, listwise_loss: ListWiseLoss, negatives_per_positve, sequence_length, num_items=None, batch_size=None, add_positive = True): 7 | super().__init__(num_items, batch_size) 8 | self.listwise_loss = listwise_loss 9 | self.negatives_per_positive = negatives_per_positve 10 | self.sequence_length = sequence_length 11 | if add_positive: 12 | self.listwise_loss.set_num_items(negatives_per_positve + 1) 13 | else: 14 | self.listwise_loss.set_num_items(negatives_per_positve) 15 | self.less_is_better = listwise_loss.less_is_better 16 | self.__name__ = self.listwise_loss.__name__ + "_proxy" 17 | self.add_positive = add_positive 18 | 19 | def set_batch_size(self, batch_size): 20 | super().set_batch_size(batch_size) 21 | self.listwise_loss.set_batch_size(self.batch_size * self.sequence_length) 22 | 23 | def __call__(self, y_true, y_pred): 24 | n_targets = self.negatives_per_positive 25 | 
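# Each masked position gets its own candidate list: negatives_per_positive sampled
# negatives, plus the positive item when add_positive is True. The (batch, seq_len,
# n_targets) tensors are therefore flattened to (batch * seq_len) lists of length
# n_targets before delegating to the wrapped listwise loss.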
if self.add_positive: 26 | n_targets += 1 27 | ytrue_reshaped = tf.reshape(y_true, (self.batch_size * self.sequence_length, n_targets)) 28 | ypred_reshaped = tf.cast(tf.reshape(y_pred, (self.batch_size * self.sequence_length, n_targets)), 'float32') 29 | result = self.listwise_loss.loss_per_list(ytrue_reshaped, ypred_reshaped) 30 | return result 31 | 32 | -------------------------------------------------------------------------------- /losses/logit_norm.py: -------------------------------------------------------------------------------- 1 | from aprec.losses.loss import ListWiseLoss 2 | import tensorflow as tf 3 | 4 | #https://arxiv.org/abs/2205.09310 5 | class LogitNormLoss(ListWiseLoss): #used by bert 6 | def __init__(self, temperature=1, *args, **kwargs): 7 | super().__init__() 8 | self.__name__ = "LogitNormLoss" 9 | self.less_is_better = True 10 | self.temperature = temperature 11 | 12 | def calc_per_list(self, y_true, y_pred): 13 | norms = tf.expand_dims(tf.norm(y_pred, axis=-1), -1) 14 | logit_norms = tf.math.divide_no_nan(y_pred, norms)/self.temperature 15 | return tf.nn.softmax_cross_entropy_with_logits(y_true, logit_norms) 16 | -------------------------------------------------------------------------------- /losses/loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | class Loss(): 3 | def __init__(self, num_items=None, batch_size=None): 4 | self.num_items = num_items 5 | self.batch_size = batch_size 6 | 7 | def __call__(self, y_true, y_pred): 8 | raise NotImplementedError 9 | 10 | def set_num_items(self, num_items): 11 | self.num_items = num_items 12 | 13 | def set_batch_size(self, batch_size): 14 | self.batch_size = batch_size 15 | 16 | def get_config(self): 17 | result = {"num_items": self.num_items, "batch_size": self.batch_size} 18 | return result 19 | 20 | @classmethod 21 | def from_config(cls, config): 22 | return cls(num_items=config['num_items'], batch_size=config['batch_size']) 23 | 24 | class ListWiseLoss(Loss): 25 | @tf.custom_gradient 26 | def loss_per_list(self, y_true, y_pred, sample_weights=None): 27 | with tf.GradientTape() as g: 28 | g.watch(y_pred) 29 | ignore_mask = tf.cast(y_true == -100, y_pred.dtype) #-100 is the default ignore value 30 | use_mask = 1.0 - ignore_mask 31 | noise = ignore_mask * tf.random.uniform(y_pred.shape, 0.0, 1.0, dtype=y_pred.dtype) 32 | listwise_ytrue = use_mask * tf.cast(y_true, y_pred.dtype) + noise 33 | listwise_loss = self.calc_per_list(listwise_ytrue, y_pred) 34 | use_loss_mask = tf.squeeze(use_mask[:,:1], axis=1) 35 | average_loss = tf.reduce_sum(listwise_loss * use_loss_mask) / tf.reduce_sum(use_loss_mask) 36 | loss_grads = g.gradient(average_loss, y_pred) 37 | 38 | if sample_weights is not None: #explicit None check: truth-testing a tensor would raise 39 | weighted_mask = use_loss_mask * sample_weights[:,0] 40 | average_loss = tf.reduce_sum(listwise_loss * weighted_mask) / tf.reduce_sum(weighted_mask) 41 | 42 | def grad(dy): #ensure that we don't utilize gradients for ignored items 43 | y_true_grad = tf.zeros_like(y_true) 44 | y_pred_grad = dy * use_mask * loss_grads 45 | if sample_weights is not None: 46 | y_pred_grad = sample_weights * y_pred_grad 47 | sample_weights_grad = tf.zeros_like(sample_weights) 48 | return y_true_grad, y_pred_grad, sample_weights_grad 49 | return y_true_grad, y_pred_grad 50 | 51 | return average_loss, grad 52 | 53 | 54 | def calc_per_list(self, y_true, y_pred): 55 | raise NotImplementedError 56 | -------------------------------------------------------------------------------- /losses/loss_utils.py:
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | # https://stackoverflow.com/questions/37086098/does-tensorflow-map-fn-support-taking-more-than-one-tensor 4 | 5 | 6 | def my_map(fn, arrays, dtype=tf.float32): 7 | # assumes all arrays have the same leading dim 8 | indices = tf.range(tf.shape(arrays[0])[0]) 9 | out = tf.map_fn(lambda ii: fn(*[array[ii] for array in arrays]), indices, dtype=dtype) 10 | return out 11 | 12 | def get_pairwise_diff_batch(a, b, a_size, b_size): 13 | a_tile = tf.tile(tf.expand_dims(a, 1), [1, b_size, 1]) 14 | b_tile = tf.tile(tf.expand_dims(b, 2), [1, 1, a_size]) 15 | result = a_tile - b_tile 16 | return result 17 | 18 | 19 | def get_truncated(y_true, y_pred, truncate_at): 20 | if truncate_at is not None: 21 | top_pred = tf.math.top_k(y_pred, truncate_at) 22 | pred = top_pred.values 23 | true_ordered_by_pred = tf.gather(y_true, top_pred.indices, batch_dims=1) 24 | else: 25 | pred = y_pred 26 | true_ordered_by_pred = y_true 27 | return pred, true_ordered_by_pred 28 | 29 | def masked_softmax(x, mask): 30 | exp = tf.math.exp(x) * mask 31 | exp_sum = tf.expand_dims(tf.reduce_sum(exp, -1), -1) # renamed from `sum` to avoid shadowing the builtin 32 | result = tf.math.divide_no_nan(exp, exp_sum) 33 | return result 34 | -------------------------------------------------------------------------------- /losses/mean_ypred_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from aprec.losses.loss import Loss 4 | 5 | #This is a dummy loss function that does not use y_true. 6 | #It can be useful when the model itself already computes the loss. 7 | #Example: the BERT masking model 8 | 9 | class MeanPredLoss(Loss): 10 | def __init__(self, num_items=None, batch_size=None, name="mean_ypred"): 11 | super().__init__(num_items, batch_size) 12 | self.__name__ = name 13 | self.less_is_better = True 14 | 15 | def __call__(self, y_true, y_pred): 16 | result = tf.reduce_mean(y_pred) 17 | return result 18 | -------------------------------------------------------------------------------- /losses/softmax_crossentropy.py: -------------------------------------------------------------------------------- 1 | from aprec.losses.loss import ListWiseLoss 2 | import tensorflow as tf 3 | 4 | class SoftmaxCrossEntropy(ListWiseLoss): #used by bert 5 | def __init__(self, *args, **kwargs): 6 | super().__init__() 7 | self.__name__ = "SoftmaxCrossEntropy" 8 | self.less_is_better = True 9 | 10 | def calc_per_list(self, y_true, y_pred): 11 | return tf.nn.softmax_cross_entropy_with_logits(y_true, y_pred) 12 | 13 | -------------------------------------------------------------------------------- /losses/top1.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from aprec.losses.loss_utils import get_pairwise_diff_batch, get_truncated, masked_softmax 3 | 4 | from aprec.losses.loss import Loss 5 | 6 | #TOP1 loss as defined in the GRU4Rec paper https://arxiv.org/pdf/1511.06939 7 | #We assume that there is only one positive sample. 8 | #If there is more than one positive, one will be sampled randomly.
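#For a single positive with score r_i and negatives with scores r_j, the per-list
#loss is mean_j( sigmoid(r_j - r_i) + sigmoid(r_j^2) ); the second term pushes
#negative scores towards zero (a sketch of the equation from the GRU4Rec paper).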
9 | 10 | #Setting softmax_weighted to True turns this loss into the TOP1-Max loss, described in the GRU4Rec+ paper 11 | #https://dl.acm.org/doi/abs/10.1145/3269206.3271761 12 | class TOP1Loss(Loss): 13 | def __init__(self, num_items=None, batch_size=None, pred_truncate=None, softmax_weighted=False): 14 | super().__init__(num_items, batch_size) 15 | self.pred_truncate = pred_truncate 16 | self.softmax_weighted = softmax_weighted 17 | 18 | def __call__(self, y_true, y_pred): 19 | top_true = tf.math.top_k(y_true) 20 | positive_true = top_true.values 21 | positive_pred = tf.gather(y_pred, top_true.indices, batch_dims=1) 22 | pred, true_ordered_by_pred = get_truncated(y_true, y_pred, self.pred_truncate) 23 | diff = pred - positive_pred 24 | mask = tf.cast(true_ordered_by_pred < positive_true, 'float32') 25 | sigm = tf.sigmoid(diff) * mask 26 | square = tf.sigmoid(pred * pred) * mask 27 | result = (sigm + square) 28 | if self.softmax_weighted: 29 | pred_softmax = masked_softmax(pred, mask) 30 | result *= pred_softmax 31 | result_mean = tf.reduce_sum(result, axis=1) / tf.reduce_sum(mask, axis=1) 32 | return tf.reduce_mean(result_mean) 33 | -------------------------------------------------------------------------------- /losses/xendcg.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from aprec.losses.loss import Loss 4 | 5 | 6 | class XENDCGLoss(Loss): 7 | def __init__(self, num_items=None, batch_size=None): 8 | super().__init__(num_items, batch_size) 9 | self.__name__ = 'xendcg' 10 | 11 | def __call__(self, true, pred): 12 | eps = 1e-5 13 | gamma = tf.random.uniform(shape=(self.batch_size, self.num_items)) 14 | true_transformed = (2 ** true) - gamma 15 | true_transformed_sum = tf.expand_dims(tf.math.reduce_sum(true_transformed, axis=1),1) 16 | true_probs = true_transformed / (true_transformed_sum + eps) 17 | 18 | pred_transformed = tf.exp(pred) 19 | pred_transformed_sum = tf.expand_dims(tf.math.reduce_sum(pred_transformed, axis=1),1) 20 | pred_probs = pred_transformed / (pred_transformed_sum + eps) 21 | 22 | result = -tf.math.reduce_sum(true_probs * tf.math.log(pred_probs), axis=1) 23 | return result 24 | -------------------------------------------------------------------------------- /recommenders/BERT4rec/Readme.md: -------------------------------------------------------------------------------- 1 | This code is ported from the original BERT4rec implementation.
2 | 3 | The original code is taken from here: 4 | https://github.com/FeiSun/BERT4Rec -------------------------------------------------------------------------------- /recommenders/BERT4rec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/BERT4rec/__init__.py -------------------------------------------------------------------------------- /recommenders/BERT4rec/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from collections import defaultdict 3 | 4 | 5 | def data_partition(fname): 6 | usernum = 0 7 | itemnum = 0 8 | User = defaultdict(list) 9 | user_train = {} 10 | user_test = {} 11 | # assume user/item index starting from 1 12 | f = open(fname, 'r') 13 | for line in f: 14 | u, i = line.rstrip().split(' ') 15 | u = int(u) 16 | i = int(i) 17 | usernum = max(u, usernum) 18 | itemnum = max(i, itemnum) 19 | User[u].append(i) 20 | 21 | for user in User: 22 | user_train[user] = User[user][:-1] 23 | user_test[user] = [User[user][-1]] 24 | return [user_train, user_test, usernum, itemnum] 25 | 26 | -------------------------------------------------------------------------------- /recommenders/BERT4rec/vocab.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | 4 | def convert_by_vocab(vocab, items): 5 | """Converts a sequence of [tokens|ids] using the vocab; unknown entries are skipped.""" 6 | output = [] 7 | for item in items: 8 | try: 9 | output.append(vocab[item]) 10 | except KeyError: 11 | pass 12 | return output 13 | 14 | 15 | class FreqVocab(object): 16 | """Runs end-to-end tokenization.""" 17 | 18 | def __init__(self, user_to_list): 19 | # layout of the vocabulary: 20 | # item_id based on freq 21 | # special token 22 | # user_id based on nothing 23 | self.counter = Counter( 24 | ) #sorted(self.items(), key=_itemgetter(1), reverse=True) 25 | self.user_set = set() 26 | for u, item_list in user_to_list.items(): 27 | self.counter.update(item_list) 28 | self.user_set.add(str(u)) 29 | 30 | self.user_count = len(self.user_set) 31 | self.item_count = len(self.counter.keys()) 32 | self.special_tokens = {"[pad]", "[MASK]", '[NO_USE]'} 33 | self.token_to_ids = {} # index begins from 1 34 | #first items 35 | for token, count in self.counter.most_common(): 36 | self.token_to_ids[token] = len(self.token_to_ids) + 1 37 | 38 | # then special tokens 39 | for token in self.special_tokens: 40 | self.token_to_ids[token] = len(self.token_to_ids) + 1 41 | 42 | # then user 43 | # for user in self.user_set: 44 | # self.token_to_ids[user] = len(self.token_to_ids) + 1 45 | 46 | self.id_to_tokens = {v: k for k, v in self.token_to_ids.items()} 47 | self.vocab_words = list(self.token_to_ids.keys()) 48 | 49 | def convert_tokens_to_ids(self, tokens): 50 | return convert_by_vocab(self.token_to_ids, tokens) 51 | 52 | def convert_ids_to_tokens(self, ids): 53 | return convert_by_vocab(self.id_to_tokens, ids) 54 | 55 | def get_vocab_words(self): 56 | return self.vocab_words # not in order 57 | 58 | def get_item_count(self): 59 | return self.item_count 60 | 61 | def get_user_count(self): 62 | return self.user_count 63 | 64 | def get_items(self): 65 | return list(self.counter.keys()) 66 | 67 | def get_users(self): 68 | return self.user_set 69 | 70 | def get_special_token_count(self): 71 | return len(self.special_tokens) 72 | 73 | def get_special_token(self): 74 | return 
self.special_tokens 75 | 76 | def get_vocab_size(self): 77 | return self.get_item_count() + self.get_special_token_count() + 1 #self.get_user_count() 78 | -------------------------------------------------------------------------------- /recommenders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/__init__.py -------------------------------------------------------------------------------- /recommenders/conditional_top_recommender.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from aprec.api.user import User 3 | from aprec.recommenders.recommender import Recommender 4 | 5 | 6 | class ConditionalTopRecommender(Recommender): 7 | """ 8 | This recommender calculates top items based on some condition. For example, we want to recommend 9 | the most popular hotel in the city, not globally (for global top we can use @TopRecommender). 10 | """ 11 | def __init__(self, conditional_field: str): 12 | self.conditional_field: str = conditional_field 13 | self.items_counts: dict = dict() 14 | self.precalculated_top_items: dict = dict() 15 | self.user_field_values: dict = dict() 16 | 17 | def add_user(self, user: User): 18 | if self.conditional_field in user.cat_features: 19 | self.user_field_values[user.user_id] = user.cat_features[self.conditional_field] 20 | 21 | 22 | def add_action(self, action): 23 | 24 | if self.conditional_field in action.data: 25 | field_value = action.data[self.conditional_field] 26 | elif action.user_id in self.user_field_values: 27 | field_value = self.user_field_values[action.user_id] 28 | else: 29 | field_value = "N/A" 30 | if field_value not in self.items_counts: 31 | self.items_counts[field_value] = Counter() 32 | self.user_field_values[action.user_id] = field_value 33 | 34 | if action.item_id is not None: 35 | self.items_counts[field_value][action.item_id] += 1 36 | 37 | def rebuild_model(self): 38 | self.precalculated_top_items = { 39 | field_value: counter.most_common() for field_value, counter in self.items_counts.items() 40 | } 41 | 42 | def recommend(self, user_id, limit, features=None): 43 | if user_id not in self.user_field_values: 44 | field_value = "N/A" 45 | else: 46 | field_value = self.user_field_values[user_id] 47 | return self.precalculated_top_items.get(field_value, [])[:limit] 48 | 49 | def get_similar_items(self, item_id, limit): 50 | raise NotImplementedError 51 | 52 | def name(self): 53 | return "ConditionalTopItemsRecommender" 54 | -------------------------------------------------------------------------------- /recommenders/constant_recommender.py: -------------------------------------------------------------------------------- 1 | from .recommender import Recommender 2 | 3 | class ConstantRecommender(Recommender): 4 | def __init__(self, recommendations): 5 | super().__init__() 6 | self.recommendations = recommendations 7 | 8 | def name(self): 9 | return "ConstantRecommender" 10 | 11 | def add_action(self, action): 12 | pass 13 | 14 | def rebuild_model(self): 15 | pass 16 | 17 | def recommend(self, user_id, limit, features=None): 18 | return self.recommendations[:limit] 19 | 20 | def get_similar_items(self, item_id, limit): 21 | return self.recommendations[:limit] 22 | 23 | def to_str(self): 24 | raise(NotImplementedError) 25 | 26 | def from_str(self): 27 | raise(NotImplementedError) 28 | 
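A minimal usage sketch for ConstantRecommender (illustrative only; the item ids and scores below are made up, and the recommendations follow the (item_id, score) tuple convention used across the repo):

from aprec.recommenders.constant_recommender import ConstantRecommender

# always returns the same fixed list, truncated to the requested limit
recommender = ConstantRecommender([("item_1", 0.9), ("item_2", 0.8)])
print(recommender.recommend(user_id="u1", limit=1))  # [("item_1", 0.9)]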
-------------------------------------------------------------------------------- /recommenders/featurizer.py: -------------------------------------------------------------------------------- 1 | from aprec.api.action import Action 2 | from aprec.api.user import User 3 | from aprec.api.item import Item 4 | 5 | class Featurizer(object): 6 | def __init__(self): 7 | pass 8 | 9 | def add_action(self, action: Action): 10 | pass 11 | 12 | def add_user(self, user: User): 13 | pass 14 | 15 | def add_item(self, item: Item): 16 | pass 17 | 18 | def get_features(self, user_id, item_id): 19 | pass 20 | 21 | def build(self): 22 | pass 23 | -------------------------------------------------------------------------------- /recommenders/first_order_mc.py: -------------------------------------------------------------------------------- 1 | from aprec.api.action import Action 2 | from collections import Counter, defaultdict 3 | from aprec.recommenders.recommender import Recommender 4 | 5 | 6 | class FirstOrderMarkovChainRecommender(Recommender): 7 | def __init__(self, cache_items=1000): 8 | super().__init__() 9 | self.user_actions = defaultdict(list) 10 | self.cache_items = cache_items 11 | 12 | def add_action(self, action: Action): 13 | self.user_actions[action.user_id].append(action.item_id) 14 | 15 | def rebuild_model(self): 16 | self.item_pairs_counter = defaultdict(Counter) 17 | for user in self.user_actions: 18 | for i in range(1, len(self.user_actions[user])): 19 | src = self.user_actions[user][i-1] 20 | dst = self.user_actions[user][i] 21 | self.item_pairs_counter[src][dst] += 1 22 | 23 | self.cache = defaultdict(list) 24 | for item in self.item_pairs_counter: 25 | self.cache[item] = self.item_pairs_counter[item].most_common(self.cache_items) 26 | 27 | def recommend(self, user_id, limit: int, features=None): 28 | if user_id not in self.user_actions: 29 | return [] 30 | return self.cache[self.user_actions[user_id][-1]][:limit] 31 | 32 | def get_item_rankings(self): 33 | result = {} 34 | for request in self.items_ranking_requests: 35 | user_result = [] 36 | user_id = request.user_id 37 | last_item = self.user_actions[user_id][-1] 38 | scores = self.item_pairs_counter[last_item] 39 | 40 | for item_id in request.item_ids: 41 | score = scores.get(item_id, 0) 42 | user_result.append((item_id, score)) 43 | user_result.sort(key=lambda x: -x[1]) 44 | result[request.user_id] = user_result 45 | return result 46 | -------------------------------------------------------------------------------- /recommenders/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/metrics/__init__.py -------------------------------------------------------------------------------- /recommenders/metrics/ndcg.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import tensorflow as tf 4 | 5 | from aprec.losses.loss import ListWiseLoss 6 | 7 | class KerasNDCG(ListWiseLoss): 8 | def __init__(self, k): 9 | self.k = k 10 | discounts = [] 11 | for i in range(1, k+1): 12 | discounts.append(1 / math.log2(i + 1)) 13 | self.discounts = tf.cast(tf.constant(tf.expand_dims(discounts, 1)), 'float32') 14 | self.__name__ = f"ndcg_at_{k}" 15 | self.less_is_better = False 16 | 17 | def dcg(self, scores): 18 | gain = tf.pow(2.0, scores) - 1 19 | return gain @ self.discounts 20 | 21 | def calc_per_list(self, y_true, y_pred): 22 | return self.__call__(y_true, 
y_pred) 23 | 24 | def __call__(self, y_true, y_pred): 25 | eps = 0.000001 26 | top_k = tf.nn.top_k(y_pred, self.k) 27 | gains = tf.gather(y_true, top_k.indices, batch_dims=1) 28 | dcg_val = self.dcg(tf.cast(gains, 'float32')) 29 | 30 | ideal_top_k = tf.nn.top_k(y_true, self.k) 31 | ideal_gains = tf.gather(y_true, ideal_top_k.indices, batch_dims=1) 32 | idcg_val = self.dcg(tf.cast(ideal_gains, 'float32')) 33 | return float(tf.reduce_mean(dcg_val / (idcg_val + eps))) 34 | 35 | -------------------------------------------------------------------------------- /recommenders/metrics/success.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.keras.backend as K 3 | 4 | class KerasSuccess(object): 5 | def __init__(self, k): 6 | self.k = k 7 | self.__name__ = f"Success_at_{k}" 8 | self.less_is_better = False 9 | 10 | def __call__(self, y_true, y_pred): 11 | top_k = tf.nn.top_k(y_pred, self.k) 12 | gains = tf.gather(y_true, top_k.indices, batch_dims=1) 13 | user_success = K.sum(gains, axis=-1) 14 | return K.mean(user_success) 15 | 16 | -------------------------------------------------------------------------------- /recommenders/random_recommender.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from aprec.recommenders.recommender import Recommender 3 | 4 | 5 | class RandomRecommender(Recommender): 6 | def __init__(self): 7 | self.items_set = set() 8 | 9 | def add_action(self, action): 10 | self.items_set.add(action.item_id) 11 | 12 | def rebuild_model(self): 13 | self.items = list(self.items_set) 14 | 15 | def recommend(self, user_id, limit, features=None): 16 | recommended_items = np.random.choice(self.items, limit, replace=False) 17 | result = [] 18 | current_score = 1.0 19 | for item in recommended_items: 20 | result.append((item, current_score)) 21 | current_score *= 0.9 22 | return result 23 | -------------------------------------------------------------------------------- /recommenders/sequential/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/sequential/__init__.py -------------------------------------------------------------------------------- /recommenders/sequential/featurizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/sequential/featurizers/__init__.py -------------------------------------------------------------------------------- /recommenders/sequential/featurizers/hashing_featurizer.py: -------------------------------------------------------------------------------- 1 | import mmh3 2 | 3 | class HashingFeaturizer(object): 4 | def __init__(self, num_cat_hashes=3, cat_hashes_space=1000): 5 | self.num_cat_hashes = num_cat_hashes 6 | self.cat_hashes_space = cat_hashes_space 7 | 8 | def __call__(self, obj): 9 | result = [] 10 | if type(obj.cat_features) == dict: 11 | features = list(obj.cat_features.items()) 12 | else: 13 | features = obj.cat_features 14 | 15 | for feature in features: 16 | for hash_num in range(self.num_cat_hashes): 17 | val = f"{feature[0]}_" + str(feature[1]) + f"_hash{hash_num}" 18 | hash_val = mmh3.hash(val) % self.cat_hashes_space + 1 19 | result.append(hash_val) 20 | return result 21 | 
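A quick sketch of how HashingFeaturizer maps categorical features to hashed ids (illustrative; SimpleNamespace stands in for any object exposing a cat_features dict, such as the repo's User or Item objects):

from types import SimpleNamespace
from aprec.recommenders.sequential.featurizers.hashing_featurizer import HashingFeaturizer

featurizer = HashingFeaturizer(num_cat_hashes=2, cat_hashes_space=100)
obj = SimpleNamespace(cat_features={"country": "US"})
# one feature times two hash functions -> two ids, each in [1, 100]
print(featurizer(obj))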
-------------------------------------------------------------------------------- /recommenders/sequential/history_vectorizers/add_mask_history_vectorizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from aprec.recommenders.sequential.history_vectorizers.history_vectorizer import HistoryVectorizer 3 | 4 | class AddMaskHistoryVectorizer(HistoryVectorizer): 5 | def __call__(self, user_actions): 6 | mask = self.padding_value + 1 7 | if len(user_actions) >= self.sequence_len - 1: 8 | return np.array([action[1] for action in user_actions[-self.sequence_len + 1:]] + [mask]) 9 | else: 10 | n_special = self.sequence_len - 1 - len(user_actions) 11 | result_list = [self.padding_value] * n_special + [action[1] for action in user_actions] + [mask] 12 | return np.array(result_list) 13 | -------------------------------------------------------------------------------- /recommenders/sequential/history_vectorizers/default_history_vectorizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from aprec.recommenders.sequential.history_vectorizers.history_vectorizer import HistoryVectorizer 3 | 4 | class DefaultHistoryVectrizer(HistoryVectorizer): 5 | def __call__(self, user_actions): 6 | if len(user_actions) >= self.sequence_len: 7 | return np.array([action[1] for action in user_actions[-self.sequence_len:]]) 8 | else: 9 | n_special = self.sequence_len - len(user_actions) 10 | result_list = [self.padding_value] * n_special + [action[1] for action in user_actions] 11 | return np.array(result_list) 12 | 13 | 14 | -------------------------------------------------------------------------------- /recommenders/sequential/history_vectorizers/history_vectorizer.py: -------------------------------------------------------------------------------- 1 | class HistoryVectorizer(object): 2 | def __init__(self) -> None: 3 | self.sequence_len = None 4 | self.padding_value = None 5 | 6 | def set_sequence_len(self, sequence_len): 7 | self.sequence_len = sequence_len 8 | 9 | def set_padding_value(self, padding_value): 10 | self.padding_value = padding_value 11 | 12 | def __call__(self, user_actions): 13 | raise NotImplementedError 14 | -------------------------------------------------------------------------------- /recommenders/sequential/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/sequential/models/__init__.py -------------------------------------------------------------------------------- /recommenders/sequential/models/bert4rec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/sequential/models/bert4rec/__init__.py -------------------------------------------------------------------------------- /recommenders/sequential/models/bert4rec/special_items.py: -------------------------------------------------------------------------------- 1 | SPECIAL_ITEMS = { 2 | "PAD" : 0, 3 | "MASK": 1, 4 | "IGNORE": 2 5 | } -------------------------------------------------------------------------------- /recommenders/sequential/models/positional_encodings.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class 
SinePositionEncoding(tf.keras.layers.Layer): 5 | def __init__( 6 | self, 7 | seq_length, 8 | hidden_size, 9 | max_wavelength=10000, 10 | **kwargs, 11 | ): 12 | super().__init__(**kwargs) 13 | self.max_wavelength = max_wavelength 14 | self.seq_length = seq_length 15 | self.hidden_size = hidden_size 16 | 17 | def call(self, positions): 18 | seq_length = self.seq_length 19 | hidden_size = self.hidden_size 20 | position = tf.cast(tf.range(seq_length), self.compute_dtype) 21 | min_freq = tf.cast(1 / self.max_wavelength, dtype=self.compute_dtype) 22 | timescales = tf.pow( 23 | min_freq, 24 | tf.cast(2 * (tf.range(hidden_size) // 2), self.compute_dtype) 25 | / tf.cast(hidden_size, self.compute_dtype), 26 | ) 27 | angles = tf.expand_dims(position, 1) * tf.expand_dims(timescales, 0) 28 | cos_mask = tf.cast(tf.range(hidden_size) % 2, self.compute_dtype) 29 | sin_mask = 1 - cos_mask 30 | positional_encodings = ( 31 | tf.sin(angles) * sin_mask + tf.cos(angles) * cos_mask 32 | ) 33 | return tf.gather(positional_encodings, positions) 34 | 35 | 36 | class ExpPositionEncoding(tf.keras.layers.Layer): 37 | def __init__(self, seq_len, emb_size, init=3, **kwargs): 38 | super().__init__(**kwargs) 39 | self.seq_len = seq_len 40 | self.emb_size = emb_size 41 | pows_initializer = tf.random_uniform_initializer(-init, init) 42 | self.pow = tf.Variable(initial_value=pows_initializer(shape=(emb_size, )), trainable=True) 43 | 44 | 45 | def __call__(self, positions): 46 | w = tf.exp(self.pow) 47 | for i in range(len(positions.shape)): 48 | w = tf.expand_dims(w, 0) 49 | tiles = list(positions.shape) + [1] 50 | w = tf.tile(w, tiles) 51 | positions_norm = tf.cast((positions+1), 'float32')/(self.seq_len+1) 52 | pos = tf.tile(tf.expand_dims(positions_norm, -1), [1] * len(positions.shape) + [self.emb_size]) 53 | return tf.pow(pos, w) 54 | 55 | def get_pos_embedding(seq_len, emb_size, kind): 56 | if (kind == 'default') or (kind == 'learnable'): 57 | return tf.keras.layers.Embedding(seq_len, output_dim=emb_size, dtype='float32') 58 | 59 | if kind == 'exp': 60 | return ExpPositionEncoding(seq_len, emb_size) 61 | 62 | if kind == 'sin': 63 | return SinePositionEncoding(seq_len, emb_size) 64 | 65 | 66 | -------------------------------------------------------------------------------- /recommenders/sequential/models/sasrec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/sequential/models/sasrec/__init__.py -------------------------------------------------------------------------------- /recommenders/sequential/models/sasrec/sasrec_multihead_attention.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | # this version of multihead attention was ported from the original SASRec implementation, 4 | # as it does some non-standard transformations, including the 'causality' (future-blinding) one 5 | def multihead_attention(queries, 6 | keys, 7 | num_heads, 8 | attention_layers, 9 | causality=False): 10 | Q = attention_layers["query_proj"](queries) # (N, T_q, C) 11 | K = attention_layers["key_proj"](keys) # (N, T_k, C) 12 | V = attention_layers["val_proj"](keys) # (N, T_k, C) 13 | 14 | # Split and concat 15 | Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, C/h) 16 | K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 17 | V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 18 | 19 | # Multiplication
20 | outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1])) # (h*N, T_q, T_k) 21 | 22 | # Scale 23 | outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5) 24 | 25 | # Key Masking 26 | key_masks = tf.sign(tf.reduce_sum(tf.abs(keys), axis=-1)) # (N, T_k) 27 | key_masks = tf.tile(key_masks, [num_heads, 1]) # (h*N, T_k) 28 | key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1]) # (h*N, T_q, T_k) 29 | 30 | paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) 31 | outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs) # (h*N, T_q, T_k) 32 | 33 | # Causality = Future blinding 34 | if causality: 35 | diag_vals = tf.ones_like(outputs[0, :, :]) # (T_q, T_k) 36 | tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense() # (T_q, T_k) 37 | masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1]) # (h*N, T_q, T_k) 38 | 39 | paddings = tf.ones_like(masks) * (-2 ** 32 + 1) 40 | outputs = tf.where(tf.equal(masks, 0), paddings, outputs) # (h*N, T_q, T_k) 41 | 42 | # Activation 43 | outputs = tf.nn.softmax(outputs) # (h*N, T_q, T_k) 44 | 45 | # Query Masking 46 | query_masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1)) # (N, T_q) 47 | query_masks = tf.tile(query_masks, [num_heads, 1]) # (h*N, T_q) 48 | query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]]) # (h*N, T_q, T_k) 49 | outputs *= query_masks # broadcasting. (N, T_q, C) 50 | 51 | attention_weights = outputs 52 | 53 | # Dropouts 54 | outputs = attention_layers["dropout"](outputs) 55 | 56 | # Weighted sum 57 | outputs = tf.matmul(outputs, V_) # ( h*N, T_q, C/h) 58 | 59 | # Restore shape 60 | outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2) # (N, T_q, C) 61 | return outputs, attention_weights 62 | -------------------------------------------------------------------------------- /recommenders/sequential/models/sequential_recsys_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import List, Type 3 | import tensorflow as tf 4 | 5 | class SequentialModelConfig(object): 6 | def __init__(self): 7 | self.config = {} 8 | 9 | def as_dict(self) -> dict: 10 | return self.config 11 | 12 | def get_model_architecture(self) -> Type[SequentialRecsysModel]: 13 | raise NotImplementedError() 14 | 15 | 16 | class SequentialDataParameters(object): 17 | def __init__(self, num_users, num_items, sequence_length, batch_size) -> None: 18 | self.num_users = num_users 19 | self.num_items = num_items 20 | self.sequence_length = sequence_length 21 | self.batch_size = batch_size 22 | 23 | def as_dict(self): 24 | return self.__dict__ 25 | 26 | class SequentialRecsysModel(tf.keras.Model): 27 | @classmethod 28 | def get_model_config_class(cls) -> Type[SequentialModelConfig]: 29 | raise NotImplementedError() 30 | 31 | def __init__(self, model_parameters: SequentialModelConfig, data_parameters: SequentialDataParameters, *args, **kwargs): 32 | super().__init__(*args, **kwargs) 33 | self.model_parameters = model_parameters 34 | self.data_parameters = data_parameters 35 | 36 | def get_dummy_inputs(self) -> List[tf.Tensor]: 37 | raise NotImplementedError() 38 | 39 | def fit_biases(self, train_users): 40 | pass 41 | 42 | #write tensorboard stuff (metrics) here 43 | def log(self): 44 | pass 45 | 46 | @classmethod 47 | def from_config(cls, config: dict): 48 | data_parameters = SequentialDataParameters(**config['data_parameters']) 49 | model_parameters = 
cls.get_model_config_class()(**config['model_parameters']) 50 | model = cls(model_parameters, data_parameters) 51 | dummy_data = model.get_dummy_inputs() 52 | model(dummy_data, training=False) #dummy call to build the model 53 | return model 54 | 55 | def get_config(self): 56 | return get_config_dict(self.model_parameters, self.data_parameters) 57 | 58 | 59 | 60 | def get_sequential_model(model_config: SequentialModelConfig, data_parameters: SequentialDataParameters): 61 | config = get_config_dict(model_config, data_parameters) 62 | model_arch = model_config.get_model_architecture() 63 | return model_arch.from_config(config) 64 | 65 | def get_config_dict(model_config, data_parameters): 66 | model_config_dict = model_config.as_dict() 67 | data_parameters_dict = data_parameters.as_dict() 68 | config = {'model_parameters': model_config_dict, 'data_parameters': data_parameters_dict} 69 | return config 70 | 71 | 72 | -------------------------------------------------------------------------------- /recommenders/sequential/models/vit4rec.py: -------------------------------------------------------------------------------- 1 | from aprec.recommenders.sequential.models.sequential_recsys_model import SequentialRecsysModelBuilder 2 | from tensorflow.keras import Model 3 | from tensorflow.keras.layers import Embedding, Dense 4 | import tensorflow as tf 5 | from transformers import TFViTModel 6 | from tensorflow.keras import activations 7 | 8 | 9 | class Vit4Rec(SequentialRecsysModelBuilder): 10 | VIT_SIZE = 224 11 | def __init__(self, output_layer_activation='linear', embedding_size=None, max_history_len=VIT_SIZE): 12 | super().__init__(output_layer_activation, embedding_size, max_history_len) 13 | 14 | def get_model(self): 15 | return Vit4RecModel(self.num_items, self.output_layer_activation) 16 | 17 | 18 | 19 | class Vit4RecModel(Model): 20 | def __init__(self, n_items, activation): 21 | super().__init__() 22 | vit_image_size = Vit4Rec.VIT_SIZE 23 | VIT_EMBEDDING_SIZE=768 24 | self.embeddings_r = Embedding(n_items+1, vit_image_size) 25 | self.embeddings_g = Embedding(n_items+1, vit_image_size) 26 | self.embeddings_b = Embedding(n_items+1, vit_image_size) 27 | self.projection = Dense(VIT_EMBEDDING_SIZE, input_shape=(3 * vit_image_size,)) 28 | self.n_items = n_items 29 | self.all_items = tf.range(0, self.n_items) 30 | self.vit = TFViTModel.from_pretrained("google/vit-base-patch16-224") 31 | self.output_activation = activations.get(activation) 32 | 33 | def call(self, inputs): 34 | seqs = inputs[0] 35 | red = self.embeddings_r(seqs) 36 | green = self.embeddings_g(seqs) 37 | blue = self.embeddings_b(seqs) 38 | images = tf.tanh(tf.stack([red, green, blue], axis=1)) 39 | encoded = self.vit(images).pooler_output 40 | all_items_red = self.embeddings_r(self.all_items) 41 | all_items_green = self.embeddings_g(self.all_items) # green channel must use the green embedding table 42 | all_items_blue = self.embeddings_b(self.all_items) # blue channel must use the blue embedding table 43 | all_embs = self.projection(tf.concat([all_items_red, all_items_green, all_items_blue], axis=1)) 44 | result = self.output_activation(tf.einsum("be, ie -> bi", encoded, all_embs)) 45 | return result 46 | 47 | 48 | -------------------------------------------------------------------------------- /recommenders/sequential/samplers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/recommenders/sequential/samplers/__init__.py -------------------------------------------------------------------------------- 
/recommenders/sequential/samplers/idf_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tensorflow_probability as tfp 4 | import tqdm 5 | from aprec.recommenders.sequential.samplers.sampler import NegativesSampler 6 | from aprec.recommenders.sequential.models.sequential_recsys_model import SequentialDataParameters 7 | 8 | 9 | class IDFSampler(NegativesSampler): 10 | def __init__(self, data_parameters: SequentialDataParameters, num_negatives: int) -> None: 11 | super().__init__(data_parameters, num_negatives) 12 | weights = tf.random.uniform((self.data_parameters.num_items,), 0, 1) 13 | self.reset_logits(weights) 14 | 15 | def reset_logits(self, weights): 16 | probs = weights/tf.reduce_sum(weights) 17 | self.logits = tf.expand_dims(tf.math.log(probs/tf.reduce_sum(probs)), 0) 18 | 19 | def fit(self, train_users): 20 | print("fitting idf negatives sampler...") 21 | item_counts = np.zeros(self.data_parameters.num_items) 22 | for user_seq in tqdm.tqdm(train_users): 23 | for timestamp, item in user_seq: 24 | item_counts[item] += 1 25 | item_counts = tf.constant(item_counts, 'float32') 26 | numerator = tf.expand_dims(tf.constant(len(train_users), 'float32'), 0) 27 | EPS = 1e-9 28 | #for items with zero interactions divide_no_nan yields 0 and the log is negative, so we take relu to clamp their weight to 0 29 | inverted_counts = tf.nn.relu(tf.math.log(tf.math.divide_no_nan(numerator, item_counts) + EPS)) 30 | self.reset_logits(inverted_counts) 31 | 32 | def __call__(self, masked_sequences, labels): 33 | negatives = tf.random.categorical(self.logits, self.data_parameters.batch_size*self.data_parameters.sequence_length*self.num_negatives) 34 | negatives = tf.reshape(negatives, (self.data_parameters.batch_size, self.data_parameters.sequence_length, self.num_negatives)) 35 | return negatives 36 | 37 | 38 | -------------------------------------------------------------------------------- /recommenders/sequential/samplers/popularity_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tensorflow_probability as tfp 4 | import tqdm 5 | from aprec.recommenders.sequential.samplers.sampler import NegativesSampler 6 | from aprec.recommenders.sequential.models.sequential_recsys_model import SequentialDataParameters 7 | 8 | 9 | class PopularitySampler(NegativesSampler): 10 | def __init__(self, data_parameters: SequentialDataParameters, num_negatives: int) -> None: 11 | super().__init__(data_parameters, num_negatives) 12 | weights = tf.random.uniform((self.data_parameters.num_items,), 0, 1) 13 | self.reset_logits(weights) 14 | 15 | def reset_logits(self, weights): 16 | probs = weights/tf.reduce_sum(weights) 17 | self.logits = tf.expand_dims(tf.math.log(probs/tf.reduce_sum(probs)), 0) 18 | 19 | def fit(self, train_users): 20 | print("fitting popularity negatives sampler...") 21 | item_counts = np.zeros(self.data_parameters.num_items) 22 | for user_seq in tqdm.tqdm(train_users): 23 | for timestamp, item in user_seq: 24 | item_counts[item] += 1 25 | self.reset_logits(tf.convert_to_tensor(item_counts)) 26 | 27 | def __call__(self, masked_sequences, labels): 28 | negatives = tf.random.categorical(self.logits, self.data_parameters.batch_size*self.data_parameters.sequence_length*self.num_negatives) 29 | negatives = tf.reshape(negatives, (self.data_parameters.batch_size, self.data_parameters.sequence_length, self.num_negatives)) 30 | return 
negatives 31 | 32 | -------------------------------------------------------------------------------- /recommenders/sequential/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from aprec.recommenders.sequential.samplers.sampler import NegativesSampler 3 | from aprec.recommenders.sequential.models.sequential_recsys_model import SequentialDataParameters 4 | 5 | 6 | class RandomNegativesSampler(NegativesSampler): 7 | def __init__(self, data_parameters: SequentialDataParameters, num_negatives:int) -> None: 8 | super().__init__(data_parameters, num_negatives) 9 | 10 | def fit(self, training_sequences): 11 | pass 12 | 13 | def __call__(self, masked_sequences, labels): 14 | negatives = tf.random.uniform((self.data_parameters.batch_size, 15 | self.data_parameters.sequence_length, 16 | self.num_negatives), dtype='int64', maxval=self.data_parameters.num_items) 17 | return negatives 18 | -------------------------------------------------------------------------------- /recommenders/sequential/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from aprec.recommenders.sequential.models.sequential_recsys_model import SequentialDataParameters 2 | 3 | 4 | class NegativesSampler(object): 5 | def __init__(self, data_parameters: SequentialDataParameters, num_negatives: int) -> None: 6 | self.data_parameters = data_parameters 7 | self.num_negatives = num_negatives 8 | 9 | def fit(self, train_users): 10 | pass 11 | 12 | def __call__(self, masked_sequences, labels): 13 | raise NotImplementedError() 14 | 15 | 16 | def get_negatives_sampler(sampler_name, data_parameters, num_negatives) -> NegativesSampler: 17 | if sampler_name == "random": 18 | from aprec.recommenders.sequential.samplers.random_sampler import RandomNegativesSampler 19 | return RandomNegativesSampler(data_parameters, num_negatives) 20 | 21 | elif sampler_name == "popularity": 22 | from aprec.recommenders.sequential.samplers.popularity_sampler import PopularitySampler 23 | return PopularitySampler(data_parameters, num_negatives) 24 | 25 | elif sampler_name == "idf": 26 | from aprec.recommenders.sequential.samplers.idf_sampler import IDFSampler 27 | return IDFSampler(data_parameters, num_negatives) 28 | else: 29 | raise Exception(f"wrong negatives sampler name {sampler_name}") -------------------------------------------------------------------------------- /recommenders/sequential/target_builders/full_matrix_targets_builder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.sparse.csr import csr_matrix 3 | from aprec.recommenders.sequential.target_builders.target_builders import TargetBuilder 4 | 5 | 6 | class FullMatrixTargetsBuilder(TargetBuilder): 7 | def __init__(self, max_target_label=1.0, target_decay=1.0, min_target_val=0.1): 8 | self.max_target_label = max_target_label 9 | self.target_decay = target_decay 10 | self.min_target_val = min_target_val 11 | 12 | def build(self, user_targets): 13 | rows = [] 14 | cols = [] 15 | vals = [] 16 | for i in range(len(user_targets)): 17 | cur_val = self.max_target_label 18 | for action_num in range(len(user_targets[i])): 19 | action = user_targets[i][action_num] 20 | rows.append(i) 21 | cols.append(action[1]) 22 | vals.append(cur_val) 23 | cur_val *= self.target_decay 24 | if cur_val < self.min_target_val: 25 | cur_val = self.min_target_val 26 | self.target_matrix = csr_matrix((vals, (rows, 
cols)), shape=(len(user_targets), self.n_items), 27 | dtype='float32') 28 | def get_targets(self, start, end): 29 | target_inputs = [] 30 | target_outputs = np.array(self.target_matrix[start:end].todense()) 31 | return target_inputs, target_outputs -------------------------------------------------------------------------------- /recommenders/sequential/target_builders/items_masking_target_builder.py: -------------------------------------------------------------------------------- 1 | from random import Random 2 | 3 | import numpy as np 4 | from aprec.recommenders.sequential.target_builders.target_builders import TargetBuilder 5 | 6 | class ItemsMaskingTargetsBuilder(TargetBuilder): 7 | def __init__(self, random_seed=31337, 8 | relative_positions_encoding = False, 9 | ignore_value=-100): #-100 is used by default in Hugging Face's BERT implementation 10 | self.random = Random() 11 | self.random.seed(random_seed) 12 | self.targets = [] 13 | self.ignore_value = ignore_value 14 | self.relative_positions_encoding = relative_positions_encoding 15 | self.positions = [] 16 | 17 | def build(self, user_targets): 18 | targets = [] 19 | positions = [] 20 | for seq_len, user in user_targets: 21 | user_positions = [] 22 | user_target = [self.ignore_value] * self.sequence_len 23 | if self.relative_positions_encoding: 24 | split_pos = self.random.randint(self.sequence_len - seq_len, self.sequence_len - 1) 25 | else: 26 | split_pos = self.sequence_len - 1 27 | 28 | for i in range(self.sequence_len): 29 | user_positions.append(self.sequence_len - split_pos + i) 30 | 31 | positions.append(user_positions) 32 | for pos in user: 33 | user_target[pos[0]] = pos[1][1] 34 | 35 | targets.append(user_target) 36 | 37 | self.positions = np.array(positions) 38 | self.targets = np.array(targets) 39 | 40 | 41 | 42 | def get_targets(self, start, end): 43 | return [self.targets[start:end], self.positions[start:end]], self.targets[start:end] 44 | -------------------------------------------------------------------------------- /recommenders/sequential/target_builders/negative_per_positive_target.py: -------------------------------------------------------------------------------- 1 | from random import Random 2 | 3 | import numpy as np 4 | from aprec.recommenders.sequential.target_builders.target_builders import TargetBuilder 5 | 6 | 7 | class NegativePerPositiveTargetBuilder(TargetBuilder): 8 | def __init__(self, sequence_len=64, random_seed=31337): 9 | self.random = Random() 10 | self.random.seed(random_seed) 11 | self.sequence_len = sequence_len 12 | 13 | def build(self, user_targets): 14 | self.inputs = [] 15 | self.targets = [] 16 | for i in range(len(user_targets)): 17 | user_inputs = [] 18 | targets_for_user = [] 19 | seq = user_targets[i] 20 | if len(seq) < self.sequence_len: 21 | user_inputs += [[self.n_items, self.n_items]] * (self.sequence_len - len(seq)) 22 | targets_for_user += [[-1.0, -1.0]] * (self.sequence_len - len(seq)) 23 | for target in seq[-self.sequence_len:]: 24 | positive = target[1] 25 | negative = self.random.randint(0, self.n_items - 1) 26 | while negative == positive: 27 | negative = self.random.randint(0, self.n_items - 1) 28 | user_inputs.append([positive, negative]) 29 | targets_for_user.append([1.0, 0.0]) 30 | self.inputs.append(user_inputs) 31 | self.targets.append(targets_for_user) 32 | self.inputs = np.array(self.inputs) 33 | self.targets = np.array(self.targets) 34 | 35 | def get_targets(self, start, end): 36 | return [self.inputs[start:end]], self.targets[start:end] 37 | 38 | 39 | 40 | 41 | 
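A minimal sketch of the NegativePerPositiveTargetBuilder contract (illustrative values; set_n_items comes from the TargetBuilder base class shown further below):

from aprec.recommenders.sequential.target_builders.negative_per_positive_target import NegativePerPositiveTargetBuilder

builder = NegativePerPositiveTargetBuilder(sequence_len=4)
builder.set_n_items(10)
builder.build([[(0, 3), (1, 7)]])  # one user with two (timestamp, item_id) targets
inputs, targets = builder.get_targets(0, 1)
# inputs[0] has shape (1, 4, 2): per-position [positive, sampled negative] pairs,
# left-padded with [n_items, n_items]; targets has shape (1, 4, 2) with [1.0, 0.0]
# labels for real pairs and [-1.0, -1.0] for padding positions.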
-------------------------------------------------------------------------------- /recommenders/sequential/target_builders/positives_only_targets_builder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from aprec.recommenders.sequential.target_builders.target_builders import TargetBuilder 3 | 4 | 5 | class PositvesOnlyTargetBuilder(TargetBuilder): 6 | def __init__(self, max_targets_per_user = 10): 7 | self.max_targets_per_user = max_targets_per_user 8 | 9 | def build(self, user_targets): 10 | result = [] 11 | for i in range(len(user_targets)): 12 | seq = np.array([item[1] for item in user_targets[i]]) 13 | if len(seq) > self.max_targets_per_user: 14 | seq = np.random.choice(seq, self.max_targets_per_user) 15 | if len(seq) < self.max_targets_per_user: 16 | seq = np.pad(seq, (0, self.max_targets_per_user - len(seq)), constant_values=self.n_items) 17 | result.append(seq) 18 | self.target_matrix = np.array(result) 19 | pass 20 | 21 | def get_targets(self, start, end): 22 | target_outputs = self.target_matrix[start:end] 23 | return [target_outputs], target_outputs -------------------------------------------------------------------------------- /recommenders/sequential/target_builders/positives_sequence_target_builder.py: -------------------------------------------------------------------------------- 1 | from random import Random 2 | 3 | import numpy as np 4 | from aprec.recommenders.sequential.target_builders.target_builders import TargetBuilder 5 | 6 | 7 | class PositivesSequenceTargetBuilder(TargetBuilder): 8 | def __init__(self, sequence_len=64): 9 | self.random = Random() 10 | self.sequence_len = sequence_len 11 | 12 | def build(self, user_targets): 13 | self.targets = [] 14 | for i in range(len(user_targets)): 15 | targets_for_user = [] 16 | seq = user_targets[i] 17 | if len(seq) < self.sequence_len: 18 | targets_for_user += [-100.0] * (self.sequence_len - len(seq)) 19 | for target in seq[-self.sequence_len:]: 20 | targets_for_user.append(target[1]) 21 | self.targets.append(targets_for_user) 22 | self.targets = np.array(self.targets, 'int64') 23 | 24 | def get_targets(self, start, end): 25 | return [self.targets[start:end]], self.targets[start:end] 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /recommenders/sequential/target_builders/sampled_matrix_target_builder.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | from aprec.recommenders.sequential.target_builders.target_builders import TargetBuilder 5 | 6 | 7 | class SampledMatrixBuilder(TargetBuilder): 8 | def __init__(self, max_target_label=1.0, target_decay=1.0, min_target_val=0.1, n_samples=101): 9 | self.max_target_label = max_target_label 10 | self.target_decay = target_decay 11 | self.min_target_val = min_target_val 12 | self.n_samples = n_samples 13 | 14 | def build(self, user_targets): 15 | all_items = list(range(self.n_items)) 16 | self.target_matrix = [] 17 | self.target_ids = [] 18 | for i in range(len(user_targets)): 19 | targets = [] 20 | target_ids = [] 21 | sampled = set() 22 | cur_val = self.max_target_label 23 | for action_num in range(len(user_targets[i])): 24 | action = user_targets[i][action_num] 25 | targets.append(cur_val) 26 | target_ids.append(action[1]) 27 | sampled.add(action[1]) 28 | cur_val *= self.target_decay 29 | if cur_val < self.min_target_val: 30 | cur_val = self.min_target_val 31 | 
sampled.add(action[1]) 32 | while len(targets) < self.n_samples: 33 | negatives = np.random.choice(all_items, self.n_samples - len(targets)) 34 | for item_id in negatives: 35 | if item_id not in sampled: 36 | sampled.add(item_id) 37 | target_ids.append(item_id) 38 | targets.append(0.0) 39 | targets_with_ids = list(zip(targets, target_ids)) 40 | random.shuffle(targets_with_ids) 41 | targets, target_ids = zip(*targets_with_ids) 42 | self.target_matrix.append(targets) 43 | self.target_ids.append(target_ids) 44 | self.target_matrix = np.array(self.target_matrix) 45 | self.target_ids = np.array(self.target_ids) 46 | 47 | def get_targets(self, start, end): 48 | target_inputs = [self.target_ids[start:end]] 49 | target_outputs = self.target_matrix[start:end] 50 | return target_inputs, target_outputs 51 | 52 | 53 | -------------------------------------------------------------------------------- /recommenders/sequential/target_builders/target_builders.py: -------------------------------------------------------------------------------- 1 | class TargetBuilder(object): 2 | def __init__(self): 3 | pass 4 | 5 | def set_n_items(self, n): 6 | self.n_items = n 7 | 8 | def set_sequence_len(self, sequence_len): 9 | self.sequence_len = sequence_len 10 | 11 | def build(self, user_targets): 12 | raise NotImplementedError() 13 | 14 | def set_train_sequences(self, train_sequences): 15 | pass 16 | 17 | def get_targets(self, start, end): 18 | raise NotImplementedError() 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/fair_item_masking.py: -------------------------------------------------------------------------------- 1 | from typing import List #was "from ast import List", which breaks the List[int] annotation below 2 | from collections import Counter 3 | import copy 4 | from typing import Dict 5 | 6 | import numpy as np 7 | 8 | from aprec.recommenders.sequential.targetsplitters.targetsplitter import TargetSplitter 9 | 10 | class FairItemMasking(TargetSplitter): 11 | def __init__(self, masking_prob = 0.2, 12 | max_predictions_per_seq = 20, 13 | random_seed = 31337 14 | ) -> None: 15 | super().__init__() 16 | self.masking_prob = masking_prob 17 | self.max_predictions_per_seq = max_predictions_per_seq 18 | self.random = np.random.default_rng(random_seed) #assumed default: the file never defined the RNG used in split() 19 | self.force_last = False #assumed default: mask sampled positions rather than only the last item 20 | def set_item_attributes(self, item_attributes: List[int]): 21 | self.item_attributes = item_attributes 22 | self.temperature = np.ones(len(self.item_attributes)) 23 | 24 | 25 | def set_actions(self, actions): 26 | attribute_counts = Counter() 27 | for action in actions: 28 | attribute_counts[action.item_id] += 1 #was a bare expression (a no-op); counting is clearly intended 29 | self.attribute_counts = attribute_counts #assumed: keep the counts, which the original discarded 30 | 31 | 32 | 33 | def split(self, sequence): 34 | seq = sequence[-self.seqence_len: ] 35 | seq_len = len(seq) 36 | 37 | if len(seq) < self.seqence_len: 38 | seq = [(-1, self.num_items)] * (self.seqence_len - len(seq)) + seq 39 | 40 | if not self.force_last: 41 | n_masks = min(self.max_predictions_per_seq, 42 | max(1, int(round(len(sequence) * self.masking_prob)))) 43 | sample_range = list(range(len(seq) - seq_len, len(seq))) 44 | rss_vals = np.ones(len(sample_range)) #uniform probabilities; the original referenced an undefined self.recency_importance (the weighted variant lives in ItemsMasking below) 45 | rss_vals_sum = np.sum(rss_vals) 46 | probs = rss_vals / rss_vals_sum 47 | mask_positions = self.random.choice(sample_range, n_masks, p=probs, replace=False) 48 | else: 49 | n_masks = 1 50 | mask_positions = [len(seq) - 1] 51 | train = copy.deepcopy(seq) 52 | labels = [] 53 | mask_token = self.num_items + 1 #self.num_items is used for padding 54 | for position in mask_positions: 55 | labels.append((position, seq[position])) 56 | train[position] = (train[position][0],
mask_token) 57 | return train, (seq_len, labels) 58 | -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/items_masking.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | 5 | from aprec.recommenders.sequential.targetsplitters.targetsplitter import TargetSplitter 6 | 7 | class ItemsMasking(TargetSplitter): 8 | def __init__(self, masking_prob = 0.2, 9 | max_predictions_per_seq = 20, 10 | random_seed = 31337, 11 | force_last=False, 12 | recency_importance = lambda n, k: 1, 13 | tuning_samples_prob = 0.0 14 | ) -> None: 15 | super().__init__() 16 | self.masking_prob = masking_prob 17 | self.max_predictions_per_seq = max_predictions_per_seq 18 | self.random = np.random.Generator(np.random.PCG64(np.random.SeedSequence(random_seed))) 19 | self.tuning_random = np.random.Generator(np.random.PCG64(np.random.SeedSequence(random_seed+1))) 20 | self.force_last = force_last 21 | self.recency_importance = recency_importance 22 | self.tuning_samples_prob = tuning_samples_prob 23 | 24 | def split(self, sequence): 25 | seq = sequence[-self.seqence_len: ] 26 | seq_len = len(seq) 27 | 28 | if len(seq) < self.seqence_len: 29 | seq = [(-1, self.num_items)] * (self.seqence_len - len(seq)) + seq 30 | 31 | if not self.force_last and self.tuning_random.random() > self.tuning_samples_prob: 32 | n_masks = min(self.max_predictions_per_seq, 33 | max(1, int(round(len(sequence) * self.masking_prob)))) 34 | sample_range = list(range(len(seq) - seq_len, len(seq))) 35 | rss_vals = np.array([self.recency_importance(self.seqence_len, pos) for pos in sample_range]) 36 | rss_vals_sum = np.sum(rss_vals) 37 | probs = rss_vals / rss_vals_sum 38 | mask_positions = self.random.choice(sample_range, n_masks, p=probs, replace=False) 39 | else: 40 | n_masks = 1 41 | mask_positions = [len(seq) - 1] 42 | train = copy.deepcopy(seq) 43 | labels = [] 44 | mask_token = self.num_items + 1 #self.num_items is used for padding 45 | for position in mask_positions: 46 | labels.append((position, seq[position])) 47 | train[position] = (train[position][0], mask_token) 48 | return train, (seq_len, labels) 49 | -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/last_item_splitter.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from aprec.recommenders.sequential.targetsplitters.targetsplitter import TargetSplitter 3 | 4 | 5 | class SequenceContinuation(TargetSplitter): 6 | def __init__(self, add_cls=False) -> None: 7 | super().__init__() 8 | self.add_cls = add_cls 9 | 10 | def split(self, sequence, max_targets=1): 11 | if len(sequence) == 0: 12 | return [], [] 13 | train = sequence[:-max_targets] 14 | 15 | target = sequence[-max_targets:] 16 | 17 | if self.add_cls: 18 | cls_token = self.num_items + 1 #self.num_items is used for padding 19 | for t in target: 20 | cls = (t[0], cls_token) 21 | train.append(cls) 22 | return train, target -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/random_fraction_splitter.py: -------------------------------------------------------------------------------- 1 | from random import Random 2 | from aprec.recommenders.sequential.targetsplitters.targetsplitter import TargetSplitter 3 | 4 | 5 | class RandomFractionSplitter(TargetSplitter): 6 | def __init__(self, min_targets=1, 
random_seed=31337) -> None: 7 | super().__init__() 8 | self.min_targets = min_targets 9 | self.random = Random(random_seed) 10 | 11 | def split(self, sequence): 12 | if len(sequence) == 0: 13 | return [], [] 14 | target_actions = self.random.randint(1, max(len(sequence) - 1, 1)) 15 | train_actions = len(sequence) - target_actions 16 | return sequence[:train_actions], sequence[-target_actions:] 17 | 18 | -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/random_splitter.py: -------------------------------------------------------------------------------- 1 | import random 2 | from aprec.recommenders.sequential.targetsplitters.targetsplitter import TargetSplitter 3 | 4 | 5 | class RandomSplitter(TargetSplitter): 6 | def __init__(self, seed=31337, target_chance = 0.25) -> None: 7 | self.random = random.Random() 8 | self.random.seed(seed) 9 | self.target_chance = target_chance 10 | super().__init__() 11 | 12 | def split(self, sequence): 13 | input = [] 14 | target = [] 15 | for item in sequence: 16 | if self.random.random() < self.target_chance: 17 | target.append(item) 18 | else: 19 | input.append(item) 20 | return input, target 21 | -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/recency_sequence_sampling.py: -------------------------------------------------------------------------------- 1 | import math 2 | from aprec.recommenders.sequential.targetsplitters.targetsplitter import TargetSplitter 3 | import numpy as np 4 | 5 | def exponential_importance(p):  6 | return lambda n, k: p**(n - k) 7 | 8 | def linear_importance(a=1, b=1): 9 | return lambda n, k: a*k+b 10 | 11 | def pow_importance(p, c=0): 12 | def func(n, k): 13 | return math.pow((k+1)/(n+1), math.exp(p)) + c 14 | return func 15 | 16 | class RecencySequenceSampling(TargetSplitter): 17 | #recency_importance is a function that defines the chances of the k-th element 18 | #to be sampled as a positive in a sequence of length n 19 | 20 | def __init__(self, max_pct, recency_importance=exponential_importance(0.8), seed=31337, add_cls = False) -> None: 21 | super().__init__() 22 | self.max_pct = max_pct 23 | self.recency_importance = recency_importance 24 | self.random = np.random.default_rng(seed=seed) 25 | self.add_cls = add_cls 26 | 27 | 28 | def split(self, sequence): 29 | if len(sequence) == 0: 30 | return [], [] 31 | target = set() 32 | cnt = max(1, int(len(sequence)*self.max_pct)) 33 | f = lambda j: self.recency_importance(len(sequence), j) 34 | f_vals = np.array([f(i) for i in range(len(sequence))]) 35 | f_sum = sum(f_vals) 36 | sampled_idx = set(self.random.choice(range(len(sequence)), cnt, p=f_vals/f_sum, replace=True)) 37 | input = list() 38 | for i in range(len(sequence)): 39 | if i not in sampled_idx: 40 | input.append(sequence[i]) 41 | else: 42 | target.add(sequence[i]) 43 | 44 | if self.add_cls: 45 | if len(input) > 0: 46 | last_input_timestamp = input[-1][0] 47 | else: 48 | last_input_timestamp = 1 49 | cls_token = self.num_items + 1 #self.num_items is used for padding 50 | input.append((last_input_timestamp + 1, cls_token)) 51 | return input, list(target) 52 | 53 | 54 | -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/shifted_sequence_splitter.py: -------------------------------------------------------------------------------- 1 | from random import Random 2 | from
aprec.recommenders.sequential.targetsplitters.targetsplitter import TargetSplitter 3 | 4 | 5 | class ShiftedSequenceSplitter(TargetSplitter): 6 | def __init__(self) -> None: 7 | super().__init__() 8 | 9 | def split(self, sequence): 10 | train = sequence[-self.seqence_len - 1: -1] 11 | label = sequence[-len(train):] 12 | return train, label 13 | 14 | -------------------------------------------------------------------------------- /recommenders/sequential/targetsplitters/targetsplitter.py: -------------------------------------------------------------------------------- 1 | class TargetSplitter(object): 2 | def __init__(self) -> None: 3 | self.num_items = None 4 | self.seqence_len = None 5 | 6 | def split(self, sequence): 7 | raise NotImplementedError() 8 | 9 | def set_num_items(self, num_items): 10 | self.num_items = num_items 11 | 12 | def set_actions(self, actions): 13 | pass #most target splitters do not require actions beforehand. 14 | 15 | def set_sequence_len(self, sequence_len): 16 | self.seqence_len = sequence_len 17 | -------------------------------------------------------------------------------- /recommenders/top_recommender.py: -------------------------------------------------------------------------------- 1 | from aprec.recommenders.recommender import Recommender 2 | from collections import Counter 3 | 4 | class TopRecommender(Recommender): 5 | def __init__(self, recency=1.0): #recency controls which fraction of the most recent actions is counted 6 | super().__init__() 7 | self.items_counter = Counter() 8 | self.item_scores = {} 9 | self.actions = [] 10 | self.recency = recency 11 | 12 | def add_action(self, action): 13 | self.actions.append(action) 14 | 15 | def rebuild_model(self): 16 | self.actions.sort(key=lambda x: x.timestamp) 17 | n_actions = int(len(self.actions) * self.recency) 18 | for action in self.actions[-n_actions:]: 19 | self.items_counter[action.item_id] += 1 20 | self.actions = [] 21 | self.most_common = self.items_counter.most_common() 22 | for item, score in self.most_common: 23 | self.item_scores[item] = score 24 | 25 | def recommend(self, user_id, limit, features=None): 26 | return self.most_common[:limit] 27 | 28 | def get_metadata(self): 29 | return {"top 20 items": self.most_common[:20]} 30 | 31 | 32 | def get_similar_items(self, item_id, limit): 33 | return self.most_common[:limit] 34 | 35 | def name(self): 36 | return "TopItemsRecommender" 37 | 38 | def get_item_rankings(self): 39 | result = {} 40 | for request in self.items_ranking_requests: 41 | request_result = [] 42 | for item_id in request.item_ids: 43 | score = self.item_scores.get(item_id, 0) 44 | request_result.append((item_id, score)) 45 | request_result.sort(key=lambda x: -x[1]) 46 | result[request.user_id] = request_result 47 | return result 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | log_tensorboard/ 2 | saved/ 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | #physical_devices = tf.config.list_physical_devices('GPU') 4 | #tf.config.experimental.set_memory_growth(physical_devices[0], True) 5 | -------------------------------------------------------------------------------- /tests/datasets/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/datasets/__init__.py -------------------------------------------------------------------------------- /tests/datasets/booking_dataset_reference_actions.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "user_id": "1000027_1", 4 | "item_id": "8183", 5 | "data": { 6 | "user_id": "1000027", 7 | "device_class": "desktop", 8 | "affiliate_id": "7168", 9 | "hotel_country": "Gondal", 10 | "booker_country": "Elbonia", 11 | "checkin_date": 1471046400.0, 12 | "checkout_date": 1471132800.0, 13 | "is_control": false 14 | }, 15 | "timestamp": 1471046400.0 16 | }, 17 | { 18 | "user_id": "1000027_1", 19 | "item_id": "15626", 20 | "data": { 21 | "user_id": "1000027", 22 | "device_class": "desktop", 23 | "affiliate_id": "7168", 24 | "hotel_country": "Gondal", 25 | "booker_country": "Elbonia", 26 | "checkin_date": 1471132800.0, 27 | "checkout_date": 1471305600.0, 28 | "is_control": false 29 | }, 30 | "timestamp": 1471132800.0 31 | }, 32 | { 33 | "user_id": "1000066_2", 34 | "item_id": "56430", 35 | "data": { 36 | "user_id": "1000066", 37 | "device_class": "desktop", 38 | "affiliate_id": "9924", 39 | "hotel_country": "Urkesh", 40 | "booker_country": "Gondal", 41 | "checkin_date": 1469059200.0, 42 | "checkout_date": 1469232000.0, 43 | "is_control": true 44 | }, 45 | "timestamp": 1469059200.0 46 | }, 47 | { 48 | "user_id": "1000066_2", 49 | "item_id": "41971", 50 | "data": { 51 | "user_id": "1000066", 52 | "device_class": "desktop", 53 | "affiliate_id": "9924", 54 | "hotel_country": "Urkesh", 55 | "booker_country": "Gondal", 56 | "checkin_date": 1469232000.0, 57 | "checkout_date": 1469404800.0, 58 | "is_control": true 59 | }, 60 | "timestamp": 1469232000.0 61 | } 62 | ] 63 | -------------------------------------------------------------------------------- /tests/datasets/mts_kion_reference_actions.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "user_id": "176549", 4 | "item_id": "9506", 5 | "data": {}, 6 | "timestamp": 1620691200 7 | }, 8 | { 9 | "user_id": "699317", 10 | "item_id": "1659", 11 | "data": {}, 12 | "timestamp": 1622246400 13 | }, 14 | { 15 | "user_id": "656683", 16 | "item_id": "7107", 17 | "data": {}, 18 | "timestamp": 1620518400 19 | }, 20 | { 21 | "user_id": "864613", 22 | "item_id": "7638", 23 | "data": {}, 24 | "timestamp": 1625443200 25 | }, 26 | { 27 | "user_id": "964868", 28 | "item_id": "9506", 29 | "data": {}, 30 | "timestamp": 1619740800 31 | }, 32 | { 33 | "user_id": "1032142", 34 | "item_id": "6686", 35 | "data": {}, 36 | "timestamp": 1620864000 37 | }, 38 | { 39 | "user_id": "1016458", 40 | "item_id": "354", 41 | "data": {}, 42 | "timestamp": 1628899200 43 | }, 44 | { 45 | "user_id": "884009", 46 | "item_id": "693", 47 | "data": {}, 48 | "timestamp": 1628035200 49 | }, 50 | { 51 | "user_id": "648682", 52 | "item_id": "1449", 53 | "data": {}, 54 | "timestamp": 1623542400 55 | }, 56 | { 57 | "user_id": "203219", 58 | "item_id": "13582", 59 | "data": {}, 60 | "timestamp": 1629590400 61 | } 62 | ] -------------------------------------------------------------------------------- /tests/datasets/test_beauty_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | from unittest import TestCase 3 | import unittest 4 | 5 | 6 | class TestBeautyDataset(TestCase): 7 | def 
test_beauty_dataset(self): 8 | from aprec.datasets.dataset_stats import dataset_stats 9 | from aprec.datasets.beauty import get_beauty_dataset 10 | from aprec.datasets.dataset_utils import filter_cold_users 11 | 12 | dataset = filter_cold_users(get_beauty_dataset(), 5) 13 | result = dataset_stats(dataset, metrics=['num_users', 'num_items', 'num_interactions']) 14 | print(result) 15 | 16 | if __name__ == "__main__": 17 | unittest.main() -------------------------------------------------------------------------------- /tests/datasets/test_bert4rec_datasets.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestBert4recDatasets(unittest.TestCase): 4 | def test_bert4rec_dataset(self): 5 | import json 6 | from aprec.datasets.dataset_stats import dataset_stats 7 | from aprec.datasets.bert4rec_datasets import get_bert4rec_dataset 8 | 9 | for dataset_name in ["beauty", "steam", "ml-1m"]: 10 | print(f"analyzing dataset {dataset_name}") 11 | dataset = get_bert4rec_dataset(dataset_name) 12 | stats = dataset_stats(dataset, metrics=['num_users', 'num_items', 'num_interactions']) 13 | print(json.dumps(stats, indent=4)) 14 | 15 | if __name__ == "__main__": 16 | unittest.main() -------------------------------------------------------------------------------- /tests/datasets/test_booking_dataset.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import TestCase 3 | 4 | class TestBookingDataset(TestCase): 5 | def test_booking_download(self): 6 | from aprec.datasets.booking import download_booking_train, download_booking_test 7 | from aprec.utils.os_utils import file_md5 8 | 9 | #download train file 10 | result_file = download_booking_train() 11 | booking_file_md5 = file_md5(result_file) 12 | self.assertEqual(booking_file_md5, "4f343b12d76b28ec0f1899e4083a72a8") 13 | 14 | #download test file 15 | result_file = download_booking_test() 16 | booking_file_md5 = file_md5(result_file) 17 | self.assertEqual(booking_file_md5, "2d068bea795cc4b798422ad1d80bd0c4") 18 | 19 | def test_booking_dataset(self): 20 | import os.path 21 | 22 | from aprec.datasets.booking import get_booking_dataset 23 | import json 24 | 25 | local_path = os.path.abspath(os.path.dirname(__file__)) 26 | with open(os.path.join(local_path, "booking_dataset_reference_actions.json")) as input: 27 | reference_actions = json.load(input) 28 | actions_dataset = get_booking_dataset(max_actions_per_file=2, unix_timestamps=True)[0] 29 | actions = [json.loads(action.to_json()) for action in actions_dataset] 30 | self.assertEqual(actions, reference_actions) 31 | 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | 37 | -------------------------------------------------------------------------------- /tests/datasets/test_datasets_register.py: -------------------------------------------------------------------------------- 1 | from aprec.datasets.datasets_register import DatasetsRegister 2 | import unittest 3 | 4 | class TestDatasetsRegister(unittest.TestCase): 5 | def test_register(self): 6 | register = DatasetsRegister() 7 | dataset = register["ml-100k"]() 8 | self.assertEqual(len(dataset), 100000) 9 | 10 | if __name__ == "__main__": 11 | unittest.main() -------------------------------------------------------------------------------- /tests/datasets/test_filter_cold_users.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class
TestFilterColdUsers(unittest.TestCase): 4 | def test_filter_cold_users(self): 5 | from aprec.api.action import Action 6 | from aprec.datasets.dataset_utils import filter_cold_users 7 | actions = [Action(item_id=1, user_id=1, timestamp=1), 8 | Action(item_id=2, user_id=1, timestamp=2), 9 | Action(item_id=1, user_id=2, timestamp=1)] 10 | result = list(filter_cold_users(actions, 2)) 11 | self.assertEqual(str(result), "[Action(uid=1, item=1, ts=1), Action(uid=1, item=2, ts=2)]") 12 | 13 | if __name__ == "__main__": 14 | unittest.main() -------------------------------------------------------------------------------- /tests/datasets/test_gowalla_dataset.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | reference_actions = [{'user_id': '0', 'item_id': '22847', 'data': {}, 'timestamp': 1287532527.0}, 4 | {'user_id': '0', 'item_id': '420315', 'data': {}, 'timestamp': 1287440263.0}, 5 | {'user_id': '0', 'item_id': '316637', 'data': {}, 'timestamp': 1287358923.0}, 6 | {'user_id': '0', 'item_id': '16516', 'data': {}, 'timestamp': 1287343565.0}, 7 | {'user_id': '0', 'item_id': '5535878', 'data': {}, 'timestamp': 1287255042.0}] 8 | 9 | class TestGowallaDataset(unittest.TestCase): 10 | def test_gowalla_dataset(self): 11 | import json 12 | from aprec.datasets.gowalla import get_gowalla_dataset 13 | actions = [json.loads(action.to_json()) for action in get_gowalla_dataset(5)] 14 | self.assertEqual(actions, reference_actions) -------------------------------------------------------------------------------- /tests/datasets/test_mts_kion_dataset.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestMtsKionDataset(unittest.TestCase): 4 | def test_get_mts_kion(self): 5 | import os 6 | import json 7 | from aprec.datasets.mts_kion import get_mts_kion_dataset 8 | 9 | local_path = os.path.abspath(os.path.dirname(__file__)) 10 | with open(os.path.join(local_path, "mts_kion_reference_actions.json")) as reference_file: 11 | reference_data = json.load(reference_file) 12 | data = [json.loads(action.to_json()) for action in get_mts_kion_dataset(max_actions=10)] 13 | self.assertEqual(reference_data, data) 14 | 15 | 16 | def test_get_submission_user_ids(self): 17 | from aprec.datasets.mts_kion import get_submission_user_ids 18 | submission_users = get_submission_user_ids() 19 | self.assertEqual(submission_users[:10], 20 | ['3', '11', '29', '30', '33', '39', '46', '47', '51', '61']) 21 | 22 | def test_get_users(self): 23 | from aprec.datasets.mts_kion import get_users 24 | 25 | users = get_users() 26 | pass 27 | 28 | def test_get_items(self): 29 | from aprec.datasets.mts_kion import get_items 30 | 31 | items = get_items() 32 | self.assertEqual(items[0].cat_features[:2], [('content_type', 'film'), ('age_rating', '16.0')]) 33 | self.assertEqual(len(items), 15963) 34 | 35 | 36 | 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tests/datasets/test_netflix.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import unittest 3 | from aprec.datasets.datasets_register import DatasetsRegister 4 | 5 | 6 | class TestNetflixDataset(TestCase): 7 | def test_netflix(self): 8 | dataset = DatasetsRegister()["netflix_fraction_0.001"]() 9 | self.assertEqual(len(dataset), 97030) 10 | 11 | if __name__ == "__main__": 12 | unittest.main() 13 |
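The dataset tests above share one regression pattern: load the dataset, serialize each Action via to_json(), and compare the parsed dicts against a committed reference JSON. Below is a minimal sketch of that round trip; it is not repository code and assumes only the Action constructor and the four serialized fields (user_id, item_id, data, timestamp) visible in the reference files above.

# Hypothetical round-trip sketch (not repository code).
import json
import unittest

from aprec.api.action import Action

class TestActionJsonRoundTrip(unittest.TestCase):
    def test_round_trip(self):
        action = Action(user_id='42', item_id='7', timestamp=1000, data={'source': 'test'})
        parsed = json.loads(action.to_json())
        self.assertEqual(parsed['user_id'], '42')
        self.assertEqual(parsed['item_id'], '7')
        self.assertEqual(parsed['timestamp'], 1000)
        self.assertEqual(parsed['data'], {'source': 'test'})

if __name__ == "__main__":
    unittest.main()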
-------------------------------------------------------------------------------- /tests/datasets/test_yelp_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import unittest 3 | 4 | reference_actions = [{'user_id': 430450, 'item_id': 91854, 'data': {}, 'timestamp': 1108524202}, 5 | {'user_id': 430450, 'item_id': 137692, 'data': {}, 'timestamp': 1108524579}, 6 | {'user_id': 430450, 'item_id': 105383, 'data': {}, 'timestamp': 1108526786}, 7 | {'user_id': 6662, 'item_id': 30082, 'data': {}, 'timestamp': 1109696237}, 8 | {'user_id': 6662, 'item_id': 105487, 'data': {}, 'timestamp': 1109696377}, 9 | {'user_id': 6662, 'item_id': 75311, 'data': {}, 'timestamp': 1109697913}, 10 | {'user_id': 6662, 'item_id': 124, 'data': {}, 'timestamp': 1109699235}, 11 | {'user_id': 6662, 'item_id': 15422, 'data': {}, 'timestamp': 1109699966}, 12 | {'user_id': 6662, 'item_id': 76445, 'data': {}, 'timestamp': 1109705615}, 13 | {'user_id': 6662, 'item_id': 76263, 'data': {}, 'timestamp': 1109705744}] 14 | 15 | class TestYelpDataset(unittest.TestCase): 16 | def test_yelp_dataset(self): 17 | from aprec.datasets.yelp import get_yelp_dataset 18 | dataset = [json.loads(action.to_json()) for action in get_yelp_dataset(max_actions=10)] 19 | self.assertEqual(reference_actions, dataset) 20 | 21 | if __name__ == "__main__": 22 | unittest.main() -------------------------------------------------------------------------------- /tests/generate_actions.py: -------------------------------------------------------------------------------- 1 | def generate_actions(n): 2 | from math import sin, cos 3 | from aprec.api.action import Action 4 | max_users = n / 3 5 | max_timestamp = n / 2 6 | result = [] 7 | actions_set = set() 8 | i = 0 9 | while len(result) < n: 10 | user_id = int((sin(i) + 1)/2 * max_users) 11 | item_id = int((cos(i) + 1)/2 * max_users) 12 | timestamp = int((sin(i) ** 2) * max_timestamp) 13 | if ((user_id, item_id) not in actions_set): 14 | actions_set.add((user_id, item_id)) 15 | result.append(Action(user_id, item_id, timestamp)) 16 | i += 1 17 | return result 18 | -------------------------------------------------------------------------------- /tests/lossess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/lossess/__init__.py -------------------------------------------------------------------------------- /tests/lossess/test_bce_loss.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestBCELoss(unittest.TestCase): 4 | def test_bce_loss(self): 5 | from tensorflow.keras.losses import BinaryCrossentropy 6 | import tensorflow as tf 7 | from aprec.tests.lossess.bce_bad_sample import y_true as bad_y_true 8 | from aprec.tests.lossess.bce_bad_sample import y_pred as bad_y_pred 9 | from aprec.losses.bce import BCELoss 10 | loss = float(BCELoss()(bad_y_true, bad_y_pred)) 11 | print(loss) 12 | 13 | y_true = tf.constant([-1., -1, -1, -1]) 14 | y_pred = [-50.0, -50, -50, -50] 15 | loss = float(BCELoss()(y_true, y_pred)) 16 | self.assertAlmostEqual(loss, 0.0) 17 | 18 | y_true = tf.constant([1, 0, 1, 0]) 19 | y_pred = [0.1, 0.2, 0.3, 0.4] 20 | loss = float(BCELoss()(y_true, y_pred)) 21 | keras_loss = float(BinaryCrossentropy(from_logits=True)(y_true, y_pred)) 22 | self.assertAlmostEqual(loss, keras_loss, 5) 23 | 24 | y_true = tf.constant([1., 0, 1, 0]) 25 | y_pred = 
[-50.0, -50, -50, -50] 26 | loss = float(BCELoss()(y_true, y_pred)) 27 | keras_loss = float(BinaryCrossentropy(from_logits=True)(y_true, y_pred)) 28 | self.assertAlmostEqual(loss, 18.420679092407227) 29 | pass 30 | 31 | 32 | if __name__ == "__main__": 33 | unittest.main() 34 | 35 | -------------------------------------------------------------------------------- /tests/lossess/test_climf_loss.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | 5 | class TestCLIMFLoss(unittest.TestCase): 6 | def test_climf_loss(self): 7 | from aprec.losses.climf import CLIMFLoss 8 | import tensorflow.keras.backend as K 9 | climf_loss = CLIMFLoss(4, 2, 3) 10 | val = climf_loss(K.constant([[0, 0, 1, 1], 11 | [0, 0, 1, 1]]), 12 | K.constant([[0.1, 0.3, 1, 0], [0, 0, 1, 1]])) 13 | self.assertAlmostEqual(float(val), 7.418338775634766, places=4) 14 | climf_loss = CLIMFLoss(4, 1, 3) 15 | poor_pred_loss = climf_loss(K.constant([[0, 0, 1, 1]]), K.constant([[1, 0.5, 0, 0]])) 16 | avg_pred_loss = climf_loss(K.constant([[0, 0, 1, 1]]), K.constant([[0.1, 0.3, 1, 0]])) 17 | good_pred_loss = climf_loss(K.constant([[0, 0, 1, 1]]), K.constant([[0, 0, 1, 1]])) 18 | assert (poor_pred_loss > avg_pred_loss) 19 | assert (good_pred_loss < avg_pred_loss) 20 | 21 | if __name__ == "__main__": 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /tests/lossess/test_items_masking_proxy_loss.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | class TestItemsMaskingProxyLoss(unittest.TestCase): 5 | def test_items_masking_proxy_loss(self): 6 | from aprec.losses.bpr import BPRLoss 7 | from aprec.losses.bce import BCELoss 8 | from aprec.losses.items_masking_loss_proxy import ItemsMaksingLossProxy 9 | proxy_loss = ItemsMaksingLossProxy(BCELoss(), 2, 4) 10 | proxy_loss.set_batch_size(2) 11 | proxy_loss.set_num_items(10) 12 | 13 | ytrue = np.array([ 14 | [ 15 | [-100, -100, -100], 16 | [1, 0, 0], 17 | [-100, -100, -100], 18 | [1, 0, 0] 19 | ], 20 | [ 21 | [1, 0, 0], 22 | [-100, -100, -100], 23 | [-100, -100, -100], 24 | [-100, -100, -100] 25 | 26 | ] 27 | ]) 28 | np.random.seed(31337) 29 | ypred = np.random.rand(2, 4, 3) 30 | result = proxy_loss(ytrue, ypred) 31 | print(result) 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() -------------------------------------------------------------------------------- /tests/lossess/test_lambdarank_time.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestLambdaranTime(unittest.TestCase): 4 | def test_get_lambdas(self): 5 | import random 6 | import numpy as np 7 | from aprec.losses.lambda_gamma_rank import LambdaGammaRankLoss 8 | import tensorflow as tf 9 | from tqdm import tqdm 10 | 11 | 12 | random.seed(31337) 13 | np.random.seed(31337) 14 | batch_size = 128 15 | dataset_size = 128 * 1024 16 | positives_per_sample = 100 17 | n_items = 50000 18 | pred_truncate_at = 500 19 | 20 | y_true = np.zeros((batch_size, n_items)) 21 | for sample_num in range(batch_size): 22 | positives = np.random.choice((range(n_items)), positives_per_sample, replace=False) 23 | for positive in positives: 24 | y_true[sample_num][positive] = random.random() 25 | y_true = tf.constant(y_true) 26 | 27 | loss = LambdaGammaRankLoss(n_items, batch_size, 1, ndcg_at=40, dtype=tf.float32, 28 | pred_truncate_at=pred_truncate_at, bce_grad_weight=0.1) 29 | for i 
in tqdm(range(dataset_size // batch_size)): 30 | y_pred = tf.random.uniform((batch_size, n_items)) 31 | #tf.keras.losses.binary_crossentropy(y_true, y_pred) 32 | loss.get_lambdas(y_true, y_pred) 33 | 34 | 35 | if __name__ == "__main__": 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /tests/lossess/test_logit_norm.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from aprec.losses.logit_norm import LogitNormLoss 3 | import os 4 | import tensorflow as tf 5 | 6 | class LogitNormsTest(unittest.TestCase): 7 | def setUp(self) -> None: 8 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 9 | 10 | def test_logitnorm_loss(self): 11 | loss = LogitNormLoss() 12 | y_true = tf.constant([[0, 1, 0, 0], [1, 0, 0, 0.]]) 13 | y_pred = tf.constant([[0.2, -1, 5, 7], [2, 1, 1, 1.]]) 14 | expected = [1.8970, 1.1170] 15 | result = loss.calc_per_list(y_true, y_pred).numpy() 16 | self.assertAlmostEqual(expected[0], result[0], places=3) 17 | self.assertAlmostEqual(expected[1], result[1], places=3) 18 | 19 | loss = LogitNormLoss(2) 20 | result = loss.calc_per_list(y_true, y_pred).numpy() 21 | self.assertAlmostEqual(result[1], 1.2480, 4) 22 | 23 | if __name__ == "__main__": 24 | unittest.main() -------------------------------------------------------------------------------- /tests/lossess/test_softmax_crossentropy.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from aprec.losses.softmax_crossentropy import SoftmaxCrossEntropy 3 | from transformers.modeling_tf_utils import TFCausalLanguageModelingLoss 4 | import os 5 | import tensorflow as tf 6 | 7 | class SoftmaxCrossentropyLossTest(unittest.TestCase): 8 | def setUp(self) -> None: 9 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 10 | 11 | def test_nll_loss0(self): 12 | y_true_sparse = tf.constant([[1]]) 13 | y_true = [0, 1, 0, 0] 14 | y_pred = tf.constant([[0.1, 0.2, 0.3, 0.4]]) 15 | class HFTLossConfig(object): 16 | tf_legacy_loss = False 17 | 18 | hft_transformers_loss = TFCausalLanguageModelingLoss() 19 | hft_transformers_loss.config = HFTLossConfig() 20 | hft_loss = hft_transformers_loss.hf_compute_loss(y_true_sparse, y_pred).numpy()[0] 21 | our_loss = SoftmaxCrossEntropy().calc_per_list(y_true, y_pred).numpy()[0] 22 | self.assertEqual(hft_loss, our_loss) 23 | 24 | y_true_sparse = tf.constant([[1, -100, 2]]) 25 | y_true = tf.constant([[0, 1, 0, 0], [-100, -100, -100, -100], [0, 0, 1, 0]]) 26 | y_pred = tf.constant([[0.1, 0.2, 0.3, 0.4], [0.0, 0.0, 0.0, 1.0], [0.8, 0.0, 0.2, 0.0]]) 27 | hft_loss = hft_transformers_loss.hf_compute_loss(y_true_sparse, y_pred) 28 | our_loss = SoftmaxCrossEntropy().loss_per_list(y_true, y_pred) 29 | self.assertEqual(our_loss, hft_loss) 30 | 31 | 32 | if __name__ == "__main__": 33 | unittest.main() -------------------------------------------------------------------------------- /tests/lossess/test_xendcg_loss.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from aprec.losses.xendcg import XENDCGLoss 3 | 4 | 5 | class TestXENDCGLoss(unittest.TestCase): 6 | def test_xendcg(self): 7 | import tensorflow as tf 8 | true = tf.constant([[0., 1., 0.]]) 9 | pred = tf.constant([[0., 0.5, 0]]) 10 | xendcg = XENDCGLoss(true.shape[1], true.shape[0]) 11 | result = xendcg(true, pred) 12 | print(result) 13 | 14 | def test_model_xendcg(self): 15 | import tensorflow as tf 16 | from tensorflow.keras.models import Sequential 17 |
from tensorflow.keras.layers import Dense 18 | X = tf.constant([[0., 0], [1, 0]]) 19 | Y = tf.constant([[1., 0], [0, 1]]) 20 | model = Sequential() 21 | model.add(Dense(2, activation='sigmoid')) 22 | model.add(Dense(2, activation='sigmoid')) 23 | model.add(Dense(2, activation='sigmoid')) 24 | model.add(Dense(2, activation='linear')) 25 | xendcg = XENDCGLoss(X.shape[1], X.shape[0]) 26 | model.compile(optimizer='adam', loss=xendcg) 27 | 28 | model.fit(X, Y, epochs=2000, verbose=False) 29 | result = model.predict(X) 30 | tf.print(result) 31 | assert (result[0, 0] > result[0, 1]) 32 | assert (result[1, 0] < result[1, 1]) 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /tests/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/metrics/__init__.py -------------------------------------------------------------------------------- /tests/metrics/test_map.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | class TestMAP(unittest.TestCase): 5 | def test_map(self): 6 | from aprec.evaluation.metrics.map import MAP 7 | from aprec.api.action import Action 8 | recommended = [(6, 0.9), (3, 0.85), (5, 0.71), (0, 0.63), (4, 0.47), (2, 0.36), (1, 0.24), (7, 0.16)] 9 | actual = [Action(user_id = 1, item_id = 6, timestamp=1), 10 | Action(user_id = 1, item_id = 5, timestamp=2), 11 | Action(user_id = 1, item_id = 0, timestamp=3), 12 | Action(user_id = 1, item_id = 2, timestamp=4), 13 | ] 14 | map_metric = MAP(8) 15 | self.assertEqual(map_metric(recommended, actual), 0.7708333333333333) 16 | 17 | if __name__ == "__main__": 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /tests/metrics/test_mrr.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | class TestMRR(unittest.TestCase): 5 | def test_mrr(self): 6 | from aprec.evaluation.metrics.mrr import MRR 7 | from aprec.api.action import Action 8 | recommended = [(1, 2), (2, 1), (3, 0.5)] 9 | actual = [Action(user_id = 1, item_id = 4, timestamp=1), 10 | Action(user_id = 1, item_id = 3, timestamp=2)] 11 | mrr = MRR() 12 | self.assertEqual(mrr(recommended, actual), 1/3) 13 | 14 | if __name__ == "__main__": 15 | unittest.main() 16 | -------------------------------------------------------------------------------- /tests/metrics/test_ndcg.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestNDCG(unittest.TestCase): 4 | def test_ndcg(self): 5 | from aprec.evaluation.metrics.ndcg import NDCG 6 | from aprec.api.action import Action 7 | recommended = [(1, 2), (2, 1), (3, 0.5)] 8 | actual = [Action(user_id = 1, item_id = 4, timestamp=1), 9 | Action(user_id = 1, item_id = 3, timestamp=2)] 10 | ndcg = NDCG(3) 11 | self.assertEqual(ndcg(recommended, actual), 0.5) 12 | 13 | if __name__ == "__main__": 14 | unittest.main() 15 | -------------------------------------------------------------------------------- /tests/metrics/test_pairwise_cos_sim.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestPairwiseCosSim(unittest.TestCase): 4 | def test_pairwise_cos_sim(self): 5 | from aprec.evaluation.metrics.pairwise_cos_sim import PairwiseCosSim 6 |
from aprec.api.action import Action 7 | actions = [Action(user_id=1, item_id=1, timestamp=1), 8 | Action(user_id=1, item_id=3, timestamp=2), 9 | 10 | Action(user_id=2, item_id=1, timestamp=2), 11 | Action(user_id=2, item_id=2, timestamp=2), 12 | Action(user_id=2, item_id=3, timestamp=2)] 13 | 14 | pairwise_cos_sim = PairwiseCosSim(actions, 10) 15 | 16 | recommended = [(1, 2), (2, 1), (3, 0.5)] 17 | actual = [Action(user_id = 1, item_id = 1, timestamp=1), 18 | Action(user_id = 1, item_id = 2, timestamp=2)] 19 | self.assertEqual(pairwise_cos_sim(recommended, actual), 2/3) 20 | 21 | if __name__ == "__main__": 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /tests/metrics/test_precision.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestPrecision(unittest.TestCase): 4 | def test_precision(self): 5 | from aprec.evaluation.metrics.precision import Precision 6 | from aprec.api.action import Action 7 | 8 | recommended = [(1, 2), (2, 1), (3, 0.5)] 9 | actual = [Action(user_id = 1, item_id = 1, timestamp=1), 10 | Action(user_id = 1, item_id = 3, timestamp=2)] 11 | precision_1 = Precision(1) 12 | precision_2 = Precision(2) 13 | precision_3 = Precision(3) 14 | self.assertEqual(precision_1(recommended, actual), 1) 15 | self.assertEqual(precision_2(recommended, actual), 0.5) 16 | self.assertEqual(precision_3(recommended, actual), 2/3) 17 | 18 | if __name__ == "__main__": 19 | unittest.main() 20 | -------------------------------------------------------------------------------- /tests/metrics/test_proxy_metric.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | from aprec.api.action import Action 7 | from aprec.evaluation.metrics.sampled_proxy_metric import SampledProxy 8 | from aprec.evaluation.metrics.precision import Precision 9 | 10 | 11 | class TestSampledProxyMetric(unittest.TestCase): 12 | def test_proxy_precision(self): 13 | recommended = [(1, 2), (2, 1), (3, 0.5)] 14 | actual = [Action(user_id = 1, item_id = 1, timestamp=1), 15 | Action(user_id = 1, item_id = 3, timestamp=2)] 16 | all_item_ids = [1, 2, 3, 4, 5, 6] 17 | random.seed(31337) 18 | np.random.seed(31337) 19 | metric = SampledProxy(all_item_ids, [1./6] * 6, 2, Precision(3)) 20 | self.assertAlmostEqual(metric(recommended, actual), 2./3) 21 | 22 | if __name__ == "__main__": 23 | unittest.main() 24 | 25 | -------------------------------------------------------------------------------- /tests/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/misc/__init__.py -------------------------------------------------------------------------------- /tests/misc/test_evaluate_recommender.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestEvaluateRecommender(unittest.TestCase): 4 | def test_evaluate(self): 5 | import tempfile 6 | from aprec.datasets.movielens20m import get_movielens20m_actions 7 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 8 | from aprec.recommenders.top_recommender import TopRecommender 9 | from aprec.utils.generator_limit import generator_limit 10 | from aprec.evaluation.split_actions import TemporalGlobal 11 | from aprec.evaluation.n_actions_for_user import
n_actions_for_user 12 | from aprec.evaluation.evaluate_recommender import evaluate_recommender 13 | from aprec.evaluation.metrics.precision import Precision 14 | from aprec.evaluation.metrics.recall import Recall 15 | 16 | recommender = FilterSeenRecommender(TopRecommender()) 17 | actions = generator_limit(get_movielens20m_actions(), 10000) 18 | split_actions = TemporalGlobal((70, 30)) 19 | train, test = split_actions(actions) 20 | test = n_actions_for_user(test, 1) 21 | for action in train: 22 | recommender.add_action(action) 23 | recommender.rebuild_model() 24 | metrics = [Precision(1), Recall(1), Precision(5), Recall(5), Precision(10), Recall(10)] 25 | output_dir = tempfile.mkdtemp() 26 | result = evaluate_recommender(recommender, test, metrics, output_dir, "top_recommender") 27 | reference_result = {'precision@1': 0.0, 'recall@1': 0.0, 28 | 'precision@5': 0.00425531914893617, 'recall@5': 0.02127659574468085, 29 | 'precision@10': 0.002127659574468085, 'recall@10': 0.02127659574468085} 30 | self.assertEqual(reference_result, result) 31 | 32 | 33 | if __name__ == "__main__": 34 | unittest.main() 35 | 36 | 37 | -------------------------------------------------------------------------------- /tests/misc/test_item_id.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tempfile 3 | class TestItemId(unittest.TestCase): 4 | def test_get_id(self): 5 | from aprec.utils.item_id import ItemId 6 | items_dict = ItemId() 7 | self.assertEqual(items_dict.get_id("aaa"), 0) 8 | self.assertEqual(items_dict.get_id("bbb"), 1) 9 | self.assertEqual(items_dict.get_id("ccc"), 2) 10 | self.assertEqual(items_dict.get_id("ddd"), 3) 11 | self.assertEqual(items_dict.get_id("aaa"), 0) 12 | self.assertEqual(items_dict.get_id("ccc"), 2) 13 | self.assertEqual(items_dict.reverse_id(2), "ccc") 14 | self.assertTrue(items_dict.has_id(2)) 15 | self.assertFalse(items_dict.has_id(4)) 16 | self.assertTrue(items_dict.has_item("aaa")) 17 | self.assertFalse(items_dict.has_item("fff")) 18 | self.assertRaises(KeyError, items_dict.reverse_id, 4) 19 | with tempfile.NamedTemporaryFile(suffix="_dict.txt") as tmp: 20 | items_dict.save(tmp.name) 21 | new_dict = ItemId.load(tmp.name) 22 | 23 | self.assertEqual(items_dict.straight, new_dict.straight) 24 | self.assertEqual(items_dict.reverse, new_dict.reverse) 25 | self.assertEqual(new_dict.get_id("aaa"), 0) 26 | self.assertEqual(new_dict.get_id("bbb"), 1) 27 | self.assertEqual(new_dict.get_id("ccc"), 2) 28 | self.assertEqual(new_dict.get_id("ddd"), 3) 29 | self.assertEqual(new_dict.get_id("aaa"), 0) 30 | self.assertEqual(new_dict.get_id("ccc"), 2) 31 | 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /tests/misc/test_keras_ndcg.py: -------------------------------------------------------------------------------- 1 | from aprec.recommenders.metrics.ndcg import KerasNDCG 2 | import tensorflow.keras.backend as K 3 | import tensorflow as tf 4 | 5 | import numpy as np 6 | import unittest 7 | 8 | class TestKerasNDCG(unittest.TestCase): 9 | def setUp(cls): 10 | tf.keras.backend.clear_session() 11 | 12 | def tearDown(cls): 13 | tf.keras.backend.clear_session() 14 | 15 | 16 | def test_keras_ndcg(self): 17 | EPS=1e-5 18 | y_true = K.constant(np.array([[0, 1, 0], [1, 1, 0]])) 19 | y_pred = K.constant(np.array([[0.1, 0.2, 0.3], [0.6, 0.5, 0.4]])) 20 | keras_ndcg = KerasNDCG(2) 21 | res = keras_ndcg(y_true, y_pred) 22 | assert abs(res - 
K.constant(0.815464854)) < EPS 23 | 24 | if __name__ == "__main__": 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /tests/misc/test_kion_challenge_featurizer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | class TestKionChallengeFeaturizer(unittest.TestCase): 3 | def test_kion_challenge_featurizer(self): 4 | from aprec.recommenders.kion_challenge_featurizer import KionChallengeFeaturizer 5 | from aprec.datasets.mts_kion import get_users 6 | from aprec.datasets.mts_kion import get_items 7 | from aprec.datasets.mts_kion import get_mts_kion_dataset 8 | 9 | 10 | featurizer = KionChallengeFeaturizer() 11 | for user in get_users(): 12 | featurizer.add_user(user) 13 | for item in get_items(): 14 | featurizer.add_item(item) 15 | for action in get_mts_kion_dataset(20000): 16 | featurizer.add_action(action) 17 | featurizer.build() 18 | candidates = ['7638', '6686', '9506'] 19 | features = featurizer.get_features('176549', candidates) 20 | self.assertEqual(len(features), len(candidates)) 21 | for i in range(len(candidates)): 22 | self.assertEqual(len(features[i]), len(featurizer.feature_names)) 23 | pass 24 | 25 | -------------------------------------------------------------------------------- /tests/misc/test_n_actions_for_user.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | REFERENCE_1_ACTION =\ 3 | """Action(uid=0, item=0, ts=2) 4 | Action(uid=1, item=3, ts=0) 5 | Action(uid=2, item=0, ts=0) 6 | Action(uid=3, item=2, ts=3)""" 7 | 8 | 9 | REFERENCE_2_ACTION =\ 10 | """Action(uid=0, item=0, ts=2) 11 | Action(uid=0, item=2, ts=4) 12 | Action(uid=1, item=0, ts=0) 13 | Action(uid=1, item=3, ts=0) 14 | Action(uid=2, item=0, ts=0) 15 | Action(uid=2, item=2, ts=2) 16 | Action(uid=3, item=0, ts=4) 17 | Action(uid=3, item=2, ts=3)""" 18 | 19 | def sorted_actions_str(actions): 20 | return "\n".join(sorted([str(action) for action in actions])) 21 | 22 | class TestNActionsForUser(unittest.TestCase): 23 | def test_n_actions_for_user(self): 24 | from aprec.tests.generate_actions import generate_actions 25 | from aprec.evaluation.n_actions_for_user import n_actions_for_user 26 | 27 | 28 | actions = generate_actions(10) 29 | actions_1 = n_actions_for_user(actions, 1) 30 | actions_2 = n_actions_for_user(actions, 2) 31 | self.assertEqual(sorted_actions_str(actions_1), REFERENCE_1_ACTION) 32 | self.assertEqual(sorted_actions_str(actions_2), REFERENCE_2_ACTION) 33 | 34 | 35 | if __name__ == "__main__": 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /tests/misc/test_recommender_evaluator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class TestRecommenderEvaluator(unittest.TestCase): 5 | def test_recommender_evaluator(self): 6 | from aprec.datasets.movielens20m import get_movielens20m_actions 7 | from aprec.evaluation.samplers.pop_sampler import PopTargetItemsSampler 8 | from aprec.utils.generator_limit import generator_limit 9 | from aprec.evaluation.split_actions import LeaveOneOut 10 | from aprec.evaluation.metrics.precision import Precision 11 | from aprec.recommenders.top_recommender import TopRecommender 12 | from aprec.evaluation.evaluate_recommender import RecommendersEvaluator 13 | import tempfile 14 | 15 | 16 | actions = [action for action in generator_limit(get_movielens20m_actions(), 100000)] 17 |
recommenders= {"top_recommender": TopRecommender} 18 | 19 | data_splitter = LeaveOneOut(max_test_users=128) 20 | metrics = [Precision(5)] 21 | out_dir = tempfile.mkdtemp() 22 | n_val_users=10 23 | recommendations_limit = 10 24 | target_items_sampler = PopTargetItemsSampler(20) 25 | evaluator = RecommendersEvaluator(actions, recommenders, metrics, 26 | out_dir, data_splitter, n_val_users, 27 | recommendations_limit, 28 | target_items_sampler=target_items_sampler) 29 | result = evaluator()['recommenders']['top_recommender'] 30 | 31 | 32 | del(result["model_build_time"]) 33 | del(result["model_inference_time"]) 34 | del(result["minutes_to_converge"]) 35 | del(result["model_metadata"]["tensorboard_dir"]) 36 | 37 | self.assertEqual(result, 38 | {'precision@5': 0.0078125, 'sampled_metrics': {'precision@5': 0.039062500000000014}, 39 | 'model_metadata': {"top 20 items": [("318", 556), ("296", 523), ("356", 501), ("593", 493), 40 | ("260", 425), ("50", 410), ("527", 407), ("2571", 403), 41 | ("110", 372), ("1196", 356), ("457", 355), ("1198", 355), 42 | ("2858", 349), ("589", 341), ("608", 339), ("1210", 338), 43 | ("1", 334), ("858", 334), ("47", 324), ("2959", 321)]}}) 44 | 45 | if __name__ == "__main__": 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /tests/misc/test_split_actions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from aprec.evaluation.split_actions import TemporalGlobal, RandomSplit 3 | from aprec.datasets.movielens20m import get_movielens20m_actions 4 | from aprec.tests.generate_actions import generate_actions 5 | from aprec.utils.generator_limit import generator_limit 6 | from aprec.evaluation.evaluate_recommender import group_by_user 7 | 8 | 9 | class TestSplitActions(unittest.TestCase): 10 | 11 | def test_split_actions(self): 12 | actions = generate_actions(100) 13 | split_actions = TemporalGlobal((7, 1, 2)) 14 | splitted = split_actions(actions) 15 | self.assertEqual(len(splitted), 3) 16 | self.assertEqual(len(splitted[0]), 70) 17 | self.assertEqual(len(splitted[1]), 10) 18 | self.assertEqual(len(splitted[2]), 20) 19 | assert(times_func(splitted[0], max) <= times_func(splitted[1], min)) 20 | assert(times_func(splitted[1], max) <= times_func(splitted[2], min)) 21 | self.assertEqual(set(actions), set(splitted[0] + splitted[1] + splitted[2])) 22 | 23 | def test_random_split(self): 24 | user_ids = set() 25 | actions = [] 26 | for action in generator_limit(get_movielens20m_actions(), 10000): 27 | actions.append(action) 28 | user_ids.add(action.user_id) 29 | random_split = RandomSplit(0.5, 10) 30 | train, test = random_split(actions) 31 | train_users = group_by_user(train) 32 | test_users = group_by_user(test) 33 | self.assertEqual(len(test_users), 10) 34 | for user in test_users: 35 | self.assertTrue(abs(len(test_users[user]) - len(train_users[user])) <= 1) 36 | test_items = set([action.item_id for action in test_users[user]]) 37 | train_items = set([action.item_id for action in train_users[user]]) 38 | self.assertEqual(len(train_items.intersection(test_items)), 0) 39 | 40 | 41 | 42 | def times_func(actions, func): 43 | return func([action.timestamp for action in actions]) 44 | 45 | if __name__ == "__main__": 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /tests/ml_sequences.py: -------------------------------------------------------------------------------- 1 | def ml_sequences(n_actions): 2 | from 
aprec.utils.generator_limit import generator_limit 3 | from aprec.datasets.movielens20m import get_movielens20m_actions 4 | from aprec.utils.item_id import ItemId 5 | from collections import defaultdict 6 | sequences_dict = defaultdict(list) 7 | actions = [action for action in generator_limit(get_movielens20m_actions(), n_actions)] 8 | actions.sort(key = lambda action: action.timestamp) 9 | item_ids = ItemId() 10 | for action in actions: 11 | sequences_dict[action.user_id].append((action.timestamp, item_ids.get_id(action.item_id))) 12 | sequences = list(sequences_dict.values()) 13 | return sequences, item_ids 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/recommenders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/recommenders/__init__.py -------------------------------------------------------------------------------- /tests/recommenders/baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/recommenders/baselines/__init__.py -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_conditional_top_recommender.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestConditionalTopRecommender(unittest.TestCase): 4 | def test_conditional_top_recommender(self): 5 | from typing import List 6 | from aprec.api.action import Action 7 | from aprec.recommenders.conditional_top_recommender import ConditionalTopRecommender 8 | 9 | recommender = ConditionalTopRecommender(conditional_field='country_id') 10 | actions: List[Action] = [ 11 | Action(user_id=0, item_id=0, timestamp=0, data={'country_id': 100}), 12 | Action(user_id=0, item_id=0, timestamp=10, data={'country_id': 100}), 13 | Action(user_id=0, item_id=1, timestamp=20, data={'country_id': 100}), 14 | ] 15 | for action in actions: 16 | recommender.add_action(action) 17 | recommender.rebuild_model() 18 | recommendations = recommender.recommend(0, 1) 19 | self.assertEqual(recommendations, [(0, 2)]) 20 | 21 | if __name__ == "__main__": 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_constnat_recommender.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestConstantRecommender(unittest.TestCase): 4 | def test_constant_recommender(self): 5 | from aprec.recommenders.constant_recommender import ConstantRecommender 6 | constant_recommender = ConstantRecommender(((1, 1),(2, 0.5), (3, 0.4))) 7 | self.assertEqual(constant_recommender.recommend(1, 2), ((1, 1), (2, 0.5))) 8 | 9 | if __name__ == "__main__": 10 | unittest.main() 11 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_deepmf.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | USER_ID = '120' 4 | 5 | class TestDeepMF(unittest.TestCase): 6 | def test_deepmf_recommender(self): 7 | from aprec.recommenders.deep_mf import DeepMFRecommender 8 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 9 | from aprec.datasets.movielens20m 
import get_movielens20m_actions 10 | from aprec.utils.generator_limit import generator_limit 11 | 12 | deepmf_recommender = DeepMFRecommender(100, 1000, steps=20) 13 | recommender = FilterSeenRecommender(deepmf_recommender) 14 | for action in generator_limit(get_movielens20m_actions(), 10000): 15 | recommender.add_action(action) 16 | recommender.rebuild_model() 17 | recs = recommender.recommend(USER_ID, 10) 18 | print(recs) 19 | 20 | 21 | 22 | if __name__ == "__main__": 23 | unittest.main() 24 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_filter_seen_recommender.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import unittest 4 | 5 | class TestFilterSeenRecommender(unittest.TestCase): 6 | def test_constant_recommender(self): 7 | from aprec.recommenders.constant_recommender import ConstantRecommender 8 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 9 | from aprec.api.action import Action 10 | 11 | constant_recommender = ConstantRecommender(((1, 1), (2, 0.5), (3, 0.4))) 12 | recommender = FilterSeenRecommender(constant_recommender) 13 | recommender.add_action(Action(user_id=1, item_id=2, timestamp=1)) 14 | self.assertEqual(recommender.recommend(1, 2), [(1, 1), (3, 0.4)]) 15 | 16 | def test_filter_seen_sampled_rankings(self): 17 | from aprec.datasets.movielens20m import get_movielens20m_actions 18 | from aprec.recommenders.top_recommender import TopRecommender 19 | from aprec.utils.generator_limit import generator_limit 20 | from aprec.api.items_ranking_request import ItemsRankingRequest 21 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 22 | 23 | recommender = FilterSeenRecommender(TopRecommender()) 24 | ranking_request = ItemsRankingRequest(user_id='1', item_ids=['1196', '589']) 25 | recommender.add_test_items_ranking_request(ranking_request) 26 | for action in generator_limit(get_movielens20m_actions(), 1000): 27 | recommender.add_action(action) 28 | recommender.rebuild_model() 29 | recommendations = recommender.get_item_rankings() 30 | self.assertEqual(recommendations, {'1': [('589', 9), ('1196', -float('inf'))]}) 31 | 32 | if __name__ == "__main__": 33 | unittest.main() -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_first_order_mc_recommender.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | USER_ID = '120' 5 | 6 | class TestFirstOrderMCRecommender(unittest.TestCase): 7 | def test_first_order_mc_recommender(self): 8 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 9 | from aprec.recommenders.first_order_mc import FirstOrderMarkovChainRecommender 10 | from aprec.datasets.movielens20m import get_movielens20m_actions 11 | from aprec.utils.generator_limit import generator_limit 12 | recommender = FilterSeenRecommender(FirstOrderMarkovChainRecommender()) 13 | for action in generator_limit(get_movielens20m_actions(), 100000): 14 | recommender.add_action(action) 15 | recommender.rebuild_model() 16 | recs = recommender.recommend(USER_ID, 10) 17 | print(recs) 18 | 19 | def test_sampled_rankings(self): 20 | from aprec.api.items_ranking_request import ItemsRankingRequest 21 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 22 | from aprec.recommenders.first_order_mc import FirstOrderMarkovChainRecommender 23 | from
aprec.datasets.movielens20m import get_movielens20m_actions 24 | from aprec.utils.generator_limit import generator_limit 25 | 26 | recommender = FilterSeenRecommender(FirstOrderMarkovChainRecommender()) 27 | for action in generator_limit(get_movielens20m_actions(), 100000): 28 | recommender.add_action(action) 29 | ranking_request = ItemsRankingRequest('120', ['608', '294', '648']) 30 | recommender.add_test_items_ranking_request(ranking_request) 31 | recommender.rebuild_model() 32 | sampled_scores = recommender.get_item_rankings() 33 | self.assertEqual(len(sampled_scores), 1) 34 | predicted_scores = sampled_scores['120'] 35 | unseen_item = '294' 36 | for item, score in predicted_scores: 37 | if item == unseen_item: 38 | self.assertEqual(score, 0) 39 | else: 40 | self.assertGreater(score, 0) 41 | 42 | 43 | 44 | if __name__ == "__main__": 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_item_item_recommender.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | USER_ID = '120' 5 | 6 | REFERENCE_COLD_START = [('296', 62), ('318', 62), ('356', 60), 7 | ('593', 48), ('260', 44), ('50', 43), ('527', 43), ('608', 42), ('47', 41), ('480', 40)] 8 | 9 | REFERENCE_USER_RECOMMENDATIONS = [('276', 0.5), ('450', 0.5), ('296', 0.48612153038259565), ('292', 0.47265625), 10 | ('361', 0.4444444444444444), ('225', 0.4375), 11 | ('593', 0.436046511627907), ('474', 0.4166666666666667), 12 | ('1089', 0.38813151563753007), ('588', 0.3820662768031189)] 13 | 14 | class TestItemItemRecommender(unittest.TestCase): 15 | def compare_recommendations(self, rec1, rec2): 16 | print(rec1, rec2) 17 | self.assertEqual(len(rec1), len(rec2)) 18 | for i in range(len(rec1)): 19 | self.assertEqual(rec1[i][0], rec2[i][0]) 20 | self.assertAlmostEqual(rec1[i][1], rec2[i][1]) 21 | 22 | def test_item_item_recommender(self): 23 | from aprec.recommenders.item_item import ItemItemRecommender 24 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 25 | from aprec.datasets.movielens20m import get_movielens20m_actions, get_movies_catalog 26 | from aprec.utils.generator_limit import generator_limit 27 | from aprec.api.action import Action 28 | item_item_recommender = ItemItemRecommender() 29 | recommender = FilterSeenRecommender(item_item_recommender) 30 | catalog = get_movies_catalog() 31 | 32 | for action in generator_limit(get_movielens20m_actions(), 10000): 33 | recommender.add_action(action) 34 | recommender.rebuild_model() 35 | recs_cold_start = recommender.recommend(12341324, 10) 36 | self.compare_recommendations(recs_cold_start, REFERENCE_COLD_START) 37 | recs = recommender.recommend(USER_ID, 10) 38 | self.compare_recommendations(recs, REFERENCE_USER_RECOMMENDATIONS) 39 | 40 | actions = [Action('1', 1, 1), 41 | Action('1', 2, 2), 42 | Action('2', 2, 1), 43 | Action('2', 3, 1)] 44 | recommender = ItemItemRecommender() 45 | for action in actions: 46 | recommender.add_action(action) 47 | recommender.rebuild_model() 48 | 49 | 50 | 51 | 52 | 53 | 54 | if __name__ == "__main__": 55 | unittest.main() 56 | 57 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_lightfm_recommender.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestLightFMRecommender(unittest.TestCase): 4 | def test_lightfm_recommender(self): 5 | USER_ID = '120' 6 
| from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 7 | from aprec.recommenders.lightfm import LightFMRecommender 8 | from aprec.datasets.movielens20m import get_movielens20m_actions 9 | from aprec.utils.generator_limit import generator_limit 10 | 11 | lightfm_recommender = LightFMRecommender(30, 'bpr') 12 | recommender = FilterSeenRecommender(lightfm_recommender) 13 | for action in generator_limit(get_movielens20m_actions(), 10000): 14 | recommender.add_action(action) 15 | recommender.rebuild_model() 16 | recs = recommender.recommend(USER_ID, 10) 17 | print(recs) 18 | 19 | 20 | 21 | if __name__ == "__main__": 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_matrix_factorization_recommender.py: -------------------------------------------------------------------------------- 1 | from aprec.losses import top1 2 | from aprec.recommenders.matrix_factorization import MatrixFactorizationRecommender 3 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 4 | from aprec.datasets.movielens20m import get_movielens20m_actions 5 | from aprec.utils.generator_limit import generator_limit 6 | import tensorflow as tf 7 | import unittest 8 | 9 | USER_ID = '120' 10 | 11 | class TestMatrixFactorizationRecommender(unittest.TestCase): 12 | def setUp(self): 13 | tf.keras.backend.clear_session() 14 | 15 | def tearDown(self): 16 | tf.keras.backend.clear_session() 17 | 18 | 19 | 20 | def test_matrix_factorization_recommender(self): 21 | losses = ['bce', 'bpr', 'lambdarank', 'xendcg', 'climf', 'top1'] 22 | for loss in losses: 23 | print(f"testing matrix factorization model with {loss} loss") 24 | matrix_factorization_recommender = MatrixFactorizationRecommender(32, 5, loss, batch_size=10) 25 | recommender = FilterSeenRecommender(matrix_factorization_recommender) 26 | for action in generator_limit(get_movielens20m_actions(), 10000): 27 | recommender.add_action(action) 28 | recommender.rebuild_model() 29 | recs = recommender.recommend(USER_ID, 10) 30 | print(recs) 31 | 32 | def test_recommend_batch(self): 33 | matrix_factorization_recommender = MatrixFactorizationRecommender(32, 5, 'bce', batch_size=10) 34 | recommender = FilterSeenRecommender(matrix_factorization_recommender) 35 | user_ids = set() 36 | for action in generator_limit(get_movielens20m_actions(), 10000): 37 | recommender.add_action(action) 38 | user_ids.add(action.user_id) 39 | recommender.rebuild_model() 40 | requests = [(user_id, None) for user_id in ['142', '111', '57', '37', '136', '88']] 41 | batch_recommendations = recommender.recommend_batch(requests, 10) 42 | print(batch_recommendations) 43 | 44 | 45 | if __name__ == "__main__": 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_mlp_historical.py: -------------------------------------------------------------------------------- 1 | from aprec.recommenders.mlp_historical import GreedyMLPHistorical 2 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 3 | from aprec.datasets.movielens20m import get_movielens20m_actions 4 | from aprec.utils.generator_limit import generator_limit 5 | import tensorflow as tf 6 | import unittest 7 | 8 | USER_ID = '120' 9 | 10 | class TestMLPRecommender(unittest.TestCase): 11 | def setUp(self): 12 | tf.keras.backend.clear_session() 13 | 14 | def tearDown(self): 15 | tf.keras.backend.clear_session() 16 |
17 | 18 | def test_mlp_recommender(self): 19 | mlp_recommender = GreedyMLPHistorical(train_epochs=10, n_val_users=10, batch_size=5) 20 | recommender = FilterSeenRecommender(mlp_recommender) 21 | for action in generator_limit(get_movielens20m_actions(), 10000): 22 | recommender.add_action(action) 23 | recommender.rebuild_model() 24 | recs = recommender.recommend(USER_ID, 10) 25 | print(recs) 26 | 27 | 28 | 29 | if __name__ == "__main__": 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_mlp_recommender.py: -------------------------------------------------------------------------------- 1 | from aprec.recommenders.mlp import GreedyMLP 2 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 3 | from aprec.datasets.movielens20m import get_movielens20m_actions, get_movies_catalog 4 | from aprec.utils.generator_limit import generator_limit 5 | import tensorflow as tf 6 | import unittest 7 | 8 | USER_ID = '120' 9 | 10 | class TestMLPRecommender(unittest.TestCase): 11 | def setUp(cls): 12 | tf.keras.backend.clear_session() 13 | 14 | def tearDown(cls): 15 | tf.keras.backend.clear_session() 16 | 17 | 18 | def test_mlp_recommender(self): 19 | mlp_recommender = GreedyMLP(train_epochs=10) 20 | recommender = FilterSeenRecommender(mlp_recommender) 21 | for action in generator_limit(get_movielens20m_actions(), 10000): 22 | recommender.add_action(action) 23 | recommender.rebuild_model() 24 | recs = recommender.recommend(USER_ID, 10) 25 | print(recs) 26 | 27 | 28 | 29 | if __name__ == "__main__": 30 | unittest.main() 31 | 32 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_svd_recommender.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | USER_ID = '120' 4 | 5 | REFERENCE_COLD_START = [('318', 0.6019900660660039), ('296', 0.5928136146373703), ('356', 0.5671645460239426), 6 | ('593', 0.494680602882191), ('50', 0.46695169879496523), ('47', 0.46184204110408533), 7 | ('527', 0.4398795906398074), ('260', 0.43692734916941883), 8 | ('1', 0.4210339121252358), ('589', 0.4195799728444275)] 9 | 10 | REFERENCE_USER_RECOMMENDATIONS = [('296', 0.5097028006608604), 11 | ('457', 0.46596785899698745), 12 | ('110', 0.46393997126655373), 13 | ('380', 0.4291430391625074), 14 | ('593', 0.4159414958428441), 15 | ('1', 0.398391005348504), 16 | ('1210', 0.35877141070731267), 17 | ('260', 0.35489876705579815), 18 | ('292', 0.34561595303551884), 19 | ('733', 0.34348521664244525)] 20 | class TestSvdRecommender(unittest.TestCase): 21 | def compare_recommendations(self, rec1, rec2): 22 | self.assertEqual(len(rec1), len(rec2)) 23 | for i in range(len(rec1)): 24 | self.assertEqual(rec1[i][0], rec2[i][0]) 25 | self.assertAlmostEqual(rec1[i][1], rec2[i][1]) 26 | 27 | def test_svd_recommender(self): 28 | from aprec.recommenders.svd import SvdRecommender 29 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 30 | from aprec.datasets.movielens20m import get_movielens20m_actions 31 | from aprec.utils.generator_limit import generator_limit 32 | from aprec.api.action import Action 33 | 34 | svd_recommender = SvdRecommender(10, random_seed=31337) 35 | recommender = FilterSeenRecommender(svd_recommender) 36 | for action in generator_limit(get_movielens20m_actions(), 10000): 37 | recommender.add_action(action) 38 | recommender.rebuild_model() 39 | 
self.compare_recommendations(recommender.recommend(12341324, 10), REFERENCE_COLD_START) 40 | recs = recommender.recommend(USER_ID, 10) 41 | self.compare_recommendations(recs, REFERENCE_USER_RECOMMENDATIONS) 42 | 43 | actions = [Action('1', 1, 1), 44 | Action('1', 2, 2), 45 | Action('2', 2, 1), 46 | Action('2', 3, 1)] 47 | recommender = SvdRecommender(2, random_seed=31337) 48 | for action in actions: 49 | recommender.add_action(action) 50 | recommender.rebuild_model() 51 | 52 | 53 | 54 | 55 | 56 | 57 | if __name__ == "__main__": 58 | unittest.main() 59 | 60 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_top_recommender.py: -------------------------------------------------------------------------------- 1 | from aprec.datasets.movielens20m import get_movielens20m_actions, get_movies_catalog 2 | from aprec.recommenders.top_recommender import TopRecommender 3 | from aprec.utils.generator_limit import generator_limit 4 | 5 | import unittest 6 | class TestTopRecommender(unittest.TestCase): 7 | def test_top_recommender(self): 8 | recommender = TopRecommender() 9 | catalog = get_movies_catalog() 10 | for action in generator_limit(get_movielens20m_actions(), 1000): 11 | recommender.add_action(action) 12 | recommender.rebuild_model() 13 | recommendations = recommender.recommend(1, 5) 14 | self.assertEqual(recommendations, [('260', 10), ('589', 9), ('1', 8), ('356', 8), ('480', 8)]) 15 | 16 | def test_recent_top(self): 17 | recommender = TopRecommender(recency=0.5) 18 | for action in generator_limit(get_movielens20m_actions(), 1000): 19 | recommender.add_action(action) 20 | recommender.rebuild_model() 21 | recommendations = recommender.recommend(1, 5) 22 | self.assertEqual(recommendations, [('2959', 3), ('2762', 3), ('1196', 3), ('260', 3), ('587', 2)]) 23 | 24 | print(recommendations) 25 | 26 | 27 | def test_top_recommender_ranking_request(self): 28 | from aprec.api.items_ranking_request import ItemsRankingRequest 29 | recommender = TopRecommender() 30 | ranking_request = ItemsRankingRequest(user_id='1', item_ids=['1196', '589']) 31 | recommender.add_test_items_ranking_request(ranking_request) 32 | actions = list(generator_limit(get_movielens20m_actions(), 1000)) 33 | for action in actions: 34 | recommender.add_action(action) 35 | recommender.rebuild_model() 36 | recommendations = recommender.get_item_rankings() 37 | self.assertEqual(recommendations, {'1': [('589', 9), ('1196', 8)]}) 38 | 39 | if __name__ == "__main__": 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /tests/recommenders/baselines/test_transition_chain_recommender.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestTransitionsChainRecommender(unittest.TestCase): 4 | def test_transitions_chain_recommender(self): 5 | from typing import List 6 | from aprec.api.action import Action 7 | from aprec.recommenders.transition_chain_recommender import TransitionsChainRecommender 8 | recommender = TransitionsChainRecommender() 9 | actions: List[Action] = [ 10 | Action(user_id=0, item_id=0, timestamp=0, data={'utrip_id': 100}), 11 | Action(user_id=0, item_id=1, timestamp=10, data={'utrip_id': 100}), 12 | Action(user_id=0, item_id=2, timestamp=20, data={'utrip_id': 100}), 13 | 14 | Action(user_id=2, item_id=3, timestamp=0, data={'utrip_id': 200}), 15 | Action(user_id=2, item_id=1, timestamp=10, data={'utrip_id': 200}), 16 | Action(user_id=2,
item_id=2, timestamp=20, data={'utrip_id': 200}), 17 | 18 | Action(user_id=3, item_id=1, timestamp=10, data={'utrip_id': 2000}), 19 | Action(user_id=3, item_id=3, timestamp=20, data={'utrip_id': 2000}), 20 | 21 | Action(user_id=4, item_id=1, timestamp=0, data={'utrip_id': 300}), 22 | ] 23 | for action in actions: 24 | recommender.add_action(action) 25 | recommender.rebuild_model() 26 | recommendations = recommender.recommend(4, 2) 27 | self.assertEqual(recommendations, [(2, 2), (3, 1)]) 28 | 29 | if __name__ == "__main__": 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /tests/recommenders/sequential/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/recommenders/sequential/__init__.py -------------------------------------------------------------------------------- /tests/recommenders/sequential/bert4rec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/recommenders/sequential/bert4rec/__init__.py -------------------------------------------------------------------------------- /tests/recommenders/sequential/sasrec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asash/gsasrec/9dd47e83b949a84f68a4616c6f779df7f17ebb26/tests/recommenders/sequential/sasrec/__init__.py -------------------------------------------------------------------------------- /tests/recommenders/sequential/sasrec/test_positional_encoding.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | class TestSinEmbedding(unittest.TestCase): 5 | def test_embedding(self): 6 | from aprec.recommenders.sequential.models.sasrec.sasrec import ExpPositionEncoding, SinePositionEncoding 7 | sinEncoder = SinePositionEncoding(50, 64) 8 | input = np.array([[0, 1, 2, 3],[1,2,3,4]]) 9 | encoded = sinEncoder(input) 10 | self.assertEqual(encoded.shape, (2, 4, 64)) 11 | 12 | expEncoder = ExpPositionEncoding(50, 64) 13 | input = np.array([[0, 1, 2, 3],[1,2,3,4]]) 14 | encoded = expEncoder(input) 15 | self.assertEqual(encoded.shape, (2, 4, 64)) 16 | 17 | if __name__== "__main__": 18 | unittest.main() -------------------------------------------------------------------------------- /tests/recommenders/sequential/sasrec/test_sasrec_attention_map.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | 5 | class TestSasrecModel(unittest.TestCase): 6 | def test_sasrec_model(self): 7 | from aprec.recommenders.sequential.models.sasrec.sasrec import SASRecConfig 8 | from aprec.recommenders.sequential.sequential_recommender import SequentialRecommender 9 | from aprec.recommenders.sequential.sequential_recommender_config import SequentialRecommenderConfig 10 | from aprec.recommenders.sequential.target_builders.positives_only_targets_builder import PositvesOnlyTargetBuilder 11 | from aprec.recommenders.sequential.targetsplitters.last_item_splitter import SequenceContinuation 12 | from aprec.datasets.movielens20m import get_movielens20m_actions 13 | from aprec.utils.generator_limit import generator_limit 14 | 15 | sasrec_config = SASRecConfig(embedding_size=32) 16 | recommender_config = 
SequentialRecommenderConfig(sasrec_config, train_epochs=10000, early_stop_epochs=50000, 17 | batch_size=5, 18 | training_time_limit=5, 19 | max_batches_per_epoch=100, 20 | sequence_splitter=SequenceContinuation, 21 | sequence_length=5, 22 | targets_builder=PositvesOnlyTargetBuilder, 23 | use_keras_training=True) 24 | 25 | recommender = SequentialRecommender(recommender_config) 26 | val_users = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] 27 | recommender.set_val_users(val_users) 28 | for action in generator_limit(get_movielens20m_actions(), 10000): 29 | recommender.add_action(action) 30 | recommender.rebuild_model() 31 | input_seq = recommender.get_model_inputs('120')[0] 32 | seq, attn = recommender.model.get_seq_embedding(input_seq) 33 | print(attn[0]) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /tests/recommenders/sequential/sasrec/test_sasrec_no_embedding_reuse.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class TestSasrecNoEmbeddingReuse(unittest.TestCase): 5 | def test_sasrec_model_no_reuse(self): 6 | from aprec.recommenders.sequential.targetsplitters.last_item_splitter import SequenceContinuation 7 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 8 | from aprec.datasets.movielens20m import get_movielens20m_actions 9 | from aprec.utils.generator_limit import generator_limit 10 | from aprec.recommenders.sequential.models.sasrec.sasrec import SASRecConfig 11 | from aprec.recommenders.sequential.sequential_recommender import SequentialRecommender 12 | from aprec.recommenders.sequential.sequential_recommender_config import SequentialRecommenderConfig 13 | from aprec.recommenders.sequential.target_builders.positives_only_targets_builder import PositvesOnlyTargetBuilder 14 | val_users = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] 15 | sasrec_config = SASRecConfig(embedding_size=32, reuse_item_embeddings=False) 16 | recommender_config = SequentialRecommenderConfig(sasrec_config, train_epochs=10000, early_stop_epochs=50000, 17 | batch_size=5, 18 | training_time_limit=5, 19 | use_keras_training=True, 20 | max_batches_per_epoch=100, 21 | sequence_splitter=SequenceContinuation, 22 | sequence_length=5, 23 | targets_builder=PositvesOnlyTargetBuilder, 24 | ) 25 | 26 | 27 | recommender = SequentialRecommender(recommender_config) 28 | recommender.set_val_users(val_users) 29 | recommender = FilterSeenRecommender(recommender) 30 | for action in generator_limit(get_movielens20m_actions(), 10000): 31 | recommender.add_action(action) 32 | recommender.rebuild_model() 33 | USER_ID='120' 34 | recs = recommender.recommend(USER_ID, 10) 35 | print(recs) 36 | 37 | if __name__ == "__main__": 38 | unittest.main() -------------------------------------------------------------------------------- /tests/recommenders/sequential/sasrec/test_vanilla_sasrec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from aprec.recommenders.sequential.target_builders.positives_sequence_target_builder import PositivesSequenceTargetBuilder 5 | 6 | class TestVanillaSasrec(unittest.TestCase): 7 | def setUp(self): 8 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 9 | 10 | 11 | def test_vanilla_sasrec(self): 12 | from aprec.recommenders.sequential.sequential_recommender import SequentialRecommender 13 | from aprec.datasets.movielens20m import 
get_movielens20m_actions, get_movies_catalog 14 | from aprec.losses.bce import BCELoss 15 | from aprec.recommenders.sequential.target_builders.negative_per_positive_target import NegativePerPositiveTargetBuilder 16 | from aprec.recommenders.sequential.targetsplitters.shifted_sequence_splitter import ShiftedSequenceSplitter 17 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 18 | from aprec.recommenders.sequential.models.sasrec.sasrec import SASRecConfig 19 | from aprec.recommenders.sequential.sequential_recommender_config import SequentialRecommenderConfig 20 | from aprec.utils.generator_limit import generator_limit 21 | 22 | USER_ID = '120' 23 | val_users = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] 24 | model_config = SASRecConfig(embedding_size=32, vanilla=True, vanilla_num_negatives=5, vanilla_bce_t=1) 25 | 26 | recommender_config = SequentialRecommenderConfig(model_config, train_epochs=10000, early_stop_epochs=50000, 27 | batch_size=5, 28 | training_time_limit=5, 29 | sequence_splitter=ShiftedSequenceSplitter, 30 | targets_builder=PositivesSequenceTargetBuilder, 31 | use_keras_training=False 32 | ) 33 | 34 | recommender = SequentialRecommender(recommender_config) 35 | 36 | recommender.set_val_users(val_users) 37 | recommender = FilterSeenRecommender(recommender) 38 | for action in generator_limit(get_movielens20m_actions(), 10000): 39 | recommender.add_action(action) 40 | recommender.rebuild_model() 41 | recs = recommender.recommend(USER_ID, 10) 42 | catalog = get_movies_catalog() 43 | for rec in recs: 44 | print(catalog.get_item(rec[0]), "\t", rec[1]) 45 | 46 | if __name__ == "__main__": 47 | unittest.main() -------------------------------------------------------------------------------- /tests/recommenders/sequential/test_add_mask_vectorizer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestAddMaskHistoryVectorizer(unittest.TestCase): 4 | def test_add_mask(self): 5 | from aprec.recommenders.sequential.history_vectorizers.add_mask_history_vectorizer import AddMaskHistoryVectorizer 6 | seq = [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)] 7 | vectorizer = AddMaskHistoryVectorizer() 8 | vectorizer.set_sequence_len(4) 9 | vectorizer.set_padding_value(7) 10 | vectorized = vectorizer(seq) 11 | self.assertEqual(len(vectorized), 4) 12 | self.assertEqual(list(vectorized), [3, 4, 5, 8]) 13 | 14 | seq = [(3, 3), (4, 4), (5, 5)] 15 | vectorizer = AddMaskHistoryVectorizer() 16 | vectorizer.set_sequence_len(4) 17 | vectorizer.set_padding_value(7) 18 | vectorized = vectorizer(seq) 19 | self.assertEqual(len(vectorized), 4) 20 | self.assertEqual(list(vectorized), [3, 4, 5, 8]) 21 | 22 | 23 | seq = [(4, 4), (5, 5)] 24 | vectorizer = AddMaskHistoryVectorizer() 25 | vectorizer.set_sequence_len(4) 26 | vectorizer.set_padding_value(7) 27 | vectorized = vectorizer(seq) 28 | self.assertEqual(len(vectorized), 4) 29 | self.assertEqual(list(vectorized), [7, 4, 5, 8]) 30 | 31 | if __name__ == "__main__": 32 | unittest.main() 33 | -------------------------------------------------------------------------------- /tests/recommenders/sequential/test_caser_no_uid.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestCaserNoUid(unittest.TestCase): 4 | def test_caser_model_no_uid(self): 5 | from aprec.losses.bce import BCELoss 6 | from aprec.recommenders.sequential.sequential_recommender import SequentialRecommender 7 | from 
aprec.recommenders.sequential.sequential_recommender_config import SequentialRecommenderConfig 8 | from aprec.recommenders.sequential.models.caser import CaserConfig 9 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 10 | from aprec.datasets.movielens20m import get_movielens20m_actions 11 | from aprec.utils.generator_limit import generator_limit 12 | 13 | 14 | val_users = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] 15 | caser_config = CaserConfig() 16 | recommender_config = SequentialRecommenderConfig(caser_config, train_epochs=10, 17 | early_stop_epochs=5, batch_size=5, 18 | training_time_limit=10, 19 | loss=BCELoss(), 20 | sequence_length=5, 21 | use_keras_training=True 22 | ) 23 | recommender = SequentialRecommender(recommender_config) 24 | recommender.set_val_users(val_users) 25 | recommender = FilterSeenRecommender(recommender) 26 | for action in generator_limit(get_movielens20m_actions(), 10000): 27 | recommender.add_action(action) 28 | recommender.rebuild_model() 29 | USER_ID = '120' 30 | recs = recommender.recommend(USER_ID, 10) 31 | print(recs) 32 | 33 | if __name__ == '__main__': 34 | unittest.main() 35 | 36 | -------------------------------------------------------------------------------- /tests/recommenders/sequential/test_gru_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestGRUModel(unittest.TestCase): 4 | def test_gru_model(self): 5 | from aprec.losses.bce import BCELoss 6 | from aprec.recommenders.sequential.sequential_recommender_config import SequentialRecommenderConfig 7 | 8 | from aprec.recommenders.sequential.models.gru4rec import GRU4RecConfig 9 | from aprec.recommenders.sequential.sequential_recommender import SequentialRecommender 10 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 11 | from aprec.datasets.movielens20m import get_movielens20m_actions 12 | from aprec.utils.generator_limit import generator_limit 13 | USER_ID ='120' 14 | 15 | val_users = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] 16 | model_config = GRU4RecConfig() 17 | recommender_config = SequentialRecommenderConfig(model_config, train_epochs=10, early_stop_epochs=5, 18 | batch_size=5, training_time_limit=10, loss=BCELoss(), 19 | sequence_length=10) 20 | recommender = SequentialRecommender(recommender_config) 21 | recommender.set_val_users(val_users) 22 | recommender = FilterSeenRecommender(recommender) 23 | for action in generator_limit(get_movielens20m_actions(), 10000): 24 | recommender.add_action(action) 25 | recommender.rebuild_model() 26 | recs = recommender.recommend(USER_ID, 10) 27 | print(recs) 28 | 29 | if __name__ == "__main__": 30 | unittest.main() 31 | 32 | -------------------------------------------------------------------------------- /tests/recommenders/sequential/test_items_masking_target_builder.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | 5 | class TestItemsMaskingTargetBuilder(unittest.TestCase): 6 | def test_target_builder(self): 7 | from aprec.recommenders.sequential.target_builders.items_masking_target_builder import ItemsMaskingTargetsBuilder 8 | targets_builder = ItemsMaskingTargetsBuilder(relative_positions_encoding=False) 9 | targets_builder.set_sequence_len(5) 10 | targets_builder.set_n_items(10) 11 | targets_builder.build([(4, [(1, (1, 3)), (3, (3, 5))]), (3, [(1, (1, 6))])]) 12 | expected_targets = np.array([[-100, 3, -100, 5, 
-100], [-100, 6, -100, -100, -100]]) 13 | expected_positions = np.array([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]) 14 | extra_inputs, target = targets_builder.get_targets(0, 2) 15 | self.assertEqual(len(extra_inputs), 2) # extra inputs are (labels, positions) 16 | 17 | self.assertTrue(np.all(expected_targets == target)) 18 | self.assertTrue(np.all(extra_inputs[0] == target)) 19 | self.assertTrue(np.all(extra_inputs[1] == expected_positions)) 20 | 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | 26 | -------------------------------------------------------------------------------- /tests/recommenders/test_lambdamart_ensemble_recommender.py: -------------------------------------------------------------------------------- 1 | 2 | from tempfile import NamedTemporaryFile 3 | import unittest 4 | 5 | def train_model(): 6 | import json 7 | import os 8 | from aprec.datasets.movielens20m import get_movielens20m_actions 9 | from aprec.recommenders.top_recommender import TopRecommender 10 | from aprec.recommenders.svd import SvdRecommender 11 | from aprec.recommenders.lambdamart_ensemble_recommender import LambdaMARTEnsembleRecommender 12 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 13 | import tempfile 14 | from aprec.utils.generator_limit import generator_limit 15 | import pandas as pd 16 | 17 | tempdir = tempfile.mkdtemp("lambdamart_recommender_test") 18 | candidates_selection = FilterSeenRecommender(TopRecommender()) 19 | other_recommenders = { 20 | "svd_recommender": SvdRecommender(128) 21 | } 22 | recommender = LambdaMARTEnsembleRecommender( 23 | candidates_selection_recommender=candidates_selection, 24 | other_recommenders=other_recommenders, 25 | n_ensemble_users=200, 26 | n_ensemble_val_users=20, 27 | log_dir=tempdir 28 | ) 29 | 30 | USER_ID = '120' 31 | 32 | for action in generator_limit(get_movielens20m_actions(), 100000): 33 | recommender.add_action(action) 34 | recommender.rebuild_model() 35 | recs = recommender.recommend(USER_ID, 10) 36 | recs = recommender.recommend('121', 10) 37 | print(recs) 38 | print(json.dumps(recommender.get_metadata())) 39 | train_csv = pd.read_csv(os.path.join(tempdir, 'ensemble_train.csv.gz'), compression='gzip', delimiter=';') 40 | val_csv = pd.read_csv(os.path.join(tempdir, 'ensemble_train.csv.gz'), compression='gzip', delimiter=';') 41 | return recommender 42 | 43 | def train_and_save(tempdir): 44 | recommender = train_model() 45 | 46 | 47 | 48 | class TestLambdaMartEnsembleRecommender(unittest.TestCase): 49 | def test_lambdamart_ensemble_recommender(self): 50 | recommender = train_model() 51 | with NamedTemporaryFile() as tmp: 52 | recommender.save(tmp.name) 53 | 54 | 55 | 56 | if __name__ == "__main__": 57 | unittest.main() 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /tests/recommenders/test_vanilla_bert4rec.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | def get_actions(): 5 | from aprec.utils.generator_limit import generator_limit 6 | from aprec.datasets.movielens20m import get_movielens20m_actions 7 | return [action for action in generator_limit(get_movielens20m_actions(), 100000)] 8 | 9 | def get_recommender_and_add_actions(): 10 | recommender = get_recommender() 11 | for action in get_actions(): 12 | recommender.add_action(action) 13 | return recommender 14 | 15 | def get_recommender(): 16 | from aprec.recommenders.vanilla_bert4rec import VanillaBERT4Rec 17 |
return VanillaBERT4Rec(training_time_limit=5) 18 | 19 | class TestVanillaBert4rec(unittest.TestCase): 20 | def test_vanilla_bert4rec(self): 21 | recommender = get_recommender_and_add_actions() 22 | recommender.rebuild_model() 23 | print(recommender.recommend('120', 10)) 24 | recs = recommender.recommend('cold-start-user', 10) 25 | self.assertEqual(recs, []) 26 | 27 | def test_sampled_rankings(self): 28 | from aprec.api.items_ranking_request import ItemsRankingRequest 29 | recommender = get_recommender_and_add_actions() 30 | predict_items = ['260', '294', '296'] 31 | ranking_request = ItemsRankingRequest('120', ['260', '294', '296']) 32 | recommender.add_test_items_ranking_request(ranking_request) 33 | recommender.rebuild_model() 34 | sampled_scores = recommender.get_item_rankings() 35 | self.assertEqual(len(sampled_scores), 1) 36 | predicted_scores = sampled_scores['120'] 37 | unseen_item = '294' 38 | for item, score in predicted_scores: 39 | self.assertTrue(item in predict_items) 40 | if item == unseen_item: 41 | self.assertEqual(score, -float('inf')) 42 | else: 43 | self.assertGreater(score, -float('inf')) 44 | self.assertLess(score, float('inf')) 45 | 46 | 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /ui/config.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from aprec.datasets.movielens20m import get_movies_catalog, get_movielens20m_actions 3 | from aprec.recommenders.mlp_historical import GreedyMLPHistorical 4 | from aprec.recommenders.filter_seen_recommender import FilterSeenRecommender 5 | import sys 6 | 7 | CATALOG = get_movies_catalog() 8 | 9 | actions = get_movielens20m_actions(1.0) 10 | recommender = FilterSeenRecommender(GreedyMLPHistorical(train_epochs=300)) 11 | 12 | cnt = 0 13 | for action in tqdm(actions, ascii=True): 14 | recommender.add_action(action) 15 | cnt += 1 16 | 17 | sys.stderr.write("building model...") 18 | 19 | recommender.rebuild_model() 20 | 21 | sys.stderr.write("ready.") 22 | 23 | RECOMMENDER = recommender 24 | -------------------------------------------------------------------------------- /ui/server.py: -------------------------------------------------------------------------------- 1 | import tornado.ioloop 2 | import tornado.web 3 | import json 4 | import os 5 | 6 | from aprec.ui.config import CATALOG, RECOMMENDER 7 | 8 | 9 | class SearchHandler(tornado.web.RequestHandler): 10 | def get(self): 11 | keyword = self.request.arguments.get("keyword")[0].decode("utf-8") 12 | items = CATALOG.search(keyword) 13 | result = [] 14 | for item in items: 15 | result.append("[{}] {}".format(item.item_id, item.title)) 16 | self.set_header('Content-Type', 'application/json') 17 | self.write(json.dumps(result, indent=4)) 18 | 19 | 20 | class RecommenderHandler(tornado.web.RequestHandler): 21 | def post(self): 22 | history_raw = [item.decode("utf-8") for item in self.request.arguments['history[]']] 23 | history_item_ids = [item.split("]")[0].strip("[") for item in history_raw] 24 | recommendations = RECOMMENDER.recommend_by_items(history_item_ids, 10) 25 | result = [] 26 | for item in recommendations: 27 | result.append("[{}] {}".format(item[0], CATALOG.get_item(item[0]).title)) 28 | self.set_header('Content-Type', 'application/json') 29 | self.write(json.dumps(result, indent=4)) 30 | 31 | 32 | def make_app(): 33 | current_dir = os.path.dirname(__file__) 34 | static_dir = os.path.join(current_dir, 
"static") 35 | print(static_dir) 36 | return tornado.web.Application([ 37 | (r"/search", SearchHandler), 38 | (r"/recommend", RecommenderHandler), 39 | (r"/(.*)", tornado.web.StaticFileHandler, {"path": static_dir, "default_filename": "index.html"}) 40 | ]) 41 | 42 | 43 | if __name__ == "__main__": 44 | app = make_app() 45 | app.listen(31337) 46 | tornado.ioloop.IOLoop.current().start() 47 | -------------------------------------------------------------------------------- /ui/static/app.js: -------------------------------------------------------------------------------- 1 | function add_item_to_history(item){ 2 | $('#history').append('
<li>' + item + "</li>"); 3 | } 4 | 5 | function update_recommendations() { 6 | var last_movies = []; 7 | var history_elems = $('#history>li'); 8 | for (var i = 0; i < history_elems.length; ++i){ 9 | last_movies.push(history_elems[i].textContent); 10 | } 11 | $.post("/recommend", {"history": last_movies}).done(function(response){ 12 | $("#recommendations").empty(); 13 | for (var i = 0; i < response.length; ++i){ 14 | $("#recommendations").append('
<li>' + response[i] + "</li>"); 15 | } 16 | }); 17 | } 18 | 19 | $(document).ready(function() { 20 | // Defining the local dataset 21 | var movies = new Bloodhound({ 22 | datumTokenizer: Bloodhound.tokenizers.whitespace, 23 | queryTokenizer: Bloodhound.tokenizers.whitespace, 24 | remote: { 25 | url: '/search?keyword=%QUERY', 26 | wildcard: '%QUERY' 27 | } 28 | }); 29 | 30 | // Initializing the typeahead 31 | $('#search').typeahead({ 32 | hint: true, 33 | highlight: true, /* Enable substring highlighting */ 34 | minLength: 1 /* Specify minimum characters required for showing suggestions */ 35 | }, 36 | { 37 | name: 'moviesSearch', 38 | source: movies 39 | }); 40 | 41 | 42 | $('#search').bind('typeahead:selected', function(obj, datum, name) { 43 | add_item_to_history(datum); 44 | $('#search').typeahead('val', ''); 45 | update_recommendations(); 46 | }); 47 | // all custom jQuery will go here 48 | }); -------------------------------------------------------------------------------- /ui/static/index.html: -------------------------------------------------------------------------------- 1 | <!-- Markup garbled during extraction (tags were stripped; only text content survived). Recoverable content: page title "Item-Item recommender demo page"; page text "Aprec recommender demo page", "Based on MovieLens 20M Dataset", "Add few movies into liked list to get personal recommendations"; a typeahead search input (id "search"), a "Your history" list (id "history"), and a "Recommended Movies" list (id "recommendations"), all wired up by app.js above. -->
-------------------------------------------------------------------------------- /utils/generator_limit.py: -------------------------------------------------------------------------------- 1 | def generator_limit(generator, n): 2 | """Yield at most the first n items produced by generator.""" 3 | count = 0 4 | for item in generator: 5 | if count >= n: 6 | break 7 | yield item 8 | count += 1 9 | 10 | -------------------------------------------------------------------------------- /utils/item_id.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | 4 | 5 | class ItemId(object): 6 | def __init__(self): 7 | self.straight = {} 8 | self.reverse = {} 9 | self.get_count = Counter() 10 | 11 | def size(self): 12 | return len(self.straight) 13 | 14 | def get_id(self, item_id): 15 | if item_id not in self.straight: 16 | self.straight[item_id] = len(self.straight) 17 | self.reverse[self.straight[item_id]] = item_id 18 | self.get_count[item_id] += 1 19 | return self.straight[item_id] 20 | 21 | def has_id(self, id): 22 | return id in self.reverse 23 | 24 | def has_item(self, item_id): 25 | return item_id in self.straight 26 | 27 | def reverse_id(self, id): 28 | return self.reverse[id] 29 | 30 | def save(self, file_name): 31 | with open(file_name, "w") as output: 32 | for item in self.straight: 33 | output.write(f"{item} {self.straight[item]}\n") 34 | 35 | @staticmethod 36 | def load(file_name): # note: only the id mapping is persisted, so get_count starts empty after load 37 | straight, reverse = {}, {} 38 | for line in open(file_name): 39 | external, internal = line.rstrip().split(" ") 40 | internal = int(internal) 41 | straight[external] = internal 42 | reverse[internal] = external 43 | result = ItemId() 44 | result.straight = straight 45 | result.reverse = reverse 46 | return result -------------------------------------------------------------------------------- /utils/os_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import subprocess 4 | import shlex 5 | import logging 6 | import hashlib 7 | 8 | def get_dir(): 9 | utils_dirname = os.path.dirname(os.path.abspath(__file__)) 10 | lib_dirname = os.path.abspath(os.path.join(utils_dirname, "..")) 11 | return lib_dirname 12 | 13 | def recursive_listdir(dir_name): 14 | result = [] 15 | for name in os.listdir(dir_name): 16 | full_name = os.path.join(dir_name, name) 17 | if os.path.isdir(full_name): 18 | result += recursive_listdir(full_name) 19 | else: 20 | result.append(full_name) 21 | return result 22 | 23 | def shell(cmd): 24 | logging.info("running shell command: \n {}".format(cmd)) 25 | subprocess.check_call(shlex.split(cmd)) 26 | 27 | def mkdir_p(dir_path): 28 | shell("mkdir -p {}".format(dir_path)) 29 | return Path(dir_path) 30 | 31 | def mkdir_p_local(relative_dir_path): 32 | """Create a folder inside the library directory if it does not already exist.""" 33 | local_dir = get_dir() 34 | abspath = os.path.join(local_dir, relative_dir_path) 35 | mkdir_p(abspath) 36 | return abspath 37 | 38 | 39 | def file_md5(fname): 40 | hash_md5 = hashlib.md5() 41 | with open(fname, "rb") as f: 42 | for chunk in iter(lambda: f.read(4096), b""): 43 | hash_md5.update(chunk) 44 | return hash_md5.hexdigest() 45 | 46 | def console_logging(): 47 | logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s') 48 | --------------------------------------------------------------------------------
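
Appendix: a minimal usage sketch (not part of the repository) showing how generator_limit and ItemId from the utils above fit together. FakeAction and fake_actions() are hypothetical stand-ins for a real action source such as get_movielens20m_actions(); everything else uses only the calls defined in utils/generator_limit.py and utils/item_id.py.

import os
import tempfile

from aprec.utils.generator_limit import generator_limit
from aprec.utils.item_id import ItemId


class FakeAction:
    """Hypothetical minimal action object; only item_id is used here."""
    def __init__(self, item_id):
        self.item_id = item_id


def fake_actions():
    # hypothetical stand-in for a real source such as get_movielens20m_actions()
    for raw_id in ["318", "296", "318", "356"]:
        yield FakeAction(raw_id)


item_ids = ItemId()
for action in generator_limit(fake_actions(), 3):  # consume at most the first 3 actions
    item_ids.get_id(action.item_id)  # assigns dense internal ids in first-seen order

assert item_ids.size() == 2             # only "318" and "296" were seen within the limit
assert item_ids.reverse_id(0) == "318"  # internal id 0 maps back to the first external id

# The mapping round-trips through a plain "external internal" text file;
# note that the get_count statistics are not persisted by save()/load().
path = os.path.join(tempfile.mkdtemp(), "mapping.txt")
item_ids.save(path)
restored = ItemId.load(path)
assert restored.get_id("296") == item_ids.get_id("296")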