├── .gitignore
├── README.md
├── dataloader.py
├── dlrm_criteo_gpu.py
├── dlrm_criteo_tpu.py
├── noddlrm
│   ├── __init__.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   └── utils.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── dict_mean.py
│   │   └── ranking_metrics.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── latent_factor.py
│   │   ├── multi_layer_perceptron.py
│   │   ├── pairwise_log_loss.py
│   │   ├── pointwise_mse_loss.py
│   │   └── second_order_feature_interaction.py
│   └── recommenders
│       ├── __init__.py
│       └── dlrm.py
├── setup.cfg
└── setup.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tensorflow-dlrm
2 | This is Nod's TensorFlow version of DLRM, based on the [**OpenRec**](http://www.openrec.ai/) DLRM model. We extracted the OpenRec DLRM source code and fixed some bugs in its model definition so that it works with tensorflow-gpu==2.2 and Python 3.7.
3 | 
4 | 
5 | ## Install tensorflow-dlrm from source code ##
6 | 
7 | First, clone noddlrm using `git`:
8 | 
9 | ```sh
10 | git clone https://github.com/NodLabs/tensorflow-dlrm
11 | ```
12 | 
13 | Then, `cd` to the tensorflow-dlrm folder and run the install command (if you want to install
14 | noddlrm into your Python environment):
15 | 
16 | ```sh
17 | cd tensorflow-dlrm
18 | python setup.py install
19 | ```
20 | Now you have installed noddlrm on your system.
21 | 
22 | ## Dataset download
23 | 
24 | All datasets can be downloaded from Google Drive [here](https://drive.google.com/drive/folders/1taJ91txiMAWBMUtezc_N5gaYuTEpvW_e?usp=sharing).
25 | In our example, we use the Criteo dataset.
26 | 
27 | ## Training and saving the model
28 | Edit dlrm_criteo_gpu.py or dlrm_criteo_tpu.py to point to your Criteo dataset path.
29 | Then run the example script we have provided.
30 | ```sh 31 | cd tensorflow-dlrm/ 32 | export PYTHONPATH="$PWD" 33 | python3 dlrm_criteo_gpu.py 34 | # python3 dlrm_criteo_tpu.py 35 | ``` 36 | ## Outputs ## 37 | ### GPU ### 38 | ```sh 39 | python3 dlrm_criteo_gpu.py 40 | 41 | 2020-07-14 08:10:32.701182: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 42 | 2020-07-14 08:10:32.729195: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 43 | 2020-07-14 08:10:32.729514: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 44 | pciBusID: 0000:01:00.0 name: GeForce RTX 2080 computeCapability: 7.5 45 | coreClock: 1.59GHz coreCount: 46 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 417.29GiB/s 46 | 2020-07-14 08:10:32.729734: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/chi/bin/vulkansdk/x86_64/lib::/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64 47 | 2020-07-14 08:10:32.730158: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcublas.so.10'; dlerror: libcublas.so.10: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/chi/bin/vulkansdk/x86_64/lib::/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64 48 | 2020-07-14 08:10:32.730298: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/chi/bin/vulkansdk/x86_64/lib::/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64 49 | 2020-07-14 08:10:32.730501: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcurand.so.10'; dlerror: libcurand.so.10: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/chi/bin/vulkansdk/x86_64/lib::/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64 50 | 2020-07-14 08:10:32.730632: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcusolver.so.10'; dlerror: libcusolver.so.10: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/chi/bin/vulkansdk/x86_64/lib::/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64 51 | 2020-07-14 08:10:32.730848: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcusparse.so.10'; dlerror: libcusparse.so.10: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/chi/bin/vulkansdk/x86_64/lib::/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64 52 | 2020-07-14 08:10:32.766959: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 53 | 2020-07-14 08:10:32.766987: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1598] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. 54 | Skipping registering GPU devices... 
55 | 2020-07-14 08:10:32.768812: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA 56 | 2020-07-14 08:10:32.799122: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2899885000 Hz 57 | 2020-07-14 08:10:32.799874: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f2204000b20 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 58 | 2020-07-14 08:10:32.799887: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 59 | 2020-07-14 08:10:32.801441: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix: 60 | 2020-07-14 08:10:32.801453: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108] 61 | WARNING:tensorflow:From /home/chi/nnc_env/lib/python3.7/site-packages/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py:158: calling LinearOperator.__init__ (from tensorflow.python.ops.linalg.linear_operator) with graph_parents is deprecated and will be removed in a future version. 62 | Instructions for updating: 63 | Do not pass `graph_parents`. They will no longer be used. 64 | Iter: 0, Loss: 0.24, AUC: 0.5614 65 | Iter: 100, Loss: 0.19, AUC: 0.6755 66 | Iter: 200, Loss: 0.17, AUC: 0.6976 67 | Iter: 300, Loss: 0.17, AUC: 0.7037 68 | Iter: 400, Loss: 0.17, AUC: 0.7062 69 | Iter: 500, Loss: 0.17, AUC: 0.7079 70 | Iter: 600, Loss: 0.17, AUC: 0.7080 71 | Iter: 700, Loss: 0.17, AUC: 0.7095 72 | Iter: 800, Loss: 0.17, AUC: 0.7099 73 | Iter: 900, Loss: 0.17, AUC: 0.7103 74 | 2020-07-14 08:21:00.611816: W tensorflow/python/util/util.cc:329] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them. 75 | WARNING:tensorflow:From /home/chi/nnc_env/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version. 76 | Instructions for updating: 77 | If using Keras pass *_constraint arguments to layers. 78 | 79 | ``` 80 | ### TPU ### 81 | ```sh 82 | python3 dlrm_criteo_tpu.py 83 | 84 | 2020-07-14 15:16:10.152558: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA 85 | 2020-07-14 15:16:10.179204: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2300000000 Hz 86 | 2020-07-14 15:16:10.183868: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x38c1f20 initialized for platform Host (this does not guarantee that XLA will be used). 
87 | Devices: 88 | 2020-07-14 15:16:10.183924: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 89 | 2020-07-14 15:16:10.232148: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job worker -> {0 -> 10.240.1.2:8470} 90 | 2020-07-14 15:16:10.232200: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job localhost -> {0 -> localhost:31017} 91 | 2020-07-14 15:16:10.255055: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job worker -> {0 -> 10.240.1.2:8470} 92 | 2020-07-14 15:16:10.255110: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job localhost -> {0 -> localhost:31017} 93 | 2020-07-14 15:16:10.259154: I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:390] Started server with target: grpc://localhost:31017 94 | All devices: [LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:7', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:6', device_type='TP 95 | U'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:5', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:4', device_type='TPU'), Logic 96 | alDevice(name='/job:worker/replica:0/task:0/device:TPU:0', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:1', device_type='TPU'), LogicalDevice(n 97 | ame='/job:worker/replica:0/task:0/device:TPU:2', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:3', device_type='TPU')] 98 | WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py:158: calling LinearOperator.__init__ (from tensorf 99 | low.python.ops.linalg.linear_operator) with graph_parents is deprecated and will be removed in a future version. 100 | Instructions for updating: 101 | Do not pass `graph_parents`. They will no longer be used. 102 | WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py:158: calling LinearOperator.__init__ (from tensorf 103 | low.python.ops.linalg.linear_operator) with graph_parents is deprecated and will be removed in a future version. 104 | Instructions for updating: 105 | Do not pass `graph_parents`. They will no longer be used. 106 | Iter: 0, Loss: 0.22, AUC: 0.4884 107 | Iter: 100, Loss: 0.19, AUC: 0.6258 108 | Iter: 200, Loss: 0.18, AUC: 0.6673 109 | Iter: 300, Loss: 0.18, AUC: 0.6827 110 | Iter: 400, Loss: 0.17, AUC: 0.6939 111 | Iter: 500, Loss: 0.17, AUC: 0.7030 112 | Iter: 600, Loss: 0.17, AUC: 0.7066 113 | Iter: 700, Loss: 0.17, AUC: 0.7079 114 | Iter: 800, Loss: 0.17, AUC: 0.7100 115 | Iter: 900, Loss: 0.17, AUC: 0.7107 116 | 2020-07-14 15:22:10.259060: W tensorflow/python/util/util.cc:329] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them. 117 | WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python. 118 | ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version. 119 | Instructions for updating: 120 | If using Keras pass *_constraint arguments to layers. 
121 | WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.
122 | ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
123 | Instructions for updating:
124 | If using Keras pass *_constraint arguments to layers.
125 | ```
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
--------------------------------------------------------------------------------
/dataloader.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from io import BytesIO
4 | from tensorflow.python.lib.io import file_io
5 | 
6 | def load_amazon_book(dataset_folder='dataset/'):
7 | 
8 |     raw_data = dict()
9 |     raw_data['total_users'] = 99473
10 |     raw_data['total_items'] = 450166
11 | 
12 |     raw_data['train_data'] = np.load(dataset_folder + 'amazon/user_data_train.npy')
13 |     raw_data['val_data'] = np.load(dataset_folder + 'amazon/user_data_val.npy')
14 |     raw_data['test_data'] = np.load(dataset_folder + 'amazon/user_data_test.npy')
15 | 
16 |     raw_data['item_features'] = np.array(np.memmap(dataset_folder + 'amazon/book_features_update.mem',
17 |                                                    dtype=np.float32, mode='r', shape=(raw_data['total_items'], 4096)))
18 |     raw_data['user_features'] = np.load(dataset_folder + 'amazon/user_features_categories.npy')
19 |     return raw_data
20 | 
21 | def load_citeulike(dataset_folder='dataset/'):
22 | 
23 |     raw_data = dict()
24 |     raw_data['total_users'] = 5551
25 |     raw_data['total_items'] = 16980
26 | 
27 |     raw_data['train_data'] = np.load(dataset_folder + 'citeulike/user_data_train.npy')
28 |     raw_data['val_data'] = np.load(dataset_folder + 'citeulike/user_data_val.npy')
29 |     raw_data['test_data'] = np.load(dataset_folder + 'citeulike/user_data_test.npy')
30 | 
31 |     return raw_data
32 | 
33 | def load_tradesy(dataset_folder='dataset/'):
34 | 
35 |     raw_data = dict()
36 |     raw_data['total_users'] = 19243
37 |     raw_data['total_items'] = 165906
38 | 
39 |     raw_data['train_data'] = np.load(dataset_folder + 'tradesy/user_data_train.npy')
40 |     raw_data['val_data'] = np.load(dataset_folder + 'tradesy/user_data_val.npy')
41 |     raw_data['test_data'] = np.load(dataset_folder + 'tradesy/user_data_test.npy')
42 | 
43 |     raw_data['item_features'] = np.load(dataset_folder + 'tradesy/item_features.npy') / 32.671101
44 |     return raw_data
45 | 
46 | 
47 | def load_criteo_google_cloud(dataset_folder='dataset/'):
48 |     # Data processing code adapted from https://github.com/facebookresearch/dlrm
49 |     # Follow steps in https://github.com/ylongqi/dlrm/blob/master/data_utils.py to generate kaggle_processed.npz
50 |     # Or use the `./download_dataset.sh criteo` command to download the processed data.
51 | 52 | f = BytesIO(file_io.read_file_to_string(dataset_folder + 'criteo/kaggle_processed.npz', binary_mode=True)) 53 | with np.load(f) as data: 54 | X_int = data["X_int"] 55 | X_cat = data["X_cat"] 56 | y = data["y"] 57 | counts = data["counts"] 58 | 59 | indices = np.arange(len(y)) 60 | indices = np.array_split(indices, 7) 61 | for i in range(len(indices)): 62 | indices[i] = np.random.permutation(indices[i]) 63 | 64 | train_indices = np.concatenate(indices[:-1]) 65 | test_indices = indices[-1] 66 | val_indices, test_indices = np.array_split(test_indices, 2) 67 | train_indices = np.random.permutation(train_indices) 68 | 69 | raw_data = dict() 70 | 71 | raw_data['counts'] = counts 72 | 73 | raw_data['X_cat_train'] = X_cat[train_indices].astype(np.int32) 74 | raw_data['X_int_train'] = np.log(X_int[train_indices] + 1).astype(np.float32) 75 | raw_data['y_train'] = y[train_indices].astype(np.float32) 76 | 77 | raw_data['X_cat_val'] = X_cat[val_indices] 78 | raw_data['X_int_val'] = np.log(X_int[val_indices] + 1).astype(np.float32) 79 | raw_data['y_val'] = y[val_indices] 80 | 81 | raw_data['X_cat_test'] = X_cat[test_indices] 82 | raw_data['X_int_test'] = np.log(X_int[test_indices] + 1).astype(np.float32) 83 | raw_data['y_test'] = y[test_indices] 84 | 85 | return raw_data 86 | 87 | def load_criteo(dataset_folder='dataset/'): 88 | 89 | # Data processing code adapted from https://github.com/facebookresearch/dlrm 90 | # Follow steps in https://github.com/ylongqi/dlrm/blob/master/data_utils.py to generate kaggle_processed.npz 91 | # Or using `./download_dataset.sh criteo` command to download the processed data. 92 | 93 | with np.load(dataset_folder + 'criteo/kaggle_processed.npz') as data: 94 | 95 | X_int = data["X_int"] 96 | X_cat = data["X_cat"] 97 | y = data["y"] 98 | counts = data["counts"] 99 | 100 | indices = np.arange(len(y)) 101 | indices = np.array_split(indices, 7) 102 | for i in range(len(indices)): 103 | indices[i] = np.random.permutation(indices[i]) 104 | 105 | train_indices = np.concatenate(indices[:-1]) 106 | test_indices = indices[-1] 107 | val_indices, test_indices = np.array_split(test_indices, 2) 108 | train_indices = np.random.permutation(train_indices) 109 | 110 | raw_data = dict() 111 | 112 | raw_data['counts'] = counts 113 | 114 | raw_data['X_cat_train'] = X_cat[train_indices].astype(np.int32) 115 | raw_data['X_int_train'] = np.log(X_int[train_indices]+1).astype(np.float32) 116 | raw_data['y_train'] = y[train_indices].astype(np.float32) 117 | 118 | raw_data['X_cat_val'] = X_cat[val_indices] 119 | raw_data['X_int_val'] = np.log(X_int[val_indices]+1).astype(np.float32) 120 | raw_data['y_val'] = y[val_indices] 121 | 122 | raw_data['X_cat_test'] = X_cat[test_indices] 123 | raw_data['X_int_test'] = np.log(X_int[test_indices]+1).astype(np.float32) 124 | raw_data['y_test'] = y[test_indices] 125 | 126 | return raw_data 127 | -------------------------------------------------------------------------------- /dlrm_criteo_gpu.py: -------------------------------------------------------------------------------- 1 | from tensorflow.data import Dataset 2 | from noddlrm.recommenders import DLRM 3 | from tensorflow.keras import optimizers 4 | from tqdm import tqdm 5 | import tensorflow as tf 6 | import dataloader 7 | 8 | raw_data = dataloader.load_criteo('../dataset/') 9 | dim_embed = 4 10 | bottom_mlp_size = [8, 4] 11 | top_mlp_size = [128, 64, 1] 12 | total_iter = int(1e5) 13 | batch_size = 1024 14 | eval_interval = 100 15 | save_interval = eval_interval 16 | 17 | # Sample 1000 batches for 
training 18 | train_dataset = Dataset.from_tensor_slices({ 19 | 'dense_features': raw_data['X_int_train'][:batch_size*1000], 20 | 'sparse_features': raw_data['X_cat_train'][:batch_size*1000], 21 | 'label': raw_data['y_train'][:batch_size*1000] 22 | }).batch(batch_size).prefetch(1).shuffle(5*batch_size) 23 | 24 | # Sample 100 batches for validation 25 | val_dataset = Dataset.from_tensor_slices({ 26 | 'dense_features': raw_data['X_int_val'][:batch_size*100], 27 | 'sparse_features': raw_data['X_cat_val'][:batch_size*100], 28 | 'label': raw_data['y_val'][:batch_size*100] 29 | }).batch(batch_size) 30 | 31 | optimizer = optimizers.Adam() 32 | 33 | dlrm_model = DLRM( 34 | m_spa=dim_embed, 35 | ln_emb=raw_data['counts'], 36 | ln_bot=bottom_mlp_size, 37 | ln_top=top_mlp_size 38 | ) 39 | 40 | auc = tf.keras.metrics.AUC() 41 | 42 | @tf.function 43 | def train_step(dense_features, sparse_features, label): 44 | with tf.GradientTape() as tape: 45 | loss_value = dlrm_model.get_myloss(dense_features, sparse_features, label) 46 | gradients = tape.gradient(loss_value, dlrm_model.trainable_variables) 47 | optimizer.apply_gradients(zip(gradients, dlrm_model.trainable_variables)) 48 | return loss_value 49 | 50 | @tf.function 51 | def eval_step(dense_features, sparse_features, label): 52 | pred = dlrm_model.inference(dense_features, sparse_features) 53 | auc.update_state(y_true=label, y_pred=pred) 54 | 55 | average_loss = tf.keras.metrics.Mean() 56 | 57 | for train_iter, batch_data in enumerate(train_dataset): 58 | 59 | loss = train_step(**batch_data) 60 | average_loss.update_state(loss) 61 | print('%d iter training.' % train_iter, end='\r') 62 | 63 | if train_iter % eval_interval == 0: 64 | for eval_batch_data in tqdm(val_dataset, 65 | leave=False, 66 | desc='%d iter evaluation' % train_iter): 67 | eval_step(**eval_batch_data) 68 | print("Iter: %d, Loss: %.2f, AUC: %.4f" % (train_iter, 69 | average_loss.result().numpy(), 70 | auc.result().numpy())) 71 | average_loss.reset_states() 72 | auc.reset_states() 73 | 74 | dlrm_model.save('DLRMModel_tf2_2') 75 | -------------------------------------------------------------------------------- /dlrm_criteo_tpu.py: -------------------------------------------------------------------------------- 1 | from tensorflow.data import Dataset 2 | from noddlrm.recommenders import DLRM 3 | from tensorflow.keras import optimizers 4 | from tqdm import tqdm 5 | import tensorflow as tf 6 | import dataloader 7 | 8 | #setup tpu enviroment 9 | resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://10.240.1.2') 10 | tf.config.experimental_connect_to_cluster(resolver) 11 | # This is the TPU initialization code that has to be at the beginning. 
12 | tf.tpu.experimental.initialize_tpu_system(resolver) 13 | print("All devices: ", tf.config.list_logical_devices('TPU')) 14 | 15 | raw_data = dataloader.load_criteo('../dataset/') 16 | dim_embed = 4 17 | bottom_mlp_size = [8, 4] 18 | top_mlp_size = [128, 64, 1] 19 | total_iter = int(1e5) 20 | batch_size = 1024 21 | eval_interval = 100 22 | save_interval = eval_interval 23 | 24 | # Sample 1000 batches for training 25 | train_dataset = Dataset.from_tensor_slices({ 26 | 'dense_features': raw_data['X_int_train'][:batch_size*1000], 27 | 'sparse_features': raw_data['X_cat_train'][:batch_size*1000], 28 | 'label': raw_data['y_train'][:batch_size*1000] 29 | }).batch(batch_size).prefetch(1).shuffle(5*batch_size) 30 | 31 | # Sample 100 batches for validation 32 | val_dataset = Dataset.from_tensor_slices({ 33 | 'dense_features': raw_data['X_int_val'][:batch_size*100], 34 | 'sparse_features': raw_data['X_cat_val'][:batch_size*100], 35 | 'label': raw_data['y_val'][:batch_size*100] 36 | }).batch(batch_size) 37 | 38 | optimizer = optimizers.Adam() 39 | 40 | dlrm_model = DLRM( 41 | m_spa=dim_embed, 42 | ln_emb=raw_data['counts'], 43 | ln_bot=bottom_mlp_size, 44 | ln_top=top_mlp_size 45 | ) 46 | 47 | auc = tf.keras.metrics.AUC() 48 | 49 | @tf.function 50 | def train_step(dense_features, sparse_features, label): 51 | with tf.GradientTape() as tape: 52 | loss_value = dlrm_model.get_myloss(dense_features, sparse_features, label) 53 | gradients = tape.gradient(loss_value, dlrm_model.trainable_variables) 54 | optimizer.apply_gradients(zip(gradients, dlrm_model.trainable_variables)) 55 | return loss_value 56 | 57 | @tf.function 58 | def eval_step(dense_features, sparse_features, label): 59 | pred = dlrm_model.inference(dense_features, sparse_features) 60 | auc.update_state(y_true=label, y_pred=pred) 61 | 62 | average_loss = tf.keras.metrics.Mean() 63 | 64 | for train_iter, batch_data in enumerate(train_dataset): 65 | 66 | loss = train_step(**batch_data) 67 | average_loss.update_state(loss) 68 | print('%d iter training.' 
% train_iter, end='\r') 69 | 70 | if train_iter % eval_interval == 0: 71 | for eval_batch_data in tqdm(val_dataset, 72 | leave=False, 73 | desc='%d iter evaluation' % train_iter): 74 | eval_step(**eval_batch_data) 75 | print("Iter: %d, Loss: %.2f, AUC: %.4f" % (train_iter, 76 | average_loss.result().numpy(), 77 | auc.result().numpy())) 78 | average_loss.reset_states() 79 | auc.reset_states() 80 | 81 | dlrm_model.save('gs://nodtpu/chi/drlm/models/criteo/') 82 | -------------------------------------------------------------------------------- /noddlrm/__init__.py: -------------------------------------------------------------------------------- 1 | from noddlrm.recommenders.dlrm import DLRM -------------------------------------------------------------------------------- /noddlrm/data/__init__.py: -------------------------------------------------------------------------------- 1 | from noddlrm.data.utils import _DataStore 2 | from noddlrm.data.utils import _ParallelDataset 3 | from noddlrm.data.dataset import Dataset -------------------------------------------------------------------------------- /noddlrm/data/dataset.py: -------------------------------------------------------------------------------- 1 | from noddlrm.data import _ParallelDataset 2 | from noddlrm.data import _DataStore 3 | import tensorflow as tf 4 | import numpy as np 5 | import random 6 | 7 | def _pairwise_generator(datastore): 8 | 9 | while True: 10 | entry = datastore.next_random_record() 11 | user_id = entry['user_id'] 12 | p_item_id = entry['item_id'] 13 | n_item_id = datastore.sample_negative_items(user_id)[0] 14 | yield {'user_id': user_id, 15 | 'p_item_id': p_item_id, 16 | 'n_item_id': n_item_id} 17 | 18 | def _stratified_pointwise_generator(datastore, pos_ratio): 19 | 20 | while True: 21 | if random.random() <= pos_ratio: 22 | entry = datastore.next_random_record() 23 | yield {'user_id': entry['user_id'], 24 | 'item_id': entry['item_id'], 25 | 'label': 1.0} 26 | else: 27 | user_id = random.randint(0, datastore.total_users()-1) 28 | item_id = random.randint(0, datastore.total_items()-1) 29 | while datastore.is_positive(user_id, item_id): 30 | user_id = random.randint(0, datastore.total_users()-1) 31 | item_id = random.randint(0, datastore.total_items()-1) 32 | yield {'user_id': user_id, 33 | 'item_id': item_id, 34 | 'label': 0.0} 35 | 36 | def _per_pos_stratified_pointwise_generator(datastore, pos_ratio): 37 | 38 | num_negative_per_positive = int((1 - pos_ratio) / pos_ratio) 39 | 40 | while True: 41 | 42 | entry = datastore.next_random_record() 43 | user_id = entry['user_id'] 44 | p_item_id = entry['item_id'] 45 | yield {'user_id': user_id, 46 | 'item_id': p_item_id, 47 | 'label': 1.0} 48 | 49 | count = 0 50 | for n_item_id in random.sample(range(datastore.total_items()), k=num_negative_per_positive + 1): 51 | if n_item_id == p_item_id: 52 | continue 53 | yield {'user_id': user_id, 54 | 'item_id': n_item_id, 55 | 'label': 0.0} 56 | count += 1 57 | if count >= num_negative_per_positive: 58 | break 59 | 60 | def _evaluation_generator(datastore, excl_datasets): 61 | 62 | eval_users = datastore.warm_users() 63 | 64 | for user_id in eval_users: 65 | 66 | pos_mask_npy = np.zeros(datastore.total_items(), dtype=np.bool) # Reset pos_mask 67 | positive_items = datastore.get_positive_items(user_id) 68 | pos_mask_npy[positive_items] = True 69 | 70 | if datastore.contain_negatives(): 71 | excl_mask_npy = np.ones(datastore.total_items(), dtype=np.bool) # Reset excl_mask 72 | excl_mask_npy[positive_items] = False 73 | negative_items 
= datastore.get_negative_items(user_id) 74 | excl_mask_npy[negative_items] = False 75 | else: 76 | excl_mask_npy = np.zeros(datastore.total_items(), dtype=np.bool) # Reset excl_mask 77 | 78 | excl_positive_items = [] 79 | for excl_d in excl_datasets: 80 | excl_positive_items += excl_d.datastore.get_positive_items(user_id) 81 | excl_mask_npy[excl_positive_items] = True 82 | 83 | yield {'user_id': user_id, 84 | 'pos_mask': pos_mask_npy, 85 | 'excl_mask': excl_mask_npy} 86 | 87 | class Dataset: 88 | 89 | def __init__(self, raw_data, total_users, total_items, implicit_negative=True, 90 | num_negatives=None, seed=None, sortby=None, asc=True, name=None): 91 | 92 | self.datastore = _DataStore(raw_data=raw_data, 93 | total_users=total_users, 94 | total_items=total_items, 95 | implicit_negative=implicit_negative, 96 | num_negatives=num_negatives, 97 | seed=seed, sortby=sortby, name=name, asc=asc) 98 | 99 | def _build_dataset(self, generator, generator_params, output_types, output_shapes, 100 | batch_size, num_parallel_calls, take=None): 101 | 102 | 103 | return _ParallelDataset(generator=generator, 104 | generator_params=generator_params, 105 | output_types=output_types, 106 | output_shapes=output_shapes, 107 | batch_size=batch_size, 108 | num_parallel_calls=num_parallel_calls, 109 | take=take) 110 | 111 | def pairwise(self, batch_size, num_parallel_calls=1, take=None): 112 | 113 | output_types = {'user_id': tf.int32, 114 | 'p_item_id': tf.int32, 115 | 'n_item_id': tf.int32} 116 | output_shapes = {'user_id':[], 117 | 'p_item_id':[], 118 | 'n_item_id':[]} 119 | 120 | return self._build_dataset(generator=_pairwise_generator, 121 | generator_params=(self.datastore, ), 122 | output_types=output_types, 123 | output_shapes=output_shapes, 124 | batch_size=batch_size, 125 | num_parallel_calls=num_parallel_calls, 126 | take=take) 127 | 128 | def stratified_pointwise(self, batch_size, pos_ratio=0.5, num_parallel_calls=1, take=None): 129 | 130 | output_types = {'user_id': tf.int32, 131 | 'item_id': tf.int32, 132 | 'label': tf.float32} 133 | output_shapes = {'user_id':[], 134 | 'item_id':[], 135 | 'label':[]} 136 | 137 | return self._build_dataset(generator=_stratified_pointwise_generator, 138 | generator_params=(self.datastore, pos_ratio), 139 | output_types=output_types, 140 | output_shapes=output_shapes, 141 | batch_size=batch_size, 142 | num_parallel_calls=num_parallel_calls, 143 | take=take) 144 | 145 | def per_pos_stratified_pointwise(self, batch_size, pos_ratio=0.5, num_parallel_calls=1, take=None): 146 | 147 | output_types = {'user_id': tf.int32, 148 | 'item_id': tf.int32, 149 | 'label': tf.float32} 150 | output_shapes = {'user_id':[], 151 | 'item_id':[], 152 | 'label':[]} 153 | 154 | return self._build_dataset(generator=_per_pos_stratified_pointwise_generator, 155 | generator_params=(self.datastore, pos_ratio), 156 | output_types=output_types, 157 | output_shapes=output_shapes, 158 | batch_size=batch_size, 159 | num_parallel_calls=num_parallel_calls, 160 | take=take) 161 | 162 | def evaluation(self, batch_size, excl_datasets=[]): 163 | 164 | output_types = {'user_id': tf.int32, 165 | 'pos_mask': tf.bool, 166 | 'excl_mask': tf.bool} 167 | output_shapes = {'user_id': [], 168 | 'pos_mask': [self.datastore.total_items()], 169 | 'excl_mask': [self.datastore.total_items()]} 170 | 171 | return self._build_dataset(generator=_evaluation_generator, 172 | generator_params=(self.datastore, excl_datasets), 173 | output_types=output_types, 174 | output_shapes=output_shapes, 175 | batch_size=batch_size, 176 | 
num_parallel_calls=1) 177 | -------------------------------------------------------------------------------- /noddlrm/data/utils.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import tensorflow as tf 3 | import numpy as np 4 | import random 5 | 6 | class _DataStore(object): 7 | 8 | def __init__(self, raw_data, total_users, total_items, implicit_negative=True, 9 | num_negatives=None, seed=None, sortby=None, asc=True, name=None): 10 | 11 | self.name = name 12 | random.seed(seed) 13 | if type(raw_data) == np.ndarray: 14 | self._raw_data = raw_data 15 | else: 16 | raise TypeError("Unsupported data input schema. Please use structured numpy array.") 17 | self._rand_ids = [] 18 | 19 | self._total_users = total_users 20 | self._total_items = total_items 21 | 22 | self._sortby = sortby 23 | 24 | self._index_store = dict() 25 | self._implicit_negative = implicit_negative 26 | self._num_negatives = num_negatives 27 | if self._implicit_negative: 28 | self._index_store['positive'] = dict() 29 | for ind, entry in enumerate(self._raw_data): 30 | if entry['user_id'] not in self._index_store['positive']: 31 | self._index_store['positive'][entry['user_id']] = dict() 32 | self._index_store['positive'][entry['user_id']][entry['item_id']] = ind 33 | self._index_store['positive_sets'] = dict() 34 | for user_id in self._index_store['positive']: 35 | self._index_store['positive_sets'][user_id] = set(self._index_store['positive'][user_id]) 36 | if num_negatives is not None: 37 | self._index_store['negative'] = dict() 38 | for user_id in self._index_store['positive']: 39 | self._index_store['negative'][user_id] = dict() 40 | shuffled_items = np.random.permutation(self._total_items) 41 | for item in shuffled_items: 42 | if item not in self._index_store['positive'][user_id]: 43 | self._index_store['negative'][user_id][item] = None 44 | if len(self._index_store['negative'][user_id]) == num_negatives: 45 | break 46 | self._index_store['negative_sets'] = dict() 47 | for user_id in self._index_store['negative']: 48 | self._index_store['negative_sets'][user_id] = set(self._index_store['negative'][user_id]) 49 | else: 50 | self._index_store['positive'] = dict() 51 | self._index_store['negative'] = dict() 52 | for ind, entry in enumerate(self._raw_data): 53 | if entry['label'] > 0: 54 | if entry['user_id'] not in self._index_store['positive']: 55 | self._index_store['positive'][entry['user_id']] = dict() 56 | self._index_store['positive'][entry['user_id']][entry['item_id']] = ind 57 | else: 58 | if entry['user_id'] not in self._index_store['negative']: 59 | self._index_store['negative'][entry['user_id']] = dict() 60 | self._index_store['negative'][entry['user_id']][entry['item_id']] = ind 61 | self._index_store['positive_sets'] = dict() 62 | for user_id in self._index_store['positive']: 63 | self._index_store['positive_sets'][user_id] = set(self._index_store['positive'][user_id]) 64 | self._index_store['negative_sets'] = dict() 65 | for user_id in self._index_store['negative']: 66 | self._index_store['negative_sets'][user_id] = set(self._index_store['negative'][user_id]) 67 | 68 | if self._sortby is not None: 69 | self._index_store['positive_sorts'] = dict() 70 | for user_id in self._index_store['positive_sets']: 71 | self._index_store['positive_sorts'][user_id] = sorted(list(self._index_store['positive_sets'][user_id]), 72 | key=lambda item:\ 73 | self._raw_data[self._index_store['positive'][user_id][item]][self._sortby], 74 | reverse=not asc) 75 | 
def contain_negatives(self): 76 | 77 | if self._implicit_negative and self._num_negatives is None: 78 | return False 79 | else: 80 | return True 81 | 82 | def next_random_record(self): 83 | 84 | if len(self._rand_ids) == 0: 85 | self._rand_ids = list(range(len(self._raw_data))) 86 | random.shuffle(self._rand_ids) 87 | return self._raw_data[self._rand_ids.pop()] 88 | 89 | def is_positive(self, user_id, item_id): 90 | 91 | if user_id in self._index_store['positive'] and item_id in self._index_store['positive'][user_id]: 92 | return True 93 | return False 94 | 95 | def sample_positive_items(self, user_id, num_samples=1): 96 | 97 | if user_id in self._index_store['positive_sets']: 98 | return random.sample(self._index_store['positive_sets'][user_id], num_samples) 99 | else: 100 | return [] 101 | 102 | def sample_negative_items(self, user_id, num_samples=1): 103 | 104 | if 'negative_sets' in self._index_store: 105 | if user_id in self._index_store['negative_sets']: 106 | return random.sample(self._index_store['negative_sets'][user_id], num_samples) 107 | else: 108 | return [] 109 | else: 110 | sample_id = random.randint(0, self._total_items-1) 111 | sample_set = set() 112 | while len(sample_set) < num_samples: 113 | if user_id not in self._index_store['positive_sets'] or sample_id not in self._index_store['positive_sets'][user_id]: 114 | sample_set.add(sample_id) 115 | sample_id = random.randint(0, self._total_items-1) 116 | return list(sample_set) 117 | 118 | def get_positive_items(self, user_id, sort=False): 119 | 120 | if user_id in self._index_store['positive_sets']: 121 | if sort: 122 | assert self._sortby is not None, "sortby key is not specified." 123 | return self._index_store['positive_sorts'][user_id] 124 | else: 125 | return list(self._index_store['positive_sets'][user_id]) 126 | else: 127 | return [] 128 | 129 | def get_negative_items(self, user_id): 130 | 131 | if 'negative_sets' in self._index_store: 132 | if user_id in self._index_store['negative_sets']: 133 | return list(self._index_store['negative_sets'][user_id]) 134 | else: 135 | return [] 136 | else: 137 | negative_items = [] 138 | for item_id in range(self._total_items): 139 | if item_id not in self._index_store['positive_sets'][user_id]: 140 | negative_items.append(item_id) 141 | return negative_items 142 | 143 | def warm_users(self, threshold=1): 144 | 145 | users_list = [] 146 | for user_id in self._index_store['positive']: 147 | if len(self._index_store['positive'][user_id]) >= threshold: 148 | users_list.append(user_id) 149 | return users_list 150 | 151 | def total_users(self): 152 | 153 | return self._total_users 154 | 155 | def total_items(self): 156 | 157 | return self._total_items 158 | 159 | def total_records(self): 160 | 161 | return len(self._raw_data) 162 | 163 | 164 | def _process(q, generator, generator_params, output_shapes, batch_size): 165 | 166 | batch_data = {key:[] for key in output_shapes} 167 | num_data_points = 0 168 | 169 | for single_data in generator(*generator_params): 170 | for key in single_data: 171 | batch_data[key].append(single_data[key]) 172 | num_data_points += 1 173 | if num_data_points == batch_size: 174 | q.put(batch_data) 175 | batch_data = {key:[] for key in output_shapes} 176 | num_data_points = 0 177 | 178 | if num_data_points > 0: 179 | q.put(batch_data) 180 | q.put(None) 181 | 182 | class _ParallelDataset: 183 | 184 | def __init__(self, generator, generator_params, output_types, output_shapes, 185 | num_parallel_calls, batch_size, take): 186 | 187 | ctx = mp.get_context('spawn') 
188 |         self._q = ctx.Queue(maxsize=num_parallel_calls)
189 |         self._output_types = output_types
190 |         self._take = take
191 |         self._count = 0
192 | 
193 |         self._p_list = []
194 | 
195 |         for i in range(num_parallel_calls):
196 |             self._p_list.append(ctx.Process(target=_process, args=(self._q, generator, generator_params, output_shapes, batch_size)))
197 |             self._p_list[i].daemon = True
198 |             self._p_list[i].start()
199 | 
200 |     def __iter__(self):
201 | 
202 |         return self
203 | 
204 |     def __next__(self):
205 | 
206 |         if self._take is None or self._count < self._take:
207 |             batch_data = self._q.get()
208 |             if batch_data is None:
209 |                 raise StopIteration()
210 |             else:
211 |                 self._count += 1
212 |                 return {key:tf.constant(batch_data[key], dtype=self._output_types[key]) for key in batch_data}
213 |         else:
214 |             raise StopIteration()
215 | 
216 | 
--------------------------------------------------------------------------------
/noddlrm/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from noddlrm.metrics.ranking_metrics import *
2 | from noddlrm.metrics.dict_mean import DictMean
--------------------------------------------------------------------------------
/noddlrm/metrics/dict_mean.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class DictMean:
5 | 
6 |     def __init__(self, state_shape):
7 | 
8 |         self._states = {}
9 |         for key in state_shape:
10 |             shape = state_shape[key]
11 |             self._states[key] = {'sum': tf.Variable(tf.zeros(shape, dtype=tf.float32)),
12 |                                  'count': tf.Variable(tf.zeros([], dtype=tf.float32))}
13 | 
14 |     def reset_states(self):
15 | 
16 |         for key in self._states:
17 |             self._states[key]['sum'].assign(tf.zeros(tf.shape(self._states[key]['sum']),
18 |                                                      dtype=tf.float32))
19 |             self._states[key]['count'].assign(0.)
20 | 21 | def update_state(self, state): 22 | 23 | for key in state: 24 | self._states[key]['sum'].assign_add(tf.math.reduce_sum(state[key], axis=0)) 25 | self._states[key]['count'].assign_add(tf.cast(tf.shape(state[key])[0], tf.float32)) 26 | 27 | def result(self): 28 | 29 | result = {} 30 | for key in self._states: 31 | result[key] = self._states[key]['sum'] / self._states[key]['count'] 32 | return result 33 | -------------------------------------------------------------------------------- /noddlrm/metrics/ranking_metrics.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | def _log2(value): 5 | 6 | return tf.math.log(value) / tf.math.log(2.0) 7 | 8 | def AUC(pos_mask, pred, excl_mask): 9 | 10 | def _map_fn(tups): 11 | 12 | user_pos_mask, user_pred, user_excl_mask = tups 13 | 14 | eval_mask = tf.math.logical_not(tf.math.logical_or(user_pos_mask, user_excl_mask)) 15 | eval_pred = user_pred[eval_mask] 16 | pos_pred = user_pred[user_pos_mask] 17 | eval_num = tf.math.count_nonzero(eval_mask, dtype=tf.int32) 18 | user_auc = tf.math.count_nonzero(eval_pred <= tf.reshape(pos_pred, (-1, 1)), dtype=tf.float32) \ 19 | / tf.cast(tf.size(pos_pred) * eval_num, dtype=tf.float32) 20 | 21 | return user_auc 22 | 23 | auc = tf.map_fn(_map_fn, (pos_mask, pred, excl_mask), parallel_iterations=10, dtype=tf.float32) 24 | 25 | return auc 26 | 27 | 28 | def NDCG(pos_mask, pred, excl_mask, at=[100]): 29 | 30 | def _map_fn(tups): 31 | 32 | user_pos_mask, user_pred, user_excl_mask = tups 33 | user_pred = tf.math.exp(user_pred) * tf.cast(tf.math.logical_not(user_excl_mask), tf.float32) 34 | pos_pred = user_pred[user_pos_mask] 35 | rank_above = tf.math.count_nonzero(user_pred > tf.reshape(pos_pred, (-1, 1)), axis=1, dtype=tf.float32) 36 | rank_above = tf.tile(tf.expand_dims(rank_above, 0), [len(at), 1]) 37 | tf_at = tf.reshape(tf.constant(at, dtype=tf.float32), [-1, 1]) 38 | log_recipr = tf.math.reciprocal(_log2(rank_above+2)) 39 | 40 | user_ndcg = tf.reduce_sum(log_recipr * tf.cast(rank_above < tf_at, tf.float32), 41 | axis=1) 42 | 43 | return user_ndcg 44 | 45 | ndcg = tf.map_fn(_map_fn, (pos_mask, pred, excl_mask), parallel_iterations=10, dtype=tf.float32) 46 | 47 | return ndcg 48 | 49 | 50 | def Recall(pos_mask, pred, excl_mask, at=[100]): 51 | 52 | 53 | def _map_fn(tups): 54 | 55 | user_pos_mask, user_pred, user_excl_mask = tups 56 | user_pred = tf.math.exp(user_pred) * tf.cast(tf.math.logical_not(user_excl_mask), tf.float32) 57 | pos_pred = user_pred[user_pos_mask] 58 | rank_above = tf.math.count_nonzero(user_pred > tf.reshape(pos_pred, (-1, 1)), axis=1, dtype=tf.float32) 59 | rank_above = tf.tile(tf.expand_dims(rank_above, 0), [len(at), 1]) 60 | tf_at = tf.reshape(tf.constant(at, dtype=tf.float32), [-1, 1]) 61 | 62 | user_recall = tf.math.count_nonzero(rank_above < tf_at, axis=1, dtype=tf.float32) / \ 63 | tf.cast(tf.size(pos_pred), tf.float32) 64 | 65 | return user_recall 66 | 67 | recall = tf.map_fn(_map_fn, (pos_mask, pred, excl_mask), parallel_iterations=10, dtype=tf.float32) 68 | 69 | return recall -------------------------------------------------------------------------------- /noddlrm/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from noddlrm.modules.latent_factor import LatentFactor 2 | from noddlrm.modules.pairwise_log_loss import PairwiseLogLoss 3 | from noddlrm.modules.pointwise_mse_loss import PointwiseMSELoss 4 | from 
noddlrm.modules.multi_layer_perceptron import MLP 5 | from noddlrm.modules.second_order_feature_interaction import SecondOrderFeatureInteraction -------------------------------------------------------------------------------- /noddlrm/modules/latent_factor.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Embedding 2 | import tensorflow as tf 3 | 4 | class LatentFactor(Embedding): 5 | 6 | def __init__(self, num_instances, dim, zero_init=False, name=None): 7 | 8 | if zero_init: 9 | initializer = 'zeros' 10 | else: 11 | initializer = 'uniform' 12 | super(LatentFactor, self).__init__(input_dim=num_instances, 13 | output_dim=dim, 14 | embeddings_initializer=initializer, 15 | name=name) 16 | 17 | def censor(self, censor_id): 18 | 19 | unique_censor_id, _ = tf.unique(censor_id) 20 | embedding_gather = tf.gather(self.variables[0], indices=unique_censor_id) 21 | norm = tf.norm(embedding_gather, axis=1, keepdims=True) 22 | return self.variables[0].scatter_nd_update(indices=tf.expand_dims(unique_censor_id, 1), 23 | updates=embedding_gather / tf.math.maximum(norm, 0.1)) -------------------------------------------------------------------------------- /noddlrm/modules/multi_layer_perceptron.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import Sequential 3 | from tensorflow.keras.layers import Dense 4 | 5 | def MLP(units_list, use_bias=True, activation='relu', out_activation=None): 6 | 7 | mlp = Sequential() 8 | 9 | for units in units_list[:-1]: 10 | mlp.add(Dense(units, 11 | activation=activation, 12 | use_bias=use_bias)) 13 | 14 | mlp.add(Dense(units_list[-1], 15 | activation=out_activation, 16 | use_bias=use_bias)) 17 | 18 | return mlp -------------------------------------------------------------------------------- /noddlrm/modules/pairwise_log_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Layer 3 | 4 | class PairwiseLogLoss(Layer): 5 | 6 | def __call__(self, user_vec, p_item_vec, n_item_vec, p_item_bias=None, n_item_bias=None): 7 | 8 | outputs = super(PairwiseLogLoss, self).__call__((user_vec, 9 | p_item_vec, 10 | n_item_vec, 11 | p_item_bias, 12 | n_item_bias)) 13 | return outputs 14 | 15 | def call(self, inputs): 16 | 17 | user_vec, p_item_vec, n_item_vec, p_item_bias, n_item_bias = inputs 18 | 19 | dot_user_pos = tf.math.reduce_sum(user_vec*p_item_vec, 20 | axis=1, 21 | keepdims=True) 22 | dot_user_neg = tf.math.reduce_sum(user_vec*n_item_vec, 23 | axis=1, 24 | keepdims=True) 25 | 26 | if p_item_bias is not None: 27 | dot_user_pos += p_item_bias 28 | 29 | if n_item_bias is not None: 30 | dot_user_neg += n_item_bias 31 | 32 | loss = -tf.math.reduce_mean(tf.math.log_sigmoid(tf.math.maximum(dot_user_pos-dot_user_neg, -30.0))) 33 | 34 | return loss -------------------------------------------------------------------------------- /noddlrm/modules/pointwise_mse_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Layer 3 | 4 | class PointwiseMSELoss(Layer): 5 | 6 | def __init__(self, a=1.0, b=1.0, sigmoid=False): 7 | 8 | super(PointwiseMSELoss, self).__init__() 9 | self._a = a 10 | self._b = b 11 | self._sigmoid = sigmoid 12 | 13 | def __call__(self, user_vec, item_vec, item_bias, label): 14 | 15 | outputs = 
super(PointwiseMSELoss, self).__call__((user_vec, item_vec, item_bias, label)) 16 | return outputs 17 | 18 | def call(self, inputs): 19 | 20 | user_vec, item_vec, item_bias, label = inputs 21 | 22 | dot_user_item = tf.math.reduce_sum(tf.math.multiply(user_vec, item_vec), 23 | axis=1, keepdims=False, name="dot_user_item") 24 | 25 | if self._sigmoid: 26 | prediction = tf.math.sigmoid(dot_user_item + tf.reshape(item_bias, [-1])) 27 | else: 28 | prediction = dot_user_item + tf.reshape(item_bias, [-1]) 29 | 30 | label_weight = (self._a - self._b) * label + self._b 31 | return tf.math.reduce_sum(label_weight * tf.square(label - prediction)) 32 | -------------------------------------------------------------------------------- /noddlrm/modules/second_order_feature_interaction.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Layer 2 | import tensorflow as tf 3 | 4 | class SecondOrderFeatureInteraction(Layer): 5 | 6 | def __init__(self, self_interaction=False): 7 | 8 | self._self_interaction = self_interaction 9 | 10 | super(SecondOrderFeatureInteraction, self).__init__() 11 | 12 | def call(self, inputs): 13 | 14 | ''' 15 | inputs: list of features with shape [batch_size, feature_dim] 16 | ''' 17 | 18 | batch_size = tf.shape(inputs[0])[0] 19 | 20 | concat_features = tf.stack(inputs, axis=1) 21 | dot_products = tf.linalg.LinearOperatorLowerTriangular(tf.matmul(concat_features, concat_features, transpose_b=True)).to_dense() 22 | 23 | ones = tf.ones_like(dot_products) 24 | mask = tf.linalg.band_part(ones, 0, -1) 25 | 26 | if not self._self_interaction: 27 | mask = mask - tf.linalg.band_part(ones, 0, 0) 28 | out_dim = int(len(inputs) * (len(inputs)-1) / 2) 29 | else: 30 | out_dim = int(len(inputs) * (len(inputs)+1) / 2) 31 | 32 | flat_interactions = tf.reshape(tf.boolean_mask(dot_products, mask), (batch_size, out_dim)) 33 | 34 | return flat_interactions 35 | -------------------------------------------------------------------------------- /noddlrm/recommenders/__init__.py: -------------------------------------------------------------------------------- 1 | from noddlrm.recommenders.dlrm import DLRM -------------------------------------------------------------------------------- /noddlrm/recommenders/dlrm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tensorflow as tf 3 | from tensorflow.keras import Model 4 | from noddlrm.modules import LatentFactor, SecondOrderFeatureInteraction, MLP 5 | 6 | class DLRM(Model): 7 | 8 | def __init__( 9 | self, 10 | m_spa, 11 | ln_emb, 12 | ln_bot, 13 | ln_top, 14 | arch_interaction_op='dot', 15 | arch_interaction_itself=False, 16 | sigmoid_bot=False, 17 | sigmoid_top=True, 18 | loss_func='mse', 19 | loss_threshold=0.0): 20 | 21 | ''' 22 | m_spa: the dimensionality of sparse feature embeddings 23 | ln_emb: the size of sparse feature embeddings (num_instances) 24 | ln_bot: the size of the bottom MLP 25 | ln_top: the size of the top MLP 26 | ''' 27 | 28 | super(DLRM, self).__init__() 29 | 30 | self._loss_threshold = loss_threshold 31 | self._loss_func = loss_func 32 | self._latent_factors = [LatentFactor(num_instances=num, 33 | dim=m_spa) for num in ln_emb] 34 | self._mlp_bot = MLP(units_list=ln_bot, 35 | out_activation='sigmoid' if sigmoid_bot else 'relu') 36 | self._mlp_top = MLP(units_list=ln_top, 37 | out_activation='sigmoid' if sigmoid_top else 'relu') 38 | 39 | self._dot_interaction = None 40 | if arch_interaction_op == 'dot': 41 | 
self._dot_interaction = SecondOrderFeatureInteraction(
42 |                 self_interaction=arch_interaction_itself
43 |             )
44 | 
45 |         elif arch_interaction_op != 'cat':
46 |             sys.exit(
47 |                 "ERROR: arch_interaction_op="
48 |                 + arch_interaction_op
49 |                 + " is not supported"
50 |             )
51 | 
52 |         if loss_func == 'mse':
53 |             self._loss = tf.keras.losses.MeanSquaredError()
54 |         elif loss_func == 'bce':
55 |             self._loss = tf.keras.losses.BinaryCrossentropy()
56 |         else:
57 |             sys.exit(
58 |                 "ERROR: loss_func="
59 |                 + loss_func
60 |                 + " is not supported"
61 |             )
62 | 
63 |     def get_myloss(self, dense_features, sparse_features, label):
64 | 
65 |         '''
66 |         dense_features shape: [batch_size, num of dense features]
67 |         sparse_features shape: [batch_size, num_of_sparse_features]
68 |         label shape: [batch_size]
69 |         '''
70 | 
71 |         prediction = self.inference(dense_features, sparse_features)
72 |         loss = self._loss(y_true=label,
73 |                           y_pred=prediction)
74 |         return loss
75 | 
76 |     def call(self, inputs, training=None, mask=None):
77 |         dense_features, sparse_features = inputs
78 |         return self.inference(dense_features, sparse_features)
79 | 
80 |     def inference(self, dense_features, sparse_features):
81 | 
82 |         '''
83 |         dense_features shape: [batch_size, num of dense features]
84 |         sparse_features shape: [batch_size, num_of_sparse_features]
85 |         '''
86 |         self._set_inputs([dense_features, sparse_features])
87 |         sparse_emb_vecs = list(map(lambda pair: pair[1](pair[0]),
88 |                                    zip(tf.unstack(sparse_features, axis=1),
89 |                                        self._latent_factors)))
90 | 
91 |         dense_emb_vec = self._mlp_bot(dense_features)
92 | 
93 |         if self._dot_interaction is not None:
94 |             prediction = self._mlp_top(tf.concat([dense_emb_vec,
95 |                                                   self._dot_interaction(sparse_emb_vecs + [dense_emb_vec])],
96 |                                                  axis=1))
97 |         else:
98 |             prediction = self._mlp_top(tf.concat(sparse_emb_vecs + [dense_emb_vec],
99 |                                                  axis=1))
100 | 
101 |         if 0.0 < self._loss_threshold and self._loss_threshold < 1.0:
102 |             prediction = tf.clip_by_value(prediction, self._loss_threshold, 1.0 - self._loss_threshold)
103 | 
104 |         return tf.reshape(prediction, [-1])
105 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 | 
3 | setup(
4 |     name='noddlrm',
5 |     version='0.0.1',
6 |     packages=find_packages(exclude=("tutorials",)),
7 |     description="NOD DLRM - Adapted from OpenRec(https://openrec.ai/)",
8 |     url="https://nod.ai/",
9 |     license='Apache 2.0',
10 |     author='Chi Liu',
11 |     author_email='chi@nod-labs.com',
12 |     install_requires=[
13 |         'tqdm>=4.15.0',
14 |         'numpy>=1.13.0',
15 |         'termcolor>=1.1.0'
16 |     ],
17 |     classifiers=['Development Status :: 3 - Alpha',
18 |                  'License :: OSI Approved :: Apache Software License',
19 |                  'Programming Language :: Python :: 3.7',
20 |                  'Topic :: Scientific/Engineering :: Artificial Intelligence'],
21 | )
22 | 
--------------------------------------------------------------------------------
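
If you just want to confirm that the installed `noddlrm` package imports and that the `DLRM` model defined in `noddlrm/recommenders/dlrm.py` builds and computes a loss, the minimal sketch below feeds it random data. The vocabulary sizes, layer sizes, and batch size are made-up illustrative values (not the Criteo configuration), and it assumes the same tensorflow-gpu==2.2 / Python 3.7 environment as the rest of the repo.

```python
# Smoke test for the installed noddlrm package, using synthetic data.
# All sizes below are illustrative placeholders, not the Criteo settings.
import tensorflow as tf

from noddlrm.recommenders import DLRM

counts = [100, 50, 25]   # hypothetical vocabulary sizes for three sparse features
num_dense = 4            # hypothetical number of dense features
batch_size = 8

# Note: the last entry of ln_bot must equal m_spa, because the bottom-MLP output
# is stacked with the sparse embeddings for the dot-product interaction.
model = DLRM(m_spa=4, ln_emb=counts, ln_bot=[8, 4], ln_top=[16, 1])

dense = tf.random.uniform((batch_size, num_dense))                    # [batch, num_dense]
sparse = tf.stack([tf.random.uniform((batch_size,), maxval=c, dtype=tf.int32)
                   for c in counts], axis=1)                          # [batch, num_sparse]
label = tf.cast(tf.random.uniform((batch_size,)) > 0.5, tf.float32)   # [batch]

pred = model.inference(dense, sparse)           # forward pass -> [batch] scores
loss = model.get_myloss(dense, sparse, label)   # MSE loss by default
print(pred.shape, float(loss))
```

These are the same `inference` and `get_myloss` calls that `dlrm_criteo_gpu.py` and `dlrm_criteo_tpu.py` wrap inside their `tf.function` train and eval steps.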