├── .gitignore ├── README.md ├── activation ├── __init__.py ├── entmax.py └── glu.py ├── config ├── __init__.py └── covertype.py ├── download_prepare_covertype.py ├── model ├── tabnet.py └── tabnet_reduced.py ├── requirements.txt ├── run.sh ├── train_classifier.py └── util ├── __init__.py ├── data_helper.py ├── logging.py └── tfutils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | *.csv 132 | *.gz 133 | tflog/ 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TabNet Reduced 2 | 3 | Most of the code is taken from [here](https://github.com/google-research/google-research/tree/master/tabnet) for "TabNet: Attentive Interpretable Tabular Learning" by Sercan O. Arik and Tomas Pfister (paper: https://arxiv.org/abs/1908.07442). 4 | 5 | The modified model, reduced TabNet, is defined in `model/tabnet_reduced.py`. 
There are two modifications: 6 | * there is now 1 shared feature transformer block and 1 decision-step-dependent feature transformer block (down from 2 and 2, respectively), and 7 | * the sparsemax mask for feature selection has been replaced by entmax 1.5 (TensorFlow implementation from [here](https://gist.github.com/justheuristic/60167e77a95221586be315ae527c3cbd)). 8 | 9 | Together, these modifications improve the performance of TabNet with fewer parameters, in particular by producing a sharper mask for feature selection. 10 | 11 | ## Training and Evaluation 12 | 13 | As in the original repository, this repository contains an example implementation of TabNet on the Forest Covertype dataset (https://archive.ics.uci.edu/ml/datasets/covertype). 14 | 15 | To run everything end to end, execute `run.sh`. Otherwise, the steps can be run manually as follows. 16 | 17 | First, run `python download_prepare_covertype.py` to download and prepare the Forest Covertype dataset. 18 | This command creates `train_covertype.csv`, `val_covertype.csv`, and `test_covertype.csv` under the `data/` directory (the directory is created if it does not exist). 19 | 20 | To run the training and evaluation pipeline, use `python train_classifier.py`. Note that TensorBoard logs are written to `tflog/`. 21 | 22 | For simplicity, the hyperparameters for the reduced TabNet and the original TabNet model are kept the same. They can be found in `config/covertype.py`. To train the reduced TabNet, 23 | set `REDUCED = True`; to train the original TabNet, set `REDUCED = False`. 24 | 25 | ## Modifications for Other Datasets 26 | 27 | To adapt the experiment to other tabular datasets: 28 | - Place the new dataset's train, validation, and test CSV files under the `data/` directory, 29 | - Create a new config in `config/` by copying `config/covertype.py` and updating the numerical and categorical feature columns, file paths, and hyperparameters for the new dataset (a sketch is given below), 30 | - Re-optimize the TabNet hyperparameters for the new dataset in your config, 31 | - Import the new config's parameters in `train_classifier.py` (in place of `config.covertype`), 32 | - Select the reduced TabNet architecture by setting `REDUCED = True`, and 33 | - Change `MODEL_NAME` in your config to a name of your choice. 
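
For concreteness, a minimal sketch of such a config is shown below. Every column name, file path, and sample count is a placeholder for illustration only, and the hyperparameter values are simply copied from `config/covertype.py` as a starting point before re-optimization.

```python
# config/mydata.py -- hypothetical example config (all names are placeholders)

# Dataset size
N_VAL_SAMPLES = 10000   # placeholder
N_TEST_SAMPLES = 10000  # placeholder; the test set is read as one batch of this size
NUM_FEATURES = 4        # total number of feature columns listed below
NUM_CLASSES = 3

# All feature columns in the data
LABEL_COLUMN = "target"
INT_COLUMNS = ["age", "num_visits"]
FLOAT_COLUMNS = ["balance"]
BOOL_COLUMNS = ["is_active"]
STR_COLUMNS = []
STR_NUNIQUESS = []
ENCODED_CATEGORICAL_COLUMNS = []

# Model hyperparameters (copied from covertype; re-optimize for the new dataset)
FEATURE_DIM = 64
OUTPUT_DIM = 64
NUM_DECISION_STEPS = 5
RELAXATION_FACTOR = 1.5
BATCH_MOMENTUM = 0.7
VIRTUAL_BATCH_SIZE = 512  # BATCH_SIZE must be an integer multiple of this

# Training parameters
TRAIN_FILE = "data/train_mydata.csv"
VAL_FILE = "data/val_mydata.csv"
TEST_FILE = "data/test_mydata.csv"
MAX_STEPS = 1000000
DISPLAY_STEP = 1000
VAL_STEP = 10000
SAVE_STEP = 40000
TEST_STEP = 1000
INIT_LEARNING_RATE = 0.02
DECAY_EVERY = 500
DECAY_RATE = 0.95
BATCH_SIZE = 16384
SPARSITY_LOSS_WEIGHT = 0.0001
GRADIENT_THRESH = 2000.0
SEED = 1
REDUCED = True
MODEL_NAME = "tabnet_mydata_reduced_model" if REDUCED else "tabnet_mydata_model"
```

With the file saved as, say, `config/mydata.py`, the wildcard import at the top of `train_classifier.py` (`from config.covertype import *`) is switched to the new module.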
34 | -------------------------------------------------------------------------------- /activation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptuls/tabnet-modified/5b1f8d13584b552f4808d95ffd253830c696fb4e/activation/__init__.py -------------------------------------------------------------------------------- /activation/entmax.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import tensorflow as tf 3 | 4 | 5 | # taken from https://gist.github.com/justheuristic/60167e77a95221586be315ae527c3cbd 6 | def entmax15(inputs, axis=-1): 7 | """ 8 | Entmax 1.5 implementation, heavily inspired by 9 | * paper: https://arxiv.org/pdf/1905.05702.pdf 10 | * pytorch code: https://github.com/deep-spin/entmax 11 | :param inputs: similar to softmax logits, but for entmax1.5 12 | :param axis: entmax1.5 outputs will sum to 1 over this axis 13 | :return: entmax activations of same shape as inputs 14 | """ 15 | 16 | @tf.custom_gradient 17 | def _entmax_inner(inputs): 18 | with tf.name_scope("entmax"): 19 | inputs = inputs / 2 # divide by 2 so as to solve actual entmax 20 | # subtract max for stability 21 | inputs -= tf.reduce_max(inputs, axis, keep_dims=True) 22 | 23 | threshold, _ = entmax_threshold_and_support(inputs, axis) 24 | outputs_sqrt = tf.nn.relu(inputs - threshold) 25 | outputs = tf.square(outputs_sqrt) 26 | 27 | def grad_fn(d_outputs): 28 | with tf.name_scope("entmax_grad"): 29 | d_inputs = d_outputs * outputs_sqrt 30 | q = tf.reduce_sum(d_inputs, axis=axis, keep_dims=True) 31 | q = q / tf.reduce_sum(outputs_sqrt, axis=axis, keep_dims=True) 32 | d_inputs -= q * outputs_sqrt 33 | return d_inputs 34 | 35 | return outputs, grad_fn 36 | 37 | return _entmax_inner(inputs) 38 | 39 | 40 | @tf.custom_gradient 41 | def sparse_entmax15_loss_with_logits(labels, logits): 42 | """ 43 | Computes sample-wise entmax1.5 loss 44 | :param labels: reference answers vector int64[batch_size] \in [0, num_classes) 45 | :param logits: output matrix float32[batch_size, num_classes] (not actually logits :) 46 | :returns: elementwise loss, float32[batch_size] 47 | """ 48 | assert logits.shape.ndims == 2 and labels.shape.ndims == 1 49 | with tf.name_scope("entmax_loss"): 50 | p_star = entmax15(logits, axis=-1) 51 | omega_entmax15 = (1 - (tf.reduce_sum(p_star * tf.sqrt(p_star), axis=-1))) / 0.75 52 | p_incr = p_star - tf.one_hot(labels, depth=tf.shape(logits)[-1], axis=-1) 53 | loss = omega_entmax15 + tf.einsum("ij,ij->i", p_incr, logits) 54 | 55 | def grad_fn(grad_output): 56 | with tf.name_scope("entmax_loss_grad"): 57 | return None, grad_output[..., None] * p_incr 58 | 59 | return loss, grad_fn 60 | 61 | 62 | @tf.custom_gradient 63 | def entmax15_loss_with_logits(labels, logits): 64 | """ 65 | Computes sample-wise entmax1.5 loss 66 | :param logits: "logits" matrix float32[batch_size, num_classes] 67 | :param labels: reference answers indicators, float32[batch_size, num_classes] 68 | :returns: elementwise loss, float32[batch_size] 69 | 70 | WARNING: this function does not propagate gradients through :labels: 71 | This behavior is the same as like softmax_crossentropy_with_logits v1 72 | It may become an issue if you do something like co-distillation 73 | """ 74 | assert labels.shape.ndims == logits.shape.ndims == 2 75 | with tf.name_scope("entmax_loss"): 76 | p_star = entmax15(logits, axis=-1) 77 | omega_entmax15 = (1 - (tf.reduce_sum(p_star * tf.sqrt(p_star), axis=-1))) / 0.75 78 | 
p_incr = p_star - labels 79 | loss = omega_entmax15 + tf.einsum("ij,ij->i", p_incr, logits) 80 | 81 | def grad_fn(grad_output): 82 | with tf.name_scope("entmax_loss_grad"): 83 | return None, grad_output[..., None] * p_incr 84 | 85 | return loss, grad_fn 86 | 87 | 88 | def top_k_over_axis(inputs, k, axis=-1, **kwargs): 89 | """ performs tf.nn.top_k over any chosen axis """ 90 | with tf.name_scope("top_k_along_axis"): 91 | if axis == -1: 92 | return tf.nn.top_k(inputs, k, **kwargs) 93 | 94 | perm_order = list(range(inputs.shape.ndims)) 95 | perm_order.append(perm_order.pop(axis)) 96 | inv_order = [perm_order.index(i) for i in range(len(perm_order))] 97 | 98 | input_perm = tf.transpose(inputs, perm_order) 99 | input_perm_sorted, sort_indices_perm = tf.nn.top_k(input_perm, k=k, **kwargs) 100 | 101 | input_sorted = tf.transpose(input_perm_sorted, inv_order) 102 | sort_indices = tf.transpose(sort_indices_perm, inv_order) 103 | return input_sorted, sort_indices 104 | 105 | 106 | def _make_ix_like(inputs, axis=-1): 107 | """ creates indices 0, ... , input[axis] unsqueezed to input dimensios """ 108 | assert inputs.shape.ndims is not None 109 | rho = tf.cast(tf.range(1, tf.shape(inputs)[axis] + 1), dtype=inputs.dtype) 110 | view = [1] * inputs.shape.ndims 111 | view[axis] = -1 112 | return tf.reshape(rho, view) 113 | 114 | 115 | def gather_over_axis(values, indices, gather_axis): 116 | """ 117 | replicates the behavior of torch.gather for tf<=1.8; 118 | for newer versions use tf.gather with batch_dims 119 | :param values: tensor [d0, ..., dn] 120 | :param indices: int64 tensor of same shape as values except for gather_axis 121 | :param gather_axis: performs gather along this axis 122 | :returns: gathered values, same shape as values except for gather_axis 123 | If gather_axis == 2 124 | gathered_values[i, j, k, ...] = values[i, j, indices[i, j, k, ...], ...] 
125 | see torch.gather for more detils 126 | """ 127 | assert indices.shape.ndims is not None 128 | assert indices.shape.ndims == values.shape.ndims 129 | 130 | ndims = indices.shape.ndims 131 | gather_axis = gather_axis % ndims 132 | shape = tf.shape(indices) 133 | 134 | selectors = [] 135 | for axis_i in range(ndims): 136 | if axis_i == gather_axis: 137 | selectors.append(indices) 138 | else: 139 | index_i = tf.range(tf.cast(shape[axis_i], dtype=indices.dtype), dtype=indices.dtype) 140 | index_i = tf.reshape(index_i, [-1 if i == axis_i else 1 for i in range(ndims)]) 141 | index_i = tf.tile(index_i, [shape[i] if i != axis_i else 1 for i in range(ndims)]) 142 | selectors.append(index_i) 143 | 144 | return tf.gather_nd(values, tf.stack(selectors, axis=-1)) 145 | 146 | 147 | def entmax_threshold_and_support(inputs, axis=-1): 148 | """ 149 | Computes clipping threshold for entmax1.5 over specified axis 150 | NOTE this implementation uses the same heuristic as 151 | the original code: https://tinyurl.com/pytorch-entmax-line-203 152 | :param inputs: (entmax1.5 inputs - max) / 2 153 | :param axis: entmax1.5 outputs will sum to 1 over this axis 154 | """ 155 | 156 | with tf.name_scope("entmax_threshold_and_support"): 157 | num_outcomes = tf.shape(inputs)[axis] 158 | inputs_sorted, _ = top_k_over_axis(inputs, k=num_outcomes, axis=axis, sorted=True) 159 | 160 | rho = _make_ix_like(inputs, axis=axis) 161 | 162 | mean = tf.cumsum(inputs_sorted, axis=axis) / rho 163 | 164 | mean_sq = tf.cumsum(tf.square(inputs_sorted), axis=axis) / rho 165 | delta = (1 - rho * (mean_sq - tf.square(mean))) / rho 166 | 167 | delta_nz = tf.nn.relu(delta) 168 | tau = mean - tf.sqrt(delta_nz) 169 | 170 | support_size = tf.reduce_sum( 171 | tf.to_int64(tf.less_equal(tau, inputs_sorted)), axis=axis, keep_dims=True 172 | ) 173 | 174 | tau_star = gather_over_axis(tau, support_size - 1, axis) 175 | return tau_star, support_size 176 | -------------------------------------------------------------------------------- /activation/glu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import tensorflow as tf 3 | 4 | 5 | def glu(act, n_units): 6 | """Generalized linear unit nonlinear activation.""" 7 | return act[:, :n_units] * tf.nn.sigmoid(act[:, n_units:]) 8 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptuls/tabnet-modified/5b1f8d13584b552f4808d95ffd253830c696fb4e/config/__init__.py -------------------------------------------------------------------------------- /config/covertype.py: -------------------------------------------------------------------------------- 1 | # Dataset size 2 | # N_TRAIN_SAMPLES = 309871 3 | N_VAL_SAMPLES = 154937 4 | N_TEST_SAMPLES = 116203 5 | NUM_FEATURES = 54 6 | NUM_CLASSES = 7 7 | 8 | # All feature columns in the data 9 | LABEL_COLUMN = "Covertype" 10 | 11 | BOOL_COLUMNS = [ 12 | "Wilderness_Area1", 13 | "Wilderness_Area2", 14 | "Wilderness_Area3", 15 | "Wilderness_Area4", 16 | "Soil_Type1", 17 | "Soil_Type2", 18 | "Soil_Type3", 19 | "Soil_Type4", 20 | "Soil_Type5", 21 | "Soil_Type6", 22 | "Soil_Type7", 23 | "Soil_Type8", 24 | "Soil_Type9", 25 | "Soil_Type10", 26 | "Soil_Type11", 27 | "Soil_Type12", 28 | "Soil_Type13", 29 | "Soil_Type14", 30 | "Soil_Type15", 31 | "Soil_Type16", 32 | "Soil_Type17", 33 | "Soil_Type18", 34 | "Soil_Type19", 35 | "Soil_Type20", 36 | 
"Soil_Type21", 37 | "Soil_Type22", 38 | "Soil_Type23", 39 | "Soil_Type24", 40 | "Soil_Type25", 41 | "Soil_Type26", 42 | "Soil_Type27", 43 | "Soil_Type28", 44 | "Soil_Type29", 45 | "Soil_Type30", 46 | "Soil_Type31", 47 | "Soil_Type32", 48 | "Soil_Type33", 49 | "Soil_Type34", 50 | "Soil_Type35", 51 | "Soil_Type36", 52 | "Soil_Type37", 53 | "Soil_Type38", 54 | "Soil_Type39", 55 | "Soil_Type40", 56 | ] 57 | 58 | INT_COLUMNS = [ 59 | "Elevation", 60 | "Aspect", 61 | "Slope", 62 | "Horizontal_Distance_To_Hydrology", 63 | "Vertical_Distance_To_Hydrology", 64 | "Horizontal_Distance_To_Roadways", 65 | "Hillshade_9am", 66 | "Hillshade_Noon", 67 | "Hillshade_3pm", 68 | "Horizontal_Distance_To_Fire_Points", 69 | ] 70 | 71 | STR_COLUMNS = [] 72 | STR_NUNIQUESS = [] 73 | 74 | FLOAT_COLUMNS = [] 75 | 76 | ENCODED_CATEGORICAL_COLUMNS = [] 77 | 78 | # Model hyperparameters 79 | FEATURE_DIM = 64 80 | OUTPUT_DIM = 64 81 | NUM_DECISION_STEPS = 5 82 | RELAXATION_FACTOR = 1.5 83 | BATCH_MOMENTUM = 0.7 84 | VIRTUAL_BATCH_SIZE = 512 85 | 86 | # Training parameters 87 | TRAIN_FILE = "data/train_covertype.csv" 88 | VAL_FILE = "data/val_covertype.csv" 89 | TEST_FILE = "data/test_covertype.csv" 90 | MAX_STEPS = 1000000 91 | DISPLAY_STEP = 1000 92 | VAL_STEP = 10000 93 | SAVE_STEP = 40000 94 | TEST_STEP = 1000 95 | INIT_LEARNING_RATE = 0.02 96 | DECAY_EVERY = 500 97 | DECAY_RATE = 0.95 98 | BATCH_SIZE = 16384 99 | SPARSITY_LOSS_WEIGHT = 0.0001 100 | GRADIENT_THRESH = 2000.0 101 | SEED = 1 102 | REDUCED = True 103 | MODEL_NAME = "tabnet_forest_covertype_reduced_model" if REDUCED else "tabnet_forest_covertype_model" 104 | -------------------------------------------------------------------------------- /download_prepare_covertype.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # This script was taken from TabNet's repository 17 | 18 | """Downloads and prepares the Forest Covertype dataset.""" 19 | 20 | import gzip 21 | import os 22 | import shutil 23 | import pandas as pd 24 | from sklearn.model_selection import train_test_split 25 | import wget 26 | 27 | URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz" 28 | 29 | 30 | def main(): 31 | 32 | if not os.path.exists("./data"): 33 | os.makedirs("./data") 34 | 35 | filename = wget.download(URL) 36 | with gzip.open(filename, "rb") as f_in: 37 | with open("data/covtype.csv", "wb") as f_out: 38 | shutil.copyfileobj(f_in, f_out) 39 | 40 | df = pd.read_csv("data/covtype.csv") 41 | n_total = len(df) 42 | 43 | # Train, val and test split follows 44 | # Rory Mitchell, Andrey Adinets, Thejaswi Rao, and Eibe Frank. 45 | # Xgboost: Scalable GPU accelerated learning. arXiv:1806.11248, 2018. 
46 | 47 | train_val_indices, test_indices = train_test_split( 48 | range(n_total), test_size=0.2, random_state=0 49 | ) 50 | train_indices, val_indices = train_test_split( 51 | train_val_indices, test_size=0.2 / 0.6, random_state=0 52 | ) 53 | 54 | traindf = df.iloc[train_indices] 55 | valdf = df.iloc[val_indices] 56 | testdf = df.iloc[test_indices] 57 | traindf = traindf.sample(frac=1) 58 | 59 | traindf.to_csv("data/train_covertype.csv", index=False, header=False) 60 | valdf.to_csv("data/val_covertype.csv", index=False, header=False) 61 | testdf.to_csv("data/test_covertype.csv", index=False, header=False) 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /model/tabnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """TabNet model.""" 17 | import numpy as np 18 | import tensorflow.compat.v1 as tf 19 | import sys 20 | 21 | sys.path.append("..") 22 | 23 | from activation.glu import glu 24 | 25 | 26 | class TabNet(object): 27 | """TabNet model class.""" 28 | 29 | def __init__( 30 | self, 31 | columns, 32 | num_features, 33 | feature_dim, 34 | output_dim, 35 | num_decision_steps, 36 | relaxation_factor, 37 | batch_momentum, 38 | virtual_batch_size, 39 | num_classes, 40 | epsilon=0.00001, 41 | ): 42 | """Initializes a TabNet instance. 43 | 44 | Args: 45 | columns: The Tensorflow column names for the dataset. 46 | num_features: The number of input features (i.e the number of columns for 47 | tabular data assuming each feature is represented with 1 dimension). 48 | feature_dim: Dimensionality of the hidden representation in feature 49 | transformation block. Each layer first maps the representation to a 50 | 2*feature_dim-dimensional output and half of it is used to determine the 51 | nonlinearity of the GLU activation where the other half is used as an 52 | input to GLU, and eventually feature_dim-dimensional output is 53 | transferred to the next layer. 54 | output_dim: Dimensionality of the outputs of each decision step, which is 55 | later mapped to the final classification or regression output. 56 | num_decision_steps: Number of sequential decision steps. 57 | relaxation_factor: Relaxation factor that promotes the reuse of each 58 | feature at different decision steps. When it is 1, a feature is enforced 59 | to be used only at one decision step and as it increases, more 60 | flexibility is provided to use a feature at multiple decision steps. 61 | batch_momentum: Momentum in ghost batch normalization. 62 | virtual_batch_size: Virtual batch size in ghost batch normalization. The 63 | overall batch size should be an integer multiple of virtual_batch_size. 64 | num_classes: Number of output classes. 65 | epsilon: A small number for numerical stability of the entropy calcations. 
66 | 67 | Returns: 68 | A TabNet instance. 69 | """ 70 | 71 | self.columns = columns 72 | self.num_features = num_features 73 | self.feature_dim = feature_dim 74 | self.output_dim = output_dim 75 | self.num_decision_steps = num_decision_steps 76 | self.relaxation_factor = relaxation_factor 77 | self.batch_momentum = batch_momentum 78 | self.virtual_batch_size = virtual_batch_size 79 | self.num_classes = num_classes 80 | self.epsilon = epsilon 81 | 82 | def encoder(self, data, reuse, is_training): 83 | """TabNet encoder model.""" 84 | 85 | with tf.variable_scope("Encoder", reuse=reuse): 86 | 87 | # Reads and normalizes input features. 88 | features = tf.feature_column.input_layer(data, self.columns) 89 | features = tf.layers.batch_normalization( 90 | features, training=is_training, momentum=self.batch_momentum 91 | ) 92 | batch_size = tf.shape(features)[0] 93 | 94 | # Initializes decision-step dependent variables. 95 | output_aggregated = tf.zeros([batch_size, self.output_dim]) 96 | masked_features = features 97 | mask_values = tf.zeros([batch_size, self.num_features]) 98 | aggregated_mask_values = tf.zeros([batch_size, self.num_features]) 99 | complemantary_aggregated_mask_values = tf.ones([batch_size, self.num_features]) 100 | total_entropy = 0 101 | 102 | v_b = self.virtual_batch_size if is_training else 1 103 | for ni in range(self.num_decision_steps): 104 | # Feature transformer with two shared and two decision step dependent 105 | # blocks is used below. 106 | reuse_flag = ni > 0 107 | 108 | transform_f1 = tf.layers.dense( 109 | masked_features, 110 | self.feature_dim * 2, 111 | name="Transform_f1", 112 | reuse=reuse_flag, 113 | use_bias=False, 114 | ) 115 | transform_f1 = tf.layers.batch_normalization( 116 | transform_f1, 117 | training=is_training, 118 | momentum=self.batch_momentum, 119 | virtual_batch_size=v_b, 120 | ) 121 | transform_f1 = glu(transform_f1, self.feature_dim) 122 | 123 | transform_f2 = tf.layers.dense( 124 | transform_f1, 125 | self.feature_dim * 2, 126 | name="Transform_f2", 127 | reuse=reuse_flag, 128 | use_bias=False, 129 | ) 130 | transform_f2 = tf.layers.batch_normalization( 131 | transform_f2, 132 | training=is_training, 133 | momentum=self.batch_momentum, 134 | virtual_batch_size=v_b, 135 | ) 136 | transform_f2 = (glu(transform_f2, self.feature_dim) + transform_f1) * np.sqrt(0.5) 137 | 138 | transform_f3 = tf.layers.dense( 139 | transform_f2, 140 | self.feature_dim * 2, 141 | name="Transform_f3" + str(ni), 142 | use_bias=False, 143 | ) 144 | transform_f3 = tf.layers.batch_normalization( 145 | transform_f3, 146 | training=is_training, 147 | momentum=self.batch_momentum, 148 | virtual_batch_size=v_b, 149 | ) 150 | transform_f3 = (glu(transform_f3, self.feature_dim) + transform_f2) * np.sqrt(0.5) 151 | 152 | transform_f4 = tf.layers.dense( 153 | transform_f3, 154 | self.feature_dim * 2, 155 | name="Transform_f4" + str(ni), 156 | use_bias=False, 157 | ) 158 | transform_f4 = tf.layers.batch_normalization( 159 | transform_f4, 160 | training=is_training, 161 | momentum=self.batch_momentum, 162 | virtual_batch_size=v_b, 163 | ) 164 | transform_f4 = (glu(transform_f4, self.feature_dim) + transform_f3) * np.sqrt(0.5) 165 | 166 | if ni > 0: 167 | 168 | decision_out = tf.nn.relu(transform_f4[:, : self.output_dim]) 169 | 170 | # Decision aggregation. 171 | output_aggregated += decision_out 172 | 173 | # Aggregated masks are used for visualization of the 174 | # feature importance attributes. 
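                    # Each step's mask is weighted by the magnitude of that step's
                    # decision output (summed over its units), averaged over the
                    # (num_decision_steps - 1) aggregating steps.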
175 | scale_agg = tf.reduce_sum(decision_out, axis=1, keep_dims=True) / ( 176 | self.num_decision_steps - 1 177 | ) 178 | aggregated_mask_values += mask_values * scale_agg 179 | 180 | features_for_coef = transform_f4[:, self.output_dim :] 181 | 182 | if ni < self.num_decision_steps - 1: 183 | 184 | # Determines the feature masks via linear and nonlinear 185 | # transformations, taking into account of aggregated feature use. 186 | mask_values = tf.layers.dense( 187 | features_for_coef, 188 | self.num_features, 189 | name="Transform_coef" + str(ni), 190 | use_bias=False, 191 | ) 192 | mask_values = tf.layers.batch_normalization( 193 | mask_values, 194 | training=is_training, 195 | momentum=self.batch_momentum, 196 | virtual_batch_size=v_b, 197 | ) 198 | mask_values *= complemantary_aggregated_mask_values 199 | mask_values = tf.contrib.sparsemax.sparsemax(mask_values) 200 | 201 | # Relaxation factor controls the amount of reuse of features between 202 | # different decision blocks and updated with the values of 203 | # coefficients. 204 | complemantary_aggregated_mask_values *= self.relaxation_factor - mask_values 205 | 206 | # Entropy is used to penalize the amount of sparsity in feature 207 | # selection. 208 | total_entropy += tf.reduce_mean( 209 | tf.reduce_sum(-mask_values * tf.log(mask_values + self.epsilon), axis=1) 210 | ) / (self.num_decision_steps - 1) 211 | 212 | # Feature selection. 213 | masked_features = tf.multiply(mask_values, features) 214 | 215 | # Visualization of the feature selection mask at decision step ni 216 | tf.summary.image( 217 | "Mask_for_step" + str(ni), 218 | tf.expand_dims(tf.expand_dims(mask_values, 0), 3), 219 | max_outputs=1, 220 | ) 221 | 222 | # Visualization of the aggregated feature importances 223 | tf.summary.image( 224 | "Aggregated_mask", 225 | tf.expand_dims(tf.expand_dims(aggregated_mask_values, 0), 3), 226 | max_outputs=1, 227 | ) 228 | 229 | return output_aggregated, total_entropy 230 | 231 | def classify(self, activations, reuse): 232 | """TabNet classify block.""" 233 | 234 | with tf.variable_scope("Classify", reuse=reuse): 235 | logits = tf.layers.dense(activations, self.num_classes, use_bias=False) 236 | predictions = tf.nn.softmax(logits) 237 | return logits, predictions 238 | 239 | def regress(self, activations, reuse): 240 | """TabNet regress block.""" 241 | 242 | with tf.variable_scope("Regress", reuse=reuse): 243 | predictions = tf.layers.dense(activations, 1) 244 | return predictions 245 | -------------------------------------------------------------------------------- /model/tabnet_reduced.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Modified reduced TabNet model.""" 3 | import numpy as np 4 | import sys 5 | 6 | sys.path.append("..") 7 | 8 | import tensorflow.compat.v1 as tf 9 | 10 | from activation.entmax import entmax15 11 | from activation.glu import glu 12 | 13 | 14 | class TabNetReduced(object): 15 | """Reduced TabNet model class.""" 16 | 17 | def __init__( 18 | self, 19 | columns, 20 | num_features, 21 | feature_dim, 22 | output_dim, 23 | num_decision_steps, 24 | relaxation_factor, 25 | batch_momentum, 26 | virtual_batch_size, 27 | num_classes, 28 | epsilon=0.00001, 29 | ): 30 | """Initializes a reduced TabNet instance. 31 | 32 | Args: 33 | columns: The Tensorflow column names for the dataset. 34 | num_features: The number of input features (i.e the number of columns for 35 | tabular data assuming each feature is represented with 1 dimension). 
36 | feature_dim: Dimensionality of the hidden representation in feature 37 | transformation block. Each layer first maps the representation to a 38 | 2*feature_dim-dimensional output and half of it is used to determine the 39 | nonlinearity of the GLU activation where the other half is used as an 40 | input to GLU, and eventually feature_dim-dimensional output is 41 | transferred to the next layer. 42 | output_dim: Dimensionality of the outputs of each decision step, which is 43 | later mapped to the final classification or regression output. 44 | num_decision_steps: Number of sequential decision steps. 45 | relaxation_factor: Relaxation factor that promotes the reuse of each 46 | feature at different decision steps. When it is 1, a feature is enforced 47 | to be used only at one decision step and as it increases, more 48 | flexibility is provided to use a feature at multiple decision steps. 49 | batch_momentum: Momentum in ghost batch normalization. 50 | virtual_batch_size: Virtual batch size in ghost batch normalization. The 51 | overall batch size should be an integer multiple of virtual_batch_size. 52 | num_classes: Number of output classes. 53 | epsilon: A small number for numerical stability of the entropy calcations. 54 | 55 | Returns: 56 | A reduced TabNet instance. 57 | """ 58 | 59 | self.columns = columns 60 | self.num_features = num_features 61 | self.feature_dim = feature_dim 62 | self.output_dim = output_dim 63 | self.num_decision_steps = num_decision_steps 64 | self.relaxation_factor = relaxation_factor 65 | self.batch_momentum = batch_momentum 66 | self.virtual_batch_size = virtual_batch_size 67 | self.num_classes = num_classes 68 | self.epsilon = epsilon 69 | 70 | def encoder(self, data, reuse, is_training): 71 | """Reduced TabNet encoder model.""" 72 | 73 | with tf.variable_scope("Encoder", reuse=reuse): 74 | 75 | # Reads and normalizes input features. 76 | features = tf.feature_column.input_layer(data, self.columns) 77 | features = tf.layers.batch_normalization( 78 | features, training=is_training, momentum=self.batch_momentum 79 | ) 80 | batch_size = tf.shape(features)[0] 81 | 82 | # Initializes decision-step dependent variables. 83 | output_aggregated = tf.zeros([batch_size, self.output_dim]) 84 | masked_features = features 85 | mask_values = tf.zeros([batch_size, self.num_features]) 86 | aggregated_mask_values = tf.zeros([batch_size, self.num_features]) 87 | complementary_aggregated_mask_values = tf.ones([batch_size, self.num_features]) 88 | total_entropy = 0 89 | 90 | v_b = self.virtual_batch_size if is_training else 1 91 | # Feature transformer: a sort of recurrent structure 92 | # TODO: can we automate number of decision steps needed? 93 | for ni in range(self.num_decision_steps): 94 | # Feature transformer with one shared and one decision step dependent 95 | # blocks is used below. 
This departs from the original model 96 | reuse_flag = ni > 0 97 | 98 | # shared because of the same name 99 | transform_f1 = tf.layers.dense( 100 | masked_features, 101 | self.feature_dim * 2, 102 | name="Transform_f1", 103 | reuse=reuse_flag, 104 | use_bias=False, 105 | ) 106 | transform_f1 = tf.layers.batch_normalization( 107 | transform_f1, 108 | training=is_training, 109 | momentum=self.batch_momentum, 110 | virtual_batch_size=v_b, 111 | ) 112 | transform_f1 = glu(transform_f1, self.feature_dim) 113 | 114 | # step dependent 115 | transform_f2 = tf.layers.dense( 116 | transform_f1, 117 | self.feature_dim * 2, 118 | name="Transform_f1" + str(ni), 119 | use_bias=False, 120 | ) 121 | transform_f2 = tf.layers.batch_normalization( 122 | transform_f2, 123 | training=is_training, 124 | momentum=self.batch_momentum, 125 | virtual_batch_size=v_b, 126 | ) 127 | transform_f2 = (glu(transform_f2, self.feature_dim) + transform_f1) * np.sqrt(0.5) 128 | 129 | if ni > 0: 130 | decision_out = tf.nn.relu(transform_f2[:, : self.output_dim]) 131 | 132 | # Decision aggregation. 133 | output_aggregated += decision_out 134 | 135 | # Aggregated masks are used for visualization of the 136 | # feature importance attributes. 137 | scale_agg = tf.reduce_sum(decision_out, axis=1, keep_dims=True) / ( 138 | self.num_decision_steps - 1 139 | ) 140 | aggregated_mask_values += mask_values * scale_agg 141 | 142 | features_for_coef = transform_f2[:, self.output_dim :] 143 | 144 | # Attentive transformer 145 | if ni < self.num_decision_steps - 1: 146 | 147 | # Determines the feature masks via linear and nonlinear 148 | # transformations, taking into account of aggregated feature use. 149 | mask_values = tf.layers.dense( 150 | features_for_coef, 151 | self.num_features, 152 | name="Transform_coef" + str(ni), 153 | use_bias=False, 154 | ) 155 | mask_values = tf.layers.batch_normalization( 156 | mask_values, 157 | training=is_training, 158 | momentum=self.batch_momentum, 159 | virtual_batch_size=v_b, 160 | ) 161 | mask_values *= complementary_aggregated_mask_values 162 | # replace sparsemax with entmax 1.5 163 | mask_values = entmax15(mask_values) 164 | 165 | # Relaxation factor controls the amount of reuse of features between 166 | # different decision blocks and updated with the values of 167 | # coefficients. 168 | complementary_aggregated_mask_values *= self.relaxation_factor - mask_values 169 | 170 | # Entropy is used to penalize the amount of sparsity in feature 171 | # selection. 172 | total_entropy += tf.reduce_mean( 173 | tf.reduce_sum(-mask_values * tf.log(mask_values + self.epsilon), axis=1) 174 | ) / (self.num_decision_steps - 1) 175 | 176 | # Feature selection. 
177 | masked_features = tf.multiply(mask_values, features) 178 | 179 | # Visualization of the feature selection mask at decision step ni 180 | tf.summary.image( 181 | "Mask_for_step" + str(ni), 182 | tf.expand_dims(tf.expand_dims(mask_values, 0), 3), 183 | max_outputs=1, 184 | ) 185 | 186 | # Visualization of the aggregated feature importances 187 | tf.summary.image( 188 | "Aggregated_mask", 189 | tf.expand_dims(tf.expand_dims(aggregated_mask_values, 0), 3), 190 | max_outputs=1, 191 | ) 192 | 193 | return output_aggregated, total_entropy 194 | 195 | def classify(self, activations, reuse): 196 | """Reduced TabNet classify block.""" 197 | 198 | with tf.variable_scope("Classify", reuse=reuse): 199 | logits = tf.layers.dense(activations, self.num_classes, use_bias=False) 200 | predictions = tf.nn.softmax(logits) 201 | return logits, predictions 202 | 203 | def regress(self, activations, reuse): 204 | """Reduced TabNet regress block.""" 205 | 206 | with tf.variable_scope("Regress", reuse=reuse): 207 | predictions = tf.layers.dense(activations, 1) 208 | return predictions 209 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu==1.15.4 2 | absl-py>=0.5.0 3 | numpy==1.15.1 4 | scikit-learn==0.20.1 5 | wget>=3.2 6 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | virtualenv -p python3 . 6 | source ./bin/activate 7 | 8 | pip install tensorflow 9 | pip install -r requirements.txt 10 | python -m train_classifier 11 | -------------------------------------------------------------------------------- /train_classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Train the TabNet or reduced TabNet model on various datasets.""" 3 | import os 4 | from absl import app 5 | import numpy as np 6 | import tensorflow.compat.v1 as tf 7 | 8 | from datetime import datetime 9 | from config.covertype import * 10 | from model import tabnet, tabnet_reduced 11 | from util import data_helper, logging 12 | 13 | logger = logging.create_logger() 14 | 15 | # Run Tensorflow on GPU 0 16 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 17 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 18 | 19 | 20 | def sort_col_names(feature_cols): 21 | column_names = sorted(feature_cols) 22 | logger.info("Ordered column names, corresponding to the indexing in Tensorboard visualization") 23 | for fi in range(len(column_names)): 24 | logger.info(str(fi) + " : " + column_names[fi]) 25 | 26 | 27 | def main(unused_argv): 28 | # column order 29 | feature_columns = ( 30 | INT_COLUMNS + ENCODED_CATEGORICAL_COLUMNS + BOOL_COLUMNS + STR_COLUMNS + FLOAT_COLUMNS 31 | ) 32 | all_columns = feature_columns + [LABEL_COLUMN] 33 | 34 | # Fix random seeds 35 | tf.set_random_seed(SEED) 36 | np.random.seed(SEED) 37 | 38 | input_columns = data_helper.get_columns( 39 | INT_COLUMNS, ENCODED_CATEGORICAL_COLUMNS, BOOL_COLUMNS, FLOAT_COLUMNS, STR_COLUMNS 40 | ) 41 | 42 | # Define the TabNet model 43 | tabnet_model = ( 44 | ( 45 | tabnet_reduced.TabNetReduced( 46 | columns=input_columns, 47 | num_features=NUM_FEATURES, 48 | feature_dim=FEATURE_DIM, 49 | output_dim=OUTPUT_DIM, 50 | num_decision_steps=NUM_DECISION_STEPS, 51 | relaxation_factor=RELAXATION_FACTOR, 52 | 
batch_momentum=BATCH_MOMENTUM, 53 | virtual_batch_size=VIRTUAL_BATCH_SIZE, 54 | num_classes=NUM_CLASSES, 55 | ) 56 | ) 57 | if REDUCED 58 | else ( 59 | tabnet.TabNet( 60 | columns=input_columns, 61 | num_features=NUM_FEATURES, 62 | feature_dim=FEATURE_DIM, 63 | output_dim=OUTPUT_DIM, 64 | num_decision_steps=NUM_DECISION_STEPS, 65 | relaxation_factor=RELAXATION_FACTOR, 66 | batch_momentum=BATCH_MOMENTUM, 67 | virtual_batch_size=VIRTUAL_BATCH_SIZE, 68 | num_classes=NUM_CLASSES, 69 | ) 70 | ) 71 | ) 72 | 73 | sort_col_names(feature_columns) 74 | 75 | # Input sampling 76 | train_batch = data_helper.input_fn( 77 | TRAIN_FILE, 78 | INT_COLUMNS, 79 | BOOL_COLUMNS, 80 | FLOAT_COLUMNS, 81 | STR_COLUMNS, 82 | LABEL_COLUMN, 83 | num_epochs=MAX_STEPS, 84 | shuffle=True, 85 | batch_size=BATCH_SIZE, 86 | ) 87 | test_batch = data_helper.input_fn( 88 | TEST_FILE, 89 | INT_COLUMNS, 90 | BOOL_COLUMNS, 91 | FLOAT_COLUMNS, 92 | STR_COLUMNS, 93 | LABEL_COLUMN, 94 | num_epochs=MAX_STEPS, 95 | shuffle=False, 96 | batch_size=N_TEST_SAMPLES, 97 | ) 98 | 99 | train_iter = train_batch.make_initializable_iterator() 100 | test_iter = test_batch.make_initializable_iterator() 101 | 102 | feature_train_batch, label_train_batch = train_iter.get_next() 103 | feature_test_batch, label_test_batch = test_iter.get_next() 104 | 105 | # Define the model and losses 106 | encoded_train_batch, total_entropy = tabnet_model.encoder( 107 | feature_train_batch, reuse=False, is_training=True 108 | ) 109 | 110 | logits_orig_batch, _ = tabnet_model.classify(encoded_train_batch, reuse=False) 111 | 112 | softmax_orig_key_op = tf.reduce_mean( 113 | tf.nn.sparse_softmax_cross_entropy_with_logits( 114 | logits=logits_orig_batch, labels=label_train_batch 115 | ) 116 | ) 117 | 118 | train_loss_op = softmax_orig_key_op + SPARSITY_LOSS_WEIGHT * total_entropy 119 | tf.summary.scalar("Total loss", train_loss_op) 120 | 121 | # Optimization step 122 | global_step = tf.train.get_or_create_global_step() 123 | learning_rate = tf.train.exponential_decay( 124 | INIT_LEARNING_RATE, global_step=global_step, decay_steps=DECAY_EVERY, decay_rate=DECAY_RATE 125 | ) 126 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 127 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 128 | with tf.control_dependencies(update_ops): 129 | gvs = optimizer.compute_gradients(train_loss_op) 130 | capped_gvs = [ 131 | (tf.clip_by_value(grad, -GRADIENT_THRESH, GRADIENT_THRESH), var) for grad, var in gvs 132 | ] 133 | train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step) 134 | 135 | # Model evaluation 136 | # Test performance 137 | encoded_test_batch, _ = tabnet_model.encoder(feature_test_batch, reuse=True, is_training=False) 138 | 139 | _, prediction_test = tabnet_model.classify(encoded_test_batch, reuse=True) 140 | 141 | predicted_labels = tf.cast(tf.argmax(prediction_test, 1), dtype=tf.int32) 142 | test_eq_op = tf.equal(predicted_labels, label_test_batch) 143 | test_acc_op = tf.reduce_mean(tf.cast(test_eq_op, dtype=tf.float32)) 144 | tf.summary.scalar("Test accuracy", test_acc_op) 145 | 146 | # Training setup 147 | current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 148 | model_name = MODEL_NAME + f"_{current_time}" 149 | init = tf.initialize_all_variables() 150 | init_local = tf.local_variables_initializer() 151 | init_table = tf.tables_initializer(name="Initialize_all_tables") 152 | saver = tf.train.Saver() 153 | summaries = tf.summary.merge_all() 154 | 155 | with tf.Session() as sess: 156 | summary_writer = 
tf.summary.FileWriter("./tflog/" + model_name, sess.graph) 157 | 158 | sess.run(init) 159 | sess.run(init_local) 160 | sess.run(init_table) 161 | sess.run(train_iter.initializer) 162 | sess.run(test_iter.initializer) 163 | 164 | for step in range(1, MAX_STEPS + 1): 165 | if step % DISPLAY_STEP == 0: 166 | _, train_loss, merged_summary = sess.run([train_op, train_loss_op, summaries]) 167 | summary_writer.add_summary(merged_summary, step) 168 | logger.info( 169 | "Step " + str(step) + ", Training Loss = " + "{:.4f}".format(train_loss) 170 | ) 171 | else: 172 | _ = sess.run(train_op) 173 | 174 | if step % TEST_STEP == 0: 175 | feed_arr = [vars()["summaries"], vars()["test_acc_op"]] 176 | 177 | test_arr = sess.run(feed_arr) 178 | merged_summary = test_arr[0] 179 | test_acc = test_arr[1] 180 | 181 | logger.info("Step " + str(step) + ", Test Accuracy = " + "{:.4f}".format(test_acc)) 182 | summary_writer.add_summary(merged_summary, step) 183 | 184 | if step % SAVE_STEP == 0: 185 | saver.save(sess, "./checkpoints/" + model_name + ".ckpt") 186 | 187 | 188 | if __name__ == "__main__": 189 | app.run(main) 190 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptuls/tabnet-modified/5b1f8d13584b552f4808d95ffd253830c696fb4e/util/__init__.py -------------------------------------------------------------------------------- /util/data_helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import functools 3 | import tensorflow as tf 4 | from tensorflow.python.framework import dtypes 5 | 6 | 7 | def set_defaults(int_columns, bool_columns, float_columns, str_columns): 8 | return ( 9 | [[0] for col in int_columns] 10 | + [[""] for col in bool_columns] 11 | + [[0.0] for col in float_columns] 12 | + [[""] for col in str_columns] 13 | + [[-1]] 14 | ) 15 | 16 | 17 | def get_columns(int_columns, encoded_categorical_columns, bool_columns, float_columns, str_columns): 18 | """Get the representations for all input columns.""" 19 | 20 | columns = [] 21 | if float_columns: 22 | columns += [ 23 | tf.feature_column.numeric_column(ci, dtype=dtypes.float32) for ci in float_columns 24 | ] 25 | if int_columns: 26 | columns += [tf.feature_column.numeric_column(ci, dtype=dtypes.int32) for ci in int_columns] 27 | if encoded_categorical_columns: 28 | columns += [ 29 | tf.feature_column.numeric_column(ci, dtype=dtypes.int32) 30 | for ci in encoded_categorical_columns 31 | ] 32 | if str_columns: 33 | # pylint: disable=g-complex-comprehension 34 | str_nuniquess = len(set(str_columns)) 35 | columns += [ 36 | tf.feature_column.embedding_column( 37 | tf.feature_column.categorical_column_with_hash_bucket( 38 | ci, hash_bucket_size=int(3 * num) 39 | ), 40 | dimension=1, 41 | ) 42 | for ci, num in zip(str_columns, str_nuniquess) 43 | ] 44 | if bool_columns: 45 | # pylint: disable=g-complex-comprehension 46 | columns += [ 47 | tf.feature_column.embedding_column( 48 | tf.feature_column.categorical_column_with_hash_bucket(ci, hash_bucket_size=3), 49 | dimension=1, 50 | ) 51 | for ci in bool_columns 52 | ] 53 | return columns 54 | 55 | 56 | def parse_csv(int_columns, bool_columns, float_columns, str_columns, label_column, value_column): 57 | """Parses a CSV file based on the provided column types.""" 58 | defaults = set_defaults(int_columns, bool_columns, float_columns, str_columns) 59 | all_columns = 
int_columns + bool_columns + float_columns + str_columns + [label_column] 60 | columns = tf.decode_csv(value_column, record_defaults=defaults) 61 | features = dict(zip(all_columns, columns)) 62 | label = features.pop(label_column) 63 | classes = tf.cast(label, tf.int32) - 1 64 | return features, classes 65 | 66 | 67 | def input_fn( 68 | data_file, 69 | int_columns, 70 | bool_columns, 71 | float_columns, 72 | str_columns, 73 | label_column, 74 | num_epochs, 75 | shuffle, 76 | batch_size, 77 | n_buffer=50, 78 | n_parallel=16, 79 | ): 80 | """Function to read the input file and return the dataset. 81 | 82 | Args: 83 | data_file: Name of the file. 84 | num_epochs: Number of epochs. 85 | shuffle: Whether to shuffle the data. 86 | batch_size: Batch size. 87 | n_buffer: Buffer size. 88 | n_parallel: Number of cores for multi-core processing option. 89 | 90 | Returns: 91 | The Tensorflow dataset. 92 | """ 93 | 94 | # Extract lines from input files using the Dataset API. 95 | dataset = tf.data.TextLineDataset(data_file) 96 | 97 | if shuffle: 98 | dataset = dataset.shuffle(buffer_size=n_buffer) 99 | 100 | parse_csv_partial = functools.partial( 101 | parse_csv, 102 | int_columns, 103 | bool_columns, 104 | float_columns, 105 | str_columns, 106 | label_column, 107 | ) 108 | 109 | dataset = dataset.batch(batch_size, drop_remainder=True) 110 | dataset = dataset.map(parse_csv_partial, num_parallel_calls=n_parallel) 111 | 112 | # Repeat after shuffling, to prevent separate epochs from blending together. 113 | dataset = dataset.repeat(num_epochs) 114 | return dataset 115 | -------------------------------------------------------------------------------- /util/logging.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import sys 4 | 5 | 6 | def create_logger(): 7 | log = logging.getLogger() 8 | log.setLevel(logging.INFO) 9 | 10 | ch = logging.StreamHandler(sys.stdout) 11 | ch.setLevel(logging.INFO) 12 | formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") 13 | ch.setFormatter(formatter) 14 | log.addHandler(ch) 15 | return log 16 | -------------------------------------------------------------------------------- /util/tfutils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import tensorflow as tf 3 | 4 | 5 | def create_tf_example(row, int_columns, float_columns, label_column): 6 | features = {} 7 | 8 | for feat_name in int_columns: 9 | features[feat_name] = tf.train.Feature( 10 | int64_list=tf.train.Int64List(value=[row[feat_name].astype(dtype=int)]) 11 | ) 12 | 13 | for feat_name in float_columns: 14 | features[feat_name] = tf.train.Feature( 15 | float_list=tf.train.FloatList(value=[row[feat_name]]) 16 | ) 17 | 18 | features[label_column] = tf.train.Feature(float_list=tf.train.FloatList(value=[row[feat_name]])) 19 | 20 | tf_example = tf.train.Example(features=tf.train.Features(feature=features)) 21 | return tf_example 22 | 23 | 24 | def write_tfrecords(df, path): 25 | with tf.python_io.TFRecordWriter(path) as writer: 26 | for index, row in df.iterrows(): 27 | tf_example = create_tf_example(row) 28 | writer.write(tf_example.SerializeToString()) 29 | --------------------------------------------------------------------------------