├── .gitattributes
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── config
    ├── experiments
    │   └── simple_experiment.ini
    └── main.ini
├── data
    └── autism.tsv
├── experiments
    ├── __init__.py
    ├── classifier.py
    ├── dataset.py
    └── experiment.py
├── methods
    ├── __init__.py
    ├── selection.py
    └── selection_wrapper.py
├── requirements.txt
├── run.py
├── tests
    ├── __init__.py
    ├── corr.py
    ├── test_fisher.py
    ├── test_pearson.py
    ├── test_statistics.py
    └── ttest.py
└── utils
    ├── __init__.py
    ├── data_reader.py
    ├── log_saver.py
    └── statistics.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | .tsv filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
103 | logs
104 | .idea/
105 | .pytest_cache/
106 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 |   - "3.5"
4 | script:
5 |   - pytest


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Tomasz Latkowski
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status](https://travis-ci.org/tlatkowski/tf-feature-selection.svg?branch=master)](https://travis-ci.org/tlatkowski/tf-feature-selection) ![](https://img.shields.io/badge/Status-under--dev-red.svg) ![](https://img.shields.io/badge/Python-3.6-blue.svg) ![](https://img.shields.io/badge/Tensorflow-1.12.2-blue.svg) ![](https://img.shields.io/badge/License-MIT-blue.svg)
2 | # tf-feature-selection
3 | Implementation of feature selection methods using the TensorFlow library.
4 | 


--------------------------------------------------------------------------------
/config/experiments/simple_experiment.ini:
--------------------------------------------------------------------------------
1 | [SELECTION]
2 | num_features = 100
3 | method = fisher
4 | 
5 | [CLASSIFIER]
6 | hidden_sizes = 20
7 | 


--------------------------------------------------------------------------------
/config/main.ini:
--------------------------------------------------------------------------------
1 | [TRAINING]
2 | num_epochs = 1000
3 | eval_every = 10


--------------------------------------------------------------------------------
/data/autism.tsv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:b3b7d953d54f6bd08f9860347df05bbacfcccca254400cf4711b2be30e1cde71
3 | size 95496413
4 | 


--------------------------------------------------------------------------------
/experiments/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tlatkowski/tf-feature-selection/910f03764a675841eaf4578415ae697d78860b81/experiments/__init__.py


--------------------------------------------------------------------------------
/experiments/classifier.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def ff_neural_network(inputs, units):
 5 |     layer = tf.layers.dense(inputs, units=units, activation=tf.nn.tanh)
 6 |     output = tf.layers.dense(layer, units=1)
 7 |     return output
 8 | 
 9 | 
class NeuralNetworkClassifier:
    """
    Binary classifier built on top of ``ff_neural_network``.

    Exposes the input/label placeholders (``x``, ``y``), the training ops
    (``loss``, ``opt``), the sigmoid ``prediction``, the ``accuracy`` metric and
    a merged ``summary_op`` holding the accuracy and loss scalars.
    """

    def __init__(self, num_features, units):
        """
        :param num_features: width of the input placeholder.
        :param units: number of hidden units passed to ``ff_neural_network``.
        """
        self.x = tf.placeholder(dtype=tf.float64, shape=[None, num_features], name='inputs')
        self.y = tf.placeholder(dtype=tf.float64, shape=[None, 1], name='labels')

        logits = ff_neural_network(self.x, units=units)

        with tf.name_scope('loss'):
            self.loss = tf.losses.sigmoid_cross_entropy(self.y, logits)
            self.opt = tf.train.AdamOptimizer(learning_rate=0.01).minimize(self.loss)

        with tf.name_scope('metrics'):
            # Probability of the positive class.
            self.prediction = tf.nn.sigmoid(logits)

            # Threshold the probability before comparing with the labels:
            # a raw sigmoid output is practically never exactly equal to the
            # label value, which would pin the accuracy near zero.
            predicted_labels = tf.round(self.prediction)
            self.correct_predictions = tf.equal(predicted_labels, self.y)
            self.accuracy = tf.reduce_mean(tf.to_float(self.correct_predictions))
            tf.summary.scalar("accuracy", self.accuracy)
            tf.summary.scalar("loss", self.loss)
            self.summary_op = tf.summary.merge_all()
30 | 


--------------------------------------------------------------------------------
/experiments/dataset.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.model_selection import StratifiedKFold
 3 | 
 4 | from utils.data_reader import read
 5 | 
 6 | 
 7 | class Dataset:
 8 | 
 9 |     def __init__(self, data_fn):
10 |         self.data = read(data_fn)
11 |         # FIXME
12 |         self.labels = np.concatenate([np.ones(82, dtype=np.float64), np.zeros(64, dtype=np.float64)])
13 |         self.labels = np.reshape(self.labels, (-1, 1))
14 | 
15 |         self.skf = StratifiedKFold(n_splits=10)
16 | 
17 |     def cross_validation(self):
18 |         return enumerate(self.skf.split(self.data, self.labels.reshape(146)))
19 | 
20 |     def get_data(self, indices):
21 |         return self.data[indices, :]
22 | 
23 |     def get_labels(self, indices):
24 |         selected_labels = self.labels[indices]
25 |         num_instances = [int(sum(selected_labels == 0)), int(sum(selected_labels == 1))]
26 |         return num_instances, selected_labels
27 | 


--------------------------------------------------------------------------------
/experiments/experiment.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | from methods.selection import fisher, feature_correlation_with_class, t_test, random
 4 | from methods.selection_wrapper import SelectionWrapper
 5 | 
 6 | methods = {
 7 |     'fisher': fisher,
 8 |     'corr': feature_correlation_with_class,
 9 |     'ttest': t_test,
10 |     'random': random
11 | }
12 | 
13 | 
class Experiment:
    """Builds the feature-selection step and the classifier described by a config."""

    def __init__(self, experiment_config, num_instances, classifier, dataset):
        """
        :param experiment_config: mapping with a ``SELECTION`` section (``method``,
            ``num_features``) and a ``CLASSIFIER`` section (``hidden_sizes``).
        :param num_instances: class sizes forwarded to the selection method.
        :param classifier: callable invoked as ``classifier(num_features, hidden_sizes)``.
        :param dataset: data handed to ``SelectionWrapper`` for selection.
        """
        selection_section = experiment_config['SELECTION']
        chosen_method = methods[selection_section['method']]
        feature_count = int(selection_section['num_features'])
        hidden_units = int(experiment_config['CLASSIFIER']['hidden_sizes'])

        with tf.name_scope('selection'):
            self.selection_wrapper = SelectionWrapper(
                dataset,
                num_instances=num_instances,
                selection_method=chosen_method,
                num_features=feature_count,
            )

        with tf.name_scope('classifier'):
            self.clf = classifier(feature_count, hidden_units)
29 | 


--------------------------------------------------------------------------------
/methods/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tlatkowski/tf-feature-selection/910f03764a675841eaf4578415ae697d78860b81/methods/__init__.py


--------------------------------------------------------------------------------
/methods/selection.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def fisher(data, num_instances: list, top_k_features=2):
 5 |     """
 6 |     Performs Fisher feature selection method according to the following formula:
 7 |     D(f) = (m1(f) - m2(f) / (std1(f) - std2(f))
 8 | 
 9 |     :param data:
10 |     :param num_instances:
11 |     :param top_k_features:
12 |     :return: the list of most significant features.
13 |     """
14 |     assert len(num_instances) == 2, "Fisher selection method can be performed for two-class problems."
15 | 
16 |     data = tf.convert_to_tensor(data)
17 |     num_features = data.get_shape().as_list()[-1]
18 |     if top_k_features > num_features:
19 |         top_k_features = num_features
20 |     class1, class2 = tf.split(data, num_instances)
21 | 
22 |     with tf.name_scope('fisher_selection'):
23 |         mean1, std1 = tf.nn.moments(class1, axes=0)
24 |         mean2, std2 = tf.nn.moments(class2, axes=0)
25 |         fisher_coeffs = tf.abs(mean1 - mean2) / (std1 + std2)
26 |         selected_features = tf.nn.top_k(fisher_coeffs, k=top_k_features)
27 | 
28 |     return selected_features
29 | 
30 | 
def feature_correlation_with_class(data, num_instances: list, top_k_features=10):
    """
    Makes feature correlation with class selection according to the following formula:
    D(f) = [(m1(f) - m(f))^2 + (m2(f) - m(f))^2] / (2 * sigma(f)^2)
    where m1, m2 are the per-class means, m is the overall mean and sigma(f)^2 is
    the overall variance of feature f.

    :param data: 2-D data whose rows are grouped by class, in the order of num_instances.
    :param num_instances: the number of rows in each of the two classes.
    :param top_k_features: how many features to keep (capped at the number of features).
    :return: the result of tf.nn.top_k, i.e. the (values, indices) of the selected features.
    """
    data = tf.convert_to_tensor(data)
    num_features = data.get_shape().as_list()[-1]
    top_k_features = min(top_k_features, num_features)
    class1, class2 = tf.split(data, num_instances)

    with tf.name_scope('corr_selection'):
        mean1, _ = tf.nn.moments(class1, axes=0)
        mean2, _ = tf.nn.moments(class2, axes=0)
        # tf.nn.moments returns the variance, which already is sigma^2.
        mean, var = tf.nn.moments(data, axes=0)
        # Parenthesise the denominator: "/ 2 * x" divides by 2 and then
        # multiplies by x instead of dividing by (2 * x).
        corr_coeffs = (tf.square(mean1 - mean) + tf.square(mean2 - mean)) / (2 * var)
        selected_features = tf.nn.top_k(corr_coeffs, k=top_k_features)

    return selected_features
51 | 
52 | 
def t_test(data, num_instances: list, top_k_features=10):
    """
    Makes t-test based feature selection according to the following formula:
    D(f) = |m1(f) - m2(f)| / sqrt(v1(f) / n1 + v2(f) / n2)
    where m, v and n are the per-class mean, variance and number of instances.

    :param data: 2-D data whose rows are grouped by class, in the order of num_instances.
    :param num_instances: the number of rows in each of the two classes.
    :param top_k_features: how many features to keep (capped at the number of features).
    :return: the result of tf.nn.top_k, i.e. the (values, indices) of the selected features.
    """
    data = tf.convert_to_tensor(data)
    num_features = data.get_shape().as_list()[-1]
    top_k_features = min(top_k_features, num_features)
    class1, class2 = tf.split(data, num_instances)

    with tf.name_scope('t_test_selection'):
        # tf.nn.moments returns (mean, variance); the variance must not be
        # squared again when forming the standard error.
        mean1, var1 = tf.nn.moments(class1, axes=0)
        mean2, var2 = tf.nn.moments(class2, axes=0)
        standard_error = tf.sqrt(var1 / num_instances[0] + var2 / num_instances[1])
        t_test_coeffs = tf.abs(mean1 - mean2) / standard_error
        selected_features = tf.nn.top_k(t_test_coeffs, k=top_k_features)

    return selected_features
73 | 
74 | 
def random(data, num_instances: list, top_k_features=10):
    """
    Selects features at random, as a baseline for the other selection methods.

    Every feature gets a random score that is drawn once, when the graph is
    built, and stored as a constant. The same features are therefore picked on
    every session run, so train and test data stay aligned.

    :param data: 2-D data; only its number of columns is used.
    :param num_instances: unused, kept so that all selection methods share one signature.
    :param top_k_features: how many features to keep (capped at the number of features).
    :return: the result of tf.nn.top_k, i.e. the (values, indices) of the selected features.
    """
    # Local import: this module does not import numpy at the top level.
    import numpy as np

    data = tf.convert_to_tensor(data)
    num_features = data.get_shape().as_list()[-1]
    top_k_features = min(top_k_features, num_features)

    with tf.name_scope('random_selection'):
        random_scores = tf.constant(np.random.uniform(size=num_features), dtype=tf.float64)
        selected_features = tf.nn.top_k(random_scores, k=top_k_features)

    return selected_features


--------------------------------------------------------------------------------
/methods/selection_wrapper.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class SelectionWrapper:
 5 | 
 6 |     def __init__(self, data, num_instances, selection_method=None, num_features=None):
 7 |         if data is None:
 8 |             raise ValueError('Provide data to make selection.')
 9 | 
10 |         if selection_method is None:
11 |             raise ValueError('Provide selection method.')
12 | 
13 |         if num_features is None:
14 |             data = tf.convert_to_tensor(data)
15 |             num_features = data.get_shape().as_list()[-1]
16 | 
17 |         self.values, self.indices = selection_method(data, num_instances, num_features)
18 |         self.selected_data = tf.gather(data, self.indices, axis=1)
19 | 
20 |     def select(self, data):
21 |         return tf.gather(data, self.indices, axis=1)
22 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | bleach==1.5.0
 2 | enum34==1.1.6
 3 | html5lib==0.9999999
 4 | Markdown==2.6.11
 5 | numpy==1.13.3
 6 | pandas==0.19.0
 7 | protobuf==3.5.1
 8 | python-dateutil==2.6.1
 9 | pytz==2018.3
10 | scikit-learn==0.19.1
11 | scipy==1.0.0
12 | six==1.11.0
13 | sklearn==0.0
14 | tensorflow==1.12.2
15 | tensorflow-tensorboard==0.4.0
16 | tqdm==4.19.6
17 | Werkzeug==0.15.3
18 | 


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
 1 | import configparser
 2 | from argparse import ArgumentParser
 3 | 
 4 | import tensorflow as tf
 5 | from tqdm import tqdm
 6 | 
 7 | from experiments.classifier import NeuralNetworkClassifier
 8 | from experiments.dataset import Dataset
 9 | from experiments.experiment import Experiment
10 | from utils.log_saver import LogSaver
11 | 
12 | 
def run_experiment(experiment_config):
    """
    Train and evaluate the classifier on every cross-validation fold.

    For each fold a fresh graph is built, features are selected on the training
    rows, and the classifier is trained for a fixed number of epochs. Train and
    test summaries are logged every ``eval_every`` epochs.

    :param experiment_config: parsed experiment configuration; its ``SELECTION``
        section must provide ``method`` (also used to name the log directories).
    """
    dataset = Dataset('data/autism.tsv')
    num_epochs = 1000
    eval_every = 10
    # Name the log directories after the configured method instead of a
    # hard-coded 'fisher', so runs of different methods do not mix.
    method_name = experiment_config['SELECTION']['method']

    for fold_id, (train_idxs, test_idxs) in dataset.cross_validation():

        data_train_fold = dataset.get_data(train_idxs)
        num_instances, labels_train_fold = dataset.get_labels(train_idxs)

        data_test_fold = dataset.get_data(test_idxs)
        _, labels_test_fold = dataset.get_labels(test_idxs)

        with tf.Graph().as_default():

            experiment = Experiment(experiment_config, num_instances, NeuralNetworkClassifier, data_train_fold)
            clf = experiment.clf
            selection = experiment.selection_wrapper

            with tf.Session() as session:

                session.run(tf.global_variables_initializer())

                log_saver = LogSaver('logs', '{}_fold{}'.format(method_name, fold_id), session.graph)

                train_selected_data = session.run(selection.selected_data)
                test_selected_data = session.run(selection.select(data_test_fold))

                # The feeds do not change between epochs, so build them once.
                train_feed = {clf.x: train_selected_data, clf.y: labels_train_fold}
                test_feed = {clf.x: test_selected_data, clf.y: labels_test_fold}

                tqdm_iter = tqdm(range(num_epochs), desc='Epochs')

                for epoch in tqdm_iter:
                    loss, _ = session.run([clf.loss, clf.opt], feed_dict=train_feed)

                    if epoch % eval_every == 0:
                        summary = session.run(clf.summary_op, feed_dict=train_feed)
                        log_saver.log_train(summary, epoch)

                        summary = session.run(clf.summary_op, feed_dict=test_feed)
                        log_saver.log_test(summary, epoch)

                    tqdm_iter.set_postfix(loss='{:.2f}'.format(float(loss)), epoch=epoch)
56 | 
57 | 
def main():
    """
    Parse the command line, load the chosen experiment config and run it.

    Exits through ``parser.error`` when the configuration file cannot be read.
    """
    parser = ArgumentParser()
    # nargs='?' makes the positional optional so that the default can apply;
    # '%(default)s' needs the trailing 's' or rendering the help text fails.
    parser.add_argument('experiment',
                        nargs='?',
                        default='simple_experiment',
                        choices=['simple_experiment'],
                        help='model used during training (default: %(default)s)')

    args = parser.parse_args()
    config_path = 'config/experiments/{}.ini'.format(args.experiment)
    experiment_config = configparser.ConfigParser()
    # ConfigParser.read() skips unreadable files silently and returns the list
    # of files it parsed, so an empty result means the config is missing.
    if not experiment_config.read(config_path):
        parser.error('cannot read experiment config: {}'.format(config_path))

    run_experiment(experiment_config)
70 | 
71 | 
72 | if __name__ == '__main__':
73 |     main()
74 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tlatkowski/tf-feature-selection/910f03764a675841eaf4578415ae697d78860b81/tests/__init__.py


--------------------------------------------------------------------------------
/tests/corr.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
class CorrelationWithClassSelectionTest(tf.test.TestCase):
    """Placeholder test case for the correlation-with-class selection method."""

    def testCorrelationWithClassCorrectScore(self):
        # Not written yet: the test currently always raises.
        raise NotImplementedError
 8 | 
 9 | 
10 | if __name__ == '__main__':
11 |     tf.test.main()
12 | 


--------------------------------------------------------------------------------
/tests/test_fisher.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | from methods.selection import fisher
 5 | from methods.selection_wrapper import SelectionWrapper
 6 | 
 7 | 
 8 | class TestFisherSelection(tf.test.TestCase):
 9 | 
10 |     def testFisherCorrectScore(self):
11 |         with self.test_session() as test_session:
12 |             data = np.array([[2, 2],
13 |                              [4, 4],
14 |                              [3, 6],
15 |                              [5, 6]])
16 |             num_instances = [2, 2]
17 |             top_k = 2
18 |             actual_most_significant_features, _ = test_session.run(fisher(data, num_instances, top_k))
19 |             correct_most_significant_features = [3., .5]
20 | 
21 |             self.assertAllEqual(actual_most_significant_features, correct_most_significant_features)
22 | 
23 |     def testFisherPickFirstSignificantFeature(self):
24 |         with self.test_session() as test_session:
25 |             data = np.array([[2, 2],
26 |                              [4, 4],
27 |                              [3, 6],
28 |                              [5, 6]])
29 | 
30 |             num_instances = [2, 2]
31 |             top_k = 1
32 |             selection_wrapper = SelectionWrapper(data,
33 |                                                  num_instances,
34 |                                                  fisher,
35 |                                                  num_features=top_k)
36 |             actual_most_significant_features = test_session.run(selection_wrapper.selected_data)
37 |             correct_most_significant_features = [[2.], [4.], [6.], [6.]]
38 | 
39 |             self.assertAllEqual(actual_most_significant_features, correct_most_significant_features)
40 | 
41 |     def testFisherCorrectOrderOfFeatures(self):
42 |         with self.test_session() as test_session:
43 |             data = np.array([[2, 2],
44 |                              [4, 4],
45 |                              [3, 6],
46 |                              [5, 6]])
47 |             num_instances = [2, 2]
48 |             top_k = 2
49 |             _, actual_most_significant_features = test_session.run(fisher(data, num_instances, top_k))
50 |             correct_most_significant_features = [1., 0.]
51 | 
52 |             self.assertAllEqual(actual_most_significant_features, correct_most_significant_features)
53 | 
54 |     def testMoreThan2ClassesIsNotAllowed(self):
55 |         with self.test_session() as test_session:
56 |             data = np.array([[2, 2],
57 |                              [4, 4],
58 |                              [3, 6],
59 |                              [5, 6]])
60 |             num_instances = [2, 2, 2]
61 |             top_k = 2
62 |             with self.assertRaises(AssertionError):
63 |                 _, actual_most_significant_features = test_session.run(fisher(data, num_instances, top_k))
64 | 
65 | 
66 | if __name__ == '__main__':
67 |     tf.test.main()
68 | 


--------------------------------------------------------------------------------
/tests/test_pearson.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | from utils.statistics import pearson_correlation
 5 | 
 6 | 
 7 | class TestPearson(tf.test.TestCase):
 8 | 
 9 |     def testPearsonCoefficientValueForTwoVectors(self):
10 |         with self.test_session() as test_session:
11 |             x1 = np.array([2., 3., 4.])
12 |             x2 = np.array([3., 1., 5.])
13 | 
14 |             actual_pearson_coefficient = test_session.run(pearson_correlation(x1, x2))
15 |             correct_pearson_coefficient = [.5]
16 | 
17 |             self.assertEqual(actual_pearson_coefficient, correct_pearson_coefficient)
18 | 
19 |     def testNegativePearsonCoefficientValueForTwoVectors(self):
20 |         with self.test_session() as test_session:
21 |             x1 = np.array([1., 2., 3.])
22 |             x2 = np.array([-1., -2., -3.])
23 | 
24 |             actual_pearson_coefficient = test_session.run(pearson_correlation(x1, x2))
25 |             correct_pearson_coefficient = [-1.]
26 | 
27 |             self.assertEqual(actual_pearson_coefficient, correct_pearson_coefficient)
28 | 
29 |     def testPositivePearsonCoefficientValueForTwoVectors(self):
30 |         with self.test_session() as test_session:
31 |             x1 = np.array([1., 2., 3.])
32 |             x2 = np.array([1., 2., 3.])
33 | 
34 |             actual_pearson_coefficient = test_session.run(pearson_correlation(x1, x2))
35 |             correct_pearson_coefficient = [1.]
36 | 
37 |             self.assertEqual(actual_pearson_coefficient, correct_pearson_coefficient)
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     tf.test.main()
42 | 


--------------------------------------------------------------------------------
/tests/test_statistics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | from utils.statistics import pooled_variance
 5 | 
 6 | 
 7 | class TestStatistics(tf.test.TestCase):
 8 | 
 9 |     def testPooledVariance(self):
10 |         with self.test_session() as test_session:
11 |             data = np.array([[2., 3., 4., 5.],
12 |                              [2., 3., 4., 5.],
13 |                              [2., 3., 4., 5.],
14 |                              [2., 3., 4., 5.]])
15 |             num_instances = [2, 2]
16 |             actual_pooled_variance = test_session.run(pooled_variance(data, num_instances))
17 |             correct_pooled_variance = [.0, .0, .0, .0]
18 | 
19 |             self.assertAllEqual(actual_pooled_variance, correct_pooled_variance)
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     tf.test.main()
24 | 


--------------------------------------------------------------------------------
/tests/ttest.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
class TtestSelectionTest(tf.test.TestCase):
    """Placeholder test case for the t-test selection method."""

    def testTtestCorrectScore(self):
        # Not written yet: the test currently always raises.
        raise NotImplementedError
 8 | 
 9 | 
10 | if __name__ == '__main__':
11 |     tf.test.main()
12 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tlatkowski/tf-feature-selection/910f03764a675841eaf4578415ae697d78860b81/utils/__init__.py


--------------------------------------------------------------------------------
/utils/data_reader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | 
def read(file_name):
    """
    Read a tab-separated file and return its transposed contents as an array.

    The file is read without a header row and with its first column as the
    index; the table is then transposed, so each file column becomes one row
    of the result.

    :param file_name: path of the tab-separated file.
    :return: the transposed table as a NumPy array.
    """
    data = pd.read_csv(file_name, sep='\t', header=None, index_col=0).T
    # DataFrame.as_matrix() was deprecated and later removed from pandas;
    # .values yields the same array and exists in old and new releases alike.
    return data.values
7 | 


--------------------------------------------------------------------------------
/utils/log_saver.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import tensorflow as tf
 4 | 
 5 | 
 6 | class LogSaver:
 7 | 
 8 |     def __init__(self, logs_path, model_name, graph: tf.Graph):
 9 |         if not os.path.isdir(logs_path):
10 |             os.makedirs(logs_path)
11 |         self.test_summary_writer = tf.summary.FileWriter('{}/{}/test/'.format(logs_path, model_name), graph=graph)
12 |         self.train_summary_writer = tf.summary.FileWriter('{}/{}/train/'.format(logs_path, model_name), graph=graph)
13 | 
14 |     def log_test(self, summary, global_step):
15 |         self.test_summary_writer.add_summary(summary, global_step)
16 | 
17 |     def log_train(self, summary, global_step):
18 |         self.test_summary_writer.add_summary(summary, global_step)
19 | 


--------------------------------------------------------------------------------
/utils/statistics.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def pearson_correlation(x1, x2):
 5 |     x1 = tf.convert_to_tensor(x1)
 6 |     x2 = tf.convert_to_tensor(x2)
 7 |     m1, std1 = tf.nn.moments(x1, axes=0)
 8 |     m2, std2 = tf.nn.moments(x2, axes=0)
 9 |     l = tf.reduce_sum((x1 - m1) * (x2 - m2))
10 |     i = tf.reduce_sum((x1 - m1) ** 2) * tf.reduce_sum((x2 - m2) ** 2)
11 |     p = tf.sqrt(i)
12 |     return l / p
13 | 
14 | 
def f_test(data, num_instances):
    """
    Performs F-statistic between the genes and the classification variable h
    as the score of maximum relevance:
    F(f) = [sum_k n_k * (m_k(f) - m(f))^2 / (K - 1)] / pooled_variance(f)
    where n_k and m_k are the size and mean of class k, m is the overall mean
    and K is the number of classes.

    :param data: 2-D data whose rows are grouped by class, in the order of num_instances.
    :param num_instances: the number of rows in each class.
    :return: tensor with one F-statistic per feature.
    """
    # Same dtype as pooled_variance uses, so the final division is well-typed.
    data = tf.convert_to_tensor(data, dtype=tf.float32)
    num_classes = len(num_instances)
    class_chunks = tf.split(data, num_instances)

    with tf.name_scope('f_statistic'):
        overall_mean, _ = tf.nn.moments(data, axes=0)

        # Between-class sum of squares, weighted by the class sizes.
        weighted_squares = []
        for class_size, chunk in zip(num_instances, class_chunks):
            class_mean, _ = tf.nn.moments(chunk, axes=0)
            weighted_squares.append(class_size * tf.square(class_mean - overall_mean))
        between_class = tf.add_n(weighted_squares) / (num_classes - 1)

        pooled_var = pooled_variance(data, num_instances)
        # The statistic used to be computed and thrown away; return it.
        return between_class / pooled_var
35 | 
36 | 
def pooled_variance(data, num_instances):
    """
    Combines the per-class variances into a pooled variance per feature:
    sum_k (n_k - 1) * var_k / (n - K)
    where n_k and var_k are the size and variance of class k, n is the total
    number of rows and K is the number of classes.

    :param data: 2-D data whose rows are grouped by class, in the order of num_instances.
    :param num_instances: the number of rows in each class.
    :return: tensor with one pooled variance per feature.
    """
    num_classes = len(num_instances)
    total_rows = sum(num_instances)
    data = tf.convert_to_tensor(data, dtype=tf.float32)

    class_variances = [tf.nn.moments(chunk, axes=0)[1]
                       for chunk in tf.split(data, num_instances)]

    # Column vector of class sizes so it broadcasts over the feature axis.
    class_sizes = tf.to_float(tf.reshape(num_instances, [num_classes, -1]))
    weighted = tf.stack(class_variances) * (class_sizes - 1)
    return tf.reduce_sum(weighted, axis=0) / (total_rows - num_classes)


--------------------------------------------------------------------------------