├── setup.py ├── test ├── create_digits_data.py └── test.py ├── README.markdown ├── LICENSE.txt └── feast.py /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name='feast', 4 | version='1.0', 5 | py_modules=['feast'], 6 | ) 7 | -------------------------------------------------------------------------------- /test/create_digits_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from sklearn import datasets 3 | 4 | digits = datasets.load_digits() # load the data from scikits 5 | data = digits.images.reshape((digits.images.shape[0], -1)) 6 | labels = digits.target # extract the labels 7 | 8 | fw = open('digit.txt', 'w') 9 | 10 | for n in range(len(data)): 11 | mstr = '' 12 | for x in data[n]: 13 | mstr += str(x) + '\t' 14 | fw.write(mstr + str(labels[n]) + '\n') 15 | 16 | fw.close() -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # PyFeast 2 | Python bindings to the FEAST Feature Selection Toolbox. 3 | 4 | ## Download 5 | 6 | [Download Version 1.1](https://github.com/mutantturkey/PyFeast/releases/tag/v1.1) 7 | ## About PyFeast 8 | PyFeast is an interface for the FEAST feature selection toolbox, which was 9 | originally written in C with an interface to Matlab. 10 | 11 | Because Python is also commonly used in computational science, writing bindings 12 | to enable researchers to utilize these feature selection algorithms in Python 13 | was only natural. 14 | 15 | At Drexel University's [EESI Lab](http://www.ece.drexel.edu/gailr/EESI/), we are using PyFeast to create a feature 16 | selection tool for the Department of Energy's upcoming KBase platform. 
We are also integrating a tool that utilizes 17 | PyFeast as a script for Qiime users: [Qiime Fizzy Branch](https://github.com/EESI/FizzyQIIME) 18 | 19 | ## Requirements 20 | In order to use the feast module, you will need the following dependencies 21 | 22 | * Python 2.7 23 | * Numpy 24 | * Linux or OS X 25 | * [MIToolbox](https://github.com/Craigacp/MIToolbox) 26 | * [FEAST](https://github.com/Craigacp/FEAST) v1.1.1 or higher 27 | 28 | ## Installation 29 | 30 | python ./setup.py build 31 | sudo python ./setup.py install 32 | 33 | ## Demonstration 34 | See test/test.py for an example with uniform data and an image 35 | data set. The image data set was collected from the digits example in 36 | the Scikits-Learn toolbox. Make sure that if you are loading the data from a file and converting the data to a `numpy` array that you set `order="F"`. This is *very* important. 37 | 38 | ## Documentation 39 | We have documentation for each of the functions available [here](http://mutantturkey.github.com/PyFeast/feast-module.html) 40 | 41 | ## References 42 | * [FEAST](http://www.cs.man.ac.uk/~gbrown/fstoolbox/) - The Feature Selection Toolbox 43 | * [Fizzy](http://www.kbase.us/developer-zone/api-documentation/fizzy-feature-selection-service/) - A KBase Service for Feature Selection 44 | * [Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection](http://jmlr.csail.mit.edu/papers/v13/brown12a.html) 45 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from feast import * 3 | import numpy as np 4 | import csv 5 | 6 | 7 | def check_result(selected_features, n_relevant): 8 | selected_features = sorted(selected_features) 9 | success = True 10 | for k in range(n_relevant): 11 | if k != selected_features[k]: 12 | success = False 13 | return success 14 | 15 | def 
read_digits(fname='digit.txt'): 16 | ''' 17 | read_digits(fname='digit.txt') 18 | 19 | read a data file that contains the features and class labels. 20 | each row of the file is a feature vector with the class 21 | label appended. 22 | ''' 23 | 24 | fw = csv.reader(open(fname,'rb'), delimiter='\t') 25 | data = [] 26 | for line in fw: 27 | data.append( [float(x) for x in line] ) 28 | data = np.array(data, order="F") 29 | labels = data[:,len(data.transpose())-1] 30 | data = data[:,:len(data.transpose())-1] 31 | return data, labels 32 | 33 | def uniform_data(n_observations = 1000, n_features = 50, n_relevant = 5): 34 | import numpy as np 35 | xmax = 10 36 | xmin = 0 37 | data = 1.0*np.random.randint(xmax + 1, size = (n_features, n_observations)) 38 | labels = np.zeros(n_observations) 39 | delta = n_relevant * (xmax - xmin) / 2.0 40 | 41 | for m in range(n_observations): 42 | zz = 0.0 43 | for k in range(n_relevant): 44 | zz += data[k, m] 45 | if zz > delta: 46 | labels[m] = 1 47 | else: 48 | labels[m] = 2 49 | data = data.transpose() 50 | return data, labels 51 | 52 | 53 | 54 | 55 | 56 | n_relevant = 5 57 | data_source = 'uniform' # set the data set we want to test 58 | 59 | 60 | if data_source == 'uniform': 61 | data, labels = uniform_data(n_relevant = n_relevant) 62 | elif data_source == 'digits': 63 | data, labels = read_digits('digit.txt') 64 | 65 | n_observations = len(data) # number of samples in the data set 66 | n_features = len(data.transpose()) # number of features in the data set 67 | n_select = 15 # how many features to select 68 | method = 'MIM' # feature selection algorithm 69 | 70 | 71 | print '---> Information' 72 | print ' :n_observations - ' + str(n_observations) 73 | print ' :n_features - ' + str(n_features) 74 | print ' :n_select - ' + str(n_select) 75 | print ' :algorithm - ' + str(method) 76 | print ' ' 77 | print '---> Running unit tests on FEAST 4 Python... 
' 78 | 79 | 80 | ################################################################# 81 | ################################################################# 82 | print ' Running BetaGamma... ' 83 | sf = BetaGamma(data, labels, n_select, beta=0.5, gamma=0.5) 84 | if check_result(sf, n_relevant) == True: 85 | print ' BetaGamma passed!' 86 | else: 87 | print ' BetaGamma failed!' 88 | 89 | ################################################################# 90 | ################################################################# 91 | print ' Running CMIM... ' 92 | sf = CMIM(data, labels, n_select) 93 | if check_result(sf, n_relevant) == True: 94 | print ' CMIM passed!' 95 | else: 96 | print ' CMIM failed!' 97 | 98 | 99 | ################################################################# 100 | ################################################################# 101 | print ' Running CondMI... ' 102 | sf = CondMI(data, labels, n_select) 103 | if check_result(sf, n_relevant) == True: 104 | print ' CondMI passed!' 105 | else: 106 | print ' CondMI failed!' 107 | 108 | 109 | ################################################################# 110 | ################################################################# 111 | print ' Running DISR... ' 112 | sf = DISR(data, labels, n_select) 113 | if check_result(sf, n_relevant) == True: 114 | print ' DISR passed!' 115 | else: 116 | print ' DISR failed!' 117 | 118 | 119 | ################################################################# 120 | ################################################################# 121 | print ' Running ICAP... ' 122 | sf = ICAP(data, labels, n_select) 123 | if check_result(sf, n_relevant) == True: 124 | print ' ICAP passed!' 125 | else: 126 | print ' ICAP failed!' 127 | 128 | 129 | ################################################################# 130 | ################################################################# 131 | print ' Running JMI... 
' 132 | sf = JMI(data, labels, n_select) 133 | if check_result(sf, n_relevant) == True: 134 | print ' JMI passed!' 135 | else: 136 | print ' JMI failed!' 137 | 138 | 139 | ################################################################# 140 | ################################################################# 141 | print ' Running mRMR... ' 142 | sf = mRMR(data, labels, n_select) 143 | if check_result(sf, n_relevant) == True: 144 | print ' mRMR passed!' 145 | else: 146 | print ' mRMR failed!' 147 | 148 | ################################################################# 149 | ################################################################# 150 | print ' Running MIM...' 151 | sf = MIM(data, labels, n_select) 152 | if check_result(sf, n_relevant) == True: 153 | print ' MIM passed!' 154 | else: 155 | print ' MIM failed!' 156 | 157 | print '---> Done unit tests!' 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Pyfeast - python bindings for the Feature Selection Toolbox 4 | 5 | Copyright (C) Calvin Morrison, Gregory Ditzler 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | license below for more details.. 16 | 17 | 18 | GNU LESSER GENERAL PUBLIC LICENSE 19 | Version 3, 29 June 2007 20 | 21 | Copyright (C) 2007 Free Software Foundation, Inc. 22 | Everyone is permitted to copy and distribute verbatim copies 23 | of this license document, but changing it is not allowed. 
24 | 25 | 26 | This version of the GNU Lesser General Public License incorporates 27 | the terms and conditions of version 3 of the GNU General Public 28 | License, supplemented by the additional permissions listed below. 29 | 30 | 0. Additional Definitions. 31 | 32 | As used herein, "this License" refers to version 3 of the GNU Lesser 33 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 34 | General Public License. 35 | 36 | "The Library" refers to a covered work governed by this License, 37 | other than an Application or a Combined Work as defined below. 38 | 39 | An "Application" is any work that makes use of an interface provided 40 | by the Library, but which is not otherwise based on the Library. 41 | Defining a subclass of a class defined by the Library is deemed a mode 42 | of using an interface provided by the Library. 43 | 44 | A "Combined Work" is a work produced by combining or linking an 45 | Application with the Library. The particular version of the Library 46 | with which the Combined Work was made is also called the "Linked 47 | Version". 48 | 49 | The "Minimal Corresponding Source" for a Combined Work means the 50 | Corresponding Source for the Combined Work, excluding any source code 51 | for portions of the Combined Work that, considered in isolation, are 52 | based on the Application, and not on the Linked Version. 53 | 54 | The "Corresponding Application Code" for a Combined Work means the 55 | object code and/or source code for the Application, including any data 56 | and utility programs needed for reproducing the Combined Work from the 57 | Application, but excluding the System Libraries of the Combined Work. 58 | 59 | 1. Exception to Section 3 of the GNU GPL. 60 | 61 | You may convey a covered work under sections 3 and 4 of this License 62 | without being bound by section 3 of the GNU GPL. 63 | 64 | 2. Conveying Modified Versions. 
65 | 66 | If you modify a copy of the Library, and, in your modifications, a 67 | facility refers to a function or data to be supplied by an Application 68 | that uses the facility (other than as an argument passed when the 69 | facility is invoked), then you may convey a copy of the modified 70 | version: 71 | 72 | a) under this License, provided that you make a good faith effort to 73 | ensure that, in the event an Application does not supply the 74 | function or data, the facility still operates, and performs 75 | whatever part of its purpose remains meaningful, or 76 | 77 | b) under the GNU GPL, with none of the additional permissions of 78 | this License applicable to that copy. 79 | 80 | 3. Object Code Incorporating Material from Library Header Files. 81 | 82 | The object code form of an Application may incorporate material from 83 | a header file that is part of the Library. You may convey such object 84 | code under terms of your choice, provided that, if the incorporated 85 | material is not limited to numerical parameters, data structure 86 | layouts and accessors, or small macros, inline functions and templates 87 | (ten or fewer lines in length), you do both of the following: 88 | 89 | a) Give prominent notice with each copy of the object code that the 90 | Library is used in it and that the Library and its use are 91 | covered by this License. 92 | 93 | b) Accompany the object code with a copy of the GNU GPL and this license 94 | document. 95 | 96 | 4. Combined Works. 97 | 98 | You may convey a Combined Work under terms of your choice that, 99 | taken together, effectively do not restrict modification of the 100 | portions of the Library contained in the Combined Work and reverse 101 | engineering for debugging such modifications, if you also do each of 102 | the following: 103 | 104 | a) Give prominent notice with each copy of the Combined Work that 105 | the Library is used in it and that the Library and its use are 106 | covered by this License. 
107 | 108 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 109 | document. 110 | 111 | c) For a Combined Work that displays copyright notices during 112 | execution, include the copyright notice for the Library among 113 | these notices, as well as a reference directing the user to the 114 | copies of the GNU GPL and this license document. 115 | 116 | d) Do one of the following: 117 | 118 | 0) Convey the Minimal Corresponding Source under the terms of this 119 | License, and the Corresponding Application Code in a form 120 | suitable for, and under terms that permit, the user to 121 | recombine or relink the Application with a modified version of 122 | the Linked Version to produce a modified Combined Work, in the 123 | manner specified by section 6 of the GNU GPL for conveying 124 | Corresponding Source. 125 | 126 | 1) Use a suitable shared library mechanism for linking with the 127 | Library. A suitable mechanism is one that (a) uses at run time 128 | a copy of the Library already present on the user's computer 129 | system, and (b) will operate properly with a modified version 130 | of the Library that is interface-compatible with the Linked 131 | Version. 132 | 133 | e) Provide Installation Information, but only if you would otherwise 134 | be required to provide such information under section 6 of the 135 | GNU GPL, and only to the extent that such information is 136 | necessary to install and execute a modified version of the 137 | Combined Work produced by recombining or relinking the 138 | Application with a modified version of the Linked Version. (If 139 | you use option 4d0, the Installation Information must accompany 140 | the Minimal Corresponding Source and Corresponding Application 141 | Code. If you use option 4d1, you must provide the Installation 142 | Information in the manner specified by section 6 of the GNU GPL 143 | for conveying Corresponding Source.) 144 | 145 | 5. Combined Libraries. 
146 | 147 | You may place library facilities that are a work based on the 148 | Library side by side in a single library together with other library 149 | facilities that are not Applications and are not covered by this 150 | License, and convey such a combined library under terms of your 151 | choice, if you do both of the following: 152 | 153 | a) Accompany the combined library with a copy of the same work based 154 | on the Library, uncombined with any other library facilities, 155 | conveyed under the terms of this License. 156 | 157 | b) Give prominent notice with the combined library that part of it 158 | is a work based on the Library, and explaining where to find the 159 | accompanying uncombined form of the same work. 160 | 161 | 6. Revised Versions of the GNU Lesser General Public License. 162 | 163 | The Free Software Foundation may publish revised and/or new versions 164 | of the GNU Lesser General Public License from time to time. Such new 165 | versions will be similar in spirit to the present version, but may 166 | differ in detail to address new problems or concerns. 167 | 168 | Each version is given a distinguishing version number. If the 169 | Library as you received it specifies that a certain numbered version 170 | of the GNU Lesser General Public License "or any later version" 171 | applies to it, you have the option of following the terms and 172 | conditions either of that published version or of any later version 173 | published by the Free Software Foundation. If the Library as you 174 | received it does not specify a version number of the GNU Lesser 175 | General Public License, you may choose any version of the GNU Lesser 176 | General Public License ever published by the Free Software Foundation. 
177 | 178 | If the Library as you received it specifies that a proxy can decide 179 | whether future versions of the GNU Lesser General Public License shall 180 | apply, that proxy's public statement of acceptance of any version is 181 | permanent authorization for you to choose that version for the 182 | Library. 183 | -------------------------------------------------------------------------------- /feast.py: -------------------------------------------------------------------------------- 1 | """ 2 | The FEAST module provides an interface between the C-library 3 | for feature selection to Python. 4 | 5 | References: 6 | 1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional 7 | likelihood maximization: A unifying framework for information 8 | theoretic feature selection," Journal of Machine Learning 9 | Research, vol. 13, pp. 27-66, 2012. 10 | 11 | """ 12 | __author__ = "Calvin Morrison" 13 | __copyright__ = "Copyright 2013, EESI Laboratory" 14 | __credits__ = ["Calvin Morrison", "Gregory Ditzler"] 15 | __license__ = "GPL" 16 | __version__ = "0.2.0" 17 | __maintainer__ = "Calvin Morrison" 18 | __email__ = "mutantturkey@gmail.com" 19 | __status__ = "Release" 20 | 21 | import numpy as np 22 | import ctypes as c 23 | 24 | libFSToolbox = c.CDLL("libFSToolbox.so"); 25 | 26 | def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0): 27 | """ 28 | This algorithm implements conditional mutual information 29 | feature select, such that beta and gamma control the 30 | weight attached to the redundant mutual and conditional 31 | mutual information, respectively. 32 | 33 | @param data: data in a Numpy array such that len(data) = 34 | n_observations, and len(data.transpose()) = n_features 35 | (REQUIRED) 36 | @type data: ndarray 37 | @param labels: labels represented in a numpy list with 38 | n_observations as the number of elements. That is 39 | len(labels) = len(data) = n_observations. 
40 | (REQUIRED) 41 | @type labels: ndarray 42 | @param n_select: number of features to select. (REQUIRED) 43 | @type n_select: integer 44 | @param beta: penalty attacted to I(X_j;X_k) 45 | @type beta: float between 0 and 1.0 46 | @param gamma: positive weight attached to the conditional 47 | redundancy term I(X_k;X_j|Y) 48 | @type gamma: float between 0 and 1.0 49 | @return: features in the order they were selected. 50 | @rtype: list 51 | """ 52 | data, labels = check_data(data, labels) 53 | 54 | # python values 55 | n_observations, n_features = data.shape 56 | output = np.zeros(n_select) 57 | 58 | # cast as C types 59 | c_n_observations = c.c_int(n_observations) 60 | c_n_select = c.c_int(n_select) 61 | c_n_features = c.c_int(n_features) 62 | c_beta = c.c_double(beta) 63 | c_gamma = c.c_double(gamma) 64 | 65 | libFSToolbox.BetaGamma.restype = c.POINTER(c.c_double * n_select) 66 | features = libFSToolbox.BetaGamma(c_n_select, 67 | c_n_observations, 68 | c_n_features, 69 | data.ctypes.data_as(c.POINTER(c.c_double)), 70 | labels.ctypes.data_as(c.POINTER(c.c_double)), 71 | output.ctypes.data_as(c.POINTER(c.c_double)), 72 | c_beta, 73 | c_gamma 74 | ) 75 | 76 | selected_features = [] 77 | for i in features.contents: 78 | selected_features.append(i) 79 | return selected_features 80 | 81 | 82 | def CIFE(data, labels, n_select): 83 | """ 84 | This function implements the Condred feature selection algorithm. 85 | beta = 1; gamma = 1; 86 | 87 | @param data: A Numpy array such that len(data) = 88 | n_observations, and len(data.transpose()) = n_features 89 | @type data: ndarray 90 | @param labels: labels represented in a numpy list with 91 | n_observations as the number of elements. That is 92 | len(labels) = len(data) = n_observations. 93 | @type labels: ndarray 94 | @param n_select: number of features to select. 95 | @type n_select: integer 96 | @return selected_features: features in the order they were selected. 
97 | @rtype: list 98 | """ 99 | return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) 100 | 101 | def CMIM(data, labels, n_select): 102 | """ 103 | This function implements the conditional mutual information 104 | maximization feature selection algorithm. Note that this 105 | implementation does not allow for the weighting of the 106 | redundancy terms that BetaGamma will allow you to do. 107 | 108 | @param data: A Numpy array such that len(data) = 109 | n_observations, and len(data.transpose()) = n_features 110 | @type data: ndarray 111 | @param labels: labels represented in a numpy array with 112 | n_observations as the number of elements. That is 113 | len(labels) = len(data) = n_observations. 114 | @type labels: ndarray 115 | @param n_select: number of features to select. 116 | @type n_select: integer 117 | @return: features in the order that they were selected. 118 | @rtype: list 119 | """ 120 | data, labels = check_data(data, labels) 121 | 122 | # python values 123 | n_observations, n_features = data.shape 124 | output = np.zeros(n_select) 125 | 126 | # cast as C types 127 | c_n_observations = c.c_int(n_observations) 128 | c_n_select = c.c_int(n_select) 129 | c_n_features = c.c_int(n_features) 130 | 131 | libFSToolbox.CMIM.restype = c.POINTER(c.c_double * n_select) 132 | features = libFSToolbox.CMIM(c_n_select, 133 | c_n_observations, 134 | c_n_features, 135 | data.ctypes.data_as(c.POINTER(c.c_double)), 136 | labels.ctypes.data_as(c.POINTER(c.c_double)), 137 | output.ctypes.data_as(c.POINTER(c.c_double)) 138 | ) 139 | 140 | selected_features = [] 141 | for i in features.contents: 142 | selected_features.append(i) 143 | 144 | return selected_features 145 | 146 | 147 | 148 | def CondMI(data, labels, n_select): 149 | """ 150 | This function implements the conditional mutual information 151 | maximization feature selection algorithm. 
152 | 153 | @param data: data in a Numpy array such that len(data) = n_observations, 154 | and len(data.transpose()) = n_features 155 | @type data: ndarray 156 | @param labels: represented in a numpy list with 157 | n_observations as the number of elements. That is 158 | len(labels) = len(data) = n_observations. 159 | @type labels: ndarray 160 | @param n_select: number of features to select. 161 | @type n_select: integer 162 | @return: features in the order they were selected. 163 | @rtype list 164 | """ 165 | data, labels = check_data(data, labels) 166 | 167 | # python values 168 | n_observations, n_features = data.shape 169 | output = np.zeros(n_select) 170 | 171 | # cast as C types 172 | c_n_observations = c.c_int(n_observations) 173 | c_n_select = c.c_int(n_select) 174 | c_n_features = c.c_int(n_features) 175 | 176 | libFSToolbox.CondMI.restype = c.POINTER(c.c_double * n_select) 177 | features = libFSToolbox.CondMI(c_n_select, 178 | c_n_observations, 179 | c_n_features, 180 | data.ctypes.data_as(c.POINTER(c.c_double)), 181 | labels.ctypes.data_as(c.POINTER(c.c_double)), 182 | output.ctypes.data_as(c.POINTER(c.c_double)) 183 | ) 184 | 185 | selected_features = [] 186 | for i in features.contents: 187 | selected_features.append(i) 188 | 189 | return selected_features 190 | 191 | 192 | def Condred(data, labels, n_select): 193 | """ 194 | This function implements the Condred feature selection algorithm. 195 | beta = 0; gamma = 1; 196 | 197 | @param data: data in a Numpy array such that len(data) = 198 | n_observations, and len(data.transpose()) = n_features 199 | @type data: ndarray 200 | @param labels: labels represented in a numpy list with 201 | n_observations as the number of elements. That is 202 | len(labels) = len(data) = n_observations. 203 | @type labels: ndarray 204 | @param n_select: number of features to select. 205 | @type n_select: integer 206 | @return: the features in the order they were selected. 
207 | @rtype: list 208 | """ 209 | data, labels = check_data(data, labels) 210 | return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0) 211 | 212 | 213 | 214 | def DISR(data, labels, n_select): 215 | """ 216 | This function implements the double input symmetrical relevance 217 | feature selection algorithm. 218 | 219 | @param data: data in a Numpy array such that len(data) = 220 | n_observations, and len(data.transpose()) = n_features 221 | @type data: ndarray 222 | @param labels: labels represented in a numpy list with 223 | n_observations as the number of elements. That is 224 | len(labels) = len(data) = n_observations. 225 | @type labels: ndarray 226 | @param n_select: number of features to select. (REQUIRED) 227 | @type n_select: integer 228 | @return: the features in the order they were selected. 229 | @rtype: list 230 | """ 231 | data, labels = check_data(data, labels) 232 | 233 | # python values 234 | n_observations, n_features = data.shape 235 | output = np.zeros(n_select) 236 | 237 | # cast as C types 238 | c_n_observations = c.c_int(n_observations) 239 | c_n_select = c.c_int(n_select) 240 | c_n_features = c.c_int(n_features) 241 | 242 | libFSToolbox.DISR.restype = c.POINTER(c.c_double * n_select) 243 | features = libFSToolbox.DISR(c_n_select, 244 | c_n_observations, 245 | c_n_features, 246 | data.ctypes.data_as(c.POINTER(c.c_double)), 247 | labels.ctypes.data_as(c.POINTER(c.c_double)), 248 | output.ctypes.data_as(c.POINTER(c.c_double)) 249 | ) 250 | 251 | selected_features = [] 252 | for i in features.contents: 253 | selected_features.append(i) 254 | 255 | return selected_features 256 | 257 | def ICAP(data, labels, n_select): 258 | """ 259 | This function implements the interaction capping feature 260 | selection algorithm. 
261 | 262 | @param data: data in a Numpy array such that len(data) = 263 | n_observations, and len(data.transpose()) = n_features 264 | @type data: ndarray 265 | @param labels: labels represented in a numpy list with 266 | n_observations as the number of elements. That is 267 | len(labels) = len(data) = n_observations. 268 | @type labels: ndarray 269 | @param n_select: number of features to select. (REQUIRED) 270 | @type n_select: integer 271 | @return: the features in the order they were selected. 272 | @rtype: list 273 | """ 274 | data, labels = check_data(data, labels) 275 | 276 | # python values 277 | n_observations, n_features = data.shape 278 | output = np.zeros(n_select) 279 | 280 | # cast as C types 281 | c_n_observations = c.c_int(n_observations) 282 | c_n_select = c.c_int(n_select) 283 | c_n_features = c.c_int(n_features) 284 | 285 | libFSToolbox.ICAP.restype = c.POINTER(c.c_double * n_select) 286 | features = libFSToolbox.ICAP(c_n_select, 287 | c_n_observations, 288 | c_n_features, 289 | data.ctypes.data_as(c.POINTER(c.c_double)), 290 | labels.ctypes.data_as(c.POINTER(c.c_double)), 291 | output.ctypes.data_as(c.POINTER(c.c_double)) 292 | ) 293 | 294 | selected_features = [] 295 | for i in features.contents: 296 | selected_features.append(i) 297 | 298 | return selected_features 299 | 300 | def JMI(data, labels, n_select): 301 | """ 302 | This function implements the joint mutual information feature 303 | selection algorithm. 304 | 305 | @param data: data in a Numpy array such that len(data) = 306 | n_observations, and len(data.transpose()) = n_features 307 | @type data: ndarray 308 | @param labels: labels represented in a numpy list with 309 | n_observations as the number of elements. That is 310 | len(labels) = len(data) = n_observations. 311 | @type labels: ndarray 312 | @param n_select: number of features to select. (REQUIRED) 313 | @type n_select: integer 314 | @return: the features in the order they were selected. 
315 | @rtype: list 316 | """ 317 | data, labels = check_data(data, labels) 318 | 319 | # python values 320 | n_observations, n_features = data.shape 321 | output = np.zeros(n_select) 322 | 323 | # cast as C types 324 | c_n_observations = c.c_int(n_observations) 325 | c_n_select = c.c_int(n_select) 326 | c_n_features = c.c_int(n_features) 327 | 328 | libFSToolbox.JMI.restype = c.POINTER(c.c_double * n_select) 329 | features = libFSToolbox.JMI(c_n_select, 330 | c_n_observations, 331 | c_n_features, 332 | data.ctypes.data_as(c.POINTER(c.c_double)), 333 | labels.ctypes.data_as(c.POINTER(c.c_double)), 334 | output.ctypes.data_as(c.POINTER(c.c_double)) 335 | ) 336 | 337 | selected_features = [] 338 | for i in features.contents: 339 | selected_features.append(i) 340 | return selected_features 341 | 342 | 343 | 344 | def MIFS(data, labels, n_select): 345 | """ 346 | This function implements the MIFS algorithm. 347 | beta = 1; gamma = 0; 348 | 349 | @param data: data in a Numpy array such that len(data) = 350 | n_observations, and len(data.transpose()) = n_features 351 | @type data: ndarray 352 | @param labels: labels represented in a numpy list with 353 | n_observations as the number of elements. That is 354 | len(labels) = len(data) = n_observations. 355 | @type labels: ndarray 356 | @param n_select: number of features to select. (REQUIRED) 357 | @type n_select: integer 358 | @return: the features in the order they were selected. 359 | @rtype: list 360 | """ 361 | return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0) 362 | 363 | 364 | def MIM(data, labels, n_select): 365 | """ 366 | This function implements the MIM algorithm. 367 | beta = 0; gamma = 0; 368 | 369 | @param data: data in a Numpy array such that len(data) = 370 | n_observations, and len(data.transpose()) = n_features 371 | @type data: ndarray 372 | @param labels: labels represented in a numpy list with 373 | n_observations as the number of elements. 
That is 374 | len(labels) = len(data) = n_observations. 375 | @type labels: ndarray 376 | @param n_select: number of features to select. (REQUIRED) 377 | @type n_select: integer 378 | @return: the features in the order they were selected. 379 | @rtype: list 380 | """ 381 | data, labels = check_data(data, labels) 382 | 383 | # python values 384 | n_observations, n_features = data.shape 385 | output = np.zeros(n_select) 386 | 387 | # cast as C types 388 | c_n_observations = c.c_int(n_observations) 389 | c_n_select = c.c_int(n_select) 390 | c_n_features = c.c_int(n_features) 391 | 392 | libFSToolbox.MIM.restype = c.POINTER(c.c_double * n_select) 393 | features = libFSToolbox.MIM(c_n_select, 394 | c_n_observations, 395 | c_n_features, 396 | data.ctypes.data_as(c.POINTER(c.c_double)), 397 | labels.ctypes.data_as(c.POINTER(c.c_double)), 398 | output.ctypes.data_as(c.POINTER(c.c_double)) 399 | ) 400 | 401 | selected_features = [] 402 | for i in features.contents: 403 | selected_features.append(i) 404 | return selected_features 405 | 406 | 407 | def mRMR(data, labels, n_select): 408 | """ 409 | This funciton implements the max-relevance min-redundancy feature 410 | selection algorithm. 411 | 412 | @param data: data in a Numpy array such that len(data) = 413 | n_observations, and len(data.transpose()) = n_features 414 | @type data: ndarray 415 | @param labels: labels represented in a numpy list with 416 | n_observations as the number of elements. That is 417 | len(labels) = len(data) = n_observations. 418 | @type labels: ndarray 419 | @param n_select: number of features to select. (REQUIRED) 420 | @type n_select: integer 421 | @return: the features in the order they were selected. 
422 | @rtype: list 423 | """ 424 | data, labels = check_data(data, labels) 425 | 426 | # python values 427 | n_observations, n_features = data.shape 428 | output = np.zeros(n_select) 429 | 430 | # cast as C types 431 | c_n_observations = c.c_int(n_observations) 432 | c_n_select = c.c_int(n_select) 433 | c_n_features = c.c_int(n_features) 434 | 435 | libFSToolbox.mRMR_D.restype = c.POINTER(c.c_double * n_select) 436 | features = libFSToolbox.mRMR_D(c_n_select, 437 | c_n_observations, 438 | c_n_features, 439 | data.ctypes.data_as(c.POINTER(c.c_double)), 440 | labels.ctypes.data_as(c.POINTER(c.c_double)), 441 | output.ctypes.data_as(c.POINTER(c.c_double)) 442 | ) 443 | 444 | selected_features = [] 445 | for i in features.contents: 446 | selected_features.append(i) 447 | return selected_features 448 | 449 | def check_data(data, labels): 450 | """ 451 | Check dimensions of the data and the labels. Raise and exception 452 | if there is a problem. 453 | 454 | Data and Labels are automatically cast as doubles before calling the 455 | feature selection functions 456 | 457 | @param data: the data 458 | @param labels: the labels 459 | @return (data, labels): ndarray of floats 460 | @rtype: tuple 461 | """ 462 | 463 | if isinstance(data, np.ndarray) is False: 464 | raise Exception("data must be an numpy ndarray.") 465 | if isinstance(labels, np.ndarray) is False: 466 | raise Exception("labels must be an numpy ndarray.") 467 | 468 | if len(data) != len(labels): 469 | raise Exception("data and labels must be the same length") 470 | 471 | return 1.0*np.array(data, order="F"), 1.0*np.array(labels, order="F") 472 | --------------------------------------------------------------------------------