├── .gitignore ├── AUTHORS.txt ├── LICENCE.txt ├── MANIFEST ├── MANIFEST.in ├── README.md ├── adenine ├── __init__.py ├── ade_config.py ├── cluster │ ├── __init__.py │ ├── agglomerative.py │ └── optics.py ├── core │ ├── __init__.py │ ├── analyze_results.py │ ├── define_pipeline.py │ ├── job_distribution.py │ ├── pipelines.py │ ├── plotting.py │ └── template │ │ ├── __init__.py │ │ ├── d3_template.py │ │ └── svg-crowbar.js ├── examples │ ├── ade_config.py │ └── data │ │ ├── X.csv │ │ ├── X.npy │ │ ├── X_missing.csv │ │ ├── Y_missing_test.csv │ │ ├── y.csv │ │ └── y.npy ├── externals │ ├── __init__.py │ └── hierarchical.py ├── test │ ├── X_missing.csv │ ├── Y_missing_test.csv │ ├── carttest.py │ ├── imputing_test.py │ └── imputing_test_lite.py └── utils │ ├── GEO2csv.py │ ├── __init__.py │ ├── data_source.py │ ├── extensions.py │ ├── extra.py │ ├── scores.py │ └── templates.py ├── doc ├── GiHubProjectPage.txt ├── Makefile ├── devPlan │ ├── plan.pdf │ └── plan.tex └── source │ ├── adenine_logo.pdf │ ├── adenine_logo.png │ ├── conf.py │ ├── dependencies.txt │ ├── drawing.svg │ ├── index.rst │ ├── modules.rst │ ├── slipGURUTheme │ ├── layout.html │ ├── static │ │ ├── logos.png │ │ └── slipGuru.css │ └── theme.conf │ ├── sphinxext │ ├── numpydoc │ │ ├── LICENSE.txt │ │ ├── MANIFEST.in │ │ ├── PKG-INFO │ │ ├── README.txt │ │ ├── __init__.py │ │ ├── comment_eater.py │ │ ├── compiler_unparse.py │ │ ├── docscrape.py │ │ ├── docscrape_sphinx.py │ │ ├── numpydoc.py │ │ ├── phantom_import.py │ │ ├── plot_directive.py │ │ ├── setup.cfg │ │ ├── setup.py │ │ ├── tests │ │ │ └── test_docscrape.py │ │ └── traitsdoc.py │ └── sphinxcontrib │ │ ├── __init__.py │ │ ├── programoutput.py │ │ └── spelling.py │ └── tutorial.rst ├── icon.png ├── requirements.txt ├── scripts ├── ade_GEO2csv.py ├── ade_analysis.py └── ade_run.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX stuff 2 | *.DS_Store 3 | 4 | # Archivers 5 | **/*.tar.gz 6 | # -------------------------- Python -------------------------- # 7 | 8 | # Jupyter Notebook checkpoints 9 | *-checkpoint.ipynb 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # Temp 17 | *~ 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | env/ 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | # Numpy files 40 | #*.npy 41 | 42 | # Dump files 43 | *.pkl 44 | 45 | # Images 46 | *.png 47 | !*adenine_logo.png 48 | !icon.png 49 | 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *,cover 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # -------------------------- TeX -------------------------- # 85 | 86 | *.aux 87 | *.glo 88 | *.idx 89 | *.log 90 | *.toc 91 | *.ist 92 | *.acn 93 | *.acr 94 | *.alg 95 | *.bbl 96 | *.blg 97 | *.dvi 98 | *.glg 99 | *.gls 100 | *.ilg 101 | *.ind 102 | *.lof 103 | *.lot 104 | *.maf 105 | *.mtc 106 | *.mtc1 107 | *.out 108 | *.synctex.gz 109 | 110 | # -------------------------- results -------------------------- # 111 | **/results/**/* 112 | 113 | # --- LaTeX --- # 114 | ## Core latex/pdflatex auxiliary files: 115 | *.aux 116 | *.lof 117 | *.log 118 | *.lot 119 | **.fls 120 | *.out 121 | *.toc 122 | *.fmt 123 | *.fot 124 | *.cb 125 | *.cb2 126 | 127 | ## Intermediate documents: 128 | *.dvi 129 | *-converted-to.* 130 | # these rules might exclude image files for figures etc. 131 | # *.ps 132 | # *.eps 133 | #*.pdf 134 | 135 | 136 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 137 | *.bbl 138 | *.bcf 139 | *.blg 140 | *-blx.aux 141 | *-blx.bib 142 | *.brf 143 | *.run.xml 144 | 145 | ## Build tool auxiliary files: 146 | *.fdb_latexmk 147 | .synctex 148 | .synctex.gz 149 | .synctex.gz(busy) 150 | *.pdfsync 151 | 152 | ## Auxiliary and intermediate files from other packages: 153 | # algorithms 154 | *.alg 155 | *.loa 156 | 157 | # achemso 158 | acs-*.bib 159 | 160 | # amsthm 161 | *.thm 162 | 163 | # beamer 164 | *.nav 165 | *.snm 166 | *.vrb 167 | 168 | # cprotect 169 | *.cpt 170 | 171 | # fixme 172 | *.lox 173 | 174 | #(r)(e)ledmac/(r)(e)ledpar 175 | *.end 176 | *.?end 177 | *.[1-9] 178 | *.[1-9][0-9] 179 | *.[1-9][0-9][0-9] 180 | *.[1-9]R 181 | *.[1-9][0-9]R 182 | *.[1-9][0-9][0-9]R 183 | *.eledsec[1-9] 184 | *.eledsec[1-9]R 185 | *.eledsec[1-9][0-9] 186 | *.eledsec[1-9][0-9]R 187 | *.eledsec[1-9][0-9][0-9] 188 | *.eledsec[1-9][0-9][0-9]R 189 | 190 | # glossaries 191 | *.acn 192 | *.acr 193 | *.glg 194 | *.glo 195 | *.gls 196 | *.glsdefs 197 | 198 | # gnuplottex 199 | *-gnuplottex-* 200 | 201 | # hyperref 202 | *.brf 203 | 204 | # knitr 205 | *-concordance.tex 206 | # TODO Comment the next line if you want to keep your tikz graphics files 207 | *.tikz 208 | *-tikzDictionary 209 | 210 | # listings 211 | *.lol 212 | 213 | # makeidx 214 | *.idx 215 | *.ilg 216 | *.ind 217 | *.ist 218 | 219 | # minitoc 220 | *.maf 221 | *.mlf 222 | *.mlt 223 | *.mtc 224 | *.mtc[0-9] 225 | *.mtc[1-9][0-9] 226 | 227 | # minted 228 | _minted* 229 | *.pyg 230 | 231 | # morewrites 232 | *.mw 233 | 234 | # mylatexformat 235 | *.fmt 236 | 237 | # nomencl 238 | *.nlo 239 | 240 | # sagetex 241 | *.sagetex.sage 242 | *.sagetex.py 243 | *.sagetex.scmd 244 | 245 | # sympy 246 | *.sout 247 | *.sympy 248 | sympy-plots-for-*.tex/ 249 | 250 | # pdfcomment 251 | *.upa 252 | *.upb 253 | 254 | # pythontex 255 | *.pytxcode 256 | pythontex-files-*/ 257 | 258 | # thmtools 259 | *.loe 260 | 261 | # TikZ & PGF 262 | *.dpth 263 | *.md5 264 | *.auxlock 265 | 266 | # todonotes 267 | *.tdo 268 | 269 | # xindy 270 | *.xdy 271 | 272 | # xypic precompiled matrices 273 | *.xyc 274 | 275 | # endfloat 276 | *.ttt 277 | *.fff 278 | 279 | # Latexian 280 | TSWLatexianTemp* 281 | 282 | ## Editors: 283 | 
# WinEdt 284 | *.bak 285 | *.sav 286 | 287 | # Texpad 288 | .texpadtmp 289 | 290 | # Kile 291 | *.backup 292 | 293 | # KBibTeX 294 | *~[0-9]* 295 | -------------------------------------------------------------------------------- /AUTHORS.txt: -------------------------------------------------------------------------------- 1 | Samuele Fiorini [samuele dot fiorini at dibris dot unige dot it] 2 | Federico Tomasi [federico dot tomasi at dibris dot unige dot it] 3 | Annalisa Barla [annalisa dot barla at unige dot it] 4 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | ======================================================================================= 2 | Samuele Fiorini [samuele dot fiorini at dibris dot unige dot it] 3 | Federico Tomasi [federico dot tomasi at dibris dot unige dot it] 4 | Annalisa Barla [annalisa dot barla at unige dot it] 5 | 6 | This file is part of adenine. 7 | 8 | The code is released under the BSD 2-Clause (FreeBSD) License. 9 | 10 | Copyright (c) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla. 11 | All rights reserved. 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | - Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | - Redistributions in binary form must reproduce the above copyright 19 | notice, this list of conditions and the following disclaimer in the 20 | documentation and/or other materials provided with the distribution. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 23 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 29 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | POSSIBILITY OF SUCH DAMAGE. 
32 | ======================================================================================= 33 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | README.md 3 | setup.cfg 4 | setup.py 5 | adenine/__init__.py 6 | adenine/ade_config.py 7 | adenine/core/__init__.py 8 | adenine/core/analyze_results.py 9 | adenine/core/define_pipeline.py 10 | adenine/core/job_distribution.py 11 | adenine/core/pipelines.py 12 | adenine/core/plotting.py 13 | adenine/externals/__init__.py 14 | adenine/externals/hierarchical.py 15 | adenine/utils/__init__.py 16 | adenine/utils/data_source.py 17 | adenine/utils/extensions.py 18 | adenine/utils/extra.py 19 | adenine/utils/scores.py 20 | adenine/utils/templates.py 21 | scripts/ade_analysis.py 22 | scripts/ade_run.py 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.txt 3 | include adenine/examples 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

4 | 
5 | -----------------
6 | 
7 | # Adenine: A data exploration pipeline
8 | 
9 | **adenine** is a machine learning and data mining Python library for exploratory data analysis.
10 | 
11 | The main structure of **adenine** can be summarized in the following 4 steps.
12 | 
13 | 1. **Imputing:** Does your dataset have missing entries? In the first step you can fill the missing values by choosing among different strategies: feature-wise median, mean, most frequent value or k-NN imputing.
14 | 
15 | 2. **Preprocessing:** Have you ever wondered what would have changed if your data had been preprocessed in a different way? Or is data preprocessing a good idea after all? **adenine** includes several preprocessing procedures, such as data recentering, Min-Max scaling, standardization and normalization. **adenine** also allows you to compare the results of the analysis obtained with different preprocessing strategies.
16 | 
17 | 3. **Dimensionality Reduction:** In the context of data exploration, this phase becomes particularly helpful for high-dimensional data. This step includes manifold learning (such as isomap, multidimensional scaling, etc.) and unsupervised feature learning (principal component analysis, kernel PCA, Bernoulli RBM, etc.) techniques.
18 | 
19 | 4. **Clustering:** This step aims at grouping data into clusters in an unsupervised manner. Several techniques, such as k-means, spectral or hierarchical clustering, are offered.
20 | 
21 | The final output of **adenine** is a compact, textual and graphical representation of the results obtained from the pipelines built with each possible combination of the algorithms selected at each step.
22 | 
23 | **adenine** can run on multiple cores/machines* and it is fully `scikit-learn` compliant.
24 | 
25 | ## Installation
26 | 
27 | **adenine** supports Python 2.7.
28 | 
29 | ### Pip installation
30 | `$ pip install adenine`
31 | 
32 | ### Installing from sources
33 | ```bash
34 | $ git clone https://github.com/slipguru/adenine
35 | $ cd adenine
36 | $ python setup.py install
37 | ```
38 | 
39 | ## Try Adenine
40 | 
41 | ### 1. Create your configuration file
42 | Start from the provided template and edit your configuration file with your favourite text editor
43 | ```bash
44 | $ ade_run.py -c my-config-file.py
45 | $ vim my-config-file.py
46 | ...
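# note: the `-c` flag above is expected to create `my-config-file.py` from the
# default adenine configuration template; you then edit it before running adenine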
47 | ```
48 | ```python
49 | from adenine.utils import data_source
50 | 
51 | # -------------------------- EXPERIMENT INFO ------------------------- #
52 | exp_tag = '_experiment'
53 | output_root_folder = 'results'
54 | plotting_context = 'notebook' # one of {paper, notebook, talk, poster}
55 | file_format = 'pdf' # or 'png'
56 | 
57 | # ---------------------------- INPUT DATA ---------------------------- #
58 | # Load an example dataset or specify your input data in tabular format
59 | X, y, feat_names, index = data_source.load('iris')
60 | 
61 | # ----------------------- PIPELINES DEFINITION ------------------------ #
62 | # --- Missing Values Imputing --- #
63 | step0 = {'Impute': [True, {'missing_values': 'NaN',
64 |                            'strategy': ['nearest_neighbors']}]}
65 | 
66 | # --- Data Preprocessing --- #
67 | step1 = {'MinMax': [True, {'feature_range': [(0, 1)]}]}
68 | 
69 | # --- Unsupervised feature learning --- #
70 | step2 = {'KernelPCA': [True, {'kernel': ['linear', 'rbf', 'poly']}],
71 |          'Isomap': [False, {'n_neighbors': 5}],
72 |          'MDS': [True, {'metric': True}],
73 |          'tSNE': [False],
74 |          'RBM': [True, {'n_components': 256}]
75 |          }
76 | 
77 | # --- Clustering --- #
78 | # affinity can be precomputed for AP, Spectral and Hierarchical
79 | step3 = {'KMeans': [True, {'n_clusters': [3, 'auto']}],
80 |          'Spectral': [False, {'n_clusters': [3]}],
81 |          'Hierarchical': [False, {'n_clusters': [3],
82 |                                    'affinity': ['euclidean'],
83 |                                    'linkage': ['ward', 'average']}]
84 |          }
85 | ```
86 | 
87 | ### 2. Run the pipelines
88 | ```bash
89 | $ ade_run.py my-config-file.py
90 | ```
91 | 
92 | ### 3. Automatically generate beautiful publication-ready plots and textual results
93 | ```bash
94 | $ ade_analysis.py results/ade_experiment_
95 | ```
96 | 
97 | ## Need more info?
98 | Check out the project [homepage](http://slipguru.github.io/adenine/index.html)
99 | 
100 | ## *Got large-scale data?
101 | 
102 | **adenine** takes advantage of `mpi4py` to distribute the execution of the pipelines on HPC architectures:
103 | ```bash
104 | $ mpirun -np <number-of-MPI-tasks> --hosts <comma-separated-list-of-hosts> ade_run.py my-config-file.py
105 | ```
106 | 
107 | ## Citation
108 | 
109 | If you use **adenine** in a scientific publication, we would appreciate citations:
110 | ```tex
111 | @{coming soon}
112 | ```
113 | 
-------------------------------------------------------------------------------- 
/adenine/__init__.py: 
-------------------------------------------------------------------------------- 
1 | ######################################################################
2 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
3 | #
4 | # FreeBSD License
5 | ######################################################################
6 | 
7 | __version__ = "0.1.4"
8 | 
9 | from adenine import utils
10 | from adenine import core
11 | from adenine.core import main
12 | 
-------------------------------------------------------------------------------- 
/adenine/ade_config.py: 
-------------------------------------------------------------------------------- 
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """Configuration file for adenine."""
4 | 
5 | from adenine.utils import data_source
6 | 
7 | # -------------------------- EXPERIMENT INFO ------------------------- #
8 | exp_tag = '_experiment'
9 | output_root_folder = 'results'
10 | plotting_context = 'notebook' # one of {paper, notebook, talk, poster}
11 | file_format = 'pdf' # or 'png'
12 | use_compression = False # use gzip to compress the results
13 | 
14 | # ---------------------------- INPUT DATA ---------------------------- #
15 | # Load an example dataset or specify your input data in tabular format
16 | data_file = 'data.csv'
17 | labels_file = 'labels.csv' # OPTIONAL
18 | samples_on = 'rows' # if samples lie on columns use 'cols' or 'col'
19 | data_sep = ',' # the data separator. e.g., ',', '\t', ' ', ...
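# As an alternative to a custom tabular file, one of the bundled example
# datasets can be loaded here instead (same call as shown in the README), e.g.:
# X, y, feat_names, index = data_source.load('iris')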
20 | X, y, feat_names, index = data_source.load('custom', 21 | data_file, labels_file, 22 | samples_on=samples_on, 23 | sep=data_sep) 24 | 25 | # ----------------------- PIPELINES DEFINITION ------------------------ # 26 | # --- Missing values imputing --- # 27 | step0 = {'Impute': [False, {'missing_values': 'NaN', 28 | 'strategy': ['median', 29 | 'mean', 30 | 'nearest_neighbors']}]} 31 | 32 | # --- Data preprocessing --- # 33 | step1 = {'None': [False], 'Recenter': [False], 'Standardize': [False], 34 | 'Normalize': [False, {'norm': ['l1', 'l2']}], 35 | 'MinMax': [False, {'feature_range': [(0, 1), (-1, 1)]}]} 36 | 37 | # --- Unsupervised features learning --- # 38 | # affinity ca be precumputed for SE 39 | step2 = {'PCA': [False, {'n_components': 3}], 40 | 'IncrementalPCA': [False], 41 | 'RandomizedPCA': [False], 42 | 'KernelPCA': [False, {'kernel': ['linear', 'rbf', 'poly']}], 43 | 'Isomap': [False, {'n_neighbors': 5}], 44 | 'LLE': [False, {'n_neighbors': 5, 45 | 'method': ['standard', 'modified', 46 | 'hessian', 'ltsa']}], 47 | 'SE': [False, {'affinity': ['nearest_neighbors', 'rbf']}], 48 | 'MDS': [False, {'metric': True}], 49 | 'tSNE': [False], 50 | 'RBM': [False, {'n_components': 256}], 51 | 'None': [False] 52 | } 53 | 54 | # --- Clustering --- # 55 | # affinity ca be precumputed for AP, Spectral and Hierarchical 56 | step3 = {'KMeans': [False, {'n_clusters': [3, 'auto']}], 57 | 'AP': [False, {'preference': ['auto']}], 58 | 'MS': [False], 59 | 'Spectral': [False, {'n_clusters': [3, 8]}], 60 | 'Hierarchical': [False, {'n_clusters': [3, 8], 61 | 'affinity': ['manhattan', 'euclidean'], 62 | 'linkage': ['ward', 'complete', 'average']}] 63 | } 64 | -------------------------------------------------------------------------------- /adenine/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | from adenine.cluster.optics import Optics 2 | from adenine.cluster.agglomerative import AgglomerativeClustering 3 | -------------------------------------------------------------------------------- /adenine/cluster/agglomerative.py: -------------------------------------------------------------------------------- 1 | """Agglomerative clustering class extension.""" 2 | import logging 3 | import numpy as np 4 | from sklearn.externals.joblib import Memory 5 | from adenine.externals import AgglomerativeClustering 6 | 7 | 8 | class AgglomerativeClustering(AgglomerativeClustering): 9 | """Extension of sklearn Agglomerative Clustering. 10 | 11 | This Agglomerative Clustering class, if required, can perform automatic 12 | discovery of the number of clusters. 13 | """ 14 | 15 | def __init__(self, n_clusters=2, affinity="euclidean", 16 | memory=Memory(cachedir=None, verbose=0), 17 | connectivity=None, n_components=None, 18 | compute_full_tree='auto', linkage='ward', 19 | pooling_func=np.mean, return_distance=False): 20 | """Agglomerative Clustering. 21 | 22 | Recursively merges the pair of clusters that minimally increases 23 | a given linkage distance. 24 | 25 | Read more in the :ref:`User Guide `. 26 | 27 | Parameters 28 | ---------- 29 | n_clusters : int, default=2 30 | The number of clusters to find. 31 | 32 | connectivity : array-like or callable, optional 33 | Connectivity matrix. Defines for each sample the neighboring 34 | samples following a given structure of the data. 35 | This can be a connectivity matrix itself or a callable that 36 | transforms the data into a connectivity matrix, such as derived 37 | from kneighbors_graph. 
Default is None, i.e, the 38 | hierarchical clustering algorithm is unstructured. 39 | 40 | affinity : string or callable, default: "euclidean" 41 | Metric used to compute the linkage. Can be "euclidean", "l1", "l2", 42 | "manhattan", "cosine", or 'precomputed'. 43 | If linkage is "ward", only "euclidean" is accepted. 44 | 45 | memory : Instance of joblib.Memory or string (optional) 46 | Used to cache the output of the computation of the tree. 47 | By default, no caching is done. If a string is given, it is the 48 | path to the caching directory. 49 | 50 | n_components : int (optional) 51 | Number of connected components. If None the number of connected 52 | components is estimated from the connectivity matrix. 53 | NOTE: This parameter is now directly determined from the 54 | connectivity matrix and will be removed in 0.18 55 | 56 | compute_full_tree : bool or 'auto' (optional) 57 | Stop early the construction of the tree at n_clusters. This is 58 | useful to decrease computation time if the number of clusters is 59 | not small compared to the number of samples. This option is 60 | useful only when specifying a connectivity matrix. Note also that 61 | when varying the number of clusters and using caching, it may 62 | be advantageous to compute the full tree. 63 | 64 | linkage : {"ward", "complete", "average"}, optional, default: "ward" 65 | Which linkage criterion to use. The linkage criterion determines 66 | which distance to use between sets of observation. The algorithm 67 | will merge the pairs of cluster that minimize this criterion. 68 | 69 | - ward minimizes the variance of the clusters being merged. 70 | - average uses the average of the distances of each observation of 71 | the two sets. 72 | - complete or maximum linkage uses the maximum distances between 73 | all observations of the two sets. 74 | 75 | pooling_func : callable, default=np.mean 76 | This combines the values of agglomerated features into a single 77 | value, and should accept an array of shape [M, N] and the keyword 78 | argument ``axis=1``, and reduce it to an array of size [M]. 79 | 80 | Attributes 81 | ---------- 82 | labels_ : array [n_samples] 83 | cluster labels for each point 84 | 85 | n_leaves_ : int 86 | Number of leaves in the hierarchical tree. 87 | 88 | n_components_ : int 89 | The estimated number of connected components in the graph. 90 | 91 | children_ : array-like, shape (n_nodes-1, 2) 92 | The children of each non-leaf node. Values less than `n_samples` 93 | correspond to leaves of the tree which are the original samples. 94 | A node `i` greater than or equal to `n_samples` is a non-leaf 95 | node and has children `children_[i - n_samples]`. Alternatively 96 | at the i-th iteration, children[i][0] and children[i][1] 97 | are merged to form node `n_samples + i` 98 | 99 | """ 100 | super(AgglomerativeClustering, self). __init__( 101 | n_clusters, affinity, 102 | memory, connectivity, n_components, 103 | compute_full_tree, linkage, 104 | pooling_func, return_distance) 105 | 106 | def fit(self, X, **kwargs): 107 | """Fit the hierarchical clustering on the data. 108 | 109 | Parameters 110 | ---------- 111 | X : array-like, shape = [n_samples, n_features] 112 | The samples a.k.a. observations. 
113 | 114 | Returns 115 | ------- 116 | self 117 | """ 118 | if self.n_clusters == 'auto': 119 | # assign an arbitrary high number for the max number of clusters 120 | self.n_clusters = int(.75 * X.shape[0]) 121 | super(AgglomerativeClustering, self).fit(X, **kwargs) 122 | try: 123 | # use self.distances 124 | # TODO 125 | raise NotImplementedError() 126 | except AttributeError: 127 | logging.error("Automatic discovery of the number of clusters " 128 | "cannot be performed. AgglomerativeClustering from " 129 | "adenine.external does not contain a " 130 | "`self.distances` attribute. Try to update adenine.") 131 | # hence, when optimal_clusters is defined, use it 132 | optimal_clusters = -1 # TODO 133 | self.n_clusters = optimal_clusters 134 | # perform the standard fit 135 | super(AgglomerativeClustering, self).fit(X, **kwargs) 136 | -------------------------------------------------------------------------------- /adenine/core/__init__.py: -------------------------------------------------------------------------------- 1 | ###################################################################### 2 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 3 | # 4 | # FreeBSD License 5 | ###################################################################### 6 | 7 | from adenine.core.job_distribution import main 8 | -------------------------------------------------------------------------------- /adenine/core/job_distribution.py: -------------------------------------------------------------------------------- 1 | """Master slave.""" 2 | from __future__ import print_function 3 | import os 4 | import imp 5 | import logging 6 | import shutil 7 | import gzip 8 | import numpy as np 9 | 10 | from collections import deque 11 | from six.moves import cPickle as pkl 12 | 13 | from adenine.core import define_pipeline 14 | from adenine.core.pipelines import pipe_worker 15 | from adenine.utils import extra 16 | 17 | try: 18 | from mpi4py import MPI 19 | 20 | COMM = MPI.COMM_WORLD 21 | RANK = COMM.Get_rank() 22 | NAME = MPI.Get_processor_name() 23 | 24 | IS_MPI_JOB = COMM.Get_size() > 1 25 | 26 | except ImportError: 27 | # print("mpi4py module not found. MPI job distribution disabled.") 28 | COMM = None 29 | RANK = 0 30 | NAME = 'localhost' 31 | 32 | IS_MPI_JOB = False 33 | 34 | # MAX_RESUBMISSIONS = 2 35 | # constants to use as tags in communications 36 | DO_WORK = 100 37 | EXIT = 200 38 | 39 | 40 | def master_single_machine(pipes, X): 41 | """Fit and transform/predict some pipelines on some data (single machine). 42 | 43 | This function fits each pipeline in the input list on the provided data. 44 | The results are dumped into a pkl file as a dictionary of dictionaries of 45 | the form {'pipe_id': {'stepID' : [alg_name, level, params, data_out, 46 | data_in, model_obj, voronoi_suitable_object], ...}, ...}. The model_obj is 47 | the sklearn model which has been fit on the dataset, the 48 | voronoi_suitable_object is the very same model but fitted on just the first 49 | two dimensions of the dataset. If a pipeline fails for some reasons the 50 | content of the stepID key is a list of np.nan. 51 | 52 | Parameters 53 | ----------- 54 | pipes : list of list of tuples 55 | Each tuple contains a label and a sklearn Pipeline object. 56 | X : array of float, shape : n_samples x n_features, default : () 57 | The input data matrix. 58 | 59 | Returns 60 | ----------- 61 | pipes_dump : dict 62 | Dictionary with the results of the computation. 
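Example
-----------
A minimal sketch, assuming a configuration module laid out like
ade_config.py (these are the same calls used by ``master`` below)::

    pipes = define_pipeline.parse_steps(
        [config.step0, config.step1, config.step2, config.step3])
    pipes_dump = master_single_machine(pipes, config.X)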
63 | """ 64 | import multiprocessing as mp 65 | jobs = [] 66 | manager = mp.Manager() 67 | pipes_dump = manager.dict() 68 | 69 | # Submit jobs 70 | for i, pipe in enumerate(pipes): 71 | pipe_id = 'pipe' + str(i) 72 | proc = mp.Process(target=pipe_worker, 73 | args=(pipe_id, pipe, pipes_dump, X)) 74 | jobs.append(proc) 75 | proc.start() 76 | logging.info("Job: %s submitted", pipe_id) 77 | 78 | # Collect results 79 | count = 0 80 | for proc in jobs: 81 | proc.join() 82 | count += 1 83 | logging.info("%d jobs collected", count) 84 | 85 | # import joblib as jl 86 | # jl.Parallel(n_jobs=-1) \ 87 | # (jl.delayed(pipe_worker)( 88 | # 'pipe' + str(i), pipe, pipes_dump, X) for i, pipe in enumerate( 89 | # pipes)) 90 | 91 | return dict(pipes_dump) 92 | 93 | 94 | @extra.timed 95 | def master(config): 96 | """Distribute pipelines with mpi4py or multiprocessing.""" 97 | # Pipeline definition 98 | pipes = define_pipeline.parse_steps( 99 | [config.step0, config.step1, 100 | config.step2, config.step3]) 101 | 102 | if not IS_MPI_JOB: 103 | return master_single_machine(pipes, config.X) 104 | 105 | # RUN PIPELINES 106 | nprocs = COMM.Get_size() 107 | # print(NAME + ": start running slaves", nprocs, NAME) 108 | queue = deque(list(enumerate(pipes))) 109 | 110 | pipe_dump = dict() 111 | count = 0 112 | n_pipes = len(queue) 113 | 114 | # seed the slaves by sending work to each processor 115 | for rankk in range(1, min(nprocs, n_pipes)): 116 | pipe_tuple = queue.popleft() 117 | COMM.send(pipe_tuple, dest=rankk, tag=DO_WORK) 118 | # print(NAME + ": send to rank", rankk) 119 | 120 | # loop until there's no more work to do. If queue is empty skips the loop. 121 | while queue: 122 | pipe_tuple = queue.popleft() 123 | # receive result from slave 124 | status = MPI.Status() 125 | pipe_id, step_dump = COMM.recv( 126 | source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) 127 | pipe_dump[pipe_id] = step_dump 128 | count += 1 129 | # send to the same slave new work 130 | COMM.send(pipe_tuple, dest=status.source, tag=DO_WORK) 131 | 132 | # there's no more work to do, so receive all the results from the slaves 133 | for rankk in range(1, min(nprocs, n_pipes)): 134 | # print(NAME + ": master - waiting from", rankk) 135 | status = MPI.Status() 136 | pipe_id, step_dump = COMM.recv( 137 | source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) 138 | pipe_dump[pipe_id] = step_dump 139 | count += 1 140 | 141 | # tell all the slaves to exit by sending an empty message with the EXIT_TAG 142 | for rankk in range(1, nprocs): 143 | # print(NAME + ": master - killing", rankk) 144 | COMM.send(0, dest=rankk, tag=EXIT) 145 | 146 | # print(NAME + ": terminating master") 147 | return pipe_dump 148 | 149 | 150 | def slave(X): 151 | """Pipeline evaluation. 152 | 153 | Parameters 154 | ---------- 155 | X : array of float, shape : n_samples x n_features, default : () 156 | The input data matrix. 157 | """ 158 | try: 159 | while True: 160 | status_ = MPI.Status() 161 | received = COMM.recv(source=0, tag=MPI.ANY_TAG, status=status_) 162 | # check the tag of the received message 163 | if status_.tag == EXIT: 164 | return 165 | # do the work 166 | i, pipe = received 167 | # print(NAME + ": slave received", RANK, i) 168 | pipe_id = 'pipe' + str(i) 169 | step_dump = pipe_worker( 170 | pipe_id, pipe, None, X) 171 | COMM.send((pipe_id, step_dump), dest=0, tag=0) 172 | 173 | except StandardError as exc: 174 | print("Quitting ... 
TB:", str(exc)) 175 | 176 | 177 | def main(config_file): 178 | """Generate the pipelines.""" 179 | 180 | if RANK == 0: 181 | # Load the configuration file 182 | config_path = os.path.abspath(config_file) 183 | 184 | # For some reason, it must be atomic 185 | imp.acquire_lock() 186 | config = imp.load_source('ade_config', config_path) 187 | imp.release_lock() 188 | 189 | # this barrier prevents the slave to re-download the same GEO 190 | # dataset if not locally present 191 | if IS_MPI_JOB: 192 | # Wait for all jobs to end 193 | COMM.barrier() 194 | 195 | if RANK != 0: 196 | # Load the configuration file 197 | config_path = os.path.abspath(config_file) 198 | 199 | # For some reason, it must be atomic 200 | imp.acquire_lock() 201 | config = imp.load_source('ade_config', config_path) 202 | imp.release_lock() 203 | 204 | if hasattr(config, 'use_compression'): 205 | use_compression = config.use_compression 206 | else: 207 | use_compression = False 208 | 209 | extra.set_module_defaults( 210 | config, { 211 | 'step0': {'Impute': [False]}, 212 | 'step1': {'None': [True]}, 213 | 'step2': {'None': [True]}, 214 | 'step3': {'None': [False]}, 215 | 'exp_tag': 'debug', 216 | 'output_root_folder': 'results', 217 | 'verbose': False}) 218 | 219 | # Read the variables from the config file 220 | X = config.X 221 | 222 | if RANK == 0: 223 | # Get the experiment tag and the output root folder 224 | exp_tag, root = config.exp_tag, config.output_root_folder 225 | if not os.path.exists(root): 226 | os.makedirs(root) 227 | 228 | filename = '_'.join(('ade', exp_tag, extra.get_time())) 229 | logfile = os.path.join(root, filename + '.log') 230 | logging.basicConfig(filename=logfile, level=logging.INFO, filemode='w', 231 | format='%(levelname)s (%(name)s): %(message)s') 232 | root_logger = logging.getLogger() 233 | lsh = logging.StreamHandler() 234 | lsh.setLevel(logging.DEBUG if config.verbose else logging.ERROR) 235 | lsh.setFormatter( 236 | logging.Formatter('%(levelname)s (%(name)s): %(message)s')) 237 | root_logger.addHandler(lsh) 238 | pipes_dump = master(config) 239 | else: 240 | slave(X) 241 | 242 | if IS_MPI_JOB: 243 | # Wait for all jobs to end 244 | COMM.barrier() 245 | 246 | if RANK == 0: 247 | # Output Name 248 | outfile = filename 249 | outfolder = os.path.join(root, outfile) 250 | 251 | # Create exp folder into the root folder 252 | os.makedirs(outfolder) 253 | 254 | # pkl Dump 255 | logging.info('Saving Adenine results...') 256 | if use_compression: 257 | with gzip.open(os.path.join(outfolder, outfile + '.pkl.tz'), 258 | 'wb') as out: 259 | pkl.dump(pipes_dump, out) 260 | logging.info("Dump : %s", os.path.join(outfolder, outfile + '.pkl.tz')) 261 | else: 262 | with open(os.path.join(outfolder, outfile + '.pkl'), 'wb') as out: 263 | pkl.dump(pipes_dump, out) 264 | logging.info("Dump : %s", os.path.join(outfolder, outfile + '.pkl')) 265 | 266 | # Retrieve info from the config file 267 | _index = config.index if hasattr(config, 'index') \ 268 | else np.arange(X.shape[0]) 269 | _y = config.y if hasattr(config, 'y') else None 270 | if use_compression: 271 | with gzip.open(os.path.join(outfolder, '__data.pkl.tz'), 'wb') as out: 272 | pkl.dump({'X': X, 'y': _y, 'index': _index}, out) 273 | logging.info("Dump : %s", os.path.join(outfolder, '__data.pkl.tz')) 274 | else: 275 | with open(os.path.join(outfolder, '__data.pkl'), 'wb') as out: 276 | pkl.dump({'X': X, 'y': _y, 'index': _index}, out) 277 | logging.info("Dump : %s", os.path.join(outfolder, '__data.pkl')) 278 | 279 | # Copy the ade_config just used into 
the outFolder 280 | shutil.copy(config_path, os.path.join(outfolder, 'ade_config.py')) 281 | 282 | root_logger.handlers[0].close() 283 | 284 | # Move the logging file into the outFolder 285 | shutil.move(logfile, outfolder) 286 | -------------------------------------------------------------------------------- /adenine/core/pipelines.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | import copy 11 | import logging 12 | import numpy as np 13 | 14 | 15 | def create(pdef): 16 | """Scikit-learn Pipelines objects creation (deprecated). 17 | 18 | This function creates a list of sklearn Pipeline objects starting from the 19 | list of list of tuples given in input that could be created using the 20 | adenine.core.define_pipeline module. 21 | 22 | Parameters 23 | ----------- 24 | pdef : list of list of tuples 25 | This arguments contains the specification needed by sklearn in order 26 | to create a working Pipeline object. 27 | 28 | Returns 29 | ----------- 30 | pipes : list of sklearn.pipeline.Pipeline objects 31 | The list of Piplines, each of them can be fitted and trasformed 32 | with some data. 33 | """ 34 | from sklearn.pipeline import Pipeline 35 | return [Pipeline(p) for p in pdef] 36 | 37 | 38 | def which_level(label): 39 | """Define the step level according to the input step label [DEPRECATED]. 40 | 41 | This function return the level (i.e.: imputing, preproc, dimred, clustring, 42 | None) according to the step label provided as input. 43 | 44 | Parameters 45 | ----------- 46 | label : string 47 | This is the step level as it is reported in the ade_config file. 48 | 49 | Returns 50 | ----------- 51 | level : {imputing, preproc, dimred, clustering, None} 52 | The appropriate level of the input step. 53 | """ 54 | if not isinstance(label, basestring): 55 | raise ValueError("String expected") 56 | 57 | label = label.lower() 58 | if label.startswith('impute'): 59 | level = 'imputing' 60 | elif label in ('recenter', 'standardize', 'normalize', 'minmax'): 61 | level = 'preproc' 62 | elif label in ('pca', 'incrementalpca', 'randomizedpca', 'kernelpca', 63 | 'isomap', 'lle', 'se', 'mds', 'tsne', 'rbm'): 64 | level = 'dimred' 65 | elif label in ('kmeans', 'ap', 'ms', 'spectral', 66 | 'hierarchical'): 67 | level = 'clustering' 68 | else: 69 | level = 'None' 70 | return level 71 | 72 | 73 | def evaluate(level, step, X): 74 | """Transform or predict according to the input level. 75 | 76 | This function uses the transform or the predict method on the input 77 | sklearn-like step according to its level (i.e. imputing, preproc, dimred, 78 | clustering, none). 79 | 80 | Parameters 81 | ----------- 82 | level : {'imputing', 'preproc', 'dimred', 'clustering', 'None'} 83 | The step level. 84 | 85 | step : sklearn-like object 86 | This might be an Imputer, or a PCA, or a KMeans (and so on...) 87 | sklearn-like object. 88 | 89 | X : array of float, shape : n_samples x n_features 90 | The input data matrix. 91 | 92 | Returns 93 | ----------- 94 | res : array of float 95 | A matrix projection in case of dimred, a label vector in case of 96 | clustering, and so on. 
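Example
-----------
A minimal sketch with hypothetical, already-fitted steps (``fitted_pca`` and
``fitted_kmeans`` are placeholder names, not objects defined in this module)::

    proj = evaluate('dimred', fitted_pca, X)            # -> fitted_pca.transform(X)
    labels = evaluate('clustering', fitted_kmeans, X)   # -> fitted_kmeans.labels_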
97 | """ 98 | if level in ('imputing', 'preproc', 'dimred', 'None'): 99 | if hasattr(step, 'embedding_'): 100 | res = step.embedding_ 101 | else: 102 | res = step.transform(X) 103 | elif level == 'clustering': 104 | if hasattr(step, 'labels_'): 105 | res = step.labels_ # e.g. in case of spectral clustering 106 | elif hasattr(step, 'affinity') and step.affinity == 'precomputed': 107 | if not hasattr(step.estimator, 'labels_'): 108 | step.estimator.fit(X) 109 | res = step.estimator.labels_ 110 | else: 111 | res = step.predict(X) 112 | return res 113 | 114 | 115 | def pipe_worker(pipe_id, pipe, pipes_dump, X): 116 | """Parallel pipelines execution. 117 | 118 | Parameters 119 | ----------- 120 | pipe_id : string 121 | Pipeline identifier. 122 | 123 | pipe : list of tuples 124 | Tuple containing a label and a sklearn Pipeline object. 125 | 126 | pipes_dump : multiprocessing.Manager.dict 127 | Dictionary containing the results of the parallel execution. 128 | 129 | X : array of float, shape : n_samples x n_features, default : () 130 | The input data matrix. 131 | """ 132 | step_dump = dict() 133 | 134 | # COPY X as X_curr (to avoid that the next pipeline 135 | # works on the results of the previuos one) 136 | X_curr = np.array(X) 137 | for j, step in enumerate(pipe): 138 | # step[0] -> step_label | step[1] -> model, sklearn (or sklearn-like) 139 | # object 140 | step_id = 'step' + str(j) 141 | # 1. define which level of step is this (i.e.: imputing, preproc, 142 | # dimred, clustering, none) 143 | level = step[-1] 144 | # 2. fit the model (whatever it is) 145 | if step[1].get_params().get('method') == 'hessian': 146 | # check hessian lle constraints 147 | n_components = step[1].get_params().get('n_components') 148 | n_neighbors = 1 + (n_components * (n_components + 3) / 2) 149 | step[1].set_params(n_neighbors=n_neighbors) 150 | try: 151 | step[1].fit(X_curr) 152 | 153 | # 3. evaluate (i.e. transform or predict according to the level) 154 | # X_curr = evaluate(level, step[1], X_curr) 155 | X_next = evaluate(level, step[1], X_curr) 156 | # 3.1 if the model is suitable for voronoi tessellation: fit also 157 | # on 2D 158 | mdl_voronoi = None 159 | if hasattr(step[1], 'cluster_centers_'): 160 | mdl_voronoi = copy.copy(step[1].best_estimator_ if hasattr( 161 | step[1], 'best_estimator_') else step[1]) 162 | if not hasattr(step[1], 'affinity') or step[1].affinity != 'precomputed': 163 | mdl_voronoi.fit(X_curr[:, :2]) 164 | else: 165 | mdl_voronoi.fit(X_curr) 166 | 167 | # 4. save the results in a dictionary of dictionaries of the form: 168 | # save memory and do not dump data after preprocessing (unused in 169 | # analysys) 170 | if level in ('preproc', 'imputing'): 171 | result = [step[0], level, step[1].get_params(), 172 | np.empty(0), np.empty(0), step[1], mdl_voronoi] 173 | X_curr = np.array(X_next) # update the matrix 174 | 175 | # save memory dumping X_curr only in case of clustering 176 | elif level == 'dimred': 177 | result = [step[0], level, step[1].get_params(), 178 | X_next, np.empty(0), step[1], mdl_voronoi] 179 | X_curr = X_next # update the matrix 180 | 181 | # clustering 182 | elif level == 'clustering': 183 | result = [step[0], level, step[1].get_params(), 184 | X_next, X_curr, step[1], mdl_voronoi] 185 | if level != 'None': 186 | step_dump[step_id] = result 187 | 188 | except (AssertionError, ValueError) as e: 189 | logging.critical("Pipeline %s failed at step %s. 
" 190 | "Traceback: %s", pipe_id, step[0], e) 191 | 192 | 193 | # Monkey-patch, see: https://github.com/scikit-learn/scikit-learn/issues/7562 194 | # and wait for the next numpy update 195 | # step_dump['step2'][-2] = None 196 | 197 | if pipes_dump is None: 198 | return step_dump 199 | 200 | pipes_dump[pipe_id] = step_dump 201 | -------------------------------------------------------------------------------- /adenine/core/template/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/adenine/core/template/__init__.py -------------------------------------------------------------------------------- /adenine/core/template/d3_template.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # note: "format" this string to load data in csv format with % string 3 | D3_TREE = r""" 4 | 5 | 6 | 32 | 35 | 36 | 37 | 92 | 93 | """ 94 | -------------------------------------------------------------------------------- /adenine/core/template/svg-crowbar.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | var doctype = ''; 3 | 4 | window.URL = (window.URL || window.webkitURL); 5 | 6 | var body = document.body; 7 | 8 | var prefix = { 9 | xmlns: "http://www.w3.org/2000/xmlns/", 10 | xlink: "http://www.w3.org/1999/xlink", 11 | svg: "http://www.w3.org/2000/svg" 12 | } 13 | 14 | initialize(); 15 | 16 | function initialize() { 17 | var documents = [window.document], 18 | SVGSources = []; 19 | iframes = document.querySelectorAll("iframe"), 20 | objects = document.querySelectorAll("object"); 21 | 22 | [].forEach.call(iframes, function(el) { 23 | try { 24 | if (el.contentDocument) { 25 | documents.push(el.contentDocument); 26 | } 27 | } catch(err) { 28 | console.log(err) 29 | } 30 | }); 31 | 32 | [].forEach.call(objects, function(el) { 33 | try { 34 | if (el.contentDocument) { 35 | documents.push(el.contentDocument); 36 | } 37 | } catch(err) { 38 | console.log(err) 39 | } 40 | }); 41 | 42 | documents.forEach(function(doc) { 43 | var styles = getStyles(doc); 44 | var newSources = getSources(doc, styles); 45 | // because of prototype on NYT pages 46 | for (var i = 0; i < newSources.length; i++) { 47 | SVGSources.push(newSources[i]); 48 | }; 49 | }) 50 | if (SVGSources.length > 1) { 51 | createPopover(SVGSources); 52 | } else if (SVGSources.length > 0) { 53 | download(SVGSources[0]); 54 | } else { 55 | alert("The Crowbar couldn’t find any SVG nodes."); 56 | } 57 | } 58 | 59 | function createPopover(sources) { 60 | cleanup(); 61 | 62 | sources.forEach(function(s1) { 63 | sources.forEach(function(s2) { 64 | if (s1 !== s2) { 65 | if ((Math.abs(s1.top - s2.top) < 38) && (Math.abs(s1.left - s2.left) < 38)) { 66 | s2.top += 38; 67 | s2.left += 38; 68 | } 69 | } 70 | }) 71 | }); 72 | 73 | var buttonsContainer = document.createElement("div"); 74 | body.appendChild(buttonsContainer); 75 | 76 | buttonsContainer.setAttribute("class", "svg-crowbar"); 77 | buttonsContainer.style["z-index"] = 1e7; 78 | buttonsContainer.style["position"] = "absolute"; 79 | buttonsContainer.style["top"] = 0; 80 | buttonsContainer.style["left"] = 0; 81 | 82 | 83 | 84 | var background = document.createElement("div"); 85 | body.appendChild(background); 86 | 87 | background.setAttribute("class", "svg-crowbar"); 88 | background.style["background"] = "rgba(255, 255, 255, 0.7)"; 89 | background.style["position"] = 
"fixed"; 90 | background.style["left"] = 0; 91 | background.style["top"] = 0; 92 | background.style["width"] = "100%"; 93 | background.style["height"] = "100%"; 94 | 95 | sources.forEach(function(d, i) { 96 | var buttonWrapper = document.createElement("div"); 97 | buttonsContainer.appendChild(buttonWrapper); 98 | buttonWrapper.setAttribute("class", "svg-crowbar"); 99 | buttonWrapper.style["position"] = "absolute"; 100 | buttonWrapper.style["top"] = (d.top + document.body.scrollTop) + "px"; 101 | buttonWrapper.style["left"] = (document.body.scrollLeft + d.left) + "px"; 102 | buttonWrapper.style["padding"] = "4px"; 103 | buttonWrapper.style["border-radius"] = "3px"; 104 | buttonWrapper.style["color"] = "white"; 105 | buttonWrapper.style["text-align"] = "center"; 106 | buttonWrapper.style["font-family"] = "'Helvetica Neue'"; 107 | buttonWrapper.style["background"] = "rgba(0, 0, 0, 0.8)"; 108 | buttonWrapper.style["box-shadow"] = "0px 4px 18px rgba(0, 0, 0, 0.4)"; 109 | buttonWrapper.style["cursor"] = "move"; 110 | buttonWrapper.textContent = "SVG #" + i + ": " + (d.id ? "#" + d.id : "") + (d.class ? "." + d.class : ""); 111 | 112 | var button = document.createElement("button"); 113 | buttonWrapper.appendChild(button); 114 | button.setAttribute("data-source-id", i) 115 | button.style["width"] = "150px"; 116 | button.style["font-size"] = "12px"; 117 | button.style["line-height"] = "1.4em"; 118 | button.style["margin"] = "5px 0 0 0"; 119 | button.textContent = "Download"; 120 | 121 | button.onclick = function(el) { 122 | // console.log(el, d, i, sources) 123 | download(d); 124 | }; 125 | 126 | }); 127 | 128 | } 129 | 130 | function cleanup() { 131 | var crowbarElements = document.querySelectorAll(".svg-crowbar"); 132 | 133 | [].forEach.call(crowbarElements, function(el) { 134 | el.parentNode.removeChild(el); 135 | }); 136 | } 137 | 138 | 139 | function getSources(doc, styles) { 140 | var svgInfo = [], 141 | svgs = doc.querySelectorAll("svg"); 142 | 143 | styles = (styles === undefined) ? 
"" : styles; 144 | 145 | [].forEach.call(svgs, function (svg) { 146 | 147 | svg.setAttribute("version", "1.1"); 148 | 149 | var defsEl = document.createElement("defs"); 150 | svg.insertBefore(defsEl, svg.firstChild); //TODO .insert("defs", ":first-child") 151 | // defsEl.setAttribute("class", "svg-crowbar"); 152 | 153 | var styleEl = document.createElement("style") 154 | defsEl.appendChild(styleEl); 155 | styleEl.setAttribute("type", "text/css"); 156 | 157 | 158 | // removing attributes so they aren't doubled up 159 | svg.removeAttribute("xmlns"); 160 | svg.removeAttribute("xlink"); 161 | 162 | // These are needed for the svg 163 | if (!svg.hasAttributeNS(prefix.xmlns, "xmlns")) { 164 | svg.setAttributeNS(prefix.xmlns, "xmlns", prefix.svg); 165 | } 166 | 167 | if (!svg.hasAttributeNS(prefix.xmlns, "xmlns:xlink")) { 168 | svg.setAttributeNS(prefix.xmlns, "xmlns:xlink", prefix.xlink); 169 | } 170 | 171 | var source = (new XMLSerializer()).serializeToString(svg).replace('', ''); 172 | var rect = svg.getBoundingClientRect(); 173 | svgInfo.push({ 174 | top: rect.top, 175 | left: rect.left, 176 | width: rect.width, 177 | height: rect.height, 178 | class: svg.getAttribute("class"), 179 | id: svg.getAttribute("id"), 180 | childElementCount: svg.childElementCount, 181 | source: [doctype + source] 182 | }); 183 | }); 184 | return svgInfo; 185 | } 186 | 187 | function download(source) { 188 | var filename = "untitled"; 189 | 190 | if (source.id) { 191 | filename = source.id; 192 | } else if (source.class) { 193 | filename = source.class; 194 | } else if (window.document.title) { 195 | filename = window.document.title.replace(/[^a-z0-9]/gi, '-').toLowerCase(); 196 | } 197 | 198 | var url = window.URL.createObjectURL(new Blob(source.source, { "type" : "text\/xml" })); 199 | 200 | var a = document.createElement("a"); 201 | body.appendChild(a); 202 | a.setAttribute("class", "svg-crowbar"); 203 | a.setAttribute("download", filename + ".svg"); 204 | a.setAttribute("href", url); 205 | a.style["display"] = "none"; 206 | a.click(); 207 | 208 | setTimeout(function() { 209 | window.URL.revokeObjectURL(url); 210 | }, 10); 211 | } 212 | 213 | function getStyles(doc) { 214 | var styles = "", 215 | styleSheets = doc.styleSheets; 216 | 217 | if (styleSheets) { 218 | for (var i = 0; i < styleSheets.length; i++) { 219 | processStyleSheet(styleSheets[i]); 220 | } 221 | } 222 | 223 | function processStyleSheet(ss) { 224 | if (ss.cssRules) { 225 | for (var i = 0; i < ss.cssRules.length; i++) { 226 | var rule = ss.cssRules[i]; 227 | if (rule.type === 3) { 228 | // Import Rule 229 | processStyleSheet(rule.styleSheet); 230 | } else { 231 | // hack for illustrator crashing on descendent selectors 232 | if (rule.selectorText) { 233 | if (rule.selectorText.indexOf(">") === -1) { 234 | styles += "\n" + rule.cssText; 235 | } 236 | } 237 | } 238 | } 239 | } 240 | } 241 | return styles; 242 | } 243 | 244 | })(); 245 | -------------------------------------------------------------------------------- /adenine/examples/ade_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | from adenine.utils import data_source 11 | from adenine.utils import extra 12 | 13 | # 
-------------------------- EXPERMIENT INFO ------------------------- # 14 | exp_tag = 'debug' 15 | output_root_folder = 'results' 16 | file_format = 'png' # or 'png' 17 | plotting_context = 'paper' # one of {paper, notebook, talk, poster} 18 | 19 | # ---------------------------- INPUT DATA ---------------------------- # 20 | X, y, feat_names, class_names = data_source.load('iris') 21 | #X, y, feat_names, class_names = data_source.load('gauss', n_samples=300) 22 | # X, y, feat_names, class_names = data_source.load('circles') 23 | # X, y, feat_names, class_names = data_source.load('digits') 24 | # X, y, feat_names, class_names = data_source.load('diabetes') 25 | # X, y, feat_names, class_names = data_source.load('boston') 26 | # X, y, feat_names, class_names = data_source.load('custom', 'data/X.npy', 'data/y.npy') 27 | # X, y, feat_names, class_names = data_source.load('custom', 'data/X.csv', 'data/y.csv') 28 | 29 | # X, y, feat_names, class_names = data_source.load('custom', '/home/fede/src/adenine/adenine/examples/TM_matrix.csv') 30 | # X = extra.ensure_symmetry(X) 31 | # X = 1. - X # i want affinity 32 | 33 | # ----------------------- PIPELINE DEFINITION ------------------------ # 34 | 35 | # --- Missing Values Imputing --- # 36 | # step0 = {'Impute': [False, {'missing_values': 'NaN', 37 | # 'strategy': ['median','mean','nearest_neighbors']}]} 38 | 39 | # --- Data Preprocessing --- # 40 | step1 = {'None': [False], 'Recenter': [False], 'Standardize': [False], 41 | 'Normalize': [True, {'norm': ['l2']}], 42 | 'MinMax': [False, {'feature_range': [(0,1), (-1,1)]}]} 43 | 44 | # --- Dimensionality Reduction & Manifold Learning --- # 45 | step2 = {'PCA': [True, {'n_components': 2}], 46 | 'IncrementalPCA': [False, {'n_components': 3}], 47 | 'RandomizedPCA': [False, {'n_components': 3}], 48 | 'KernelPCA': [False, {'n_components': 2, 49 | 'kernel': ['linear','rbf','poly'], 'gamma': 2}], 50 | 'Isomap': [False, {'n_components': 3, 'n_neighbors': 5}], 51 | 'LLE': [False, {'n_components': 3, 'n_neighbors': 5, # xxx 52 | 'method': ['standard','modified','hessian','ltsa']}], 53 | 'SE': [False, {'n_components': 3, 'affinity': ['nearest_neighbors','rbf']}], # can be 'precomputed' 54 | 'MDS': [False, {'n_components': 3, 'metric': [True, False]}], 55 | 'tSNE': [False, {'n_components': 3}], 56 | 'RMB': [True, {'n_components': 256}], 57 | 'None': [False, {}] 58 | } 59 | 60 | # --- Clustering --- # 61 | step3 = {'KMeans': [False, {'n_clusters': [2]}], # cannot be 'precomputed' 62 | 'AP': [False, {'preference': ['auto']}], # can be 'precomputed' 63 | 'MS': [False], # cannot be 'precomputed' 64 | 'Spectral': [False, {'n_clusters': [2]}], # can be 'precomputed' 65 | 'Hierarchical': [False, {'n_clusters': [3], 66 | #'affinity': ['manhattan','euclidean'], 67 | 'affinity': ['euclidean'], 68 | #'linkage': ['ward','complete','average']}] 69 | 'linkage': ['ward']}] 70 | } 71 | -------------------------------------------------------------------------------- /adenine/examples/data/X.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/adenine/examples/data/X.npy -------------------------------------------------------------------------------- /adenine/examples/data/X_missing.csv: -------------------------------------------------------------------------------- 1 | nan,-1.156862383698490815e+00,2.325969437444890264e-01,-1.226910178531959300e-01 2 | nan,nan,1.662453908639822120e-01,nan 3 | 
nan,1.078908257010945171e+00,nan,-4.521696668277273012e-01 4 | nan,nan,nan,nan 5 | -7.075918621950672005e-01,nan,1.107003123417464430e+00,3.925003704893688106e-01 6 | nan,-9.307682348162302777e-01,nan,-1.238600366412410531e-01 7 | -6.394675777546744433e-01,nan,nan,nan 8 | -1.463084369363933934e+00,nan,1.495331971007898719e+00,nan 9 | nan,-9.088301530861653266e-01,-3.489239577414761095e-01,nan 10 | 4.314432145948063901e-01,1.981623502664172642e+00,-4.459802189090978919e-01,nan 11 | 6.142043420674385412e-02,-6.118179692142385884e-01,nan,nan 12 | 1.653774270442252892e+00,nan,-7.105778390461989780e-01,-4.253239641661243908e-01 13 | nan,7.938380876396712305e-01,nan,9.734189421253153229e-01 14 | nan,nan,-9.325734485898492521e-01,nan 15 | -4.692381743406294770e-01,nan,nan,-9.854055694219750194e-02 16 | 2.856917897910776771e-01,5.442976535996231213e-01,-9.027838452675212011e-01,nan 17 | 3.228887691158054407e-01,1.178646684094214914e+00,-8.115360478215190021e-01,nan 18 | nan,-8.872088861442941621e-01,nan,nan 19 | 2.767596836546558081e-01,nan,nan,3.134103300122419861e-02 20 | nan,nan,9.208184217167506569e-01,nan 21 | nan,1.446536613951119543e+00,nan,nan 22 | 6.346579181673047687e-01,nan,nan,nan 23 | 1.392886349437185034e+00,nan,-1.198599027981311460e+00,-1.132697590062041293e+00 24 | -1.434715432544310998e+00,1.026936160278541399e+00,1.124128303209935176e+00,nan 25 | nan,2.330603277258347372e-01,nan,nan 26 | nan,nan,1.142952665043716731e+00,nan 27 | nan,1.140373046240707788e+00,nan,4.270122455127892680e-01 28 | nan,-1.361512094713794196e+00,nan,nan 29 | 1.209893933270316246e+00,nan,-1.359040178533376775e+00,nan 30 | -4.502313522982358540e-01,nan,nan,nan 31 | 2.195009994421553146e-01,-6.037814919824179283e-01,nan,-3.282889270210308762e-03 32 | -9.284702738379502218e-01,nan,9.642905063476822081e-01,nan 33 | -1.210009835227054298e+00,9.288008161700008758e-01,7.662876912390784723e-01,6.563682753521845603e-01 34 | nan,nan,-1.080027626003194241e+00,nan 35 | nan,-7.893344721439290446e-01,3.208092641934174316e-01,-2.744291585393637267e-01 36 | 1.641059936670823949e-01,nan,-4.154673123178226346e-02,-1.690398094574822596e-01 37 | nan,7.584241169204418709e-01,nan,nan 38 | 6.565432358878454944e-02,nan,nan,nan 39 | nan,9.926811177344899706e-01,nan,nan 40 | -1.041542225925210924e-01,nan,nan,nan 41 | nan,9.143221607270377582e-01,nan,1.288919718901406775e+00 42 | nan,5.019338363269474357e-01,7.100040463856538420e-01,1.545841940359383937e+00 43 | -6.600020519878190273e-01,1.104240175105188904e+00,1.132483378627092474e+00,1.465827932390227684e+00 44 | 1.744402579749481652e-01,-1.587272191140688182e+00,2.497575268980154195e-01,-8.134691613104610974e-02 45 | nan,nan,nan,nan 46 | nan,7.501720661828157333e-01,nan,nan 47 | -3.342554750911648775e-01,-7.137329100818909922e-01,-2.407084197162502048e-01,-8.340083131232416125e-02 48 | nan,nan,nan,-9.673348217456705367e-01 49 | nan,nan,-2.391048052682311353e-01,nan 50 | nan,9.300718567010340943e-01,nan,nan 51 | 1.017586261554328741e+00,nan,nan,nan 52 | 1.303392086200408251e+00,1.176009676693779760e+00,nan,nan 53 | nan,4.836988853116996889e-01,-1.092880083369777822e+00,nan 54 | -9.319851610608677062e-01,1.397977267254582046e+00,nan,1.535659733771689517e+00 55 | -6.408969054855628844e-01,nan,nan,nan 56 | nan,-9.323550811636887037e-01,nan,3.632022074493482244e-01 57 | nan,nan,-3.625792307095718203e-01,nan 58 | 9.145070118313443075e-01,nan,nan,-6.360363342469680381e-01 59 | -1.007658038296989078e+00,nan,1.179926256982412269e+00,7.463843472733622253e-01 60 | 
-1.187303971214693110e+00,1.107806472262462982e+00,nan,1.585263871893016763e+00 61 | -9.031651321481354300e-01,nan,nan,1.321148563212554361e+00 62 | nan,7.999924887361942183e-01,-5.778430086333533611e-01,nan 63 | 1.059243736095132560e+00,1.522616402375389422e+00,-9.552541500540825403e-01,nan 64 | nan,1.609635090789223621e+00,nan,nan 65 | -1.606690560441900173e-01,-1.229925236813150580e+00,-3.819256182206764993e-01,7.356022877628196621e-01 66 | nan,7.062859368497632628e-01,nan,nan 67 | -1.411870672836224694e+00,nan,7.086113942358228668e-01,1.492798947017852651e+00 68 | -3.040514043122001242e-01,-5.813862246900292075e-01,nan,-2.108471835999596866e-01 69 | 8.507789005424097883e-01,8.773006104066740640e-01,-7.045392379849533260e-01,nan 70 | 3.074991953218891294e-01,-1.102530028510551263e+00,-5.869166677052294334e-01,1.138990811361808436e-01 71 | nan,nan,1.461701841444241090e+00,nan 72 | nan,nan,nan,nan 73 | nan,nan,-1.098651949849287046e+00,nan 74 | -1.095979133848796971e+00,1.455316797130397521e+00,1.227837823199790623e+00,nan 75 | nan,nan,nan,nan 76 | -6.120563267013116177e-02,nan,nan,-3.142630129259655902e-01 77 | nan,-6.724793646662509117e-01,-3.051652072034404806e-01,5.981953113846391057e-01 78 | -1.329461562127501661e+00,nan,1.783776000666230654e+00,nan 79 | -1.430891473540749415e+00,nan,nan,nan 80 | 1.398979808841118500e+00,1.213372537922308148e+00,-1.139778282564449130e+00,nan 81 | nan,nan,nan,nan 82 | nan,1.053890414736040837e+00,nan,-8.991149068911545861e-01 83 | nan,nan,nan,-1.715028446120915873e-01 84 | -2.975404547273184930e-01,-9.113023565308768781e-01,1.227527126258068924e-01,nan 85 | nan,nan,-8.837576863617241374e-01,nan 86 | 5.783526399497983528e-01,-1.568427441620841467e+00,nan,nan 87 | -2.836003143929887171e-01,nan,nan,nan 88 | nan,nan,6.143182346112907588e-02,nan 89 | nan,nan,nan,nan 90 | -8.537457775861589937e-02,9.917736724082389932e-01,-4.268716727368595532e-01,nan 91 | nan,-1.019031454530859415e+00,nan,nan 92 | nan,nan,1.042925054651744121e+00,9.984537828953213845e-01 93 | -1.995090873945649934e-01,nan,5.535149367168700207e-01,nan 94 | 2.228127146357133381e-01,nan,nan,nan 95 | nan,-1.382705804273485883e+00,nan,nan 96 | nan,nan,nan,nan 97 | 3.381358892249306525e-01,-1.326733296875020063e+00,nan,4.291618094613102175e-01 98 | nan,1.592912086579896691e+00,nan,5.686762988479203695e-01 99 | nan,nan,-7.593116778742476924e-01,nan 100 | nan,1.163667252112682515e+00,-6.318787198098960722e-01,nan 101 | -------------------------------------------------------------------------------- /adenine/examples/data/Y_missing_test.csv: -------------------------------------------------------------------------------- 1 | 1.000000000000000000e+00 2 | 1.000000000000000000e+00 3 | 2.000000000000000000e+00 4 | 0.000000000000000000e+00 5 | 0.000000000000000000e+00 6 | 1.000000000000000000e+00 7 | 0.000000000000000000e+00 8 | 0.000000000000000000e+00 9 | 1.000000000000000000e+00 10 | 2.000000000000000000e+00 11 | 1.000000000000000000e+00 12 | 2.000000000000000000e+00 13 | 0.000000000000000000e+00 14 | 2.000000000000000000e+00 15 | 1.000000000000000000e+00 16 | 2.000000000000000000e+00 17 | 2.000000000000000000e+00 18 | 1.000000000000000000e+00 19 | 1.000000000000000000e+00 20 | 0.000000000000000000e+00 21 | 2.000000000000000000e+00 22 | 2.000000000000000000e+00 23 | 2.000000000000000000e+00 24 | 0.000000000000000000e+00 25 | 2.000000000000000000e+00 26 | 0.000000000000000000e+00 27 | 0.000000000000000000e+00 28 | 1.000000000000000000e+00 29 | 2.000000000000000000e+00 30 | 0.000000000000000000e+00 31 
| 1.000000000000000000e+00 32 | 0.000000000000000000e+00 33 | 0.000000000000000000e+00 34 | 2.000000000000000000e+00 35 | 1.000000000000000000e+00 36 | 1.000000000000000000e+00 37 | 0.000000000000000000e+00 38 | 1.000000000000000000e+00 39 | 2.000000000000000000e+00 40 | 1.000000000000000000e+00 41 | 0.000000000000000000e+00 42 | 0.000000000000000000e+00 43 | 0.000000000000000000e+00 44 | 1.000000000000000000e+00 45 | 0.000000000000000000e+00 46 | 0.000000000000000000e+00 47 | 1.000000000000000000e+00 48 | 2.000000000000000000e+00 49 | 1.000000000000000000e+00 50 | 2.000000000000000000e+00 51 | 2.000000000000000000e+00 52 | 2.000000000000000000e+00 53 | 2.000000000000000000e+00 54 | 0.000000000000000000e+00 55 | 0.000000000000000000e+00 56 | 1.000000000000000000e+00 57 | 1.000000000000000000e+00 58 | 2.000000000000000000e+00 59 | 0.000000000000000000e+00 60 | 0.000000000000000000e+00 61 | 0.000000000000000000e+00 62 | 2.000000000000000000e+00 63 | 2.000000000000000000e+00 64 | 0.000000000000000000e+00 65 | 1.000000000000000000e+00 66 | 2.000000000000000000e+00 67 | 0.000000000000000000e+00 68 | 1.000000000000000000e+00 69 | 2.000000000000000000e+00 70 | 1.000000000000000000e+00 71 | 0.000000000000000000e+00 72 | 1.000000000000000000e+00 73 | 2.000000000000000000e+00 74 | 0.000000000000000000e+00 75 | 2.000000000000000000e+00 76 | 1.000000000000000000e+00 77 | 1.000000000000000000e+00 78 | 0.000000000000000000e+00 79 | 0.000000000000000000e+00 80 | 2.000000000000000000e+00 81 | 1.000000000000000000e+00 82 | 2.000000000000000000e+00 83 | 1.000000000000000000e+00 84 | 1.000000000000000000e+00 85 | 2.000000000000000000e+00 86 | 1.000000000000000000e+00 87 | 0.000000000000000000e+00 88 | 1.000000000000000000e+00 89 | 2.000000000000000000e+00 90 | 2.000000000000000000e+00 91 | 1.000000000000000000e+00 92 | 0.000000000000000000e+00 93 | 1.000000000000000000e+00 94 | 1.000000000000000000e+00 95 | 1.000000000000000000e+00 96 | 2.000000000000000000e+00 97 | 1.000000000000000000e+00 98 | 0.000000000000000000e+00 99 | 1.000000000000000000e+00 100 | 2.000000000000000000e+00 101 | -------------------------------------------------------------------------------- /adenine/examples/data/y.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/adenine/examples/data/y.npy -------------------------------------------------------------------------------- /adenine/externals/__init__.py: -------------------------------------------------------------------------------- 1 | from .hierarchical import AgglomerativeClustering 2 | -------------------------------------------------------------------------------- /adenine/test/Y_missing_test.csv: -------------------------------------------------------------------------------- 1 | 0.000000000000000000e+00 2 | 0.000000000000000000e+00 3 | 0.000000000000000000e+00 4 | 0.000000000000000000e+00 5 | 0.000000000000000000e+00 6 | 0.000000000000000000e+00 7 | 0.000000000000000000e+00 8 | 0.000000000000000000e+00 9 | 0.000000000000000000e+00 10 | 0.000000000000000000e+00 11 | 0.000000000000000000e+00 12 | 0.000000000000000000e+00 13 | 0.000000000000000000e+00 14 | 0.000000000000000000e+00 15 | 0.000000000000000000e+00 16 | 0.000000000000000000e+00 17 | 0.000000000000000000e+00 18 | 0.000000000000000000e+00 19 | 0.000000000000000000e+00 20 | 0.000000000000000000e+00 21 | 0.000000000000000000e+00 22 | 0.000000000000000000e+00 23 | 0.000000000000000000e+00 24 | 
0.000000000000000000e+00 25 | 0.000000000000000000e+00 26 | 0.000000000000000000e+00 27 | 0.000000000000000000e+00 28 | 0.000000000000000000e+00 29 | 0.000000000000000000e+00 30 | 0.000000000000000000e+00 31 | 0.000000000000000000e+00 32 | 0.000000000000000000e+00 33 | 0.000000000000000000e+00 34 | 0.000000000000000000e+00 35 | 0.000000000000000000e+00 36 | 0.000000000000000000e+00 37 | 0.000000000000000000e+00 38 | 0.000000000000000000e+00 39 | 0.000000000000000000e+00 40 | 0.000000000000000000e+00 41 | 0.000000000000000000e+00 42 | 0.000000000000000000e+00 43 | 0.000000000000000000e+00 44 | 0.000000000000000000e+00 45 | 0.000000000000000000e+00 46 | 0.000000000000000000e+00 47 | 0.000000000000000000e+00 48 | 0.000000000000000000e+00 49 | 0.000000000000000000e+00 50 | 0.000000000000000000e+00 51 | 1.000000000000000000e+00 52 | 1.000000000000000000e+00 53 | 1.000000000000000000e+00 54 | 1.000000000000000000e+00 55 | 1.000000000000000000e+00 56 | 1.000000000000000000e+00 57 | 1.000000000000000000e+00 58 | 1.000000000000000000e+00 59 | 1.000000000000000000e+00 60 | 1.000000000000000000e+00 61 | 1.000000000000000000e+00 62 | 1.000000000000000000e+00 63 | 1.000000000000000000e+00 64 | 1.000000000000000000e+00 65 | 1.000000000000000000e+00 66 | 1.000000000000000000e+00 67 | 1.000000000000000000e+00 68 | 1.000000000000000000e+00 69 | 1.000000000000000000e+00 70 | 1.000000000000000000e+00 71 | 1.000000000000000000e+00 72 | 1.000000000000000000e+00 73 | 1.000000000000000000e+00 74 | 1.000000000000000000e+00 75 | 1.000000000000000000e+00 76 | 1.000000000000000000e+00 77 | 1.000000000000000000e+00 78 | 1.000000000000000000e+00 79 | 1.000000000000000000e+00 80 | 1.000000000000000000e+00 81 | 1.000000000000000000e+00 82 | 1.000000000000000000e+00 83 | 1.000000000000000000e+00 84 | 1.000000000000000000e+00 85 | 1.000000000000000000e+00 86 | 1.000000000000000000e+00 87 | 1.000000000000000000e+00 88 | 1.000000000000000000e+00 89 | 1.000000000000000000e+00 90 | 1.000000000000000000e+00 91 | 1.000000000000000000e+00 92 | 1.000000000000000000e+00 93 | 1.000000000000000000e+00 94 | 1.000000000000000000e+00 95 | 1.000000000000000000e+00 96 | 1.000000000000000000e+00 97 | 1.000000000000000000e+00 98 | 1.000000000000000000e+00 99 | 1.000000000000000000e+00 100 | 1.000000000000000000e+00 101 | 2.000000000000000000e+00 102 | 2.000000000000000000e+00 103 | 2.000000000000000000e+00 104 | 2.000000000000000000e+00 105 | 2.000000000000000000e+00 106 | 2.000000000000000000e+00 107 | 2.000000000000000000e+00 108 | 2.000000000000000000e+00 109 | 2.000000000000000000e+00 110 | 2.000000000000000000e+00 111 | 2.000000000000000000e+00 112 | 2.000000000000000000e+00 113 | 2.000000000000000000e+00 114 | 2.000000000000000000e+00 115 | 2.000000000000000000e+00 116 | 2.000000000000000000e+00 117 | 2.000000000000000000e+00 118 | 2.000000000000000000e+00 119 | 2.000000000000000000e+00 120 | 2.000000000000000000e+00 121 | 2.000000000000000000e+00 122 | 2.000000000000000000e+00 123 | 2.000000000000000000e+00 124 | 2.000000000000000000e+00 125 | 2.000000000000000000e+00 126 | 2.000000000000000000e+00 127 | 2.000000000000000000e+00 128 | 2.000000000000000000e+00 129 | 2.000000000000000000e+00 130 | 2.000000000000000000e+00 131 | 2.000000000000000000e+00 132 | 2.000000000000000000e+00 133 | 2.000000000000000000e+00 134 | 2.000000000000000000e+00 135 | 2.000000000000000000e+00 136 | 2.000000000000000000e+00 137 | 2.000000000000000000e+00 138 | 2.000000000000000000e+00 139 | 2.000000000000000000e+00 140 | 2.000000000000000000e+00 141 | 
2.000000000000000000e+00 142 | 2.000000000000000000e+00 143 | 2.000000000000000000e+00 144 | 2.000000000000000000e+00 145 | 2.000000000000000000e+00 146 | 2.000000000000000000e+00 147 | 2.000000000000000000e+00 148 | 2.000000000000000000e+00 149 | 2.000000000000000000e+00 150 | 2.000000000000000000e+00 151 | -------------------------------------------------------------------------------- /adenine/test/carttest.py: -------------------------------------------------------------------------------- 1 | from adenine.utils.extra import modified_cartesian 2 | 3 | A = [(1,0), (2,0)] 4 | B = [(3,0),(4,0)] 5 | C = [] 6 | D = [(5,0),(6,0)] 7 | 8 | modified_cartesian(A,B,C,D) 9 | -------------------------------------------------------------------------------- /adenine/test/imputing_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | from __future__ import division 11 | 12 | import numpy as np 13 | 14 | from adenine.utils import data_source 15 | from adenine.utils.extensions import Imputer 16 | 17 | 18 | def test(missing_rate): 19 | """ 20 | Testing the KNN data imputing. 21 | """ 22 | Xreal, y, feat_names, class_names = data_source.load('iris') 23 | # Xreal, y, feat_names, class_names = data_source.load('gauss', n_samples=100) 24 | n, p = Xreal.shape 25 | print("{} x {} matrix loaded".format(n, p)) 26 | 27 | # Choose the missing rate 28 | # missing_rate = 0.5 29 | n_missing = int(missing_rate * (n*p)) 30 | 31 | # Create holes in the matrix 32 | np.random.seed(42) 33 | idx = np.random.permutation(n*p) 34 | xx = Xreal.ravel().copy() 35 | xx[idx[:n_missing]] = np.nan 36 | X = np.reshape(xx, (n, p)) 37 | print("{} values deleted".format(n_missing)) 38 | 39 | # Save data 40 | np.savetxt('X_missing.csv', X, delimiter=',') 41 | np.savetxt('Y_missing_test.csv', y, delimiter=',') 42 | 43 | # Start test 44 | strategies = ["mean", "median", "most_frequent", "nearest_neighbors"] 45 | 46 | imp = Imputer(strategy=strategies[3]) 47 | Ximp = imp.fit_transform(X) 48 | 49 | if len(np.where(np.isnan(Ximp))[0]) == 0: 50 | print("All values were imputed according to: {}-strategy".format(imp.strategy)) 51 | else: 52 | print("Empty values: {}".format(len(np.where(np.isnan(Ximp))[0]))) 53 | 54 | # Check results 55 | dist = np.sqrt(np.sum((Xreal[imp.X_mask,:].ravel() - Ximp.ravel())**2)) 56 | print("dist(Xreal - Ximp) = {}".format(dist)) 57 | 58 | # print(Ximp) 59 | 60 | 61 | def main(): 62 | for missing_rate in np.linspace(0.01, 0.3, 2): 63 | print("\nmissing rate: {}".format(missing_rate)) 64 | test(missing_rate) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /adenine/test/imputing_test_lite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | from __future__ import division 11 | 12 | import numpy as np 13 | 14 | from adenine.utils import 
data_source 15 | from adenine.utils.extensions import Imputer 16 | 17 | 18 | def test(missing_rate): 19 | """ 20 | Testing the KNN data imputing. 21 | """ 22 | np.random.seed(42) 23 | Xreal, y, feat_names, class_names = data_source.load('iris') 24 | # Xreal, y, feat_names, class_names = data_source.load('gauss', n_samples=80) 25 | n, p = Xreal.shape 26 | print("{} x {} matrix loaded".format(n, p)) 27 | 28 | # Choose the missing rate 29 | # missing_rate = 0.5 30 | n_missing = int(missing_rate * (n*p)) 31 | 32 | # Create holes in the matrix 33 | idx = np.random.permutation(n*p) 34 | xx = Xreal.ravel().copy() 35 | xx[idx[:n_missing]] = np.nan 36 | X = np.reshape(xx, (n, p)) 37 | # X[0,:] = np.nan 38 | print("{} values deleted".format(n_missing)) 39 | 40 | # Save data 41 | np.savetxt('X_missing.csv', X, delimiter=',') 42 | np.savetxt('Y_missing_test.csv', y, delimiter=',') 43 | 44 | # Start test 45 | strategies = ["mean", "median", "most_frequent", "nearest_neighbors"] 46 | 47 | imp = Imputer(strategy=strategies[3]) 48 | Ximp = imp.fit_transform(X) 49 | # Xtr = X[:50, :] 50 | # Xts = X[50:, :] 51 | # imp.fit(Xtr) 52 | # Ximp = imp.transform(Xts) 53 | 54 | if len(np.where(np.isnan(Ximp))[0]) == 0: 55 | print("All values were imputed according to: {}-strategy".format(imp.strategy)) 56 | else: 57 | print("Empty values: {}".format(len(np.where(np.isnan(Ximp))[0]))) 58 | 59 | # Check results 60 | dist = np.sqrt(np.sum((Xreal[imp.X_mask,:].ravel() - Ximp.ravel())**2)) 61 | print("dist(Xreal - Ximp) = {}".format(dist)) 62 | 63 | 64 | 65 | def main(): 66 | # for missing_rate in np.linspace(0.01, 0.3, 2): 67 | missing_rate = 0.3 68 | print("\nmissing rate: {}".format(missing_rate)) 69 | test(missing_rate) 70 | 71 | 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /adenine/utils/GEO2csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """This module contains utility functions for GEO DataSets wrangling.""" 5 | 6 | ###################################################################### 7 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 8 | # 9 | # FreeBSD License 10 | ###################################################################### 11 | 12 | import GEOparse 13 | import logging 14 | import os 15 | import pandas as pd 16 | from sklearn import datasets 17 | from six.moves import filter 18 | 19 | 20 | def get_GEO(accession_number, phenotype_name='title', return_gse=False): 21 | """Get the GEO data from its accession number. 22 | 23 | Parameters 24 | ----------- 25 | accession_number : string 26 | 'GSEXXXXX' is any GEO accession ID loaded by `GEOparse`. 
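    phenotype_name : string, default : 'title'
        Column of `gse.phenotype_data` used to build the target (label) vector.

    return_gse : bool, default : False
        If True, the `GEOparse` GSE object is also returned together with the
        data bunch.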
27 | 28 | Returns 29 | ----------- 30 | data : sklearn.datasets.base.Bunch 31 | the dataset bunch 32 | gse : GEOparse.GEOTypes.GSE 33 | the GEOparse object 34 | """ 35 | gse = GEOparse.get_GEO(geo=accession_number, destdir=os.curdir, 36 | silent=True, include_data=True, 37 | how='full') 38 | xx = gse.pivot_samples('VALUE').transpose() 39 | index = xx.index.tolist() 40 | feature_names = xx.columns.tolist() 41 | yy = gse.phenotype_data[phenotype_name] 42 | data = datasets.base.Bunch(data=xx.values, target=yy.values, 43 | feature_names=feature_names, 44 | index=index) 45 | 46 | 47 | print('* Desired labels can be found with --label_field = ') 48 | for k in gse.phenotype_data.keys(): 49 | print('\t{}'.format(k)) 50 | 51 | out = [data] 52 | if return_gse: 53 | out.append(gse) 54 | 55 | return out 56 | 57 | 58 | def label_mapper(raw_labels, new_labels): 59 | """Map some raw labels into new labels. 60 | 61 | When dealing with GEO DataSets it is very common that each GSM sample has 62 | a different phenotye (e.g. 'Brain - 001', 'Brain - 002', ...). This 63 | function maps these raw labels into new homogeneous labels. 64 | 65 | Parameters 66 | ----------- 67 | raw_labels : list of strings 68 | list of unpreprocessed labels 69 | new_labels : list of strings 70 | list of labels to map 71 | 72 | Returns 73 | ----------- 74 | y : array of float, shape : n_samples 75 | the modified label vector 76 | 77 | Examples 78 | ----------- 79 | >>> raw_labels = ['Brain - 001', 'Brain - 002', 'Muscle - 001', 'Muscle - 002'] 80 | >>> label_mapper(raw_labels, ['Brain', 'Muscle']) 81 | ['Brain', 'Brain', 'Muscle', 'Muscle'] 82 | """ 83 | y = [] 84 | for rl in raw_labels: 85 | for nl in new_labels: 86 | if nl in rl: 87 | y.append(nl) 88 | break 89 | else: 90 | y.append(rl) 91 | # print('No mapping rule for %s', rl) 92 | return y 93 | 94 | 95 | def GEO_select_samples(data, labels, selected_labels, index, 96 | feature_names=None): 97 | """GEO DataSets data selection tool. 98 | 99 | Modify the labels with `label_mapper` then return only the samples with 100 | labels in selected_labels. 101 | 102 | Parameters 103 | ----------- 104 | data : array of float, shape : n_samples x n_features 105 | the dataset 106 | labels : numpy array (n_samples,) 107 | the labels vector 108 | selected_labels : list of strings 109 | a subset of new_labels containing only the samples wanted in the 110 | final dataset 111 | index : list of strings 112 | the sample indexes 113 | feature_names : list of strings 114 | the feature set 115 | samples_on : string in ['col', 'cols', 'row', 'rows'] 116 | wether the samples are on columns or rows 117 | 118 | Returns 119 | ----------- 120 | data : sklearn.datasets.base.Bunch 121 | An instance of the sklearn.datasets.base.Bunch class, the meaningful 122 | attributes are .data, the data matrix, and .target, the label vector. 123 | """ 124 | mapped_y = pd.DataFrame(data=label_mapper(labels, selected_labels), 125 | index=index, columns=['Phenotype']) 126 | y = mapped_y[mapped_y['Phenotype'].isin(selected_labels)] 127 | X = pd.DataFrame(data, index=index, columns=feature_names).loc[y.index] 128 | return datasets.base.Bunch(data=X.values, feature_names=X.columns, 129 | target=y.values.ravel(), index=X.index.tolist()) 130 | 131 | def id2gs(data, gse): 132 | """Convert IDs into GENE_SYMBOL. 
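    Each probe ID in `data.feature_names` is mapped to the `GENE_SYMBOL`
    column of the GSE platform table; probes without a matching symbol keep
    their original ID with a `__NO-MATCH` suffix.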
133 | 134 | Parameters 135 | ----------- 136 | data : sklearn.datasets.base.Bunch 137 | the dataset bunch 138 | gse : GEOparse.GEOTypes.GSE 139 | the GEOparse object 140 | 141 | Returns 142 | ----------- 143 | data : sklearn.datasets.base.Bunch 144 | where feature_names has the gene symbols 145 | """ 146 | # Get the platform name 147 | platform = gse.gpls.keys()[0] 148 | 149 | # Create the lookup table 150 | lookup_table = pd.DataFrame(data=gse.gpls[platform].table['GENE_SYMBOL'].values, 151 | index=gse.gpls[platform].table['ID'].values, 152 | columns=['GENE_SYMBOL']) 153 | # Correct NaN failures 154 | for i, lt_value in enumerate(lookup_table.values.ravel()): 155 | if pd.isnull(lt_value): 156 | lookup_table.values[i] = str(lookup_table.index[i])+'__NO-MATCH' 157 | gene_symbol = [lookup_table['GENE_SYMBOL'].loc[_id] for _id in data.feature_names] 158 | 159 | # Make bunch and return 160 | return datasets.base.Bunch(data=data.data, feature_names=gene_symbol, 161 | target=data.target, index=data.index) 162 | 163 | 164 | def restrict_to_signature(data, signature): 165 | """Restrict the data to the genes in the signature. 166 | 167 | Parameters 168 | ----------- 169 | data : sklearn.datasets.base.Bunch 170 | the dataset bunch 171 | signature : list 172 | list of signature genes 173 | 174 | Returns 175 | ----------- 176 | data : sklearn.datasets.base.Bunch 177 | where feature_names has the gene symbols restricted to signature 178 | """ 179 | df = pd.DataFrame(data=data.data, index=data.index, 180 | columns=data.feature_names) 181 | # Filter out signatures gene not in the gene set 182 | signature = list(filter(lambda x: x in data.feature_names, signature)) 183 | df = df[signature] 184 | # Make bunch and return 185 | return datasets.base.Bunch(data=df.values, feature_names=df.columns, 186 | target=data.target, index=data.index) 187 | -------------------------------------------------------------------------------- /adenine/utils/__init__.py: -------------------------------------------------------------------------------- 1 | ###################################################################### 2 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 3 | # 4 | # FreeBSD License 5 | ###################################################################### 6 | -------------------------------------------------------------------------------- /adenine/utils/data_source.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """This module is mainly a wrapper for some sklearn.datasets functions.""" 5 | 6 | ###################################################################### 7 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 8 | # 9 | # FreeBSD License 10 | ###################################################################### 11 | import sys 12 | import os 13 | import logging 14 | import numpy as np 15 | import pandas as pd 16 | from sklearn import datasets 17 | from sklearn.preprocessing import Binarizer 18 | 19 | # Legacy import 20 | try: 21 | from sklearn.model_selection import StratifiedShuffleSplit 22 | except ImportError: 23 | from sklearn.cross_validation import StratifiedShuffleSplit 24 | 25 | 26 | def generate_gauss(mu=None, std=None, n_sample=None): 27 | """Create a Gaussian dataset. 28 | 29 | Generates a dataset with n_sample * n_class examples and n_dim dimensions. 
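    For instance, with `mu` of shape (3, 4), `std` of length 3 and
    `n_sample=100`, the returned `X` has shape (300, 4) and `y` holds the
    class labels 0, 1 and 2.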
30 | 31 | Parameters 32 | ----------- 33 | mu : array of float, shape : n_class x n_dim 34 | The mean of each class. 35 | 36 | std : array of float, shape : n_class 37 | The standard deviation of each Gaussian distribution. 38 | 39 | n_sample : int 40 | Number of point per class. 41 | """ 42 | n_class, n_var = mu.shape 43 | 44 | X = np.zeros((n_sample * n_class, n_var)) 45 | y = np.zeros(n_sample * n_class, dtype=int) 46 | 47 | start = 0 48 | for i, s, m in zip(range(n_class), std, mu): 49 | end = start + n_sample 50 | X[start:end, :] = s * np.random.randn(n_sample, n_var) + m 51 | y[start:end] = i 52 | start = end 53 | 54 | return X, y 55 | 56 | 57 | def load_custom(x_filename, y_filename, samples_on='rows', **kwargs): 58 | """Load a custom dataset. 59 | 60 | This function loads the data matrix and the label vector returning a 61 | unique sklearn-like object dataSetObj. 62 | 63 | Parameters 64 | ----------- 65 | x_filename : string 66 | The data matrix file name. 67 | 68 | y_filename : string 69 | The label vector file name. 70 | 71 | samples_on : string 72 | This can be either in ['row', 'rows'] if the samples lie on the row of 73 | the input data matrix, or viceversa in ['col', 'cols'] the other way 74 | around. 75 | 76 | kwargs : dict 77 | Arguments of pandas.read_csv function. 78 | 79 | Returns 80 | ----------- 81 | data : sklearn.datasets.base.Bunch 82 | An instance of the sklearn.datasets.base.Bunch class, the meaningful 83 | attributes are .data, the data matrix, and .target, the label vector. 84 | """ 85 | if x_filename is None: 86 | raise IOError("Filename for X must be specified with mode 'custom'.") 87 | 88 | if x_filename.endswith('.npy'): # it an .npy file is provided 89 | try: # labels are not mandatory 90 | y = np.load(y_filename) 91 | except IOError as e: 92 | y = None 93 | e.strerror = "No labels file provided" 94 | logging.error("I/O error({0}): {1}".format(e.errno, e.strerror)) 95 | X = np.load(x_filename) 96 | if samples_on not in ['row', 'rows']: 97 | # data matrix must be n_samples x n_features 98 | X = X.T 99 | return datasets.base.Bunch(data=X, target=y, 100 | index=np.arange(X.shape[0])) 101 | 102 | elif x_filename.endswith('.csv') or x_filename.endswith('.txt'): 103 | y = None 104 | kwargs.setdefault('header', 0) # header on first row 105 | kwargs.setdefault('index_col', 0) # indexes on first 106 | try: 107 | dfx = pd.read_csv(x_filename, **kwargs) 108 | if samples_on not in ['row', 'rows']: 109 | # data matrix must be n_samples x n_features 110 | dfx = dfx.transpose() 111 | if y_filename is not None: 112 | # Before loading labels, remove parameters that were likely 113 | # specified for data only. 114 | kwargs.pop('usecols', None) 115 | y = pd.read_csv(y_filename, **kwargs).as_matrix().ravel() 116 | 117 | except IOError as e: 118 | e.strerror = "Can't open {} or {}".format(x_filename, y_filename) 119 | logging.error("I/O error({0}): {1}".format(e.errno, e.strerror)) 120 | sys.exit(-1) 121 | 122 | return datasets.base.Bunch(data=dfx.as_matrix(), feature_names=dfx.columns.tolist(), 123 | target=y, index=dfx.index.tolist()) 124 | 125 | 126 | def load(opt='custom', x_filename=None, y_filename=None, n_samples=0, 127 | samples_on='rows', **kwargs): 128 | """Load a specified dataset. 129 | 130 | This function can be used either to load one of the standard scikit-learn 131 | datasets or a different dataset saved as X.npy Y.npy in the working 132 | directory. 
133 | 134 | Parameters 135 | ----------- 136 | opt : {'iris', 'digits', 'diabetes', 'boston', 'circles', 'moons', 137 | 'custom', 'GSEXXXXX'}, default: 'custom' 138 | Name of a predefined dataset to be loaded. 'iris', 'digits', 'diabetes' 139 | 'boston', 'circles' and 'moons' refer to the correspondent 140 | `scikit-learn` datasets. 'custom' can be used to load a custom dataset 141 | which name is specified in `x_filename` and `y_filename` (optional). 142 | 143 | x_filename : string, default : None 144 | The data matrix file name. 145 | 146 | y_filename : string, default : None 147 | The label vector file name. 148 | 149 | n_samples : int 150 | The number of samples to be loaded. This comes handy when dealing with 151 | large datasets. When n_samples is less than the actual size of the 152 | dataset this function performs a random subsampling that is stratified 153 | w.r.t. the labels (if provided). 154 | 155 | samples_on : string 156 | This can be either in ['row', 'rows'] if the samples lie on the row of 157 | the input data matrix, or viceversa in ['col', 'cols'] the other way 158 | around. 159 | 160 | data_sep : string 161 | The data separator. For instance comma, tab, blank space, etc. 162 | 163 | Returns 164 | ----------- 165 | X : array of float, shape : n_samples x n_features 166 | The input data matrix. 167 | 168 | y : array of float, shape : n_samples 169 | The label vector; np.nan if missing. 170 | 171 | feature_names : array of integers (or strings), shape : n_features 172 | The feature names; a range of number if missing. 173 | 174 | index : list of integers (or strings) 175 | This is the samples identifier, if provided as first column (or row) of 176 | of the input file. Otherwise it is just an incremental range of size 177 | n_samples. 
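    Examples
    -----------
    A minimal usage sketch (shapes refer to the scikit-learn iris dataset):

    >>> X, y, feat_names, index = load('iris')
    >>> X.shape
    (150, 4)
    >>> len(y), len(index)
    (150, 150)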
178 | """ 179 | data = None 180 | try: 181 | if opt.lower() == 'iris': 182 | data = datasets.load_iris() 183 | elif opt.lower() == 'digits': 184 | data = datasets.load_digits() 185 | elif opt.lower() == 'diabetes': 186 | data = datasets.load_diabetes() 187 | b = Binarizer(threshold=np.mean(data.target)) 188 | data.target = b.fit_transform(data.data) 189 | elif opt.lower() == 'boston': 190 | data = datasets.load_boston() 191 | b = Binarizer(threshold=np.mean(data.target)) 192 | data.target = b.fit_transform(data.data) 193 | elif opt.lower() == 'gauss': 194 | means = np.array([[-1, 1, 1, 1], [0, -1, 0, 0], [1, 1, -1, -1]]) 195 | sigmas = np.array([0.33, 0.33, 0.33]) 196 | if n_samples <= 1: 197 | n_samples = 333 198 | xx, yy = generate_gauss(mu=means, std=sigmas, n_sample=n_samples) 199 | data = datasets.base.Bunch(data=xx, target=yy) 200 | elif opt.lower() == 'circles': 201 | if n_samples == 0: 202 | n_samples = 400 203 | xx, yy = datasets.make_circles(n_samples=n_samples, factor=.3, 204 | noise=.05) 205 | data = datasets.base.Bunch(data=xx, target=yy) 206 | elif opt.lower() == 'moons': 207 | if n_samples == 0: 208 | n_samples = 400 209 | xx, yy = datasets.make_moons(n_samples=n_samples, noise=.01) 210 | data = datasets.base.Bunch(data=xx, target=yy) 211 | elif opt.lower() == 'custom': 212 | data = load_custom(x_filename, y_filename, samples_on, **kwargs) 213 | elif opt.lower().startswith('gse'): 214 | raise Exception("Use ade_GEO2csv.py to convert GEO DataSets" 215 | "into csv files.") 216 | except IOError as e: 217 | print("I/O error({0}): {1}".format(e.errno, e.strerror)) 218 | 219 | X, y = data.data, data.target 220 | if n_samples > 0 and X.shape[0] > n_samples: 221 | if y is not None: 222 | try: # Legacy for sklearn 223 | sss = StratifiedShuffleSplit(y, test_size=n_samples, n_iter=1) 224 | # idx = np.random.permutation(X.shape[0])[:n_samples] 225 | except TypeError: 226 | sss = StratifiedShuffleSplit(test_size=n_samples) \ 227 | .split(X, y) 228 | _, idx = list(sss)[0] 229 | else: 230 | idx = np.arange(X.shape[0]) 231 | np.random.shuffle(idx) 232 | idx = idx[:n_samples] 233 | 234 | X, y = X[idx, :], y[idx] 235 | else: 236 | # The length of index must be consistent with the number of samples 237 | idx = np.arange(X.shape[0]) 238 | 239 | feat_names = data.feature_names if hasattr(data, 'feature_names') \ 240 | else np.arange(X.shape[1]) 241 | index = np.array(data.index)[idx] if hasattr(data, 'index') \ 242 | else np.arange(X.shape[0]) 243 | 244 | return X, y, feat_names, index 245 | -------------------------------------------------------------------------------- /adenine/utils/extra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | import os 11 | import time 12 | import matplotlib; matplotlib.use('Agg') 13 | import seaborn as sns 14 | 15 | from datetime import datetime 16 | from itertools import product 17 | 18 | 19 | class Palette(): 20 | """Wrapper for seaborn palette.""" 21 | 22 | def __init__(self, name='Set1', n_colors=6): 23 | self.name = name 24 | self.palette = sns.color_palette(name, n_colors) 25 | 26 | def get(self, i=0): 27 | return self.palette[i] 28 | 29 | def next(self): 30 | self.palette.append(self.palette.pop(0)) 31 | return 
self.palette[-1] 32 | 33 | def reset(self, n_colors=6): 34 | self.palette = sns.color_palette(self.name, n_colors) 35 | 36 | 37 | # ensure_list = lambda x: x if type(x) == list else [x] 38 | def ensure_list(x): 39 | return x if type(x) == list else [x] 40 | 41 | 42 | def values_iterator(dictionary): 43 | """Add support for python2 or 3 dictionary iterators.""" 44 | try: 45 | v = dictionary.itervalues() # python 2 46 | except: 47 | v = dictionary.values() # python 3 48 | return v 49 | 50 | 51 | def items_iterator(dictionary): 52 | """Add support for python2 or 3 dictionary iterators.""" 53 | try: 54 | gen = dictionary.iteritems() # python 2 55 | except: 56 | gen = dictionary.items() # python 3 57 | return gen 58 | 59 | 60 | def modified_cartesian(*args, **kwargs): 61 | """Modified Cartesian product. 62 | 63 | This takes two (or more) lists and returns their Cartesian product. 64 | If one of two list is empty this function returns the non-empty one. 65 | 66 | Parameters 67 | ----------- 68 | *args : lists, length : two or more 69 | The group of input lists. 70 | 71 | Returns 72 | ----------- 73 | cp : list 74 | The Cartesian Product of the two (or more) nonempty input lists. 75 | """ 76 | # Get the non-empty input lists 77 | if kwargs.get('pipes_mode', False): 78 | nonempty = [ensure_list(arg) for arg in args if len(ensure_list(arg)) > 0] 79 | else: 80 | nonempty = [ensure_list(arg) if len(ensure_list(arg)) > 0 else [None] for arg in args] 81 | 82 | # Cartesian product 83 | return [list(c) for c in product(*nonempty)] 84 | 85 | 86 | def make_time_flag(): 87 | """Generate a time flag. 88 | 89 | This function simply generates a time flag using the current time. 90 | 91 | Returns 92 | ----------- 93 | timeFlag : string 94 | A unique time flag. 95 | """ 96 | y = str(time.localtime().tm_year) 97 | mo = str(time.localtime().tm_mon) 98 | d = str(time.localtime().tm_mday) 99 | h = str(time.localtime().tm_hour) 100 | mi = str(time.localtime().tm_min) 101 | s = str(time.localtime().tm_sec) 102 | return h + ':' + mi + ':' + s + '_' + d + '-' + mo + '-' + y 103 | 104 | 105 | def sec_to_time(seconds): 106 | """Transform seconds into a formatted time string. 107 | 108 | Parameters 109 | ----------- 110 | seconds : int 111 | Seconds to be transformed. 112 | 113 | Returns 114 | ----------- 115 | time : string 116 | A well formatted time string. 117 | """ 118 | m, s = divmod(seconds, 60) 119 | h, m = divmod(m, 60) 120 | return "%02d:%02d:%02d" % (h, m, s) 121 | 122 | 123 | def get_time(): 124 | """Get time of now, in string.""" 125 | return datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H.%M.%S') 126 | 127 | 128 | def title_from_filename(root, step_sep="$\mapsto$"): 129 | # Define the plot title. List is smth like ['results', 'ade_debug_', 'Standardize', 'PCA'] 130 | i = [i for i, s in enumerate(root.split(os.sep)) if 'ade_' in s][0] 131 | 132 | # lambda function below does: ('a_b_c') -> 'c b a' 133 | return step_sep.join(map(lambda x: ' '.join(x.split('_')[::-1]), root.split(os.sep)[i+1:])) 134 | 135 | 136 | def ensure_symmetry(X): 137 | """Ensure matrix symmetry. 138 | 139 | Parameters 140 | ----------- 141 | X : numpy.ndarray 142 | Input matrix of precomputed pairwise distances. 143 | 144 | Returns 145 | ----------- 146 | new_X : numpy.ndarray 147 | Symmetric distance matrix. Values are averaged. 148 | """ 149 | if not (X.T == X).all(): 150 | return (X.T + X) / 2. 
151 | else: 152 | return X 153 | 154 | 155 | def timed(function): 156 | """Decorator that measures wall time of the decored function.""" 157 | def timed_function(*args, **kwargs): 158 | t0 = time.time() 159 | result = function(*args, **kwargs) 160 | print("\nAdenine {} - Elapsed time : {} s\n" 161 | .format(function.__name__, sec_to_time(time.time() - t0))) 162 | return result 163 | return timed_function 164 | 165 | 166 | def set_module_defaults(module, dictionary): 167 | """Set default variables of a module, given a dictionary. 168 | 169 | Used after the loading of the configuration file to set some defaults. 170 | """ 171 | for k, v in items_iterator(dictionary): 172 | try: 173 | getattr(module, k) 174 | except AttributeError: 175 | setattr(module, k, v) 176 | -------------------------------------------------------------------------------- /adenine/utils/scores.py: -------------------------------------------------------------------------------- 1 | """Validation utils for clustering algorithms. 2 | 3 | Notes 4 | ----- 5 | Precision, recall and F score 6 | In multiclass classification / clustering, a confusion matrix can be 7 | obtained. To validate the result, one can use precision, recall and 8 | f score. These are obtained using TP, FP, FN, TN. 9 | In particular, for each class (true label) x, in a confusion matrix cm: 10 | - true positive: diagonal position, cm(x, x). 11 | - false positive: sum of column x (without main diagonal), 12 | sum(cm(:, x)) - cm(x, x). 13 | - false negative: sum of row x (without main diagonal), 14 | sum(cm(x, :), 2) - cm(x, x). 15 | - true negative: sum of all the matrix without tp, fp, fn. 16 | 17 | Averaging over all classes (with or without weighting) gives values for the 18 | entire model. 19 | 20 | Author: Federico Tomasi 21 | Copyright (c) 2016, Federico Tomasi. 22 | Licensed under the FreeBSD license (see LICENSE.txt). 
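Example
-------
With the confusion matrix cm = [[5, 1], [2, 4]] (illustrative values) and
true label x = 0 (first row): tp = cm(0, 0) = 5, fp = sum(cm(:, 0)) - tp = 2,
fn = sum(cm(0, :)) - tp = 1, tn = 12 - tp - fp - fn = 4.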
23 | """ 24 | import matplotlib; matplotlib.use('Agg') 25 | import numpy as np 26 | import pandas as pd 27 | import seaborn as sns 28 | 29 | 30 | def get_clones_real_estimated(filename): 31 | """Get true and estimated labels from a partis-generated dataset.""" 32 | df = pd.read_csv(filename, dialect='excel-tab', header=0, 33 | usecols=('SEQUENCE_ID', 'CLONE')) 34 | df['CLONE_ID'] = df['SEQUENCE_ID'].str.split('_').apply(lambda x: x[3]) 35 | 36 | clone_ids = np.array(df['CLONE_ID'], dtype=str) 37 | found_ids = np.array(df['CLONE'], dtype=str) 38 | return clone_ids, found_ids 39 | 40 | 41 | def order_cm(cm): 42 | """Reorder a multiclass confusion matrix.""" 43 | # reorder rows 44 | idx_rows = np.max(cm, axis=1).argsort()[::-1] 45 | b = cm[idx_rows, :] 46 | 47 | # reorder cols 48 | max_idxs = np.ones(b.shape[1], dtype=bool) 49 | final_idxs = [] 50 | for i, row in enumerate(b.copy()): 51 | if i == b.shape[0] or not max_idxs.any(): 52 | break 53 | row[~max_idxs] = np.min(cm) - 1 54 | max_idx = np.argmax(row) 55 | final_idxs.append(max_idx) 56 | max_idxs[max_idx] = False 57 | 58 | idx_cols = np.append(np.array(final_idxs, dtype=int), 59 | np.argwhere(max_idxs).T[0]) # residuals 60 | 61 | # needs also this one 62 | b = b[:, idx_cols] 63 | bb = b.copy() 64 | max_idxs = np.ones(b.shape[0], dtype=bool) 65 | final_idxs = [] 66 | for i in range(b.shape[1]): 67 | # for each column 68 | if i == b.shape[1] or not max_idxs.any(): 69 | break 70 | col = bb[:, i] 71 | col[~max_idxs] = -1 72 | max_idx = np.argmax(col) 73 | final_idxs.append(max_idx) 74 | max_idxs[max_idx] = False 75 | 76 | idx_rows2 = np.append(np.array(final_idxs, dtype=int), 77 | np.argwhere(max_idxs).T[0]) # residuals 78 | 79 | idx = np.argsort(idx_rows) 80 | return b[idx_rows2, :], idx_rows2[idx], idx_cols 81 | 82 | 83 | def confusion_matrix(true_labels, estimated_labels, ordered=True): 84 | """Return a confusion matrix in a multiclass / multilabel problem.""" 85 | true_labels = np.array(true_labels, dtype=str) 86 | estimated_labels = np.array(estimated_labels, dtype=str) 87 | if true_labels.shape[0] != estimated_labels.shape[0]: 88 | raise ValueError("Inputs must have the same dimensions.") 89 | rows = np.unique(true_labels) 90 | cols = np.unique(estimated_labels) 91 | 92 | # padding only on columns 93 | cm = np.zeros((rows.shape[0], max(cols.shape[0], rows.shape[0]))) 94 | from collections import Counter 95 | for i, row in enumerate(rows): 96 | idx_rows = true_labels == row 97 | counter = Counter(estimated_labels[idx_rows]) 98 | for g in counter: 99 | idx_col = np.where(cols == g)[0][0] 100 | cm[i, idx_col] += counter[g] 101 | 102 | cols = np.append(cols, ['pad'] * (cm.shape[1] - cols.shape[0])) 103 | if ordered: 104 | cm, rr, cc = order_cm(cm) 105 | rows, cols = rows[rr], cols[cc] 106 | return cm, rows, cols 107 | 108 | 109 | def precision_recall_fscore(a, method='micro', beta=1.): 110 | """Return a precision / recall value for multiclass confuison matrix cm. 
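    With method='micro' the per-class TP, FP and FN counts are summed over
    all classes before computing the ratios, while with method='macro' the
    per-class precision and recall values are averaged (classes with an empty
    denominator are skipped). The F score is then
    (1 + beta^2) * precision * recall / (beta^2 * precision + recall).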
111 | 112 | See 113 | http://stats.stackexchange.com/questions/44261/how-to-determine-the-quality-of-a-multiclass-classifier 114 | """ 115 | def _single_measures(a, i): 116 | tp = a[i, i] 117 | fp = np.sum(a[:, i]) - tp 118 | fn = np.sum(a[i, :]) - tp 119 | tn = a.sum() - tp - fp - fn 120 | return tp, fp, fn, tn 121 | 122 | singles = zip(*[_single_measures(a, i) for i in range(min(a.shape))]) 123 | tps, fps, fns, tns = map(lambda x: np.array(list(x), dtype=float), singles) 124 | 125 | if method == 'micro': 126 | precision = float(tps.sum()) / (tps + fps).sum() 127 | recall = float(tps.sum()) / (tps + fns).sum() 128 | elif method == 'macro': 129 | sum_ = tps + fps 130 | idx = np.where(sum_) 131 | precision = (tps[idx] / sum_[idx]).mean() 132 | 133 | sum_ = tps + fns 134 | idx = np.where(sum_) 135 | recall = (tps[idx] / sum_[idx]).mean() 136 | fscore = (1 + beta * beta) * precision * recall / \ 137 | (beta * beta * precision + recall) 138 | return precision, recall, fscore 139 | 140 | 141 | def show_heatmap(filename): 142 | """Show confusion matrix given of a partis-generated tab-delimited db.""" 143 | true_labels, estimated_labels = get_clones_real_estimated(filename) 144 | cm, rows, cols = confusion_matrix(true_labels, estimated_labels) 145 | df = pd.DataFrame(cm, index=rows, columns=cols) 146 | sns.heatmap(df) 147 | sns.plt.show() 148 | -------------------------------------------------------------------------------- /adenine/utils/templates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | def new_fun(arg1 = 'Default', arg2 = 'Default'): 11 | """Short explanation. 12 | 13 | This is the very long explanation 14 | 15 | Parameters 16 | ----------- 17 | arg1 : type, default : 'Default' 18 | What is arg1. 19 | 20 | arg2 : {'Default', 'Different', 'Another'} 21 | What is arg2. 22 | 23 | Returns 24 | ----------- 25 | out : type 26 | What is out. 27 | """ 28 | -------------------------------------------------------------------------------- /doc/GiHubProjectPage.txt: -------------------------------------------------------------------------------- 1 | ### Welcome to ADENINE. 2 | ADENINE is a machine learning and data mining framework that helps you answering the tedious question: are my data relevant for the problem I'm dealing with? 3 | 4 | ### Implementation 5 | With ADENINE you can build different unsupervised data analysis pipelines made of the following steps: 6 | 7 | 1. missing values imputing 8 | 2. preprocessing 9 | 3. dimensionality reduction 10 | 4. clustering 11 | 12 | a list of the most common state-of-the-art methods is available for each step. 13 | 14 | ### Dependencies 15 | ADENINE is developed using Python 2.7 and inherits its main functionalities from: 16 | * numpy 17 | * scipy 18 | * scikit-learn 19 | * matplotlib 20 | * seaborn 21 | 22 | ### Authors and Contributors 23 | Current developers: Samuele Fiorini (@samuelefiorini) and Federico Tomasi (@fdtomasi). 24 | 25 | ### Support or Contact 26 | Having trouble with ADENINE? 
Check out our [documentation](http://www.slipguru.unige.it/Software/adenine/) or contact us: 27 | * samuele [dot] fiorini [at] dibris [dot] unige [dot] it 28 | * federico [dot] tomasi [at] dibris [dot] unige [dot] it 29 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/adenine.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/adenine.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/adenine" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/adenine" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 
157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /doc/devPlan/plan.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/devPlan/plan.pdf -------------------------------------------------------------------------------- /doc/devPlan/plan.tex: -------------------------------------------------------------------------------- 1 | %---------------------------------------------------------------------------------------- 2 | % PACKAGES AND OTHER DOCUMENT CONFIGURATIONS 3 | %---------------------------------------------------------------------------------------- 4 | 5 | \documentclass[paper=a4, fontsize=10pt]{scrartcl} % A4 paper and 10pt font size 6 | 7 | \usepackage[T1]{fontenc} % Use 8-bit encoding that has 256 glyphs 8 | \usepackage[english]{babel} % English language/hyphenation 9 | \usepackage{amsmath,amsfonts,amsthm} % Math packages 10 | 11 | \usepackage[margin=1in]{geometry} 12 | 13 | \usepackage{xspace} % space after new commands 14 | \usepackage{hyperref} 15 | \usepackage{enumitem} 16 | 17 | 18 | \usepackage{sectsty} % Allows customizing section commands 19 | \allsectionsfont{\centering \normalfont\scshape} % Make all sections centered, the default font and small caps 20 | 21 | \usepackage{fancyhdr} % Custom headers and footers 22 | \pagestyle{fancyplain} % Makes all pages in the document conform to the custom headers and footers 23 | \fancyhead{} % No page header - if you want one, create it in the same way as the footers below 24 | \fancyfoot[L]{} % Empty left footer 25 | \fancyfoot[C]{} % Empty center footer 26 | \fancyfoot[R]{\thepage} % Page numbering for right footer 27 | \renewcommand{\headrulewidth}{0pt} % Remove header underlines 28 | \renewcommand{\footrulewidth}{0pt} % Remove footer underlines 29 | \setlength{\headheight}{11pt} % Customize the height of the header 30 | 31 | \numberwithin{equation}{section} % Number equations within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4) 32 | \numberwithin{figure}{section} % Number figures within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4) 33 | \numberwithin{table}{section} % Number tables within sections (i.e. 
1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4) 34 | 35 | \setlength\parindent{0pt} % Removes all indentation from paragraphs - comment this line for an assignment with lots of text 36 | 37 | %---------------------------------------------------------------------------------------- 38 | % TITLE SECTION 39 | %---------------------------------------------------------------------------------------- 40 | 41 | \newcommand{\horrule}[1]{\rule{\linewidth}{#1}} % Create horizontal rule command with 1 argument of height 42 | \newcommand{\adenine}{{\tt adenine}\xspace} 43 | 44 | \title{ 45 | \normalfont \normalsize 46 | \huge{\tt ADENINE}: A Data ExploratioN pipelINE \\ 47 | \horrule{2pt} \\[0.5cm] % Thick bottom horizontal rule 48 | development plan \\ % The assignment title 49 | } 50 | 51 | \author{Samuele Fiorini} % Your name 52 | 53 | \date{\normalsize\today} % Today's date or a custom date 54 | 55 | \begin{document} 56 | 57 | \maketitle % Print the title 58 | 59 | %---------------------------------------------------------------------------------------- 60 | % PROBLEM 1 61 | %---------------------------------------------------------------------------------------- 62 | 63 | \section{Introduction and Motivation} 64 | 65 | A question that arises at the beginning of almost every new data analysis is 66 | the following: {\sl are my data relevant for the problem I'm dealing with}? \\ 67 | 68 | The final goal of this project (named \adenine) is to help its user to have a glimpse of the answer of 69 | this tedious question. \\ 70 | 71 | In order to reach this goal, \adenine will take advantage of machine learning and 72 | data mining techniques. The final pipeline will essentially consist of three steps: 73 | 74 | \begin{enumerate} 75 | 76 | \item {\bf Preprocessing}: have you ever wondered what would have 77 | changed if only your data have been preprocessed in a different way? Or if 78 | data preprocessing is a good idea at all? \adenine will offer several 79 | preprocessing procedures, such as: data centering, Min-Max scaling, 80 | standardization or normalization and allows you to compare the results of the 81 | analysis conducted with different starting point. 82 | 83 | \item {\bf Dimensionality Reduction} (DR): in the context of data 84 | exploration, this phase becomes particularly helpful for high dimensional data (e.g. 85 | -omics scenario). This step, generically named DR, may actually include some 86 | manifold learning (such as Isomap, Multidimensional Scaling, etc), supervised 87 | (Linear Discriminant Analysis) and unsupervised (Principal Component Analysis, 88 | kernel PCA) techniques. 89 | 90 | \item {\bf Clustering}: this section aims at grouping data into clusters without taking 91 | into account the class labels. Several techniques such as K-Means, Spectral or Hierarchical 92 | clustering will work on both original and dimensionality reduced data. 93 | 94 | \end{enumerate} 95 | 96 | The final output of \adenine will be an as compact as possible visual and textual representation of 97 | the results obtained from the pipelines made with each possible combination of the algorithms 98 | implemented at each step. 
As an example, referring to a pipeline built as: 99 | 100 | \begin{center} 101 | {\sl Data normalization $\rightarrow$ PCA $\rightarrow$ K-Means} 102 | \end{center} 103 | 104 | the output would be something like: 105 | 106 | \begin{itemize} 107 | 108 | \item an output file containing the norm of the original variables (which has 109 | been used to coerce all the features into $[0,1]$), 110 | 111 | \item a 2-D or 3-D scatter plot of the data projected along the principal 112 | components and the percentage of explained variance associated with each 113 | one of them, 114 | 115 | \item a pictorial representation of the data clustering results 116 | obtained with the optimal number of clusters (learned from the data). 117 | 118 | \end{itemize} 119 | 120 | \subsection{Material for PhD progress} 121 | 122 | The study behind the implementation of \adenine builds on 123 | four PhD courses of my first-year work plan: 124 | 125 | \begin{enumerate} 126 | 127 | \item {\sl A Machine Learning Crash Course} [DIBRIS] (Odone, Rosasco): \adenine will cover 128 | a fair number of (mainly unsupervised) machine learning techniques. Hence, this course 129 | has been fundamental for acquiring the statistical learning background needed to understand 130 | the underlying mechanisms of the algorithms. 131 | 132 | \item {\sl Programming Concepts in Python} [DIBRIS] (Tacchella): I plan to implement \adenine in 133 | {\tt Python}. Hence, most of the implementation choices will be made on the basis of the material 134 | covered in the course. 135 | 136 | \item {\sl Programming Complex Heterogeneous Parallel Systems} [IMATI] 137 | (Clematis, D'Agostino, Danovaro, Galizia) and the {\sl 24th Summer School on 138 | Parallel Computing} [CINECA] (Erbacci): \adenine will present several {\sl embarrassingly 139 | parallel} workloads as well as several {\sl isolated GPU-acceleratable} computations. 140 | The former PhD course and the latter school will allow me to develop the parallel computing 141 | skills I need to implement \adenine in a way that is as optimized as possible. 142 | 143 | \end{enumerate} 144 | 145 | 146 | \section{Implemented Algorithms} 147 | 148 | The implementation of nearly all the algorithms of \adenine will refer to the 149 | \href{http://scikit-learn.org/stable/index.html}{\tt scikit-learn} Python 150 | library. See the following \href{http://scikit-learn.org/stable/unsupervised_learning.html}{\tt link} for a 151 | comprehensive list. 152 | 153 | \subsection{Preprocessing} 154 | 155 | At this step the data will be fed to the following preprocessing procedures: 156 | \begin{enumerate}[start = 0] 157 | \item no preprocessing: the analysis will be conducted on raw data; 158 | 159 | \item na\"ive recentering: remove the mean; 160 | 161 | \item standardization: remove the mean and scale each feature by 162 | its standard deviation, yielding zero-mean, unit-variance features; 163 | 164 | \item normalization: scale all the samples to have unit norm. 165 | 166 | \end{enumerate} 167 | 168 | In its first version \adenine will allow the user to impute the missing values by means of the 169 | median, the mean or the most frequent value (future works are in Section~\ref{sec:future}). 170 | See the {\tt sklearn} \href{http://scikit-learn.org/stable/modules/preprocessing.html}{docs} 171 | on data preprocessing for further details. 172 | 173 | \subsection{Dimensionality reduction} 174 | 175 | The following is a work-in-progress list of the techniques I plan to 176 | make available in \adenine. 
The list includes algorithms that come 177 | from very different standpoints, but that have a common outcome: 178 | the estimation of a low-dimensional embedding (manifold) onto which the data can 179 | be projected for visualization or further purposes. 180 | 181 | \begin{enumerate}[label=(\alph*)] 182 | 183 | \item Principal Component Analysis (PCA), in its Incremental or Randomized variants 184 | in the case of big data; 185 | 186 | \item Kernel PCA, which may come with different kernels (Gaussian, 187 | polynomial, and so on); 188 | 189 | \item Isomap; 190 | 191 | \item Locally Linear Embedding (LLE), in its Modified (MLLE) or Hessian 192 | (HLLE) regularized variants; 193 | 194 | \item Spectral Embedding (SE); 195 | 196 | \item Local Tangent Space Alignment (LTSA); 197 | 198 | \item Multidimensional Scaling (MDS), in its metric and non-metric versions; 199 | 200 | \item t-distributed Stochastic Neighbor Embedding (t-SNE). 201 | 202 | \end{enumerate} 203 | 204 | \subsection{Clustering} 205 | 206 | Along the same lines, this section presents a list of the clustering techniques I 207 | plan to include in \adenine. 208 | 209 | \begin{enumerate} 210 | 211 | \item [($\alpha$)] K-Means, in its Mini-Batch variant for big data; 212 | 213 | \item [($\beta$)] Affinity Propagation; 214 | 215 | \item [($\gamma$)] Mean Shift; 216 | 217 | \item [($\delta$)] Spectral Clustering; 218 | 219 | \item [($\epsilon$)] Hierarchical Agglomerative Clustering, exploring 220 | different linkage types (i.e., Ward, complete, average) as well as different 221 | metrics (e.g., Euclidean, Manhattan, Minkowski); 222 | 223 | \item [($\zeta$)] DBSCAN; 224 | 225 | \item [($\eta$)] Birch. 226 | 227 | \end{enumerate} 228 | 229 | Several indices to analyze the clustering performance will be included: some 230 | of them require ground-truth labels (such as the Adjusted Rand Index (ARI), the 231 | Adjusted Mutual Information (AMI), the homogeneity, completeness or V-measure 232 | scores), while others evaluate the cluster compactness or the separation 233 | between clusters (such as the silhouette score). 234 | 235 | \section{Future Works} \label{sec:future} 236 | 237 | \adenine is not meant to be an all-inclusive tool. This section, which 238 | will always be a work in progress, lists the features that 239 | are not going to be implemented in the first version of \adenine, but that may 240 | be implemented later on. 241 | 242 | \begin{itemize} 243 | 244 | \item How can we handle missing values? \adenine may offer some statistically robust 245 | imputation tools (such as low-rank matrix completion or collaborative filtering) in 246 | future versions; 247 | 248 | \item Kernel K-Means; 249 | 250 | \item Dictionary Learning; 251 | 252 | \item Factor Analysis; 253 | 254 | \item Non-negative Matrix Factorization; 255 | 256 | \item Outlier Detection.
257 | 258 | \end{itemize} 259 | %---------------------------------------------------------------------------------------- 260 | 261 | \end{document} 262 | -------------------------------------------------------------------------------- /doc/source/adenine_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/source/adenine_logo.pdf -------------------------------------------------------------------------------- /doc/source/adenine_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/source/adenine_logo.png -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # adenine documentation build configuration file, created by 4 | # sphinx-quickstart on Fri May 22 12:31:54 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath('.')) 22 | sys.path.insert(0, os.path.abspath('sphinxext')) 23 | 24 | from adenine import __version__ as VERSION 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | #needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.doctest', 37 | 'sphinx.ext.todo', 38 | 'sphinx.ext.coverage', 39 | 'sphinx.ext.mathjax', 40 | 'sphinx.ext.viewcode', 41 | 'sphinx.ext.autosummary', 42 | 'sphinx.ext.intersphinx', 43 | 'numpydoc', 44 | 'sphinxcontrib.programoutput', 45 | ] 46 | 47 | # Extension configurations 48 | autoclass_content = 'init' 49 | autodoc_member_order = 'bysource' 50 | numpydoc_show_class_members = False 51 | 52 | # Add any paths that contain templates here, relative to this directory. 53 | templates_path = ['_templates'] 54 | 55 | # The suffix of source filenames. 56 | source_suffix = '.rst' 57 | 58 | # The encoding of source files. 59 | #source_encoding = 'utf-8-sig' 60 | 61 | # The master toctree document. 62 | master_doc = 'index' 63 | 64 | # General information about the project. 65 | project = u'ADENINE' 66 | copyright = u'2016, Samuele Fiorini - Federico Tomasi - Annalisa Barla' 67 | #modindex_common_prefix = ['adenine.'] 68 | 69 | # The version info for the project you're documenting, acts as replacement for 70 | # |version| and |release|, also used in various other places throughout the 71 | # built documents. 72 | # 73 | # The short X.Y version. 74 | version = VERSION 75 | # The full version, including alpha/beta/rc tags. 
76 | release = version 77 | 78 | # The language for content autogenerated by Sphinx. Refer to documentation 79 | # for a list of supported languages. 80 | #language = None 81 | 82 | # There are two options for replacing |today|: either, you set today to some 83 | # non-false value, then it is used: 84 | #today = '' 85 | # Else, today_fmt is used as the format for a strftime call. 86 | #today_fmt = '%B %d, %Y' 87 | 88 | # List of patterns, relative to source directory, that match files and 89 | # directories to ignore when looking for source files. 90 | exclude_patterns = [] 91 | 92 | # The reST default role (used for this markup: `text`) to use for all 93 | # documents. 94 | #default_role = None 95 | 96 | # If true, '()' will be appended to :func: etc. cross-reference text. 97 | #add_function_parentheses = True 98 | 99 | # If true, the current module name will be prepended to all description 100 | # unit titles (such as .. function::). 101 | #add_module_names = True 102 | 103 | # If true, sectionauthor and moduleauthor directives will be shown in the 104 | # output. They are ignored by default. 105 | #show_authors = False 106 | 107 | # The name of the Pygments (syntax highlighting) style to use. 108 | pygments_style = 'sphinx' 109 | 110 | # A list of ignored prefixes for module index sorting. 111 | #modindex_common_prefix = [] 112 | 113 | # If true, keep warnings as "system message" paragraphs in the built documents. 114 | #keep_warnings = False 115 | 116 | 117 | # -- Options for HTML output ---------------------------------------------- 118 | 119 | # The theme to use for HTML and HTML Help pages. See the documentation for 120 | # a list of builtin themes. 121 | # html_theme = 'default' 122 | # html_theme = "nature" 123 | html_theme = 'slipGURUTheme' 124 | 125 | 126 | # Theme options are theme-specific and customize the look and feel of a theme 127 | # further. For a list of options available for each theme, see the 128 | # documentation. 129 | #html_theme_options = {} 130 | 131 | # Add any paths that contain custom themes here, relative to this directory. 132 | html_theme_path = ['.'] 133 | 134 | # The name for this set of Sphinx documents. If None, it defaults to 135 | # " v documentation". 136 | #html_title = None 137 | 138 | # A shorter title for the navigation bar. Default is the same as html_title. 139 | #html_short_title = None 140 | 141 | # The name of an image file (relative to this directory) to place at the top 142 | # of the sidebar. 143 | html_logo = 'adenine_logo.png' 144 | 145 | # The name of an image file (within the static path) to use as favicon of the 146 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 147 | # pixels large. 148 | #html_favicon = None 149 | 150 | # Add any paths that contain custom static files (such as style sheets) here, 151 | # relative to this directory. They are copied after the builtin static files, 152 | # so a file named "default.css" will overwrite the builtin "default.css". 153 | html_static_path = ['_static'] 154 | 155 | # Add any extra paths that contain custom files (such as robots.txt or 156 | # .htaccess) here, relative to this directory. These files are copied 157 | # directly to the root of the documentation. 158 | #html_extra_path = [] 159 | 160 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 161 | # using the given strftime format. 
162 | #html_last_updated_fmt = '%b %d, %Y' 163 | 164 | # If true, SmartyPants will be used to convert quotes and dashes to 165 | # typographically correct entities. 166 | #html_use_smartypants = True 167 | 168 | # Custom sidebar templates, maps document names to template names. 169 | #html_sidebars = {} 170 | 171 | # Additional templates that should be rendered to pages, maps page names to 172 | # template names. 173 | #html_additional_pages = {} 174 | 175 | # If false, no module index is generated. 176 | #html_domain_indices = True 177 | 178 | # If false, no index is generated. 179 | #html_use_index = True 180 | 181 | # If true, the index is split into individual pages for each letter. 182 | #html_split_index = False 183 | 184 | # If true, links to the reST sources are added to the pages. 185 | #html_show_sourcelink = True 186 | 187 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 188 | #html_show_sphinx = True 189 | 190 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 191 | #html_show_copyright = True 192 | 193 | # If true, an OpenSearch description file will be output, and all pages will 194 | # contain a tag referring to it. The value of this option must be the 195 | # base URL from which the finished HTML is served. 196 | #html_use_opensearch = '' 197 | 198 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 199 | #html_file_suffix = None 200 | 201 | # Output file base name for HTML help builder. 202 | htmlhelp_basename = 'adeninedoc' 203 | 204 | 205 | # -- Options for LaTeX output --------------------------------------------- 206 | 207 | latex_elements = { 208 | # The paper size ('letterpaper' or 'a4paper'). 209 | #'papersize': 'letterpaper', 210 | 211 | # The font size ('10pt', '11pt' or '12pt'). 212 | #'pointsize': '10pt', 213 | 214 | # Additional stuff for the LaTeX preamble. 215 | #'preamble': '', 216 | } 217 | 218 | # Grouping the document tree into LaTeX files. List of tuples 219 | # (source start file, target name, title, 220 | # author, documentclass [howto, manual, or own class]). 221 | latex_documents = [ 222 | ('index', 'adenine.tex', u'adenine Documentation', 223 | u'Samuele Fiorini - Federico Tomasi - Annalisa Barla', 'manual'), 224 | ] 225 | 226 | # The name of an image file (relative to this directory) to place at the top of 227 | # the title page. 228 | latex_logo = 'adenine_logo.png' 229 | 230 | # For "manual" documents, if this is true, then toplevel headings are parts, 231 | # not chapters. 232 | #latex_use_parts = False 233 | 234 | # If true, show page references after internal links. 235 | #latex_show_pagerefs = False 236 | 237 | # If true, show URL addresses after external links. 238 | #latex_show_urls = False 239 | 240 | # Documents to append as an appendix to all manuals. 241 | #latex_appendices = [] 242 | 243 | # If false, no module index is generated. 244 | #latex_domain_indices = True 245 | 246 | 247 | # -- Options for manual page output --------------------------------------- 248 | 249 | # One entry per manual page. List of tuples 250 | # (source start file, name, description, authors, manual section). 251 | man_pages = [ 252 | ('index', 'adenine', u'adenine Documentation', 253 | [u'Samuele Fiorini - Federico Tomasi - Annalisa Barla'], 1) 254 | ] 255 | 256 | # If true, show URL addresses after external links. 
257 | #man_show_urls = False 258 | 259 | 260 | # -- Options for Texinfo output ------------------------------------------- 261 | 262 | # Grouping the document tree into Texinfo files. List of tuples 263 | # (source start file, target name, title, author, 264 | # dir menu entry, description, category) 265 | texinfo_documents = [ 266 | ('index', 'adenine', u'adenine Documentation', 267 | u'Samuele Fiorini - Federico Tomasi - Annalisa Barla', 'adenine', 'One line description of project.', 268 | 'Miscellaneous'), 269 | ] 270 | 271 | # Documents to append as an appendix to all manuals. 272 | #texinfo_appendices = [] 273 | 274 | # If false, no module index is generated. 275 | #texinfo_domain_indices = True 276 | 277 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 278 | #texinfo_show_urls = 'footnote' 279 | 280 | # If true, do not generate a @detailmenu in the "Top" node's menu. 281 | #texinfo_no_detailmenu = False 282 | -------------------------------------------------------------------------------- /doc/source/dependencies.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | seaborn 4 | pydot 5 | scikit-learn 6 | -------------------------------------------------------------------------------- /doc/source/drawing.svg: -------------------------------------------------------------------------------- (vector drawing; the SVG markup is not preserved in this dump) -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. adenine documentation master file, created by 2 | sphinx-quickstart on Fri May 22 12:31:54 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ===================================== 7 | ADENINE (A Data ExploratioN pIpeliNE) 8 | ===================================== 9 | 10 | **ADENINE** is a machine learning and data mining Python pipeline that helps you answer this tedious question: are my data relevant to the problem I'm dealing with? 11 | 12 | The main structure of adenine can be summarized in the following 4 steps. 13 | 14 | 1. **Imputing:** Does your dataset have missing entries? In the first step you can fill the missing values choosing among different strategies: feature-wise median, mean or most frequent value, or a more robust k-NN imputation (see the sketch below). 15 | 16 | 2. **Preprocessing:** Have you ever wondered what would have changed if only your data had been preprocessed in a different way? Or whether data preprocessing is a good idea at all? ADENINE offers several preprocessing procedures, such as data recentering, Min-Max scaling, standardization or normalization, and allows you to compare the results of analyses made with different preprocessing steps as starting points. 17 | 18 | 3. **Dimensionality Reduction:** In the context of data exploration, this phase becomes particularly helpful for high-dimensional data. This step includes some manifold learning (such as isomap, multidimensional scaling, etc.) and unsupervised dimensionality reduction (principal component analysis, kernel PCA) techniques. 19 | 20 | 4. **Clustering:** This step aims at grouping data into clusters in an unsupervised manner. Several techniques such as k-means, spectral or hierarchical clustering are offered.
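The following is a purely illustrative sketch of the imputing step using plain scikit-learn (version >= 0.20); it is **not** the adenine API, and the toy matrix and parameter values are placeholders::

    # Illustrative only: compare simple imputing strategies on a toy matrix.
    import numpy as np
    from sklearn.impute import SimpleImputer   # scikit-learn >= 0.20

    X = np.array([[1.0, 2.0],
                  [np.nan, 4.0],
                  [5.0, np.nan],
                  [7.0, 8.0]])

    for strategy in ('median', 'mean', 'most_frequent'):
        imputer = SimpleImputer(strategy=strategy)
        print(strategy)
        print(imputer.fit_transform(X))

    # A k-NN based imputer is also available in recent scikit-learn
    # releases as sklearn.impute.KNNImputer (>= 0.22).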
21 | 22 | The final output of adenine is a compact and textual representation of the results obtained from the pipelines made with each possible combination of the algorithms implemented at each step. 23 | 24 | User documentation 25 | ================== 26 | .. toctree:: 27 | :maxdepth: 2 28 | 29 | tutorial.rst 30 | 31 | .. _api: 32 | 33 | *********************** 34 | API 35 | *********************** 36 | 37 | .. toctree:: 38 | :maxdepth: 1 39 | 40 | 41 | Pipeline utilities 42 | ----------------------------- 43 | 44 | .. automodule:: adenine.core.define_pipeline 45 | :members: 46 | 47 | .. automodule:: adenine.core.pipelines 48 | :members: 49 | 50 | .. automodule:: adenine.core.analyze_results 51 | :members: 52 | 53 | Input Data 54 | ----------------------------- 55 | 56 | .. automodule:: adenine.utils.data_source 57 | :members: 58 | 59 | 60 | Plotting functions 61 | ----------------------------- 62 | 63 | .. automodule:: adenine.core.plotting 64 | :members: 65 | 66 | 67 | Extra tools 68 | ----------------------------- 69 | 70 | .. automodule:: adenine.utils.extra 71 | :members: 72 | 73 | 74 | .. Indices and tables 75 | .. ================== 76 | 77 | .. * :ref:`genindex` 78 | .. * :ref:`modindex` 79 | .. * :ref:`search` 80 | 81 | -------------------------------------------------------------------------------- /doc/source/modules.rst: -------------------------------------------------------------------------------- 1 | . 2 | = 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | adenine 8 | setup 9 | -------------------------------------------------------------------------------- /doc/source/slipGURUTheme/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "basic/layout.html" %} 2 | 3 | {% block sidebarsearch %} 4 | {{ super() }} 5 | 8 | {% endblock %} 9 | 10 | {% block extrahead %} 11 | 14 | 15 | SlipGURU 17 | Dipartimento di Informatica e Scienze dell'Informazione 20 | Università Degli Studi di Genova 23 | 24 | {% endblock %} 25 | 26 | {% block sidebarrel %} 27 | {% if prev %} 28 | {{ super() }} 29 | {% else %} 30 | {% endif %} 31 | {% endblock %} 32 | 33 | {% block sidebartoc %} 34 | {% if prev %} 35 | {{ super() }} 36 | {% else %} 37 |

Download

38 |

Current version: {{ release }}

39 |

Get {{ project }} from the 40 | Python Package Index, 41 | or install it with: 42 |

43 |
pip install --upgrade {{ project }}
44 |

or clone it from our GitHub repository:

45 |
git clone https://github.com/slipguru/{{ project }}
46 | 47 | 48 | 53 |

54 | {% endif %} 55 | {% endblock %} 56 | -------------------------------------------------------------------------------- /doc/source/slipGURUTheme/static/logos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/source/slipGURUTheme/static/logos.png -------------------------------------------------------------------------------- /doc/source/slipGURUTheme/static/slipGuru.css: -------------------------------------------------------------------------------- 1 | @import "default.css"; 2 | 3 | /** 4 | * Spacing fixes 5 | */ 6 | 7 | div.body p, div.body dd, div.body li { 8 | line-height: 125%; 9 | } 10 | 11 | ul.simple { 12 | margin-top: 0; 13 | margin-bottom: 0; 14 | padding-top: 0; 15 | padding-bottom: 0; 16 | } 17 | 18 | /* spacing around blockquoted fields in parameters/attributes/returns */ 19 | td.field-body > blockquote { 20 | margin-top: 0.1em; 21 | margin-bottom: 0.5em; 22 | } 23 | 24 | /* spacing around example code */ 25 | div.highlight > pre { 26 | padding: 2px 5px 2px 5px; 27 | } 28 | 29 | /* spacing in see also definition lists */ 30 | dl.last > dd { 31 | margin-top: 1px; 32 | margin-bottom: 5px; 33 | margin-left: 30px; 34 | } 35 | 36 | /* hide overflowing content in the sidebar */ 37 | div.sphinxsidebarwrapper p.topless { 38 | overflow: hidden; 39 | } 40 | 41 | /** 42 | * Hide dummy toctrees 43 | */ 44 | 45 | ul { 46 | padding-top: 0; 47 | padding-bottom: 0; 48 | margin-top: 0; 49 | margin-bottom: 0; 50 | } 51 | ul li { 52 | padding-top: 0; 53 | padding-bottom: 0; 54 | margin-top: 0; 55 | margin-bottom: 0; 56 | } 57 | ul li a.reference { 58 | padding-top: 0; 59 | padding-bottom: 0; 60 | margin-top: 0; 61 | margin-bottom: 0; 62 | } 63 | 64 | /** 65 | * Make high-level subsections easier to distinguish from top-level ones 66 | */ 67 | div.body h3 { 68 | background-color: transparent; 69 | } 70 | 71 | div.body h4 { 72 | border: none; 73 | background-color: transparent; 74 | } 75 | 76 | /** 77 | * Scipy colors 78 | */ 79 | 80 | body { 81 | background-color: rgb(100,135,220); 82 | } 83 | 84 | div.document { 85 | background-color: rgb(230,230,230); 86 | } 87 | 88 | div.sphinxsidebar { 89 | background-color: rgb(230,230,230); 90 | } 91 | 92 | div.related { 93 | background-color: rgb(100,135,220); 94 | } 95 | 96 | div.sphinxsidebar h3 { 97 | color: rgb(0,102,204); 98 | } 99 | 100 | div.sphinxsidebar h3 a { 101 | color: rgb(0,102,204); 102 | } 103 | 104 | div.sphinxsidebar h4 { 105 | color: rgb(0,82,194); 106 | } 107 | 108 | div.sphinxsidebar p { 109 | color: black; 110 | } 111 | 112 | div.sphinxsidebar a { 113 | color: #355f7c; 114 | } 115 | 116 | div.sphinxsidebar ul.want-points { 117 | list-style: disc; 118 | } 119 | 120 | .field-list th { 121 | color: rgb(0,102,204); 122 | white-space: nowrap; 123 | } 124 | 125 | /** 126 | * Extra admonitions 127 | */ 128 | 129 | div.tip { 130 | background-color: #ffffe4; 131 | border: 1px solid #ee6; 132 | } 133 | 134 | div.plot-output { 135 | clear-after: both; 136 | } 137 | 138 | div.plot-output .figure { 139 | float: left; 140 | text-align: center; 141 | margin-bottom: 0; 142 | padding-bottom: 0; 143 | } 144 | 145 | div.plot-output .caption { 146 | margin-top: 2; 147 | padding-top: 0; 148 | } 149 | 150 | div.plot-output p.admonition-title { 151 | display: none; 152 | } 153 | 154 | div.plot-output:after { 155 | content: ""; 156 | display: block; 157 | height: 0; 158 | clear: both; 159 | } 160 | 161 | 162 | /* 163 | 
div.admonition-example { 164 | background-color: #e4ffe4; 165 | border: 1px solid #ccc; 166 | }*/ 167 | 168 | 169 | /** 170 | * Styling for field lists 171 | */ 172 | 173 | table.field-list th { 174 | border-left: 1px solid #aaa !important; 175 | padding-left: 5px; 176 | } 177 | 178 | table.field-list { 179 | border-collapse: separate; 180 | border-spacing: 10px; 181 | } 182 | 183 | /** 184 | * Styling for footnotes 185 | */ 186 | 187 | table.footnote td, table.footnote th { 188 | border: none; 189 | } 190 | -------------------------------------------------------------------------------- /doc/source/slipGURUTheme/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = default 3 | stylesheet = slipGuru.css 4 | pygments_style = sphinx 5 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | The files 3 | - numpydoc.py 4 | - autosummary.py 5 | - autosummary_generate.py 6 | - docscrape.py 7 | - docscrape_sphinx.py 8 | - phantom_import.py 9 | have the following license: 10 | 11 | Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are 15 | met: 16 | 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 2. Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in 21 | the documentation and/or other materials provided with the 22 | distribution. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 | POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ------------------------------------------------------------------------------- 37 | The files 38 | - compiler_unparse.py 39 | - comment_eater.py 40 | - traitsdoc.py 41 | have the following license: 42 | 43 | This software is OSI Certified Open Source Software. 44 | OSI Certified is a certification mark of the Open Source Initiative. 45 | 46 | Copyright (c) 2006, Enthought, Inc. 47 | All rights reserved. 48 | 49 | Redistribution and use in source and binary forms, with or without 50 | modification, are permitted provided that the following conditions are met: 51 | 52 | * Redistributions of source code must retain the above copyright notice, this 53 | list of conditions and the following disclaimer. 
54 | * Redistributions in binary form must reproduce the above copyright notice, 55 | this list of conditions and the following disclaimer in the documentation 56 | and/or other materials provided with the distribution. 57 | * Neither the name of Enthought, Inc. nor the names of its contributors may 58 | be used to endorse or promote products derived from this software without 59 | specific prior written permission. 60 | 61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 62 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 63 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 64 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 65 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 66 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 67 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 68 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 69 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 70 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 71 | 72 | 73 | ------------------------------------------------------------------------------- 74 | The files 75 | - only_directives.py 76 | - plot_directive.py 77 | originate from Matplotlib (http://matplotlib.sf.net/) which has 78 | the following license: 79 | 80 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved. 81 | 82 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation. 83 | 84 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee. 85 | 86 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3. 87 | 88 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 89 | 90 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 91 | 92 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 93 | 94 | 7. 
Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 95 | 96 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. 97 | 98 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: numpydoc 3 | Version: 0.4 4 | Summary: Sphinx extension to support docstrings in Numpy format 5 | Home-page: http://github.com/numpy/numpy/tree/master/doc/sphinxext 6 | Author: Pauli Virtanen and others 7 | Author-email: pav@iki.fi 8 | License: BSD 9 | Description: UNKNOWN 10 | Keywords: sphinx numpy 11 | Platform: UNKNOWN 12 | Classifier: Development Status :: 3 - Alpha 13 | Classifier: Environment :: Plugins 14 | Classifier: License :: OSI Approved :: BSD License 15 | Classifier: Topic :: Documentation 16 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/README.txt: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add 12 | the code description directives ``np:function``, ``np-c:function``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directive``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | 22 | numpydoc 23 | ======== 24 | 25 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 26 | following the Numpy/Scipy format to a form palatable to Sphinx. 27 | 28 | Options 29 | ------- 30 | 31 | The following options can be set in conf.py: 32 | 33 | - numpydoc_use_plots: bool 34 | 35 | Whether to produce ``plot::`` directives for Examples sections that 36 | contain ``import matplotlib``. 37 | 38 | - numpydoc_show_class_members: bool 39 | 40 | Whether to show all members of a class in the Methods and Attributes 41 | sections automatically. 42 | 43 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 44 | 45 | Whether to insert an edit link after docstrings. 
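For instance, a project's ``conf.py`` could enable the extension and set
these options as follows (illustrative values only)::

    # conf.py (excerpt) -- illustrative values only
    extensions = [
        'sphinx.ext.autodoc',
        'numpydoc',
    ]

    numpydoc_use_plots = False           # no plot:: directives for Examples
    numpydoc_show_class_members = False  # do not auto-list Methods/Attributes
    # numpydoc_edit_link is deprecated; edit your HTML template instead.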
46 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/__init__.py: -------------------------------------------------------------------------------- 1 | from numpydoc import setup 2 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/comment_eater.py: -------------------------------------------------------------------------------- 1 | from cStringIO import StringIO 2 | import compiler 3 | import inspect 4 | import textwrap 5 | import tokenize 6 | 7 | from compiler_unparse import unparse 8 | 9 | 10 | class Comment(object): 11 | """ A comment block. 12 | """ 13 | is_comment = True 14 | def __init__(self, start_lineno, end_lineno, text): 15 | # int : The first line number in the block. 1-indexed. 16 | self.start_lineno = start_lineno 17 | # int : The last line number. Inclusive! 18 | self.end_lineno = end_lineno 19 | # str : The text block including '#' character but not any leading spaces. 20 | self.text = text 21 | 22 | def add(self, string, start, end, line): 23 | """ Add a new comment line. 24 | """ 25 | self.start_lineno = min(self.start_lineno, start[0]) 26 | self.end_lineno = max(self.end_lineno, end[0]) 27 | self.text += string 28 | 29 | def __repr__(self): 30 | return '%s(%r, %r, %r)' % (self.__class__.__name__, self.start_lineno, 31 | self.end_lineno, self.text) 32 | 33 | 34 | class NonComment(object): 35 | """ A non-comment block of code. 36 | """ 37 | is_comment = False 38 | def __init__(self, start_lineno, end_lineno): 39 | self.start_lineno = start_lineno 40 | self.end_lineno = end_lineno 41 | 42 | def add(self, string, start, end, line): 43 | """ Add lines to the block. 44 | """ 45 | if string.strip(): 46 | # Only add if not entirely whitespace. 47 | self.start_lineno = min(self.start_lineno, start[0]) 48 | self.end_lineno = max(self.end_lineno, end[0]) 49 | 50 | def __repr__(self): 51 | return '%s(%r, %r)' % (self.__class__.__name__, self.start_lineno, 52 | self.end_lineno) 53 | 54 | 55 | class CommentBlocker(object): 56 | """ Pull out contiguous comment blocks. 57 | """ 58 | def __init__(self): 59 | # Start with a dummy. 60 | self.current_block = NonComment(0, 0) 61 | 62 | # All of the blocks seen so far. 63 | self.blocks = [] 64 | 65 | # The index mapping lines of code to their associated comment blocks. 66 | self.index = {} 67 | 68 | def process_file(self, file): 69 | """ Process a file object. 70 | """ 71 | for token in tokenize.generate_tokens(file.next): 72 | self.process_token(*token) 73 | self.make_index() 74 | 75 | def process_token(self, kind, string, start, end, line): 76 | """ Process a single token. 77 | """ 78 | if self.current_block.is_comment: 79 | if kind == tokenize.COMMENT: 80 | self.current_block.add(string, start, end, line) 81 | else: 82 | self.new_noncomment(start[0], end[0]) 83 | else: 84 | if kind == tokenize.COMMENT: 85 | self.new_comment(string, start, end, line) 86 | else: 87 | self.current_block.add(string, start, end, line) 88 | 89 | def new_noncomment(self, start_lineno, end_lineno): 90 | """ We are transitioning from a noncomment to a comment. 91 | """ 92 | block = NonComment(start_lineno, end_lineno) 93 | self.blocks.append(block) 94 | self.current_block = block 95 | 96 | def new_comment(self, string, start, end, line): 97 | """ Possibly add a new comment. 98 | 99 | Only adds a new comment if this comment is the only thing on the line. 100 | Otherwise, it extends the noncomment block. 
101 | """ 102 | prefix = line[:start[1]] 103 | if prefix.strip(): 104 | # Oops! Trailing comment, not a comment block. 105 | self.current_block.add(string, start, end, line) 106 | else: 107 | # A comment block. 108 | block = Comment(start[0], end[0], string) 109 | self.blocks.append(block) 110 | self.current_block = block 111 | 112 | def make_index(self): 113 | """ Make the index mapping lines of actual code to their associated 114 | prefix comments. 115 | """ 116 | for prev, block in zip(self.blocks[:-1], self.blocks[1:]): 117 | if not block.is_comment: 118 | self.index[block.start_lineno] = prev 119 | 120 | def search_for_comment(self, lineno, default=None): 121 | """ Find the comment block just before the given line number. 122 | 123 | Returns None (or the specified default) if there is no such block. 124 | """ 125 | if not self.index: 126 | self.make_index() 127 | block = self.index.get(lineno, None) 128 | text = getattr(block, 'text', default) 129 | return text 130 | 131 | 132 | def strip_comment_marker(text): 133 | """ Strip # markers at the front of a block of comment text. 134 | """ 135 | lines = [] 136 | for line in text.splitlines(): 137 | lines.append(line.lstrip('#')) 138 | text = textwrap.dedent('\n'.join(lines)) 139 | return text 140 | 141 | 142 | def get_class_traits(klass): 143 | """ Yield all of the documentation for trait definitions on a class object. 144 | """ 145 | # FIXME: gracefully handle errors here or in the caller? 146 | source = inspect.getsource(klass) 147 | cb = CommentBlocker() 148 | cb.process_file(StringIO(source)) 149 | mod_ast = compiler.parse(source) 150 | class_ast = mod_ast.node.nodes[0] 151 | for node in class_ast.code.nodes: 152 | # FIXME: handle other kinds of assignments? 153 | if isinstance(node, compiler.ast.Assign): 154 | name = node.nodes[0].name 155 | rhs = unparse(node.expr).strip() 156 | doc = strip_comment_marker(cb.search_for_comment(node.lineno, default='')) 157 | yield name, rhs, doc 158 | 159 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/docscrape_sphinx.py: -------------------------------------------------------------------------------- 1 | import re, inspect, textwrap, pydoc 2 | import sphinx 3 | from docscrape import NumpyDocString, FunctionDoc, ClassDoc 4 | 5 | class SphinxDocString(NumpyDocString): 6 | def __init__(self, docstring, config={}): 7 | self.use_plots = config.get('use_plots', False) 8 | NumpyDocString.__init__(self, docstring, config=config) 9 | 10 | # string conversion routines 11 | def _str_header(self, name, symbol='`'): 12 | return ['.. 
rubric:: ' + name, ''] 13 | 14 | def _str_field_list(self, name): 15 | return [':' + name + ':'] 16 | 17 | def _str_indent(self, doc, indent=4): 18 | out = [] 19 | for line in doc: 20 | out += [' '*indent + line] 21 | return out 22 | 23 | def _str_signature(self): 24 | return [''] 25 | if self['Signature']: 26 | return ['``%s``' % self['Signature']] + [''] 27 | else: 28 | return [''] 29 | 30 | def _str_summary(self): 31 | return self['Summary'] + [''] 32 | 33 | def _str_extended_summary(self): 34 | return self['Extended Summary'] + [''] 35 | 36 | def _str_param_list(self, name): 37 | out = [] 38 | if self[name]: 39 | out += self._str_field_list(name) 40 | out += [''] 41 | for param,param_type,desc in self[name]: 42 | out += self._str_indent(['**%s** : %s' % (param.strip(), 43 | param_type)]) 44 | out += [''] 45 | out += self._str_indent(desc,8) 46 | out += [''] 47 | return out 48 | 49 | @property 50 | def _obj(self): 51 | if hasattr(self, '_cls'): 52 | return self._cls 53 | elif hasattr(self, '_f'): 54 | return self._f 55 | return None 56 | 57 | def _str_member_list(self, name): 58 | """ 59 | Generate a member listing, autosummary:: table where possible, 60 | and a table where not. 61 | 62 | """ 63 | out = [] 64 | if self[name]: 65 | out += ['.. rubric:: %s' % name, ''] 66 | prefix = getattr(self, '_name', '') 67 | 68 | if prefix: 69 | prefix = '~%s.' % prefix 70 | 71 | autosum = [] 72 | others = [] 73 | for param, param_type, desc in self[name]: 74 | param = param.strip() 75 | if not self._obj or hasattr(self._obj, param): 76 | autosum += [" %s%s" % (prefix, param)] 77 | else: 78 | others.append((param, param_type, desc)) 79 | 80 | if autosum: 81 | out += ['.. autosummary::', ' :toctree:', ''] 82 | out += autosum 83 | 84 | if others: 85 | maxlen_0 = max([len(x[0]) for x in others]) 86 | maxlen_1 = max([len(x[1]) for x in others]) 87 | hdr = "="*maxlen_0 + " " + "="*maxlen_1 + " " + "="*10 88 | fmt = '%%%ds %%%ds ' % (maxlen_0, maxlen_1) 89 | n_indent = maxlen_0 + maxlen_1 + 4 90 | out += [hdr] 91 | for param, param_type, desc in others: 92 | out += [fmt % (param.strip(), param_type)] 93 | out += self._str_indent(desc, n_indent) 94 | out += [hdr] 95 | out += [''] 96 | return out 97 | 98 | def _str_section(self, name): 99 | out = [] 100 | if self[name]: 101 | out += self._str_header(name) 102 | out += [''] 103 | content = textwrap.dedent("\n".join(self[name])).split("\n") 104 | out += content 105 | out += [''] 106 | return out 107 | 108 | def _str_see_also(self, func_role): 109 | out = [] 110 | if self['See Also']: 111 | see_also = super(SphinxDocString, self)._str_see_also(func_role) 112 | out = ['.. seealso::', ''] 113 | out += self._str_indent(see_also[2:]) 114 | return out 115 | 116 | def _str_warnings(self): 117 | out = [] 118 | if self['Warnings']: 119 | out = ['.. warning::', ''] 120 | out += self._str_indent(self['Warnings']) 121 | return out 122 | 123 | def _str_index(self): 124 | idx = self['index'] 125 | out = [] 126 | if len(idx) == 0: 127 | return out 128 | 129 | out += ['.. 
index:: %s' % idx.get('default','')] 130 | for section, references in idx.iteritems(): 131 | if section == 'default': 132 | continue 133 | elif section == 'refguide': 134 | out += [' single: %s' % (', '.join(references))] 135 | else: 136 | out += [' %s: %s' % (section, ','.join(references))] 137 | return out 138 | 139 | def _str_references(self): 140 | out = [] 141 | if self['References']: 142 | out += self._str_header('References') 143 | if isinstance(self['References'], str): 144 | self['References'] = [self['References']] 145 | out.extend(self['References']) 146 | out += [''] 147 | # Latex collects all references to a separate bibliography, 148 | # so we need to insert links to it 149 | if sphinx.__version__ >= "0.6": 150 | out += ['.. only:: latex',''] 151 | else: 152 | out += ['.. latexonly::',''] 153 | items = [] 154 | for line in self['References']: 155 | m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I) 156 | if m: 157 | items.append(m.group(1)) 158 | out += [' ' + ", ".join(["[%s]_" % item for item in items]), ''] 159 | return out 160 | 161 | def _str_examples(self): 162 | examples_str = "\n".join(self['Examples']) 163 | 164 | if (self.use_plots and 'import matplotlib' in examples_str 165 | and 'plot::' not in examples_str): 166 | out = [] 167 | out += self._str_header('Examples') 168 | out += ['.. plot::', ''] 169 | out += self._str_indent(self['Examples']) 170 | out += [''] 171 | return out 172 | else: 173 | return self._str_section('Examples') 174 | 175 | def __str__(self, indent=0, func_role="obj"): 176 | out = [] 177 | out += self._str_signature() 178 | out += self._str_index() + [''] 179 | out += self._str_summary() 180 | out += self._str_extended_summary() 181 | for param_list in ('Parameters', 'Returns', 'Other Parameters', 182 | 'Raises', 'Warns'): 183 | out += self._str_param_list(param_list) 184 | out += self._str_warnings() 185 | out += self._str_see_also(func_role) 186 | out += self._str_section('Notes') 187 | out += self._str_references() 188 | out += self._str_examples() 189 | for param_list in ('Attributes', 'Methods'): 190 | out += self._str_member_list(param_list) 191 | out = self._str_indent(out,indent) 192 | return '\n'.join(out) 193 | 194 | class SphinxFunctionDoc(SphinxDocString, FunctionDoc): 195 | def __init__(self, obj, doc=None, config={}): 196 | self.use_plots = config.get('use_plots', False) 197 | FunctionDoc.__init__(self, obj, doc=doc, config=config) 198 | 199 | class SphinxClassDoc(SphinxDocString, ClassDoc): 200 | def __init__(self, obj, doc=None, func_doc=None, config={}): 201 | self.use_plots = config.get('use_plots', False) 202 | ClassDoc.__init__(self, obj, doc=doc, func_doc=None, config=config) 203 | 204 | class SphinxObjDoc(SphinxDocString): 205 | def __init__(self, obj, doc=None, config={}): 206 | self._f = obj 207 | SphinxDocString.__init__(self, doc, config=config) 208 | 209 | def get_doc_object(obj, what=None, doc=None, config={}): 210 | if what is None: 211 | if inspect.isclass(obj): 212 | what = 'class' 213 | elif inspect.ismodule(obj): 214 | what = 'module' 215 | elif callable(obj): 216 | what = 'function' 217 | else: 218 | what = 'object' 219 | if what == 'class': 220 | return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc, 221 | config=config) 222 | elif what in ('function', 'method'): 223 | return SphinxFunctionDoc(obj, doc=doc, config=config) 224 | else: 225 | if doc is None: 226 | doc = pydoc.getdoc(obj) 227 | return SphinxObjDoc(obj, doc, config=config) 228 | 
-------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/numpydoc.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======== 3 | numpydoc 4 | ======== 5 | 6 | Sphinx extension that handles docstrings in the Numpy standard format. [1] 7 | 8 | It will: 9 | 10 | - Convert Parameters etc. sections to field lists. 11 | - Convert See Also section to a See also entry. 12 | - Renumber references. 13 | - Extract the signature from the docstring, if it can't be determined otherwise. 14 | 15 | .. [1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard 16 | 17 | """ 18 | 19 | import os, re, pydoc 20 | from docscrape_sphinx import get_doc_object, SphinxDocString 21 | from sphinx.util.compat import Directive 22 | import inspect 23 | 24 | def mangle_docstrings(app, what, name, obj, options, lines, 25 | reference_offset=[0]): 26 | 27 | cfg = dict(use_plots=app.config.numpydoc_use_plots, 28 | show_class_members=app.config.numpydoc_show_class_members) 29 | 30 | if what == 'module': 31 | # Strip top title 32 | title_re = re.compile(ur'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', 33 | re.I|re.S) 34 | lines[:] = title_re.sub(u'', u"\n".join(lines)).split(u"\n") 35 | else: 36 | doc = get_doc_object(obj, what, u"\n".join(lines), config=cfg) 37 | lines[:] = unicode(doc).split(u"\n") 38 | 39 | if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ 40 | obj.__name__: 41 | if hasattr(obj, '__module__'): 42 | v = dict(full_name=u"%s.%s" % (obj.__module__, obj.__name__)) 43 | else: 44 | v = dict(full_name=obj.__name__) 45 | lines += [u'', u'.. htmlonly::', ''] 46 | lines += [u' %s' % x for x in 47 | (app.config.numpydoc_edit_link % v).split("\n")] 48 | 49 | # replace reference numbers so that there are no duplicates 50 | references = [] 51 | for line in lines: 52 | line = line.strip() 53 | m = re.match(ur'^.. \[([a-z0-9_.-])\]', line, re.I) 54 | if m: 55 | references.append(m.group(1)) 56 | 57 | # start renaming from the longest string, to avoid overwriting parts 58 | references.sort(key=lambda x: -len(x)) 59 | if references: 60 | for i, line in enumerate(lines): 61 | for r in references: 62 | if re.match(ur'^\d+$', r): 63 | new_r = u"R%d" % (reference_offset[0] + int(r)) 64 | else: 65 | new_r = u"%s%d" % (r, reference_offset[0]) 66 | lines[i] = lines[i].replace(u'[%s]_' % r, 67 | u'[%s]_' % new_r) 68 | lines[i] = lines[i].replace(u'.. [%s]' % r, 69 | u'.. 
[%s]' % new_r) 70 | 71 | reference_offset[0] += len(references) 72 | 73 | def mangle_signature(app, what, name, obj, options, sig, retann): 74 | # Do not try to inspect classes that don't define `__init__` 75 | if (inspect.isclass(obj) and 76 | (not hasattr(obj, '__init__') or 77 | 'initializes x; see ' in pydoc.getdoc(obj.__init__))): 78 | return '', '' 79 | 80 | if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return 81 | if not hasattr(obj, '__doc__'): return 82 | 83 | doc = SphinxDocString(pydoc.getdoc(obj)) 84 | if doc['Signature']: 85 | sig = re.sub(u"^[^(]*", u"", doc['Signature']) 86 | return sig, u'' 87 | 88 | def setup(app, get_doc_object_=get_doc_object): 89 | global get_doc_object 90 | get_doc_object = get_doc_object_ 91 | 92 | app.connect('autodoc-process-docstring', mangle_docstrings) 93 | app.connect('autodoc-process-signature', mangle_signature) 94 | app.add_config_value('numpydoc_edit_link', None, False) 95 | app.add_config_value('numpydoc_use_plots', None, False) 96 | app.add_config_value('numpydoc_show_class_members', True, True) 97 | 98 | # Extra mangling domains 99 | app.add_domain(NumpyPythonDomain) 100 | app.add_domain(NumpyCDomain) 101 | 102 | #------------------------------------------------------------------------------ 103 | # Docstring-mangling domains 104 | #------------------------------------------------------------------------------ 105 | 106 | from docutils.statemachine import ViewList 107 | from sphinx.domains.c import CDomain 108 | from sphinx.domains.python import PythonDomain 109 | 110 | class ManglingDomainBase(object): 111 | directive_mangling_map = {} 112 | 113 | def __init__(self, *a, **kw): 114 | super(ManglingDomainBase, self).__init__(*a, **kw) 115 | self.wrap_mangling_directives() 116 | 117 | def wrap_mangling_directives(self): 118 | for name, objtype in self.directive_mangling_map.items(): 119 | self.directives[name] = wrap_mangling_directive( 120 | self.directives[name], objtype) 121 | 122 | class NumpyPythonDomain(ManglingDomainBase, PythonDomain): 123 | name = 'np' 124 | directive_mangling_map = { 125 | 'function': 'function', 126 | 'class': 'class', 127 | 'exception': 'class', 128 | 'method': 'function', 129 | 'classmethod': 'function', 130 | 'staticmethod': 'function', 131 | 'attribute': 'attribute', 132 | } 133 | 134 | class NumpyCDomain(ManglingDomainBase, CDomain): 135 | name = 'np-c' 136 | directive_mangling_map = { 137 | 'function': 'function', 138 | 'member': 'attribute', 139 | 'macro': 'function', 140 | 'type': 'class', 141 | 'var': 'object', 142 | } 143 | 144 | def wrap_mangling_directive(base_directive, objtype): 145 | class directive(base_directive): 146 | def run(self): 147 | env = self.state.document.settings.env 148 | 149 | name = None 150 | if self.arguments: 151 | m = re.match(r'^(.*\s+)?(.*?)(\(.*)?', self.arguments[0]) 152 | name = m.group(2).strip() 153 | 154 | if not name: 155 | name = self.arguments[0] 156 | 157 | lines = list(self.content) 158 | mangle_docstrings(env.app, objtype, name, None, None, lines) 159 | self.content = ViewList(lines, self.content.parent) 160 | 161 | return base_directive.run(self) 162 | 163 | return directive 164 | 165 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/phantom_import.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============== 3 | phantom_import 4 | ============== 5 | 6 | Sphinx extension to make directives from ``sphinx.ext.autodoc`` and similar 7 | 
extensions to use docstrings loaded from an XML file. 8 | 9 | This extension loads an XML file in the Pydocweb format [1] and 10 | creates a dummy module that contains the specified docstrings. This 11 | can be used to get the current docstrings from a Pydocweb instance 12 | without needing to rebuild the documented module. 13 | 14 | .. [1] http://code.google.com/p/pydocweb 15 | 16 | """ 17 | import imp, sys, compiler, types, os, inspect, re 18 | 19 | def setup(app): 20 | app.connect('builder-inited', initialize) 21 | app.add_config_value('phantom_import_file', None, True) 22 | 23 | def initialize(app): 24 | fn = app.config.phantom_import_file 25 | if (fn and os.path.isfile(fn)): 26 | print "[numpydoc] Phantom importing modules from", fn, "..." 27 | import_phantom_module(fn) 28 | 29 | #------------------------------------------------------------------------------ 30 | # Creating 'phantom' modules from an XML description 31 | #------------------------------------------------------------------------------ 32 | def import_phantom_module(xml_file): 33 | """ 34 | Insert a fake Python module to sys.modules, based on a XML file. 35 | 36 | The XML file is expected to conform to Pydocweb DTD. The fake 37 | module will contain dummy objects, which guarantee the following: 38 | 39 | - Docstrings are correct. 40 | - Class inheritance relationships are correct (if present in XML). 41 | - Function argspec is *NOT* correct (even if present in XML). 42 | Instead, the function signature is prepended to the function docstring. 43 | - Class attributes are *NOT* correct; instead, they are dummy objects. 44 | 45 | Parameters 46 | ---------- 47 | xml_file : str 48 | Name of an XML file to read 49 | 50 | """ 51 | import lxml.etree as etree 52 | 53 | object_cache = {} 54 | 55 | tree = etree.parse(xml_file) 56 | root = tree.getroot() 57 | 58 | # Sort items so that 59 | # - Base classes come before classes inherited from them 60 | # - Modules come before their contents 61 | all_nodes = dict([(n.attrib['id'], n) for n in root]) 62 | 63 | def _get_bases(node, recurse=False): 64 | bases = [x.attrib['ref'] for x in node.findall('base')] 65 | if recurse: 66 | j = 0 67 | while True: 68 | try: 69 | b = bases[j] 70 | except IndexError: break 71 | if b in all_nodes: 72 | bases.extend(_get_bases(all_nodes[b])) 73 | j += 1 74 | return bases 75 | 76 | type_index = ['module', 'class', 'callable', 'object'] 77 | 78 | def base_cmp(a, b): 79 | x = cmp(type_index.index(a.tag), type_index.index(b.tag)) 80 | if x != 0: return x 81 | 82 | if a.tag == 'class' and b.tag == 'class': 83 | a_bases = _get_bases(a, recurse=True) 84 | b_bases = _get_bases(b, recurse=True) 85 | x = cmp(len(a_bases), len(b_bases)) 86 | if x != 0: return x 87 | if a.attrib['id'] in b_bases: return -1 88 | if b.attrib['id'] in a_bases: return 1 89 | 90 | return cmp(a.attrib['id'].count('.'), b.attrib['id'].count('.')) 91 | 92 | nodes = root.getchildren() 93 | nodes.sort(base_cmp) 94 | 95 | # Create phantom items 96 | for node in nodes: 97 | name = node.attrib['id'] 98 | doc = (node.text or '').decode('string-escape') + "\n" 99 | if doc == "\n": doc = "" 100 | 101 | # create parent, if missing 102 | parent = name 103 | while True: 104 | parent = '.'.join(parent.split('.')[:-1]) 105 | if not parent: break 106 | if parent in object_cache: break 107 | obj = imp.new_module(parent) 108 | object_cache[parent] = obj 109 | sys.modules[parent] = obj 110 | 111 | # create object 112 | if node.tag == 'module': 113 | obj = imp.new_module(name) 114 | obj.__doc__ = doc 115 | 
sys.modules[name] = obj 116 | elif node.tag == 'class': 117 | bases = [object_cache[b] for b in _get_bases(node) 118 | if b in object_cache] 119 | bases.append(object) 120 | init = lambda self: None 121 | init.__doc__ = doc 122 | obj = type(name, tuple(bases), {'__doc__': doc, '__init__': init}) 123 | obj.__name__ = name.split('.')[-1] 124 | elif node.tag == 'callable': 125 | funcname = node.attrib['id'].split('.')[-1] 126 | argspec = node.attrib.get('argspec') 127 | if argspec: 128 | argspec = re.sub('^[^(]*', '', argspec) 129 | doc = "%s%s\n\n%s" % (funcname, argspec, doc) 130 | obj = lambda: 0 131 | obj.__argspec_is_invalid_ = True 132 | obj.func_name = funcname 133 | obj.__name__ = name 134 | obj.__doc__ = doc 135 | if inspect.isclass(object_cache[parent]): 136 | obj.__objclass__ = object_cache[parent] 137 | else: 138 | class Dummy(object): pass 139 | obj = Dummy() 140 | obj.__name__ = name 141 | obj.__doc__ = doc 142 | if inspect.isclass(object_cache[parent]): 143 | obj.__get__ = lambda: None 144 | object_cache[name] = obj 145 | 146 | if parent: 147 | if inspect.ismodule(object_cache[parent]): 148 | obj.__module__ = parent 149 | setattr(object_cache[parent], name.split('.')[-1], obj) 150 | 151 | # Populate items 152 | for node in root: 153 | obj = object_cache.get(node.attrib['id']) 154 | if obj is None: continue 155 | for ref in node.findall('ref'): 156 | if node.tag == 'class': 157 | if ref.attrib['ref'].startswith(node.attrib['id'] + '.'): 158 | setattr(obj, ref.attrib['name'], 159 | object_cache.get(ref.attrib['ref'])) 160 | else: 161 | setattr(obj, ref.attrib['name'], 162 | object_cache.get(ref.attrib['ref'])) 163 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/setup.cfg: -------------------------------------------------------------------------------- 1 | [egg_info] 2 | tag_build = 3 | tag_date = 0 4 | tag_svn_revision = 0 5 | 6 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | import setuptools 3 | import sys, os 4 | 5 | version = "0.4" 6 | 7 | setup( 8 | name="numpydoc", 9 | packages=["numpydoc"], 10 | package_dir={"numpydoc": ""}, 11 | version=version, 12 | description="Sphinx extension to support docstrings in Numpy format", 13 | # classifiers from http://pypi.python.org/pypi?%3Aaction=list_classifiers 14 | classifiers=["Development Status :: 3 - Alpha", 15 | "Environment :: Plugins", 16 | "License :: OSI Approved :: BSD License", 17 | "Topic :: Documentation"], 18 | keywords="sphinx numpy", 19 | author="Pauli Virtanen and others", 20 | author_email="pav@iki.fi", 21 | url="http://github.com/numpy/numpy/tree/master/doc/sphinxext", 22 | license="BSD", 23 | zip_safe=False, 24 | install_requires=["Sphinx >= 1.0.1"], 25 | package_data={'numpydoc': 'tests', '': ''}, 26 | entry_points={ 27 | "console_scripts": [ 28 | "autosummary_generate = numpydoc.autosummary_generate:main", 29 | ], 30 | }, 31 | ) 32 | -------------------------------------------------------------------------------- /doc/source/sphinxext/numpydoc/traitsdoc.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========= 3 | traitsdoc 4 | ========= 5 | 6 | Sphinx extension that handles docstrings in the Numpy standard format, [1] 7 | and support Traits [2]. 
8 | 9 | This extension can be used as a replacement for ``numpydoc`` when support 10 | for Traits is required. 11 | 12 | .. [1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard 13 | .. [2] http://code.enthought.com/projects/traits/ 14 | 15 | """ 16 | 17 | import inspect 18 | import os 19 | import pydoc 20 | 21 | import docscrape 22 | import docscrape_sphinx 23 | from docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString 24 | 25 | import numpydoc 26 | 27 | import comment_eater 28 | 29 | class SphinxTraitsDoc(SphinxClassDoc): 30 | def __init__(self, cls, modulename='', func_doc=SphinxFunctionDoc): 31 | if not inspect.isclass(cls): 32 | raise ValueError("Initialise using a class. Got %r" % cls) 33 | self._cls = cls 34 | 35 | if modulename and not modulename.endswith('.'): 36 | modulename += '.' 37 | self._mod = modulename 38 | self._name = cls.__name__ 39 | self._func_doc = func_doc 40 | 41 | docstring = pydoc.getdoc(cls) 42 | docstring = docstring.split('\n') 43 | 44 | # De-indent paragraph 45 | try: 46 | indent = min(len(s) - len(s.lstrip()) for s in docstring 47 | if s.strip()) 48 | except ValueError: 49 | indent = 0 50 | 51 | for n,line in enumerate(docstring): 52 | docstring[n] = docstring[n][indent:] 53 | 54 | self._doc = docscrape.Reader(docstring) 55 | self._parsed_data = { 56 | 'Signature': '', 57 | 'Summary': '', 58 | 'Description': [], 59 | 'Extended Summary': [], 60 | 'Parameters': [], 61 | 'Returns': [], 62 | 'Raises': [], 63 | 'Warns': [], 64 | 'Other Parameters': [], 65 | 'Traits': [], 66 | 'Methods': [], 67 | 'See Also': [], 68 | 'Notes': [], 69 | 'References': '', 70 | 'Example': '', 71 | 'Examples': '', 72 | 'index': {} 73 | } 74 | 75 | self._parse() 76 | 77 | def _str_summary(self): 78 | return self['Summary'] + [''] 79 | 80 | def _str_extended_summary(self): 81 | return self['Description'] + self['Extended Summary'] + [''] 82 | 83 | def __str__(self, indent=0, func_role="func"): 84 | out = [] 85 | out += self._str_signature() 86 | out += self._str_index() + [''] 87 | out += self._str_summary() 88 | out += self._str_extended_summary() 89 | for param_list in ('Parameters', 'Traits', 'Methods', 90 | 'Returns','Raises'): 91 | out += self._str_param_list(param_list) 92 | out += self._str_see_also("obj") 93 | out += self._str_section('Notes') 94 | out += self._str_references() 95 | out += self._str_section('Example') 96 | out += self._str_section('Examples') 97 | out = self._str_indent(out,indent) 98 | return '\n'.join(out) 99 | 100 | def looks_like_issubclass(obj, classname): 101 | """ Return True if the object has a class or superclass with the given class 102 | name. 103 | 104 | Ignores old-style classes. 105 | """ 106 | t = obj 107 | if t.__name__ == classname: 108 | return True 109 | for klass in t.__mro__: 110 | if klass.__name__ == classname: 111 | return True 112 | return False 113 | 114 | def get_doc_object(obj, what=None, config=None): 115 | if what is None: 116 | if inspect.isclass(obj): 117 | what = 'class' 118 | elif inspect.ismodule(obj): 119 | what = 'module' 120 | elif callable(obj): 121 | what = 'function' 122 | else: 123 | what = 'object' 124 | if what == 'class': 125 | doc = SphinxTraitsDoc(obj, '', func_doc=SphinxFunctionDoc, config=config) 126 | if looks_like_issubclass(obj, 'HasTraits'): 127 | for name, trait, comment in comment_eater.get_class_traits(obj): 128 | # Exclude private traits. 
129 | if not name.startswith('_'): 130 | doc['Traits'].append((name, trait, comment.splitlines())) 131 | return doc 132 | elif what in ('function', 'method'): 133 | return SphinxFunctionDoc(obj, '', config=config) 134 | else: 135 | return SphinxDocString(pydoc.getdoc(obj), config=config) 136 | 137 | def setup(app): 138 | # init numpydoc 139 | numpydoc.setup(app, get_doc_object) 140 | 141 | -------------------------------------------------------------------------------- /doc/source/sphinxext/sphinxcontrib/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | sphinxcontrib 4 | ~~~~~~~~~~~~~ 5 | 6 | This package is a namespace package that contains all extensions 7 | distributed in the ``sphinx-contrib`` distribution. 8 | 9 | :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS. 10 | :license: BSD, see LICENSE for details. 11 | """ 12 | 13 | __import__('pkg_resources').declare_namespace(__name__) 14 | 15 | -------------------------------------------------------------------------------- /doc/source/sphinxext/sphinxcontrib/programoutput.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2010, 2011, Sebastian Wiesner 3 | # All rights reserved. 4 | 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | 8 | # 1. Redistributions of source code must retain the above copyright notice, 9 | # this list of conditions and the following disclaimer. 10 | # 2. Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | # POSSIBILITY OF SUCH DAMAGE. 25 | 26 | 27 | """ 28 | sphinxcontrib.programoutput 29 | =========================== 30 | 31 | This extension provides a directive to include the output of commands as 32 | literal block while building the docs. 33 | 34 | .. 
moduleauthor:: Sebastian Wiesner 35 | """ 36 | 37 | from __future__ import (print_function, division, unicode_literals, 38 | absolute_import) 39 | 40 | import sys 41 | import shlex 42 | from subprocess import Popen, PIPE, STDOUT 43 | from collections import defaultdict, namedtuple 44 | 45 | from docutils import nodes 46 | from docutils.parsers import rst 47 | from docutils.parsers.rst.directives import flag, unchanged, nonnegative_int 48 | 49 | 50 | __version__ = '0.5' 51 | 52 | 53 | class program_output(nodes.Element): 54 | pass 55 | 56 | 57 | def _slice(value): 58 | parts = [int(v.strip()) for v in value.split(',')] 59 | if len(parts) > 2: 60 | raise ValueError('too many slice parts') 61 | return tuple((parts + [None]*2)[:2]) 62 | 63 | 64 | class ProgramOutputDirective(rst.Directive): 65 | has_content = False 66 | final_argument_whitespace = True 67 | required_arguments = 1 68 | 69 | option_spec = dict(shell=flag, prompt=flag, nostderr=flag, 70 | ellipsis=_slice, extraargs=unchanged, 71 | returncode=nonnegative_int) 72 | 73 | def run(self): 74 | node = program_output() 75 | node.line = self.lineno 76 | node['command'] = self.arguments[0] 77 | 78 | if self.name == 'command-output': 79 | node['show_prompt'] = True 80 | else: 81 | node['show_prompt'] = 'prompt' in self.options 82 | 83 | node['hide_standard_error'] = 'nostderr' in self.options 84 | node['extraargs'] = self.options.get('extraargs', '') 85 | node['use_shell'] = 'shell' in self.options 86 | node['returncode'] = self.options.get('returncode', 0) 87 | if 'ellipsis' in self.options: 88 | node['strip_lines'] = self.options['ellipsis'] 89 | return [node] 90 | 91 | 92 | _Command = namedtuple('Command', 'command shell hide_standard_error') 93 | 94 | 95 | class Command(_Command): #pylint: disable=W0232 96 | """ 97 | A command to be executed. 98 | """ 99 | 100 | def __new__(cls, command, shell=False, hide_standard_error=False): 101 | if isinstance(command, list): 102 | command = tuple(command) 103 | return _Command.__new__(cls, command, shell, hide_standard_error) 104 | 105 | @classmethod 106 | def from_program_output_node(cls, node): 107 | """ 108 | Create a command from a :class:`program_output` node. 109 | """ 110 | extraargs = node.get('extraargs', '') 111 | command = (node['command'] + ' ' + extraargs).strip() 112 | return cls(command, node['use_shell'], node['hide_standard_error']) 113 | 114 | def execute(self): 115 | """ 116 | Execute this command. 117 | 118 | Return the :class:`~subprocess.Popen` object representing the running 119 | command. 120 | """ 121 | # pylint: disable=E1101 122 | if isinstance(self.command, unicode): 123 | command = self.command.encode(sys.getfilesystemencoding()) 124 | else: 125 | command = self.command 126 | if isinstance(command, basestring) and not self.shell: 127 | command = shlex.split(command) 128 | return Popen(command, shell=self.shell, stdout=PIPE, 129 | stderr=PIPE if self.hide_standard_error else STDOUT) 130 | 131 | def get_output(self): 132 | """ 133 | Get the output of this command. 134 | 135 | Return a tuple ``(returncode, output)``. ``returncode`` is the 136 | integral return code of the process, ``output`` is the output as 137 | unicode string, with final trailing spaces and new lines stripped. 
138 | """ 139 | process = self.execute() 140 | output = process.communicate()[0].decode( 141 | sys.getfilesystemencoding()).rstrip() 142 | return process.returncode, output 143 | 144 | def __str__(self): 145 | # pylint: disable=E1101 146 | if isinstance(self.command, tuple): 147 | return repr(list(self.command)) 148 | return repr(self.command) 149 | 150 | 151 | class ProgramOutputCache(defaultdict): # pylint: disable=W0232 152 | """ 153 | Execute command and cache their output. 154 | 155 | This class is a mapping. Its keys are :class:`Command` objects represeting 156 | command invocations. Its values are tuples of the form ``(returncode, 157 | output)``, where ``returncode`` is the integral return code of the command, 158 | and ``output`` is the output as unicode string. 159 | 160 | The first time, a key is retrieved from this object, the command is 161 | invoked, and its result is cached. Subsequent access to the same key 162 | returns the cached value. 163 | """ 164 | 165 | def __missing__(self, command): 166 | """ 167 | Called, if a command was not found in the cache. 168 | 169 | ``command`` is an instance of :class:`Command`. 170 | """ 171 | result = command.get_output() 172 | self[command] = result 173 | return result 174 | 175 | 176 | def run_programs(app, doctree): 177 | """ 178 | Execute all programs represented by ``program_output`` nodes in 179 | ``doctree``. Each ``program_output`` node in ``doctree`` is then 180 | replaced with a node, that represents the output of this program. 181 | 182 | The program output is retrieved from the cache in 183 | ``app.env.programoutput_cache``. 184 | """ 185 | if app.config.programoutput_use_ansi: 186 | # enable ANSI support, if requested by config 187 | from sphinxcontrib.ansi import ansi_literal_block 188 | node_class = ansi_literal_block 189 | else: 190 | node_class = nodes.literal_block 191 | 192 | cache = app.env.programoutput_cache 193 | 194 | for node in doctree.traverse(program_output): 195 | command = Command.from_program_output_node(node) 196 | try: 197 | returncode, output = cache[command] 198 | except EnvironmentError as error: 199 | error_message = 'Command {0} failed: {1}'.format(command, error) 200 | error_node = doctree.reporter.error(error_message, base_node=node) 201 | node.replace_self(error_node) 202 | else: 203 | if returncode != node['returncode']: 204 | app.warn('Unexpected return code {0} from command {1}'.format( 205 | returncode, command)) 206 | 207 | # replace lines with ..., if ellipsis is specified 208 | if 'strip_lines' in node: 209 | lines = output.splitlines() 210 | start, stop = node['strip_lines'] 211 | lines[start:stop] = ['...'] 212 | output = '\n'.join(lines) 213 | 214 | if node['show_prompt']: 215 | tmpl = app.config.programoutput_prompt_template 216 | output = tmpl.format(command=node['command'], output=output, 217 | returncode=returncode) 218 | 219 | new_node = node_class(output, output) 220 | new_node['language'] = 'text' 221 | node.replace_self(new_node) 222 | 223 | 224 | def init_cache(app): 225 | """ 226 | Initialize the cache for program output at 227 | ``app.env.programoutput_cache``, if not already present (e.g. being 228 | loaded from a pickled environment). 229 | 230 | The cache is of type :class:`ProgramOutputCache`. 
231 | """ 232 | if not hasattr(app.env, 'programoutput_cache'): 233 | app.env.programoutput_cache = ProgramOutputCache() 234 | 235 | 236 | def setup(app): 237 | app.add_config_value('programoutput_use_ansi', False, 'env') 238 | app.add_config_value('programoutput_prompt_template', 239 | '$ {command}\n{output}', 'env') 240 | app.add_directive('program-output', ProgramOutputDirective) 241 | app.add_directive('command-output', ProgramOutputDirective) 242 | app.connect(b'builder-inited', init_cache) 243 | app.connect(b'doctree-read', run_programs) 244 | -------------------------------------------------------------------------------- /doc/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial: 2 | 3 | Quick start tutorial 4 | ==================== 5 | ADENINE may be installed using standard Python tools (with 6 | administrative or sudo permissions on GNU-Linux platforms):: 7 | 8 | $ pip install adenine 9 | 10 | or 11 | 12 | $ easy_install adenine 13 | 14 | Installation from sources 15 | ------------------------- 16 | If you prefer to install ADENINE manually, download the .zip or .tar.gz archive 17 | from ``_. Then extract it and move into the root directory:: 18 | 19 | $ unzip slipguru-adenine-|release|.zip 20 | $ cd adenine-|release|/ 21 | 22 | or:: 23 | 24 | $ tar xvf slipguru-adenine-|release|.tar.gz 25 | $ cd adenine-|release|/ 26 | 27 | Otherwise you can clone our `GitHub repository <https://github.com/slipguru/adenine>`_:: 28 | 29 | $ git clone https://github.com/slipguru/adenine.git 30 | 31 | From here, you can follow the standard Python installation step:: 32 | 33 | $ python setup.py install 34 | 35 | After ADENINE installation, you should have access to two scripts, 36 | named with a common ``ade_`` prefix:: 37 | 38 | $ ade_ 39 | ade_analysis.py ade_run.py 40 | 41 | This tutorial assumes that you downloaded and extracted the ADENINE 42 | source package, which contains an ``examples/data`` directory with some data files (``.npy`` or ``.csv``) that will be used to show ADENINE functionalities. 43 | 44 | ADENINE needs only 3 ingredients: 45 | 46 | * ``n_samples x n_variables`` input matrix 47 | * ``n_samples x 1`` output vector (optional) 48 | * ``configuration`` file 49 | 50 | 51 | Input data format 52 | ----------------- 53 | Input data are assumed to be: 54 | 55 | * ``numpy`` array stored in ``.npy`` files organized with a row for each sample and a column for each feature, 56 | * tabular data stored in comma separated ``.csv`` files presenting the variables header on the first row and the sample indexes on the first column, 57 | * toy examples available from the ``adenine.utils.data_source`` function. 58 | 59 | .. _configuration: 60 | 61 | Configuration File 62 | ------------------ 63 | The ADENINE configuration file is a standard Python script. It is 64 | imported as a module, then all of its code is executed. In this file the user can define all the options needed to read the data and to create the pipelines. 65 | 66 | .. literalinclude:: ../../adenine/ade_config.py 67 | :language: python 68 | 69 | .. _experiment: 70 | 71 | Experiment runner 72 | ----------------- 73 | The ``ade_run.py`` script executes the full ADENINE framework. The prototype is the following:: 74 | 75 | $ ade_run.py ade_config.py 76 | 77 | When launched, the script reads the data, then it creates and runs each pipeline, saving the results in a tree-like structure that has the current folder as root. 78 | 79 | ..
_analysis: 80 | 81 | Results analysis 82 | ---------------- 83 | The ``ade_analysis.py`` script provides useful summaries and graphs from the results of the experiment. This script accepts as its only parameter a result directory 84 | that has already been created:: 85 | 86 | $ ade_analysis.py result-dir 87 | 88 | The script produces a set of textual and graphical results. An output example obtained by one of the implemented pipelines is shown below. 89 | 90 | .. image:: pca.png 91 | :scale: 80 % 92 | :alt: broken link 93 | 94 | .. image:: kpca.png 95 | :scale: 80 % 96 | :alt: broken link 97 | 98 | You can reproduce the example above by specifying ``data_source.load('circles')`` in the configuration file. 99 | 100 | Example dataset 101 | ---------------- 102 | An example dataset can be downloaded :download:`here `. The dataset is a random extraction of 801 samples (with dimension 20531) measuring RNA-Seq gene expression of patients affected by 5 different types of tumor: breast invasive carcinoma (BRCA), kidney renal clear cell carcinoma (KIRC), colon adenocarcinoma (COAD), lung adenocarcinoma (LUAD) and prostate adenocarcinoma (PRAD). The full dataset is maintained by The Cancer Genome Atlas Pan-Cancer Project [1] and we refer to the `original repository `_ for further details. 103 | 104 | Reference 105 | ---------------- 106 | [1] Weinstein, John N., et al. "The cancer genome atlas pan-cancer analysis project." Nature Genetics 45.10 (2013): 1113-1120. 107 | -------------------------------------------------------------------------------- /icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/icon.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | functools32==3.2.3.post2 3 | matplotlib==2.0.0 4 | numpy==1.12.0 5 | pandas==0.19.2 6 | pydot==p 7 | pyparsing==2.1.4 8 | python-dateutil==2.6.0 9 | pytz==2016.10 10 | scikit-learn==0.18.1 11 | scipy==0.18.1 12 | seaborn==0.7.1 13 | six==1.10.0 14 | subprocess32==3.2.7 15 | GEOparse==0.1.10 16 | fastcluster==1.1.20 17 | -------------------------------------------------------------------------------- /scripts/ade_GEO2csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | import argparse 11 | import pandas as pd 12 | 13 | from adenine.utils import GEO2csv 14 | from adenine import __version__ 15 | 16 | 17 | def main(): 18 | """Adenine GEO2csv main script.""" 19 | parser = argparse.ArgumentParser(description='Adenine script for ' 20 | 'GEO2csv conversion.') 21 | parser.add_argument('--version', action='version', 22 | version='%(prog)s v' + __version__) 23 | parser.add_argument('accession_number', help='GEO DataSets Accession number') 24 | parser.add_argument('--label_field', dest='pheno_name', 25 | default='title', help='The field in which ' 26 | 'phenotype information is stored.') 27 | parser.add_argument('--phenotypes', '--pheno', dest='pheno', 28 | action='store', default=None, 29 | help='Select samples by their phenotypes (' 30 | 'comma separated) e.g.:
Severe,Mild,Control,...') 31 | parser.add_argument('--gene_symbol', action='store_true', dest='gs', 32 | help='Use this option to convert the platform IDs ' 33 | 'to gene symbols') 34 | parser.add_argument('--signature', dest='signature', 35 | default=None, help='Generate a data matrix comprising ' 36 | 'only the genes in the signature.') 37 | args = parser.parse_args() 38 | 39 | # Get the data 40 | try: 41 | if args.gs or (args.signature is not None): 42 | data, gse = GEO2csv.get_GEO(args.accession_number, args.pheno_name, True) 43 | else: 44 | data = GEO2csv.get_GEO(args.accession_number, args.pheno_name)[0] 45 | print('* GEO dataset {} loaded'.format(args.accession_number)) 46 | 47 | # Filter samples per phenotype 48 | if args.pheno is not None: 49 | data = GEO2csv.GEO_select_samples( 50 | data.data, data.target, selected_labels=args.pheno.split(','), 51 | index=data.index, feature_names=data.feature_names) 52 | print('* Phenotypes {}'.format(args.pheno)) 53 | 54 | if args.gs or (args.signature is not None): 55 | data = GEO2csv.id2gs(data, gse) 56 | print('* Probe ID converted to gene symbols') 57 | 58 | if args.signature is not None: 59 | data = GEO2csv.restrict_to_signature(data, args.signature.split(',')) 60 | print('* Dataset restricted to {}'.format(data.feature_names)) 61 | 62 | # Save dataset 63 | pd.DataFrame(data=data.data, columns=data.feature_names, 64 | index=data.index).to_csv('{}_data.csv'.format(args.accession_number)) 65 | print('* {}_data.csv created: {} samples x {} features'.format(args.accession_number, 66 | *data.data.shape)) 67 | pd.DataFrame(data=data.target, columns=['Phenotype'], 68 | index=data.index).to_csv('{}_labels.csv'.format(args.accession_number)) 69 | print('* {}_labels.csv created: {} samples'.format(args.accession_number, 70 | len(data.target))) 71 | 72 | except Exception as e: 73 | print('Raised {}'.format(e)) 74 | raise ValueError('Cannot parse {}. 
Check ' 75 | 'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={}' 76 | ' for more info on the GEO series'.format(args.accession_number, 77 | args.accession_number)) 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /scripts/ade_analysis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Adenine analysis script.""" 3 | ###################################################################### 4 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 5 | # 6 | # FreeBSD License 7 | ###################################################################### 8 | 9 | from __future__ import print_function 10 | 11 | import imp 12 | import sys 13 | import os 14 | import time 15 | import logging 16 | import argparse 17 | import gzip 18 | import numpy as np 19 | try: 20 | import cPickle as pkl 21 | except: 22 | import pickle as pkl 23 | 24 | from adenine.core import analyze_results 25 | from adenine.utils import extra 26 | 27 | 28 | def init_main(): 29 | """Init analysis main.""" 30 | from adenine import __version__ 31 | parser = argparse.ArgumentParser(description='Adenine script for ' 32 | 'analysing pipelines.') 33 | parser.add_argument('--version', action='version', 34 | version='%(prog)s v' + __version__) 35 | parser.add_argument("result_folder", help="specify results directory") 36 | args = parser.parse_args() 37 | 38 | root_folder = args.result_folder 39 | filename = [f for f in os.listdir(root_folder) 40 | if os.path.isfile(os.path.join(root_folder, f)) and 41 | '.pkl' in f and f != "__data.pkl"] 42 | if not filename: 43 | sys.stderr.write("No .pkl file found in {}. Aborting...\n" 44 | .format(root_folder)) 45 | sys.exit(-1) 46 | 47 | # Run analysis 48 | # print("Starting the analysis of {}".format(filename)) 49 | main(os.path.join(os.path.abspath(root_folder), filename[0])) 50 | 51 | 52 | def main(dumpfile): 53 | """Analyze the pipelines.""" 54 | # Load the configuration file 55 | config_path = os.path.dirname(dumpfile) 56 | config_path = os.path.join(os.path.abspath(config_path), 'ade_config.py') 57 | config = imp.load_source('ade_config', config_path) 58 | extra.set_module_defaults(config, {'file_format': 'pdf', 59 | 'plotting_context': 'paper', 60 | 'verbose': False}) 61 | if hasattr(config, 'use_compression'): 62 | use_compression = config.use_compression 63 | else: 64 | use_compression = False 65 | 66 | # Load the results used with ade_run.py 67 | try: 68 | if use_compression: 69 | with gzip.open(os.path.join(os.path.dirname(dumpfile), 70 | '__data.pkl.tz'), 'r') as fdata: 71 | data_X_y_index = pkl.load(fdata) 72 | data = data_X_y_index['X'] 73 | labels = data_X_y_index['y'] 74 | index = data_X_y_index['index'] 75 | else: 76 | with open(os.path.join(os.path.dirname(dumpfile), 77 | '__data.pkl'), 'r') as fdata: 78 | data_X_y_index = pkl.load(fdata) 79 | data = data_X_y_index['X'] 80 | labels = data_X_y_index['y'] 81 | index = data_X_y_index['index'] 82 | except IOError: 83 | if use_compression: 84 | data_filename = '__data.pkl.tz' 85 | else: 86 | data_filename = '__data.pkl' 87 | 88 | sys.stderr.write("Cannot load {} Reloading data from " 89 | "config file ...".format(data_filename)) 90 | data = config.X 91 | labels = config.y 92 | index = config.index if hasattr(config, 'index') \ 93 | else np.arange(data.shape[0]) 94 | 95 | # Read the feature names from the config file 96 | feat_names = config.feat_names if 
hasattr(config, 'feat_names') \ 97 | else np.arange(data.shape[1]) 98 | # Initialize the log file 99 | filename = 'results_' + os.path.basename(dumpfile)[0:-7] 100 | logfile = os.path.join(os.path.dirname(dumpfile), filename + '.log') 101 | logging.basicConfig(filename=logfile, level=logging.INFO, filemode='w', 102 | format='%(levelname)s (%(name)s): %(message)s') 103 | root_logger = logging.getLogger() 104 | lsh = logging.StreamHandler() 105 | lsh.setLevel(20 if config.verbose else logging.ERROR) 106 | lsh.setFormatter( 107 | logging.Formatter('%(levelname)s (%(name)s): %(message)s')) 108 | root_logger.addHandler(lsh) 109 | 110 | tic = time.time() 111 | print("\nUnpickling output ...", end=' ') 112 | # Load the results 113 | if use_compression: 114 | with gzip.open(dumpfile, 'r') as fres: 115 | res = pkl.load(fres) 116 | else: 117 | with open(dumpfile, 'r') as fres: 118 | res = pkl.load(fres) 119 | 120 | print("done: {} s".format(extra.sec_to_time(time.time() - tic))) 121 | 122 | # Analyze the pipelines 123 | analyze_results.analyze(input_dict=res, root=os.path.dirname(dumpfile), 124 | y=labels, feat_names=feat_names, index=index, 125 | plotting_context=config.plotting_context, 126 | file_format=config.file_format) 127 | 128 | root_logger.handlers[0].close() 129 | 130 | 131 | if __name__ == '__main__': 132 | init_main() 133 | -------------------------------------------------------------------------------- /scripts/ade_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | ###################################################################### 5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla 6 | # 7 | # FreeBSD License 8 | ###################################################################### 9 | 10 | import os 11 | import shutil 12 | import argparse 13 | 14 | from adenine import main 15 | 16 | 17 | def init_main(): 18 | """Initialize main for ade_run.py.""" 19 | from adenine import __version__ 20 | parser = argparse.ArgumentParser(description='Adenine script for ' 21 | 'pipeline generation.') 22 | parser.add_argument('--version', action='version', 23 | version='%(prog)s v' + __version__) 24 | parser.add_argument("-c", "--create", dest="create", action="store_true", 25 | help="create config file", default=False) 26 | parser.add_argument("configuration_file", help="specify config file", 27 | default='ade_config.py') 28 | args = parser.parse_args() 29 | 30 | if args.create: 31 | import adenine as ade 32 | std_config_path = os.path.join(ade.__path__[0], 'ade_config.py') 33 | # Check for .pyc 34 | if std_config_path.endswith('.pyc'): 35 | std_config_path = std_config_path[:-1] 36 | # Check if the file already exists 37 | if os.path.exists(args.configuration_file): 38 | parser.error("adenine configuration file already exists") 39 | # Copy the config file 40 | shutil.copy(std_config_path, args.configuration_file) 41 | else: 42 | main(args.configuration_file) 43 | 44 | 45 | if __name__ == '__main__': 46 | init_main() 47 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """adenine setup script.""" 3 | 4 | from 
setuptools import setup 5 | 6 | # Package Version 7 | from adenine import __version__ as version 8 | 9 | setup( 10 | name='adenine', 11 | version=version, 12 | 13 | description=('A Data ExploratioN pIpeliNE'), 14 | long_description=open('README.md').read(), 15 | author='Samuele Fiorini, Federico Tomasi', 16 | author_email='{samuele.fiorini, federico.tomasi}@dibris.unige.it', 17 | maintainer='Samuele Fiorini, Federico Tomasi', 18 | maintainer_email='{samuele.fiorini, federico.tomasi}@dibris.unige.it', 19 | url='https://github.com/slipguru/adenine', 20 | download_url='https://github.com/slipguru/adenine/tarball/'+version, 21 | classifiers=[ 22 | 'Development Status :: 4 - Beta', 23 | 'Environment :: Console', 24 | 'Intended Audience :: Science/Research', 25 | 'Intended Audience :: Developers', 26 | 'Programming Language :: Python', 27 | 'License :: OSI Approved :: BSD License', 28 | 'Topic :: Software Development', 29 | 'Topic :: Scientific/Engineering :: Bio-Informatics', 30 | 'Operating System :: POSIX', 31 | 'Operating System :: Unix', 32 | 'Operating System :: MacOS' 33 | ], 34 | license='FreeBSD', 35 | 36 | packages=['adenine', 'adenine.core', 'adenine.utils', 'adenine.externals'], 37 | install_requires=['numpy (>=1.10.1)', 38 | 'scipy (>=0.16.1)', 39 | 'scikit-learn (>=0.18)', 40 | 'matplotlib (>=1.5.1)', 41 | 'seaborn (>=0.7.0)', 42 | # 'joblib', 43 | 'fastcluster (>=1.1.20)', 44 | 'GEOparse (>=0.1.10)', 45 | 'pydot (>=1.2.3)'], 46 | scripts=['scripts/ade_run.py', 'scripts/ade_analysis.py', 47 | 'scripts/ade_GEO2csv.py'], 48 | ) 49 | --------------------------------------------------------------------------------
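A minimal sketch of how the three console scripts installed by this setup.py fit together, following the quick-start tutorial above; the configuration file name, the results folder name, and the GEO accession number below are placeholders for illustration only, not files shipped with the package::

    $ ade_run.py -c my_config.py              # copy the default ade_config.py template to a new config file
    $ ade_run.py my_config.py                  # create and run the pipelines defined in the config
    $ ade_analysis.py result-dir               # produce summaries and plots from the results folder created by the run
    $ ade_GEO2csv.py GSEnnnn --gene_symbol     # optionally fetch a GEO series as GSEnnnn_data.csv / GSEnnnn_labels.csv

The flags shown are the ones defined in the argparse setup of each script above (``-c/--create``, the positional ``configuration_file``, ``result_folder`` and ``accession_number`` arguments, and ``--gene_symbol``); everything else is hypothetical.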