├── .gitignore
├── AUTHORS.txt
├── LICENCE.txt
├── MANIFEST
├── MANIFEST.in
├── README.md
├── adenine
│   ├── __init__.py
│   ├── ade_config.py
│   ├── cluster
│   │   ├── __init__.py
│   │   ├── agglomerative.py
│   │   └── optics.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── analyze_results.py
│   │   ├── define_pipeline.py
│   │   ├── job_distribution.py
│   │   ├── pipelines.py
│   │   ├── plotting.py
│   │   └── template
│   │       ├── __init__.py
│   │       ├── d3_template.py
│   │       └── svg-crowbar.js
│   ├── examples
│   │   ├── ade_config.py
│   │   └── data
│   │       ├── X.csv
│   │       ├── X.npy
│   │       ├── X_missing.csv
│   │       ├── Y_missing_test.csv
│   │       ├── y.csv
│   │       └── y.npy
│   ├── externals
│   │   ├── __init__.py
│   │   └── hierarchical.py
│   ├── test
│   │   ├── X_missing.csv
│   │   ├── Y_missing_test.csv
│   │   ├── carttest.py
│   │   ├── imputing_test.py
│   │   └── imputing_test_lite.py
│   └── utils
│       ├── GEO2csv.py
│       ├── __init__.py
│       ├── data_source.py
│       ├── extensions.py
│       ├── extra.py
│       ├── scores.py
│       └── templates.py
├── doc
│   ├── GiHubProjectPage.txt
│   ├── Makefile
│   ├── devPlan
│   │   ├── plan.pdf
│   │   └── plan.tex
│   └── source
│       ├── adenine_logo.pdf
│       ├── adenine_logo.png
│       ├── conf.py
│       ├── dependencies.txt
│       ├── drawing.svg
│       ├── index.rst
│       ├── modules.rst
│       ├── slipGURUTheme
│       │   ├── layout.html
│       │   ├── static
│       │   │   ├── logos.png
│       │   │   └── slipGuru.css
│       │   └── theme.conf
│       ├── sphinxext
│       │   ├── numpydoc
│       │   │   ├── LICENSE.txt
│       │   │   ├── MANIFEST.in
│       │   │   ├── PKG-INFO
│       │   │   ├── README.txt
│       │   │   ├── __init__.py
│       │   │   ├── comment_eater.py
│       │   │   ├── compiler_unparse.py
│       │   │   ├── docscrape.py
│       │   │   ├── docscrape_sphinx.py
│       │   │   ├── numpydoc.py
│       │   │   ├── phantom_import.py
│       │   │   ├── plot_directive.py
│       │   │   ├── setup.cfg
│       │   │   ├── setup.py
│       │   │   ├── tests
│       │   │   │   └── test_docscrape.py
│       │   │   └── traitsdoc.py
│       │   └── sphinxcontrib
│       │       ├── __init__.py
│       │       ├── programoutput.py
│       │       └── spelling.py
│       └── tutorial.rst
├── icon.png
├── requirements.txt
├── scripts
│   ├── ade_GEO2csv.py
│   ├── ade_analysis.py
│   └── ade_run.py
├── setup.cfg
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # OSX stuff
2 | *.DS_Store
3 |
4 | # Archivers
5 | **/*.tar.gz
6 | # -------------------------- Python -------------------------- #
7 |
8 | # Jupyter Notebook checkpoints
9 | *-checkpoint.ipynb
10 |
11 | # Byte-compiled / optimized / DLL files
12 | __pycache__/
13 | *.py[cod]
14 | *$py.class
15 |
16 | # Temp
17 | *~
18 |
19 | # C extensions
20 | *.so
21 |
22 | # Distribution / packaging
23 | .Python
24 | env/
25 | build/
26 | develop-eggs/
27 | dist/
28 | downloads/
29 | eggs/
30 | .eggs/
31 | lib/
32 | lib64/
33 | parts/
34 | sdist/
35 | var/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 | # Numpy files
40 | #*.npy
41 |
42 | # Dump files
43 | *.pkl
44 |
45 | # Images
46 | *.png
47 | !*adenine_logo.png
48 | !icon.png
49 |
50 |
51 | # PyInstaller
52 | # Usually these files are written by a python script from a template
53 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
54 | *.manifest
55 | *.spec
56 |
57 | # Installer logs
58 | pip-log.txt
59 | pip-delete-this-directory.txt
60 |
61 | # Unit test / coverage reports
62 | htmlcov/
63 | .tox/
64 | .coverage
65 | .coverage.*
66 | .cache
67 | nosetests.xml
68 | coverage.xml
69 | *,cover
70 |
71 | # Translations
72 | *.mo
73 | *.pot
74 |
75 | # Django stuff:
76 | *.log
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | target/
83 |
84 | # -------------------------- TeX -------------------------- #
85 |
86 | *.aux
87 | *.glo
88 | *.idx
89 | *.log
90 | *.toc
91 | *.ist
92 | *.acn
93 | *.acr
94 | *.alg
95 | *.bbl
96 | *.blg
97 | *.dvi
98 | *.glg
99 | *.gls
100 | *.ilg
101 | *.ind
102 | *.lof
103 | *.lot
104 | *.maf
105 | *.mtc
106 | *.mtc1
107 | *.out
108 | *.synctex.gz
109 |
110 | # -------------------------- results -------------------------- #
111 | **/results/**/*
112 |
113 | # --- LaTeX --- #
114 | ## Core latex/pdflatex auxiliary files:
115 | *.aux
116 | *.lof
117 | *.log
118 | *.lot
119 | **.fls
120 | *.out
121 | *.toc
122 | *.fmt
123 | *.fot
124 | *.cb
125 | *.cb2
126 |
127 | ## Intermediate documents:
128 | *.dvi
129 | *-converted-to.*
130 | # these rules might exclude image files for figures etc.
131 | # *.ps
132 | # *.eps
133 | #*.pdf
134 |
135 |
136 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
137 | *.bbl
138 | *.bcf
139 | *.blg
140 | *-blx.aux
141 | *-blx.bib
142 | *.brf
143 | *.run.xml
144 |
145 | ## Build tool auxiliary files:
146 | *.fdb_latexmk
147 | .synctex
148 | .synctex.gz
149 | .synctex.gz(busy)
150 | *.pdfsync
151 |
152 | ## Auxiliary and intermediate files from other packages:
153 | # algorithms
154 | *.alg
155 | *.loa
156 |
157 | # achemso
158 | acs-*.bib
159 |
160 | # amsthm
161 | *.thm
162 |
163 | # beamer
164 | *.nav
165 | *.snm
166 | *.vrb
167 |
168 | # cprotect
169 | *.cpt
170 |
171 | # fixme
172 | *.lox
173 |
174 | #(r)(e)ledmac/(r)(e)ledpar
175 | *.end
176 | *.?end
177 | *.[1-9]
178 | *.[1-9][0-9]
179 | *.[1-9][0-9][0-9]
180 | *.[1-9]R
181 | *.[1-9][0-9]R
182 | *.[1-9][0-9][0-9]R
183 | *.eledsec[1-9]
184 | *.eledsec[1-9]R
185 | *.eledsec[1-9][0-9]
186 | *.eledsec[1-9][0-9]R
187 | *.eledsec[1-9][0-9][0-9]
188 | *.eledsec[1-9][0-9][0-9]R
189 |
190 | # glossaries
191 | *.acn
192 | *.acr
193 | *.glg
194 | *.glo
195 | *.gls
196 | *.glsdefs
197 |
198 | # gnuplottex
199 | *-gnuplottex-*
200 |
201 | # hyperref
202 | *.brf
203 |
204 | # knitr
205 | *-concordance.tex
206 | # TODO Comment the next line if you want to keep your tikz graphics files
207 | *.tikz
208 | *-tikzDictionary
209 |
210 | # listings
211 | *.lol
212 |
213 | # makeidx
214 | *.idx
215 | *.ilg
216 | *.ind
217 | *.ist
218 |
219 | # minitoc
220 | *.maf
221 | *.mlf
222 | *.mlt
223 | *.mtc
224 | *.mtc[0-9]
225 | *.mtc[1-9][0-9]
226 |
227 | # minted
228 | _minted*
229 | *.pyg
230 |
231 | # morewrites
232 | *.mw
233 |
234 | # mylatexformat
235 | *.fmt
236 |
237 | # nomencl
238 | *.nlo
239 |
240 | # sagetex
241 | *.sagetex.sage
242 | *.sagetex.py
243 | *.sagetex.scmd
244 |
245 | # sympy
246 | *.sout
247 | *.sympy
248 | sympy-plots-for-*.tex/
249 |
250 | # pdfcomment
251 | *.upa
252 | *.upb
253 |
254 | # pythontex
255 | *.pytxcode
256 | pythontex-files-*/
257 |
258 | # thmtools
259 | *.loe
260 |
261 | # TikZ & PGF
262 | *.dpth
263 | *.md5
264 | *.auxlock
265 |
266 | # todonotes
267 | *.tdo
268 |
269 | # xindy
270 | *.xdy
271 |
272 | # xypic precompiled matrices
273 | *.xyc
274 |
275 | # endfloat
276 | *.ttt
277 | *.fff
278 |
279 | # Latexian
280 | TSWLatexianTemp*
281 |
282 | ## Editors:
283 | # WinEdt
284 | *.bak
285 | *.sav
286 |
287 | # Texpad
288 | .texpadtmp
289 |
290 | # Kile
291 | *.backup
292 |
293 | # KBibTeX
294 | *~[0-9]*
295 |
--------------------------------------------------------------------------------
/AUTHORS.txt:
--------------------------------------------------------------------------------
1 | Samuele Fiorini [samuele dot fiorini at dibris dot unige dot it]
2 | Federico Tomasi [federico dot tomasi at dibris dot unige dot it]
3 | Annalisa Barla [annalisa dot barla at unige dot it]
4 |
--------------------------------------------------------------------------------
/LICENCE.txt:
--------------------------------------------------------------------------------
1 | =======================================================================================
2 | Samuele Fiorini [samuele dot fiorini at dibris dot unige dot it]
3 | Federico Tomasi [federico dot tomasi at dibris dot unige dot it]
4 | Annalisa Barla [annalisa dot barla at unige dot it]
5 |
6 | This file is part of adenine.
7 |
8 | The code is released under the BSD 2-Clause (FreeBSD) License.
9 |
10 | Copyright (c) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla.
11 | All rights reserved.
12 |
13 | Redistribution and use in source and binary forms, with or without
14 | modification, are permitted provided that the following conditions are met:
15 |
16 | - Redistributions of source code must retain the above copyright notice,
17 | this list of conditions and the following disclaimer.
18 | - Redistributions in binary form must reproduce the above copyright
19 | notice, this list of conditions and the following disclaimer in the
20 | documentation and/or other materials provided with the distribution.
21 |
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
26 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 | POSSIBILITY OF SUCH DAMAGE.
32 | =======================================================================================
33 |
--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
1 | # file GENERATED by distutils, do NOT edit
2 | README.md
3 | setup.cfg
4 | setup.py
5 | adenine/__init__.py
6 | adenine/ade_config.py
7 | adenine/core/__init__.py
8 | adenine/core/analyze_results.py
9 | adenine/core/define_pipeline.py
10 | adenine/core/job_distribution.py
11 | adenine/core/pipelines.py
12 | adenine/core/plotting.py
13 | adenine/externals/__init__.py
14 | adenine/externals/hierarchical.py
15 | adenine/utils/__init__.py
16 | adenine/utils/data_source.py
17 | adenine/utils/extensions.py
18 | adenine/utils/extra.py
19 | adenine/utils/scores.py
20 | adenine/utils/templates.py
21 | scripts/ade_analysis.py
22 | scripts/ade_run.py
23 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE.txt
3 | include adenine/examples
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | 
3 |
4 |
5 | -----------------
6 |
7 | # Adenine: A data exploration pipeline
8 |
9 | **adenine** is a machine learning and data mining Python library for exploratory data analysis.
10 |
11 | The main structure of **adenine** can be summarized in the following 4 steps.
12 |
13 | 1. **Imputing:** Does your dataset have missing entries? In the first step you can fill the missing values by choosing among several strategies: feature-wise median, mean or most frequent value, or k-NN imputation.
14 |
15 | 2. **Preprocessing:** Have you ever wondered what would have changed if only your data had been preprocessed differently? Or whether data preprocessing is a good idea at all? **adenine** includes several preprocessing procedures, such as data recentering, Min-Max scaling, standardization and normalization. **adenine** also allows you to compare the results of the analysis made with different preprocessing strategies.
16 |
17 | 3. **Dimensionality Reduction:** In the context of data exploration, this phase becomes particularly helpful for high-dimensional data. This step includes manifold learning (such as Isomap, multidimensional scaling, etc.) and unsupervised feature learning (principal component analysis, kernel PCA, Bernoulli RBM, etc.) techniques.
18 |
19 | 4. **Clustering:** This step aims at grouping data into clusters in an unsupervised manner. Several techniques such as k-means, spectral or hierarchical clustering are offered.
20 |
21 | The final output of **adenine** is a compact textual and graphical representation of the results obtained from the pipelines built with each possible combination of the algorithms selected at each step.
22 |
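As a back-of-the-envelope illustration of how the enabled options multiply into pipelines, consider this minimal sketch (hypothetical step choices, not adenine's internal API):

```python
from itertools import product

# One list per step: the algorithm variants enabled in the config file
# (hypothetical values, for illustration only).
steps = [
    ['median'],                    # imputing
    ['Recenter', 'MinMax'],        # preprocessing
    ['PCA', 'KernelPCA', 'tSNE'],  # dimensionality reduction
    ['KMeans', 'Hierarchical'],    # clustering
]

# One pipeline per combination of one variant per step:
# 1 * 2 * 3 * 2 = 12 pipelines.
pipelines = list(product(*steps))
print(len(pipelines))  # 12
```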
23 | **adenine** can run on multiple cores/machines* and it is fully `scikit-learn` compliant.
24 |
25 | ## Installation
26 |
27 | **adenine** supports Python 2.7
28 |
29 | ### Pip installation
30 | `$ pip install adenine`
31 |
32 | ### Installing from sources
33 | ```bash
34 | $ git clone https://github.com/slipguru/adenine
35 | $ cd adenine
36 | $ python setup.py install
37 | ```
38 |
39 | ## Try Adenine
40 |
41 | ### 1. Create your configuration file
42 | Start from the provided template and edit your configuration file with your favourite text editor
43 | ```bash
44 | $ ade_run.py -c my-config-file.py
45 | $ vim my-config-file.py
46 | ...
47 | ```
48 | ```python
49 | from adenine.utils import data_source
50 |
51 | # -------------------------- EXPERIMENT INFO ------------------------- #
52 | exp_tag = '_experiment'
53 | output_root_folder = 'results'
54 | plotting_context = 'notebook' # one of {paper, notebook, talk, poster}
55 | file_format = 'pdf' # or 'png'
56 |
57 | # ---------------------------- INPUT DATA ---------------------------- #
58 | # Load an example dataset or specify your input data in tabular format
59 | X, y, feat_names, index = data_source.load('iris')
60 |
61 | # ----------------------- PIPELINES DEFINITION ------------------------ #
62 | # --- Missing Values Imputing --- #
63 | step0 = {'Impute': [True, {'missing_values': 'NaN',
64 | 'strategy': ['nearest_neighbors']}]}
65 |
66 | # --- Data Preprocessing --- #
67 | step1 = {'MinMax': [True, {'feature_range': [(0, 1)]}]}
68 |
69 | # --- Unsupervised feature learning --- #
70 | step2 = {'KernelPCA': [True, {'kernel': ['linear', 'rbf', 'poly']}],
71 | 'Isomap': [False, {'n_neighbors': 5}],
72 | 'MDS': [True, {'metric': True}],
73 | 'tSNE': [False],
74 | 'RBM': [True, {'n_components': 256}]
75 | }
76 |
77 | # --- Clustering --- #
78 | # affinity can be precomputed for AP, Spectral and Hierarchical
79 | step3 = {'KMeans': [True, {'n_clusters': [3, 'auto']}],
80 | 'Spectral': [False, {'n_clusters': [3]}],
81 | 'Hierarchical': [False, {'n_clusters': [3],
82 | 'affinity': ['euclidean'],
83 | 'linkage': ['ward', 'average']}]
84 | }
85 | ```
86 |
87 | ### 2. Run the pipelines
88 | ```bash
89 | $ ade_run.py my-config-file.py
90 | ```
91 |
92 | ### 3. Automatically generate beautiful publication-ready plots and textual results
93 | ```bash
94 | $ ade_analysis.py results/ade_experiment_
95 | ```
96 |
97 | ## Need more info?
98 | Check out the project [homepage](http://slipguru.github.io/adenine/index.html)
99 |
100 | ## *Got large-scale data?
101 |
102 | **adenine** takes advantage of `mpi4py` to distribute the execution of the pipelines on HPC architectures
103 | ```bash
104 | $ mpirun -np <n-procs> --hosts <host1,host2,...> ade_run.py my-config-file.py
105 | ```
106 |
107 | ## Citation
108 |
109 | If you use **adenine** in a scientific publication, we would appreciate citations:
110 | ```tex
111 | @{coming soon}
112 | ```
113 |
--------------------------------------------------------------------------------
/adenine/__init__.py:
--------------------------------------------------------------------------------
1 | ######################################################################
2 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
3 | #
4 | # FreeBSD License
5 | ######################################################################
6 |
7 | __version__ = "0.1.4"
8 |
9 | from adenine import utils
10 | from adenine import core
11 | from adenine.core import main
12 |
--------------------------------------------------------------------------------
/adenine/ade_config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """Configuration file for adenine."""
4 |
5 | from adenine.utils import data_source
6 |
7 | # -------------------------- EXPERIMENT INFO ------------------------- #
8 | exp_tag = '_experiment'
9 | output_root_folder = 'results'
10 | plotting_context = 'notebook' # one of {paper, notebook, talk, poster}
11 | file_format = 'pdf' # or 'png'
12 | use_compression = False # use gzip to compress the results
13 |
14 | # ---------------------------- INPUT DATA ---------------------------- #
15 | # Load an example dataset or specify your input data in tabular format
16 | data_file = 'data.csv'
17 | labels_file = 'labels.csv' # OPTIONAL
18 | samples_on = 'rows' # if samples lie on columns use 'cols' or 'col'
19 | data_sep = ',' # the data separator. e.g., ',', '\t', ' ', ...
20 | X, y, feat_names, index = data_source.load('custom',
21 | data_file, labels_file,
22 | samples_on=samples_on,
23 | sep=data_sep)
24 |
25 | # ----------------------- PIPELINES DEFINITION ------------------------ #
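# Note: each step below maps an algorithm name to [enabled_flag, params];
# the boolean switches the algorithm on or off, and every combination of
# the enabled algorithms (and of the listed parameter values) generates a
# separate pipeline.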
26 | # --- Missing values imputing --- #
27 | step0 = {'Impute': [False, {'missing_values': 'NaN',
28 | 'strategy': ['median',
29 | 'mean',
30 | 'nearest_neighbors']}]}
31 |
32 | # --- Data preprocessing --- #
33 | step1 = {'None': [False], 'Recenter': [False], 'Standardize': [False],
34 | 'Normalize': [False, {'norm': ['l1', 'l2']}],
35 | 'MinMax': [False, {'feature_range': [(0, 1), (-1, 1)]}]}
36 |
37 | # --- Unsupervised feature learning --- #
38 | # affinity can be precomputed for SE
39 | step2 = {'PCA': [False, {'n_components': 3}],
40 | 'IncrementalPCA': [False],
41 | 'RandomizedPCA': [False],
42 | 'KernelPCA': [False, {'kernel': ['linear', 'rbf', 'poly']}],
43 | 'Isomap': [False, {'n_neighbors': 5}],
44 | 'LLE': [False, {'n_neighbors': 5,
45 | 'method': ['standard', 'modified',
46 | 'hessian', 'ltsa']}],
47 | 'SE': [False, {'affinity': ['nearest_neighbors', 'rbf']}],
48 | 'MDS': [False, {'metric': True}],
49 | 'tSNE': [False],
50 | 'RBM': [False, {'n_components': 256}],
51 | 'None': [False]
52 | }
53 |
54 | # --- Clustering --- #
55 | # affinity can be precomputed for AP, Spectral and Hierarchical
56 | step3 = {'KMeans': [False, {'n_clusters': [3, 'auto']}],
57 | 'AP': [False, {'preference': ['auto']}],
58 | 'MS': [False],
59 | 'Spectral': [False, {'n_clusters': [3, 8]}],
60 | 'Hierarchical': [False, {'n_clusters': [3, 8],
61 | 'affinity': ['manhattan', 'euclidean'],
62 | 'linkage': ['ward', 'complete', 'average']}]
63 | }
64 |
--------------------------------------------------------------------------------
/adenine/cluster/__init__.py:
--------------------------------------------------------------------------------
1 | from adenine.cluster.optics import Optics
2 | from adenine.cluster.agglomerative import AgglomerativeClustering
3 |
--------------------------------------------------------------------------------
/adenine/cluster/agglomerative.py:
--------------------------------------------------------------------------------
1 | """Agglomerative clustering class extension."""
2 | import logging
3 | import numpy as np
4 | from sklearn.externals.joblib import Memory
5 | from adenine.externals import AgglomerativeClustering
6 |
7 |
8 | class AgglomerativeClustering(AgglomerativeClustering):
9 | """Extension of sklearn Agglomerative Clustering.
10 |
11 | This Agglomerative Clustering class, if required, can perform automatic
12 | discovery of the number of clusters.
13 | """
14 |
15 | def __init__(self, n_clusters=2, affinity="euclidean",
16 | memory=Memory(cachedir=None, verbose=0),
17 | connectivity=None, n_components=None,
18 | compute_full_tree='auto', linkage='ward',
19 | pooling_func=np.mean, return_distance=False):
20 | """Agglomerative Clustering.
21 |
22 | Recursively merges the pair of clusters that minimally increases
23 | a given linkage distance.
24 |
25 | Read more in the :ref:`User Guide <hierarchical_clustering>`.
26 |
27 | Parameters
28 | ----------
29 | n_clusters : int, default=2
30 | The number of clusters to find.
31 |
32 | connectivity : array-like or callable, optional
33 | Connectivity matrix. Defines for each sample the neighboring
34 | samples following a given structure of the data.
35 | This can be a connectivity matrix itself or a callable that
36 | transforms the data into a connectivity matrix, such as derived
37 | from kneighbors_graph. Default is None, i.e, the
38 | hierarchical clustering algorithm is unstructured.
39 |
40 | affinity : string or callable, default: "euclidean"
41 | Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
42 | "manhattan", "cosine", or 'precomputed'.
43 | If linkage is "ward", only "euclidean" is accepted.
44 |
45 | memory : Instance of joblib.Memory or string (optional)
46 | Used to cache the output of the computation of the tree.
47 | By default, no caching is done. If a string is given, it is the
48 | path to the caching directory.
49 |
50 | n_components : int (optional)
51 | Number of connected components. If None the number of connected
52 | components is estimated from the connectivity matrix.
53 | NOTE: This parameter is now directly determined from the
54 | connectivity matrix and will be removed in 0.18
55 |
56 | compute_full_tree : bool or 'auto' (optional)
57 | Stop early the construction of the tree at n_clusters. This is
58 | useful to decrease computation time if the number of clusters is
59 | not small compared to the number of samples. This option is
60 | useful only when specifying a connectivity matrix. Note also that
61 | when varying the number of clusters and using caching, it may
62 | be advantageous to compute the full tree.
63 |
64 | linkage : {"ward", "complete", "average"}, optional, default: "ward"
65 | Which linkage criterion to use. The linkage criterion determines
66 | which distance to use between sets of observation. The algorithm
67 | will merge the pairs of cluster that minimize this criterion.
68 |
69 | - ward minimizes the variance of the clusters being merged.
70 | - average uses the average of the distances of each observation of
71 | the two sets.
72 | - complete or maximum linkage uses the maximum distances between
73 | all observations of the two sets.
74 |
75 | pooling_func : callable, default=np.mean
76 | This combines the values of agglomerated features into a single
77 | value, and should accept an array of shape [M, N] and the keyword
78 | argument ``axis=1``, and reduce it to an array of size [M].
79 |
80 | Attributes
81 | ----------
82 | labels_ : array [n_samples]
83 | cluster labels for each point
84 |
85 | n_leaves_ : int
86 | Number of leaves in the hierarchical tree.
87 |
88 | n_components_ : int
89 | The estimated number of connected components in the graph.
90 |
91 | children_ : array-like, shape (n_nodes-1, 2)
92 | The children of each non-leaf node. Values less than `n_samples`
93 | correspond to leaves of the tree which are the original samples.
94 | A node `i` greater than or equal to `n_samples` is a non-leaf
95 | node and has children `children_[i - n_samples]`. Alternatively
96 | at the i-th iteration, children[i][0] and children[i][1]
97 | are merged to form node `n_samples + i`
98 |
99 | """
100 | super(AgglomerativeClustering, self).__init__(
101 | n_clusters, affinity,
102 | memory, connectivity, n_components,
103 | compute_full_tree, linkage,
104 | pooling_func, return_distance)
105 |
106 | def fit(self, X, **kwargs):
107 | """Fit the hierarchical clustering on the data.
108 |
109 | Parameters
110 | ----------
111 | X : array-like, shape = [n_samples, n_features]
112 | The samples a.k.a. observations.
113 |
114 | Returns
115 | -------
116 | self
117 | """
118 | if self.n_clusters == 'auto':
119 | # assign an arbitrary high number for the max number of clusters
120 | self.n_clusters = int(.75 * X.shape[0])
121 | super(AgglomerativeClustering, self).fit(X, **kwargs)
122 | try:
123 | # use self.distances
124 | # TODO
125 | raise NotImplementedError()
126 | except AttributeError:
127 | logging.error("Automatic discovery of the number of clusters "
128 | "cannot be performed. AgglomerativeClustering from "
129 | "adenine.external does not contain a "
130 | "`self.distances` attribute. Try to update adenine.")
131 | # hence, when optimal_clusters is defined, use it
132 | optimal_clusters = -1 # TODO
133 | self.n_clusters = optimal_clusters
134 | # perform the standard fit
135 | super(AgglomerativeClustering, self).fit(X, **kwargs)
136 |
--------------------------------------------------------------------------------
/adenine/core/__init__.py:
--------------------------------------------------------------------------------
1 | ######################################################################
2 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
3 | #
4 | # FreeBSD License
5 | ######################################################################
6 |
7 | from adenine.core.job_distribution import main
8 |
--------------------------------------------------------------------------------
/adenine/core/job_distribution.py:
--------------------------------------------------------------------------------
1 | """Master slave."""
2 | from __future__ import print_function
3 | import os
4 | import imp
5 | import logging
6 | import shutil
7 | import gzip
8 | import numpy as np
9 |
10 | from collections import deque
11 | from six.moves import cPickle as pkl
12 |
13 | from adenine.core import define_pipeline
14 | from adenine.core.pipelines import pipe_worker
15 | from adenine.utils import extra
16 |
17 | try:
18 | from mpi4py import MPI
19 |
20 | COMM = MPI.COMM_WORLD
21 | RANK = COMM.Get_rank()
22 | NAME = MPI.Get_processor_name()
23 |
24 | IS_MPI_JOB = COMM.Get_size() > 1
25 |
26 | except ImportError:
27 | # print("mpi4py module not found. MPI job distribution disabled.")
28 | COMM = None
29 | RANK = 0
30 | NAME = 'localhost'
31 |
32 | IS_MPI_JOB = False
33 |
34 | # MAX_RESUBMISSIONS = 2
35 | # constants to use as tags in communications
36 | DO_WORK = 100
37 | EXIT = 200
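# Protocol sketch: the master seeds each slave with one pipeline (tag
# DO_WORK), sends a new pipeline to a slave as soon as its result comes
# back, and finally sends EXIT-tagged messages so the slaves terminate.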
38 |
39 |
40 | def master_single_machine(pipes, X):
41 | """Fit and transform/predict some pipelines on some data (single machine).
42 |
43 | This function fits each pipeline in the input list on the provided data.
44 | The results are dumped into a pkl file as a dictionary of dictionaries of
45 | the form {'pipe_id': {'stepID' : [alg_name, level, params, data_out,
46 | data_in, model_obj, voronoi_suitable_object], ...}, ...}. The model_obj is
47 | the sklearn model which has been fit on the dataset, the
48 | voronoi_suitable_object is the very same model but fitted on just the first
49 | two dimensions of the dataset. If a pipeline fails for some reason, the
50 | content of the stepID key is a list of np.nan.
51 |
52 | Parameters
53 | -----------
54 | pipes : list of list of tuples
55 | Each tuple contains a label and a sklearn Pipeline object.
56 | X : array of float, shape : n_samples x n_features, default : ()
57 | The input data matrix.
58 |
59 | Returns
60 | -----------
61 | pipes_dump : dict
62 | Dictionary with the results of the computation.
63 | """
64 | import multiprocessing as mp
65 | jobs = []
66 | manager = mp.Manager()
67 | pipes_dump = manager.dict()
68 |
69 | # Submit jobs
70 | for i, pipe in enumerate(pipes):
71 | pipe_id = 'pipe' + str(i)
72 | proc = mp.Process(target=pipe_worker,
73 | args=(pipe_id, pipe, pipes_dump, X))
74 | jobs.append(proc)
75 | proc.start()
76 | logging.info("Job: %s submitted", pipe_id)
77 |
78 | # Collect results
79 | count = 0
80 | for proc in jobs:
81 | proc.join()
82 | count += 1
83 | logging.info("%d jobs collected", count)
84 |
85 | # import joblib as jl
86 | # jl.Parallel(n_jobs=-1) \
87 | # (jl.delayed(pipe_worker)(
88 | # 'pipe' + str(i), pipe, pipes_dump, X) for i, pipe in enumerate(
89 | # pipes))
90 |
91 | return dict(pipes_dump)
92 |
93 |
94 | @extra.timed
95 | def master(config):
96 | """Distribute pipelines with mpi4py or multiprocessing."""
97 | # Pipeline definition
98 | pipes = define_pipeline.parse_steps(
99 | [config.step0, config.step1,
100 | config.step2, config.step3])
101 |
102 | if not IS_MPI_JOB:
103 | return master_single_machine(pipes, config.X)
104 |
105 | # RUN PIPELINES
106 | nprocs = COMM.Get_size()
107 | # print(NAME + ": start running slaves", nprocs, NAME)
108 | queue = deque(list(enumerate(pipes)))
109 |
110 | pipe_dump = dict()
111 | count = 0
112 | n_pipes = len(queue)
113 |
114 | # seed the slaves by sending work to each processor
115 | for rankk in range(1, min(nprocs, n_pipes)):
116 | pipe_tuple = queue.popleft()
117 | COMM.send(pipe_tuple, dest=rankk, tag=DO_WORK)
118 | # print(NAME + ": send to rank", rankk)
119 |
120 | # loop until there's no more work to do. If queue is empty skips the loop.
121 | while queue:
122 | pipe_tuple = queue.popleft()
123 | # receive result from slave
124 | status = MPI.Status()
125 | pipe_id, step_dump = COMM.recv(
126 | source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
127 | pipe_dump[pipe_id] = step_dump
128 | count += 1
129 | # send to the same slave new work
130 | COMM.send(pipe_tuple, dest=status.source, tag=DO_WORK)
131 |
132 | # there's no more work to do, so receive all the results from the slaves
133 | for rankk in range(1, min(nprocs, n_pipes)):
134 | # print(NAME + ": master - waiting from", rankk)
135 | status = MPI.Status()
136 | pipe_id, step_dump = COMM.recv(
137 | source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
138 | pipe_dump[pipe_id] = step_dump
139 | count += 1
140 |
141 | # tell all the slaves to exit by sending an empty message with the EXIT_TAG
142 | for rankk in range(1, nprocs):
143 | # print(NAME + ": master - killing", rankk)
144 | COMM.send(0, dest=rankk, tag=EXIT)
145 |
146 | # print(NAME + ": terminating master")
147 | return pipe_dump
148 |
149 |
150 | def slave(X):
151 | """Pipeline evaluation.
152 |
153 | Parameters
154 | ----------
155 | X : array of float, shape : n_samples x n_features, default : ()
156 | The input data matrix.
157 | """
158 | try:
159 | while True:
160 | status_ = MPI.Status()
161 | received = COMM.recv(source=0, tag=MPI.ANY_TAG, status=status_)
162 | # check the tag of the received message
163 | if status_.tag == EXIT:
164 | return
165 | # do the work
166 | i, pipe = received
167 | # print(NAME + ": slave received", RANK, i)
168 | pipe_id = 'pipe' + str(i)
169 | step_dump = pipe_worker(
170 | pipe_id, pipe, None, X)
171 | COMM.send((pipe_id, step_dump), dest=0, tag=0)
172 |
173 | except StandardError as exc:
174 | print("Quitting ... TB:", str(exc))
175 |
176 |
177 | def main(config_file):
178 | """Generate the pipelines."""
179 |
180 | if RANK == 0:
181 | # Load the configuration file
182 | config_path = os.path.abspath(config_file)
183 |
184 | # For some reason, it must be atomic
185 | imp.acquire_lock()
186 | config = imp.load_source('ade_config', config_path)
187 | imp.release_lock()
188 |
189 | # this barrier prevents the slaves from re-downloading the same GEO
190 | # dataset if it is not locally present
191 | if IS_MPI_JOB:
192 | # Wait for all jobs to end
193 | COMM.barrier()
194 |
195 | if RANK != 0:
196 | # Load the configuration file
197 | config_path = os.path.abspath(config_file)
198 |
199 | # For some reason, it must be atomic
200 | imp.acquire_lock()
201 | config = imp.load_source('ade_config', config_path)
202 | imp.release_lock()
203 |
204 | if hasattr(config, 'use_compression'):
205 | use_compression = config.use_compression
206 | else:
207 | use_compression = False
208 |
209 | extra.set_module_defaults(
210 | config, {
211 | 'step0': {'Impute': [False]},
212 | 'step1': {'None': [True]},
213 | 'step2': {'None': [True]},
214 | 'step3': {'None': [False]},
215 | 'exp_tag': 'debug',
216 | 'output_root_folder': 'results',
217 | 'verbose': False})
218 |
219 | # Read the variables from the config file
220 | X = config.X
221 |
222 | if RANK == 0:
223 | # Get the experiment tag and the output root folder
224 | exp_tag, root = config.exp_tag, config.output_root_folder
225 | if not os.path.exists(root):
226 | os.makedirs(root)
227 |
228 | filename = '_'.join(('ade', exp_tag, extra.get_time()))
229 | logfile = os.path.join(root, filename + '.log')
230 | logging.basicConfig(filename=logfile, level=logging.INFO, filemode='w',
231 | format='%(levelname)s (%(name)s): %(message)s')
232 | root_logger = logging.getLogger()
233 | lsh = logging.StreamHandler()
234 | lsh.setLevel(logging.DEBUG if config.verbose else logging.ERROR)
235 | lsh.setFormatter(
236 | logging.Formatter('%(levelname)s (%(name)s): %(message)s'))
237 | root_logger.addHandler(lsh)
238 | pipes_dump = master(config)
239 | else:
240 | slave(X)
241 |
242 | if IS_MPI_JOB:
243 | # Wait for all jobs to end
244 | COMM.barrier()
245 |
246 | if RANK == 0:
247 | # Output Name
248 | outfile = filename
249 | outfolder = os.path.join(root, outfile)
250 |
251 | # Create exp folder into the root folder
252 | os.makedirs(outfolder)
253 |
254 | # pkl Dump
255 | logging.info('Saving Adenine results...')
256 | if use_compression:
257 | with gzip.open(os.path.join(outfolder, outfile + '.pkl.tz'),
258 | 'wb') as out:
259 | pkl.dump(pipes_dump, out)
260 | logging.info("Dump : %s", os.path.join(outfolder, outfile + '.pkl.tz'))
261 | else:
262 | with open(os.path.join(outfolder, outfile + '.pkl'), 'wb') as out:
263 | pkl.dump(pipes_dump, out)
264 | logging.info("Dump : %s", os.path.join(outfolder, outfile + '.pkl'))
265 |
266 | # Retrieve info from the config file
267 | _index = config.index if hasattr(config, 'index') \
268 | else np.arange(X.shape[0])
269 | _y = config.y if hasattr(config, 'y') else None
270 | if use_compression:
271 | with gzip.open(os.path.join(outfolder, '__data.pkl.tz'), 'wb') as out:
272 | pkl.dump({'X': X, 'y': _y, 'index': _index}, out)
273 | logging.info("Dump : %s", os.path.join(outfolder, '__data.pkl.tz'))
274 | else:
275 | with open(os.path.join(outfolder, '__data.pkl'), 'wb') as out:
276 | pkl.dump({'X': X, 'y': _y, 'index': _index}, out)
277 | logging.info("Dump : %s", os.path.join(outfolder, '__data.pkl'))
278 |
279 | # Copy the ade_config just used into the outFolder
280 | shutil.copy(config_path, os.path.join(outfolder, 'ade_config.py'))
281 |
282 | root_logger.handlers[0].close()
283 |
284 | # Move the logging file into the outFolder
285 | shutil.move(logfile, outfolder)
286 |
--------------------------------------------------------------------------------
/adenine/core/pipelines.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | import copy
11 | import logging
12 | import numpy as np
13 |
14 |
15 | def create(pdef):
16 | """Scikit-learn Pipelines objects creation (deprecated).
17 |
18 | This function creates a list of sklearn Pipeline objects starting from
19 | the list of lists of tuples given as input, which can be created using
20 | the adenine.core.define_pipeline module.
21 |
22 | Parameters
23 | -----------
24 | pdef : list of list of tuples
25 | This argument contains the specification needed by sklearn in order
26 | to create a working Pipeline object.
27 |
28 | Returns
29 | -----------
30 | pipes : list of sklearn.pipeline.Pipeline objects
31 | The list of Pipelines; each of them can be fitted and transformed
32 | with some data.
33 | """
34 | from sklearn.pipeline import Pipeline
35 | return [Pipeline(p) for p in pdef]
36 |
37 |
38 | def which_level(label):
39 | """Define the step level according to the input step label [DEPRECATED].
40 |
41 | This function returns the level (i.e. imputing, preproc, dimred,
42 | clustering, None) according to the step label provided as input.
43 |
44 | Parameters
45 | -----------
46 | label : string
47 | This is the step level as it is reported in the ade_config file.
48 |
49 | Returns
50 | -----------
51 | level : {imputing, preproc, dimred, clustering, None}
52 | The appropriate level of the input step.
53 | """
54 | if not isinstance(label, basestring):
55 | raise ValueError("String expected")
56 |
57 | label = label.lower()
58 | if label.startswith('impute'):
59 | level = 'imputing'
60 | elif label in ('recenter', 'standardize', 'normalize', 'minmax'):
61 | level = 'preproc'
62 | elif label in ('pca', 'incrementalpca', 'randomizedpca', 'kernelpca',
63 | 'isomap', 'lle', 'se', 'mds', 'tsne', 'rbm'):
64 | level = 'dimred'
65 | elif label in ('kmeans', 'ap', 'ms', 'spectral',
66 | 'hierarchical'):
67 | level = 'clustering'
68 | else:
69 | level = 'None'
70 | return level
71 |
72 |
73 | def evaluate(level, step, X):
74 | """Transform or predict according to the input level.
75 |
76 | This function uses the transform or the predict method on the input
77 | sklearn-like step according to its level (i.e. imputing, preproc, dimred,
78 | clustering, none).
79 |
80 | Parameters
81 | -----------
82 | level : {'imputing', 'preproc', 'dimred', 'clustering', 'None'}
83 | The step level.
84 |
85 | step : sklearn-like object
86 | This might be an Imputer, or a PCA, or a KMeans (and so on...)
87 | sklearn-like object.
88 |
89 | X : array of float, shape : n_samples x n_features
90 | The input data matrix.
91 |
92 | Returns
93 | -----------
94 | res : array of float
95 | A matrix projection in case of dimred, a label vector in case of
96 | clustering, and so on.
97 | """
98 | if level in ('imputing', 'preproc', 'dimred', 'None'):
99 | if hasattr(step, 'embedding_'):
100 | res = step.embedding_
101 | else:
102 | res = step.transform(X)
103 | elif level == 'clustering':
104 | if hasattr(step, 'labels_'):
105 | res = step.labels_ # e.g. in case of spectral clustering
106 | elif hasattr(step, 'affinity') and step.affinity == 'precomputed':
107 | if not hasattr(step.estimator, 'labels_'):
108 | step.estimator.fit(X)
109 | res = step.estimator.labels_
110 | else:
111 | res = step.predict(X)
112 | return res
113 |
114 |
115 | def pipe_worker(pipe_id, pipe, pipes_dump, X):
116 | """Parallel pipelines execution.
117 |
118 | Parameters
119 | -----------
120 | pipe_id : string
121 | Pipeline identifier.
122 |
123 | pipe : list of tuples
124 | Tuple containing a label and a sklearn Pipeline object.
125 |
126 | pipes_dump : multiprocessing.Manager.dict
127 | Dictionary containing the results of the parallel execution.
128 |
129 | X : array of float, shape : n_samples x n_features, default : ()
130 | The input data matrix.
131 | """
132 | step_dump = dict()
133 |
134 | # COPY X as X_curr (to avoid that the next pipeline
135 | # works on the results of the previous one)
136 | X_curr = np.array(X)
137 | for j, step in enumerate(pipe):
138 | # step[0] -> step_label | step[1] -> model, sklearn (or sklearn-like)
139 | # object
140 | step_id = 'step' + str(j)
141 | # 1. define which level of step is this (i.e.: imputing, preproc,
142 | # dimred, clustering, none)
143 | level = step[-1]
144 | # 2. fit the model (whatever it is)
145 | if step[1].get_params().get('method') == 'hessian':
146 | # check hessian lle constraints
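# note: hessian eigenmapping requires
# n_neighbors > n_components * (n_components + 3) / 2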
147 | n_components = step[1].get_params().get('n_components')
148 | n_neighbors = 1 + (n_components * (n_components + 3) / 2)
149 | step[1].set_params(n_neighbors=n_neighbors)
150 | try:
151 | step[1].fit(X_curr)
152 |
153 | # 3. evaluate (i.e. transform or predict according to the level)
154 | # X_curr = evaluate(level, step[1], X_curr)
155 | X_next = evaluate(level, step[1], X_curr)
156 | # 3.1 if the model is suitable for voronoi tessellation: fit also
157 | # on 2D
158 | mdl_voronoi = None
159 | if hasattr(step[1], 'cluster_centers_'):
160 | mdl_voronoi = copy.copy(step[1].best_estimator_ if hasattr(
161 | step[1], 'best_estimator_') else step[1])
162 | if not hasattr(step[1], 'affinity') or step[1].affinity != 'precomputed':
163 | mdl_voronoi.fit(X_curr[:, :2])
164 | else:
165 | mdl_voronoi.fit(X_curr)
166 |
167 | # 4. save the results in a dictionary of dictionaries of the form
168 | # {'step_id': [label, level, params, data_out, ...]}; to save
169 | # memory, data after preprocessing are not dumped (unused in analysis)
170 | if level in ('preproc', 'imputing'):
171 | result = [step[0], level, step[1].get_params(),
172 | np.empty(0), np.empty(0), step[1], mdl_voronoi]
173 | X_curr = np.array(X_next) # update the matrix
174 |
175 | # save memory dumping X_curr only in case of clustering
176 | elif level == 'dimred':
177 | result = [step[0], level, step[1].get_params(),
178 | X_next, np.empty(0), step[1], mdl_voronoi]
179 | X_curr = X_next # update the matrix
180 |
181 | # clustering
182 | elif level == 'clustering':
183 | result = [step[0], level, step[1].get_params(),
184 | X_next, X_curr, step[1], mdl_voronoi]
185 | if level != 'None':
186 | step_dump[step_id] = result
187 |
188 | except (AssertionError, ValueError) as e:
189 | logging.critical("Pipeline %s failed at step %s. "
190 | "Traceback: %s", pipe_id, step[0], e)
191 |
192 |
193 | # Monkey-patch, see: https://github.com/scikit-learn/scikit-learn/issues/7562
194 | # and wait for the next numpy update
195 | # step_dump['step2'][-2] = None
196 |
197 | if pipes_dump is None:
198 | return step_dump
199 |
200 | pipes_dump[pipe_id] = step_dump
201 |
--------------------------------------------------------------------------------
/adenine/core/template/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/adenine/core/template/__init__.py
--------------------------------------------------------------------------------
/adenine/core/template/d3_template.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # note: "format" this string to load data in csv format with % string
3 | D3_TREE = r"""
4 |
5 |
6 |
32 |
35 |
36 |
37 |
92 |
93 | """
94 |
--------------------------------------------------------------------------------
/adenine/core/template/svg-crowbar.js:
--------------------------------------------------------------------------------
1 | (function() {
2 | var doctype = '<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">';
3 |
4 | window.URL = (window.URL || window.webkitURL);
5 |
6 | var body = document.body;
7 |
8 | var prefix = {
9 | xmlns: "http://www.w3.org/2000/xmlns/",
10 | xlink: "http://www.w3.org/1999/xlink",
11 | svg: "http://www.w3.org/2000/svg"
12 | }
13 |
14 | initialize();
15 |
16 | function initialize() {
17 | var documents = [window.document],
18 | SVGSources = [];
19 | iframes = document.querySelectorAll("iframe"),
20 | objects = document.querySelectorAll("object");
21 |
22 | [].forEach.call(iframes, function(el) {
23 | try {
24 | if (el.contentDocument) {
25 | documents.push(el.contentDocument);
26 | }
27 | } catch(err) {
28 | console.log(err)
29 | }
30 | });
31 |
32 | [].forEach.call(objects, function(el) {
33 | try {
34 | if (el.contentDocument) {
35 | documents.push(el.contentDocument);
36 | }
37 | } catch(err) {
38 | console.log(err)
39 | }
40 | });
41 |
42 | documents.forEach(function(doc) {
43 | var styles = getStyles(doc);
44 | var newSources = getSources(doc, styles);
45 | // because of prototype on NYT pages
46 | for (var i = 0; i < newSources.length; i++) {
47 | SVGSources.push(newSources[i]);
48 | };
49 | })
50 | if (SVGSources.length > 1) {
51 | createPopover(SVGSources);
52 | } else if (SVGSources.length > 0) {
53 | download(SVGSources[0]);
54 | } else {
55 | alert("The Crowbar couldn’t find any SVG nodes.");
56 | }
57 | }
58 |
59 | function createPopover(sources) {
60 | cleanup();
61 |
62 | sources.forEach(function(s1) {
63 | sources.forEach(function(s2) {
64 | if (s1 !== s2) {
65 | if ((Math.abs(s1.top - s2.top) < 38) && (Math.abs(s1.left - s2.left) < 38)) {
66 | s2.top += 38;
67 | s2.left += 38;
68 | }
69 | }
70 | })
71 | });
72 |
73 | var buttonsContainer = document.createElement("div");
74 | body.appendChild(buttonsContainer);
75 |
76 | buttonsContainer.setAttribute("class", "svg-crowbar");
77 | buttonsContainer.style["z-index"] = 1e7;
78 | buttonsContainer.style["position"] = "absolute";
79 | buttonsContainer.style["top"] = 0;
80 | buttonsContainer.style["left"] = 0;
81 |
82 |
83 |
84 | var background = document.createElement("div");
85 | body.appendChild(background);
86 |
87 | background.setAttribute("class", "svg-crowbar");
88 | background.style["background"] = "rgba(255, 255, 255, 0.7)";
89 | background.style["position"] = "fixed";
90 | background.style["left"] = 0;
91 | background.style["top"] = 0;
92 | background.style["width"] = "100%";
93 | background.style["height"] = "100%";
94 |
95 | sources.forEach(function(d, i) {
96 | var buttonWrapper = document.createElement("div");
97 | buttonsContainer.appendChild(buttonWrapper);
98 | buttonWrapper.setAttribute("class", "svg-crowbar");
99 | buttonWrapper.style["position"] = "absolute";
100 | buttonWrapper.style["top"] = (d.top + document.body.scrollTop) + "px";
101 | buttonWrapper.style["left"] = (document.body.scrollLeft + d.left) + "px";
102 | buttonWrapper.style["padding"] = "4px";
103 | buttonWrapper.style["border-radius"] = "3px";
104 | buttonWrapper.style["color"] = "white";
105 | buttonWrapper.style["text-align"] = "center";
106 | buttonWrapper.style["font-family"] = "'Helvetica Neue'";
107 | buttonWrapper.style["background"] = "rgba(0, 0, 0, 0.8)";
108 | buttonWrapper.style["box-shadow"] = "0px 4px 18px rgba(0, 0, 0, 0.4)";
109 | buttonWrapper.style["cursor"] = "move";
110 | buttonWrapper.textContent = "SVG #" + i + ": " + (d.id ? "#" + d.id : "") + (d.class ? "." + d.class : "");
111 |
112 | var button = document.createElement("button");
113 | buttonWrapper.appendChild(button);
114 | button.setAttribute("data-source-id", i)
115 | button.style["width"] = "150px";
116 | button.style["font-size"] = "12px";
117 | button.style["line-height"] = "1.4em";
118 | button.style["margin"] = "5px 0 0 0";
119 | button.textContent = "Download";
120 |
121 | button.onclick = function(el) {
122 | // console.log(el, d, i, sources)
123 | download(d);
124 | };
125 |
126 | });
127 |
128 | }
129 |
130 | function cleanup() {
131 | var crowbarElements = document.querySelectorAll(".svg-crowbar");
132 |
133 | [].forEach.call(crowbarElements, function(el) {
134 | el.parentNode.removeChild(el);
135 | });
136 | }
137 |
138 |
139 | function getSources(doc, styles) {
140 | var svgInfo = [],
141 | svgs = doc.querySelectorAll("svg");
142 |
143 | styles = (styles === undefined) ? "" : styles;
144 |
145 | [].forEach.call(svgs, function (svg) {
146 |
147 | svg.setAttribute("version", "1.1");
148 |
149 | var defsEl = document.createElement("defs");
150 | svg.insertBefore(defsEl, svg.firstChild); //TODO .insert("defs", ":first-child")
151 | // defsEl.setAttribute("class", "svg-crowbar");
152 |
153 | var styleEl = document.createElement("style")
154 | defsEl.appendChild(styleEl);
155 | styleEl.setAttribute("type", "text/css");
156 |
157 |
158 | // removing attributes so they aren't doubled up
159 | svg.removeAttribute("xmlns");
160 | svg.removeAttribute("xlink");
161 |
162 | // These are needed for the svg
163 | if (!svg.hasAttributeNS(prefix.xmlns, "xmlns")) {
164 | svg.setAttributeNS(prefix.xmlns, "xmlns", prefix.svg);
165 | }
166 |
167 | if (!svg.hasAttributeNS(prefix.xmlns, "xmlns:xlink")) {
168 | svg.setAttributeNS(prefix.xmlns, "xmlns:xlink", prefix.xlink);
169 | }
170 |
171 | var source = (new XMLSerializer()).serializeToString(svg).replace('</style>', '<![CDATA[' + styles + ']]></style>');
172 | var rect = svg.getBoundingClientRect();
173 | svgInfo.push({
174 | top: rect.top,
175 | left: rect.left,
176 | width: rect.width,
177 | height: rect.height,
178 | class: svg.getAttribute("class"),
179 | id: svg.getAttribute("id"),
180 | childElementCount: svg.childElementCount,
181 | source: [doctype + source]
182 | });
183 | });
184 | return svgInfo;
185 | }
186 |
187 | function download(source) {
188 | var filename = "untitled";
189 |
190 | if (source.id) {
191 | filename = source.id;
192 | } else if (source.class) {
193 | filename = source.class;
194 | } else if (window.document.title) {
195 | filename = window.document.title.replace(/[^a-z0-9]/gi, '-').toLowerCase();
196 | }
197 |
198 | var url = window.URL.createObjectURL(new Blob(source.source, { "type" : "text\/xml" }));
199 |
200 | var a = document.createElement("a");
201 | body.appendChild(a);
202 | a.setAttribute("class", "svg-crowbar");
203 | a.setAttribute("download", filename + ".svg");
204 | a.setAttribute("href", url);
205 | a.style["display"] = "none";
206 | a.click();
207 |
208 | setTimeout(function() {
209 | window.URL.revokeObjectURL(url);
210 | }, 10);
211 | }
212 |
213 | function getStyles(doc) {
214 | var styles = "",
215 | styleSheets = doc.styleSheets;
216 |
217 | if (styleSheets) {
218 | for (var i = 0; i < styleSheets.length; i++) {
219 | processStyleSheet(styleSheets[i]);
220 | }
221 | }
222 |
223 | function processStyleSheet(ss) {
224 | if (ss.cssRules) {
225 | for (var i = 0; i < ss.cssRules.length; i++) {
226 | var rule = ss.cssRules[i];
227 | if (rule.type === 3) {
228 | // Import Rule
229 | processStyleSheet(rule.styleSheet);
230 | } else {
231 | // hack for illustrator crashing on descendent selectors
232 | if (rule.selectorText) {
233 | if (rule.selectorText.indexOf(">") === -1) {
234 | styles += "\n" + rule.cssText;
235 | }
236 | }
237 | }
238 | }
239 | }
240 | }
241 | return styles;
242 | }
243 |
244 | })();
245 |
--------------------------------------------------------------------------------
/adenine/examples/ade_config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | from adenine.utils import data_source
11 | from adenine.utils import extra
12 |
13 | # -------------------------- EXPERIMENT INFO ------------------------- #
14 | exp_tag = 'debug'
15 | output_root_folder = 'results'
16 | file_format = 'png'  # or 'pdf'
17 | plotting_context = 'paper' # one of {paper, notebook, talk, poster}
18 |
19 | # ---------------------------- INPUT DATA ---------------------------- #
20 | X, y, feat_names, class_names = data_source.load('iris')
21 | #X, y, feat_names, class_names = data_source.load('gauss', n_samples=300)
22 | # X, y, feat_names, class_names = data_source.load('circles')
23 | # X, y, feat_names, class_names = data_source.load('digits')
24 | # X, y, feat_names, class_names = data_source.load('diabetes')
25 | # X, y, feat_names, class_names = data_source.load('boston')
26 | # X, y, feat_names, class_names = data_source.load('custom', 'data/X.npy', 'data/y.npy')
27 | # X, y, feat_names, class_names = data_source.load('custom', 'data/X.csv', 'data/y.csv')
28 |
29 | # X, y, feat_names, class_names = data_source.load('custom', '/home/fede/src/adenine/adenine/examples/TM_matrix.csv')
30 | # X = extra.ensure_symmetry(X)
31 | # X = 1. - X # i want affinity
32 |
33 | # ----------------------- PIPELINE DEFINITION ------------------------ #
34 |
35 | # --- Missing Values Imputing --- #
36 | # step0 = {'Impute': [False, {'missing_values': 'NaN',
37 | # 'strategy': ['median','mean','nearest_neighbors']}]}
38 |
39 | # --- Data Preprocessing --- #
40 | step1 = {'None': [False], 'Recenter': [False], 'Standardize': [False],
41 | 'Normalize': [True, {'norm': ['l2']}],
42 | 'MinMax': [False, {'feature_range': [(0,1), (-1,1)]}]}
43 |
44 | # --- Dimensionality Reduction & Manifold Learning --- #
45 | step2 = {'PCA': [True, {'n_components': 2}],
46 | 'IncrementalPCA': [False, {'n_components': 3}],
47 | 'RandomizedPCA': [False, {'n_components': 3}],
48 | 'KernelPCA': [False, {'n_components': 2,
49 | 'kernel': ['linear','rbf','poly'], 'gamma': 2}],
50 | 'Isomap': [False, {'n_components': 3, 'n_neighbors': 5}],
51 | 'LLE': [False, {'n_components': 3, 'n_neighbors': 5, # xxx
52 | 'method': ['standard','modified','hessian','ltsa']}],
53 | 'SE': [False, {'n_components': 3, 'affinity': ['nearest_neighbors','rbf']}], # can be 'precomputed'
54 | 'MDS': [False, {'n_components': 3, 'metric': [True, False]}],
55 | 'tSNE': [False, {'n_components': 3}],
56 | 'RBM': [True, {'n_components': 256}],
57 | 'None': [False, {}]
58 | }
59 |
60 | # --- Clustering --- #
61 | step3 = {'KMeans': [False, {'n_clusters': [2]}], # cannot be 'precomputed'
62 | 'AP': [False, {'preference': ['auto']}], # can be 'precomputed'
63 | 'MS': [False], # cannot be 'precomputed'
64 | 'Spectral': [False, {'n_clusters': [2]}], # can be 'precomputed'
65 | 'Hierarchical': [False, {'n_clusters': [3],
66 | #'affinity': ['manhattan','euclidean'],
67 | 'affinity': ['euclidean'],
68 | #'linkage': ['ward','complete','average']}]
69 | 'linkage': ['ward']}]
70 | }
71 |
--------------------------------------------------------------------------------
/adenine/examples/data/X.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/adenine/examples/data/X.npy
--------------------------------------------------------------------------------
/adenine/examples/data/X_missing.csv:
--------------------------------------------------------------------------------
1 | nan,-1.156862383698490815e+00,2.325969437444890264e-01,-1.226910178531959300e-01
2 | nan,nan,1.662453908639822120e-01,nan
3 | nan,1.078908257010945171e+00,nan,-4.521696668277273012e-01
4 | nan,nan,nan,nan
5 | -7.075918621950672005e-01,nan,1.107003123417464430e+00,3.925003704893688106e-01
6 | nan,-9.307682348162302777e-01,nan,-1.238600366412410531e-01
7 | -6.394675777546744433e-01,nan,nan,nan
8 | -1.463084369363933934e+00,nan,1.495331971007898719e+00,nan
9 | nan,-9.088301530861653266e-01,-3.489239577414761095e-01,nan
10 | 4.314432145948063901e-01,1.981623502664172642e+00,-4.459802189090978919e-01,nan
11 | 6.142043420674385412e-02,-6.118179692142385884e-01,nan,nan
12 | 1.653774270442252892e+00,nan,-7.105778390461989780e-01,-4.253239641661243908e-01
13 | nan,7.938380876396712305e-01,nan,9.734189421253153229e-01
14 | nan,nan,-9.325734485898492521e-01,nan
15 | -4.692381743406294770e-01,nan,nan,-9.854055694219750194e-02
16 | 2.856917897910776771e-01,5.442976535996231213e-01,-9.027838452675212011e-01,nan
17 | 3.228887691158054407e-01,1.178646684094214914e+00,-8.115360478215190021e-01,nan
18 | nan,-8.872088861442941621e-01,nan,nan
19 | 2.767596836546558081e-01,nan,nan,3.134103300122419861e-02
20 | nan,nan,9.208184217167506569e-01,nan
21 | nan,1.446536613951119543e+00,nan,nan
22 | 6.346579181673047687e-01,nan,nan,nan
23 | 1.392886349437185034e+00,nan,-1.198599027981311460e+00,-1.132697590062041293e+00
24 | -1.434715432544310998e+00,1.026936160278541399e+00,1.124128303209935176e+00,nan
25 | nan,2.330603277258347372e-01,nan,nan
26 | nan,nan,1.142952665043716731e+00,nan
27 | nan,1.140373046240707788e+00,nan,4.270122455127892680e-01
28 | nan,-1.361512094713794196e+00,nan,nan
29 | 1.209893933270316246e+00,nan,-1.359040178533376775e+00,nan
30 | -4.502313522982358540e-01,nan,nan,nan
31 | 2.195009994421553146e-01,-6.037814919824179283e-01,nan,-3.282889270210308762e-03
32 | -9.284702738379502218e-01,nan,9.642905063476822081e-01,nan
33 | -1.210009835227054298e+00,9.288008161700008758e-01,7.662876912390784723e-01,6.563682753521845603e-01
34 | nan,nan,-1.080027626003194241e+00,nan
35 | nan,-7.893344721439290446e-01,3.208092641934174316e-01,-2.744291585393637267e-01
36 | 1.641059936670823949e-01,nan,-4.154673123178226346e-02,-1.690398094574822596e-01
37 | nan,7.584241169204418709e-01,nan,nan
38 | 6.565432358878454944e-02,nan,nan,nan
39 | nan,9.926811177344899706e-01,nan,nan
40 | -1.041542225925210924e-01,nan,nan,nan
41 | nan,9.143221607270377582e-01,nan,1.288919718901406775e+00
42 | nan,5.019338363269474357e-01,7.100040463856538420e-01,1.545841940359383937e+00
43 | -6.600020519878190273e-01,1.104240175105188904e+00,1.132483378627092474e+00,1.465827932390227684e+00
44 | 1.744402579749481652e-01,-1.587272191140688182e+00,2.497575268980154195e-01,-8.134691613104610974e-02
45 | nan,nan,nan,nan
46 | nan,7.501720661828157333e-01,nan,nan
47 | -3.342554750911648775e-01,-7.137329100818909922e-01,-2.407084197162502048e-01,-8.340083131232416125e-02
48 | nan,nan,nan,-9.673348217456705367e-01
49 | nan,nan,-2.391048052682311353e-01,nan
50 | nan,9.300718567010340943e-01,nan,nan
51 | 1.017586261554328741e+00,nan,nan,nan
52 | 1.303392086200408251e+00,1.176009676693779760e+00,nan,nan
53 | nan,4.836988853116996889e-01,-1.092880083369777822e+00,nan
54 | -9.319851610608677062e-01,1.397977267254582046e+00,nan,1.535659733771689517e+00
55 | -6.408969054855628844e-01,nan,nan,nan
56 | nan,-9.323550811636887037e-01,nan,3.632022074493482244e-01
57 | nan,nan,-3.625792307095718203e-01,nan
58 | 9.145070118313443075e-01,nan,nan,-6.360363342469680381e-01
59 | -1.007658038296989078e+00,nan,1.179926256982412269e+00,7.463843472733622253e-01
60 | -1.187303971214693110e+00,1.107806472262462982e+00,nan,1.585263871893016763e+00
61 | -9.031651321481354300e-01,nan,nan,1.321148563212554361e+00
62 | nan,7.999924887361942183e-01,-5.778430086333533611e-01,nan
63 | 1.059243736095132560e+00,1.522616402375389422e+00,-9.552541500540825403e-01,nan
64 | nan,1.609635090789223621e+00,nan,nan
65 | -1.606690560441900173e-01,-1.229925236813150580e+00,-3.819256182206764993e-01,7.356022877628196621e-01
66 | nan,7.062859368497632628e-01,nan,nan
67 | -1.411870672836224694e+00,nan,7.086113942358228668e-01,1.492798947017852651e+00
68 | -3.040514043122001242e-01,-5.813862246900292075e-01,nan,-2.108471835999596866e-01
69 | 8.507789005424097883e-01,8.773006104066740640e-01,-7.045392379849533260e-01,nan
70 | 3.074991953218891294e-01,-1.102530028510551263e+00,-5.869166677052294334e-01,1.138990811361808436e-01
71 | nan,nan,1.461701841444241090e+00,nan
72 | nan,nan,nan,nan
73 | nan,nan,-1.098651949849287046e+00,nan
74 | -1.095979133848796971e+00,1.455316797130397521e+00,1.227837823199790623e+00,nan
75 | nan,nan,nan,nan
76 | -6.120563267013116177e-02,nan,nan,-3.142630129259655902e-01
77 | nan,-6.724793646662509117e-01,-3.051652072034404806e-01,5.981953113846391057e-01
78 | -1.329461562127501661e+00,nan,1.783776000666230654e+00,nan
79 | -1.430891473540749415e+00,nan,nan,nan
80 | 1.398979808841118500e+00,1.213372537922308148e+00,-1.139778282564449130e+00,nan
81 | nan,nan,nan,nan
82 | nan,1.053890414736040837e+00,nan,-8.991149068911545861e-01
83 | nan,nan,nan,-1.715028446120915873e-01
84 | -2.975404547273184930e-01,-9.113023565308768781e-01,1.227527126258068924e-01,nan
85 | nan,nan,-8.837576863617241374e-01,nan
86 | 5.783526399497983528e-01,-1.568427441620841467e+00,nan,nan
87 | -2.836003143929887171e-01,nan,nan,nan
88 | nan,nan,6.143182346112907588e-02,nan
89 | nan,nan,nan,nan
90 | -8.537457775861589937e-02,9.917736724082389932e-01,-4.268716727368595532e-01,nan
91 | nan,-1.019031454530859415e+00,nan,nan
92 | nan,nan,1.042925054651744121e+00,9.984537828953213845e-01
93 | -1.995090873945649934e-01,nan,5.535149367168700207e-01,nan
94 | 2.228127146357133381e-01,nan,nan,nan
95 | nan,-1.382705804273485883e+00,nan,nan
96 | nan,nan,nan,nan
97 | 3.381358892249306525e-01,-1.326733296875020063e+00,nan,4.291618094613102175e-01
98 | nan,1.592912086579896691e+00,nan,5.686762988479203695e-01
99 | nan,nan,-7.593116778742476924e-01,nan
100 | nan,1.163667252112682515e+00,-6.318787198098960722e-01,nan
101 |
--------------------------------------------------------------------------------
/adenine/examples/data/Y_missing_test.csv:
--------------------------------------------------------------------------------
1 | 1.000000000000000000e+00
2 | 1.000000000000000000e+00
3 | 2.000000000000000000e+00
4 | 0.000000000000000000e+00
5 | 0.000000000000000000e+00
6 | 1.000000000000000000e+00
7 | 0.000000000000000000e+00
8 | 0.000000000000000000e+00
9 | 1.000000000000000000e+00
10 | 2.000000000000000000e+00
11 | 1.000000000000000000e+00
12 | 2.000000000000000000e+00
13 | 0.000000000000000000e+00
14 | 2.000000000000000000e+00
15 | 1.000000000000000000e+00
16 | 2.000000000000000000e+00
17 | 2.000000000000000000e+00
18 | 1.000000000000000000e+00
19 | 1.000000000000000000e+00
20 | 0.000000000000000000e+00
21 | 2.000000000000000000e+00
22 | 2.000000000000000000e+00
23 | 2.000000000000000000e+00
24 | 0.000000000000000000e+00
25 | 2.000000000000000000e+00
26 | 0.000000000000000000e+00
27 | 0.000000000000000000e+00
28 | 1.000000000000000000e+00
29 | 2.000000000000000000e+00
30 | 0.000000000000000000e+00
31 | 1.000000000000000000e+00
32 | 0.000000000000000000e+00
33 | 0.000000000000000000e+00
34 | 2.000000000000000000e+00
35 | 1.000000000000000000e+00
36 | 1.000000000000000000e+00
37 | 0.000000000000000000e+00
38 | 1.000000000000000000e+00
39 | 2.000000000000000000e+00
40 | 1.000000000000000000e+00
41 | 0.000000000000000000e+00
42 | 0.000000000000000000e+00
43 | 0.000000000000000000e+00
44 | 1.000000000000000000e+00
45 | 0.000000000000000000e+00
46 | 0.000000000000000000e+00
47 | 1.000000000000000000e+00
48 | 2.000000000000000000e+00
49 | 1.000000000000000000e+00
50 | 2.000000000000000000e+00
51 | 2.000000000000000000e+00
52 | 2.000000000000000000e+00
53 | 2.000000000000000000e+00
54 | 0.000000000000000000e+00
55 | 0.000000000000000000e+00
56 | 1.000000000000000000e+00
57 | 1.000000000000000000e+00
58 | 2.000000000000000000e+00
59 | 0.000000000000000000e+00
60 | 0.000000000000000000e+00
61 | 0.000000000000000000e+00
62 | 2.000000000000000000e+00
63 | 2.000000000000000000e+00
64 | 0.000000000000000000e+00
65 | 1.000000000000000000e+00
66 | 2.000000000000000000e+00
67 | 0.000000000000000000e+00
68 | 1.000000000000000000e+00
69 | 2.000000000000000000e+00
70 | 1.000000000000000000e+00
71 | 0.000000000000000000e+00
72 | 1.000000000000000000e+00
73 | 2.000000000000000000e+00
74 | 0.000000000000000000e+00
75 | 2.000000000000000000e+00
76 | 1.000000000000000000e+00
77 | 1.000000000000000000e+00
78 | 0.000000000000000000e+00
79 | 0.000000000000000000e+00
80 | 2.000000000000000000e+00
81 | 1.000000000000000000e+00
82 | 2.000000000000000000e+00
83 | 1.000000000000000000e+00
84 | 1.000000000000000000e+00
85 | 2.000000000000000000e+00
86 | 1.000000000000000000e+00
87 | 0.000000000000000000e+00
88 | 1.000000000000000000e+00
89 | 2.000000000000000000e+00
90 | 2.000000000000000000e+00
91 | 1.000000000000000000e+00
92 | 0.000000000000000000e+00
93 | 1.000000000000000000e+00
94 | 1.000000000000000000e+00
95 | 1.000000000000000000e+00
96 | 2.000000000000000000e+00
97 | 1.000000000000000000e+00
98 | 0.000000000000000000e+00
99 | 1.000000000000000000e+00
100 | 2.000000000000000000e+00
101 |
--------------------------------------------------------------------------------
/adenine/examples/data/y.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/adenine/examples/data/y.npy
--------------------------------------------------------------------------------
/adenine/externals/__init__.py:
--------------------------------------------------------------------------------
1 | from .hierarchical import AgglomerativeClustering
2 |
--------------------------------------------------------------------------------
/adenine/test/Y_missing_test.csv:
--------------------------------------------------------------------------------
1 | 0.000000000000000000e+00
2 | 0.000000000000000000e+00
3 | 0.000000000000000000e+00
4 | 0.000000000000000000e+00
5 | 0.000000000000000000e+00
6 | 0.000000000000000000e+00
7 | 0.000000000000000000e+00
8 | 0.000000000000000000e+00
9 | 0.000000000000000000e+00
10 | 0.000000000000000000e+00
11 | 0.000000000000000000e+00
12 | 0.000000000000000000e+00
13 | 0.000000000000000000e+00
14 | 0.000000000000000000e+00
15 | 0.000000000000000000e+00
16 | 0.000000000000000000e+00
17 | 0.000000000000000000e+00
18 | 0.000000000000000000e+00
19 | 0.000000000000000000e+00
20 | 0.000000000000000000e+00
21 | 0.000000000000000000e+00
22 | 0.000000000000000000e+00
23 | 0.000000000000000000e+00
24 | 0.000000000000000000e+00
25 | 0.000000000000000000e+00
26 | 0.000000000000000000e+00
27 | 0.000000000000000000e+00
28 | 0.000000000000000000e+00
29 | 0.000000000000000000e+00
30 | 0.000000000000000000e+00
31 | 0.000000000000000000e+00
32 | 0.000000000000000000e+00
33 | 0.000000000000000000e+00
34 | 0.000000000000000000e+00
35 | 0.000000000000000000e+00
36 | 0.000000000000000000e+00
37 | 0.000000000000000000e+00
38 | 0.000000000000000000e+00
39 | 0.000000000000000000e+00
40 | 0.000000000000000000e+00
41 | 0.000000000000000000e+00
42 | 0.000000000000000000e+00
43 | 0.000000000000000000e+00
44 | 0.000000000000000000e+00
45 | 0.000000000000000000e+00
46 | 0.000000000000000000e+00
47 | 0.000000000000000000e+00
48 | 0.000000000000000000e+00
49 | 0.000000000000000000e+00
50 | 0.000000000000000000e+00
51 | 1.000000000000000000e+00
52 | 1.000000000000000000e+00
53 | 1.000000000000000000e+00
54 | 1.000000000000000000e+00
55 | 1.000000000000000000e+00
56 | 1.000000000000000000e+00
57 | 1.000000000000000000e+00
58 | 1.000000000000000000e+00
59 | 1.000000000000000000e+00
60 | 1.000000000000000000e+00
61 | 1.000000000000000000e+00
62 | 1.000000000000000000e+00
63 | 1.000000000000000000e+00
64 | 1.000000000000000000e+00
65 | 1.000000000000000000e+00
66 | 1.000000000000000000e+00
67 | 1.000000000000000000e+00
68 | 1.000000000000000000e+00
69 | 1.000000000000000000e+00
70 | 1.000000000000000000e+00
71 | 1.000000000000000000e+00
72 | 1.000000000000000000e+00
73 | 1.000000000000000000e+00
74 | 1.000000000000000000e+00
75 | 1.000000000000000000e+00
76 | 1.000000000000000000e+00
77 | 1.000000000000000000e+00
78 | 1.000000000000000000e+00
79 | 1.000000000000000000e+00
80 | 1.000000000000000000e+00
81 | 1.000000000000000000e+00
82 | 1.000000000000000000e+00
83 | 1.000000000000000000e+00
84 | 1.000000000000000000e+00
85 | 1.000000000000000000e+00
86 | 1.000000000000000000e+00
87 | 1.000000000000000000e+00
88 | 1.000000000000000000e+00
89 | 1.000000000000000000e+00
90 | 1.000000000000000000e+00
91 | 1.000000000000000000e+00
92 | 1.000000000000000000e+00
93 | 1.000000000000000000e+00
94 | 1.000000000000000000e+00
95 | 1.000000000000000000e+00
96 | 1.000000000000000000e+00
97 | 1.000000000000000000e+00
98 | 1.000000000000000000e+00
99 | 1.000000000000000000e+00
100 | 1.000000000000000000e+00
101 | 2.000000000000000000e+00
102 | 2.000000000000000000e+00
103 | 2.000000000000000000e+00
104 | 2.000000000000000000e+00
105 | 2.000000000000000000e+00
106 | 2.000000000000000000e+00
107 | 2.000000000000000000e+00
108 | 2.000000000000000000e+00
109 | 2.000000000000000000e+00
110 | 2.000000000000000000e+00
111 | 2.000000000000000000e+00
112 | 2.000000000000000000e+00
113 | 2.000000000000000000e+00
114 | 2.000000000000000000e+00
115 | 2.000000000000000000e+00
116 | 2.000000000000000000e+00
117 | 2.000000000000000000e+00
118 | 2.000000000000000000e+00
119 | 2.000000000000000000e+00
120 | 2.000000000000000000e+00
121 | 2.000000000000000000e+00
122 | 2.000000000000000000e+00
123 | 2.000000000000000000e+00
124 | 2.000000000000000000e+00
125 | 2.000000000000000000e+00
126 | 2.000000000000000000e+00
127 | 2.000000000000000000e+00
128 | 2.000000000000000000e+00
129 | 2.000000000000000000e+00
130 | 2.000000000000000000e+00
131 | 2.000000000000000000e+00
132 | 2.000000000000000000e+00
133 | 2.000000000000000000e+00
134 | 2.000000000000000000e+00
135 | 2.000000000000000000e+00
136 | 2.000000000000000000e+00
137 | 2.000000000000000000e+00
138 | 2.000000000000000000e+00
139 | 2.000000000000000000e+00
140 | 2.000000000000000000e+00
141 | 2.000000000000000000e+00
142 | 2.000000000000000000e+00
143 | 2.000000000000000000e+00
144 | 2.000000000000000000e+00
145 | 2.000000000000000000e+00
146 | 2.000000000000000000e+00
147 | 2.000000000000000000e+00
148 | 2.000000000000000000e+00
149 | 2.000000000000000000e+00
150 | 2.000000000000000000e+00
151 |
--------------------------------------------------------------------------------
/adenine/test/carttest.py:
--------------------------------------------------------------------------------
1 | from adenine.utils.extra import modified_cartesian
2 | 
3 | # Sanity check: the empty list C is replaced by [None] by default, so it
4 | # does not annihilate the whole Cartesian product.
5 | A = [(1, 0), (2, 0)]
6 | B = [(3, 0), (4, 0)]
7 | C = []
8 | D = [(5, 0), (6, 0)]
9 | 
10 | print(modified_cartesian(A, B, C, D))
11 | 
--------------------------------------------------------------------------------
/adenine/test/imputing_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | from __future__ import division
11 |
12 | import numpy as np
13 |
14 | from adenine.utils import data_source
15 | from adenine.utils.extensions import Imputer
16 |
17 |
18 | def test(missing_rate):
19 | """
20 |     Test KNN-based imputation of missing values.
21 | """
22 | Xreal, y, feat_names, class_names = data_source.load('iris')
23 | # Xreal, y, feat_names, class_names = data_source.load('gauss', n_samples=100)
24 | n, p = Xreal.shape
25 | print("{} x {} matrix loaded".format(n, p))
26 |
27 | # Choose the missing rate
28 | # missing_rate = 0.5
29 | n_missing = int(missing_rate * (n*p))
30 |
31 | # Create holes in the matrix
32 | np.random.seed(42)
33 | idx = np.random.permutation(n*p)
34 | xx = Xreal.ravel().copy()
35 | xx[idx[:n_missing]] = np.nan
36 | X = np.reshape(xx, (n, p))
37 | print("{} values deleted".format(n_missing))
38 |
39 | # Save data
40 | np.savetxt('X_missing.csv', X, delimiter=',')
41 | np.savetxt('Y_missing_test.csv', y, delimiter=',')
42 |
43 | # Start test
44 | strategies = ["mean", "median", "most_frequent", "nearest_neighbors"]
45 |
46 | imp = Imputer(strategy=strategies[3])
47 | Ximp = imp.fit_transform(X)
48 |
49 | if len(np.where(np.isnan(Ximp))[0]) == 0:
50 | print("All values were imputed according to: {}-strategy".format(imp.strategy))
51 | else:
52 | print("Empty values: {}".format(len(np.where(np.isnan(Ximp))[0])))
53 |
54 | # Check results
55 | dist = np.sqrt(np.sum((Xreal[imp.X_mask,:].ravel() - Ximp.ravel())**2))
56 | print("dist(Xreal - Ximp) = {}".format(dist))
57 |
58 | # print(Ximp)
59 |
60 |
61 | def main():
62 | for missing_rate in np.linspace(0.01, 0.3, 2):
63 | print("\nmissing rate: {}".format(missing_rate))
64 | test(missing_rate)
65 |
66 |
67 | if __name__ == '__main__':
68 | main()
69 |
--------------------------------------------------------------------------------
/adenine/test/imputing_test_lite.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | from __future__ import division
11 |
12 | import numpy as np
13 |
14 | from adenine.utils import data_source
15 | from adenine.utils.extensions import Imputer
16 |
17 |
18 | def test(missing_rate):
19 | """
20 |     Test KNN-based imputation of missing values.
21 | """
22 | np.random.seed(42)
23 | Xreal, y, feat_names, class_names = data_source.load('iris')
24 | # Xreal, y, feat_names, class_names = data_source.load('gauss', n_samples=80)
25 | n, p = Xreal.shape
26 | print("{} x {} matrix loaded".format(n, p))
27 |
28 | # Choose the missing rate
29 | # missing_rate = 0.5
30 | n_missing = int(missing_rate * (n*p))
31 |
32 | # Create holes in the matrix
33 | idx = np.random.permutation(n*p)
34 | xx = Xreal.ravel().copy()
35 | xx[idx[:n_missing]] = np.nan
36 | X = np.reshape(xx, (n, p))
37 | # X[0,:] = np.nan
38 | print("{} values deleted".format(n_missing))
39 |
40 | # Save data
41 | np.savetxt('X_missing.csv', X, delimiter=',')
42 | np.savetxt('Y_missing_test.csv', y, delimiter=',')
43 |
44 | # Start test
45 | strategies = ["mean", "median", "most_frequent", "nearest_neighbors"]
46 |
47 | imp = Imputer(strategy=strategies[3])
48 | Ximp = imp.fit_transform(X)
49 | # Xtr = X[:50, :]
50 | # Xts = X[50:, :]
51 | # imp.fit(Xtr)
52 | # Ximp = imp.transform(Xts)
53 |
54 | if len(np.where(np.isnan(Ximp))[0]) == 0:
55 | print("All values were imputed according to: {}-strategy".format(imp.strategy))
56 | else:
57 | print("Empty values: {}".format(len(np.where(np.isnan(Ximp))[0])))
58 |
59 | # Check results
60 | dist = np.sqrt(np.sum((Xreal[imp.X_mask,:].ravel() - Ximp.ravel())**2))
61 | print("dist(Xreal - Ximp) = {}".format(dist))
62 |
63 |
64 |
65 | def main():
66 | # for missing_rate in np.linspace(0.01, 0.3, 2):
67 | missing_rate = 0.3
68 | print("\nmissing rate: {}".format(missing_rate))
69 | test(missing_rate)
70 |
71 |
72 | if __name__ == '__main__':
73 | main()
74 |
--------------------------------------------------------------------------------
/adenine/utils/GEO2csv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """This module contains utility functions for GEO DataSets wrangling."""
5 |
6 | ######################################################################
7 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
8 | #
9 | # FreeBSD License
10 | ######################################################################
11 |
12 | import GEOparse
13 | import logging
14 | import os
15 | import pandas as pd
16 | from sklearn import datasets
17 | from six.moves import filter
18 |
19 |
20 | def get_GEO(accession_number, phenotype_name='title', return_gse=False):
21 |     """Get the GEO data from its accession number.
22 | 
23 |     Parameters
24 |     -----------
25 |     accession_number : string
26 |         a GEO accession ID (e.g. 'GSEXXXXX') loadable by `GEOparse`
27 |     phenotype_name : string, default : 'title'
28 |         the phenotype-data column used as the label vector
29 |     return_gse : bool, default : False
30 |         whether to also return the `GEOparse` GSE object
31 | 
32 |     Returns
33 |     -----------
34 |     out : list, [data] or [data, gse] when `return_gse` is True."""
35 | gse = GEOparse.get_GEO(geo=accession_number, destdir=os.curdir,
36 | silent=True, include_data=True,
37 | how='full')
38 | xx = gse.pivot_samples('VALUE').transpose()
39 | index = xx.index.tolist()
40 | feature_names = xx.columns.tolist()
41 | yy = gse.phenotype_data[phenotype_name]
42 | data = datasets.base.Bunch(data=xx.values, target=yy.values,
43 | feature_names=feature_names,
44 | index=index)
45 |
46 |
47 | print('* Desired labels can be found with --label_field = ')
48 | for k in gse.phenotype_data.keys():
49 | print('\t{}'.format(k))
50 |
51 | out = [data]
52 | if return_gse:
53 | out.append(gse)
54 |
55 | return out
56 |
57 |
58 | def label_mapper(raw_labels, new_labels):
59 | """Map some raw labels into new labels.
60 |
61 | When dealing with GEO DataSets it is very common that each GSM sample has
62 |     a different phenotype (e.g. 'Brain - 001', 'Brain - 002', ...). This
63 | function maps these raw labels into new homogeneous labels.
64 |
65 | Parameters
66 | -----------
67 | raw_labels : list of strings
68 | list of unpreprocessed labels
69 | new_labels : list of strings
70 | list of labels to map
71 |
72 | Returns
73 | -----------
74 | y : array of float, shape : n_samples
75 | the modified label vector
76 |
77 | Examples
78 | -----------
79 | >>> raw_labels = ['Brain - 001', 'Brain - 002', 'Muscle - 001', 'Muscle - 002']
80 | >>> label_mapper(raw_labels, ['Brain', 'Muscle'])
81 | ['Brain', 'Brain', 'Muscle', 'Muscle']
82 | """
83 | y = []
84 | for rl in raw_labels:
85 | for nl in new_labels:
86 | if nl in rl:
87 | y.append(nl)
88 | break
89 | else:
90 | y.append(rl)
91 | # print('No mapping rule for %s', rl)
92 | return y
93 |
94 |
95 | def GEO_select_samples(data, labels, selected_labels, index,
96 | feature_names=None):
97 | """GEO DataSets data selection tool.
98 |
99 | Modify the labels with `label_mapper` then return only the samples with
100 | labels in selected_labels.
101 |
102 | Parameters
103 | -----------
104 | data : array of float, shape : n_samples x n_features
105 | the dataset
106 | labels : numpy array (n_samples,)
107 | the labels vector
108 | selected_labels : list of strings
109 | a subset of new_labels containing only the samples wanted in the
110 | final dataset
111 | index : list of strings
112 | the sample indexes
113 | feature_names : list of strings
114 | the feature set
115 |         (e.g. probe IDs or gene symbols); if None, pandas assigns a
116 |         default integer range
117 |
118 | Returns
119 | -----------
120 | data : sklearn.datasets.base.Bunch
121 | An instance of the sklearn.datasets.base.Bunch class, the meaningful
122 | attributes are .data, the data matrix, and .target, the label vector.
123 | """
124 | mapped_y = pd.DataFrame(data=label_mapper(labels, selected_labels),
125 | index=index, columns=['Phenotype'])
126 | y = mapped_y[mapped_y['Phenotype'].isin(selected_labels)]
127 | X = pd.DataFrame(data, index=index, columns=feature_names).loc[y.index]
128 | return datasets.base.Bunch(data=X.values, feature_names=X.columns,
129 | target=y.values.ravel(), index=X.index.tolist())
130 |
131 | def id2gs(data, gse):
132 | """Convert IDs into GENE_SYMBOL.
133 |
134 | Parameters
135 | -----------
136 | data : sklearn.datasets.base.Bunch
137 | the dataset bunch
138 | gse : GEOparse.GEOTypes.GSE
139 | the GEOparse object
140 |
141 | Returns
142 | -----------
143 | data : sklearn.datasets.base.Bunch
144 | where feature_names has the gene symbols
145 | """
146 | # Get the platform name
147 |     platform = list(gse.gpls.keys())[0]  # works on both python 2 and 3
148 |
149 | # Create the lookup table
150 | lookup_table = pd.DataFrame(data=gse.gpls[platform].table['GENE_SYMBOL'].values,
151 | index=gse.gpls[platform].table['ID'].values,
152 | columns=['GENE_SYMBOL'])
153 | # Correct NaN failures
154 | for i, lt_value in enumerate(lookup_table.values.ravel()):
155 | if pd.isnull(lt_value):
156 | lookup_table.values[i] = str(lookup_table.index[i])+'__NO-MATCH'
157 | gene_symbol = [lookup_table['GENE_SYMBOL'].loc[_id] for _id in data.feature_names]
158 |
159 | # Make bunch and return
160 | return datasets.base.Bunch(data=data.data, feature_names=gene_symbol,
161 | target=data.target, index=data.index)
162 |
163 |
164 | def restrict_to_signature(data, signature):
165 | """Restrict the data to the genes in the signature.
166 |
167 | Parameters
168 | -----------
169 | data : sklearn.datasets.base.Bunch
170 | the dataset bunch
171 | signature : list
172 | list of signature genes
173 |
174 | Returns
175 | -----------
176 | data : sklearn.datasets.base.Bunch
177 | where feature_names has the gene symbols restricted to signature
178 | """
179 | df = pd.DataFrame(data=data.data, index=data.index,
180 | columns=data.feature_names)
181 |     # Filter out signature genes that are not in the feature set
182 | signature = list(filter(lambda x: x in data.feature_names, signature))
183 | df = df[signature]
184 | # Make bunch and return
185 | return datasets.base.Bunch(data=df.values, feature_names=df.columns,
186 | target=data.target, index=data.index)
187 |
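188 | 
189 | if __name__ == '__main__':
190 |     # Illustrative sketch only: the accession number and the label names
191 |     # below are hypothetical placeholders, and get_GEO needs network
192 |     # access. It shows how the utilities above are meant to chain together.
193 |     data, gse = get_GEO('GSE00000', return_gse=True)
194 |     data = GEO_select_samples(data.data, data.target,
195 |                               selected_labels=['Brain', 'Muscle'],
196 |                               index=data.index,
197 |                               feature_names=data.feature_names)
198 |     data = id2gs(data, gse)  # map probe IDs to gene symbols
199 |     print('{} samples x {} features'.format(*data.data.shape))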
--------------------------------------------------------------------------------
/adenine/utils/__init__.py:
--------------------------------------------------------------------------------
1 | ######################################################################
2 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
3 | #
4 | # FreeBSD License
5 | ######################################################################
6 |
--------------------------------------------------------------------------------
/adenine/utils/data_source.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """This module is mainly a wrapper for some sklearn.datasets functions."""
5 |
6 | ######################################################################
7 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
8 | #
9 | # FreeBSD License
10 | ######################################################################
11 | import sys
12 | import os
13 | import logging
14 | import numpy as np
15 | import pandas as pd
16 | from sklearn import datasets
17 | from sklearn.preprocessing import Binarizer
18 |
19 | # Legacy import
20 | try:
21 | from sklearn.model_selection import StratifiedShuffleSplit
22 | except ImportError:
23 | from sklearn.cross_validation import StratifiedShuffleSplit
24 |
25 |
26 | def generate_gauss(mu=None, std=None, n_sample=None):
27 | """Create a Gaussian dataset.
28 |
29 | Generates a dataset with n_sample * n_class examples and n_dim dimensions.
30 |
31 | Parameters
32 | -----------
33 | mu : array of float, shape : n_class x n_dim
34 | The mean of each class.
35 |
36 | std : array of float, shape : n_class
37 | The standard deviation of each Gaussian distribution.
38 |
39 | n_sample : int
40 |         Number of points per class.
41 | """
42 | n_class, n_var = mu.shape
43 |
44 | X = np.zeros((n_sample * n_class, n_var))
45 | y = np.zeros(n_sample * n_class, dtype=int)
46 |
47 | start = 0
48 | for i, s, m in zip(range(n_class), std, mu):
49 | end = start + n_sample
50 | X[start:end, :] = s * np.random.randn(n_sample, n_var) + m
51 | y[start:end] = i
52 | start = end
53 |
54 | return X, y
55 |
56 |
57 | def load_custom(x_filename, y_filename, samples_on='rows', **kwargs):
58 | """Load a custom dataset.
59 |
60 |     This function loads the data matrix and the label vector, returning a
61 |     single sklearn-like Bunch object.
62 |
63 | Parameters
64 | -----------
65 | x_filename : string
66 | The data matrix file name.
67 |
68 | y_filename : string
69 | The label vector file name.
70 |
71 |     samples_on : string
72 |         Either in ['row', 'rows'] if the samples lie on the rows of the
73 |         input data matrix, or in ['col', 'cols'] if they lie on the
74 |         columns.
75 |
76 | kwargs : dict
77 | Arguments of pandas.read_csv function.
78 |
79 | Returns
80 | -----------
81 | data : sklearn.datasets.base.Bunch
82 | An instance of the sklearn.datasets.base.Bunch class, the meaningful
83 | attributes are .data, the data matrix, and .target, the label vector.
84 | """
85 | if x_filename is None:
86 | raise IOError("Filename for X must be specified with mode 'custom'.")
87 |
88 |     if x_filename.endswith('.npy'):  # if an .npy file is provided
89 | try: # labels are not mandatory
90 | y = np.load(y_filename)
91 | except IOError as e:
92 | y = None
93 | e.strerror = "No labels file provided"
94 | logging.error("I/O error({0}): {1}".format(e.errno, e.strerror))
95 | X = np.load(x_filename)
96 | if samples_on not in ['row', 'rows']:
97 | # data matrix must be n_samples x n_features
98 | X = X.T
99 | return datasets.base.Bunch(data=X, target=y,
100 | index=np.arange(X.shape[0]))
101 |
102 | elif x_filename.endswith('.csv') or x_filename.endswith('.txt'):
103 | y = None
104 | kwargs.setdefault('header', 0) # header on first row
105 | kwargs.setdefault('index_col', 0) # indexes on first
106 | try:
107 | dfx = pd.read_csv(x_filename, **kwargs)
108 | if samples_on not in ['row', 'rows']:
109 | # data matrix must be n_samples x n_features
110 | dfx = dfx.transpose()
111 | if y_filename is not None:
112 | # Before loading labels, remove parameters that were likely
113 | # specified for data only.
114 | kwargs.pop('usecols', None)
115 | y = pd.read_csv(y_filename, **kwargs).as_matrix().ravel()
116 |
117 | except IOError as e:
118 | e.strerror = "Can't open {} or {}".format(x_filename, y_filename)
119 | logging.error("I/O error({0}): {1}".format(e.errno, e.strerror))
120 | sys.exit(-1)
121 |
122 | return datasets.base.Bunch(data=dfx.as_matrix(), feature_names=dfx.columns.tolist(),
123 | target=y, index=dfx.index.tolist())
124 |
125 |
126 | def load(opt='custom', x_filename=None, y_filename=None, n_samples=0,
127 | samples_on='rows', **kwargs):
128 | """Load a specified dataset.
129 |
130 | This function can be used either to load one of the standard scikit-learn
131 | datasets or a different dataset saved as X.npy Y.npy in the working
132 | directory.
133 |
134 | Parameters
135 | -----------
136 |     opt : {'iris', 'digits', 'diabetes', 'boston', 'gauss', 'circles',
137 |         'moons', 'custom', 'GSEXXXXX'}, default: 'custom'
138 |         Name of a predefined dataset to be loaded. 'iris', 'digits',
139 |         'diabetes', 'boston', 'circles' and 'moons' refer to the corresponding
140 |         `scikit-learn` datasets. 'custom' loads a custom dataset whose
141 |         name is specified in `x_filename` and `y_filename` (optional).
142 |
143 | x_filename : string, default : None
144 | The data matrix file name.
145 |
146 | y_filename : string, default : None
147 | The label vector file name.
148 |
149 | n_samples : int
150 |         The number of samples to be loaded. This comes in handy when dealing with
151 | large datasets. When n_samples is less than the actual size of the
152 | dataset this function performs a random subsampling that is stratified
153 | w.r.t. the labels (if provided).
154 |
155 |     samples_on : string
156 |         Either in ['row', 'rows'] if the samples lie on the rows of the
157 |         input data matrix, or in ['col', 'cols'] if they lie on the
158 |         columns.
159 |
160 | data_sep : string
161 | The data separator. For instance comma, tab, blank space, etc.
162 |
163 | Returns
164 | -----------
165 | X : array of float, shape : n_samples x n_features
166 | The input data matrix.
167 |
168 | y : array of float, shape : n_samples
169 |         The label vector; None if missing.
170 |
171 | feature_names : array of integers (or strings), shape : n_features
172 |         The feature names; a range of numbers if missing.
173 |
174 | index : list of integers (or strings)
175 |         The sample identifiers, if provided as the first column (or row)
176 |         of the input file. Otherwise it is just an incremental range of
177 |         size n_samples.
178 | """
179 | data = None
180 | try:
181 | if opt.lower() == 'iris':
182 | data = datasets.load_iris()
183 | elif opt.lower() == 'digits':
184 | data = datasets.load_digits()
185 | elif opt.lower() == 'diabetes':
186 | data = datasets.load_diabetes()
187 | b = Binarizer(threshold=np.mean(data.target))
188 |             data.target = b.fit_transform(data.target.reshape(-1, 1)).ravel()
189 | elif opt.lower() == 'boston':
190 | data = datasets.load_boston()
191 | b = Binarizer(threshold=np.mean(data.target))
192 |             data.target = b.fit_transform(data.target.reshape(-1, 1)).ravel()
193 | elif opt.lower() == 'gauss':
194 | means = np.array([[-1, 1, 1, 1], [0, -1, 0, 0], [1, 1, -1, -1]])
195 | sigmas = np.array([0.33, 0.33, 0.33])
196 | if n_samples <= 1:
197 | n_samples = 333
198 | xx, yy = generate_gauss(mu=means, std=sigmas, n_sample=n_samples)
199 | data = datasets.base.Bunch(data=xx, target=yy)
200 | elif opt.lower() == 'circles':
201 | if n_samples == 0:
202 | n_samples = 400
203 | xx, yy = datasets.make_circles(n_samples=n_samples, factor=.3,
204 | noise=.05)
205 | data = datasets.base.Bunch(data=xx, target=yy)
206 | elif opt.lower() == 'moons':
207 | if n_samples == 0:
208 | n_samples = 400
209 | xx, yy = datasets.make_moons(n_samples=n_samples, noise=.01)
210 | data = datasets.base.Bunch(data=xx, target=yy)
211 | elif opt.lower() == 'custom':
212 | data = load_custom(x_filename, y_filename, samples_on, **kwargs)
213 | elif opt.lower().startswith('gse'):
214 |             raise Exception("Use ade_GEO2csv.py to convert GEO DataSets "
215 | "into csv files.")
216 | except IOError as e:
217 | print("I/O error({0}): {1}".format(e.errno, e.strerror))
218 |
219 | X, y = data.data, data.target
220 | if n_samples > 0 and X.shape[0] > n_samples:
221 | if y is not None:
222 | try: # Legacy for sklearn
223 | sss = StratifiedShuffleSplit(y, test_size=n_samples, n_iter=1)
224 | # idx = np.random.permutation(X.shape[0])[:n_samples]
225 | except TypeError:
226 | sss = StratifiedShuffleSplit(test_size=n_samples) \
227 | .split(X, y)
228 | _, idx = list(sss)[0]
229 | else:
230 | idx = np.arange(X.shape[0])
231 | np.random.shuffle(idx)
232 | idx = idx[:n_samples]
233 |
234 | X, y = X[idx, :], y[idx]
235 | else:
236 | # The length of index must be consistent with the number of samples
237 | idx = np.arange(X.shape[0])
238 |
239 | feat_names = data.feature_names if hasattr(data, 'feature_names') \
240 | else np.arange(X.shape[1])
241 | index = np.array(data.index)[idx] if hasattr(data, 'index') \
242 | else np.arange(X.shape[0])
243 |
244 | return X, y, feat_names, index
245 |
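246 | 
247 | if __name__ == '__main__':
248 |     # Minimal usage sketch: load a bundled scikit-learn dataset; the
249 |     # commented-out call shows loading a (hypothetical) custom csv file
250 |     # whose samples lie on the columns.
251 |     X, y, feature_names, index = load('iris')
252 |     print("{} x {} matrix loaded".format(*X.shape))
253 |     # X, y, feature_names, index = load('custom', x_filename='X.csv',
254 |     #                                   y_filename='y.csv',
255 |     #                                   samples_on='cols', sep=',')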
--------------------------------------------------------------------------------
/adenine/utils/extra.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | import os
11 | import time
12 | import matplotlib; matplotlib.use('Agg')
13 | import seaborn as sns
14 |
15 | from datetime import datetime
16 | from itertools import product
17 |
18 |
19 | class Palette():
20 | """Wrapper for seaborn palette."""
21 |
22 | def __init__(self, name='Set1', n_colors=6):
23 | self.name = name
24 | self.palette = sns.color_palette(name, n_colors)
25 |
26 | def get(self, i=0):
27 | return self.palette[i]
28 |
29 | def next(self):
30 | self.palette.append(self.palette.pop(0))
31 | return self.palette[-1]
32 |
33 | def reset(self, n_colors=6):
34 | self.palette = sns.color_palette(self.name, n_colors)
35 |
36 |
37 | # ensure_list = lambda x: x if type(x) == list else [x]
38 | def ensure_list(x):
39 | return x if type(x) == list else [x]
40 |
41 |
42 | def values_iterator(dictionary):
43 | """Add support for python2 or 3 dictionary iterators."""
44 | try:
45 | v = dictionary.itervalues() # python 2
46 |     except AttributeError:
47 | v = dictionary.values() # python 3
48 | return v
49 |
50 |
51 | def items_iterator(dictionary):
52 | """Add support for python2 or 3 dictionary iterators."""
53 | try:
54 | gen = dictionary.iteritems() # python 2
55 |     except AttributeError:
56 | gen = dictionary.items() # python 3
57 | return gen
58 |
59 |
60 | def modified_cartesian(*args, **kwargs):
61 | """Modified Cartesian product.
62 |
63 |     This takes two (or more) lists and returns their Cartesian product.
64 |     Empty lists are replaced by [None], or dropped when `pipes_mode=True`.
65 |
66 | Parameters
67 | -----------
68 | *args : lists, length : two or more
69 | The group of input lists.
70 |
71 | Returns
72 | -----------
73 | cp : list
74 | The Cartesian Product of the two (or more) nonempty input lists.
75 | """
76 | # Get the non-empty input lists
77 | if kwargs.get('pipes_mode', False):
78 | nonempty = [ensure_list(arg) for arg in args if len(ensure_list(arg)) > 0]
79 | else:
80 | nonempty = [ensure_list(arg) if len(ensure_list(arg)) > 0 else [None] for arg in args]
81 |
82 | # Cartesian product
83 | return [list(c) for c in product(*nonempty)]
84 |
85 |
86 | def make_time_flag():
87 | """Generate a time flag.
88 |
89 | This function simply generates a time flag using the current time.
90 |
91 | Returns
92 | -----------
93 | timeFlag : string
94 | A unique time flag.
95 | """
96 | y = str(time.localtime().tm_year)
97 | mo = str(time.localtime().tm_mon)
98 | d = str(time.localtime().tm_mday)
99 | h = str(time.localtime().tm_hour)
100 | mi = str(time.localtime().tm_min)
101 | s = str(time.localtime().tm_sec)
102 | return h + ':' + mi + ':' + s + '_' + d + '-' + mo + '-' + y
103 |
104 |
105 | def sec_to_time(seconds):
106 | """Transform seconds into a formatted time string.
107 |
108 | Parameters
109 | -----------
110 | seconds : int
111 | Seconds to be transformed.
112 |
113 | Returns
114 | -----------
115 | time : string
116 | A well formatted time string.
117 | """
118 | m, s = divmod(seconds, 60)
119 | h, m = divmod(m, 60)
120 | return "%02d:%02d:%02d" % (h, m, s)
121 |
122 |
123 | def get_time():
124 | """Get time of now, in string."""
125 | return datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H.%M.%S')
126 |
127 |
128 | def title_from_filename(root, step_sep="$\mapsto$"):
129 |     # Define the plot title. The list is something like ['results', 'ade_debug_', 'Standardize', 'PCA']
130 | i = [i for i, s in enumerate(root.split(os.sep)) if 'ade_' in s][0]
131 |
132 | # lambda function below does: ('a_b_c') -> 'c b a'
133 | return step_sep.join(map(lambda x: ' '.join(x.split('_')[::-1]), root.split(os.sep)[i+1:]))
134 |
135 |
136 | def ensure_symmetry(X):
137 | """Ensure matrix symmetry.
138 |
139 | Parameters
140 | -----------
141 | X : numpy.ndarray
142 | Input matrix of precomputed pairwise distances.
143 |
144 | Returns
145 | -----------
146 | new_X : numpy.ndarray
147 | Symmetric distance matrix. Values are averaged.
148 | """
149 | if not (X.T == X).all():
150 | return (X.T + X) / 2.
151 | else:
152 | return X
153 |
154 |
155 | def timed(function):
156 |     """Decorator that measures the wall time of the decorated function."""
157 | def timed_function(*args, **kwargs):
158 | t0 = time.time()
159 | result = function(*args, **kwargs)
160 | print("\nAdenine {} - Elapsed time : {} s\n"
161 | .format(function.__name__, sec_to_time(time.time() - t0)))
162 | return result
163 | return timed_function
164 |
165 |
166 | def set_module_defaults(module, dictionary):
167 | """Set default variables of a module, given a dictionary.
168 |
169 | Used after the loading of the configuration file to set some defaults.
170 | """
171 | for k, v in items_iterator(dictionary):
172 | try:
173 | getattr(module, k)
174 | except AttributeError:
175 | setattr(module, k, v)
176 |
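177 | 
178 | if __name__ == '__main__':
179 |     # Behavior sketch for modified_cartesian: by default an empty input
180 |     # list is replaced by [None]; with pipes_mode=True it is dropped.
181 |     print(modified_cartesian([1, 2], [], ['a']))
182 |     # -> [[1, None, 'a'], [2, None, 'a']]
183 |     print(modified_cartesian([1, 2], [], ['a'], pipes_mode=True))
184 |     # -> [[1, 'a'], [2, 'a']]
185 |     print(sec_to_time(3735))  # -> 01:02:15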
--------------------------------------------------------------------------------
/adenine/utils/scores.py:
--------------------------------------------------------------------------------
1 | """Validation utils for clustering algorithms.
2 |
3 | Notes
4 | -----
5 | Precision, recall and F score
6 | In multiclass classification / clustering, a confusion matrix can be
7 | obtained. To validate the result, one can use precision, recall and
8 | f score. These are obtained using TP, FP, FN, TN.
9 | In particular, for each class (true label) x, in a confusion matrix cm:
10 | - true positive: diagonal position, cm(x, x).
11 | - false positive: sum of column x (without main diagonal),
12 | sum(cm(:, x)) - cm(x, x).
13 | - false negative: sum of row x (without main diagonal),
14 | sum(cm(x, :), 2) - cm(x, x).
15 | - true negative: sum of all the matrix without tp, fp, fn.
16 |
17 | Averaging over all classes (with or without weighting) gives values for the
18 | entire model.
19 |
20 | Author: Federico Tomasi
21 | Copyright (c) 2016, Federico Tomasi.
22 | Licensed under the FreeBSD license (see LICENSE.txt).
23 | """
24 | import matplotlib; matplotlib.use('Agg')
25 | import numpy as np
26 | import pandas as pd
27 | import seaborn as sns
28 |
29 |
30 | def get_clones_real_estimated(filename):
31 | """Get true and estimated labels from a partis-generated dataset."""
32 | df = pd.read_csv(filename, dialect='excel-tab', header=0,
33 | usecols=('SEQUENCE_ID', 'CLONE'))
34 | df['CLONE_ID'] = df['SEQUENCE_ID'].str.split('_').apply(lambda x: x[3])
35 |
36 | clone_ids = np.array(df['CLONE_ID'], dtype=str)
37 | found_ids = np.array(df['CLONE'], dtype=str)
38 | return clone_ids, found_ids
39 |
40 |
41 | def order_cm(cm):
42 | """Reorder a multiclass confusion matrix."""
43 | # reorder rows
44 | idx_rows = np.max(cm, axis=1).argsort()[::-1]
45 | b = cm[idx_rows, :]
46 |
47 | # reorder cols
48 | max_idxs = np.ones(b.shape[1], dtype=bool)
49 | final_idxs = []
50 | for i, row in enumerate(b.copy()):
51 | if i == b.shape[0] or not max_idxs.any():
52 | break
53 | row[~max_idxs] = np.min(cm) - 1
54 | max_idx = np.argmax(row)
55 | final_idxs.append(max_idx)
56 | max_idxs[max_idx] = False
57 |
58 | idx_cols = np.append(np.array(final_idxs, dtype=int),
59 | np.argwhere(max_idxs).T[0]) # residuals
60 |
61 |     # rows must be reordered as well, following the new column order
62 | b = b[:, idx_cols]
63 | bb = b.copy()
64 | max_idxs = np.ones(b.shape[0], dtype=bool)
65 | final_idxs = []
66 | for i in range(b.shape[1]):
67 | # for each column
68 | if i == b.shape[1] or not max_idxs.any():
69 | break
70 | col = bb[:, i]
71 | col[~max_idxs] = -1
72 | max_idx = np.argmax(col)
73 | final_idxs.append(max_idx)
74 | max_idxs[max_idx] = False
75 |
76 | idx_rows2 = np.append(np.array(final_idxs, dtype=int),
77 | np.argwhere(max_idxs).T[0]) # residuals
78 |
79 | idx = np.argsort(idx_rows)
80 | return b[idx_rows2, :], idx_rows2[idx], idx_cols
81 |
82 |
83 | def confusion_matrix(true_labels, estimated_labels, ordered=True):
84 | """Return a confusion matrix in a multiclass / multilabel problem."""
85 | true_labels = np.array(true_labels, dtype=str)
86 | estimated_labels = np.array(estimated_labels, dtype=str)
87 | if true_labels.shape[0] != estimated_labels.shape[0]:
88 | raise ValueError("Inputs must have the same dimensions.")
89 | rows = np.unique(true_labels)
90 | cols = np.unique(estimated_labels)
91 |
92 | # padding only on columns
93 | cm = np.zeros((rows.shape[0], max(cols.shape[0], rows.shape[0])))
94 | from collections import Counter
95 | for i, row in enumerate(rows):
96 | idx_rows = true_labels == row
97 | counter = Counter(estimated_labels[idx_rows])
98 | for g in counter:
99 | idx_col = np.where(cols == g)[0][0]
100 | cm[i, idx_col] += counter[g]
101 |
102 | cols = np.append(cols, ['pad'] * (cm.shape[1] - cols.shape[0]))
103 | if ordered:
104 | cm, rr, cc = order_cm(cm)
105 | rows, cols = rows[rr], cols[cc]
106 | return cm, rows, cols
107 |
108 |
109 | def precision_recall_fscore(a, method='micro', beta=1.):
110 |     """Return precision, recall and F-score for a multiclass confusion matrix.
111 |
112 | See
113 | http://stats.stackexchange.com/questions/44261/how-to-determine-the-quality-of-a-multiclass-classifier
114 | """
115 | def _single_measures(a, i):
116 | tp = a[i, i]
117 | fp = np.sum(a[:, i]) - tp
118 | fn = np.sum(a[i, :]) - tp
119 | tn = a.sum() - tp - fp - fn
120 | return tp, fp, fn, tn
121 |
122 | singles = zip(*[_single_measures(a, i) for i in range(min(a.shape))])
123 | tps, fps, fns, tns = map(lambda x: np.array(list(x), dtype=float), singles)
124 |
125 | if method == 'micro':
126 | precision = float(tps.sum()) / (tps + fps).sum()
127 | recall = float(tps.sum()) / (tps + fns).sum()
128 | elif method == 'macro':
129 | sum_ = tps + fps
130 | idx = np.where(sum_)
131 | precision = (tps[idx] / sum_[idx]).mean()
132 |
133 | sum_ = tps + fns
134 | idx = np.where(sum_)
135 | recall = (tps[idx] / sum_[idx]).mean()
136 | fscore = (1 + beta * beta) * precision * recall / \
137 | (beta * beta * precision + recall)
138 | return precision, recall, fscore
139 |
140 |
141 | def show_heatmap(filename):
142 |     """Show the confusion matrix of a partis-generated tab-delimited db."""
143 | true_labels, estimated_labels = get_clones_real_estimated(filename)
144 | cm, rows, cols = confusion_matrix(true_labels, estimated_labels)
145 | df = pd.DataFrame(cm, index=rows, columns=cols)
146 | sns.heatmap(df)
147 | sns.plt.show()
148 |
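149 | 
150 | if __name__ == '__main__':
151 |     # Toy sketch: build an (ordered) confusion matrix from true vs.
152 |     # estimated labels, then micro-averaged precision/recall/F-score.
153 |     true = ['a', 'a', 'a', 'b', 'b', 'c']
154 |     est = ['x', 'x', 'y', 'y', 'y', 'z']
155 |     cm, rows, cols = confusion_matrix(true, est)
156 |     p, r, f = precision_recall_fscore(cm, method='micro')
157 |     print(cm, rows, cols)
158 |     print("P={:.2f} R={:.2f} F={:.2f}".format(p, r, f))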
--------------------------------------------------------------------------------
/adenine/utils/templates.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | def new_fun(arg1 = 'Default', arg2 = 'Default'):
11 | """Short explanation.
12 |
13 | This is the very long explanation
14 |
15 | Parameters
16 | -----------
17 | arg1 : type, default : 'Default'
18 | What is arg1.
19 |
20 | arg2 : {'Default', 'Different', 'Another'}
21 | What is arg2.
22 |
23 | Returns
24 | -----------
25 | out : type
26 | What is out.
27 | """
28 |
--------------------------------------------------------------------------------
/doc/GiHubProjectPage.txt:
--------------------------------------------------------------------------------
1 | ### Welcome to ADENINE.
2 | ADENINE is a machine learning and data mining framework that helps you answer the tedious question: are my data relevant to the problem I'm dealing with?
3 |
4 | ### Implementation
5 | With ADENINE you can build different unsupervised data analysis pipelines made of the following steps:
6 |
7 | 1. missing value imputation
8 | 2. preprocessing
9 | 3. dimensionality reduction
10 | 4. clustering
11 |
12 | A list of the most common state-of-the-art methods is available for each step; a minimal example is sketched at the bottom of this page.
13 |
14 | ### Dependencies
15 | ADENINE is developed using Python 2.7 and inherits its main functionalities from:
16 | * numpy
17 | * scipy
18 | * scikit-learn
19 | * matplotlib
20 | * seaborn
21 |
22 | ### Authors and Contributors
23 | Current developers: Samuele Fiorini (@samuelefiorini) and Federico Tomasi (@fdtomasi).
24 |
25 | ### Support or Contact
26 | Having trouble with ADENINE? Check out our [documentation](http://www.slipguru.unige.it/Software/adenine/) or contact us:
27 | * samuele [dot] fiorini [at] dibris [dot] unige [dot] it
28 | * federico [dot] tomasi [at] dibris [dot] unige [dot] it
29 |
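30 | ### Example
31 | As a minimal sketch of what one such pipeline looks like (written in plain `scikit-learn`, not in ADENINE's own configuration syntax), consider the combination standardization, then PCA, then k-means:
32 | 
33 | ```python
34 | from sklearn.cluster import KMeans
35 | from sklearn.datasets import load_iris
36 | from sklearn.decomposition import PCA
37 | from sklearn.pipeline import Pipeline
38 | from sklearn.preprocessing import StandardScaler
39 | 
40 | X = load_iris().data
41 | pipe = Pipeline([('standardize', StandardScaler()),
42 |                  ('pca', PCA(n_components=2)),
43 |                  ('kmeans', KMeans(n_clusters=3, random_state=0))])
44 | cluster_labels = pipe.fit_predict(X)
45 | ```
46 | 
47 | ADENINE automatically builds and evaluates every combination of the methods chosen for each step.
48 | 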
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
23 |
24 | help:
25 | 	@echo "Please use \`make <target>' where <target> is one of"
26 | @echo " html to make standalone HTML files"
27 | @echo " dirhtml to make HTML files named index.html in directories"
28 | @echo " singlehtml to make a single large HTML file"
29 | @echo " pickle to make pickle files"
30 | @echo " json to make JSON files"
31 | @echo " htmlhelp to make HTML files and a HTML help project"
32 | @echo " qthelp to make HTML files and a qthelp project"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 |
49 | clean:
50 | rm -rf $(BUILDDIR)/*
51 |
52 | html:
53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
54 | @echo
55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
56 |
57 | dirhtml:
58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
59 | @echo
60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
61 |
62 | singlehtml:
63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
64 | @echo
65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
66 |
67 | pickle:
68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
69 | @echo
70 | @echo "Build finished; now you can process the pickle files."
71 |
72 | json:
73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
74 | @echo
75 | @echo "Build finished; now you can process the JSON files."
76 |
77 | htmlhelp:
78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
79 | @echo
80 | @echo "Build finished; now you can run HTML Help Workshop with the" \
81 | ".hhp project file in $(BUILDDIR)/htmlhelp."
82 |
83 | qthelp:
84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
85 | @echo
86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/adenine.qhcp"
89 | @echo "To view the help file:"
90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/adenine.qhc"
91 |
92 | devhelp:
93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
94 | @echo
95 | @echo "Build finished."
96 | @echo "To view the help file:"
97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/adenine"
98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/adenine"
99 | @echo "# devhelp"
100 |
101 | epub:
102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
103 | @echo
104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 |
106 | latex:
107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
108 | @echo
109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
111 | "(use \`make latexpdf' here to do that automatically)."
112 |
113 | latexpdf:
114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | @echo "Running LaTeX files through pdflatex..."
116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
118 |
119 | latexpdfja:
120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
121 | @echo "Running LaTeX files through platex and dvipdfmx..."
122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
124 |
125 | text:
126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
127 | @echo
128 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
129 |
130 | man:
131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
132 | @echo
133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
134 |
135 | texinfo:
136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
137 | @echo
138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
139 | @echo "Run \`make' in that directory to run these through makeinfo" \
140 | "(use \`make info' here to do that automatically)."
141 |
142 | info:
143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | @echo "Running Texinfo files through makeinfo..."
145 | make -C $(BUILDDIR)/texinfo info
146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
147 |
148 | gettext:
149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
150 | @echo
151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
152 |
153 | changes:
154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
155 | @echo
156 | @echo "The overview file is in $(BUILDDIR)/changes."
157 |
158 | linkcheck:
159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
160 | @echo
161 | @echo "Link check complete; look for any errors in the above output " \
162 | "or in $(BUILDDIR)/linkcheck/output.txt."
163 |
164 | doctest:
165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
166 | @echo "Testing of doctests in the sources finished, look at the " \
167 | "results in $(BUILDDIR)/doctest/output.txt."
168 |
169 | xml:
170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
171 | @echo
172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
173 |
174 | pseudoxml:
175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
176 | @echo
177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
178 |
--------------------------------------------------------------------------------
/doc/devPlan/plan.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/devPlan/plan.pdf
--------------------------------------------------------------------------------
/doc/devPlan/plan.tex:
--------------------------------------------------------------------------------
1 | %----------------------------------------------------------------------------------------
2 | % PACKAGES AND OTHER DOCUMENT CONFIGURATIONS
3 | %----------------------------------------------------------------------------------------
4 |
5 | \documentclass[paper=a4, fontsize=10pt]{scrartcl} % A4 paper and 10pt font size
6 |
7 | \usepackage[T1]{fontenc} % Use 8-bit encoding that has 256 glyphs
8 | \usepackage[english]{babel} % English language/hyphenation
9 | \usepackage{amsmath,amsfonts,amsthm} % Math packages
10 |
11 | \usepackage[margin=1in]{geometry}
12 |
13 | \usepackage{xspace} % space after new commands
14 | \usepackage{hyperref}
15 | \usepackage{enumitem}
16 |
17 |
18 | \usepackage{sectsty} % Allows customizing section commands
19 | \allsectionsfont{\centering \normalfont\scshape} % Make all sections centered, the default font and small caps
20 |
21 | \usepackage{fancyhdr} % Custom headers and footers
22 | \pagestyle{fancyplain} % Makes all pages in the document conform to the custom headers and footers
23 | \fancyhead{} % No page header - if you want one, create it in the same way as the footers below
24 | \fancyfoot[L]{} % Empty left footer
25 | \fancyfoot[C]{} % Empty center footer
26 | \fancyfoot[R]{\thepage} % Page numbering for right footer
27 | \renewcommand{\headrulewidth}{0pt} % Remove header underlines
28 | \renewcommand{\footrulewidth}{0pt} % Remove footer underlines
29 | \setlength{\headheight}{11pt} % Customize the height of the header
30 |
31 | \numberwithin{equation}{section} % Number equations within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4)
32 | \numberwithin{figure}{section} % Number figures within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4)
33 | \numberwithin{table}{section} % Number tables within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4)
34 |
35 | \setlength\parindent{0pt} % Removes all indentation from paragraphs - comment this line for an assignment with lots of text
36 |
37 | %----------------------------------------------------------------------------------------
38 | % TITLE SECTION
39 | %----------------------------------------------------------------------------------------
40 |
41 | \newcommand{\horrule}[1]{\rule{\linewidth}{#1}} % Create horizontal rule command with 1 argument of height
42 | \newcommand{\adenine}{{\tt adenine}\xspace}
43 |
44 | \title{
45 | \normalfont \normalsize
46 | \huge{\tt ADENINE}: A Data ExploratioN pipelINE \\
47 | \horrule{2pt} \\[0.5cm] % Thick bottom horizontal rule
48 | development plan \\ % The assignment title
49 | }
50 |
51 | \author{Samuele Fiorini} % Your name
52 |
53 | \date{\normalsize\today} % Today's date or a custom date
54 |
55 | \begin{document}
56 |
57 | \maketitle % Print the title
58 |
59 | %----------------------------------------------------------------------------------------
60 | % PROBLEM 1
61 | %----------------------------------------------------------------------------------------
62 |
63 | \section{Introduction and Motivation}
64 |
65 | A question that arises at the beginning of almost every new data analysis is
66 | the following: {\sl are my data relevant for the problem I'm dealing with}? \\
67 |
68 | The final goal of this project (named \adenine) is to help its users glimpse the answer to
69 | this tedious question. \\
70 |
71 | In order to reach this goal, \adenine will take advantage of machine learning and
72 | data mining techniques. The final pipeline will essentially consist of three steps:
73 |
74 | \begin{enumerate}
75 |
76 | \item {\bf Preprocessing}: have you ever wondered what would have
77 | changed if only your data had been preprocessed in a different way? Or whether
78 | data preprocessing is a good idea at all? \adenine will offer several
79 | preprocessing procedures, such as data centering, Min-Max scaling,
80 | standardization and normalization, and will allow you to compare the results of
81 | analyses conducted from different starting points.
82 |
83 | \item {\bf Dimensionality Reduction} (DR): in the context of data
84 | exploration, this phase becomes particularly helpful for high dimensional data (e.g.
85 | -omics scenario). This step, generically named DR, may actually include some
86 | manifold learning (such as Isomap, Multidimensional Scaling, etc), supervised
87 | (Linear Discriminant Analysis) and unsupervised (Principal Component Analysis,
88 | kernel PCA) techniques.
89 |
90 | \item {\bf Clustering}: this step aims at grouping data into clusters without taking
91 | into account the class labels. Several techniques such as K-Means, Spectral or Hierarchical
92 | clustering will work on both the original and the dimensionality-reduced data.
93 |
94 | \end{enumerate}
95 |
96 | The final output of \adenine will be a visual and textual representation, as compact as
97 | possible, of the results obtained from the pipelines built with each possible combination of the
98 | algorithms implemented at each step. As an example, referring to a pipeline built as:
99 |
100 | \begin{center}
101 | {\sl Data normalization $\rightarrow$ PCA $\rightarrow$ K-Means}
102 | \end{center}
103 |
104 | the output would be something like:
105 |
106 | \begin{itemize}
107 |
108 | \item an output file containing the norm of the original variables (which has
109 | been used to coerce all the features into $[0,1]$),
110 |
111 | \item a 2-D or 3-D scatter plot of the data projected along the principal
112 | components and the percentage of explained variance associated with each
113 | one of them,
114 |
115 | \item a pictorial representation of the data clustering results
116 | obtained with the optimal number of clusters (learned from the data).
117 |
118 | \end{itemize}
119 |
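As a rough illustration, a minimal sketch of such a pipeline built directly on
{\tt scikit-learn} could look as follows (this is {\sl not} \adenine's actual
interface; the data and parameter values are placeholders):

\begin{verbatim}
# Sketch: normalization -> PCA -> K-Means, assembled with scikit-learn.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

X = np.random.RandomState(0).randn(100, 10)      # toy data matrix

pipe = Pipeline([('normalize', Normalizer()),    # unit-norm samples
                 ('pca', PCA(n_components=2)),   # 2-D projection
                 ('kmeans', KMeans(n_clusters=3))])
labels = pipe.fit_predict(X)                     # one cluster label per sample
\end{verbatim}
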
120 | \subsection{Material for PhD progress}
121 |
122 | The study behind the implementation of \adenine relates to
123 | four PhD courses of my first-year work plan:
124 |
125 | \begin{enumerate}
126 |
127 | \item {\sl A Machine Learning Crash Course} [DIBRIS] (Odone, Rosasco): \adenine will cover
128 | a fair number of (mainly unsupervised) machine learning techniques. Hence, this course
129 | has been fundamental for acquiring the statistical learning background needed to understand
130 | the underlying mechanisms of the algorithms.
131 |
132 | \item {\sl Programming Concepts in Python} [DIBRIS] (Tacchella): I plan to implement \adenine in
133 | {\tt Python}. Hence, most of the implementation choices will be made on the basis of the material
134 | covered in the course.
135 |
136 | \item {\sl Programming Complex Heterogeneous Parallel Systems} [IMATI]
137 | (Clematis, D'Agostino, Danovaro, Galizia) and the {\sl 24th Summer School on
138 | Parallel Computing} [CINECA] (Erbacci): \adenine will present several {\sl embarrassingly
139 | parallel} workloads as well as several {\sl isolated GPU-acceleratable} computations.
140 | The former PhD course and the latter school will allow me to develop the parallel computing
141 | skills I need to implement \adenine in as optimized a way as possible.
142 |
143 | \end{enumerate}
144 |
145 |
146 | \section{Implemented Algorithms}
147 |
148 | The implementation of nearly all the algorithms of \adenine will refer to the
149 | \href{http://scikit-learn.org/stable/index.html}{\tt scikit-learn} Python
150 | library. See the following \href{http://scikit-learn.org/stable/unsupervised_learning.html}{\tt link} for a
151 | comprehensive list.
152 |
153 | \subsection{Preprocessing}
154 |
155 | At this step the data will be fed to the following preprocessing procedures:
156 | \begin{enumerate}[start = 0]
157 | \item no preprocessing: the analysis will be conducted on raw data;
158 |
159 | \item na\"ive recentering: remove the mean;
160 |
161 | \item standardization: remove the mean and scale each feature by
162 | its standard deviation, yielding features with zero mean and unit variance;
163 |
164 | \item normalization: scale all the samples to have unit norm.
165 |
166 | \end{enumerate}
167 |
168 | In its first version \adenine will allow the user to impute the missing values by means of the
169 | median, the mean or the most frequent value (future work is discussed in Section~\ref{sec:future}).
170 | See the {\tt sklearn} \href{http://scikit-learn.org/stable/modules/preprocessing.html}{docs}
171 | on data preprocessing for further details.
172 |
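A minimal sketch of these imputing and preprocessing procedures, written
directly against {\tt scikit-learn} (the class names below belong to recent
releases of that library, not to \adenine itself):

\begin{verbatim}
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer

X = np.array([[1.0, 2.0], [np.nan, 4.0], [5.0, 6.0]])

X_imp = SimpleImputer(strategy='median').fit_transform(X)    # imputing
X_ctr = StandardScaler(with_std=False).fit_transform(X_imp)  # recentering
X_std = StandardScaler().fit_transform(X_imp)  # zero mean, unit variance
X_mms = MinMaxScaler().fit_transform(X_imp)    # features scaled to [0, 1]
X_nrm = Normalizer().fit_transform(X_imp)      # unit-norm samples
\end{verbatim}
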
173 | \subsection{Dimensionality reduction}
174 |
175 | The following is a work-in-progress list of the techniques I plan to
176 | make available in \adenine. The list includes algorithms that come
177 | from very different standpoints, but that share a common outcome:
178 | the estimation of a low-dimensional embedding (manifold) onto which the data can
179 | be projected for visualization or further analysis.
180 |
181 | \begin{enumerate}[label=(\alph*)]
182 |
183 | \item Principal Component Analysis (PCA), in its Incremental or Randomized variants
184 | in the case of big data;
185 |
186 | \item Kernel PCA, which may come along different kernels (Gaussian,
187 | polynomial, and so on);
188 |
189 | \item Isomap;
190 |
191 | \item Locally Linear Embedding (LLE), in its modified (MLLE) or Hessian
192 | (HLLE) regularized versions;
193 |
194 | \item Spectral Embedding (SE);
195 |
196 | \item Local Tangent Space Alignment (LTSA);
197 |
198 | \item Multidimensional Scaling (MDS), in its metric and non-metric versions;
199 |
200 | \item t-distributed Stochastic Neighbor Embedding (t-SNE).
201 |
202 | \end{enumerate}
203 |
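As a brief sketch, a few of the techniques listed above can be used to
estimate a 2-D embedding as follows (plain {\tt scikit-learn} calls on toy
data; the parameter values are arbitrary):

\begin{verbatim}
import numpy as np
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap

X = np.random.RandomState(0).randn(200, 50)   # toy high-dimensional data

X_pca = PCA(n_components=2).fit_transform(X)
X_kpc = KernelPCA(n_components=2, kernel='rbf').fit_transform(X)
X_iso = Isomap(n_neighbors=10, n_components=2).fit_transform(X)
\end{verbatim}
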
204 | \subsection{Clustering}
205 |
206 | Along the same lines, this section presents a list of the clustering techniques I
207 | plan to include in \adenine.
208 |
209 | \begin{enumerate}
210 |
211 | \item [($\alpha$)] K-Means, in its Mini-Batch variant for big data;
212 |
213 | \item [($\beta$)] Affinity Propagation;
214 |
215 | \item [($\gamma$)] Mean Shift;
216 |
217 | \item [($\delta$)] Spectral Clustering;
218 |
219 | \item [($\epsilon$)] Hierarchical Agglomerative Clustering, exploring
220 | different linkage types (i.e., Ward, complete, average) as well as different
221 | metrics (e.g., Euclidean, Manhattan, Minkowski);
222 |
223 | \item [($\zeta$)] DBSCAN;
224 |
225 | \item [($\eta$)] Birch.
226 |
227 | \end{enumerate}
228 |
229 | Several indices to analyze the clustering performance will be included; some
230 | of them require ground-truth labels (such as the Adjusted Rand Index (ARI), the
231 | Adjusted Mutual Information (AMI), and the homogeneity, completeness or V-measure
232 | scores), while others evaluate cluster compactness or the separation
233 | between clusters (such as the silhouette score).
234 |
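A minimal sketch of these indices on toy data (again plain
{\tt scikit-learn}, not \adenine's interface):

\begin{verbatim}
# ARI and AMI compare predicted labels with the ground truth y;
# the silhouette score only needs the data and the predicted labels.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import (adjusted_rand_score,
                             adjusted_mutual_info_score, silhouette_score)

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 5])   # two blobs
y = np.array([0] * 50 + [1] * 50)                         # ground truth

labels = KMeans(n_clusters=2).fit_predict(X)
print(adjusted_rand_score(y, labels))
print(adjusted_mutual_info_score(y, labels))
print(silhouette_score(X, labels))
\end{verbatim}
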
235 | \section{Future Works} \label{sec:future}
236 |
237 | Indeed, \adenine is not meant to be an all-inclusive tool. This section, which
238 | will always be a work in progress, mentions the features that
239 | are not going to be implemented in the first version of \adenine, but that may
240 | be added later on.
241 |
242 | \begin{itemize}
243 |
244 | \item How can we handle missing values? \adenine may have some statistically robust
245 | imputation tools (such as low-rank matrix completion, or collaborative filtering) in
246 | future versions;
247 |
248 | \item Kernel K-Means;
249 |
250 | \item Dictionary Learning;
251 |
252 | \item Factor Analysis;
253 |
254 | \item Non-negative Matrix Factorization;
255 |
256 | \item Outliers Detection.
257 |
258 | \end{itemize}
259 | %----------------------------------------------------------------------------------------
260 |
261 | \end{document}
262 |
--------------------------------------------------------------------------------
/doc/source/adenine_logo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/source/adenine_logo.pdf
--------------------------------------------------------------------------------
/doc/source/adenine_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/source/adenine_logo.png
--------------------------------------------------------------------------------
/doc/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # adenine documentation build configuration file, created by
4 | # sphinx-quickstart on Fri May 22 12:31:54 2015.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | import sys
16 | import os
17 |
18 | # If extensions (or modules to document with autodoc) are in another directory,
19 | # add these directories to sys.path here. If the directory is relative to the
20 | # documentation root, use os.path.abspath to make it absolute, like shown here.
21 | sys.path.insert(0, os.path.abspath('.'))
22 | sys.path.insert(0, os.path.abspath('sphinxext'))
23 |
24 | from adenine import __version__ as VERSION
25 |
26 | # -- General configuration ------------------------------------------------
27 |
28 | # If your documentation needs a minimal Sphinx version, state it here.
29 | #needs_sphinx = '1.0'
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [
35 | 'sphinx.ext.autodoc',
36 | 'sphinx.ext.doctest',
37 | 'sphinx.ext.todo',
38 | 'sphinx.ext.coverage',
39 | 'sphinx.ext.mathjax',
40 | 'sphinx.ext.viewcode',
41 | 'sphinx.ext.autosummary',
42 | 'sphinx.ext.intersphinx',
43 | 'numpydoc',
44 | 'sphinxcontrib.programoutput',
45 | ]
46 |
47 | # Extension configurations
48 | autoclass_content = 'init'
49 | autodoc_member_order = 'bysource'
50 | numpydoc_show_class_members = False
51 |
52 | # Add any paths that contain templates here, relative to this directory.
53 | templates_path = ['_templates']
54 |
55 | # The suffix of source filenames.
56 | source_suffix = '.rst'
57 |
58 | # The encoding of source files.
59 | #source_encoding = 'utf-8-sig'
60 |
61 | # The master toctree document.
62 | master_doc = 'index'
63 |
64 | # General information about the project.
65 | project = u'ADENINE'
66 | copyright = u'2016, Samuele Fiorini - Federico Tomasi - Annalisa Barla'
67 | #modindex_common_prefix = ['adenine.']
68 |
69 | # The version info for the project you're documenting, acts as replacement for
70 | # |version| and |release|, also used in various other places throughout the
71 | # built documents.
72 | #
73 | # The short X.Y version.
74 | version = VERSION
75 | # The full version, including alpha/beta/rc tags.
76 | release = version
77 |
78 | # The language for content autogenerated by Sphinx. Refer to documentation
79 | # for a list of supported languages.
80 | #language = None
81 |
82 | # There are two options for replacing |today|: either, you set today to some
83 | # non-false value, then it is used:
84 | #today = ''
85 | # Else, today_fmt is used as the format for a strftime call.
86 | #today_fmt = '%B %d, %Y'
87 |
88 | # List of patterns, relative to source directory, that match files and
89 | # directories to ignore when looking for source files.
90 | exclude_patterns = []
91 |
92 | # The reST default role (used for this markup: `text`) to use for all
93 | # documents.
94 | #default_role = None
95 |
96 | # If true, '()' will be appended to :func: etc. cross-reference text.
97 | #add_function_parentheses = True
98 |
99 | # If true, the current module name will be prepended to all description
100 | # unit titles (such as .. function::).
101 | #add_module_names = True
102 |
103 | # If true, sectionauthor and moduleauthor directives will be shown in the
104 | # output. They are ignored by default.
105 | #show_authors = False
106 |
107 | # The name of the Pygments (syntax highlighting) style to use.
108 | pygments_style = 'sphinx'
109 |
110 | # A list of ignored prefixes for module index sorting.
111 | #modindex_common_prefix = []
112 |
113 | # If true, keep warnings as "system message" paragraphs in the built documents.
114 | #keep_warnings = False
115 |
116 |
117 | # -- Options for HTML output ----------------------------------------------
118 |
119 | # The theme to use for HTML and HTML Help pages. See the documentation for
120 | # a list of builtin themes.
121 | # html_theme = 'default'
122 | # html_theme = "nature"
123 | html_theme = 'slipGURUTheme'
124 |
125 |
126 | # Theme options are theme-specific and customize the look and feel of a theme
127 | # further. For a list of options available for each theme, see the
128 | # documentation.
129 | #html_theme_options = {}
130 |
131 | # Add any paths that contain custom themes here, relative to this directory.
132 | html_theme_path = ['.']
133 |
134 | # The name for this set of Sphinx documents. If None, it defaults to
135 | # " v documentation".
136 | #html_title = None
137 |
138 | # A shorter title for the navigation bar. Default is the same as html_title.
139 | #html_short_title = None
140 |
141 | # The name of an image file (relative to this directory) to place at the top
142 | # of the sidebar.
143 | html_logo = 'adenine_logo.png'
144 |
145 | # The name of an image file (within the static path) to use as favicon of the
146 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
147 | # pixels large.
148 | #html_favicon = None
149 |
150 | # Add any paths that contain custom static files (such as style sheets) here,
151 | # relative to this directory. They are copied after the builtin static files,
152 | # so a file named "default.css" will overwrite the builtin "default.css".
153 | html_static_path = ['_static']
154 |
155 | # Add any extra paths that contain custom files (such as robots.txt or
156 | # .htaccess) here, relative to this directory. These files are copied
157 | # directly to the root of the documentation.
158 | #html_extra_path = []
159 |
160 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
161 | # using the given strftime format.
162 | #html_last_updated_fmt = '%b %d, %Y'
163 |
164 | # If true, SmartyPants will be used to convert quotes and dashes to
165 | # typographically correct entities.
166 | #html_use_smartypants = True
167 |
168 | # Custom sidebar templates, maps document names to template names.
169 | #html_sidebars = {}
170 |
171 | # Additional templates that should be rendered to pages, maps page names to
172 | # template names.
173 | #html_additional_pages = {}
174 |
175 | # If false, no module index is generated.
176 | #html_domain_indices = True
177 |
178 | # If false, no index is generated.
179 | #html_use_index = True
180 |
181 | # If true, the index is split into individual pages for each letter.
182 | #html_split_index = False
183 |
184 | # If true, links to the reST sources are added to the pages.
185 | #html_show_sourcelink = True
186 |
187 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
188 | #html_show_sphinx = True
189 |
190 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
191 | #html_show_copyright = True
192 |
193 | # If true, an OpenSearch description file will be output, and all pages will
194 | # contain a tag referring to it. The value of this option must be the
195 | # base URL from which the finished HTML is served.
196 | #html_use_opensearch = ''
197 |
198 | # This is the file name suffix for HTML files (e.g. ".xhtml").
199 | #html_file_suffix = None
200 |
201 | # Output file base name for HTML help builder.
202 | htmlhelp_basename = 'adeninedoc'
203 |
204 |
205 | # -- Options for LaTeX output ---------------------------------------------
206 |
207 | latex_elements = {
208 | # The paper size ('letterpaper' or 'a4paper').
209 | #'papersize': 'letterpaper',
210 |
211 | # The font size ('10pt', '11pt' or '12pt').
212 | #'pointsize': '10pt',
213 |
214 | # Additional stuff for the LaTeX preamble.
215 | #'preamble': '',
216 | }
217 |
218 | # Grouping the document tree into LaTeX files. List of tuples
219 | # (source start file, target name, title,
220 | # author, documentclass [howto, manual, or own class]).
221 | latex_documents = [
222 | ('index', 'adenine.tex', u'adenine Documentation',
223 | u'Samuele Fiorini - Federico Tomasi - Annalisa Barla', 'manual'),
224 | ]
225 |
226 | # The name of an image file (relative to this directory) to place at the top of
227 | # the title page.
228 | latex_logo = 'adenine_logo.png'
229 |
230 | # For "manual" documents, if this is true, then toplevel headings are parts,
231 | # not chapters.
232 | #latex_use_parts = False
233 |
234 | # If true, show page references after internal links.
235 | #latex_show_pagerefs = False
236 |
237 | # If true, show URL addresses after external links.
238 | #latex_show_urls = False
239 |
240 | # Documents to append as an appendix to all manuals.
241 | #latex_appendices = []
242 |
243 | # If false, no module index is generated.
244 | #latex_domain_indices = True
245 |
246 |
247 | # -- Options for manual page output ---------------------------------------
248 |
249 | # One entry per manual page. List of tuples
250 | # (source start file, name, description, authors, manual section).
251 | man_pages = [
252 | ('index', 'adenine', u'adenine Documentation',
253 | [u'Samuele Fiorini - Federico Tomasi - Annalisa Barla'], 1)
254 | ]
255 |
256 | # If true, show URL addresses after external links.
257 | #man_show_urls = False
258 |
259 |
260 | # -- Options for Texinfo output -------------------------------------------
261 |
262 | # Grouping the document tree into Texinfo files. List of tuples
263 | # (source start file, target name, title, author,
264 | # dir menu entry, description, category)
265 | texinfo_documents = [
266 | ('index', 'adenine', u'adenine Documentation',
267 | u'Samuele Fiorini - Federico Tomasi - Annalisa Barla', 'adenine', 'A Data ExploratioN pIpeliNE.',
268 | 'Miscellaneous'),
269 | ]
270 |
271 | # Documents to append as an appendix to all manuals.
272 | #texinfo_appendices = []
273 |
274 | # If false, no module index is generated.
275 | #texinfo_domain_indices = True
276 |
277 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
278 | #texinfo_show_urls = 'footnote'
279 |
280 | # If true, do not generate a @detailmenu in the "Top" node's menu.
281 | #texinfo_no_detailmenu = False
282 |
--------------------------------------------------------------------------------
/doc/source/dependencies.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | seaborn
4 | pydot
5 | scikit-learn
6 |
--------------------------------------------------------------------------------
/doc/source/drawing.svg:
--------------------------------------------------------------------------------
(SVG markup not captured in this dump.)
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | .. adenine documentation master file, created by
2 | sphinx-quickstart on Fri May 22 12:31:54 2015.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | =====================================
7 | ADENINE (A Data ExploratioN pIpeliNE)
8 | =====================================
9 |
10 | **ADENINE** is a machine learning and data mining Python pipeline that helps you answer this tedious question: are my data relevant to the problem I'm dealing with?
11 |
12 | The main structure of adenine can be summarized in the following 4 steps.
13 |
14 | 1. **Imputing:** Does your dataset have missing entries? In the first step you can fill the missing values choosing among different strategies: feature-wise median, mean or most frequent value, or a more stable k-NN imputing (see the sketch below).
15 |
16 | 2. **Preprocessing:** Have you ever wondered what would have changed if only your data had been preprocessed in a different way? Or whether data preprocessing is a good idea at all? ADENINE offers several preprocessing procedures, such as data recentering, Min-Max scaling, standardization and normalization, and allows you to compare the results of analyses that use different preprocessing steps as their starting point.
17 |
18 | 3. **Dimensionality Reduction:** In the context of data exploration, this phase becomes particularly helpful for high-dimensional data. This step includes some manifold learning (such as Isomap, multidimensional scaling, etc.) and unsupervised dimensionality reduction (principal component analysis, kernel PCA) techniques.
19 |
20 | 4. **Clustering:** This step aims at grouping data into clusters in an unsupervised manner. Several techniques such as k-means, spectral or hierarchical clustering are offered.
21 |
22 | The final output of adenine is a compact visual and textual representation of the results obtained from the pipelines built with each possible combination of the algorithms implemented at each step.
23 |
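As a rough, library-level sketch of the imputing step (plain scikit-learn rather than adenine's own implementation; the median strategy below is just one of the options)::

    import numpy as np
    from sklearn.impute import SimpleImputer  # recent scikit-learn API

    X = np.array([[1.0, np.nan], [3.0, 4.0], [5.0, 6.0]])
    X_filled = SimpleImputer(strategy='median').fit_transform(X)  # feature-wise median
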
24 | User documentation
25 | ==================
26 | .. toctree::
27 | :maxdepth: 2
28 |
29 | tutorial.rst
30 |
31 | .. _api:
32 |
33 | ***********************
34 | API
35 | ***********************
36 |
37 | .. toctree::
38 | :maxdepth: 1
39 |
40 |
41 | Pipeline utilities
42 | -----------------------------
43 |
44 | .. automodule:: adenine.core.define_pipeline
45 | :members:
46 |
47 | .. automodule:: adenine.core.pipelines
48 | :members:
49 |
50 | .. automodule:: adenine.core.analyze_results
51 | :members:
52 |
53 | Input Data
54 | -----------------------------
55 |
56 | .. automodule:: adenine.utils.data_source
57 | :members:
58 |
59 |
60 | Plotting functions
61 | -----------------------------
62 |
63 | .. automodule:: adenine.core.plotting
64 | :members:
65 |
66 |
67 | Extra tools
68 | -----------------------------
69 |
70 | .. automodule:: adenine.utils.extra
71 | :members:
72 |
73 |
74 | .. Indices and tables
75 | .. ==================
76 |
77 | .. * :ref:`genindex`
78 | .. * :ref:`modindex`
79 | .. * :ref:`search`
80 |
81 |
--------------------------------------------------------------------------------
/doc/source/modules.rst:
--------------------------------------------------------------------------------
1 | .
2 | =
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | adenine
8 | setup
9 |
--------------------------------------------------------------------------------
/doc/source/slipGURUTheme/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "basic/layout.html" %}
2 |
3 | {% block sidebarsearch %}
4 | {{ super() }}
5 |
6 |
7 |
8 | {% endblock %}
9 |
10 | {% block extrahead %}
11 |
14 |
24 | {% endblock %}
25 |
26 | {% block sidebarrel %}
27 | {% if prev %}
28 | {{ super() }}
29 | {% else %}
30 | {% endif %}
31 | {% endblock %}
32 |
33 | {% block sidebartoc %}
34 | {% if prev %}
35 | {{ super() }}
36 | {% else %}
37 | Download
38 | Current version: {{ release }}
39 | Get {{ project }} from the
40 | Python Package Index,
41 | or install it with:
42 |
43 | pip install --upgrade {{ project }}
44 | or clone it from our GitHub repository:
45 | git clone https://github.com/slipguru/{{ project }}
46 |
47 |
48 |
53 |
54 | {% endif %}
55 | {% endblock %}
56 |
--------------------------------------------------------------------------------
/doc/source/slipGURUTheme/static/logos.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/doc/source/slipGURUTheme/static/logos.png
--------------------------------------------------------------------------------
/doc/source/slipGURUTheme/static/slipGuru.css:
--------------------------------------------------------------------------------
1 | @import "default.css";
2 |
3 | /**
4 | * Spacing fixes
5 | */
6 |
7 | div.body p, div.body dd, div.body li {
8 | line-height: 125%;
9 | }
10 |
11 | ul.simple {
12 | margin-top: 0;
13 | margin-bottom: 0;
14 | padding-top: 0;
15 | padding-bottom: 0;
16 | }
17 |
18 | /* spacing around blockquoted fields in parameters/attributes/returns */
19 | td.field-body > blockquote {
20 | margin-top: 0.1em;
21 | margin-bottom: 0.5em;
22 | }
23 |
24 | /* spacing around example code */
25 | div.highlight > pre {
26 | padding: 2px 5px 2px 5px;
27 | }
28 |
29 | /* spacing in see also definition lists */
30 | dl.last > dd {
31 | margin-top: 1px;
32 | margin-bottom: 5px;
33 | margin-left: 30px;
34 | }
35 |
36 | /* hide overflowing content in the sidebar */
37 | div.sphinxsidebarwrapper p.topless {
38 | overflow: hidden;
39 | }
40 |
41 | /**
42 | * Hide dummy toctrees
43 | */
44 |
45 | ul {
46 | padding-top: 0;
47 | padding-bottom: 0;
48 | margin-top: 0;
49 | margin-bottom: 0;
50 | }
51 | ul li {
52 | padding-top: 0;
53 | padding-bottom: 0;
54 | margin-top: 0;
55 | margin-bottom: 0;
56 | }
57 | ul li a.reference {
58 | padding-top: 0;
59 | padding-bottom: 0;
60 | margin-top: 0;
61 | margin-bottom: 0;
62 | }
63 |
64 | /**
65 | * Make high-level subsections easier to distinguish from top-level ones
66 | */
67 | div.body h3 {
68 | background-color: transparent;
69 | }
70 |
71 | div.body h4 {
72 | border: none;
73 | background-color: transparent;
74 | }
75 |
76 | /**
77 | * Scipy colors
78 | */
79 |
80 | body {
81 | background-color: rgb(100,135,220);
82 | }
83 |
84 | div.document {
85 | background-color: rgb(230,230,230);
86 | }
87 |
88 | div.sphinxsidebar {
89 | background-color: rgb(230,230,230);
90 | }
91 |
92 | div.related {
93 | background-color: rgb(100,135,220);
94 | }
95 |
96 | div.sphinxsidebar h3 {
97 | color: rgb(0,102,204);
98 | }
99 |
100 | div.sphinxsidebar h3 a {
101 | color: rgb(0,102,204);
102 | }
103 |
104 | div.sphinxsidebar h4 {
105 | color: rgb(0,82,194);
106 | }
107 |
108 | div.sphinxsidebar p {
109 | color: black;
110 | }
111 |
112 | div.sphinxsidebar a {
113 | color: #355f7c;
114 | }
115 |
116 | div.sphinxsidebar ul.want-points {
117 | list-style: disc;
118 | }
119 |
120 | .field-list th {
121 | color: rgb(0,102,204);
122 | white-space: nowrap;
123 | }
124 |
125 | /**
126 | * Extra admonitions
127 | */
128 |
129 | div.tip {
130 | background-color: #ffffe4;
131 | border: 1px solid #ee6;
132 | }
133 |
134 | div.plot-output {
135 |     clear: both;  /* 'clear-after' is not a valid CSS property */
136 | }
137 |
138 | div.plot-output .figure {
139 | float: left;
140 | text-align: center;
141 | margin-bottom: 0;
142 | padding-bottom: 0;
143 | }
144 |
145 | div.plot-output .caption {
146 |     margin-top: 2px;
147 | padding-top: 0;
148 | }
149 |
150 | div.plot-output p.admonition-title {
151 | display: none;
152 | }
153 |
154 | div.plot-output:after {
155 | content: "";
156 | display: block;
157 | height: 0;
158 | clear: both;
159 | }
160 |
161 |
162 | /*
163 | div.admonition-example {
164 | background-color: #e4ffe4;
165 | border: 1px solid #ccc;
166 | }*/
167 |
168 |
169 | /**
170 | * Styling for field lists
171 | */
172 |
173 | table.field-list th {
174 | border-left: 1px solid #aaa !important;
175 | padding-left: 5px;
176 | }
177 |
178 | table.field-list {
179 | border-collapse: separate;
180 | border-spacing: 10px;
181 | }
182 |
183 | /**
184 | * Styling for footnotes
185 | */
186 |
187 | table.footnote td, table.footnote th {
188 | border: none;
189 | }
190 |
--------------------------------------------------------------------------------
/doc/source/slipGURUTheme/theme.conf:
--------------------------------------------------------------------------------
1 | [theme]
2 | inherit = default
3 | stylesheet = slipGuru.css
4 | pygments_style = sphinx
5 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/LICENSE.txt:
--------------------------------------------------------------------------------
1 | -------------------------------------------------------------------------------
2 | The files
3 | - numpydoc.py
4 | - autosummary.py
5 | - autosummary_generate.py
6 | - docscrape.py
7 | - docscrape_sphinx.py
8 | - phantom_import.py
9 | have the following license:
10 |
11 | Copyright (C) 2008 Stefan van der Walt, Pauli Virtanen
12 |
13 | Redistribution and use in source and binary forms, with or without
14 | modification, are permitted provided that the following conditions are
15 | met:
16 |
17 | 1. Redistributions of source code must retain the above copyright
18 | notice, this list of conditions and the following disclaimer.
19 | 2. Redistributions in binary form must reproduce the above copyright
20 | notice, this list of conditions and the following disclaimer in
21 | the documentation and/or other materials provided with the
22 | distribution.
23 |
24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 | POSSIBILITY OF SUCH DAMAGE.
35 |
36 | -------------------------------------------------------------------------------
37 | The files
38 | - compiler_unparse.py
39 | - comment_eater.py
40 | - traitsdoc.py
41 | have the following license:
42 |
43 | This software is OSI Certified Open Source Software.
44 | OSI Certified is a certification mark of the Open Source Initiative.
45 |
46 | Copyright (c) 2006, Enthought, Inc.
47 | All rights reserved.
48 |
49 | Redistribution and use in source and binary forms, with or without
50 | modification, are permitted provided that the following conditions are met:
51 |
52 | * Redistributions of source code must retain the above copyright notice, this
53 | list of conditions and the following disclaimer.
54 | * Redistributions in binary form must reproduce the above copyright notice,
55 | this list of conditions and the following disclaimer in the documentation
56 | and/or other materials provided with the distribution.
57 | * Neither the name of Enthought, Inc. nor the names of its contributors may
58 | be used to endorse or promote products derived from this software without
59 | specific prior written permission.
60 |
61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
62 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
63 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
64 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
65 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
66 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
67 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
68 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
69 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
70 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
71 |
72 |
73 | -------------------------------------------------------------------------------
74 | The files
75 | - only_directives.py
76 | - plot_directive.py
77 | originate from Matplotlib (http://matplotlib.sf.net/) which has
78 | the following license:
79 |
80 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved.
81 |
82 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation.
83 |
84 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee.
85 |
86 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3.
87 |
88 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
89 |
90 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
91 |
92 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
93 |
94 | 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party.
95 |
96 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement.
97 |
98 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tests *.py
2 | include *.txt
3 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 1.0
2 | Name: numpydoc
3 | Version: 0.4
4 | Summary: Sphinx extension to support docstrings in Numpy format
5 | Home-page: http://github.com/numpy/numpy/tree/master/doc/sphinxext
6 | Author: Pauli Virtanen and others
7 | Author-email: pav@iki.fi
8 | License: BSD
9 | Description: UNKNOWN
10 | Keywords: sphinx numpy
11 | Platform: UNKNOWN
12 | Classifier: Development Status :: 3 - Alpha
13 | Classifier: Environment :: Plugins
14 | Classifier: License :: OSI Approved :: BSD License
15 | Classifier: Topic :: Documentation
16 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/README.txt:
--------------------------------------------------------------------------------
1 | =====================================
2 | numpydoc -- Numpy's Sphinx extensions
3 | =====================================
4 |
5 | Numpy's documentation uses several custom extensions to Sphinx. These
6 | are shipped in this ``numpydoc`` package, in case you want to make use
7 | of them in third-party projects.
8 |
9 | The following extensions are available:
10 |
11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add
12 | the code description directives ``np:function``, ``np-c:function``, etc.
13 | that support the Numpy docstring syntax.
14 |
15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes.
16 |
17 | - ``numpydoc.plot_directive``: Adaptation of Matplotlib's ``plot::``
18 | directive. Note that this implementation may still undergo severe
19 | changes or eventually be deprecated.
20 |
21 |
22 | numpydoc
23 | ========
24 |
25 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings
26 | following the Numpy/Scipy format to a form palatable to Sphinx.
27 |
28 | Options
29 | -------
30 |
31 | The following options can be set in conf.py:
32 |
33 | - numpydoc_use_plots: bool
34 |
35 | Whether to produce ``plot::`` directives for Examples sections that
36 | contain ``import matplotlib``.
37 |
38 | - numpydoc_show_class_members: bool
39 |
40 | Whether to show all members of a class in the Methods and Attributes
41 | sections automatically.
42 |
43 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead)
44 |
45 | Whether to insert an edit link after docstrings.
46 |
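For example, a project's conf.py might set (the values below are only
illustrative)::

    extensions = ['numpydoc']
    numpydoc_use_plots = True
    numpydoc_show_class_members = False
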
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/__init__.py:
--------------------------------------------------------------------------------
1 | from numpydoc import setup
2 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/comment_eater.py:
--------------------------------------------------------------------------------
1 | from cStringIO import StringIO
2 | import compiler
3 | import inspect
4 | import textwrap
5 | import tokenize
6 |
7 | from compiler_unparse import unparse
8 |
9 |
10 | class Comment(object):
11 | """ A comment block.
12 | """
13 | is_comment = True
14 | def __init__(self, start_lineno, end_lineno, text):
15 | # int : The first line number in the block. 1-indexed.
16 | self.start_lineno = start_lineno
17 | # int : The last line number. Inclusive!
18 | self.end_lineno = end_lineno
19 | # str : The text block including '#' character but not any leading spaces.
20 | self.text = text
21 |
22 | def add(self, string, start, end, line):
23 | """ Add a new comment line.
24 | """
25 | self.start_lineno = min(self.start_lineno, start[0])
26 | self.end_lineno = max(self.end_lineno, end[0])
27 | self.text += string
28 |
29 | def __repr__(self):
30 | return '%s(%r, %r, %r)' % (self.__class__.__name__, self.start_lineno,
31 | self.end_lineno, self.text)
32 |
33 |
34 | class NonComment(object):
35 | """ A non-comment block of code.
36 | """
37 | is_comment = False
38 | def __init__(self, start_lineno, end_lineno):
39 | self.start_lineno = start_lineno
40 | self.end_lineno = end_lineno
41 |
42 | def add(self, string, start, end, line):
43 | """ Add lines to the block.
44 | """
45 | if string.strip():
46 | # Only add if not entirely whitespace.
47 | self.start_lineno = min(self.start_lineno, start[0])
48 | self.end_lineno = max(self.end_lineno, end[0])
49 |
50 | def __repr__(self):
51 | return '%s(%r, %r)' % (self.__class__.__name__, self.start_lineno,
52 | self.end_lineno)
53 |
54 |
55 | class CommentBlocker(object):
56 | """ Pull out contiguous comment blocks.
57 | """
58 | def __init__(self):
59 | # Start with a dummy.
60 | self.current_block = NonComment(0, 0)
61 |
62 | # All of the blocks seen so far.
63 | self.blocks = []
64 |
65 | # The index mapping lines of code to their associated comment blocks.
66 | self.index = {}
67 |
68 | def process_file(self, file):
69 | """ Process a file object.
70 | """
71 | for token in tokenize.generate_tokens(file.next):
72 | self.process_token(*token)
73 | self.make_index()
74 |
75 | def process_token(self, kind, string, start, end, line):
76 | """ Process a single token.
77 | """
78 | if self.current_block.is_comment:
79 | if kind == tokenize.COMMENT:
80 | self.current_block.add(string, start, end, line)
81 | else:
82 | self.new_noncomment(start[0], end[0])
83 | else:
84 | if kind == tokenize.COMMENT:
85 | self.new_comment(string, start, end, line)
86 | else:
87 | self.current_block.add(string, start, end, line)
88 |
89 | def new_noncomment(self, start_lineno, end_lineno):
90 | """ We are transitioning from a noncomment to a comment.
91 | """
92 | block = NonComment(start_lineno, end_lineno)
93 | self.blocks.append(block)
94 | self.current_block = block
95 |
96 | def new_comment(self, string, start, end, line):
97 | """ Possibly add a new comment.
98 |
99 | Only adds a new comment if this comment is the only thing on the line.
100 | Otherwise, it extends the noncomment block.
101 | """
102 | prefix = line[:start[1]]
103 | if prefix.strip():
104 | # Oops! Trailing comment, not a comment block.
105 | self.current_block.add(string, start, end, line)
106 | else:
107 | # A comment block.
108 | block = Comment(start[0], end[0], string)
109 | self.blocks.append(block)
110 | self.current_block = block
111 |
112 | def make_index(self):
113 | """ Make the index mapping lines of actual code to their associated
114 | prefix comments.
115 | """
116 | for prev, block in zip(self.blocks[:-1], self.blocks[1:]):
117 | if not block.is_comment:
118 | self.index[block.start_lineno] = prev
119 |
120 | def search_for_comment(self, lineno, default=None):
121 | """ Find the comment block just before the given line number.
122 |
123 | Returns None (or the specified default) if there is no such block.
124 | """
125 | if not self.index:
126 | self.make_index()
127 | block = self.index.get(lineno, None)
128 | text = getattr(block, 'text', default)
129 | return text
130 |
131 |
132 | def strip_comment_marker(text):
133 | """ Strip # markers at the front of a block of comment text.
134 | """
135 | lines = []
136 | for line in text.splitlines():
137 | lines.append(line.lstrip('#'))
138 | text = textwrap.dedent('\n'.join(lines))
139 | return text
140 |
141 |
142 | def get_class_traits(klass):
143 | """ Yield all of the documentation for trait definitions on a class object.
144 | """
145 | # FIXME: gracefully handle errors here or in the caller?
146 | source = inspect.getsource(klass)
147 | cb = CommentBlocker()
148 | cb.process_file(StringIO(source))
149 | mod_ast = compiler.parse(source)
150 | class_ast = mod_ast.node.nodes[0]
151 | for node in class_ast.code.nodes:
152 | # FIXME: handle other kinds of assignments?
153 | if isinstance(node, compiler.ast.Assign):
154 | name = node.nodes[0].name
155 | rhs = unparse(node.expr).strip()
156 | doc = strip_comment_marker(cb.search_for_comment(node.lineno, default=''))
157 | yield name, rhs, doc
158 |
159 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/docscrape_sphinx.py:
--------------------------------------------------------------------------------
1 | import re, inspect, textwrap, pydoc
2 | import sphinx
3 | from docscrape import NumpyDocString, FunctionDoc, ClassDoc
4 |
5 | class SphinxDocString(NumpyDocString):
6 | def __init__(self, docstring, config={}):
7 | self.use_plots = config.get('use_plots', False)
8 | NumpyDocString.__init__(self, docstring, config=config)
9 |
10 | # string conversion routines
11 | def _str_header(self, name, symbol='`'):
12 | return ['.. rubric:: ' + name, '']
13 |
14 | def _str_field_list(self, name):
15 | return [':' + name + ':']
16 |
17 | def _str_indent(self, doc, indent=4):
18 | out = []
19 | for line in doc:
20 | out += [' '*indent + line]
21 | return out
22 |
23 | def _str_signature(self):
24 | return ['']
25 | if self['Signature']:
26 | return ['``%s``' % self['Signature']] + ['']
27 | else:
28 | return ['']
29 |
30 | def _str_summary(self):
31 | return self['Summary'] + ['']
32 |
33 | def _str_extended_summary(self):
34 | return self['Extended Summary'] + ['']
35 |
36 | def _str_param_list(self, name):
37 | out = []
38 | if self[name]:
39 | out += self._str_field_list(name)
40 | out += ['']
41 | for param,param_type,desc in self[name]:
42 | out += self._str_indent(['**%s** : %s' % (param.strip(),
43 | param_type)])
44 | out += ['']
45 | out += self._str_indent(desc,8)
46 | out += ['']
47 | return out
48 |
49 | @property
50 | def _obj(self):
51 | if hasattr(self, '_cls'):
52 | return self._cls
53 | elif hasattr(self, '_f'):
54 | return self._f
55 | return None
56 |
57 | def _str_member_list(self, name):
58 | """
59 | Generate a member listing, autosummary:: table where possible,
60 | and a table where not.
61 |
62 | """
63 | out = []
64 | if self[name]:
65 | out += ['.. rubric:: %s' % name, '']
66 | prefix = getattr(self, '_name', '')
67 |
68 | if prefix:
69 | prefix = '~%s.' % prefix
70 |
71 | autosum = []
72 | others = []
73 | for param, param_type, desc in self[name]:
74 | param = param.strip()
75 | if not self._obj or hasattr(self._obj, param):
76 | autosum += [" %s%s" % (prefix, param)]
77 | else:
78 | others.append((param, param_type, desc))
79 |
80 | if autosum:
81 | out += ['.. autosummary::', ' :toctree:', '']
82 | out += autosum
83 |
84 | if others:
85 | maxlen_0 = max([len(x[0]) for x in others])
86 | maxlen_1 = max([len(x[1]) for x in others])
87 | hdr = "="*maxlen_0 + " " + "="*maxlen_1 + " " + "="*10
88 | fmt = '%%%ds %%%ds ' % (maxlen_0, maxlen_1)
89 | n_indent = maxlen_0 + maxlen_1 + 4
90 | out += [hdr]
91 | for param, param_type, desc in others:
92 | out += [fmt % (param.strip(), param_type)]
93 | out += self._str_indent(desc, n_indent)
94 | out += [hdr]
95 | out += ['']
96 | return out
97 |
98 | def _str_section(self, name):
99 | out = []
100 | if self[name]:
101 | out += self._str_header(name)
102 | out += ['']
103 | content = textwrap.dedent("\n".join(self[name])).split("\n")
104 | out += content
105 | out += ['']
106 | return out
107 |
108 | def _str_see_also(self, func_role):
109 | out = []
110 | if self['See Also']:
111 | see_also = super(SphinxDocString, self)._str_see_also(func_role)
112 | out = ['.. seealso::', '']
113 | out += self._str_indent(see_also[2:])
114 | return out
115 |
116 | def _str_warnings(self):
117 | out = []
118 | if self['Warnings']:
119 | out = ['.. warning::', '']
120 | out += self._str_indent(self['Warnings'])
121 | return out
122 |
123 | def _str_index(self):
124 | idx = self['index']
125 | out = []
126 | if len(idx) == 0:
127 | return out
128 |
129 | out += ['.. index:: %s' % idx.get('default','')]
130 | for section, references in idx.iteritems():
131 | if section == 'default':
132 | continue
133 | elif section == 'refguide':
134 | out += [' single: %s' % (', '.join(references))]
135 | else:
136 | out += [' %s: %s' % (section, ','.join(references))]
137 | return out
138 |
139 | def _str_references(self):
140 | out = []
141 | if self['References']:
142 | out += self._str_header('References')
143 | if isinstance(self['References'], str):
144 | self['References'] = [self['References']]
145 | out.extend(self['References'])
146 | out += ['']
147 | # Latex collects all references to a separate bibliography,
148 | # so we need to insert links to it
149 | if sphinx.__version__ >= "0.6":
150 | out += ['.. only:: latex','']
151 | else:
152 | out += ['.. latexonly::','']
153 | items = []
154 | for line in self['References']:
155 | m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I)
156 | if m:
157 | items.append(m.group(1))
158 | out += [' ' + ", ".join(["[%s]_" % item for item in items]), '']
159 | return out
160 |
161 | def _str_examples(self):
162 | examples_str = "\n".join(self['Examples'])
163 |
164 | if (self.use_plots and 'import matplotlib' in examples_str
165 | and 'plot::' not in examples_str):
166 | out = []
167 | out += self._str_header('Examples')
168 | out += ['.. plot::', '']
169 | out += self._str_indent(self['Examples'])
170 | out += ['']
171 | return out
172 | else:
173 | return self._str_section('Examples')
174 |
175 | def __str__(self, indent=0, func_role="obj"):
176 | out = []
177 | out += self._str_signature()
178 | out += self._str_index() + ['']
179 | out += self._str_summary()
180 | out += self._str_extended_summary()
181 | for param_list in ('Parameters', 'Returns', 'Other Parameters',
182 | 'Raises', 'Warns'):
183 | out += self._str_param_list(param_list)
184 | out += self._str_warnings()
185 | out += self._str_see_also(func_role)
186 | out += self._str_section('Notes')
187 | out += self._str_references()
188 | out += self._str_examples()
189 | for param_list in ('Attributes', 'Methods'):
190 | out += self._str_member_list(param_list)
191 | out = self._str_indent(out,indent)
192 | return '\n'.join(out)
193 |
194 | class SphinxFunctionDoc(SphinxDocString, FunctionDoc):
195 | def __init__(self, obj, doc=None, config={}):
196 | self.use_plots = config.get('use_plots', False)
197 | FunctionDoc.__init__(self, obj, doc=doc, config=config)
198 |
199 | class SphinxClassDoc(SphinxDocString, ClassDoc):
200 | def __init__(self, obj, doc=None, func_doc=None, config={}):
201 | self.use_plots = config.get('use_plots', False)
202 | ClassDoc.__init__(self, obj, doc=doc, func_doc=None, config=config)
203 |
204 | class SphinxObjDoc(SphinxDocString):
205 | def __init__(self, obj, doc=None, config={}):
206 | self._f = obj
207 | SphinxDocString.__init__(self, doc, config=config)
208 |
209 | def get_doc_object(obj, what=None, doc=None, config={}):
210 | if what is None:
211 | if inspect.isclass(obj):
212 | what = 'class'
213 | elif inspect.ismodule(obj):
214 | what = 'module'
215 | elif callable(obj):
216 | what = 'function'
217 | else:
218 | what = 'object'
219 | if what == 'class':
220 | return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc,
221 | config=config)
222 | elif what in ('function', 'method'):
223 | return SphinxFunctionDoc(obj, doc=doc, config=config)
224 | else:
225 | if doc is None:
226 | doc = pydoc.getdoc(obj)
227 | return SphinxObjDoc(obj, doc, config=config)
228 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/numpydoc.py:
--------------------------------------------------------------------------------
1 | """
2 | ========
3 | numpydoc
4 | ========
5 |
6 | Sphinx extension that handles docstrings in the Numpy standard format. [1]
7 |
8 | It will:
9 |
10 | - Convert Parameters etc. sections to field lists.
11 | - Convert See Also section to a See also entry.
12 | - Renumber references.
13 | - Extract the signature from the docstring, if it can't be determined otherwise.
14 |
15 | .. [1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard
16 |
17 | """
18 |
19 | import os, re, pydoc
20 | from docscrape_sphinx import get_doc_object, SphinxDocString
21 | from sphinx.util.compat import Directive
22 | import inspect
23 |
24 | def mangle_docstrings(app, what, name, obj, options, lines,
25 | reference_offset=[0]):
26 |
27 | cfg = dict(use_plots=app.config.numpydoc_use_plots,
28 | show_class_members=app.config.numpydoc_show_class_members)
29 |
30 | if what == 'module':
31 | # Strip top title
32 | title_re = re.compile(ur'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*',
33 | re.I|re.S)
34 | lines[:] = title_re.sub(u'', u"\n".join(lines)).split(u"\n")
35 | else:
36 | doc = get_doc_object(obj, what, u"\n".join(lines), config=cfg)
37 | lines[:] = unicode(doc).split(u"\n")
38 |
39 | if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \
40 | obj.__name__:
41 | if hasattr(obj, '__module__'):
42 | v = dict(full_name=u"%s.%s" % (obj.__module__, obj.__name__))
43 | else:
44 | v = dict(full_name=obj.__name__)
45 | lines += [u'', u'.. htmlonly::', '']
46 | lines += [u' %s' % x for x in
47 | (app.config.numpydoc_edit_link % v).split("\n")]
48 |
49 | # replace reference numbers so that there are no duplicates
50 | references = []
51 | for line in lines:
52 | line = line.strip()
53 | m = re.match(ur'^.. \[([a-z0-9_.-])\]', line, re.I)
54 | if m:
55 | references.append(m.group(1))
56 |
57 | # start renaming from the longest string, to avoid overwriting parts
58 | references.sort(key=lambda x: -len(x))
59 | if references:
60 | for i, line in enumerate(lines):
61 | for r in references:
62 | if re.match(ur'^\d+$', r):
63 | new_r = u"R%d" % (reference_offset[0] + int(r))
64 | else:
65 | new_r = u"%s%d" % (r, reference_offset[0])
66 | lines[i] = lines[i].replace(u'[%s]_' % r,
67 | u'[%s]_' % new_r)
68 | lines[i] = lines[i].replace(u'.. [%s]' % r,
69 | u'.. [%s]' % new_r)
70 |
71 | reference_offset[0] += len(references)
72 |
73 | def mangle_signature(app, what, name, obj, options, sig, retann):
74 | # Do not try to inspect classes that don't define `__init__`
75 | if (inspect.isclass(obj) and
76 | (not hasattr(obj, '__init__') or
77 | 'initializes x; see ' in pydoc.getdoc(obj.__init__))):
78 | return '', ''
79 |
80 | if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return
81 | if not hasattr(obj, '__doc__'): return
82 |
83 | doc = SphinxDocString(pydoc.getdoc(obj))
84 | if doc['Signature']:
85 | sig = re.sub(u"^[^(]*", u"", doc['Signature'])
86 | return sig, u''
87 |
88 | def setup(app, get_doc_object_=get_doc_object):
89 | global get_doc_object
90 | get_doc_object = get_doc_object_
91 |
92 | app.connect('autodoc-process-docstring', mangle_docstrings)
93 | app.connect('autodoc-process-signature', mangle_signature)
94 | app.add_config_value('numpydoc_edit_link', None, False)
95 | app.add_config_value('numpydoc_use_plots', None, False)
96 | app.add_config_value('numpydoc_show_class_members', True, True)
97 |
98 | # Extra mangling domains
99 | app.add_domain(NumpyPythonDomain)
100 | app.add_domain(NumpyCDomain)
101 |
102 | #------------------------------------------------------------------------------
103 | # Docstring-mangling domains
104 | #------------------------------------------------------------------------------
105 |
106 | from docutils.statemachine import ViewList
107 | from sphinx.domains.c import CDomain
108 | from sphinx.domains.python import PythonDomain
109 |
110 | class ManglingDomainBase(object):
111 | directive_mangling_map = {}
112 |
113 | def __init__(self, *a, **kw):
114 | super(ManglingDomainBase, self).__init__(*a, **kw)
115 | self.wrap_mangling_directives()
116 |
117 | def wrap_mangling_directives(self):
118 | for name, objtype in self.directive_mangling_map.items():
119 | self.directives[name] = wrap_mangling_directive(
120 | self.directives[name], objtype)
121 |
122 | class NumpyPythonDomain(ManglingDomainBase, PythonDomain):
123 | name = 'np'
124 | directive_mangling_map = {
125 | 'function': 'function',
126 | 'class': 'class',
127 | 'exception': 'class',
128 | 'method': 'function',
129 | 'classmethod': 'function',
130 | 'staticmethod': 'function',
131 | 'attribute': 'attribute',
132 | }
133 |
134 | class NumpyCDomain(ManglingDomainBase, CDomain):
135 | name = 'np-c'
136 | directive_mangling_map = {
137 | 'function': 'function',
138 | 'member': 'attribute',
139 | 'macro': 'function',
140 | 'type': 'class',
141 | 'var': 'object',
142 | }
143 |
144 | def wrap_mangling_directive(base_directive, objtype):
145 | class directive(base_directive):
146 | def run(self):
147 | env = self.state.document.settings.env
148 |
149 | name = None
150 | if self.arguments:
151 | m = re.match(r'^(.*\s+)?(.*?)(\(.*)?', self.arguments[0])
152 | name = m.group(2).strip()
153 |
154 | if not name:
155 | name = self.arguments[0]
156 |
157 | lines = list(self.content)
158 | mangle_docstrings(env.app, objtype, name, None, None, lines)
159 | self.content = ViewList(lines, self.content.parent)
160 |
161 | return base_directive.run(self)
162 |
163 | return directive
164 |
165 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/phantom_import.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============
3 | phantom_import
4 | ==============
5 |
6 | Sphinx extension to make directives from ``sphinx.ext.autodoc`` and similar
7 | extensions to use docstrings loaded from an XML file.
8 |
9 | This extension loads an XML file in the Pydocweb format [1] and
10 | creates a dummy module that contains the specified docstrings. This
11 | can be used to get the current docstrings from a Pydocweb instance
12 | without needing to rebuild the documented module.
13 |
14 | .. [1] http://code.google.com/p/pydocweb
15 |
16 | """
17 | import imp, sys, compiler, types, os, inspect, re
18 |
19 | def setup(app):
20 | app.connect('builder-inited', initialize)
21 | app.add_config_value('phantom_import_file', None, True)
22 |
23 | def initialize(app):
24 | fn = app.config.phantom_import_file
25 | if (fn and os.path.isfile(fn)):
26 | print "[numpydoc] Phantom importing modules from", fn, "..."
27 | import_phantom_module(fn)
28 |
29 | #------------------------------------------------------------------------------
30 | # Creating 'phantom' modules from an XML description
31 | #------------------------------------------------------------------------------
32 | def import_phantom_module(xml_file):
33 | """
34 | Insert a fake Python module to sys.modules, based on a XML file.
35 |
36 | The XML file is expected to conform to Pydocweb DTD. The fake
37 | module will contain dummy objects, which guarantee the following:
38 |
39 | - Docstrings are correct.
40 | - Class inheritance relationships are correct (if present in XML).
41 | - Function argspec is *NOT* correct (even if present in XML).
42 | Instead, the function signature is prepended to the function docstring.
43 | - Class attributes are *NOT* correct; instead, they are dummy objects.
44 |
45 | Parameters
46 | ----------
47 | xml_file : str
48 | Name of an XML file to read
49 |
50 | """
51 | import lxml.etree as etree
52 |
53 | object_cache = {}
54 |
55 | tree = etree.parse(xml_file)
56 | root = tree.getroot()
57 |
58 | # Sort items so that
59 | # - Base classes come before classes inherited from them
60 | # - Modules come before their contents
61 | all_nodes = dict([(n.attrib['id'], n) for n in root])
62 |
63 | def _get_bases(node, recurse=False):
64 | bases = [x.attrib['ref'] for x in node.findall('base')]
65 | if recurse:
66 | j = 0
67 | while True:
68 | try:
69 | b = bases[j]
70 | except IndexError: break
71 | if b in all_nodes:
72 | bases.extend(_get_bases(all_nodes[b]))
73 | j += 1
74 | return bases
75 |
76 | type_index = ['module', 'class', 'callable', 'object']
77 |
78 | def base_cmp(a, b):
79 | x = cmp(type_index.index(a.tag), type_index.index(b.tag))
80 | if x != 0: return x
81 |
82 | if a.tag == 'class' and b.tag == 'class':
83 | a_bases = _get_bases(a, recurse=True)
84 | b_bases = _get_bases(b, recurse=True)
85 | x = cmp(len(a_bases), len(b_bases))
86 | if x != 0: return x
87 | if a.attrib['id'] in b_bases: return -1
88 | if b.attrib['id'] in a_bases: return 1
89 |
90 | return cmp(a.attrib['id'].count('.'), b.attrib['id'].count('.'))
91 |
92 | nodes = root.getchildren()
93 | nodes.sort(base_cmp)
94 |
95 | # Create phantom items
96 | for node in nodes:
97 | name = node.attrib['id']
98 | doc = (node.text or '').decode('string-escape') + "\n"
99 | if doc == "\n": doc = ""
100 |
101 | # create parent, if missing
102 | parent = name
103 | while True:
104 | parent = '.'.join(parent.split('.')[:-1])
105 | if not parent: break
106 | if parent in object_cache: break
107 | obj = imp.new_module(parent)
108 | object_cache[parent] = obj
109 | sys.modules[parent] = obj
110 |
111 | # create object
112 | if node.tag == 'module':
113 | obj = imp.new_module(name)
114 | obj.__doc__ = doc
115 | sys.modules[name] = obj
116 | elif node.tag == 'class':
117 | bases = [object_cache[b] for b in _get_bases(node)
118 | if b in object_cache]
119 | bases.append(object)
120 | init = lambda self: None
121 | init.__doc__ = doc
122 | obj = type(name, tuple(bases), {'__doc__': doc, '__init__': init})
123 | obj.__name__ = name.split('.')[-1]
124 | elif node.tag == 'callable':
125 | funcname = node.attrib['id'].split('.')[-1]
126 | argspec = node.attrib.get('argspec')
127 | if argspec:
128 | argspec = re.sub('^[^(]*', '', argspec)
129 | doc = "%s%s\n\n%s" % (funcname, argspec, doc)
130 | obj = lambda: 0
131 | obj.__argspec_is_invalid_ = True
132 | obj.func_name = funcname
133 | obj.__name__ = name
134 | obj.__doc__ = doc
135 | if inspect.isclass(object_cache[parent]):
136 | obj.__objclass__ = object_cache[parent]
137 | else:
138 | class Dummy(object): pass
139 | obj = Dummy()
140 | obj.__name__ = name
141 | obj.__doc__ = doc
142 | if inspect.isclass(object_cache[parent]):
143 | obj.__get__ = lambda: None
144 | object_cache[name] = obj
145 |
146 | if parent:
147 | if inspect.ismodule(object_cache[parent]):
148 | obj.__module__ = parent
149 | setattr(object_cache[parent], name.split('.')[-1], obj)
150 |
151 | # Populate items
152 | for node in root:
153 | obj = object_cache.get(node.attrib['id'])
154 | if obj is None: continue
155 | for ref in node.findall('ref'):
156 | if node.tag == 'class':
157 | if ref.attrib['ref'].startswith(node.attrib['id'] + '.'):
158 | setattr(obj, ref.attrib['name'],
159 | object_cache.get(ref.attrib['ref']))
160 | else:
161 | setattr(obj, ref.attrib['name'],
162 | object_cache.get(ref.attrib['ref']))
163 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/setup.cfg:
--------------------------------------------------------------------------------
1 | [egg_info]
2 | tag_build =
3 | tag_date = 0
4 | tag_svn_revision = 0
5 |
6 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | import setuptools
3 | import sys, os
4 |
5 | version = "0.4"
6 |
7 | setup(
8 | name="numpydoc",
9 | packages=["numpydoc"],
10 | package_dir={"numpydoc": ""},
11 | version=version,
12 | description="Sphinx extension to support docstrings in Numpy format",
13 | # classifiers from http://pypi.python.org/pypi?%3Aaction=list_classifiers
14 | classifiers=["Development Status :: 3 - Alpha",
15 | "Environment :: Plugins",
16 | "License :: OSI Approved :: BSD License",
17 | "Topic :: Documentation"],
18 | keywords="sphinx numpy",
19 | author="Pauli Virtanen and others",
20 | author_email="pav@iki.fi",
21 | url="http://github.com/numpy/numpy/tree/master/doc/sphinxext",
22 | license="BSD",
23 | zip_safe=False,
24 | install_requires=["Sphinx >= 1.0.1"],
25 |       package_data={'numpydoc': ['tests/*.py']},
26 | entry_points={
27 | "console_scripts": [
28 | "autosummary_generate = numpydoc.autosummary_generate:main",
29 | ],
30 | },
31 | )
32 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/numpydoc/traitsdoc.py:
--------------------------------------------------------------------------------
1 | """
2 | =========
3 | traitsdoc
4 | =========
5 |
6 | Sphinx extension that handles docstrings in the Numpy standard format [1]
7 | and supports Traits [2].
8 |
9 | This extension can be used as a replacement for ``numpydoc`` when support
10 | for Traits is required.
11 |
12 | .. [1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard
13 | .. [2] http://code.enthought.com/projects/traits/
14 |
15 | """
16 |
17 | import inspect
18 | import os
19 | import pydoc
20 |
21 | import docscrape
22 | import docscrape_sphinx
23 | from docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString
24 |
25 | import numpydoc
26 |
27 | import comment_eater
28 |
29 | class SphinxTraitsDoc(SphinxClassDoc):
30 |     def __init__(self, cls, modulename='', func_doc=SphinxFunctionDoc, config=None):  # config accepted for numpydoc API compatibility
31 | if not inspect.isclass(cls):
32 | raise ValueError("Initialise using a class. Got %r" % cls)
33 | self._cls = cls
34 |
35 | if modulename and not modulename.endswith('.'):
36 | modulename += '.'
37 | self._mod = modulename
38 | self._name = cls.__name__
39 | self._func_doc = func_doc
40 |
41 | docstring = pydoc.getdoc(cls)
42 | docstring = docstring.split('\n')
43 |
44 | # De-indent paragraph
45 | try:
46 | indent = min(len(s) - len(s.lstrip()) for s in docstring
47 | if s.strip())
48 | except ValueError:
49 | indent = 0
50 |
51 | for n,line in enumerate(docstring):
52 | docstring[n] = docstring[n][indent:]
53 |
54 | self._doc = docscrape.Reader(docstring)
55 | self._parsed_data = {
56 | 'Signature': '',
57 | 'Summary': '',
58 | 'Description': [],
59 | 'Extended Summary': [],
60 | 'Parameters': [],
61 | 'Returns': [],
62 | 'Raises': [],
63 | 'Warns': [],
64 | 'Other Parameters': [],
65 | 'Traits': [],
66 | 'Methods': [],
67 | 'See Also': [],
68 | 'Notes': [],
69 | 'References': '',
70 | 'Example': '',
71 | 'Examples': '',
72 | 'index': {}
73 | }
74 |
75 | self._parse()
76 |
77 | def _str_summary(self):
78 | return self['Summary'] + ['']
79 |
80 | def _str_extended_summary(self):
81 | return self['Description'] + self['Extended Summary'] + ['']
82 |
83 | def __str__(self, indent=0, func_role="func"):
84 | out = []
85 | out += self._str_signature()
86 | out += self._str_index() + ['']
87 | out += self._str_summary()
88 | out += self._str_extended_summary()
89 | for param_list in ('Parameters', 'Traits', 'Methods',
90 | 'Returns','Raises'):
91 | out += self._str_param_list(param_list)
92 | out += self._str_see_also("obj")
93 | out += self._str_section('Notes')
94 | out += self._str_references()
95 | out += self._str_section('Example')
96 | out += self._str_section('Examples')
97 | out = self._str_indent(out,indent)
98 | return '\n'.join(out)
99 |
100 | def looks_like_issubclass(obj, classname):
101 | """ Return True if the object has a class or superclass with the given class
102 | name.
103 |
104 | Ignores old-style classes.
105 | """
106 | t = obj
107 | if t.__name__ == classname:
108 | return True
109 | for klass in t.__mro__:
110 | if klass.__name__ == classname:
111 | return True
112 | return False
113 |
114 | def get_doc_object(obj, what=None, config=None):
115 | if what is None:
116 | if inspect.isclass(obj):
117 | what = 'class'
118 | elif inspect.ismodule(obj):
119 | what = 'module'
120 | elif callable(obj):
121 | what = 'function'
122 | else:
123 | what = 'object'
124 | if what == 'class':
125 | doc = SphinxTraitsDoc(obj, '', func_doc=SphinxFunctionDoc, config=config)
126 | if looks_like_issubclass(obj, 'HasTraits'):
127 | for name, trait, comment in comment_eater.get_class_traits(obj):
128 | # Exclude private traits.
129 | if not name.startswith('_'):
130 | doc['Traits'].append((name, trait, comment.splitlines()))
131 | return doc
132 | elif what in ('function', 'method'):
133 | return SphinxFunctionDoc(obj, '', config=config)
134 | else:
135 | return SphinxDocString(pydoc.getdoc(obj), config=config)
136 |
137 | def setup(app):
138 | # init numpydoc
139 | numpydoc.setup(app, get_doc_object)
140 |
141 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/sphinxcontrib/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | sphinxcontrib
4 | ~~~~~~~~~~~~~
5 |
6 | This package is a namespace package that contains all extensions
7 | distributed in the ``sphinx-contrib`` distribution.
8 |
9 | :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
10 | :license: BSD, see LICENSE for details.
11 | """
12 |
13 | __import__('pkg_resources').declare_namespace(__name__)
14 |
15 |
--------------------------------------------------------------------------------
/doc/source/sphinxext/sphinxcontrib/programoutput.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2010, 2011, Sebastian Wiesner
3 | # All rights reserved.
4 |
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 |
8 | # 1. Redistributions of source code must retain the above copyright notice,
9 | # this list of conditions and the following disclaimer.
10 | # 2. Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 |
14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | # POSSIBILITY OF SUCH DAMAGE.
25 |
26 |
27 | """
28 | sphinxcontrib.programoutput
29 | ===========================
30 |
31 | This extension provides a directive to include the output of commands as a
32 | literal block while building the docs.
33 |
34 | .. moduleauthor:: Sebastian Wiesner
35 | """
36 |
37 | from __future__ import (print_function, division, unicode_literals,
38 | absolute_import)
39 |
40 | import sys
41 | import shlex
42 | from subprocess import Popen, PIPE, STDOUT
43 | from collections import defaultdict, namedtuple
44 |
45 | from docutils import nodes
46 | from docutils.parsers import rst
47 | from docutils.parsers.rst.directives import flag, unchanged, nonnegative_int
48 |
49 |
50 | __version__ = '0.5'
51 |
52 |
53 | class program_output(nodes.Element):
54 | pass
55 |
56 |
57 | def _slice(value):
58 | parts = [int(v.strip()) for v in value.split(',')]
59 | if len(parts) > 2:
60 | raise ValueError('too many slice parts')
61 | return tuple((parts + [None]*2)[:2])
62 |
63 |
64 | class ProgramOutputDirective(rst.Directive):
65 | has_content = False
66 | final_argument_whitespace = True
67 | required_arguments = 1
68 |
69 | option_spec = dict(shell=flag, prompt=flag, nostderr=flag,
70 | ellipsis=_slice, extraargs=unchanged,
71 | returncode=nonnegative_int)
72 |
73 | def run(self):
74 | node = program_output()
75 | node.line = self.lineno
76 | node['command'] = self.arguments[0]
77 |
78 | if self.name == 'command-output':
79 | node['show_prompt'] = True
80 | else:
81 | node['show_prompt'] = 'prompt' in self.options
82 |
83 | node['hide_standard_error'] = 'nostderr' in self.options
84 | node['extraargs'] = self.options.get('extraargs', '')
85 | node['use_shell'] = 'shell' in self.options
86 | node['returncode'] = self.options.get('returncode', 0)
87 | if 'ellipsis' in self.options:
88 | node['strip_lines'] = self.options['ellipsis']
89 | return [node]
90 |
91 |
92 | _Command = namedtuple('Command', 'command shell hide_standard_error')
93 |
94 |
95 | class Command(_Command): #pylint: disable=W0232
96 | """
97 | A command to be executed.
98 | """
99 |
100 | def __new__(cls, command, shell=False, hide_standard_error=False):
101 | if isinstance(command, list):
102 | command = tuple(command)
103 | return _Command.__new__(cls, command, shell, hide_standard_error)
104 |
105 | @classmethod
106 | def from_program_output_node(cls, node):
107 | """
108 | Create a command from a :class:`program_output` node.
109 | """
110 | extraargs = node.get('extraargs', '')
111 | command = (node['command'] + ' ' + extraargs).strip()
112 | return cls(command, node['use_shell'], node['hide_standard_error'])
113 |
114 | def execute(self):
115 | """
116 | Execute this command.
117 |
118 | Return the :class:`~subprocess.Popen` object representing the running
119 | command.
120 | """
121 | # pylint: disable=E1101
122 | if isinstance(self.command, unicode):
123 | command = self.command.encode(sys.getfilesystemencoding())
124 | else:
125 | command = self.command
126 | if isinstance(command, basestring) and not self.shell:
127 | command = shlex.split(command)
128 | return Popen(command, shell=self.shell, stdout=PIPE,
129 | stderr=PIPE if self.hide_standard_error else STDOUT)
130 |
131 | def get_output(self):
132 | """
133 | Get the output of this command.
134 |
135 | Return a tuple ``(returncode, output)``. ``returncode`` is the
136 |     integral return code of the process, ``output`` is the output as a
137 |     unicode string with trailing whitespace stripped.
138 | """
139 | process = self.execute()
140 | output = process.communicate()[0].decode(
141 | sys.getfilesystemencoding()).rstrip()
142 | return process.returncode, output
143 |
144 | def __str__(self):
145 | # pylint: disable=E1101
146 | if isinstance(self.command, tuple):
147 | return repr(list(self.command))
148 | return repr(self.command)
149 |
150 |
151 | class ProgramOutputCache(defaultdict): # pylint: disable=W0232
152 | """
153 |     Execute commands and cache their output.
154 |
155 |     This class is a mapping. Its keys are :class:`Command` objects representing
156 |     command invocations. Its values are tuples of the form ``(returncode,
157 |     output)``, where ``returncode`` is the integral return code of the command,
158 |     and ``output`` is the output as a unicode string.
159 |
160 |     The first time a key is retrieved from this object, the command is
161 |     invoked and its result is cached. Subsequent access to the same key
162 | returns the cached value.
163 | """
164 |
165 | def __missing__(self, command):
166 | """
167 | Called, if a command was not found in the cache.
168 |
169 | ``command`` is an instance of :class:`Command`.
170 | """
171 | result = command.get_output()
172 | self[command] = result
173 | return result
174 |
175 |
176 | def run_programs(app, doctree):
177 | """
178 | Execute all programs represented by ``program_output`` nodes in
179 | ``doctree``. Each ``program_output`` node in ``doctree`` is then
180 |     replaced with a node that represents the output of this program.
181 |
182 | The program output is retrieved from the cache in
183 | ``app.env.programoutput_cache``.
184 | """
185 | if app.config.programoutput_use_ansi:
186 | # enable ANSI support, if requested by config
187 | from sphinxcontrib.ansi import ansi_literal_block
188 | node_class = ansi_literal_block
189 | else:
190 | node_class = nodes.literal_block
191 |
192 | cache = app.env.programoutput_cache
193 |
194 | for node in doctree.traverse(program_output):
195 | command = Command.from_program_output_node(node)
196 | try:
197 | returncode, output = cache[command]
198 | except EnvironmentError as error:
199 | error_message = 'Command {0} failed: {1}'.format(command, error)
200 | error_node = doctree.reporter.error(error_message, base_node=node)
201 | node.replace_self(error_node)
202 | else:
203 | if returncode != node['returncode']:
204 | app.warn('Unexpected return code {0} from command {1}'.format(
205 | returncode, command))
206 |
207 | # replace lines with ..., if ellipsis is specified
208 | if 'strip_lines' in node:
209 | lines = output.splitlines()
210 | start, stop = node['strip_lines']
211 | lines[start:stop] = ['...']
212 | output = '\n'.join(lines)
213 |
214 | if node['show_prompt']:
215 | tmpl = app.config.programoutput_prompt_template
216 | output = tmpl.format(command=node['command'], output=output,
217 | returncode=returncode)
218 |
219 | new_node = node_class(output, output)
220 | new_node['language'] = 'text'
221 | node.replace_self(new_node)
222 |
223 |
224 | def init_cache(app):
225 | """
226 | Initialize the cache for program output at
227 | ``app.env.programoutput_cache``, if not already present (e.g. being
228 | loaded from a pickled environment).
229 |
230 | The cache is of type :class:`ProgramOutputCache`.
231 | """
232 | if not hasattr(app.env, 'programoutput_cache'):
233 | app.env.programoutput_cache = ProgramOutputCache()
234 |
235 |
236 | def setup(app):
237 | app.add_config_value('programoutput_use_ansi', False, 'env')
238 | app.add_config_value('programoutput_prompt_template',
239 | '$ {command}\n{output}', 'env')
240 | app.add_directive('program-output', ProgramOutputDirective)
241 | app.add_directive('command-output', ProgramOutputDirective)
242 | app.connect(b'builder-inited', init_cache)
243 | app.connect(b'doctree-read', run_programs)
244 |
--------------------------------------------------------------------------------
/doc/source/tutorial.rst:
--------------------------------------------------------------------------------
1 | .. _tutorial:
2 |
3 | Quick start tutorial
4 | ====================
5 | ADENINE may be installed using standard Python tools (with
6 | administrative or sudo permissions on GNU/Linux platforms)::
7 |
8 | $ pip install adenine
9 |
10 | or::
11 |
12 | $ easy_install adenine
13 |
14 | Installation from sources
15 | -------------------------
16 | If you prefer to install ADENINE manually, download the ``.zip`` or ``.tar.gz``
17 | archive from the `GitHub repository <https://github.com/slipguru/adenine>`_, then extract it and move into the root directory::
18 |
19 | $ unzip slipguru-adenine-|release|.zip
20 | $ cd adenine-|release|/
21 |
22 | or::
23 |
24 | $ tar xvf slipguru-adenine-|release|.tar.gz
25 | $ cd adenine-|release|/
26 |
27 | Otherwise you can clone our `GitHub repository <https://github.com/slipguru/adenine>`_::
28 |
29 | $ git clone https://github.com/slipguru/adenine.git
30 |
31 | From here, you can follow the standard Python installation step::
32 |
33 | $ python setup.py install
34 |
35 | After ADENINE installation, you should have access to three scripts,
36 | named with a common ``ade_`` prefix::
37 |
38 |     $ ade_<TAB>
39 |     ade_GEO2csv.py    ade_analysis.py    ade_run.py
40 |
41 | This tutorial assumes that you downloaded and extracted the ADENINE
42 | source package, which contains an ``examples/data`` directory with some data files (``.npy`` or ``.csv``) that will be used to demonstrate ADENINE functionalities.
43 |
44 | ADENINE needs only 3 ingredients:
45 |
46 | * ``n_samples x n_variables`` input matrix
47 | * ``n_samples x 1`` output vector (optional)
48 | * ``configuration`` file
49 |
50 |
51 | Input data format
52 | -----------------
53 | Input data are assumed to be:
54 |
55 | * ``numpy`` arrays stored in ``.npy`` files, organized with a row for each sample and a column for each feature,
56 | * tabular data stored in comma-separated ``.csv`` files, with the variable names on the first row and the sample indexes in the first column,
57 | * toy examples available from the ``adenine.utils.data_source`` module (a loading sketch follows the list).
58 |
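As an illustration (a minimal sketch, not part of the ADENINE API), the files
shipped in ``examples/data`` could be loaded as follows::

    import numpy as np
    import pandas as pd

    # n_samples x n_variables matrix and n_samples x 1 vector in numpy format
    X = np.load('examples/data/X.npy')
    y = np.load('examples/data/y.npy')

    # the same matrix in .csv format: variable names on the first row,
    # sample indexes in the first column
    df = pd.read_csv('examples/data/X.csv', index_col=0)
    X, feat_names, index = df.values, df.columns, df.index
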
59 | .. _configuration:
60 |
61 | Configuration File
62 | ------------------
63 | The ADENINE configuration file is a standard Python script. It is
64 | imported as a module, and all of its code is executed. In this file the user can define all the options needed to read the data and to create the pipelines.
65 |
66 | .. literalinclude:: ../../adenine/ade_config.py
67 | :language: python
68 |
69 | .. _experiment:
70 |
71 | Experiment runner
72 | -----------------
73 | The ``ade_run.py`` script executes the full ADENINE framework. The prototype is the following::
74 |
75 | $ ade_run.py ade_config.py
76 |
77 | When launched, the script reads the data, then creates and runs each pipeline, saving the results in a tree-like structure rooted in the current folder, as sketched below.
78 |
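The layout below is only an illustrative sketch (folder and file names are
hypothetical, inferred from what ``ade_analysis.py`` expects to find)::

    result-dir/
        ade_config.py       # copy of the configuration used for the run
        __data.pkl          # dump of the input data (X, y, index)
        <experiment>.pkl    # pickled results of the pipelines
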
79 | .. _analysis:
80 |
81 | Results analysis
82 | ----------------
83 | The ``ade_analysis.py`` script provides useful summaries and graphs from the results of the experiment. Its only parameter is a results directory
84 | created by a previous run::
85 |
86 | $ ade_analysis.py result-dir
87 |
88 | The script produces a set of textual and graphical results. An example output obtained by one of the implemented pipelines is shown below.
89 |
90 | .. image:: pca.png
91 | :scale: 80 %
92 | :alt: broken link
93 |
94 | .. image:: kpca.png
95 | :scale: 80 %
96 | :alt: broken link
97 |
98 | You can reproduce the example above by specifying ``data_source.load('circles')`` in the configuration file.
99 |
100 | Example dataset
101 | ----------------
102 | An example dataset can be downloaded :download:`here `. The dataset is a random extraction of 801 samples (with dimension 20531) measuring RNA-Seq gene expression of patients affected by 5 different tumor types: breast invasive carcinoma (BRCA), kidney renal clear cell carcinoma (KIRC), colon adenocarcinoma (COAD), lung adenocarcinoma (LUAD) and prostate adenocarcinoma (PRAD). The full dataset is maintained by The Cancer Genome Atlas Pan-Cancer Project [1] and we refer to the `original repository `_ for further details.
103 |
104 | Reference
105 | ----------------
106 | [1] Weinstein, John N., et al. "The cancer genome atlas pan-cancer analysis project." Nature genetics 45.10 (2013): 1113-1120.
107 |
--------------------------------------------------------------------------------
/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slipguru/adenine/cd0f65512cc4f66007a057e35619d124f6474389/icon.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cycler==0.10.0
2 | functools32==3.2.3.post2
3 | matplotlib==2.0.0
4 | numpy==1.12.0
5 | pandas==0.19.2
6 | pydot==1.2.3
7 | pyparsing==2.1.4
8 | python-dateutil==2.6.0
9 | pytz==2016.10
10 | scikit-learn==0.18.1
11 | scipy==0.18.1
12 | seaborn==0.7.1
13 | six==1.10.0
14 | subprocess32==3.2.7
15 | GEOparse==0.1.10
16 | fastcluster==1.1.20
17 |
--------------------------------------------------------------------------------
/scripts/ade_GEO2csv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | import argparse
11 | import pandas as pd
12 |
13 | from adenine.utils import GEO2csv
14 | from adenine import __version__
15 |
16 |
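# Example invocation (illustrative; the accession number is hypothetical):
#
#   $ ade_GEO2csv.py GSE00000 --label_field title --pheno Control,Severe --gene_symbol
#
# This downloads the GEO series, keeps only the samples with the selected
# phenotypes and writes <accession>_data.csv and <accession>_labels.csv in the
# current folder.
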
17 | def main():
18 | """Adenine GEO2csv main script."""
19 | parser = argparse.ArgumentParser(description='Adenine script for '
20 | 'GEO2csv conversion.')
21 | parser.add_argument('--version', action='version',
22 | version='%(prog)s v' + __version__)
23 | parser.add_argument('accession_number', help='GEO DataSets Accession number')
24 | parser.add_argument('--label_field', dest='pheno_name',
25 |                         default='title', help='The field in which '
26 |                         'phenotype information is stored.')
27 | parser.add_argument('--phenotypes', '--pheno', dest='pheno',
28 | action='store', default=None,
29 | help='Select samples by their phenotypes ('
30 | 'comma separated) e.g.: Severe,Mild,Control,...')
31 | parser.add_argument('--gene_symbol', action='store_true', dest='gs',
32 | help='Use this option to convert the platform IDs '
33 | 'to gene symbols')
34 | parser.add_argument('--signature', dest='signature',
35 | default=None, help='Generate a data matrix comprising '
36 | 'only the genes in the signature.')
37 | args = parser.parse_args()
38 |
39 | # Get the data
40 | try:
41 | if args.gs or (args.signature is not None):
42 | data, gse = GEO2csv.get_GEO(args.accession_number, args.pheno_name, True)
43 | else:
44 | data = GEO2csv.get_GEO(args.accession_number, args.pheno_name)[0]
45 | print('* GEO dataset {} loaded'.format(args.accession_number))
46 |
47 | # Filter samples per phenotype
48 | if args.pheno is not None:
49 | data = GEO2csv.GEO_select_samples(
50 | data.data, data.target, selected_labels=args.pheno.split(','),
51 | index=data.index, feature_names=data.feature_names)
52 | print('* Phenotypes {}'.format(args.pheno))
53 |
54 | if args.gs or (args.signature is not None):
55 | data = GEO2csv.id2gs(data, gse)
56 | print('* Probe ID converted to gene symbols')
57 |
58 | if args.signature is not None:
59 | data = GEO2csv.restrict_to_signature(data, args.signature.split(','))
60 | print('* Dataset restricted to {}'.format(data.feature_names))
61 |
62 | # Save dataset
63 | pd.DataFrame(data=data.data, columns=data.feature_names,
64 | index=data.index).to_csv('{}_data.csv'.format(args.accession_number))
65 | print('* {}_data.csv created: {} samples x {} features'.format(args.accession_number,
66 | *data.data.shape))
67 | pd.DataFrame(data=data.target, columns=['Phenotype'],
68 | index=data.index).to_csv('{}_labels.csv'.format(args.accession_number))
69 | print('* {}_labels.csv created: {} samples'.format(args.accession_number,
70 | len(data.target)))
71 |
72 | except Exception as e:
73 | print('Raised {}'.format(e))
74 | raise ValueError('Cannot parse {}. Check '
75 | 'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={}'
76 | ' for more info on the GEO series'.format(args.accession_number,
77 | args.accession_number))
78 |
79 |
80 | if __name__ == '__main__':
81 | main()
82 |
--------------------------------------------------------------------------------
/scripts/ade_analysis.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Adenine analysis script."""
3 | ######################################################################
4 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
5 | #
6 | # FreeBSD License
7 | ######################################################################
8 |
9 | from __future__ import print_function
10 |
11 | import imp
12 | import sys
13 | import os
14 | import time
15 | import logging
16 | import argparse
17 | import gzip
18 | import numpy as np
19 | try:
20 | import cPickle as pkl
21 | except ImportError:
22 | import pickle as pkl
23 |
24 | from adenine.core import analyze_results
25 | from adenine.utils import extra
26 |
27 |
28 | def init_main():
29 | """Init analysis main."""
30 | from adenine import __version__
31 | parser = argparse.ArgumentParser(description='Adenine script for '
32 | 'analysing pipelines.')
33 | parser.add_argument('--version', action='version',
34 | version='%(prog)s v' + __version__)
35 | parser.add_argument("result_folder", help="specify results directory")
36 | args = parser.parse_args()
37 |
38 | root_folder = args.result_folder
39 |     filename = [f for f in os.listdir(root_folder)
40 |                 if os.path.isfile(os.path.join(root_folder, f)) and
41 |                 '.pkl' in f and not f.startswith('__data')]
42 | if not filename:
43 | sys.stderr.write("No .pkl file found in {}. Aborting...\n"
44 | .format(root_folder))
45 | sys.exit(-1)
46 |
47 | # Run analysis
48 | # print("Starting the analysis of {}".format(filename))
49 | main(os.path.join(os.path.abspath(root_folder), filename[0]))
50 |
51 |
52 | def main(dumpfile):
53 | """Analyze the pipelines."""
54 | # Load the configuration file
55 | config_path = os.path.dirname(dumpfile)
56 | config_path = os.path.join(os.path.abspath(config_path), 'ade_config.py')
57 | config = imp.load_source('ade_config', config_path)
58 | extra.set_module_defaults(config, {'file_format': 'pdf',
59 | 'plotting_context': 'paper',
60 | 'verbose': False})
61 | if hasattr(config, 'use_compression'):
62 | use_compression = config.use_compression
63 | else:
64 | use_compression = False
65 |
66 | # Load the results used with ade_run.py
67 | try:
68 | if use_compression:
69 | with gzip.open(os.path.join(os.path.dirname(dumpfile),
70 | '__data.pkl.tz'), 'r') as fdata:
71 | data_X_y_index = pkl.load(fdata)
72 | data = data_X_y_index['X']
73 | labels = data_X_y_index['y']
74 | index = data_X_y_index['index']
75 | else:
76 | with open(os.path.join(os.path.dirname(dumpfile),
77 |                       '__data.pkl'), 'rb') as fdata:
78 | data_X_y_index = pkl.load(fdata)
79 | data = data_X_y_index['X']
80 | labels = data_X_y_index['y']
81 | index = data_X_y_index['index']
82 | except IOError:
83 | if use_compression:
84 | data_filename = '__data.pkl.tz'
85 | else:
86 | data_filename = '__data.pkl'
87 |
88 | sys.stderr.write("Cannot load {} Reloading data from "
89 | "config file ...".format(data_filename))
90 | data = config.X
91 | labels = config.y
92 | index = config.index if hasattr(config, 'index') \
93 | else np.arange(data.shape[0])
94 |
95 | # Read the feature names from the config file
96 | feat_names = config.feat_names if hasattr(config, 'feat_names') \
97 | else np.arange(data.shape[1])
98 | # Initialize the log file
99 | filename = 'results_' + os.path.basename(dumpfile)[0:-7]
100 | logfile = os.path.join(os.path.dirname(dumpfile), filename + '.log')
101 | logging.basicConfig(filename=logfile, level=logging.INFO, filemode='w',
102 | format='%(levelname)s (%(name)s): %(message)s')
103 | root_logger = logging.getLogger()
104 | lsh = logging.StreamHandler()
105 |     lsh.setLevel(logging.INFO if config.verbose else logging.ERROR)
106 | lsh.setFormatter(
107 | logging.Formatter('%(levelname)s (%(name)s): %(message)s'))
108 | root_logger.addHandler(lsh)
109 |
110 | tic = time.time()
111 | print("\nUnpickling output ...", end=' ')
112 | # Load the results
113 | if use_compression:
114 | with gzip.open(dumpfile, 'r') as fres:
115 | res = pkl.load(fres)
116 | else:
117 |         with open(dumpfile, 'rb') as fres:
118 | res = pkl.load(fres)
119 |
120 | print("done: {} s".format(extra.sec_to_time(time.time() - tic)))
121 |
122 | # Analyze the pipelines
123 | analyze_results.analyze(input_dict=res, root=os.path.dirname(dumpfile),
124 | y=labels, feat_names=feat_names, index=index,
125 | plotting_context=config.plotting_context,
126 | file_format=config.file_format)
127 |
128 | root_logger.handlers[0].close()
129 |
130 |
131 | if __name__ == '__main__':
132 | init_main()
133 |
--------------------------------------------------------------------------------
/scripts/ade_run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | ######################################################################
5 | # Copyright (C) 2016 Samuele Fiorini, Federico Tomasi, Annalisa Barla
6 | #
7 | # FreeBSD License
8 | ######################################################################
9 |
10 | import os
11 | import shutil
12 | import argparse
13 |
14 | from adenine import main
15 |
16 |
17 | def init_main():
18 | """Initialize main for ade_run.py."""
19 | from adenine import __version__
20 | parser = argparse.ArgumentParser(description='Adenine script for '
21 | 'pipeline generation.')
22 | parser.add_argument('--version', action='version',
23 | version='%(prog)s v' + __version__)
24 | parser.add_argument("-c", "--create", dest="create", action="store_true",
25 | help="create config file", default=False)
26 | parser.add_argument("configuration_file", help="specify config file",
27 |                         nargs='?', default='ade_config.py')
28 | args = parser.parse_args()
29 |
30 | if args.create:
31 | import adenine as ade
32 | std_config_path = os.path.join(ade.__path__[0], 'ade_config.py')
33 | # Check for .pyc
34 | if std_config_path.endswith('.pyc'):
35 | std_config_path = std_config_path[:-1]
36 | # Check if the file already exists
37 | if os.path.exists(args.configuration_file):
38 | parser.error("adenine configuration file already exists")
39 | # Copy the config file
40 | shutil.copy(std_config_path, args.configuration_file)
41 | else:
42 | main(args.configuration_file)
43 |
44 |
45 | if __name__ == '__main__':
46 | init_main()
47 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """adenine setup script."""
3 |
4 | from setuptools import setup
5 |
6 | # Package Version
7 | from adenine import __version__ as version
8 |
9 | setup(
10 | name='adenine',
11 | version=version,
12 |
13 | description=('A Data ExploratioN pIpeliNE'),
14 | long_description=open('README.md').read(),
15 | author='Samuele Fiorini, Federico Tomasi',
16 | author_email='{samuele.fiorini, federico.tomasi}@dibris.unige.it',
17 | maintainer='Samuele Fiorini, Federico Tomasi',
18 | maintainer_email='{samuele.fiorini, federico.tomasi}@dibris.unige.it',
19 | url='https://github.com/slipguru/adenine',
20 | download_url='https://github.com/slipguru/adenine/tarball/'+version,
21 | classifiers=[
22 | 'Development Status :: 4 - Beta',
23 | 'Environment :: Console',
24 | 'Intended Audience :: Science/Research',
25 | 'Intended Audience :: Developers',
26 | 'Programming Language :: Python',
27 | 'License :: OSI Approved :: BSD License',
28 | 'Topic :: Software Development',
29 | 'Topic :: Scientific/Engineering :: Bio-Informatics',
30 | 'Operating System :: POSIX',
31 | 'Operating System :: Unix',
32 | 'Operating System :: MacOS'
33 | ],
34 | license='FreeBSD',
35 |
36 | packages=['adenine', 'adenine.core', 'adenine.utils', 'adenine.externals'],
37 | install_requires=['numpy (>=1.10.1)',
38 | 'scipy (>=0.16.1)',
39 | 'scikit-learn (>=0.18)',
40 | 'matplotlib (>=1.5.1)',
41 | 'seaborn (>=0.7.0)',
42 | # 'joblib',
43 | 'fastcluster (>=1.1.20)',
44 | 'GEOparse (>=0.1.10)',
45 | 'pydot (>=1.2.3)'],
46 | scripts=['scripts/ade_run.py', 'scripts/ade_analysis.py',
47 | 'scripts/ade_GEO2csv.py'],
48 | )
49 |
--------------------------------------------------------------------------------