├── .gitignore
├── LICENSE
├── MAINTAINERS
├── README.md
├── examples
    └── toy.py
├── requirements.txt
└── tidybench
    ├── __init__.py
    ├── lasar.py
    ├── qrbs.py
    ├── selvar.py
    ├── selvarF.f
    ├── slarac.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | #   For a library or package, you might want to ignore these files since the code is
 86 | #   intended to run in multiple environments; otherwise, check them in:
 87 | # .python-version
 88 | 
 89 | # pipenv
 90 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 91 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 92 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 93 | #   install all needed dependencies.
 94 | #Pipfile.lock
 95 | 
 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 97 | __pypackages__/
 98 | 
 99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 | 
103 | # SageMath parsed files
104 | *.sage.py
105 | 
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 | 
130 | # Pyre type checker
131 | .pyre/
132 | 
133 | # pytype static type analyzer
134 | .pytype/
135 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Mozilla Public License Version 2.0
  2 | ==================================
  3 | 
  4 | 1. Definitions
  5 | --------------
  6 | 
  7 | 1.1. "Contributor"
  8 |     means each individual or legal entity that creates, contributes to
  9 |     the creation of, or owns Covered Software.
 10 | 
 11 | 1.2. "Contributor Version"
 12 |     means the combination of the Contributions of others (if any) used
 13 |     by a Contributor and that particular Contributor's Contribution.
 14 | 
 15 | 1.3. "Contribution"
 16 |     means Covered Software of a particular Contributor.
 17 | 
 18 | 1.4. "Covered Software"
 19 |     means Source Code Form to which the initial Contributor has attached
 20 |     the notice in Exhibit A, the Executable Form of such Source Code
 21 |     Form, and Modifications of such Source Code Form, in each case
 22 |     including portions thereof.
 23 | 
 24 | 1.5. "Incompatible With Secondary Licenses"
 25 |     means
 26 | 
 27 |     (a) that the initial Contributor has attached the notice described
 28 |         in Exhibit B to the Covered Software; or
 29 | 
 30 |     (b) that the Covered Software was made available under the terms of
 31 |         version 1.1 or earlier of the License, but not also under the
 32 |         terms of a Secondary License.
 33 | 
 34 | 1.6. "Executable Form"
 35 |     means any form of the work other than Source Code Form.
 36 | 
 37 | 1.7. "Larger Work"
 38 |     means a work that combines Covered Software with other material, in
 39 |     a separate file or files, that is not Covered Software.
 40 | 
 41 | 1.8. "License"
 42 |     means this document.
 43 | 
 44 | 1.9. "Licensable"
 45 |     means having the right to grant, to the maximum extent possible,
 46 |     whether at the time of the initial grant or subsequently, any and
 47 |     all of the rights conveyed by this License.
 48 | 
 49 | 1.10. "Modifications"
 50 |     means any of the following:
 51 | 
 52 |     (a) any file in Source Code Form that results from an addition to,
 53 |         deletion from, or modification of the contents of Covered
 54 |         Software; or
 55 | 
 56 |     (b) any new file in Source Code Form that contains any Covered
 57 |         Software.
 58 | 
 59 | 1.11. "Patent Claims" of a Contributor
 60 |     means any patent claim(s), including without limitation, method,
 61 |     process, and apparatus claims, in any patent Licensable by such
 62 |     Contributor that would be infringed, but for the grant of the
 63 |     License, by the making, using, selling, offering for sale, having
 64 |     made, import, or transfer of either its Contributions or its
 65 |     Contributor Version.
 66 | 
 67 | 1.12. "Secondary License"
 68 |     means either the GNU General Public License, Version 2.0, the GNU
 69 |     Lesser General Public License, Version 2.1, the GNU Affero General
 70 |     Public License, Version 3.0, or any later versions of those
 71 |     licenses.
 72 | 
 73 | 1.13. "Source Code Form"
 74 |     means the form of the work preferred for making modifications.
 75 | 
 76 | 1.14. "You" (or "Your")
 77 |     means an individual or a legal entity exercising rights under this
 78 |     License. For legal entities, "You" includes any entity that
 79 |     controls, is controlled by, or is under common control with You. For
 80 |     purposes of this definition, "control" means (a) the power, direct
 81 |     or indirect, to cause the direction or management of such entity,
 82 |     whether by contract or otherwise, or (b) ownership of more than
 83 |     fifty percent (50%) of the outstanding shares or beneficial
 84 |     ownership of such entity.
 85 | 
 86 | 2. License Grants and Conditions
 87 | --------------------------------
 88 | 
 89 | 2.1. Grants
 90 | 
 91 | Each Contributor hereby grants You a world-wide, royalty-free,
 92 | non-exclusive license:
 93 | 
 94 | (a) under intellectual property rights (other than patent or trademark)
 95 |     Licensable by such Contributor to use, reproduce, make available,
 96 |     modify, display, perform, distribute, and otherwise exploit its
 97 |     Contributions, either on an unmodified basis, with Modifications, or
 98 |     as part of a Larger Work; and
 99 | 
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 |     for sale, have made, import, and otherwise transfer either its
102 |     Contributions or its Contributor Version.
103 | 
104 | 2.2. Effective Date
105 | 
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 | 
110 | 2.3. Limitations on Grant Scope
111 | 
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 | 
118 | (a) for any code that a Contributor has removed from Covered Software;
119 |     or
120 | 
121 | (b) for infringements caused by: (i) Your and any other third party's
122 |     modifications of Covered Software, or (ii) the combination of its
123 |     Contributions with other software (except as part of its Contributor
124 |     Version); or
125 | 
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 |     its Contributions.
128 | 
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 | 
133 | 2.4. Subsequent Licenses
134 | 
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 | 
140 | 2.5. Representation
141 | 
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 | 
146 | 2.6. Fair Use
147 | 
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 | 
152 | 2.7. Conditions
153 | 
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 | 
157 | 3. Responsibilities
158 | -------------------
159 | 
160 | 3.1. Distribution of Source Form
161 | 
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 | 
170 | 3.2. Distribution of Executable Form
171 | 
172 | If You distribute Covered Software in Executable Form then:
173 | 
174 | (a) such Covered Software must also be made available in Source Code
175 |     Form, as described in Section 3.1, and You must inform recipients of
176 |     the Executable Form how they can obtain a copy of such Source Code
177 |     Form by reasonable means in a timely manner, at a charge no more
178 |     than the cost of distribution to the recipient; and
179 | 
180 | (b) You may distribute such Executable Form under the terms of this
181 |     License, or sublicense it under different terms, provided that the
182 |     license for the Executable Form does not attempt to limit or alter
183 |     the recipients' rights in the Source Code Form under this License.
184 | 
185 | 3.3. Distribution of a Larger Work
186 | 
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 | 
198 | 3.4. Notices
199 | 
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 | 
206 | 3.5. Application of Additional Terms
207 | 
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 | 
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 | 
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 | 
232 | 5. Termination
233 | --------------
234 | 
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 | 
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 | 
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 | 
261 | ************************************************************************
262 | *                                                                      *
263 | *  6. Disclaimer of Warranty                                           *
264 | *  -------------------------                                           *
265 | *                                                                      *
266 | *  Covered Software is provided under this License on an "as is"       *
267 | *  basis, without warranty of any kind, either expressed, implied, or  *
268 | *  statutory, including, without limitation, warranties that the       *
269 | *  Covered Software is free of defects, merchantable, fit for a        *
270 | *  particular purpose or non-infringing. The entire risk as to the     *
271 | *  quality and performance of the Covered Software is with You.        *
272 | *  Should any Covered Software prove defective in any respect, You     *
273 | *  (not any Contributor) assume the cost of any necessary servicing,   *
274 | *  repair, or correction. This disclaimer of warranty constitutes an   *
275 | *  essential part of this License. No use of any Covered Software is   *
276 | *  authorized under this License except under this disclaimer.         *
277 | *                                                                      *
278 | ************************************************************************
279 | 
280 | ************************************************************************
281 | *                                                                      *
282 | *  7. Limitation of Liability                                          *
283 | *  --------------------------                                          *
284 | *                                                                      *
285 | *  Under no circumstances and under no legal theory, whether tort      *
286 | *  (including negligence), contract, or otherwise, shall any           *
287 | *  Contributor, or anyone who distributes Covered Software as          *
288 | *  permitted above, be liable to You for any direct, indirect,         *
289 | *  special, incidental, or consequential damages of any character      *
290 | *  including, without limitation, damages for lost profits, loss of    *
291 | *  goodwill, work stoppage, computer failure or malfunction, or any    *
292 | *  and all other commercial damages or losses, even if such party      *
293 | *  shall have been informed of the possibility of such damages. This   *
294 | *  limitation of liability shall not apply to liability for death or   *
295 | *  personal injury resulting from such party's negligence to the       *
296 | *  extent applicable law prohibits such limitation. Some               *
297 | *  jurisdictions do not allow the exclusion or limitation of           *
298 | *  incidental or consequential damages, so this exclusion and          *
299 | *  limitation may not apply to You.                                    *
300 | *                                                                      *
301 | ************************************************************************
302 | 
303 | 8. Litigation
304 | -------------
305 | 
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 | 
313 | 9. Miscellaneous
314 | ----------------
315 | 
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 | 
323 | 10. Versions of the License
324 | ---------------------------
325 | 
326 | 10.1. New Versions
327 | 
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 | 
333 | 10.2. Effect of New Versions
334 | 
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 | 
340 | 10.3. Modified Versions
341 | 
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 | 
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 | 
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 | 
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 | 
358 |   This Source Code Form is subject to the terms of the Mozilla Public
359 |   License, v. 2.0. If a copy of the MPL was not distributed with this
360 |   file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 | 
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 | 
367 | You may add additional accurate notices of copyright ownership.
368 | 
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 | 
372 |   This Source Code Form is "Incompatible With Secondary Licenses", as
373 |   defined by the Mozilla Public License, v. 2.0.
374 | 


--------------------------------------------------------------------------------
/MAINTAINERS:
--------------------------------------------------------------------------------
1 | Nikolaj Thams (nikolajthams)
2 | Gherardo Varando (gherardovarando)
3 | Sebastian Weichwald (sweichwald)
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # *TI*me series *D*iscover*Y* *BENCH*mark (tidybench)
 2 | 
 3 | This repository holds implementations of the following four algorithms for causal structure learning for time series,
 4 | 
 5 | * `SLARAC` (Subsampled Linear Auto-Regression Absolute Coefficients),
 6 | * `QRBS` (Quantiles of Ridge regressed Bootstrap Samples),
 7 | * `LASAR` (LASso Auto-Regression),
 8 | * `SELVAR` (Selective auto-regressive model),
 9 | 
10 | which came in first in 18 and close second in 13 out of the 34 competition categories in the [Causality 4 Climate competition](https://causeme.uv.es/neurips2019/) at the Conference on Neural Information Processing Systems 2019 (NeurIPS). For details on the competition tasks and the outcomes you may watch the [recording of the NeurIPS session](https://slideslive.com/38922052/competition-track-day-21) or consult [the result slides](https://causeme.uv.es/neurips2019/static/img/Runge_NeurIPS_compressed.pdf).
11 | (Algorithm names map as follows between `tidybench` and our competition implementations: `tidybench.slarac` was varvar, `tidybench.qrbs` was ridge, `tidybench.lasar` was varvar(lasso=True), and `tidybench.selvar` was selvar.)
12 | 
13 | More details can be found in our [accompanying paper](http://proceedings.mlr.press/v123/weichwald20a.html) and the respective well-documented code files.
14 | 
15 | Feel free to use our algorithms (AGPL-3.0 license). In fact, we encourage their use as baseline benchmarks and guidance of future algorithmic and methodological developments for structure learning from time series.
16 | 
17 | We kindly ask you to cite our [accompanying paper](http://proceedings.mlr.press/v123/weichwald20a.html) in case you find our code useful:
18 | ```
19 | @InProceedings{weichwald2020causal,
20 |   title = {{Causal structure learning from time series: Large regression coefficients may predict causal links better in practice than small p-values}},
21 |   author = {Weichwald, Sebastian and Jakobsen, Martin E. and Mogensen, Phillip B. and Petersen, Lasse and Thams, Nikolaj and Varando, Gherardo},
22 |   publisher = {PMLR},
23 |   series = {Proceedings of the NeurIPS 2019 Competition and Demonstration Track, Proceedings of Machine Learning Research},
24 |   volume = {123},
25 |   pages = {27--36},
26 |   year = {2020},
27 |   editor = {Hugo Jair Escalante and Raia Hadsell},
28 |   pdf = {http://proceedings.mlr.press/v123/weichwald20a/weichwald20a.pdf},
29 |   url = {http://proceedings.mlr.press/v123/weichwald20a.html},
30 | }
31 | ```
32 | 
33 | 
34 | 
35 | ## What you get
36 | 
37 | **Input**: time series data (and some method-specific parameters)
38 | 
39 | **Output**: score matrix indicating which structural links are inferred likely to exist
40 | 
41 | All four algorithms take as input multivariate time series data in form of a T x d matrix of T time samples of d variables and output a d x d score/adjacency matrix A. The (i,j)th entry corresponds to an edge from the i-th to the j-th time series component, where higher values correspond to edges that are inferred to be more likely to exist, given the observed data.
42 | 
43 | 
44 | ## Example
45 | 
46 | At the moment, only a [toy example](examples/toy.py) is provided.
47 | 
48 | 
49 | ## Requirements
50 | 
51 | `SLARAC`, `QRBS`, and `LASAR` require numpy and sklearn. These requirements are listed in the [requirements.txt](requirements.txt) and can be installed via `pip install -r requirements.txt`.
52 | 
53 | `SELVAR` requires lapack/blas installed and the compilation of
54 | [selvarF.f](tidybench/selvarF.f) with [f2py](https://docs.scipy.org/doc/numpy/f2py/)
55 | (e.g. `f2py -llapack -c -m selvarF selvarF.f`).
56 | 
57 | ## Who we are
58 | 
59 | We are a team of PhD students and Postdocs that formed at the [Copenhagen Causality Lab (CoCaLa)](https://math.ku.dk/cocala) of the University of Copenhagen ([Martin E Jakobsen](https://www.math.ku.dk/english/research/spt/cocala/?pure=en/persons/410383), [Phillip B Mogensen](https://www.math.ku.dk/english/staff/?pure=en/persons/467826), [Lasse Petersen](https://www.math.ku.dk/english/research/spt/cocala/?pure=en/persons/433485), [Nikolaj Thams](https://nikolajthams.github.io/), [Gherardo Varando](https://gherardovarando.github.io/), [Sebastian Weichwald](https://sweichwald.de)) to participate in the C4C competition.
60 | 


--------------------------------------------------------------------------------
/examples/toy.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tidybench
 3 | 
 4 | 
 5 | if __name__ == "__main__":
 6 |     """
 7 |     Generate time series data over three variables from a stable SVAR with
 8 |     the following structure:
 9 |     """
10 |     # --> X_1(t-1) --> X_1(t) --> X_1(t+1) -->
11 |     #              \         \
12 |     #               \         \
13 |     #                \         \
14 |     #                 \         \
15 |     #                  v         v
16 |     # --> X_2(t-1) --> X_2(t) --> X_2(t+1) -->
17 |     #              \   ^     \   ^
18 |     #               \ /       \ /
19 |     #                \         /
20 |     #               / \       / \
21 |     #              /   v     /   v
22 |     # --> X_3(t-1) --> X_3(t) --> X_3(t+1) -->
23 |     B = np.asarray([[1, 2, 0],
24 |                     [0, 1, 1],
25 |                     [0, 2, 1]]) / 3
26 |     T, d = 500, 3
27 |     X = np.random.randn(T, d)
28 |     for t in range(1, T):
29 |         X[t, :] += B.T.dot(X[t-1, :])
30 | 
31 |     # The true adjacency matrix is
32 |     A = B > 0
33 |     print('True adjacency matrix:')
34 |     print(A)
35 | 
36 |     print('Score matrix for the adjacency matrix as inferred by '
37 |           'slarac (post_standardised):')
38 |     print(tidybench.slarac(X, post_standardise=True).round(2))
39 | 
40 |     print('Score matrix for the adjacency matrix as inferred by '
41 |           'qrbs (post_standardised):')
42 |     print(tidybench.qrbs(X, post_standardise=True).round(2))
43 | 
44 |     print('Score matrix for the adjacency matrix as inferred by '
45 |           'lasar (post_standardised):')
46 |     print(tidybench.lasar(X, post_standardise=True).round(2))
47 | 
48 |     print('Score matrix for the adjacency matrix as inferred by '
49 |           'selvar (post_standardised):')
50 |     print(tidybench.selvar(X, post_standardise=True).round(2))
51 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.8.2
2 | scikit-learn>=0.17
3 | 


--------------------------------------------------------------------------------
/tidybench/__init__.py:
--------------------------------------------------------------------------------
 1 | __all__ = [
 2 |     "lasar",
 3 |     "qrbs",
 4 |     "selvar",
 5 |     "slarac",
 6 | ]
 7 | 
 8 | 
 9 | from .lasar import lasar
10 | from .qrbs import qrbs
11 | from .selvar import selvar
12 | from .slarac import slarac
13 | 


--------------------------------------------------------------------------------
/tidybench/lasar.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Implements the LASAR (LASso Auto-Regression) algorithm.
  3 | 
  4 | Based on an implementation that is originally due to Sebastian Weichwald
  5 | (sweichwald).
  6 | """
  7 | 
  8 | 
  9 | import numpy as np
 10 | from sklearn.linear_model import LassoLarsCV
 11 | from sklearn.utils import resample
 12 | from .utils import common_pre_post_processing
 13 | 
 14 | 
 15 | INV_GOLDEN_RATIO = 2 / (1 + np.sqrt(5))
 16 | 
 17 | 
 18 | @common_pre_post_processing
 19 | def lasar(data,
 20 |           maxlags=1,
 21 |           n_subsamples=100,
 22 |           subsample_sizes=[INV_GOLDEN_RATIO**(1 / k) for k in [1, 2, 3, 6]],
 23 |           cv=5,
 24 |           aggregate_lags=lambda x: x.max(axis=1).T,
 25 |           ):
 26 |     """LASAR (LASso Auto-Regression).
 27 | 
 28 |     Parameters
 29 |     ----------
 30 |     data : ndarray
 31 |         T (timepoints) x N (variables) input data
 32 | 
 33 |     maxlags : int
 34 |         Maximum number of lags to consider
 35 | 
 36 |     n_subsamples : int
 37 |         How often to subsample the data
 38 | 
 39 |     subsample_sizes : ndarray
 40 |         Possible sizes of the subsamples as fractions
 41 | 
 42 |     cv : int
 43 |         Number of cross-validation folds for the lasso variable selection step
 44 | 
 45 |     aggregate_lags : function
 46 |         Function that takes an N (to) x maxlags x N (from) ndarray as input and
 47 |         outputs an N x N ndarray aggregating the lag-resolved scores,
 48 |         for example
 49 |             lambda x: x.max(axis=1).T
 50 |         or
 51 |             lambda x: x.sum(axis=1).T
 52 | 
 53 |     Arguments for the common pre-processing steps of the data and the common
 54 |     post-processing steps of the scores are documented in
 55 |     utils.common_pre_post_processing
 56 | 
 57 |     Returns
 58 |     ----------
 59 |     scores : ndarray
 60 |         Array where the (i,j)th entry corresponds to the link X_i --> X_j
 61 |     """
 62 | 
 63 |     # T timepoints, N variables
 64 |     T, N = data.shape
 65 | 
 66 |     # Obtain absolute regression coefficients after refitting on a cross-
 67 |     # validated variable selection obtained by lasso regression on the entire
 68 |     # data set and random subsamples
 69 |     scores = np.abs(lassovar(data, maxlags, cv=cv))
 70 |     for subsample_size in np.random.choice(subsample_sizes, n_subsamples):
 71 |         n_samples = int(np.round(subsample_size * T))
 72 |         scores += np.abs(lassovar(data, maxlags, n_samples=n_samples, cv=cv))
 73 | 
 74 |     # Divide the sum to obtain the average
 75 |     scores /= (n_subsamples + 1)
 76 | 
 77 |     # aggregate lagged coefficients to square connectivity matrix
 78 |     scores = aggregate_lags(scores.reshape(N, -1, N))
 79 |     return scores
 80 | 
 81 | 
 82 | def lassovar(data, maxlags=1, n_samples=None, cv=5):
 83 |     # Stack data to perform regression of present on past values
 84 |     Y = data.T[:, maxlags:]
 85 |     d = Y.shape[0]
 86 |     Z = np.vstack([data.T[:, maxlags - k:-k]
 87 |                    for k in range(1, maxlags + 1)])
 88 |     Y, Z = Y.T, Z.T
 89 | 
 90 |     # Subsample data
 91 |     if n_samples is not None:
 92 |         Y, Z = resample(Y, Z, n_samples=n_samples)
 93 | 
 94 |     scores = np.zeros((d, d * maxlags))
 95 | 
 96 |     ls = LassoLarsCV(cv=cv, n_jobs=1)
 97 | 
 98 |     residuals = np.zeros(Y.shape)
 99 | 
100 |     # Consider one variable after the other as target
101 |     for j in range(d):
102 |         target = np.copy(Y[:, j])
103 |         selectedparents = np.full(d * maxlags, False)
104 |         # Include one lag after the other
105 |         for l in range(1, maxlags + 1):
106 |             ind_a = d * (l - 1)
107 |             ind_b = d * l
108 |             ls.fit(Z[:, ind_a:ind_b], target)
109 |             selectedparents[ind_a:ind_b] = ls.coef_ > 0
110 |             target -= ls.predict(Z[:, ind_a:ind_b])
111 | 
112 |         residuals[:, j] = np.copy(target)
113 | 
114 |         # Refit OLS using the selected variables to get rid of the bias
115 |         ZZ = Z[:, selectedparents]
116 |         B = np.linalg.lstsq(ZZ.T.dot(ZZ), ZZ.T.dot(Y[:, j]), rcond=None)[0]
117 |         scores[j, selectedparents] = B
118 | 
119 |     return scores
120 | 


--------------------------------------------------------------------------------
/tidybench/qrbs.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implements the QRBS (Quantiles of Ridge regressed Bootstrap Samples) algorithm.
 3 | 
 4 | Based on an implementation that is originally due to Nikolaj Thams
 5 | (nikolajthams).
 6 | """
 7 | 
 8 | 
 9 | import numpy as np
10 | from sklearn.linear_model import Ridge
11 | from sklearn.utils import resample
12 | from .utils import common_pre_post_processing
13 | 
14 | 
15 | @common_pre_post_processing
16 | def qrbs(data,
17 |          lags=1,
18 |          alpha=.005,
19 |          q=.75,
20 |          n_resamples=600,
21 |          ):
22 |     """
23 |     Perform bootstrapped ridge regression of data at time t on data in the past
24 | 
25 |     Parameters
26 |     ----------
27 |     data : ndarray
28 |         T (timepoints) x N (variables) input data
29 | 
30 |     lags : int
31 |         Number of lags to include in the modelling
32 | 
33 |     alpha : double
34 |         Penalization parameter used for the ridge regression
35 | 
36 |     q : double
37 |         The method performs 200 bootstrap samples, in each fitting a ridge
38 |         regression on a random subset of the data. This gives 200 estimates
39 |         of the effect i -> j.
40 |         We take the q'th quantile as the final estimate.
41 |         q = 1 corresponds to the max effect across samples, q = 0.5 to the
42 |         median effect.
43 | 
44 |     n_resamples : int
45 |         Number of bootstrap samples drawn
46 | 
47 |     Arguments for the common pre-processing steps of the data and the common
48 |     post-processing steps of the scores are documented in
49 |     utils.common_pre_post_processing
50 | 
51 |     Returns
52 |     ----------
53 |     scores : ndarray
54 |         Array with scores for each link i -> j
55 |     """
56 | 
57 |     # We regress y = data_t on X = data_[t-1, ..., t-lags]
58 |     y = np.diff(data, axis=0)[lags-1:]
59 |     X = np.concatenate([data[lag:-(lags-lag)]
60 |                         for lag in np.flip(np.arange(lags))], axis=1)
61 | 
62 |     # Initiate ridge regressor
63 |     ls = Ridge(alpha)
64 | 
65 |     # Bootstrap fit lasso coefficients
66 |     k = int(np.floor(data.shape[0]*0.7))
67 |     results = np.stack([
68 |         ls.fit(*resample(X, y, n_samples=k)).coef_
69 |         for _ in range(n_resamples)])
70 | 
71 |     # Aggregate lags by taking abs and summing
72 |     results = np.abs(
73 |         results.reshape(n_resamples, y.shape[1], lags, -1)).sum(axis=2)
74 |     scores = np.quantile(results, q, axis=0)
75 |     # Return transposed scores because ridge default beta*X means you can read
76 |     # parents by row. Instead by transposing, the parents of i are in column i
77 |     return scores.T
78 | 


--------------------------------------------------------------------------------
/tidybench/selvar.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implements the SELVAR (Selective auto-regressive model) algorithm.
 3 | 
 4 | Based on an implementation that is originally due to Gherardo Varando
 5 | (gherardovarando).
 6 | """
 7 | 
 8 | from .utils import common_pre_post_processing
 9 | ###
10 | # compile selvarF.f with:
11 | #    f2py -llapack -c -m selvarF selvarF.f
12 | ###
13 | try:
14 |     from .selvarF import slvar, gtstat, gtcoef
15 | except ImportError:
16 |     slvar = None
17 |     gtstat = None
18 |     gtcoef = None
19 | 
20 | 
21 | @common_pre_post_processing
22 | def selvar(data,
23 |            maxlags=1,
24 |            batchsize=-1,
25 |            mxitr=-1,
26 |            trace=0,
27 |            ):
28 |     """
29 |     SELVAR (Selective auto-regressive model).
30 | 
31 |     Parameters
32 |     ----------
33 |     data : ndarray
34 |         T (timepoints) x N (variables) input data
35 | 
36 |     maxlags : int
37 |         Maximum number of lags to include in the model.
38 |         If maxlags < 0 then the maximum lag will be iteratively
39 |         adjusted for each variable until no decrease in PRSS.
40 | 
41 |     batchsize : int
42 |         Number of consecutive time points to use in each training batch.
43 |         If batchsize < 0 then batchsize is set to the maximum available
44 |         time boints given maxlags.
45 | 
46 |     mxitr : int
47 |         Maximum number of iterations (each variable) for the
48 |         hill-climbing search. If mxitr < 0 then the
49 |         search will stop only when no decrease in PRSS is possible.
50 | 
51 |     trace : int
52 |         If positive messages will be printed out during the search.
53 | 
54 |     Arguments for the common pre-processing steps of the data and the common
55 |     post-processing steps of the scores are documented in
56 |     utils.common_pre_post_processing
57 | 
58 |     Returns
59 |     ----------
60 |     scores : ndarray
61 |         Array with scores for each link i -> j
62 |     """
63 | 
64 |     if slvar is None:
65 |         raise RuntimeError("selvarF.f is not yet compiled")
66 | 
67 |     scores, lags, info = slvar(data, bs=int(batchsize), ml=int(maxlags),
68 |                                mxitr=int(mxitr), trc=int(trace))
69 | 
70 |     return scores
71 | 


--------------------------------------------------------------------------------
/tidybench/selvarF.f:
--------------------------------------------------------------------------------
  1 | c Implements the SELVAR (Selective auto-regressive model) algorithm.
  2 | c
  3 | c Based on an implementation that is originally due to Gherardo Varando (gherardovarando).
  4 | c
  5 |       SUBROUTINE SLVAR(T, N, X, BS, ML, MXITR, B, A, INFO, TRC)  
  6 | c     Select structure and lags of a (local) VAR model
  7 | c     Gherardo Varando (2019)
  8 |       INTEGER T, N, BS, ML, MXITR, A(N,N), INFO, TRC
  9 |       DOUBLE PRECISION X(T,N), B(N,N)
 10 | Cf2py integer, intent(in) n 
 11 | Cf2py integerm, intent(in) t
 12 | Cf2py intent(in) x
 13 | Cf2py integer, optional, intent(in):: bs = -1
 14 | Cf2py integer, optional, intent(in) :: ml = -1 
 15 | Cf2py integer, optional, intent(in) :: mxitr = -1
 16 | Cf2py optional, intent(in) :: lmb = 0
 17 | Cf2py intent(out) b
 18 | Cf2py intent(out) a 
 19 | Cf2py intent(out)  info
 20 | Cf2py integer, optional, intent(in) :: trc = 1
 21 | c     ON ENTRY
 22 | c        T integer
 23 | c          number of time points
 24 | c        N integer
 25 | c          number of variables 
 26 | c        X double precision (T, N)
 27 | c          matrix of observations
 28 | c        BS integer
 29 | c           size of the batch 
 30 | c            * IF BS .GT. 0 batch size = BS
 31 | c            * IF BS .LT. 0 batch size = T / -BS 
 32 | c            * IF BS .EQ. 0 
 33 | c        ML integer
 34 | c           maximum time lag 
 35 | c            * IF ML .GT. 0 maximum time lag = ML 
 36 | c            * IF ML .LT. 0 maximum time lag is searched iteratively  
 37 | c        MXITR integer 
 38 | c           maximum number of iterations in hc
 39 | c            * IF MXITR .EQ. 0 perform no search and just score edges
 40 | c            * IF MXITR .LT. 0 THEN MXITR = INF
 41 | c        B double precision (N,N) 
 42 | c          empty matrix
 43 | c        A integer (N,N) 
 44 | c          empty matrix for return 
 45 | c        TRC integer
 46 | c            IF TRC .GT. 0 print trace information
 47 | c     ON RETURN
 48 | c        B double precision (N, N)
 49 | c          matrix of scores
 50 | c        A integer (N,N)
 51 | c          matrix of estimated lags 
 52 | c        INFO integer 
 53 | c          information on errors
 54 | c     DETAILS 
 55 | c       
 56 | c     SUBROUTINES 
 57 | c       * GTPRSS 
 58 | c       * GTCOEF
 59 | c       * GTRSS 
 60 | c       * DGELS from LAPACK
 61 | c       * DORGQR from LAPACK
 62 | c     INTERNAL VARIABLES
 63 |       INTEGER I, J, K, FLG, ITR, TMP, IBST, KBST, ITML
 64 |       DOUBLE PRECISION XX(T, N + 1), YY(T, 1), 
 65 |      *                 WK(2*T*N), 
 66 |      *                 SCR, NWSCR, TMPSCR
 67 |       SCR = 0.0
 68 |       NWSCR = 0.0
 69 |       FLG = 0 
 70 |       ITR = 0
 71 |       ITML = 0
 72 |       IF (ML .LT. 1) ITML = 1
 73 |       IF (ML .GE. T .OR. ML .LT. 1) ML = 1
 74 |       IF (BS .LT. 0) BS = (T - ML) / (-BS) 
 75 |       IF (BS .GT. T - ML) BS = T - ML
 76 |       IF (MXITR .EQ. 0) GOTO 100 
 77 | c     print parameters
 78 |       IF (TRC .GT. 10) THEN
 79 |          WRITE(*,*) BS, ML 
 80 |       ENDIF
 81 |       DO 20 J = 1,N
 82 |          DO 10 I = 1,N
 83 | c        initialize the empty graph 
 84 |             A(I,J) = 0
 85 |  10      CONTINUE
 86 |  20   CONTINUE           
 87 |       DO 50 J=1,N 
 88 |          ITR = 0
 89 |          IF (ITML .GT. 0) ML = 1
 90 | c        compute initial score (prss) for j
 91 |          CALL GTPRSS(T, N, X, ML, BS, A, J, XX, YY, WK, 
 92 |      *                SCR, INFO)
 93 | c        hill-climb search start for j 
 94 |  500     CONTINUE
 95 | c        increase iteration counter
 96 |          ITR = ITR + 1
 97 |          FLG = 0
 98 |          TMPSCR = SCR
 99 |          IBST = -1
100 |          DO 40 K= 0,ML  
101 |             DO 30 I=1,N 
102 |                TMP = A(I,J)  
103 |                IF (K .NE. TMP) THEN
104 |                  A(I,J) = K 
105 |                  CALL GTPRSS(T, N, X, ML, BS, A, J, XX, YY, WK,
106 |      *                     NWSCR, INFO)
107 |                  IF (NWSCR .GE. 0 .AND. NWSCR .LT. TMPSCR) THEN
108 |                     TMPSCR = NWSCR 
109 |                     IBST = I
110 |                     KBST = K
111 |                  ENDIF
112 |                  A(I,J) = TMP
113 |               ENDIF
114 |  30        CONTINUE    
115 |  40      CONTINUE           
116 |          IF (IBST .GT. 0) THEN
117 |             A(IBST, J) = KBST
118 |             FLG = 1
119 |             SCR = TMPSCR
120 |             IF (TRC .GT. 0) THEN 
121 |                WRITE(*,"(a,a5,i3,a5,i3,a,i3,a5,i3)", ADVANCE = "NO") 
122 |      *                                    char(13), "ITER:", ITR, 
123 |      *                                             " ADD ", IBST,"-",J, 
124 |      *                                             " LAG=", KBST 
125 |             ENDIF
126 |          ENDIF     
127 |          IF (ITML .GT. 0) ML = MIN(ML + 1, T / 2)
128 |       IF ((MXITR .LT. 0 .OR. ITR .LT. MXITR) .AND. FLG .GT. 0) GOTO 500
129 |  50   CONTINUE        
130 |  100  CONTINUE
131 |       CALL GTCOEF(T, N, X, ML, BS, A, "ABS", 0, XX, YY, WK, B,
132 |      *             INFO)
133 |       MXITR = ITR
134 |       RETURN
135 | c     last line of SLVAR
136 |       END
137 | c
138 | c      
139 |       SUBROUTINE GTPRSS(T, N, X, ML, BS, A, J, XX, YY, WK, SCR,
140 |      *                 INFO)
141 | c     Get average Predicted RSS for a given variable  
142 | c     Gherardo Varando (2019)
143 |       INTEGER T, N, J, ML, A(N,N), INFO, BS
144 |       DOUBLE PRECISION X(T,N), XX(T, N + 1), YY(T, 1), 
145 |      *                 WK(2*T*N), SCR
146 | Cf2py intent(in) n 
147 | Cf2py intent(in) t
148 | Cf2py intent(in) x
149 | Cf2py intent(in) bs 
150 | Cf2py intent(in)  ml 
151 | Cf2py intent(in)  a 
152 | Cf2py intent(in)  j 
153 | Cf2py optional, intent(cache) ::  xx = array((t,n+1))
154 | Cf2py optional, intent(cache) ::  yy = array((t,1))
155 | Cf2py optional, intent(cache) ::  wk = array(2*t*n)
156 | Cf2py intent(out)  scr
157 | Cf2py intent(out)  info
158 | c  
159 | c     ON ENTRY 
160 | c        T, N, ML, BS, X, A  as in DSELVAR  
161 | c        XX, YY, WK  working variables  
162 | c     ON RETURN
163 | c        SCR the computed average PRSS 
164 | c        INFO 
165 | c     INTERNAL VARIABLES 
166 |       INTEGER NF, NV, TT, I, K 
167 |       DOUBLE PRECISION TMP, TMPY
168 |       IF (ML .GE. T .OR. ML .LT. 1) ML = 1
169 |       IF (BS .LT. 0) BS = (T - ML) / (-BS) 
170 |       IF (BS .GT. T - ML) BS = T - ML
171 |       SCR = 0.0
172 |       NF = (T - ML) / BS
173 |       DO 100 K = 1, NF
174 |          NV = 1
175 |          DO 5  TT = 1, BS
176 |             XX(TT, NV) = 1   
177 |             YY(TT, 1) = X(TT + ML + (K-1) * BS, J) 
178 |  5       CONTINUE
179 |          DO 20 I = 1, N 
180 |             IF (A(I,J) .GT. 0) THEN 
181 |                NV = NV + 1
182 |                IF (NV .GT. BS) THEN
183 |                   SCR = -1
184 |                   GOTO 110
185 |                ENDIF
186 |                DO 10 TT = 1, BS
187 |                   XX(TT, NV) = X(TT + ML - A(I,J) + (K - 1)*BS, I)  
188 |  10            CONTINUE
189 |             ENDIF
190 |  20      CONTINUE
191 |          CALL DGELS("N", BS, NV, 1, XX, T,
192 |      *               YY, T, WK, 2*T*N, INFO)
193 |          IF (INFO .NE. 0) GOTO 110
194 | c        compute predictive sum of squares,
195 |          CALL DORGQR(NV, NV, NV, XX, T, WK(1), 
196 |      *               WK(NV + 1), 2*T*N - NV, INFO) 
197 |          DO 80 TT = 1, BS
198 |             TMPY = X(TT + ML + (K-1)*BS, J) - YY(1,1)
199 |             NV = 1
200 |             TMP = XX(TT, 1) ** 2 
201 |             DO 70 I = 1,N
202 |                IF (A(I,J) .GT. 0) THEN 
203 |                   NV = NV + 1
204 |                   TMP = TMP + (XX(TT,NV) ** 2)
205 |                   TMPY = TMPY - 
206 |      *                   (X(TT + ML - A(I,J) + (K-1)*BS, I) * YY(NV,1)) 
207 |                ENDIF
208 |  70         CONTINUE
209 |             SCR = SCR + (((TMPY) / (1 - TMP)) ** 2) 
210 |  80      CONTINUE              
211 |  100  CONTINUE        
212 |  110  CONTINUE
213 |       RETURN
214 |       END
215 | c
216 | c  
217 |       SUBROUTINE GTCOEF(T, N, X, ML, BS, A,JOB,NRM, XX, YY, WK, B,
218 |      *                 INFO)
219 | c     Get average (and sqared or absolute) coefficients 
220 | c     coefficients can be normalized by 
221 | c        b(i,J) = b(i,j) / sqrt(b(i,j)^2 + v(j) / v(i))   
222 | c     where b(i,j) is the coefficient of x(,i) in the regression of
223 | c     x(,j) and v(i) is the varaince of the residuals for x(,i) 
224 | c     Gherardo Varando (2019) 
225 |       INTEGER T, N, ML, BS, A(N,N), INFO 
226 |       DOUBLE PRECISION X(T,N), XX(T, N + 1), YY(T, 1), 
227 |      *                 WK(2*T*N), B(N,N)
228 |       CHARACTER JOB*3
229 | Cf2py intent(in) t
230 | Cf2py intent(in) n 
231 | Cf2py intent(in) ml 
232 | Cf2py intent(in) bs 
233 | Cf2py intent(in) x
234 | Cf2py intent(in) a 
235 | Cf2py optional, intent(in) job
236 | Cf2py optional, intent(in) :: nrm = 0
237 | Cf2py optional, intent(cache) :: xx=array((t,n+1))
238 | Cf2py optional, intent(cache) :: yy=array((t,n+1))
239 | Cf2py optional, intent(cache) :: wk=array((2*t*n))
240 | Cf2py intent(out)  b
241 | Cf2py intent(out)  info
242 | c          
243 | c     ON ENTRY 
244 | c        T, N, ML, BS, X, A  as in SLVAR  
245 | c        JOB character 
246 | c            IF JOB .EQ. "ABS" the average absolute coefficients
247 | c            IF JOB .EQ. "SQR" the average square coefficients 
248 | c            ELSE the average coefficients 
249 | c        NRM integer
250 | c            IF NRM .GT. 0 normalize the coefficient
251 | c        XX, YY, WK  working variables  
252 | c     ON RETURN
253 | c        B the computed average coefficients  
254 | c        INFO 
255 | c     INTERNAL VARIABLES 
256 |       INTEGER NF, NV, TT, I, K, J
257 |       DOUBLE PRECISION V(N)
258 |       IF (ML .GE. T .OR. ML .LT. 1) ML = 1
259 |       IF (BS .LT. 0) BS = (T - ML) / (-BS) 
260 |       IF (BS .GT. T - ML) BS = T - ML
261 |       NF = (T - ML) / BS
262 |       DO 200 J = 1, N
263 |          V(J) = 0
264 |          DO 1 I = 1,N
265 |             B(I,J) = 0
266 |  1       CONTINUE
267 |          DO 100 K = 1, NF
268 |             NV = 1
269 |             DO 5  TT = 1, BS
270 |                XX(TT, NV) = 1   
271 |                YY(TT, 1) = X(TT + ML + (K-1) * BS, J) 
272 |  5          CONTINUE
273 |             DO 20 I = 1, N 
274 |                IF (A(I,J) .GT. 0) THEN 
275 |                   NV = NV + 1
276 |                   DO 10 TT = 1, BS
277 |                      XX(TT, NV) = X(TT + ML - A(I,J) + (K - 1)*BS, I)  
278 |  10               CONTINUE
279 |                ENDIF
280 |  20         CONTINUE
281 |             CALL DGELS("N", BS, NV, 1, XX, T,
282 |      *                 YY, T, WK, 2*T*N, INFO)
283 |             IF (INFO .NE. 0) GOTO 100
284 |             DO 30 I=NV+1, BS
285 |                V(J) = V(J) + YY(I,1) ** 2 / (BS * NF)
286 |  30         CONTINUE
287 |             NV = 1
288 |             DO 40 I = 1, N 
289 |                IF (A(I,J) .GT. 0) THEN 
290 |                   NV = NV + 1
291 |                   IF (JOB .EQ. "ABS") THEN
292 |                      B(I,J) = B(I,J) + (ABS(YY(NV,1))  / NF)
293 |                   ELSEIF (JOB .EQ. "SQR") THEN
294 |                      B(I,J) = B(I,J) + (YY(NV,1)**2)/NF
295 |                   ELSE
296 |                      B(I,J) = B(I,J) + YY(NV,1)/NF 
297 |                   ENDIF
298 |                ENDIF
299 |  40         CONTINUE
300 |  100     CONTINUE        
301 |  200  CONTINUE
302 |       IF (NRM .GT. 0) THEN 
303 |          DO 300 J = 1,N
304 |             DO 250 I = 1,N
305 |                B(I,J) = B(I,J) / SQRT( B(I,J)**2 + V(J)/V(I))
306 |  250        CONTINUE
307 |  300     CONTINUE
308 |       ENDIF
309 |       RETURN
310 |       END
311 | c 
312 | c
313 |       SUBROUTINE GTRSS(T, N, X, ML, BS, A, J, XX, YY, WK, SCR,
314 |      *                 INFO)
315 | c     get average residuals sum of squares for variable j  
316 | c     Gherardo Varando (2019)
317 |       INTEGER T, N, J, ML, A(N,N), INFO, BS
318 |       DOUBLE PRECISION X(T,N), XX(T, N + 1), YY(T, 1), 
319 |      *                 WK(2*T*N),   SCR
320 | c          
321 | Cf2py intent(in) t
322 | Cf2py intent(in) n 
323 | Cf2py intent(in) ml 
324 | Cf2py intent(in) bs 
325 | Cf2py intent(in) x
326 | Cf2py intent(in) a 
327 | Cf2py intent(in) j 
328 | Cf2py optional, intent(cache) :: xx=array((t,n+1))
329 | Cf2py optional, intent(cache) :: yy = array((t,1))
330 | Cf2py optional, intent(cache) :: wk = array(2*t*n)
331 | Cf2py intent(out)  scr
332 | Cf2py intent(out)  info
333 | c          
334 | c     ON ENTRY 
335 | c        T, N, ML, BS, X, A  as in SLVAR  
336 | c        XX, YY, WK  working variables  
337 | c        J INTEGER  the variable to consider
338 | c     ON RETURN
339 | c        SCR the computed RSS for variable J 
340 | c        INFO 
341 | c     INTERNAL VARIABLES 
342 |       INTEGER NF, NV, TT, I, K 
343 |       IF (ML .GE. T .OR. ML .LT. 1) ML = 1
344 |       IF (BS .LT. 0) BS = (T - ML) / (-BS) 
345 |       IF (BS .GT. T - ML) BS = T - ML
346 |       SCR = 0.0
347 |       NF = (T - ML) / BS
348 |       DO 100 K = 1, NF
349 |          NV = 1
350 |          DO 5  TT = 1, BS
351 |             XX(TT, NV) = 1   
352 |             YY(TT, 1) = X(TT + ML + (K-1) * BS, J) 
353 |  5       CONTINUE
354 |          DO 20 I = 1, N 
355 |             IF (A(I,J) .GT. 0) THEN 
356 |                NV = NV + 1
357 |                DO 10 TT = 1, BS
358 |                   XX(TT, NV) = X(TT + ML - A(I,J) + (K - 1)*BS, I)  
359 |  10            CONTINUE
360 |             ENDIF
361 |  20      CONTINUE
362 |          CALL DGELS("N", BS, NV, 1, XX, T,
363 |      *               YY, T, WK, 2*T*N, INFO)
364 |          IF (INFO .NE. 0) GOTO 100
365 | c        compute RSS,
366 |          DO 30 TT = NV+1, BS
367 |            SCR = SCR + (YY(TT,1) ** 2) 
368 |  30      CONTINUE
369 |          SCR = SCR
370 |  100  CONTINUE        
371 |       SCR = SCR / (NF * BS)
372 |       RETURN
373 |       END
374 | c
375 | c
376 | c
377 |       SUBROUTINE GTSTAT(T, N, X, BS, ML, A, JOB, XX, YY, WK, B, DF)  
378 | c     Obtain the log likelihood-ratio statistics, the f-statistics 
379 | c     or the difference of residuals for each edge
380 | c     Gherardo Varando (2019)
381 |       INTEGER T, N, BS, ML, INFO, A(N,N), DF(N, 2)
382 |       DOUBLE PRECISION X(T,N), XX(T,N + 1), YY(T,1),WK(2*T*N), B(N,N) 
383 |       CHARACTER JOB*2
384 | Cf2py intent(in) t
385 | Cf2py intent(in) n 
386 | Cf2py intent(in) ml 
387 | Cf2py intent(in) bs 
388 | Cf2py intent(in) x
389 | Cf2py intent(in) a 
390 | Cf2py intent(in) job
391 | Cf2py optional, intent(cache) ::  xx = array((t,n))
392 | Cf2py optional, intent(cache) ::  yy = array((t,1))
393 | Cf2py optional, intent(cache) ::  wk = array(2*t*n) 
394 | Cf2py intent(out)  b
395 | Cf2py intent(out)  df
396 | c          
397 | c     ON ENTRY 
398 | c        T, N, ML, BS, X, A  as in SLVAR  
399 | c        XX, YY, WK  working variables  
400 | c        JOB CHARACHTER*2 
401 | c            IF "LR" the logarithm of the likelihood-ratio 
402 | c            IF "FS" the F-statistic 
403 | c            IF "ÐF" the difference of RSS
404 | c     ON RETURN
405 | c        B DOUBLE PRECISION(N,N)
406 | c          the requested statistics
407 | c        DF DOUBLE PRECISION(N,2)
408 | c          values to obtain degrees of freedom 
409 | c     INTERNAL VARIABLES 
410 |       INTEGER I,J,TMP
411 |       DOUBLE PRECISION SCR,NWSCR
412 |       IF (ML .LT. 1) THEN 
413 |          DO 10 J = 1,N 
414 |             DO 5 I = 1,N 
415 |                ML = MAX(ML,A(I,J))  
416 |  5          CONTINUE
417 |  10      CONTINUE
418 |       ENDIF
419 |       IF (ML .GE. T .OR. ML .LT. 1) ML = 1
420 |       IF (BS .LT. 0) BS = (T - ML) / (-BS) 
421 |       IF (BS .GT. T - ML) BS = T - ML      
422 |       NF = (T - ML) / BS
423 |       DO 60 J = 1,N 
424 |          DF(J, 1) = 0
425 |          DF(J, 2) = 0
426 | c        get rss for variable j 
427 |          CALL GTRSS(T, N, X, ML,BS, A, J, XX, YY, WK, 
428 |      *              SCR, INFO)
429 |          DO 55 I=1,N 
430 |             B(I,J) = 0
431 |             IF (A(I,J) .GT. 0) THEN
432 | c              add one parameter for each batch 
433 |                DF(J, 1) = DF(J, 1) + NF
434 | c              remove one edge from matrix a 
435 |                TMP = A(I,J) 
436 |                A(I,J) = 0 
437 | c              compute new score
438 |                CALL GTRSS(T, N, X, ML, BS, A, J, XX, YY, WK, 
439 |      *                       NWSCR, INFO)
440 | c              restore matrix a 
441 |                A(I,J) = TMP    
442 | c              store relevant statistic
443 |                IF (JOB .EQ. "FS") B(I,J) = (NWSCR - SCR) / SCR
444 |                IF (JOB .EQ. "LR") B(I,J) = (LOG(NWSCR) - LOG(SCR)) 
445 |      *                                     * NF * BS
446 |                IF (JOB .EQ. "DF") B(I,J) = NWSCR - SCR 
447 |                ENDIF
448 |  55         CONTINUE
449 |             DF(J,2) = DF(J,1) - NF
450 |  60      CONTINUE
451 | c     for f-statistics, finish computing and store df
452 |       IF (JOB .EQ. "FS") THEN
453 |          DO 70 J = 1,N
454 |             DF(J,2) = BS*NF - DF(J,1) 
455 |             DF(J,1) = NF 
456 |             DO 65 I = 1,N 
457 |                B(I,J) = B(I,J) * DF(J,2)
458 |  65         CONTINUE
459 |  70      CONTINUE              
460 |       ENDIF
461 |       RETURN
462 |       END      
463 | 


--------------------------------------------------------------------------------
/tidybench/slarac.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Implements the SLARAC (Subsampled Linear Auto-Regression Absolute
  3 | Coefficients) algorithm.
  4 | 
  5 | Based on an implementation that is originally due to Sebastian Weichwald
  6 | (sweichwald).
  7 | """
  8 | 
  9 | 
 10 | import numpy as np
 11 | from sklearn.utils import resample
 12 | from .utils import common_pre_post_processing
 13 | 
 14 | 
 15 | INV_GOLDEN_RATIO = 2 / (1 + np.sqrt(5))
 16 | 
 17 | 
 18 | @common_pre_post_processing
 19 | def slarac(data,
 20 |            maxlags=1,
 21 |            n_subsamples=200,
 22 |            subsample_sizes=[INV_GOLDEN_RATIO**(1 / k) for k in [1, 2, 3, 6]],
 23 |            missing_values=None,
 24 |            aggregate_lags=lambda x: x.max(axis=1).T,
 25 |            ):
 26 |     """SLARAC (Subsampled Linear Auto-Regression Absolute Coefficients).
 27 | 
 28 |     Parameters
 29 |     ----------
 30 |     data : ndarray
 31 |         T (timepoints) x N (variables) input data
 32 | 
 33 |     maxlags : int
 34 |         Maximum number of lags to consider
 35 | 
 36 |     n_subsamples : int
 37 |         How often to subsample the data
 38 | 
 39 |     subsample_sizes : ndarray
 40 |         Possible sizes of the subsamples as fractions
 41 | 
 42 |     missing_values : float
 43 |         Values in the data matrix that equal missing_values are treated as
 44 |         missing values
 45 | 
 46 |     aggregate_lags : function
 47 |         Function that takes an N (to) x maxlags x N (from) ndarray as input and
 48 |         outputs an N x N ndarray aggregating the lag-resolved scores,
 49 |         for example
 50 |             lambda x: x.max(axis=1).T
 51 |         or
 52 |             lambda x: x.sum(axis=1).T
 53 | 
 54 |     Arguments for the common pre-processing steps of the data and the common
 55 |     post-processing steps of the scores are documented in
 56 |     utils.common_pre_post_processing
 57 | 
 58 |     Returns
 59 |     ----------
 60 |     scores : ndarray
 61 |         Array where the (i,j)th entry corresponds to the link X_i --> X_j
 62 |     """
 63 | 
 64 |     # T timepoints, N variables
 65 |     T, N = data.shape
 66 | 
 67 |     # Obtain absolute regression coefficients on the entire data set and
 68 |     # random subsamples
 69 |     scores = np.abs(varmodel(data, maxlags, missing_values=missing_values))
 70 |     for subsample_size in np.random.choice(subsample_sizes, n_subsamples):
 71 |         n_samples = int(np.round(subsample_size * T))
 72 |         scores += np.abs(varmodel(
 73 |             data, maxlags, n_samples=n_samples, missing_values=missing_values))
 74 | 
 75 |     # Drop the intercepts and divide the sum to obtain the average
 76 |     scores = scores[:, 1:] / (n_subsamples + 1)
 77 | 
 78 |     # Aggregate lagged coefficients to square connectivity matrix
 79 |     scores = aggregate_lags(scores.reshape(N, -1, N))
 80 |     return scores
 81 | 
 82 | 
 83 | def varmodel(data, maxlags=1, n_samples=None, missing_values=None):
 84 |     # Stack data to perform regression of present on past values
 85 |     Y = data.T[:, maxlags:]
 86 |     d = Y.shape[0]
 87 |     Z = np.vstack([np.ones((1, Y.shape[1]))] +
 88 |                   [data.T[:, maxlags - k:-k]
 89 |                    for k in range(1, maxlags + 1)])
 90 | 
 91 |     # Subsample data
 92 |     if n_samples is not None:
 93 |         Y, Z = resample(Y.T, Z.T, n_samples=n_samples)
 94 |         Y, Z = Y.T, Z.T
 95 | 
 96 |     # Missing value treatment
 97 |     if missing_values is not None:
 98 |         keepinds = (np.sum(Y == missing_values, axis=0)
 99 |                     + np.sum(Z == missing_values, axis=0)) == 0
100 |         Y = Y[:, keepinds]
101 |         Z = Z[:, keepinds]
102 | 
103 |     # Heuristic to determine a feasible number of lags
104 |     feasiblelag = maxlags
105 |     if Z.shape[1] / Z.shape[0] < INV_GOLDEN_RATIO:
106 |         feasiblelag = int(np.floor(
107 |             (Z.shape[1] / INV_GOLDEN_RATIO - 1) / d))
108 |     # Choose a random effective lag that is feasible and <= maxlag
109 |     efflag = np.random.choice(np.arange(1, max(maxlags, feasiblelag) + 1))
110 |     indcutoff = efflag * d + 1
111 | 
112 |     # Obtain linear regression coefficients
113 |     B = np.zeros((d, maxlags * d + 1))
114 |     B[:, :indcutoff] = np.linalg.lstsq(
115 |         Z[:indcutoff, :].dot(Z[:indcutoff, :].T),
116 |         Z[:indcutoff, :].dot(Y.T),
117 |         rcond=None)[0].T
118 |     return B
119 | 


--------------------------------------------------------------------------------
/tidybench/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def common_pre_post_processing(func_raw):
 5 |     """
 6 |     Used as decorator to add common pre-processing steps of the data (args[0])
 7 |     and common post-processing steps of the scores (out[0]) to an algorithm.
 8 | 
 9 |     Pre-/post-processing steps are performed in this order, if activated.
10 | 
11 |     Pre-processing of the data
12 |         pre_normalise: boolean; whether to normalise the data
13 | 
14 |     Post-processing of the scores
15 |         post_standardise: boolean; whether to standardise the scores
16 |         post_zeroonescaling: boolean; whether to scale the scores to [0, 1]
17 |         post_edgeprior: boolean; whether to divide the scores by their mean
18 |             (may be helpful for comparability of scores across datasets)
19 |     """
20 |     def func(*args, **kwargs):
21 |         pre_normalise = kwargs.pop("pre_normalise", False)
22 | 
23 |         post_standardise = kwargs.pop("post_standardise", False)
24 |         post_zeroonescaling = kwargs.pop("post_zeroonescaling", False)
25 |         post_edgeprior = kwargs.pop("post_edgeprior", False)
26 | 
27 |         # Pre-process the data
28 |         if pre_normalise:
29 |             args = list(args)
30 |             args[0] = standardise(args[0])
31 |             args = tuple(args)
32 | 
33 |         # Call original algorithm
34 |         out = func_raw(*args, **kwargs)
35 | 
36 |         # Post-process the scores (remaining outputs remain unchanged)
37 |         if type(out) == tuple and len(out) > 1:
38 |             scores = out[0]
39 |         else:
40 |             scores = out
41 | 
42 |         if post_standardise:
43 |             scores = standardise(scores, axis=None)
44 |         if post_zeroonescaling:
45 |             scores = (scores - scores.min()) / (scores.max() - scores.min())
46 |         if post_edgeprior:
47 |             scores /= scores.mean()
48 | 
49 |         if type(out) == tuple and len(out) > 1:
50 |             out = list(out)
51 |             out[0] = scores
52 |             out = tuple(out)
53 |         else:
54 |             out = scores
55 | 
56 |         return out
57 | 
58 |     return func
59 | 
60 | 
61 | def standardise(X, axis=0, keepdims=True, copy=False):
62 |     if copy:
63 |         X = np.copy(X)
64 |     X -= X.mean(axis=axis, keepdims=keepdims)
65 |     X /= X.std(axis=axis, keepdims=keepdims)
66 |     return X
67 | 


--------------------------------------------------------------------------------