├── .gitignore ├── LICENSE ├── MAINTAINERS ├── README.md ├── examples └── toy.py ├── requirements.txt └── tidybench ├── __init__.py ├── lasar.py ├── qrbs.py ├── selvar.py ├── selvarF.f ├── slarac.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | # For a library or package, you might want to ignore these files since the code is 86 | # intended to run in multiple environments; otherwise, check them in: 87 | # .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # pytype static type analyzer 134 | .pytype/ 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Nikolaj Thams (nikolajthams) 2 | Gherardo Varando (gherardovarando) 3 | Sebastian Weichwald (sweichwald) 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # *TI*me series *D*iscover*Y* *BENCH*mark (tidybench) 2 | 3 | This repository holds implementations of the following four algorithms for causal structure learning for time series, 4 | 5 | * `SLARAC` (Subsampled Linear Auto-Regression Absolute Coefficients), 6 | * `QRBS` (Quantiles of Ridge regressed Bootstrap Samples), 7 | * `LASAR` (LASso Auto-Regression), 8 | * `SELVAR` (Selective auto-regressive model), 9 | 10 | which came in first in 18 and close second in 13 out of the 34 competition categories in the [Causality 4 Climate competition](https://causeme.uv.es/neurips2019/) at the Conference on Neural Information Processing Systems 2019 (NeurIPS). For details on the competition tasks and the outcomes you may watch the [recording of the NeurIPS session](https://slideslive.com/38922052/competition-track-day-21) or consult [the result slides](https://causeme.uv.es/neurips2019/static/img/Runge_NeurIPS_compressed.pdf). 11 | (Algorithm names map as follows between `tidybench` and our competition implementations: `tidybench.slarac` was varvar, `tidybench.qrbs` was ridge, `tidybench.lasar` was varvar(lasso=True), and `tidybench.selvar` was selvar.) 12 | 13 | More details can be found in our [accompanying paper](http://proceedings.mlr.press/v123/weichwald20a.html) and the respective well-documented code files. 14 | 15 | Feel free to use our algorithms (AGPL-3.0 license). In fact, we encourage their use as baseline benchmarks and guidance of future algorithmic and methodological developments for structure learning from time series. 16 | 17 | We kindly ask you to cite our [accompanying paper](http://proceedings.mlr.press/v123/weichwald20a.html) in case you find our code useful: 18 | ``` 19 | @InProceedings{weichwald2020causal, 20 | title = {{Causal structure learning from time series: Large regression coefficients may predict causal links better in practice than small p-values}}, 21 | author = {Weichwald, Sebastian and Jakobsen, Martin E. and Mogensen, Phillip B. and Petersen, Lasse and Thams, Nikolaj and Varando, Gherardo}, 22 | publisher = {PMLR}, 23 | series = {Proceedings of the NeurIPS 2019 Competition and Demonstration Track, Proceedings of Machine Learning Research}, 24 | volume = {123}, 25 | pages = {27--36}, 26 | year = {2020}, 27 | editor = {Hugo Jair Escalante and Raia Hadsell}, 28 | pdf = {http://proceedings.mlr.press/v123/weichwald20a/weichwald20a.pdf}, 29 | url = {http://proceedings.mlr.press/v123/weichwald20a.html}, 30 | } 31 | ``` 32 | 33 | 34 | 35 | ## What you get 36 | 37 | **Input**: time series data (and some method-specific parameters) 38 | 39 | **Output**: score matrix indicating which structural links are inferred likely to exist 40 | 41 | All four algorithms take as input multivariate time series data in form of a T x d matrix of T time samples of d variables and output a d x d score/adjacency matrix A. The (i,j)th entry corresponds to an edge from the i-th to the j-th time series component, where higher values correspond to edges that are inferred to be more likely to exist, given the observed data. 42 | 43 | 44 | ## Example 45 | 46 | At the moment, only a [toy example](examples/toy.py) is provided. 47 | 48 | 49 | ## Requirements 50 | 51 | `SLARAC`, `QRBS`, and `LASAR` require numpy and sklearn. These requirements are listed in the [requirements.txt](requirements.txt) and can be installed via `pip install -r requirements.txt`. 52 | 53 | `SELVAR` requires lapack/blas installed and the compilation of 54 | [selvarF.f](tidybench/selvarF.f) with [f2py](https://docs.scipy.org/doc/numpy/f2py/) 55 | (e.g. `f2py -llapack -c -m selvarF selvarF.f`). 56 | 57 | ## Who we are 58 | 59 | We are a team of PhD students and Postdocs that formed at the [Copenhagen Causality Lab (CoCaLa)](https://math.ku.dk/cocala) of the University of Copenhagen ([Martin E Jakobsen](https://www.math.ku.dk/english/research/spt/cocala/?pure=en/persons/410383), [Phillip B Mogensen](https://www.math.ku.dk/english/staff/?pure=en/persons/467826), [Lasse Petersen](https://www.math.ku.dk/english/research/spt/cocala/?pure=en/persons/433485), [Nikolaj Thams](https://nikolajthams.github.io/), [Gherardo Varando](https://gherardovarando.github.io/), [Sebastian Weichwald](https://sweichwald.de)) to participate in the C4C competition. 60 | -------------------------------------------------------------------------------- /examples/toy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tidybench 3 | 4 | 5 | if __name__ == "__main__": 6 | """ 7 | Generate time series data over three variables from a stable SVAR with 8 | the following structure: 9 | """ 10 | # --> X_1(t-1) --> X_1(t) --> X_1(t+1) --> 11 | # \ \ 12 | # \ \ 13 | # \ \ 14 | # \ \ 15 | # v v 16 | # --> X_2(t-1) --> X_2(t) --> X_2(t+1) --> 17 | # \ ^ \ ^ 18 | # \ / \ / 19 | # \ / 20 | # / \ / \ 21 | # / v / v 22 | # --> X_3(t-1) --> X_3(t) --> X_3(t+1) --> 23 | B = np.asarray([[1, 2, 0], 24 | [0, 1, 1], 25 | [0, 2, 1]]) / 3 26 | T, d = 500, 3 27 | X = np.random.randn(T, d) 28 | for t in range(1, T): 29 | X[t, :] += B.T.dot(X[t-1, :]) 30 | 31 | # The true adjacency matrix is 32 | A = B > 0 33 | print('True adjacency matrix:') 34 | print(A) 35 | 36 | print('Score matrix for the adjacency matrix as inferred by ' 37 | 'slarac (post_standardised):') 38 | print(tidybench.slarac(X, post_standardise=True).round(2)) 39 | 40 | print('Score matrix for the adjacency matrix as inferred by ' 41 | 'qrbs (post_standardised):') 42 | print(tidybench.qrbs(X, post_standardise=True).round(2)) 43 | 44 | print('Score matrix for the adjacency matrix as inferred by ' 45 | 'lasar (post_standardised):') 46 | print(tidybench.lasar(X, post_standardise=True).round(2)) 47 | 48 | print('Score matrix for the adjacency matrix as inferred by ' 49 | 'selvar (post_standardised):') 50 | print(tidybench.selvar(X, post_standardise=True).round(2)) 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.8.2 2 | scikit-learn>=0.17 3 | -------------------------------------------------------------------------------- /tidybench/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "lasar", 3 | "qrbs", 4 | "selvar", 5 | "slarac", 6 | ] 7 | 8 | 9 | from .lasar import lasar 10 | from .qrbs import qrbs 11 | from .selvar import selvar 12 | from .slarac import slarac 13 | -------------------------------------------------------------------------------- /tidybench/lasar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements the LASAR (LASso Auto-Regression) algorithm. 3 | 4 | Based on an implementation that is originally due to Sebastian Weichwald 5 | (sweichwald). 6 | """ 7 | 8 | 9 | import numpy as np 10 | from sklearn.linear_model import LassoLarsCV 11 | from sklearn.utils import resample 12 | from .utils import common_pre_post_processing 13 | 14 | 15 | INV_GOLDEN_RATIO = 2 / (1 + np.sqrt(5)) 16 | 17 | 18 | @common_pre_post_processing 19 | def lasar(data, 20 | maxlags=1, 21 | n_subsamples=100, 22 | subsample_sizes=[INV_GOLDEN_RATIO**(1 / k) for k in [1, 2, 3, 6]], 23 | cv=5, 24 | aggregate_lags=lambda x: x.max(axis=1).T, 25 | ): 26 | """LASAR (LASso Auto-Regression). 27 | 28 | Parameters 29 | ---------- 30 | data : ndarray 31 | T (timepoints) x N (variables) input data 32 | 33 | maxlags : int 34 | Maximum number of lags to consider 35 | 36 | n_subsamples : int 37 | How often to subsample the data 38 | 39 | subsample_sizes : ndarray 40 | Possible sizes of the subsamples as fractions 41 | 42 | cv : int 43 | Number of cross-validation folds for the lasso variable selection step 44 | 45 | aggregate_lags : function 46 | Function that takes an N (to) x maxlags x N (from) ndarray as input and 47 | outputs an N x N ndarray aggregating the lag-resolved scores, 48 | for example 49 | lambda x: x.max(axis=1).T 50 | or 51 | lambda x: x.sum(axis=1).T 52 | 53 | Arguments for the common pre-processing steps of the data and the common 54 | post-processing steps of the scores are documented in 55 | utils.common_pre_post_processing 56 | 57 | Returns 58 | ---------- 59 | scores : ndarray 60 | Array where the (i,j)th entry corresponds to the link X_i --> X_j 61 | """ 62 | 63 | # T timepoints, N variables 64 | T, N = data.shape 65 | 66 | # Obtain absolute regression coefficients after refitting on a cross- 67 | # validated variable selection obtained by lasso regression on the entire 68 | # data set and random subsamples 69 | scores = np.abs(lassovar(data, maxlags, cv=cv)) 70 | for subsample_size in np.random.choice(subsample_sizes, n_subsamples): 71 | n_samples = int(np.round(subsample_size * T)) 72 | scores += np.abs(lassovar(data, maxlags, n_samples=n_samples, cv=cv)) 73 | 74 | # Divide the sum to obtain the average 75 | scores /= (n_subsamples + 1) 76 | 77 | # aggregate lagged coefficients to square connectivity matrix 78 | scores = aggregate_lags(scores.reshape(N, -1, N)) 79 | return scores 80 | 81 | 82 | def lassovar(data, maxlags=1, n_samples=None, cv=5): 83 | # Stack data to perform regression of present on past values 84 | Y = data.T[:, maxlags:] 85 | d = Y.shape[0] 86 | Z = np.vstack([data.T[:, maxlags - k:-k] 87 | for k in range(1, maxlags + 1)]) 88 | Y, Z = Y.T, Z.T 89 | 90 | # Subsample data 91 | if n_samples is not None: 92 | Y, Z = resample(Y, Z, n_samples=n_samples) 93 | 94 | scores = np.zeros((d, d * maxlags)) 95 | 96 | ls = LassoLarsCV(cv=cv, n_jobs=1) 97 | 98 | residuals = np.zeros(Y.shape) 99 | 100 | # Consider one variable after the other as target 101 | for j in range(d): 102 | target = np.copy(Y[:, j]) 103 | selectedparents = np.full(d * maxlags, False) 104 | # Include one lag after the other 105 | for l in range(1, maxlags + 1): 106 | ind_a = d * (l - 1) 107 | ind_b = d * l 108 | ls.fit(Z[:, ind_a:ind_b], target) 109 | selectedparents[ind_a:ind_b] = ls.coef_ > 0 110 | target -= ls.predict(Z[:, ind_a:ind_b]) 111 | 112 | residuals[:, j] = np.copy(target) 113 | 114 | # Refit OLS using the selected variables to get rid of the bias 115 | ZZ = Z[:, selectedparents] 116 | B = np.linalg.lstsq(ZZ.T.dot(ZZ), ZZ.T.dot(Y[:, j]), rcond=None)[0] 117 | scores[j, selectedparents] = B 118 | 119 | return scores 120 | -------------------------------------------------------------------------------- /tidybench/qrbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements the QRBS (Quantiles of Ridge regressed Bootstrap Samples) algorithm. 3 | 4 | Based on an implementation that is originally due to Nikolaj Thams 5 | (nikolajthams). 6 | """ 7 | 8 | 9 | import numpy as np 10 | from sklearn.linear_model import Ridge 11 | from sklearn.utils import resample 12 | from .utils import common_pre_post_processing 13 | 14 | 15 | @common_pre_post_processing 16 | def qrbs(data, 17 | lags=1, 18 | alpha=.005, 19 | q=.75, 20 | n_resamples=600, 21 | ): 22 | """ 23 | Perform bootstrapped ridge regression of data at time t on data in the past 24 | 25 | Parameters 26 | ---------- 27 | data : ndarray 28 | T (timepoints) x N (variables) input data 29 | 30 | lags : int 31 | Number of lags to include in the modelling 32 | 33 | alpha : double 34 | Penalization parameter used for the ridge regression 35 | 36 | q : double 37 | The method performs 200 bootstrap samples, in each fitting a ridge 38 | regression on a random subset of the data. This gives 200 estimates 39 | of the effect i -> j. 40 | We take the q'th quantile as the final estimate. 41 | q = 1 corresponds to the max effect across samples, q = 0.5 to the 42 | median effect. 43 | 44 | n_resamples : int 45 | Number of bootstrap samples drawn 46 | 47 | Arguments for the common pre-processing steps of the data and the common 48 | post-processing steps of the scores are documented in 49 | utils.common_pre_post_processing 50 | 51 | Returns 52 | ---------- 53 | scores : ndarray 54 | Array with scores for each link i -> j 55 | """ 56 | 57 | # We regress y = data_t on X = data_[t-1, ..., t-lags] 58 | y = np.diff(data, axis=0)[lags-1:] 59 | X = np.concatenate([data[lag:-(lags-lag)] 60 | for lag in np.flip(np.arange(lags))], axis=1) 61 | 62 | # Initiate ridge regressor 63 | ls = Ridge(alpha) 64 | 65 | # Bootstrap fit lasso coefficients 66 | k = int(np.floor(data.shape[0]*0.7)) 67 | results = np.stack([ 68 | ls.fit(*resample(X, y, n_samples=k)).coef_ 69 | for _ in range(n_resamples)]) 70 | 71 | # Aggregate lags by taking abs and summing 72 | results = np.abs( 73 | results.reshape(n_resamples, y.shape[1], lags, -1)).sum(axis=2) 74 | scores = np.quantile(results, q, axis=0) 75 | # Return transposed scores because ridge default beta*X means you can read 76 | # parents by row. Instead by transposing, the parents of i are in column i 77 | return scores.T 78 | -------------------------------------------------------------------------------- /tidybench/selvar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements the SELVAR (Selective auto-regressive model) algorithm. 3 | 4 | Based on an implementation that is originally due to Gherardo Varando 5 | (gherardovarando). 6 | """ 7 | 8 | from .utils import common_pre_post_processing 9 | ### 10 | # compile selvarF.f with: 11 | # f2py -llapack -c -m selvarF selvarF.f 12 | ### 13 | try: 14 | from .selvarF import slvar, gtstat, gtcoef 15 | except ImportError: 16 | slvar = None 17 | gtstat = None 18 | gtcoef = None 19 | 20 | 21 | @common_pre_post_processing 22 | def selvar(data, 23 | maxlags=1, 24 | batchsize=-1, 25 | mxitr=-1, 26 | trace=0, 27 | ): 28 | """ 29 | SELVAR (Selective auto-regressive model). 30 | 31 | Parameters 32 | ---------- 33 | data : ndarray 34 | T (timepoints) x N (variables) input data 35 | 36 | maxlags : int 37 | Maximum number of lags to include in the model. 38 | If maxlags < 0 then the maximum lag will be iteratively 39 | adjusted for each variable until no decrease in PRSS. 40 | 41 | batchsize : int 42 | Number of consecutive time points to use in each training batch. 43 | If batchsize < 0 then batchsize is set to the maximum available 44 | time boints given maxlags. 45 | 46 | mxitr : int 47 | Maximum number of iterations (each variable) for the 48 | hill-climbing search. If mxitr < 0 then the 49 | search will stop only when no decrease in PRSS is possible. 50 | 51 | trace : int 52 | If positive messages will be printed out during the search. 53 | 54 | Arguments for the common pre-processing steps of the data and the common 55 | post-processing steps of the scores are documented in 56 | utils.common_pre_post_processing 57 | 58 | Returns 59 | ---------- 60 | scores : ndarray 61 | Array with scores for each link i -> j 62 | """ 63 | 64 | if slvar is None: 65 | raise RuntimeError("selvarF.f is not yet compiled") 66 | 67 | scores, lags, info = slvar(data, bs=int(batchsize), ml=int(maxlags), 68 | mxitr=int(mxitr), trc=int(trace)) 69 | 70 | return scores 71 | -------------------------------------------------------------------------------- /tidybench/selvarF.f: -------------------------------------------------------------------------------- 1 | c Implements the SELVAR (Selective auto-regressive model) algorithm. 2 | c 3 | c Based on an implementation that is originally due to Gherardo Varando (gherardovarando). 4 | c 5 | SUBROUTINE SLVAR(T, N, X, BS, ML, MXITR, B, A, INFO, TRC) 6 | c Select structure and lags of a (local) VAR model 7 | c Gherardo Varando (2019) 8 | INTEGER T, N, BS, ML, MXITR, A(N,N), INFO, TRC 9 | DOUBLE PRECISION X(T,N), B(N,N) 10 | Cf2py integer, intent(in) n 11 | Cf2py integerm, intent(in) t 12 | Cf2py intent(in) x 13 | Cf2py integer, optional, intent(in):: bs = -1 14 | Cf2py integer, optional, intent(in) :: ml = -1 15 | Cf2py integer, optional, intent(in) :: mxitr = -1 16 | Cf2py optional, intent(in) :: lmb = 0 17 | Cf2py intent(out) b 18 | Cf2py intent(out) a 19 | Cf2py intent(out) info 20 | Cf2py integer, optional, intent(in) :: trc = 1 21 | c ON ENTRY 22 | c T integer 23 | c number of time points 24 | c N integer 25 | c number of variables 26 | c X double precision (T, N) 27 | c matrix of observations 28 | c BS integer 29 | c size of the batch 30 | c * IF BS .GT. 0 batch size = BS 31 | c * IF BS .LT. 0 batch size = T / -BS 32 | c * IF BS .EQ. 0 33 | c ML integer 34 | c maximum time lag 35 | c * IF ML .GT. 0 maximum time lag = ML 36 | c * IF ML .LT. 0 maximum time lag is searched iteratively 37 | c MXITR integer 38 | c maximum number of iterations in hc 39 | c * IF MXITR .EQ. 0 perform no search and just score edges 40 | c * IF MXITR .LT. 0 THEN MXITR = INF 41 | c B double precision (N,N) 42 | c empty matrix 43 | c A integer (N,N) 44 | c empty matrix for return 45 | c TRC integer 46 | c IF TRC .GT. 0 print trace information 47 | c ON RETURN 48 | c B double precision (N, N) 49 | c matrix of scores 50 | c A integer (N,N) 51 | c matrix of estimated lags 52 | c INFO integer 53 | c information on errors 54 | c DETAILS 55 | c 56 | c SUBROUTINES 57 | c * GTPRSS 58 | c * GTCOEF 59 | c * GTRSS 60 | c * DGELS from LAPACK 61 | c * DORGQR from LAPACK 62 | c INTERNAL VARIABLES 63 | INTEGER I, J, K, FLG, ITR, TMP, IBST, KBST, ITML 64 | DOUBLE PRECISION XX(T, N + 1), YY(T, 1), 65 | * WK(2*T*N), 66 | * SCR, NWSCR, TMPSCR 67 | SCR = 0.0 68 | NWSCR = 0.0 69 | FLG = 0 70 | ITR = 0 71 | ITML = 0 72 | IF (ML .LT. 1) ITML = 1 73 | IF (ML .GE. T .OR. ML .LT. 1) ML = 1 74 | IF (BS .LT. 0) BS = (T - ML) / (-BS) 75 | IF (BS .GT. T - ML) BS = T - ML 76 | IF (MXITR .EQ. 0) GOTO 100 77 | c print parameters 78 | IF (TRC .GT. 10) THEN 79 | WRITE(*,*) BS, ML 80 | ENDIF 81 | DO 20 J = 1,N 82 | DO 10 I = 1,N 83 | c initialize the empty graph 84 | A(I,J) = 0 85 | 10 CONTINUE 86 | 20 CONTINUE 87 | DO 50 J=1,N 88 | ITR = 0 89 | IF (ITML .GT. 0) ML = 1 90 | c compute initial score (prss) for j 91 | CALL GTPRSS(T, N, X, ML, BS, A, J, XX, YY, WK, 92 | * SCR, INFO) 93 | c hill-climb search start for j 94 | 500 CONTINUE 95 | c increase iteration counter 96 | ITR = ITR + 1 97 | FLG = 0 98 | TMPSCR = SCR 99 | IBST = -1 100 | DO 40 K= 0,ML 101 | DO 30 I=1,N 102 | TMP = A(I,J) 103 | IF (K .NE. TMP) THEN 104 | A(I,J) = K 105 | CALL GTPRSS(T, N, X, ML, BS, A, J, XX, YY, WK, 106 | * NWSCR, INFO) 107 | IF (NWSCR .GE. 0 .AND. NWSCR .LT. TMPSCR) THEN 108 | TMPSCR = NWSCR 109 | IBST = I 110 | KBST = K 111 | ENDIF 112 | A(I,J) = TMP 113 | ENDIF 114 | 30 CONTINUE 115 | 40 CONTINUE 116 | IF (IBST .GT. 0) THEN 117 | A(IBST, J) = KBST 118 | FLG = 1 119 | SCR = TMPSCR 120 | IF (TRC .GT. 0) THEN 121 | WRITE(*,"(a,a5,i3,a5,i3,a,i3,a5,i3)", ADVANCE = "NO") 122 | * char(13), "ITER:", ITR, 123 | * " ADD ", IBST,"-",J, 124 | * " LAG=", KBST 125 | ENDIF 126 | ENDIF 127 | IF (ITML .GT. 0) ML = MIN(ML + 1, T / 2) 128 | IF ((MXITR .LT. 0 .OR. ITR .LT. MXITR) .AND. FLG .GT. 0) GOTO 500 129 | 50 CONTINUE 130 | 100 CONTINUE 131 | CALL GTCOEF(T, N, X, ML, BS, A, "ABS", 0, XX, YY, WK, B, 132 | * INFO) 133 | MXITR = ITR 134 | RETURN 135 | c last line of SLVAR 136 | END 137 | c 138 | c 139 | SUBROUTINE GTPRSS(T, N, X, ML, BS, A, J, XX, YY, WK, SCR, 140 | * INFO) 141 | c Get average Predicted RSS for a given variable 142 | c Gherardo Varando (2019) 143 | INTEGER T, N, J, ML, A(N,N), INFO, BS 144 | DOUBLE PRECISION X(T,N), XX(T, N + 1), YY(T, 1), 145 | * WK(2*T*N), SCR 146 | Cf2py intent(in) n 147 | Cf2py intent(in) t 148 | Cf2py intent(in) x 149 | Cf2py intent(in) bs 150 | Cf2py intent(in) ml 151 | Cf2py intent(in) a 152 | Cf2py intent(in) j 153 | Cf2py optional, intent(cache) :: xx = array((t,n+1)) 154 | Cf2py optional, intent(cache) :: yy = array((t,1)) 155 | Cf2py optional, intent(cache) :: wk = array(2*t*n) 156 | Cf2py intent(out) scr 157 | Cf2py intent(out) info 158 | c 159 | c ON ENTRY 160 | c T, N, ML, BS, X, A as in DSELVAR 161 | c XX, YY, WK working variables 162 | c ON RETURN 163 | c SCR the computed average PRSS 164 | c INFO 165 | c INTERNAL VARIABLES 166 | INTEGER NF, NV, TT, I, K 167 | DOUBLE PRECISION TMP, TMPY 168 | IF (ML .GE. T .OR. ML .LT. 1) ML = 1 169 | IF (BS .LT. 0) BS = (T - ML) / (-BS) 170 | IF (BS .GT. T - ML) BS = T - ML 171 | SCR = 0.0 172 | NF = (T - ML) / BS 173 | DO 100 K = 1, NF 174 | NV = 1 175 | DO 5 TT = 1, BS 176 | XX(TT, NV) = 1 177 | YY(TT, 1) = X(TT + ML + (K-1) * BS, J) 178 | 5 CONTINUE 179 | DO 20 I = 1, N 180 | IF (A(I,J) .GT. 0) THEN 181 | NV = NV + 1 182 | IF (NV .GT. BS) THEN 183 | SCR = -1 184 | GOTO 110 185 | ENDIF 186 | DO 10 TT = 1, BS 187 | XX(TT, NV) = X(TT + ML - A(I,J) + (K - 1)*BS, I) 188 | 10 CONTINUE 189 | ENDIF 190 | 20 CONTINUE 191 | CALL DGELS("N", BS, NV, 1, XX, T, 192 | * YY, T, WK, 2*T*N, INFO) 193 | IF (INFO .NE. 0) GOTO 110 194 | c compute predictive sum of squares, 195 | CALL DORGQR(NV, NV, NV, XX, T, WK(1), 196 | * WK(NV + 1), 2*T*N - NV, INFO) 197 | DO 80 TT = 1, BS 198 | TMPY = X(TT + ML + (K-1)*BS, J) - YY(1,1) 199 | NV = 1 200 | TMP = XX(TT, 1) ** 2 201 | DO 70 I = 1,N 202 | IF (A(I,J) .GT. 0) THEN 203 | NV = NV + 1 204 | TMP = TMP + (XX(TT,NV) ** 2) 205 | TMPY = TMPY - 206 | * (X(TT + ML - A(I,J) + (K-1)*BS, I) * YY(NV,1)) 207 | ENDIF 208 | 70 CONTINUE 209 | SCR = SCR + (((TMPY) / (1 - TMP)) ** 2) 210 | 80 CONTINUE 211 | 100 CONTINUE 212 | 110 CONTINUE 213 | RETURN 214 | END 215 | c 216 | c 217 | SUBROUTINE GTCOEF(T, N, X, ML, BS, A,JOB,NRM, XX, YY, WK, B, 218 | * INFO) 219 | c Get average (and sqared or absolute) coefficients 220 | c coefficients can be normalized by 221 | c b(i,J) = b(i,j) / sqrt(b(i,j)^2 + v(j) / v(i)) 222 | c where b(i,j) is the coefficient of x(,i) in the regression of 223 | c x(,j) and v(i) is the varaince of the residuals for x(,i) 224 | c Gherardo Varando (2019) 225 | INTEGER T, N, ML, BS, A(N,N), INFO 226 | DOUBLE PRECISION X(T,N), XX(T, N + 1), YY(T, 1), 227 | * WK(2*T*N), B(N,N) 228 | CHARACTER JOB*3 229 | Cf2py intent(in) t 230 | Cf2py intent(in) n 231 | Cf2py intent(in) ml 232 | Cf2py intent(in) bs 233 | Cf2py intent(in) x 234 | Cf2py intent(in) a 235 | Cf2py optional, intent(in) job 236 | Cf2py optional, intent(in) :: nrm = 0 237 | Cf2py optional, intent(cache) :: xx=array((t,n+1)) 238 | Cf2py optional, intent(cache) :: yy=array((t,n+1)) 239 | Cf2py optional, intent(cache) :: wk=array((2*t*n)) 240 | Cf2py intent(out) b 241 | Cf2py intent(out) info 242 | c 243 | c ON ENTRY 244 | c T, N, ML, BS, X, A as in SLVAR 245 | c JOB character 246 | c IF JOB .EQ. "ABS" the average absolute coefficients 247 | c IF JOB .EQ. "SQR" the average square coefficients 248 | c ELSE the average coefficients 249 | c NRM integer 250 | c IF NRM .GT. 0 normalize the coefficient 251 | c XX, YY, WK working variables 252 | c ON RETURN 253 | c B the computed average coefficients 254 | c INFO 255 | c INTERNAL VARIABLES 256 | INTEGER NF, NV, TT, I, K, J 257 | DOUBLE PRECISION V(N) 258 | IF (ML .GE. T .OR. ML .LT. 1) ML = 1 259 | IF (BS .LT. 0) BS = (T - ML) / (-BS) 260 | IF (BS .GT. T - ML) BS = T - ML 261 | NF = (T - ML) / BS 262 | DO 200 J = 1, N 263 | V(J) = 0 264 | DO 1 I = 1,N 265 | B(I,J) = 0 266 | 1 CONTINUE 267 | DO 100 K = 1, NF 268 | NV = 1 269 | DO 5 TT = 1, BS 270 | XX(TT, NV) = 1 271 | YY(TT, 1) = X(TT + ML + (K-1) * BS, J) 272 | 5 CONTINUE 273 | DO 20 I = 1, N 274 | IF (A(I,J) .GT. 0) THEN 275 | NV = NV + 1 276 | DO 10 TT = 1, BS 277 | XX(TT, NV) = X(TT + ML - A(I,J) + (K - 1)*BS, I) 278 | 10 CONTINUE 279 | ENDIF 280 | 20 CONTINUE 281 | CALL DGELS("N", BS, NV, 1, XX, T, 282 | * YY, T, WK, 2*T*N, INFO) 283 | IF (INFO .NE. 0) GOTO 100 284 | DO 30 I=NV+1, BS 285 | V(J) = V(J) + YY(I,1) ** 2 / (BS * NF) 286 | 30 CONTINUE 287 | NV = 1 288 | DO 40 I = 1, N 289 | IF (A(I,J) .GT. 0) THEN 290 | NV = NV + 1 291 | IF (JOB .EQ. "ABS") THEN 292 | B(I,J) = B(I,J) + (ABS(YY(NV,1)) / NF) 293 | ELSEIF (JOB .EQ. "SQR") THEN 294 | B(I,J) = B(I,J) + (YY(NV,1)**2)/NF 295 | ELSE 296 | B(I,J) = B(I,J) + YY(NV,1)/NF 297 | ENDIF 298 | ENDIF 299 | 40 CONTINUE 300 | 100 CONTINUE 301 | 200 CONTINUE 302 | IF (NRM .GT. 0) THEN 303 | DO 300 J = 1,N 304 | DO 250 I = 1,N 305 | B(I,J) = B(I,J) / SQRT( B(I,J)**2 + V(J)/V(I)) 306 | 250 CONTINUE 307 | 300 CONTINUE 308 | ENDIF 309 | RETURN 310 | END 311 | c 312 | c 313 | SUBROUTINE GTRSS(T, N, X, ML, BS, A, J, XX, YY, WK, SCR, 314 | * INFO) 315 | c get average residuals sum of squares for variable j 316 | c Gherardo Varando (2019) 317 | INTEGER T, N, J, ML, A(N,N), INFO, BS 318 | DOUBLE PRECISION X(T,N), XX(T, N + 1), YY(T, 1), 319 | * WK(2*T*N), SCR 320 | c 321 | Cf2py intent(in) t 322 | Cf2py intent(in) n 323 | Cf2py intent(in) ml 324 | Cf2py intent(in) bs 325 | Cf2py intent(in) x 326 | Cf2py intent(in) a 327 | Cf2py intent(in) j 328 | Cf2py optional, intent(cache) :: xx=array((t,n+1)) 329 | Cf2py optional, intent(cache) :: yy = array((t,1)) 330 | Cf2py optional, intent(cache) :: wk = array(2*t*n) 331 | Cf2py intent(out) scr 332 | Cf2py intent(out) info 333 | c 334 | c ON ENTRY 335 | c T, N, ML, BS, X, A as in SLVAR 336 | c XX, YY, WK working variables 337 | c J INTEGER the variable to consider 338 | c ON RETURN 339 | c SCR the computed RSS for variable J 340 | c INFO 341 | c INTERNAL VARIABLES 342 | INTEGER NF, NV, TT, I, K 343 | IF (ML .GE. T .OR. ML .LT. 1) ML = 1 344 | IF (BS .LT. 0) BS = (T - ML) / (-BS) 345 | IF (BS .GT. T - ML) BS = T - ML 346 | SCR = 0.0 347 | NF = (T - ML) / BS 348 | DO 100 K = 1, NF 349 | NV = 1 350 | DO 5 TT = 1, BS 351 | XX(TT, NV) = 1 352 | YY(TT, 1) = X(TT + ML + (K-1) * BS, J) 353 | 5 CONTINUE 354 | DO 20 I = 1, N 355 | IF (A(I,J) .GT. 0) THEN 356 | NV = NV + 1 357 | DO 10 TT = 1, BS 358 | XX(TT, NV) = X(TT + ML - A(I,J) + (K - 1)*BS, I) 359 | 10 CONTINUE 360 | ENDIF 361 | 20 CONTINUE 362 | CALL DGELS("N", BS, NV, 1, XX, T, 363 | * YY, T, WK, 2*T*N, INFO) 364 | IF (INFO .NE. 0) GOTO 100 365 | c compute RSS, 366 | DO 30 TT = NV+1, BS 367 | SCR = SCR + (YY(TT,1) ** 2) 368 | 30 CONTINUE 369 | SCR = SCR 370 | 100 CONTINUE 371 | SCR = SCR / (NF * BS) 372 | RETURN 373 | END 374 | c 375 | c 376 | c 377 | SUBROUTINE GTSTAT(T, N, X, BS, ML, A, JOB, XX, YY, WK, B, DF) 378 | c Obtain the log likelihood-ratio statistics, the f-statistics 379 | c or the difference of residuals for each edge 380 | c Gherardo Varando (2019) 381 | INTEGER T, N, BS, ML, INFO, A(N,N), DF(N, 2) 382 | DOUBLE PRECISION X(T,N), XX(T,N + 1), YY(T,1),WK(2*T*N), B(N,N) 383 | CHARACTER JOB*2 384 | Cf2py intent(in) t 385 | Cf2py intent(in) n 386 | Cf2py intent(in) ml 387 | Cf2py intent(in) bs 388 | Cf2py intent(in) x 389 | Cf2py intent(in) a 390 | Cf2py intent(in) job 391 | Cf2py optional, intent(cache) :: xx = array((t,n)) 392 | Cf2py optional, intent(cache) :: yy = array((t,1)) 393 | Cf2py optional, intent(cache) :: wk = array(2*t*n) 394 | Cf2py intent(out) b 395 | Cf2py intent(out) df 396 | c 397 | c ON ENTRY 398 | c T, N, ML, BS, X, A as in SLVAR 399 | c XX, YY, WK working variables 400 | c JOB CHARACHTER*2 401 | c IF "LR" the logarithm of the likelihood-ratio 402 | c IF "FS" the F-statistic 403 | c IF "ÐF" the difference of RSS 404 | c ON RETURN 405 | c B DOUBLE PRECISION(N,N) 406 | c the requested statistics 407 | c DF DOUBLE PRECISION(N,2) 408 | c values to obtain degrees of freedom 409 | c INTERNAL VARIABLES 410 | INTEGER I,J,TMP 411 | DOUBLE PRECISION SCR,NWSCR 412 | IF (ML .LT. 1) THEN 413 | DO 10 J = 1,N 414 | DO 5 I = 1,N 415 | ML = MAX(ML,A(I,J)) 416 | 5 CONTINUE 417 | 10 CONTINUE 418 | ENDIF 419 | IF (ML .GE. T .OR. ML .LT. 1) ML = 1 420 | IF (BS .LT. 0) BS = (T - ML) / (-BS) 421 | IF (BS .GT. T - ML) BS = T - ML 422 | NF = (T - ML) / BS 423 | DO 60 J = 1,N 424 | DF(J, 1) = 0 425 | DF(J, 2) = 0 426 | c get rss for variable j 427 | CALL GTRSS(T, N, X, ML,BS, A, J, XX, YY, WK, 428 | * SCR, INFO) 429 | DO 55 I=1,N 430 | B(I,J) = 0 431 | IF (A(I,J) .GT. 0) THEN 432 | c add one parameter for each batch 433 | DF(J, 1) = DF(J, 1) + NF 434 | c remove one edge from matrix a 435 | TMP = A(I,J) 436 | A(I,J) = 0 437 | c compute new score 438 | CALL GTRSS(T, N, X, ML, BS, A, J, XX, YY, WK, 439 | * NWSCR, INFO) 440 | c restore matrix a 441 | A(I,J) = TMP 442 | c store relevant statistic 443 | IF (JOB .EQ. "FS") B(I,J) = (NWSCR - SCR) / SCR 444 | IF (JOB .EQ. "LR") B(I,J) = (LOG(NWSCR) - LOG(SCR)) 445 | * * NF * BS 446 | IF (JOB .EQ. "DF") B(I,J) = NWSCR - SCR 447 | ENDIF 448 | 55 CONTINUE 449 | DF(J,2) = DF(J,1) - NF 450 | 60 CONTINUE 451 | c for f-statistics, finish computing and store df 452 | IF (JOB .EQ. "FS") THEN 453 | DO 70 J = 1,N 454 | DF(J,2) = BS*NF - DF(J,1) 455 | DF(J,1) = NF 456 | DO 65 I = 1,N 457 | B(I,J) = B(I,J) * DF(J,2) 458 | 65 CONTINUE 459 | 70 CONTINUE 460 | ENDIF 461 | RETURN 462 | END 463 | -------------------------------------------------------------------------------- /tidybench/slarac.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements the SLARAC (Subsampled Linear Auto-Regression Absolute 3 | Coefficients) algorithm. 4 | 5 | Based on an implementation that is originally due to Sebastian Weichwald 6 | (sweichwald). 7 | """ 8 | 9 | 10 | import numpy as np 11 | from sklearn.utils import resample 12 | from .utils import common_pre_post_processing 13 | 14 | 15 | INV_GOLDEN_RATIO = 2 / (1 + np.sqrt(5)) 16 | 17 | 18 | @common_pre_post_processing 19 | def slarac(data, 20 | maxlags=1, 21 | n_subsamples=200, 22 | subsample_sizes=[INV_GOLDEN_RATIO**(1 / k) for k in [1, 2, 3, 6]], 23 | missing_values=None, 24 | aggregate_lags=lambda x: x.max(axis=1).T, 25 | ): 26 | """SLARAC (Subsampled Linear Auto-Regression Absolute Coefficients). 27 | 28 | Parameters 29 | ---------- 30 | data : ndarray 31 | T (timepoints) x N (variables) input data 32 | 33 | maxlags : int 34 | Maximum number of lags to consider 35 | 36 | n_subsamples : int 37 | How often to subsample the data 38 | 39 | subsample_sizes : ndarray 40 | Possible sizes of the subsamples as fractions 41 | 42 | missing_values : float 43 | Values in the data matrix that equal missing_values are treated as 44 | missing values 45 | 46 | aggregate_lags : function 47 | Function that takes an N (to) x maxlags x N (from) ndarray as input and 48 | outputs an N x N ndarray aggregating the lag-resolved scores, 49 | for example 50 | lambda x: x.max(axis=1).T 51 | or 52 | lambda x: x.sum(axis=1).T 53 | 54 | Arguments for the common pre-processing steps of the data and the common 55 | post-processing steps of the scores are documented in 56 | utils.common_pre_post_processing 57 | 58 | Returns 59 | ---------- 60 | scores : ndarray 61 | Array where the (i,j)th entry corresponds to the link X_i --> X_j 62 | """ 63 | 64 | # T timepoints, N variables 65 | T, N = data.shape 66 | 67 | # Obtain absolute regression coefficients on the entire data set and 68 | # random subsamples 69 | scores = np.abs(varmodel(data, maxlags, missing_values=missing_values)) 70 | for subsample_size in np.random.choice(subsample_sizes, n_subsamples): 71 | n_samples = int(np.round(subsample_size * T)) 72 | scores += np.abs(varmodel( 73 | data, maxlags, n_samples=n_samples, missing_values=missing_values)) 74 | 75 | # Drop the intercepts and divide the sum to obtain the average 76 | scores = scores[:, 1:] / (n_subsamples + 1) 77 | 78 | # Aggregate lagged coefficients to square connectivity matrix 79 | scores = aggregate_lags(scores.reshape(N, -1, N)) 80 | return scores 81 | 82 | 83 | def varmodel(data, maxlags=1, n_samples=None, missing_values=None): 84 | # Stack data to perform regression of present on past values 85 | Y = data.T[:, maxlags:] 86 | d = Y.shape[0] 87 | Z = np.vstack([np.ones((1, Y.shape[1]))] + 88 | [data.T[:, maxlags - k:-k] 89 | for k in range(1, maxlags + 1)]) 90 | 91 | # Subsample data 92 | if n_samples is not None: 93 | Y, Z = resample(Y.T, Z.T, n_samples=n_samples) 94 | Y, Z = Y.T, Z.T 95 | 96 | # Missing value treatment 97 | if missing_values is not None: 98 | keepinds = (np.sum(Y == missing_values, axis=0) 99 | + np.sum(Z == missing_values, axis=0)) == 0 100 | Y = Y[:, keepinds] 101 | Z = Z[:, keepinds] 102 | 103 | # Heuristic to determine a feasible number of lags 104 | feasiblelag = maxlags 105 | if Z.shape[1] / Z.shape[0] < INV_GOLDEN_RATIO: 106 | feasiblelag = int(np.floor( 107 | (Z.shape[1] / INV_GOLDEN_RATIO - 1) / d)) 108 | # Choose a random effective lag that is feasible and <= maxlag 109 | efflag = np.random.choice(np.arange(1, max(maxlags, feasiblelag) + 1)) 110 | indcutoff = efflag * d + 1 111 | 112 | # Obtain linear regression coefficients 113 | B = np.zeros((d, maxlags * d + 1)) 114 | B[:, :indcutoff] = np.linalg.lstsq( 115 | Z[:indcutoff, :].dot(Z[:indcutoff, :].T), 116 | Z[:indcutoff, :].dot(Y.T), 117 | rcond=None)[0].T 118 | return B 119 | -------------------------------------------------------------------------------- /tidybench/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def common_pre_post_processing(func_raw): 5 | """ 6 | Used as decorator to add common pre-processing steps of the data (args[0]) 7 | and common post-processing steps of the scores (out[0]) to an algorithm. 8 | 9 | Pre-/post-processing steps are performed in this order, if activated. 10 | 11 | Pre-processing of the data 12 | pre_normalise: boolean; whether to normalise the data 13 | 14 | Post-processing of the scores 15 | post_standardise: boolean; whether to standardise the scores 16 | post_zeroonescaling: boolean; whether to scale the scores to [0, 1] 17 | post_edgeprior: boolean; whether to divide the scores by their mean 18 | (may be helpful for comparability of scores across datasets) 19 | """ 20 | def func(*args, **kwargs): 21 | pre_normalise = kwargs.pop("pre_normalise", False) 22 | 23 | post_standardise = kwargs.pop("post_standardise", False) 24 | post_zeroonescaling = kwargs.pop("post_zeroonescaling", False) 25 | post_edgeprior = kwargs.pop("post_edgeprior", False) 26 | 27 | # Pre-process the data 28 | if pre_normalise: 29 | args = list(args) 30 | args[0] = standardise(args[0]) 31 | args = tuple(args) 32 | 33 | # Call original algorithm 34 | out = func_raw(*args, **kwargs) 35 | 36 | # Post-process the scores (remaining outputs remain unchanged) 37 | if type(out) == tuple and len(out) > 1: 38 | scores = out[0] 39 | else: 40 | scores = out 41 | 42 | if post_standardise: 43 | scores = standardise(scores, axis=None) 44 | if post_zeroonescaling: 45 | scores = (scores - scores.min()) / (scores.max() - scores.min()) 46 | if post_edgeprior: 47 | scores /= scores.mean() 48 | 49 | if type(out) == tuple and len(out) > 1: 50 | out = list(out) 51 | out[0] = scores 52 | out = tuple(out) 53 | else: 54 | out = scores 55 | 56 | return out 57 | 58 | return func 59 | 60 | 61 | def standardise(X, axis=0, keepdims=True, copy=False): 62 | if copy: 63 | X = np.copy(X) 64 | X -= X.mean(axis=axis, keepdims=keepdims) 65 | X /= X.std(axis=axis, keepdims=keepdims) 66 | return X 67 | --------------------------------------------------------------------------------