├── .coveragerc ├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── VERSION ├── abcpy ├── NN_utilities │ ├── __init__.py │ ├── algorithms.py │ ├── datasets.py │ ├── losses.py │ ├── networks.py │ ├── trainer.py │ └── utilities.py ├── __init__.py ├── acceptedparametersmanager.py ├── approx_lhd.py ├── backends │ ├── __init__.py │ ├── base.py │ ├── mpi.py │ ├── mpimanager.py │ └── spark.py ├── continuousmodels.py ├── discretemodels.py ├── distances.py ├── graphtools.py ├── inferences.py ├── jointapprox_lhd.py ├── jointdistances.py ├── modelselections.py ├── output.py ├── perturbationkernel.py ├── probabilisticmodels.py ├── statistics.py ├── statisticslearning.py ├── transformers.py └── utils.py ├── doc ├── Makefile ├── literature │ ├── DuttaS-ABCpy-PASC-2017.bib │ └── JSS_2021.bib └── source │ ├── ABC_rejection.png │ ├── DEVELOP.rst │ ├── Makefile │ ├── abcpy.rst │ ├── class-diagram.png │ ├── class-diagram.svg │ ├── conf.py │ ├── getting_started.rst │ ├── index.rst │ ├── installation.rst │ ├── network.pdf │ ├── network.png │ ├── network1.pdf │ ├── network1.png │ ├── parallelization.rst │ ├── postanalysis.rst │ ├── rtfd_requirements.txt │ └── user_customization.rst ├── environment.yml ├── examples ├── 1_getting_started.ipynb ├── 2_Rejection_ABC_closer_look.ipynb ├── approx_lhd │ ├── __init__.py │ ├── mcmc_hierarchical_models.py │ └── pmc_hierarchical_models.py ├── backends │ ├── README.md │ ├── apache_spark │ │ └── pmcabc_gaussian.py │ ├── dummy │ │ └── pmcabc_gaussian.py │ └── mpi │ │ ├── mpi_model_inferences.py │ │ └── pmcabc_gaussian.py ├── extensions │ ├── distances │ │ ├── __init__.py │ │ └── default_distance.py │ ├── models │ │ ├── README.md │ │ ├── __init__.py │ │ ├── gaussian_R │ │ │ ├── gaussian_model.R │ │ │ └── pmcabc_gaussian_model_simple.py │ │ ├── gaussian_cpp │ │ │ ├── Makefile │ │ │ ├── __init__.py │ │ │ ├── gaussian_model_simple.cpp │ │ │ ├── gaussian_model_simple.i │ │ │ └── pmcabc_gaussian_model_simple.py │ │ ├── gaussian_f90 │ │ │ ├── Makefile │ │ │ ├── gaussian_model_simple.f90 │ │ │ └── pmcabc_gaussian_model_simple.py │ │ └── gaussian_python │ │ │ ├── __init__.py │ │ │ └── pmcabc_gaussian_model_simple.py │ └── perturbationkernels │ │ ├── __init__.py │ │ ├── multivariate_normal_kernel.py │ │ └── pmcabc_perturbation_kernels.py ├── hierarchicalmodels │ ├── __init__.py │ └── pmcabc_inference_on_multiple_sets_of_obs.py ├── modelselection │ ├── __init__.py │ └── randomforest_modelselections.py └── statisticslearning │ ├── __init__.py │ ├── gaussian_statistics_learning_DrawFromPrior_reload_NNs.py │ ├── gaussian_statistics_learning_exponential_family.py │ └── pmcabc_gaussian_statistics_learning.py ├── requirements.txt ├── requirements ├── backend-mpi.txt ├── backend-spark.txt ├── coverage.txt └── neural_networks_requirements.txt ├── setup.cfg ├── setup.py └── tests ├── NN_utilities_networks_tests.py ├── NN_utilities_utilities_tests.py ├── __init__.py ├── acceptedparametersmanager_tests.py ├── approx_lhd_tests.py ├── backend_tests_mpi.py ├── backend_tests_mpi_model_mpi.py ├── continuousmodels_tests.py ├── discretemodels_tests.py ├── distances_tests.py ├── graphtools_tests.py ├── inferences_tests.py ├── jointapprox_lhd_tests.py ├── jointdistances_tests.py ├── modelselections_tests.py ├── output_tests.py ├── perturbationkernel_tests.py ├── pickle_tests.py ├── probabilisticmodels_tests.py ├── statistics_tests.py ├── statisticslearning_tests.py ├── test_examples.py ├── test_examples_mpi.py └── transformers_tests.py /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = abcpy 4 | 5 | [report] 6 | exclude_lines = 7 | if self.debug: 8 | pragma: no cover 9 | raise NotImplementedError 10 | if __name__ == .__main__.: 11 | if False: 12 | except ImportError: 13 | if not has_torch: 14 | ignore_errors = True 15 | omit = 16 | tests/* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # vim backup files 92 | # 93 | **/.*.sw? 94 | 95 | .idea/ 96 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | language: python 3 | stages: 4 | - name: test 5 | - name: deploy 6 | addons: 7 | apt: 8 | sources: 9 | - deadsnakes 10 | packages: 11 | - gfortran 12 | - libboost-random-dev 13 | - python3.7-dev 14 | - python3.9-dev 15 | - python3-numpy 16 | - swig 17 | - libmpich-dev 18 | - mpich 19 | jobs: 20 | include: 21 | # Unit tests on different python versions, all on Ubuntu 22 | - stage: test 23 | name: "Python 3.6" 24 | python: "3.6" 25 | env: 26 | - UNIT_TEST=true 27 | - name: "Python 3.7" 28 | python: "3.7" 29 | env: 30 | - UNIT_TEST=true 31 | - name: "Python 3.8" 32 | python: "3.8" 33 | env: 34 | - UNIT_TEST=true 35 | - name: "Python 3.9" 36 | python: "3.9" 37 | env: 38 | - UNIT_TEST=true 39 | # Test coverage and without Pytorch for a single version 40 | - name: "Coverage" 41 | python: "3.8" 42 | env: 43 | - COVERAGE=true 44 | - name: "No pytorch" 45 | python: "3.8" 46 | env: 47 | - NO_PYTORCH=true 48 | - UNIT_TEST=true 49 | - stage: deploy 50 | script: 51 | - echo "Required dummy override of default 'script' in .travis.yml." 52 | after_success: 53 | - echo "Required dummy override of default 'after_success' in .travis.yml." 
54 | before_deploy: 55 | - make clean 56 | - mkdir dist 57 | deploy: 58 | - provider: pypi 59 | user: mschoengens 60 | password: 61 | secure: tLopsTVkRfraHb/T1qfNvXk4L3StqpqFTflK0iAq/V+WSdARy7PDccj3P13aDo+Qvd2XYDPTSIVTveTOSHj46oser7+OmqrUYH9jQt681bmJ5aooHhw+3+NHa+fVBxMgzvCqJ+4Gvbf+3eDKowXICfPlTj5UrEil7s1jv91bSIm0HdI+mLyg1YstHOGt0O2Y6QEDPyEVRmFtyq7hB7EPheUvaJAfEl70LxV9fHiOuuQNcp9pnGRO6t9Sx4NIfIPIYzSdBoLaMSwgjy6ua1wF4iyMdKaDhMSajYb2+fWY1iyDJnmFj0/olpYUiZTrfWfQqz2j+uGT/YbmfZmSCcBTQI9ixJCtawqExoZODSq34uzc+N61riXdLEMOroxMobeBhuNj+bykp1IKaE99vYL/q8ta5dID15MtWIjWbLDVYQTQPkJ7fWllyxqOVRwa2rN37QbCctPbKpCs7WvEE7mJAaWJuOprw0AYjd2IH76YULkzbk3nR/v1nwyM2hGTYjePAy6Ue9jPgfeu9jEWu23O4u7+KMa1+scuLRP4DB1nlMStixjAJdiPMIo4OrvAiC8+ocntgi8t9+Quu5N8deyr9nM1pvWQyaNuHt3Yxd5oey3Q5UMtQFRCl5IyQKMTKttBg2p2L4wd0RdfrLgJXWkw/s6SBIyylCCDROr5gMEnPfY= 62 | distributions: "sdist bdist_wheel" 63 | on: 64 | tags: true 65 | branch: master 66 | 67 | install: 68 | - sudo apt-get install -y r-base # install R for testing example 69 | - pip install cython 70 | - pip install rpy2 # install the rpy2 library for testing example with R 71 | - pip install -r requirements.txt 72 | - pip install -r requirements/backend-spark.txt 73 | - if [[ ! $NO_PYTORCH == true ]]; then pip install -r requirements/neural_networks_requirements.txt; fi; 74 | - if [[ $COVERAGE == true ]]; then pip install -r requirements/coverage.txt; fi; 75 | before_script: 76 | - python --version 77 | script: 78 | - if [[ $UNIT_TEST == true ]]; then make test; fi; 79 | - if [[ $COVERAGE == true ]]; then make coveragetest; fi; 80 | after_success: 81 | - if [[ $COVERAGE == true ]]; then bash <(curl -s https://codecov.io/bash); fi; 82 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, ABCpy Developers. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted (subject to the limitations in the disclaimer 6 | below) provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | * Neither the name of the copyright holder nor the names of its contributors may be used 16 | to endorse or promote products derived from this software without specific 17 | prior written permission. 18 | 19 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS 20 | LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 22 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 24 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 26 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 29 | OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 30 | DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include requirements/* 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION=$(shell cat VERSION) 2 | UNITTESTS=$(shell find tests -type f -name '*_tests.py') 3 | MAKEDIRS=$(shell find examples -name Makefile -exec dirname {} \;) 4 | whl_file = abcpy-${VERSION}-py3-none-any.whl 5 | 6 | .DEFAULT: help 7 | .PHONY: help clean doc doctest exampletest exampletest_mpi package test uninstall unittest unittest_mpi install reinstall $(MAKEDIRS) 8 | 9 | help: 10 | @echo Targets are: clean, doc, doctest, exampletest, exampletest_mpi, package, uninstall, unittest, unittest_mpi, test 11 | 12 | clean: 13 | find . -name "*.pyc" -type f -delete 14 | find . -name "*.pkl" -type f -delete 15 | find . -name "__pycache__" -delete 16 | find . -name ".#*" -delete 17 | find . -name "#*#" -delete 18 | 19 | $(MAKEDIRS): 20 | make -C $@ 21 | 22 | # testing 23 | 24 | test: unittest unittest_mpi exampletest exampletest_mpi doctest 25 | 26 | unittest: 27 | @echo "Running standard unit tests.." 28 | python3 -m unittest discover -s tests -v -p "*_tests.py" || (echo "Error in standard unit tests."; exit 1) 29 | @# remove temporary files created during testing 30 | @if test -f net.pth; then rm net.pth; fi 31 | @if test -f net_with_discard_wrapper.pth; then rm net_with_discard_wrapper.pth; fi 32 | @if test -f scaler.pkl; then rm scaler.pkl; fi 33 | @if test -f tmp.jnl; then rm tmp.jnl; fi 34 | @if test -f journal_tests_testfile.pkl; then rm journal_tests_testfile.pkl; fi 35 | 36 | unittest_mpi: 37 | @echo "Running MPI backend unit tests.." 38 | mpirun -np 2 python3 -m unittest discover -s tests -v -p "backend_tests_mpi.py" || (echo "Error in MPI unit tests."; exit 1) 39 | mpirun -np 3 python3 -m unittest discover -s tests -v -p "backend_tests_mpi_model_mpi.py" || (echo "Error in MPI unit tests."; exit 1) 40 | 41 | exampletest: $(MAKEDIRS) 42 | @echo "Testing standard examples.." 43 | python3 -m unittest -v tests/test_examples.py || (echo "Error in example tests."; exit 1) 44 | @if test -f scaler.pkl; then rm scaler.pkl; fi 45 | @if test -f seminn_net.pth; then rm seminn_net.pth; fi 46 | @if test -f triplet_net.pth; then rm triplet_net.pth; fi 47 | @if test -f tmp.jnl; then rm tmp.jnl; fi 48 | 49 | exampletest_mpi: 50 | @echo "Testing MPI backend examples.." 51 | mpirun -np 2 python3 -m unittest -v tests/test_examples_mpi.py || (echo "Error in MPI example tests."; exit 1) 52 | 53 | doctest: 54 | make -C doc html || (echo "Error in documentation generator."; exit 1) 55 | 56 | coveragetest: $(MAKEDIRS) # compile models here as well as we check them for codecov as well. 
57 | @command -v coverage >/dev/null 2>&1 || { echo >&2 "Python package 'coverage' has to been installed. Please, run 'pip3 install coverage'."; exit;} 58 | # unittests 59 | @- $(foreach TEST, $(UNITTESTS), \ 60 | echo === Testing code coverage: $(TEST); \ 61 | coverage run -a --branch --source abcpy --omit \*__init__.py -m unittest $(TEST); \ 62 | ) 63 | # unittest_mpi 64 | @echo === Testing code coverage: tests/backend_tests_mpi.py 65 | mpirun -np 2 coverage run -a --branch --source abcpy --omit \*__init__.py -m unittest tests/backend_tests_mpi.py 66 | @echo === Testing code coverage: tests/backend_tests_mpi_model_mpi.py 67 | mpirun -np 3 coverage run -a --branch --source abcpy --omit \*__init__.py -m unittest tests/backend_tests_mpi_model_mpi.py 68 | # exampletest 69 | @echo === Testing code coverage: tests/test_examples.py 70 | @coverage run -a --branch --source abcpy --omit \*__init__.py -m unittest tests/test_examples.py 71 | # exampletest_mpi 72 | @echo === Testing code coverage: tests/examples_tests_mpi.py 73 | mpirun -np 2 coverage run -a --branch --source abcpy --omit \*__init__.py -m unittest -v tests/test_examples_mpi.py 74 | coverage html -d build/testcoverage 75 | coverage report 76 | @echo 77 | @echo Detailed test coverage report under build/testcoverage 78 | 79 | # documentation 80 | doc: 81 | make -C doc html 82 | 83 | # packaging 84 | package: whl_file 85 | 86 | 87 | uninstall: 88 | pip3 uninstall abcpy 89 | 90 | 91 | install: whl_file 92 | pip3 install --user build/dist/$(whl_file) 93 | 94 | reinstall: uninstall install 95 | 96 | 97 | whl_file: clean 98 | mkdir -p build/dist 99 | python3 setup.py -v bdist_wheel -d build/dist 100 | @echo 101 | @echo "Find" `ls build/dist` "in build/dist/." 102 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.6.3 2 | -------------------------------------------------------------------------------- /abcpy/NN_utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/abcpy/NN_utilities/__init__.py -------------------------------------------------------------------------------- /abcpy/NN_utilities/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ContrastiveLoss(nn.Module): 7 | """ 8 | Contrastive loss 9 | Takes embeddings of two samples and a target label == 1 if samples are from the same class and label == 0 otherwise. 10 | 11 | Code from https://github.com/adambielski/siamese-triplet""" 12 | 13 | def __init__(self, margin): 14 | super(ContrastiveLoss, self).__init__() 15 | self.margin = margin 16 | self.eps = 1e-9 17 | 18 | def forward(self, output1, output2, target, size_average=True): 19 | distances = (output2 - output1).pow(2).sum(1) # squared distances 20 | losses = 0.5 * (target.float() * distances + 21 | (1 + -1 * target).float() * F.relu(self.margin - (distances + self.eps).sqrt()).pow(2)) 22 | return losses.mean() if size_average else losses.sum() 23 | 24 | 25 | class TripletLoss(nn.Module): 26 | """ 27 | Triplet loss 28 | Takes embeddings of an anchor sample, a positive sample and a negative sample. 
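In terms of the forward arguments below, the per-sample loss is max(0, ||anchor - positive||^2 - ||anchor - negative||^2 + margin); it is averaged over the batch when size_average is True and summed otherwise.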
29 | 30 | Code from https://github.com/adambielski/siamese-triplet""" 31 | 32 | def __init__(self, margin): 33 | super(TripletLoss, self).__init__() 34 | self.margin = margin 35 | 36 | def forward(self, anchor, positive, negative, size_average=True): 37 | distance_positive = (anchor - positive).pow(2).sum(1) # .pow(.5) 38 | distance_negative = (anchor - negative).pow(2).sum(1) # .pow(.5) 39 | losses = F.relu(distance_positive - distance_negative + self.margin) 40 | return losses.mean() if size_average else losses.sum() 41 | 42 | 43 | def Fisher_divergence_loss(first_der_t, second_der_t, eta, lam=0): 44 | """lam is the regularization parameter of the Kingma & LeCun (2010) regularization""" 45 | inner_prod_second_der_eta = torch.bmm(second_der_t, eta.unsqueeze(-1)) # this is used twice 46 | 47 | if lam == 0: 48 | return sum( 49 | (0.5 * torch.bmm(first_der_t, eta.unsqueeze(-1)) ** 2 + inner_prod_second_der_eta).view(-1)) 50 | else: 51 | return sum( 52 | (0.5 * torch.bmm(first_der_t, eta.unsqueeze(-1)) ** 2 + 53 | inner_prod_second_der_eta + lam * inner_prod_second_der_eta ** 2).view(-1)) 54 | 55 | 56 | def Fisher_divergence_loss_with_c_x(first_der_t, second_der_t, eta, lam=0): 57 | # this enables to use the term c(x) in the approximating family, ie a term that depends only on x and not on theta. 58 | new_eta = torch.cat((eta, torch.ones(eta.shape[0], 1).to(eta)), 59 | dim=1) # the one tensor need to be on same device as eta. 60 | # then call the other loss function with this new_eta: 61 | return Fisher_divergence_loss(first_der_t, second_der_t, new_eta, lam=lam) 62 | -------------------------------------------------------------------------------- /abcpy/NN_utilities/trainer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | from tqdm import tqdm 5 | 6 | 7 | def fit(train_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, val_loader=None, early_stopping=False, 8 | epochs_early_stopping_interval=1, start_epoch_early_stopping=10, start_epoch_training=0, use_tqdm=True): 9 | """ 10 | Basic function to train a neural network given a train_loader, a loss function and an optimizer. 11 | 12 | Loaders, model, loss function and metrics should work together for a given task, 13 | i.e. The model should be able to process data output of loaders, 14 | loss function should process target output of loaders and outputs from the model 15 | 16 | Adapted from https://github.com/adambielski/siamese-triplet 17 | """ 18 | 19 | logger = logging.getLogger("NN Trainer") 20 | train_loss_list = [] 21 | if val_loader is not None: 22 | test_loss_list = [] 23 | if early_stopping: 24 | early_stopping_loss_list = [] # list of losses used for early stopping 25 | else: 26 | test_loss_list = None 27 | if early_stopping and val_loader is None: 28 | raise RuntimeError("You cannot perform early stopping if a validation loader is not provided to the training " 29 | "routine") 30 | 31 | for epoch in range(0, start_epoch_training): 32 | scheduler.step() 33 | 34 | for epoch in tqdm(range(start_epoch_training, n_epochs), disable=not use_tqdm): 35 | # Train stage 36 | train_loss = train_epoch(train_loader, model, loss_fn, optimizer, cuda) 37 | train_loss_list.append(train_loss) 38 | 39 | logger.debug('Epoch: {}/{}. 
Train set: Average loss: {:.4f}'.format(epoch + 1, n_epochs, train_loss)) 40 | 41 | # Validation stage 42 | if val_loader is not None: 43 | val_loss = test_epoch(val_loader, model, loss_fn, cuda) 44 | test_loss_list.append(val_loss) 45 | 46 | logger.debug('Epoch: {}/{}. Validation set: Average loss: {:.4f}'.format(epoch + 1, n_epochs, val_loss)) 47 | 48 | # early stopping: 49 | if early_stopping and (epoch + 1) % epochs_early_stopping_interval == 0: 50 | early_stopping_loss_list.append(val_loss) # save the previous validation loss. It is actually 51 | # we need to have at least two saved test losses for performing early stopping (in which case we know 52 | # we have saved the previous state_dict as well). 53 | if epoch + 1 >= start_epoch_early_stopping and len(early_stopping_loss_list) > 1: 54 | if early_stopping_loss_list[-1] > early_stopping_loss_list[-2]: 55 | logger.info("Training has been early stopped at epoch {}.".format(epoch + 1)) 56 | # reload the previous state dict: 57 | model.load_state_dict(net_state_dict) 58 | break # stop training 59 | # if we did not stop: update the state dict to the next value 60 | net_state_dict = model.state_dict() 61 | 62 | scheduler.step() 63 | 64 | return train_loss_list, test_loss_list 65 | 66 | def train_epoch(train_loader, model, loss_fn, optimizer, cuda): 67 | """Function implementing the training in one epoch. 68 | 69 | Adapted from https://github.com/adambielski/siamese-triplet 70 | """ 71 | model.train() 72 | total_loss = 0 73 | 74 | for batch_idx, (data, target) in enumerate(train_loader): 75 | target = target if len(target) > 0 else None 76 | if not type(data) in (tuple, list): 77 | data = (data,) 78 | if cuda: 79 | data = tuple(d.cuda() for d in data) 80 | if target is not None: 81 | target = target.cuda() 82 | 83 | optimizer.zero_grad() 84 | outputs = model(*data) 85 | 86 | if type(outputs) not in (tuple, list): 87 | outputs = (outputs,) 88 | 89 | loss_inputs = outputs 90 | if target is not None: 91 | target = (target,) 92 | loss_inputs += target 93 | 94 | loss_outputs = loss_fn(*loss_inputs) 95 | loss = loss_outputs[0] if type(loss_outputs) in (tuple, list) else loss_outputs 96 | total_loss += loss.item() 97 | loss.backward() 98 | optimizer.step() 99 | 100 | return total_loss / (batch_idx + 1) # divide here by the number of elements in the batch. 101 | 102 | 103 | def test_epoch(val_loader, model, loss_fn, cuda): 104 | """Function implementing the computation of the validation error, in batches. 105 | 106 | Adapted from https://github.com/adambielski/siamese-triplet 107 | """ 108 | with torch.no_grad(): 109 | model.eval() 110 | val_loss = 0 111 | for batch_idx, (data, target) in enumerate(val_loader): 112 | target = target if len(target) > 0 else None 113 | if not type(data) in (tuple, list): 114 | data = (data,) 115 | if cuda: 116 | data = tuple(d.cuda() for d in data) 117 | if target is not None: 118 | target = target.cuda() 119 | 120 | outputs = model(*data) 121 | 122 | if type(outputs) not in (tuple, list): 123 | outputs = (outputs,) 124 | loss_inputs = outputs 125 | if target is not None: 126 | target = (target,) 127 | loss_inputs += target 128 | 129 | loss_outputs = loss_fn(*loss_inputs) 130 | loss = loss_outputs[0] if type(loss_outputs) in (tuple, list) else loss_outputs 131 | val_loss += loss.item() 132 | 133 | return val_loss / (batch_idx + 1) # divide here by the number of elements in the batch. 
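# --- Illustrative usage sketch (not part of the original module) -------------------
# Minimal example of how `fit` above can be driven with a toy regression setup.
# The data, network and hyperparameter values are assumptions made purely for
# illustration; the optimizer, scheduler and loss are standard PyTorch classes, and
# nn.MSELoss matches the (output, target) call pattern used by train_epoch/test_epoch.
import torch
from torch.utils.data import DataLoader, TensorDataset

x, y = torch.randn(100, 5), torch.randn(100, 1)  # toy inputs and targets
loader = DataLoader(TensorDataset(x, y), batch_size=10, shuffle=True)
net = torch.nn.Sequential(torch.nn.Linear(5, 16), torch.nn.ReLU(), torch.nn.Linear(16, 1))
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8)
train_losses, _ = fit(loader, net, torch.nn.MSELoss(), optimizer, scheduler,
                      n_epochs=30, cuda=False, use_tqdm=False)  # no val_loader, so no validation losses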
134 | -------------------------------------------------------------------------------- /abcpy/NN_utilities/utilities.py: -------------------------------------------------------------------------------- 1 | try: 2 | import torch 3 | except ImportError: 4 | has_torch = False 5 | else: 6 | has_torch = True 7 | 8 | import logging 9 | from functools import reduce 10 | from operator import mul 11 | 12 | import numpy as np 13 | 14 | 15 | def dist2(x, y): 16 | """Compute the square of the Euclidean distance between 2 arrays of same length""" 17 | return np.dot(x - y, x - y) 18 | 19 | 20 | def compute_similarity_matrix(target, quantile=0.1, return_pairwise_distances=False): 21 | """Compute the similarity matrix between some values given a given quantile of the Euclidean distances. 22 | 23 | If return_pairwise_distances is True, it also returns a matrix with the pairwise distances with every distance.""" 24 | 25 | logger = logging.getLogger("Compute_similarity_matrix") 26 | 27 | n_samples = target.shape[0] 28 | 29 | pairwise_distances = np.zeros([n_samples] * 2) 30 | 31 | for i in range(n_samples): 32 | for j in range(n_samples): 33 | pairwise_distances[i, j] = dist2(target[i], target[j]) 34 | 35 | q = np.quantile(pairwise_distances[~np.eye(n_samples, dtype=bool)].reshape(-1), quantile) 36 | 37 | similarity_set = pairwise_distances < q 38 | 39 | logger.info("Fraction of similar pairs (epurated by self-similarity): {}".format( 40 | (np.sum(similarity_set) - n_samples) / n_samples ** 2)) 41 | 42 | if (np.sum(similarity_set) - n_samples) / n_samples ** 2 == 0: 43 | raise RuntimeError("The chosen quantile is too small, as there are no similar samples according to the " 44 | "corresponding threshold.\nPlease increase the quantile.") 45 | 46 | return (similarity_set, pairwise_distances) if return_pairwise_distances else similarity_set 47 | 48 | 49 | def save_net(path, net): 50 | """Function to save the Pytorch state_dict of a network to a file.""" 51 | torch.save(net.state_dict(), path) 52 | 53 | 54 | def load_net(path, network_class, *network_args, **network_kwargs): 55 | """Function to load a network from a Pytorch state_dict, given the corresponding network_class.""" 56 | net = network_class(*network_args, **network_kwargs) 57 | net.load_state_dict(torch.load(path)) 58 | return net.eval() # call the network to eval model. Needed with batch normalization and dropout layers. 59 | 60 | 61 | def jacobian(input, output, diffable=True): 62 | ''' 63 | Returns the Jacobian matrix (batch x in_size x out_size) of the function that produced the output evaluated at the 64 | input 65 | 66 | From https://github.com/mwcvitkovic/MASS-Learning/blob/master/models/utils.py 67 | 68 | Important: need to use diffable=True in order for the training routines based on these to work! 
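(Concretely, diffable=True makes the torch.autograd.grad calls below use create_graph=True, so the returned Jacobian stays in the autograd graph and can itself be differentiated when it enters a loss.)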
69 | 70 | ''' 71 | assert len(output.shape) == 2 72 | assert input.shape[0] == output.shape[0] 73 | in_size = reduce(mul, list(input.shape[1:]), 1) 74 | if (input.sum() + output.sum()).item() in [np.nan, np.inf]: 75 | raise ValueError 76 | J = torch.zeros(list(output.shape) + list(input.shape[1:])).to(input) 77 | # they are able here to do the gradient computation one batch at a time, of course still considering only one output coordinate at a time 78 | for i in range(output.shape[1]): 79 | g = torch.zeros(output.shape).to(input) 80 | g[:, i] = 1 81 | if diffable: 82 | J[:, i] = torch.autograd.grad(output, input, g, only_inputs=True, retain_graph=True, create_graph=True)[0] 83 | else: 84 | J[:, i] = torch.autograd.grad(output, input, g, only_inputs=True, retain_graph=True)[0] 85 | J = J.reshape(output.shape[0], output.shape[1], in_size) 86 | return J.transpose(2, 1) 87 | 88 | 89 | def jacobian_second_order(input, output, diffable=True): 90 | ''' 91 | Returns the Jacobian matrix (batch x in_size x out_size) of the function that produced the output evaluated at the 92 | input, as well as 93 | the matrix of second derivatives of outputs with respect to inputs (batch x in_size x out_size) 94 | 95 | Adapted from https://github.com/mwcvitkovic/MASS-Learning/blob/master/models/utils.py 96 | 97 | Important: need to use diffable=True in order for the training routines based on these to work! 98 | ''' 99 | assert len(output.shape) == 2 100 | assert input.shape[0] == output.shape[0] 101 | in_size = reduce(mul, list(input.shape[1:]), 1) 102 | if (input.sum() + output.sum()).item() in [np.nan, np.inf]: 103 | raise ValueError 104 | J = torch.zeros(list(output.shape) + list(input.shape[1:])).to(input) 105 | J2 = torch.zeros(list(output.shape) + list(input.shape[1:])).to(input) 106 | 107 | for i in range(output.shape[1]): 108 | g = torch.zeros(output.shape).to(input) 109 | g[:, i] = 1 110 | J[:, i] = torch.autograd.grad(output, input, g, only_inputs=True, retain_graph=True, create_graph=True)[0] 111 | J = J.reshape(output.shape[0], output.shape[1], in_size) 112 | 113 | for i in range(output.shape[1]): 114 | for j in range(input.shape[1]): 115 | g = torch.zeros(J.shape).to(input) 116 | g[:, i, j] = 1 117 | if diffable: 118 | J2[:, i, j] = torch.autograd.grad(J, input, g, only_inputs=True, retain_graph=True, create_graph=True)[ 119 | 0][:, j] 120 | else: 121 | J2[:, i, j] = torch.autograd.grad(J, input, g, only_inputs=True, retain_graph=True)[0][:, j] 122 | 123 | J2 = J2.reshape(output.shape[0], output.shape[1], in_size) 124 | 125 | return J.transpose(2, 1), J2.transpose(2, 1) 126 | 127 | 128 | def jacobian_hessian(input, output, diffable=True): 129 | ''' 130 | Returns the Jacobian matrix (batch x in_size x out_size) of the function that produced the output evaluated at the 131 | input, as well as the Hessian matrix (batch x in_size x in_size x out_size). 132 | 133 | This takes slightly more than the jacobian_second_order routine. 134 | 135 | Adapted from https://github.com/mwcvitkovic/MASS-Learning/blob/master/models/utils.py 136 | 137 | Important: need to use diffable=True in order for the training routines based on these to work! 
138 | ''' 139 | assert len(output.shape) == 2 140 | assert input.shape[0] == output.shape[0] 141 | in_size = reduce(mul, list(input.shape[1:]), 1) 142 | if (input.sum() + output.sum()).item() in [np.nan, np.inf]: 143 | raise ValueError 144 | J = torch.zeros(list(output.shape) + list(input.shape[1:])).to(input) 145 | H = torch.zeros(list(output.shape) + list(input.shape[1:]) + list(input.shape[1:])).to(input) 146 | 147 | for i in range(output.shape[1]): 148 | g = torch.zeros(output.shape).to(input) 149 | g[:, i] = 1 150 | J[:, i] = torch.autograd.grad(output, input, g, only_inputs=True, retain_graph=True, create_graph=True)[0] 151 | J = J.reshape(output.shape[0], output.shape[1], in_size) 152 | 153 | for i in range(output.shape[1]): 154 | for j in range(input.shape[1]): 155 | g = torch.zeros(J.shape).to(input) 156 | g[:, i, j] = 1 157 | if diffable: 158 | H[:, i, j] = torch.autograd.grad(J, input, g, only_inputs=True, retain_graph=True, create_graph=True)[0] 159 | else: 160 | H[:, i, j] = torch.autograd.grad(J, input, g, only_inputs=True, retain_graph=True)[0] 161 | 162 | return J.transpose(2, 1), H.transpose(3, 1) 163 | 164 | 165 | def set_requires_grad(net, value): 166 | for param in net.parameters(): 167 | param.requires_grad = value 168 | -------------------------------------------------------------------------------- /abcpy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | init file docstring 3 | """ 4 | -------------------------------------------------------------------------------- /abcpy/acceptedparametersmanager.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from abcpy.probabilisticmodels import Hyperparameter, ModelResultingFromOperation 4 | 5 | 6 | class AcceptedParametersManager: 7 | def __init__(self, model): 8 | """ 9 | This class manages the accepted parameters and other bds objects. 10 | 11 | Parameters 12 | ---------- 13 | model: list 14 | List of all root probabilistic models 15 | """ 16 | self.model = model 17 | 18 | # these are usually big tables, so we broadcast them to have them once 19 | # per executor instead of once per task 20 | self.observations_bds = None 21 | self.accepted_parameters_bds = None 22 | self.accepted_weights_bds = None 23 | self.accepted_cov_mats_bds = None 24 | 25 | # saves the current parameters relevant to each kernel 26 | self.kernel_parameters_bds = None 27 | 28 | def broadcast(self, backend, observations): 29 | """Broadcasts the observations to observations_bds using the specified backend. 
30 | 31 | Parameters 32 | ---------- 33 | backend: abcpy.backends object 34 | The backend used by the inference algorithm 35 | observations: list 36 | A list containing all observed data 37 | """ 38 | self.observations_bds = backend.broadcast(observations) 39 | 40 | def update_kernel_values(self, backend, kernel_parameters): 41 | """Broadcasts new parameters for each kernel 42 | 43 | Parameters 44 | ---------- 45 | backend: abcpy.backends object 46 | The backend used by the inference algorithm 47 | kernel_parameters: list 48 | A list, in which each entry contains the values of the parameters associated with the corresponding kernel in the joint perturbation kernel 49 | """ 50 | 51 | self.kernel_parameters_bds = backend.broadcast(kernel_parameters) 52 | 53 | def update_broadcast(self, backend, accepted_parameters=None, accepted_weights=None, accepted_cov_mats=None): 54 | """Updates the broadcasted values using the specified backend 55 | 56 | Parameters 57 | ---------- 58 | backend: abcpy.backend object 59 | The backend to be used for broadcasting 60 | accepted_parameters: list 61 | The accepted parameters to be broadcasted 62 | accepted_weights: list 63 | The accepted weights to be broadcasted 64 | accepted_cov_mats: np.ndarray 65 | The accepted covariance matrix to be broadcasted 66 | """ 67 | 68 | # Used for Spark backend 69 | def destroy(bc): 70 | if bc != None: 71 | bc.unpersist 72 | # bc.destroy 73 | 74 | if not accepted_parameters is None: 75 | self.accepted_parameters_bds = backend.broadcast(accepted_parameters) 76 | if not accepted_weights is None: 77 | self.accepted_weights_bds = backend.broadcast(accepted_weights) 78 | if not accepted_cov_mats is None: 79 | self.accepted_cov_mats_bds = backend.broadcast(accepted_cov_mats) 80 | 81 | def get_mapping(self, models, is_root=True, index=0): 82 | """Returns the order in which the models are discovered during recursive depth-first search. 83 | Commonly used when returning the accepted_parameters_bds for certain models. 84 | 85 | Parameters 86 | ---------- 87 | models: list 88 | List of the root probabilistic models of the graph. 89 | is_root: boolean 90 | Specifies whether the current list of models is the list of overall root models 91 | index: integer 92 | The current index in depth-first search. 93 | 94 | Returns 95 | ------- 96 | list 97 | The first entry corresponds to the mapping of the root model, as well as all its parents. The second entry 98 | corresponds to the next index in depth-first search. 99 | """ 100 | 101 | # Implement a dfs to discover all nodes of the model 102 | mapping = [] 103 | 104 | for model in models: 105 | if not model.visited and not (isinstance(model, Hyperparameter)): 106 | model.visited = True 107 | 108 | # Only parameters that are neither root nor Hyperparameters are included in the mapping 109 | if not is_root and not (isinstance(model, ModelResultingFromOperation)): 110 | # for i in range(model.get_output_dimension()): 111 | mapping.append((model, index)) 112 | index += 1 113 | 114 | for parent in model.get_input_models(): 115 | parent_mapping, index = self.get_mapping([parent], is_root=False, index=index) 116 | for element in parent_mapping: 117 | mapping.append(element) 118 | 119 | # Reset the flags of all models 120 | if is_root: 121 | self._reset_flags() 122 | 123 | return [mapping, index] 124 | 125 | def get_accepted_parameters_bds_values(self, models): 126 | """ 127 | Returns the accepted bds values for the specified models. 
128 | 129 | Parameters 130 | ---------- 131 | models: list 132 | Contains the probabilistic models for which the accepted bds values should be returned 133 | 134 | Returns 135 | ------- 136 | list: 137 | The accepted_parameters_bds values of all the probabilistic models specified in models. 138 | """ 139 | 140 | # Get the enumerated recursive depth-first search ordering 141 | mapping, mapping_index = self.get_mapping(self.model) 142 | 143 | # The self.accepted_parameters_bds.value() list has dimensions d x n_steps, where d is the number of free parameters 144 | accepted_bds_values = [[] for i in range(len(self.accepted_parameters_bds.value()))] 145 | 146 | # Add all columns that correspond to desired parameters to the list that is returned 147 | for model in models: 148 | for prob_model, index in mapping: 149 | if model == prob_model: 150 | for i in range(len(self.accepted_parameters_bds.value())): 151 | accepted_bds_values[i].append(self.accepted_parameters_bds.value()[i][index]) 152 | # accepted_bds_values = [np.array(x).reshape(-1, ) for x in accepted_bds_values] 153 | 154 | return accepted_bds_values 155 | 156 | def _reset_flags(self, models=None): 157 | """Resets the visited flags of all models specified, such that other functions can act on the graph freely. 158 | Commonly used after calling the get_mapping method. 159 | 160 | Parameters 161 | ---------- 162 | models: list 163 | List of abcpy.ProbabilisticModel objects, the models the root models for which, together with their parents, the flags should be reset 164 | """ 165 | if models is None: 166 | models = self.model 167 | 168 | for model in models: 169 | for parent in model.get_input_models(): 170 | if parent.visited: 171 | self._reset_flags([parent]) 172 | model.visited = False 173 | -------------------------------------------------------------------------------- /abcpy/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from abcpy.backends.base import * 2 | 3 | 4 | def BackendMPI(*args, **kwargs): 5 | # import and setup module mpimanager 6 | import abcpy.backends.mpimanager 7 | master_node_ranks = [0] 8 | process_per_model = 1 9 | if 'master_node_ranks' in kwargs: 10 | master_node_ranks = kwargs['master_node_ranks'] 11 | if 'process_per_model' in kwargs: 12 | process_per_model = kwargs['process_per_model'] 13 | abcpy.backends.mpimanager.create_mpi_manager(master_node_ranks, process_per_model) 14 | 15 | # import BackendMPI and return and instance 16 | from abcpy.backends.mpi import BackendMPI 17 | return BackendMPI(*args, **kwargs) 18 | 19 | 20 | def BackendMPITestHelper(*args, **kwargs): 21 | from abcpy.backends.mpi import BackendMPITestHelper 22 | return BackendMPITestHelper(*args, **kwargs) 23 | 24 | 25 | def BackendSpark(*args, **kwargs): 26 | from abcpy.backends.spark import BackendSpark 27 | return BackendSpark(*args, **kwargs) 28 | -------------------------------------------------------------------------------- /abcpy/backends/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class Backend(metaclass=ABCMeta): 5 | """ 6 | This is the base class for every parallelization backend. It essentially 7 | resembles the map/reduce API from Spark. 8 | 9 | An idea for the future is to implement a MPI version of the backend with the 10 | hope to be more complient with standard HPC infrastructure and a potential 11 | speed-up. 
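A minimal usage sketch (illustrative only), using the BackendDummy implementation defined further below in this module:

    backend = BackendDummy()
    pds = backend.parallelize([1, 2, 3, 4])           # distribute a list
    pds_squared = backend.map(lambda x: x ** 2, pds)  # apply a function element-wise
    print(backend.collect(pds_squared))               # [1, 4, 9, 16]
    bds = backend.broadcast({"obs": [0.5]})           # share an object with all workers
    print(bds.value())                                # {'obs': [0.5]}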
12 | 13 | """ 14 | 15 | @abstractmethod 16 | def parallelize(self, list): 17 | """ 18 | This method distributes the list on the available workers and returns a 19 | reference object. 20 | 21 | The list should be split into number of workers many parts. Each 22 | part should then be sent to a separate worker node. 23 | 24 | Parameters 25 | ---------- 26 | list: Python list 27 | the list that should get distributed on the worker nodes 28 | Returns 29 | ------- 30 | PDS class (parallel data set) 31 | A reference object that represents the parallelized list 32 | """ 33 | 34 | raise NotImplementedError 35 | 36 | @abstractmethod 37 | def broadcast(self, object): 38 | """ 39 | Send object to all worker nodes without splitting it up. 40 | 41 | Parameters 42 | ---------- 43 | object: Python object 44 | An abitrary object that should be available on all workers 45 | 46 | Returns 47 | ------- 48 | BDS class (broadcast data set) 49 | A reference to the broadcasted object 50 | """ 51 | 52 | raise NotImplementedError 53 | 54 | @abstractmethod 55 | def map(self, func, pds): 56 | """ 57 | A distributed implementation of map that works on parallel data sets (PDS). 58 | 59 | On every element of pds the function func is called. 60 | 61 | Parameters 62 | ---------- 63 | func: Python func 64 | A function that can be applied to every element of the pds 65 | pds: PDS class 66 | A parallel data set to which func should be applied 67 | 68 | Returns 69 | ------- 70 | PDS class 71 | a new parallel data set that contains the result of the map 72 | """ 73 | 74 | raise NotImplementedError 75 | 76 | @abstractmethod 77 | def collect(self, pds): 78 | """ 79 | Gather the pds from all the workers, send it to the master and return it as a standard Python list. 80 | 81 | Parameters 82 | ---------- 83 | pds: PDS class 84 | a parallel data set 85 | 86 | Returns 87 | ------- 88 | Python list 89 | all elements of pds as a list 90 | """ 91 | 92 | raise NotImplementedError 93 | 94 | 95 | class PDS: 96 | """ 97 | The reference class for parallel data sets (PDS). 98 | """ 99 | 100 | @abstractmethod 101 | def __init__(self): 102 | raise NotImplementedError 103 | 104 | 105 | class BDS: 106 | """ 107 | The reference class for broadcast data set (BDS). 108 | """ 109 | 110 | @abstractmethod 111 | def __init__(self): 112 | raise NotImplementedError 113 | 114 | @abstractmethod 115 | def value(self): 116 | """ 117 | This method should return the actual object that the broadcast data set represents. 118 | """ 119 | raise NotImplementedError 120 | 121 | 122 | class BackendDummy(Backend): 123 | """ 124 | This is a dummy parallelization backend, meaning it doesn't parallelize 125 | anything. It is mainly implemented for testing purpose. 126 | 127 | """ 128 | 129 | def __init__(self): 130 | pass 131 | 132 | def parallelize(self, python_list): 133 | """ 134 | This actually does nothing: it just wraps the Python list into dummy pds (PDSDummy). 135 | 136 | Parameters 137 | ---------- 138 | python_list: Python list 139 | Returns 140 | ------- 141 | PDSDummy (parallel data set) 142 | """ 143 | 144 | return PDSDummy(python_list) 145 | 146 | def broadcast(self, object): 147 | """ 148 | This actually does nothing: it just wraps the object into BDSDummy. 149 | 150 | Parameters 151 | ---------- 152 | object: Python object 153 | 154 | Returns 155 | ------- 156 | BDSDummy class 157 | """ 158 | 159 | return BDSDummy(object) 160 | 161 | def map(self, func, pds): 162 | """ 163 | This is a wrapper for the Python internal map function. 
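The function is applied immediately in the calling process and the results are materialised into a list wrapped in a new PDSDummy.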
164 | 165 | Parameters 166 | ---------- 167 | func: Python func 168 | A function that can be applied to every element of the pds 169 | pds: PDSDummy class 170 | A pseudo-parallel data set to which func should be applied 171 | 172 | Returns 173 | ------- 174 | PDSDummy class 175 | a new pseudo-parallel data set that contains the result of the map 176 | """ 177 | 178 | result_map = map(func, pds.python_list) 179 | result_pds = PDSDummy(list(result_map)) 180 | return result_pds 181 | 182 | def collect(self, pds): 183 | """ 184 | Returns the Python list stored in PDSDummy 185 | 186 | Parameters 187 | ---------- 188 | pds: PDSDummy class 189 | a pseudo-parallel data set 190 | Returns 191 | ------- 192 | Python list 193 | all elements of pds as a list 194 | """ 195 | 196 | return pds.python_list 197 | 198 | 199 | class PDSDummy(PDS): 200 | """ 201 | This is a wrapper for a Python list to fake parallelization. 202 | """ 203 | 204 | def __init__(self, python_list): 205 | self.python_list = python_list 206 | 207 | 208 | class BDSDummy(BDS): 209 | """ 210 | This is a wrapper for a Python object to fake parallelization. 211 | """ 212 | 213 | def __init__(self, object): 214 | self.object = object 215 | 216 | def value(self): 217 | return self.object 218 | 219 | 220 | class NestedParallelizationController: 221 | @abstractmethod 222 | def nested_execution(self): 223 | raise NotImplementedError 224 | 225 | @abstractmethod 226 | def run_nested(self, func, *args, **kwargs): 227 | raise NotImplementedError 228 | -------------------------------------------------------------------------------- /abcpy/backends/mpimanager.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | 3 | mpimanager = None 4 | 5 | 6 | class MPIManager(object): 7 | """Defines the behavior of the slaves/worker processes 8 | 9 | This class construct the MPI communicators structure needed 10 | if the rank of the process is in scheduler_node_ranks, the process is a scheduler 11 | then there is process_per_model process per communicator 12 | """ 13 | 14 | def __init__(self, scheduler_node_ranks=[0], process_per_model=1): 15 | """ 16 | Parameters 17 | ---------- 18 | scheduler_node_ranks: Python list 19 | list of ranks computation should not happen on. 20 | Should include the scheduler so it doesn't get 21 | overwhelmed with work. 
22 | 23 | process_per_model: Integer 24 | the number of process to allow to each model 25 | """ 26 | 27 | self._world_communicator = MPI.COMM_WORLD 28 | self._size = self._world_communicator.Get_size() 29 | self._rank = self._world_communicator.Get_rank() 30 | 31 | # Construct the appropriate communicators for resource allocation to models 32 | # There is one communicator for scheduler nodes 33 | # And one communicator per model 34 | self._scheduler_node_ranks = scheduler_node_ranks 35 | self._process_per_model = process_per_model 36 | self._model_color = int( 37 | ((self._rank - sum(i < self._rank for i in scheduler_node_ranks)) / process_per_model) + 1) 38 | if self._rank in scheduler_node_ranks: 39 | self._model_color = 0 40 | self._model_communicator = MPI.COMM_WORLD.Split(self._model_color, self._rank) 41 | self._model_size = self._model_communicator.Get_size() 42 | self._model_rank = self._model_communicator.Get_rank() 43 | 44 | # create a communicator to broadcast instructions to slaves 45 | self._scheduler_color = 1 46 | if self._model_color == 0 or self._model_rank == 0: 47 | self._scheduler_color = 0 48 | self._scheduler_communicator = MPI.COMM_WORLD.Split(self._scheduler_color, self._rank) 49 | self._scheduler_size = self._scheduler_communicator.Get_size() 50 | self._scheduler_rank = self._scheduler_communicator.Get_rank() 51 | 52 | self._leader = False 53 | self._scheduler = False 54 | self._team = False 55 | self._worker = False 56 | 57 | if self._rank == 0: 58 | self._scheduler = True 59 | elif self._model_rank == 0: 60 | self._team = True 61 | self._leader = True 62 | else: 63 | self._team = True 64 | self._worker = True 65 | 66 | def is_scheduler(self): 67 | ''' Tells if the process is a scheduler ''' 68 | return self._scheduler 69 | 70 | def is_team(self): 71 | ''' Tells if the process is a team ''' 72 | return self._team 73 | 74 | def is_leader(self): 75 | ''' Tells if the process is a leader ''' 76 | return self._leader 77 | 78 | def is_worker(self): 79 | ''' Tells if the process is a worker ''' 80 | return self._worker 81 | 82 | def get_scheduler_node_ranks(self): 83 | ''' Returns the list of scheduler node wanks ''' 84 | return self._scheduler_node_ranks 85 | 86 | def get_world_rank(self): 87 | ''' Returns the current rank ''' 88 | return self._rank 89 | 90 | def get_world_size(self): 91 | ''' Returns the size of the world communicator ''' 92 | return self._size 93 | 94 | def get_world_communicator(self): 95 | ''' Returns the world communicator ''' 96 | return self._world_communicator 97 | 98 | def get_model_rank(self): 99 | ''' Returns the rank in the world communicator ''' 100 | return self._model_rank 101 | 102 | def get_model_size(self): 103 | ''' Returns the size of the model communicator ''' 104 | return self._model_size 105 | 106 | def get_model_communicator(self): 107 | ''' Returns the model communicator ''' 108 | return self._model_communicator 109 | 110 | def get_scheduler_rank(self): 111 | ''' Returns the rank in the scheduler communicator ''' 112 | return self._scheduler_rank 113 | 114 | def get_scheduler_size(self): 115 | ''' Returns the size of the scheduler communicator ''' 116 | return self._scheduler_size 117 | 118 | def get_scheduler_communicator(self): 119 | ''' Returns the scheduler communicator ''' 120 | return self._scheduler_communicator 121 | 122 | 123 | def get_mpi_manager(): 124 | ''' Return the instance of mpimanager 125 | Creates one with default parameters is not already existing ''' 126 | if mpimanager is None: 127 | 
create_mpi_manager([0], 1) 128 | return mpimanager 129 | 130 | 131 | def create_mpi_manager(scheduler_node_ranks, process_per_model): 132 | ''' Creates the instance of mpimanager with given parameters ''' 133 | global mpimanager 134 | mpimanager = MPIManager(scheduler_node_ranks, process_per_model) 135 | -------------------------------------------------------------------------------- /abcpy/backends/spark.py: -------------------------------------------------------------------------------- 1 | from abcpy.backends import Backend, PDS, BDS 2 | 3 | 4 | class BackendSpark(Backend): 5 | """ 6 | A parallelization backend for Apache Spark. It is essetially a wrapper for 7 | the required Spark functionality. 8 | """ 9 | 10 | def __init__(self, sparkContext, parallelism=4): 11 | """ 12 | Initialize the backend with an existing and configured SparkContext. 13 | 14 | Parameters 15 | ---------- 16 | sparkContext: pyspark.SparkContext 17 | an existing and fully configured PySpark context 18 | parallelism: int 19 | defines on how many workers a distributed dataset can be distributed 20 | """ 21 | self.sc = sparkContext 22 | self.parallelism = parallelism 23 | 24 | def parallelize(self, python_list): 25 | """ 26 | This is a wrapper of pyspark.SparkContext.parallelize(). 27 | 28 | Parameters 29 | ---------- 30 | list: Python list 31 | list that is distributed on the workers 32 | 33 | Returns 34 | ------- 35 | PDSSpark class (parallel data set) 36 | A reference object that represents the parallelized list 37 | """ 38 | 39 | rdd = self.sc.parallelize(python_list, self.parallelism) 40 | pds = PDSSpark(rdd) 41 | return pds 42 | 43 | def broadcast(self, object): 44 | """ 45 | This is a wrapper for pyspark.SparkContext.broadcast(). 46 | 47 | Parameters 48 | ---------- 49 | object: Python object 50 | An abitrary object that should be available on all workers 51 | Returns 52 | ------- 53 | BDSSpark class (broadcast data set) 54 | A reference to the broadcasted object 55 | """ 56 | 57 | bcv = self.sc.broadcast(object) 58 | bds = BDSSpark(bcv) 59 | return bds 60 | 61 | def map(self, func, pds): 62 | """ 63 | This is a wrapper for pyspark.rdd.map() 64 | 65 | Parameters 66 | ---------- 67 | func: Python func 68 | A function that can be applied to every element of the pds 69 | pds: PDSSpark class 70 | A parallel data set to which func should be applied 71 | Returns 72 | ------- 73 | PDSSpark class 74 | a new parallel data set that contains the result of the map 75 | """ 76 | 77 | rdd = pds.rdd.map(func) 78 | new_pds = PDSSpark(rdd) 79 | return new_pds 80 | 81 | def collect(self, pds): 82 | """ 83 | A wrapper for pyspark.rdd.collect() 84 | 85 | Parameters 86 | ---------- 87 | pds: PDSSpark class 88 | a parallel data set 89 | Returns 90 | ------- 91 | Python list 92 | all elements of pds as a list 93 | """ 94 | 95 | python_list = pds.rdd.collect() 96 | return python_list 97 | 98 | 99 | class PDSSpark(PDS): 100 | """ 101 | This is a wrapper for Apache Spark RDDs. 102 | """ 103 | 104 | def __init__(self, rdd): 105 | """ 106 | Returns 107 | ------- 108 | rdd: pyspark.rdd 109 | initialize with an Spark RDD 110 | """ 111 | 112 | self.rdd = rdd 113 | 114 | 115 | class BDSSpark(BDS): 116 | """ 117 | This is a wrapper for Apache Spark Broadcast variables. 
118 | """ 119 | 120 | def __init__(self, bcv): 121 | """ 122 | Parameters 123 | ---------- 124 | bcv: pyspark.broadcast.Broadcast 125 | Initialize with a Spark broadcast variable 126 | """ 127 | 128 | self.bcv = bcv 129 | 130 | def value(self): 131 | """ 132 | Returns 133 | ------- 134 | object 135 | returns the referenced object that was broadcasted. 136 | """ 137 | 138 | return self.bcv.value 139 | -------------------------------------------------------------------------------- /abcpy/jointapprox_lhd.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import numpy as np 4 | 5 | 6 | class JointApprox_likelihood(metaclass=ABCMeta): 7 | """This abstract base class defines how the combination of distances computed on the observed and 8 | simulated datasets corresponding to different root models should be implemented. 9 | """ 10 | 11 | @abstractmethod 12 | def __init__(self, models, approx_lhds): 13 | """The constructor of a sub-class must accept non-optional models and corresponding distances 14 | as parameters. 15 | 16 | Parameters 17 | ---------- 18 | models : list 19 | A list of root models which are of each of type abcpy.probabilisticmodels 20 | approx_lhds: list 21 | A list of approximate likelihoods which are of each of type abcpy.approx_lhd and they should be 22 | in the same order as corresponding root models for which it would be used to compute the 23 | approximate likelihood 24 | """ 25 | 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def likelihood(self, d1, d2): 30 | """To be overwritten by any sub-class: should calculate the distance between two 31 | sets of data d1 and d2. 32 | 33 | Notes 34 | ----- 35 | The data sets d1 and d2 are lists that contain the datasets corresponding to the root models. 36 | Both d1 and d2 should have the datasets in the same order as the root models are. 37 | 38 | Parameters 39 | ---------- 40 | d1: Python list 41 | Contains lists which are datasets corresponding to root models. 42 | d2: Python list 43 | Contains lists which are datasets corresponding to root models. 44 | 45 | Returns 46 | ------- 47 | float 48 | Computed approximate likelihood. 49 | """ 50 | 51 | raise NotImplemented 52 | 53 | 54 | class ProductCombination(JointApprox_likelihood): 55 | """ 56 | This class implements the product combination of different approximate likelihoods computed on different datasets 57 | corresponding to different root models. This is not used anymore as we not use loglikelihoods in PMC 58 | 59 | """ 60 | 61 | def __init__(self, models, approx_lhds): 62 | 63 | if len(models) != len(approx_lhds): 64 | raise ValueError('Number of root models and Number of assigned approximate likelihoods are not same') 65 | 66 | self.models = models 67 | self.approx_lhds = approx_lhds 68 | 69 | def likelihood(self, d1, d2): 70 | """Combine the distances between different datasets. 
71 | 72 | Parameters 73 | ---------- 74 | d1, d2: list 75 | A list, containing lists describing the different data sets 76 | """ 77 | if not isinstance(d1, list): 78 | raise TypeError('Data is not of allowed types') 79 | if not isinstance(d2, list): 80 | raise TypeError('Data is not of allowed types') 81 | if len(d1) != len(d2): 82 | raise ValueError('Both the datasets should contain dataset for each of the root models') 83 | 84 | combined_likelihood = 1.0 85 | for ind in range(len(self.approx_lhds)): 86 | combined_likelihood *= self.approx_lhds[ind].likelihood(d1[ind], d2[ind]) 87 | 88 | return combined_likelihood 89 | 90 | 91 | class SumCombination(JointApprox_likelihood): 92 | """ 93 | This class implements the sum combination of different approximate loglikelihoods computed on different datasets 94 | corresponding to different root models 95 | 96 | """ 97 | 98 | def __init__(self, models, approx_lhds): 99 | 100 | if len(models) != len(approx_lhds): 101 | raise ValueError('Number of root models and Number of assigned approximate likelihoods are not same') 102 | 103 | self.models = models 104 | self.approx_lhds = approx_lhds 105 | 106 | def loglikelihood(self, d1, d2): 107 | """Combine the distances between different datasets. 108 | 109 | Parameters 110 | ---------- 111 | d1, d2: list 112 | A list, containing lists describing the different data sets 113 | """ 114 | if not isinstance(d1, list): 115 | raise TypeError('Data is not of allowed types') 116 | if not isinstance(d2, list): 117 | raise TypeError('Data is not of allowed types') 118 | if len(d1) != len(d2): 119 | raise ValueError('Both the datasets should contain dataset for each of the root models') 120 | 121 | combined_likelihood = 0 122 | for ind in range(len(self.approx_lhds)): 123 | combined_likelihood += self.approx_lhds[ind].loglikelihood(d1[ind], d2[ind]) 124 | 125 | return combined_likelihood 126 | 127 | def likelihood(self, d1, d2): 128 | return np.exp(self.loglikelihood(d1, d2)) 129 | -------------------------------------------------------------------------------- /abcpy/jointdistances.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import numpy as np 4 | 5 | 6 | class JointDistance(metaclass=ABCMeta): 7 | """This abstract base class defines how the combination of distances computed on the observed and 8 | simulated datasets corresponding to different root models should be implemented. 9 | """ 10 | 11 | @abstractmethod 12 | def __init__(self, models, distances): 13 | """The constructor of a sub-class must accept non-optional models and corresponding distances 14 | as parameters. 15 | 16 | Parameters 17 | ---------- 18 | models : list 19 | A list of root models which are of each of type abcpy.probabilisticmodels 20 | distances: list 21 | A list of distances which are of each of type abcpy.distances and they should be 22 | in the same order as corresponding root models for which it would be used to compute the 23 | distance 24 | """ 25 | 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def distance(d1, d2): 30 | """To be overwritten by any sub-class: should calculate the distance between two 31 | sets of data d1 and d2. 32 | 33 | Notes 34 | ----- 35 | The data sets d1 and d2 are lists that contain the datasets corresponding to the root models. 36 | Both d1 and d2 should have the datasets in the same order as the root models are. 
37 | 38 | Parameters 39 | ---------- 40 | d1: Python list 41 | Contains lists which are datasets corresponding to root models. 42 | d2: Python list 43 | Contains lists which are datasets corresponding to root models. 44 | 45 | Returns 46 | ------- 47 | numpy.ndarray 48 | The distance between the two input data sets. 49 | """ 50 | 51 | raise NotImplementedError 52 | 53 | @abstractmethod 54 | def dist_max(self): 55 | """To be overwritten by sub-class: should return maximum possible value of the 56 | desired distance function. 57 | 58 | Examples 59 | -------- 60 | If the desired distance maps to :math:`\mathbb{R}`, this method should return numpy.inf. 61 | 62 | Returns 63 | ------- 64 | numpy.float 65 | The maximal possible value of the desired distance function. 66 | """ 67 | 68 | raise NotImplementedError 69 | 70 | 71 | class LinearCombination(JointDistance): 72 | """ 73 | This class implements the linear combination of different distances computed on different datasets corresponding to 74 | different root models 75 | 76 | The maximum value of the distance is linear combination of the maximum value of the different distances it combines. 77 | """ 78 | 79 | def __init__(self, models, distances, weights=None): 80 | """Combine the distances between different datasets. 81 | 82 | Parameters 83 | ---------- 84 | weights: list 85 | A list, containing the weights (for linear combination) corresponding to each of the distances. Should be 86 | the same length of models. The default value if None, for which we assign equal weights to all distances 87 | """ 88 | if len(models) != len(distances): 89 | raise ValueError('Number of root models and Number of assigned distances are not same') 90 | 91 | if weights is None: 92 | self.weights = weights 93 | self.weights = np.ones(shape=(len(distances, ))) / len(distances) 94 | else: 95 | if len(distances) != len(weights): 96 | raise ValueError('Number of distances and Number of weights are not same') 97 | else: 98 | weights = np.array(weights) 99 | self.weights = np.array(weights / sum(weights)).reshape(-1, ) 100 | 101 | self.models = models 102 | self.distances = distances 103 | 104 | def distance(self, d1, d2): 105 | """Combine the distances between different datasets. 
106 | 107 | Parameters 108 | ---------- 109 | d1, d2: list 110 | A list, containing lists describing the different data sets 111 | """ 112 | if not isinstance(d1, list): 113 | raise TypeError('Data is not of allowed types') 114 | if not isinstance(d2, list): 115 | raise TypeError('Data is not of allowed types') 116 | if len(d1) != len(d2): 117 | raise ValueError('Both the datasets should contain dataset for each of the root models') 118 | 119 | combined_distance = 0.0 120 | for ind in range(len(self.distances)): 121 | combined_distance += self.weights[ind] * self.distances[ind].distance(d1[ind], d2[ind]) 122 | 123 | return combined_distance 124 | 125 | def dist_max(self): 126 | combined_distance_max = 0.0 127 | for ind in range(len(self.distances)): 128 | combined_distance_max += self.weights[ind] * self.distances[ind].dist_max() 129 | return combined_distance_max 130 | -------------------------------------------------------------------------------- /abcpy/utils.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | import numpy as np 4 | import ot 5 | 6 | 7 | def cached(func): 8 | cache = {} 9 | 10 | @wraps(func) 11 | def wrapped(x): 12 | if x not in cache: 13 | cache[x] = func(x) 14 | return cache[x] 15 | 16 | return wrapped 17 | 18 | 19 | def wass_dist(samples_1, samples_2, weights_1=None, weights_2=None, num_iter_max=100000, **kwargs): 20 | """ 21 | Computes the Wasserstein 2 distance between two empirical distributions with weights. This uses the POT library to 22 | estimate Wasserstein distance. The Wasserstein distance computation can take long if the number of samples in the 23 | two datasets is large (cost of the computation scales in fact quadratically with the number of samples). 24 | 25 | Parameters 26 | ---------- 27 | samples_1 : np.ndarray 28 | Samples defining the first empirical distribution, with shape (nxd), n being the number of samples in the 29 | first empirical distribution and d the dimension of the random variable. 30 | samples_2 : np.ndarray 31 | Samples defining the second empirical distribution, with shape (mxd), m being the number of samples in the 32 | second empirical distribution and d the dimension of the random variable. 33 | weights_1 : np.ndarray, optional 34 | Weights defining the first empirical distribution, with shape (n), n being the number of samples in the 35 | first empirical distribution. Weights are normalized internally to the function. If not provided, they are 36 | assumed to be identical for all samples. 37 | weights_2 : np.ndarray, optional 38 | Weights defining the second empirical distribution, with shape (m), m being the number of samples in the 39 | second empirical distribution. Weights are normalized internally to the function. If not provided, they are 40 | assumed to be identical for all samples. 41 | num_iter_max : integer, optional 42 | The maximum number of iterations in the linear programming algorithm to estimate the Wasserstein distance. 43 | Default to 100000. 44 | kwargs 45 | Additional arguments passed to ot.emd2 46 | 47 | Returns 48 | ------- 49 | float 50 | The estimated 2-Wasserstein distance. 
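Examples
--------
A minimal usage sketch (the arrays below are purely illustrative and not taken from the library's tests)::

    import numpy as np
    from abcpy.utils import wass_dist

    rng = np.random.RandomState(0)
    samples_1 = rng.randn(100, 2)         # first empirical distribution, 100 samples in 2d
    samples_2 = rng.randn(80, 2) + 1.0    # second empirical distribution, shifted mean
    d = wass_dist(samples_1, samples_2)   # unweighted 2-Wasserstein estimate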
51 | """ 52 | n = samples_1.shape[0] 53 | m = samples_2.shape[0] 54 | 55 | if weights_1 is None: 56 | a = np.ones((n,)) / n 57 | else: 58 | if len(weights_1) != n: 59 | raise RuntimeError("Number of weights and number of samples need to be the same.") 60 | a = weights_1 / np.sum(weights_1) 61 | if weights_2 is None: 62 | b = np.ones((m,)) / m 63 | else: 64 | if len(weights_2) != m: 65 | raise RuntimeError("Number of weights and number of samples need to be the same.") 66 | b = weights_2 / np.sum(weights_2) 67 | 68 | # loss matrix 69 | M = ot.dist(x1=samples_1, x2=samples_2) # this returns squared distance! 70 | cost = ot.emd2(a, b, M, numItermax=num_iter_max, **kwargs) 71 | 72 | return np.sqrt(cost) 73 | -------------------------------------------------------------------------------- /doc/literature/DuttaS-ABCpy-PASC-2017.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{Dutta:2017:AUE:3093172.3093233, 2 | author = {Dutta, Ritabrata and Schoengens, Marcel and Onnela, Jukka-Pekka and Mira, Antonietta}, 3 | title = {ABCpy: A User-Friendly, Extensible, and Parallel Library for Approximate Bayesian Computation}, 4 | booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference}, 5 | series = {PASC '17}, 6 | year = {2017}, 7 | isbn = {978-1-4503-5062-4}, 8 | location = {Lugano, Switzerland}, 9 | pages = {8:1--8:9}, 10 | articleno = {8}, 11 | numpages = {9}, 12 | url = {http://doi.acm.org/10.1145/3093172.3093233}, 13 | doi = {10.1145/3093172.3093233}, 14 | acmid = {3093233}, 15 | publisher = {ACM}, 16 | address = {New York, NY, USA}, 17 | keywords = {ABC, Library, Parallel, Spark}, 18 | } -------------------------------------------------------------------------------- /doc/literature/JSS_2021.bib: -------------------------------------------------------------------------------- 1 | @article{JSSv100i07, 2 | title={{ABCpy}: A High-Performance Computing Perspective to Approximate Bayesian Computation}, 3 | volume={100}, 4 | url={https://www.jstatsoft.org/index.php/jss/article/view/v100i07}, 5 | doi={10.18637/jss.v100.i07}, 6 | abstract={&lt;p&gt;ABCpy is a highly modular scientific library for approximate Bayesian computation (ABC) written in Python. The main contribution of this paper is to document a software engineering effort that enables domain scientists to easily apply ABC to their research without being ABC experts; using ABCpy they can easily run large parallel simulations without much knowledge about parallelization. Further, ABCpy enables ABC experts to easily develop new inference schemes and evaluate them in a standardized environment and to extend the library with new algorithms. These benefits come mainly from the modularity of ABCpy. We give an overview of the design of ABCpy and provide a performance evaluation concentrating on parallelization. This points us towards the inherent imbalance in some of the ABC algorithms. 
We develop a dynamic scheduling MPI implementation to mitigate this issue and evaluate the various ABC algorithms according to their adaptability towards high-performance computing.&lt;/p&gt;}, 7 | number={7}, 8 | journal={Journal of Statistical Software}, 9 | author={Dutta, Ritabrata and Schoengens, Marcel and Pacchiardi, Lorenzo and Ummadisingu, Avinash and Widmer, Nicole and K\"unzli, Pierre and Onnela, Jukka-Pekka and Mira, Antonietta}, 10 | year={2021}, 11 | pages={1–38} 12 | } -------------------------------------------------------------------------------- /doc/source/ABC_rejection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/doc/source/ABC_rejection.png -------------------------------------------------------------------------------- /doc/source/DEVELOP.rst: -------------------------------------------------------------------------------- 1 | Branching Scheme 2 | ================ 3 | 4 | We use the branching strategy described in this `blog post `_. 5 | 6 | 7 | Deploy a new Release 8 | ==================== 9 | 10 | This documentation is mainly intended for the main developers. The deployment of 11 | new releases is automated using Travis CI. However, there are still a few manual 12 | steps required in order to deploy a new release. Assume we want to deploy the 13 | new version `M.m.b': 14 | 15 | 1. Create a release branch `release-M.m.b` 16 | 2. Adapt `VERSION` file in the repos root directory: `echo M.m.b > VERSION` 17 | 3. Adapt `README.md` file: adapt links to correct version of `User Documentation` and `Reference` 18 | 4. Adapt `doc/source/installation.rst` file: to install correct version of ABCpy 19 | 5. Merge all desired feature branches into the release branch 20 | 6. Create a pull/ merge request: release branch -> master 21 | 22 | After a successful merge: 23 | 24 | 7. Create tag vM.m.b (`git tag vM.m.b`) 25 | 8. Retag tag `stable` to the current version 26 | 9. Push the tag (`git push --tags`) 27 | 10. Create a release in GitHub 28 | 29 | The new tag on master will signal Travis to deploy a new package to Pypi while 30 | the GitHub release is just for user documentation. 31 | -------------------------------------------------------------------------------- /doc/source/abcpy.rst: -------------------------------------------------------------------------------- 1 | abcpy package 2 | ============= 3 | 4 | This reference gives details about the API of modules, classes and functions included in ABCpy. 5 | 6 | The following diagram shows selected classes with their most important 7 | methods. Abstract classes, which cannot be instantiated, are highlighted in 8 | dark gray and derived classes are highlighted in light gray. Inheritance is 9 | shown by filled arrows. Arrows with no filling highlight associations, e.g., 10 | :py:class:`Distance ` is associated with :py:class:`Statistics ` 11 | because it calls a method of the instantiated class to translate the input data to summary statistics. 12 | 13 | .. image:: class-diagram.png 14 | 15 | .. currentmodule:: abcpy 16 | 17 | abcpy.acceptedparametersmanager module 18 | -------------------------------------- 19 | 20 | .. automodule:: abcpy.acceptedparametersmanager 21 | :members: 22 | :special-members: __init__, _reset_flags 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | 27 | abcpy.approx_lhd module 28 | ----------------------- 29 | 30 | .. 
automodule:: abcpy.approx_lhd 31 | :members: 32 | :special-members: __init__ 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | abcpy.backends module 37 | --------------------- 38 | 39 | .. automodule:: abcpy.backends.base 40 | :members: 41 | :special-members: __init__ 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | .. automodule:: abcpy.backends.spark 46 | :members: 47 | :special-members: __init__ 48 | :undoc-members: 49 | :show-inheritance: 50 | 51 | .. automodule:: abcpy.backends.mpi 52 | :members: 53 | :special-members: __init__ 54 | :undoc-members: 55 | :show-inheritance: 56 | 57 | abcpy.continuousmodels module 58 | ----------------------------- 59 | 60 | .. automodule:: abcpy.continuousmodels 61 | :members: 62 | :special-members: __init__, _check_parameters_at_initialization, _check_parameters_before_sampling, _check_output 63 | :undoc-members: 64 | :show-inheritance: 65 | 66 | abcpy.discretemodels module 67 | --------------------------- 68 | 69 | .. automodule:: abcpy.discretemodels 70 | :members: 71 | :special-members: __init__, _check_parameters_at_initialization, _check_parameters_before_sampling, _check_output 72 | :undoc-members: 73 | :show-inheritance: 74 | 75 | abcpy.distances module 76 | ---------------------- 77 | 78 | .. automodule:: abcpy.distances 79 | :members: 80 | :special-members: __init__, _calculate_summary_stat 81 | :undoc-members: 82 | :show-inheritance: 83 | 84 | abcpy.graphtools module 85 | ----------------------- 86 | 87 | .. automodule:: abcpy.graphtools 88 | :members: 89 | :special-members: __init__, _sample_from_prior, _reset_flags, _get_mapping, _get_names_and_parameters 90 | :undoc-members: 91 | :show-inheritance: 92 | 93 | abcpy.inferences module 94 | ----------------------- 95 | 96 | .. automodule:: abcpy.inferences 97 | :members: 98 | :special-members: __init__ 99 | :undoc-members: 100 | :show-inheritance: 101 | 102 | abcpy.modelselections module 103 | ---------------------------- 104 | 105 | .. automodule:: abcpy.modelselections 106 | :members: 107 | :special-members: __init__ 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | abcpy.NN_utilities module 112 | ------------------------- 113 | 114 | Functions and classes needed for the neural network based summary statistics learning. 115 | 116 | .. automodule:: abcpy.NN_utilities.algorithms 117 | :members: 118 | :special-members: __init__ 119 | :undoc-members: 120 | :show-inheritance: 121 | 122 | .. automodule:: abcpy.NN_utilities.datasets 123 | :members: 124 | :special-members: __init__ 125 | :undoc-members: 126 | :show-inheritance: 127 | 128 | .. automodule:: abcpy.NN_utilities.losses 129 | :members: 130 | :special-members: __init__ 131 | :undoc-members: 132 | :show-inheritance: 133 | 134 | .. automodule:: abcpy.NN_utilities.networks 135 | :members: 136 | :special-members: __init__ 137 | :undoc-members: 138 | :show-inheritance: 139 | 140 | .. automodule:: abcpy.NN_utilities.trainer 141 | :members: 142 | :special-members: __init__ 143 | :undoc-members: 144 | :show-inheritance: 145 | 146 | .. automodule:: abcpy.NN_utilities.utilities 147 | :members: 148 | :special-members: __init__ 149 | :undoc-members: 150 | :show-inheritance: 151 | 152 | 153 | 154 | abcpy.output module 155 | ------------------- 156 | 157 | .. automodule:: abcpy.output 158 | :members: 159 | :special-members: __init__ 160 | :undoc-members: 161 | :show-inheritance: 162 | 163 | abcpy.perturbationkernel module 164 | ------------------------------- 165 | 166 | .. 
automodule:: abcpy.perturbationkernel 167 | :members: 168 | :special-members: __init__ 169 | :undoc-members: 170 | :show-inheritance: 171 | 172 | abcpy.probabilisticmodels module 173 | -------------------------------- 174 | 175 | .. automodule:: abcpy.probabilisticmodels 176 | :members: 177 | :special-members: __init__, _check_input, _check_output, __get_item__, __add__, __sub__, __mul__, __truediv__, __pow__ 178 | :undoc-members: 179 | :show-inheritance: 180 | 181 | 182 | abcpy.statistics module 183 | ----------------------- 184 | 185 | .. automodule:: abcpy.statistics 186 | :members: 187 | :special-members: __init__ 188 | :undoc-members: 189 | :show-inheritance: 190 | 191 | abcpy.statisticslearning module 192 | ------------------------------- 193 | 194 | .. automodule:: abcpy.statisticslearning 195 | :members: 196 | :special-members: __init__ 197 | :undoc-members: 198 | :show-inheritance: 199 | 200 | abcpy.transformers module 201 | ------------------------------- 202 | 203 | .. automodule:: abcpy.transformers 204 | :members: 205 | :special-members: __init__ 206 | :undoc-members: 207 | :show-inheritance: 208 | -------------------------------------------------------------------------------- /doc/source/class-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/doc/source/class-diagram.png -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. abcpy documentation master file, created by 2 | sphinx-quickstart on Tue Jan 17 14:32:33 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to ABCpy's documentation! 7 | ================================= 8 | 9 | :Release: |version| 10 | :Date: |today| 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: User Documentation 15 | 16 | installation 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | 21 | getting_started 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | 26 | user_customization 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | 31 | parallelization 32 | 33 | .. toctree:: 34 | :maxdepth: 2 35 | 36 | postanalysis 37 | 38 | .. toctree:: 39 | :maxdepth: 2 40 | :caption: Developer Documentation 41 | 42 | DEVELOP 43 | 44 | .. toctree:: 45 | :maxdepth: 2 46 | :caption: Reference 47 | 48 | abcpy 49 | 50 | 51 | Indices and tables 52 | ================== 53 | 54 | * :ref:`genindex` 55 | * :ref:`modindex` 56 | * :ref:`search` 57 | 58 | -------------------------------------------------------------------------------- /doc/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | 1. Installation 4 | =============== 5 | 6 | ABCpy requires Python3 and is not compatible with Python2. The simplest way to install ABCpy is via PyPI and we 7 | recommended to use this method. 8 | 9 | Installation from PyPI 10 | ~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Simplest way to install 13 | :: 14 | 15 | pip3 install abcpy 16 | 17 | This also works in a virtual environment. 
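For example, a typical virtual-environment workflow could look like the following (the environment name ``abcpy-env`` is arbitrary)
::

    python3 -m venv abcpy-env
    source abcpy-env/bin/activate
    pip3 install abcpy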
18 | 19 | 20 | Installation from Source 21 | ~~~~~~~~~~~~~~~~~~~~~~~~ 22 | 23 | If you prefer to work on the source, clone the repository 24 | :: 25 | 26 | git clone https://github.com/eth-cscs/abcpy.git 27 | 28 | Make sure all requirements are installed 29 | :: 30 | 31 | cd abcpy 32 | pip3 install -r requirements.txt 33 | 34 | To create a package and install it, do 35 | :: 36 | 37 | make package 38 | pip3 install wheel 39 | pip3 install build/dist/abcpy-0.6.3-py3-none-any.whl 40 | 41 | ``wheel`` is required to install in this way. 42 | 43 | 44 | Note that ABCpy requires Python3. 45 | 46 | Requirements 47 | ~~~~~~~~~~~~ 48 | 49 | 50 | Basic requirements are listed in ``requirements.txt`` in the repository (`click here 51 | `_). That also includes packages required for MPI parallelization there, which is very often used. However, we also provide support for parallelization with Apache Spark (see below). 52 | 53 | Additional packages are required for additional features: 54 | 55 | 56 | - ``torch`` is needed in order to use neural networks to learn summary statistics. It can be installed by running: :: 57 | 58 | pip install -r requirements/neural_networks_requirements.txt 59 | - In order to use Apache Spark for parallelization, ``findspark`` and ``pyspark`` are required; install them by: :: 60 | 61 | pip install -r requirements/backend-spark.txt 62 | 63 | 64 | 65 | Troubleshooting ``mpi4py`` installation 66 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 67 | 68 | ``mpi4py`` requires a working MPI implementation to be installed; check the `official docs 69 | `_ for more info. On Ubuntu, that can be installed with: 70 | :: 71 | 72 | sudo apt-get install libopenmpi-dev 73 | 74 | Even when that is present, running ``pip install mpi4py`` can sometimes lead to errors. In fact, as specified in the `official docs 75 | `_, the ``mpicc`` compiler needs to be in the search path. If that is not the case, a workaround is: 76 | :: 77 | 78 | env MPICC=/path/to/mpicc pip install mpi4py 79 | 80 | In some cases, even the above may not be enough. A possibility is using ``conda`` (``conda install mpi4py``) which usually handles package dependencies better than ``pip``. Alternatively, you can try by installing directly ``mpi4py`` from the package manager; in Ubuntu, you can do: 81 | :: 82 | 83 | sudo apt install python3-mpi4py 84 | 85 | which however does not work with virtual environments. 
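To verify that ``mpi4py`` is working, a generic smoke test (not specific to ABCpy) is
::

    mpirun -n 2 python3 -c "from mpi4py import MPI; print(MPI.COMM_WORLD.Get_rank())"

which should print the ranks ``0`` and ``1`` (in any order).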
86 | 87 | -------------------------------------------------------------------------------- /doc/source/network.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/doc/source/network.pdf -------------------------------------------------------------------------------- /doc/source/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/doc/source/network.png -------------------------------------------------------------------------------- /doc/source/network1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/doc/source/network1.pdf -------------------------------------------------------------------------------- /doc/source/network1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/doc/source/network1.png -------------------------------------------------------------------------------- /doc/source/postanalysis.rst: -------------------------------------------------------------------------------- 1 | .. _postanalysis: 2 | 3 | 5. Post Analysis 4 | ================ 5 | 6 | The output of an inference scheme is a Journal 7 | (:py:class:`abcpy.output.Journal`) which holds all the necessary results and 8 | convenient methods to do the post analysis. 9 | 10 | Basis Analysis 11 | ~~~~~~~~~~~~~~ 12 | 13 | One can easily access the sampled parameters and corresponding weights using: 14 | 15 | .. literalinclude:: ../../examples/backends/dummy/pmcabc_gaussian.py 16 | :language: python 17 | :lines: 77-78 18 | :dedent: 4 19 | 20 | The output of :py:meth:`get_parameters()` is a Python dictionary. The keys for this dictionary are the names you specified for the parameters. The corresponding values are the marginal posterior samples of that parameter. Here is a short example of what you would specify, and what would be the output in the end: 21 | 22 | .. code-block:: python 23 | 24 | a = Normal([[1],[0.1]], name='parameter_1') 25 | b = MultivariateNormal([[1,1],[[0.1,0],[0,0.1]]], name='parameter_2') 26 | 27 | If one defined a model with these two parameters as inputs and ``n_sample=2``, the following would be the output of :py:meth:`journal.get_parameters()`: 28 | 29 | .. code-block:: python 30 | 31 | {'parameter_1' : [[0.95],[0.97]], 'parameter_2': [[0.98,1.03],[1.06,0.92]]} 32 | 33 | These are samples at the final step of ABC algorithm. If you want samples from the earlier steps of a sequential algorithm you can get a Python dictionary for that step by using: 34 | 35 | .. code-block:: python 36 | 37 | journal.get_parameters(step_number) 38 | 39 | Since this is a dictionary, you can also access the values for each step as: 40 | 41 | .. code-block:: python 42 | 43 | journal.get_parameters(step_number)["name"] 44 | 45 | 46 | For the post analysis basic functions are provided: 47 | 48 | .. literalinclude:: ../../examples/backends/dummy/pmcabc_gaussian.py 49 | :language: python 50 | :lines: 80-82 51 | :dedent: 4 52 | 53 | Also, to ensure reproducibility, every journal stores the parameters of the 54 | algorithm that created it: 55 | 56 | .. 
literalinclude:: ../../examples/backends/dummy/pmcabc_gaussian.py 57 | :language: python 58 | :lines: 85 59 | :dedent: 4 60 | 61 | And certainly, a journal can easily be saved to and loaded from disk: 62 | 63 | .. literalinclude:: ../../examples/backends/dummy/pmcabc_gaussian.py 64 | :language: python 65 | :lines: 91, 94 66 | :dedent: 4 67 | 68 | Posterior plots and diagnostics 69 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 70 | 71 | 72 | You can plot the inferred posterior distribution of the parameters in the following way: 73 | 74 | .. literalinclude:: ../../examples/backends/dummy/pmcabc_gaussian.py 75 | :language: python 76 | :lines: 88 77 | :dedent: 4 78 | 79 | The above line plots the posterior distribution for all the parameters and stores it in ``posterior.png``; if you instead want to plot it for some 80 | parameters only, you can use the ``parameters_to_show`` argument; in addition, the ``ranges_parameters`` argument can be 81 | used to provide a dictionary specifying the limits for the axis in the plots: 82 | 83 | .. code-block:: python 84 | 85 | journal.plot_posterior_distr(parameters_to_show='parameter_1', 86 | ranges_parameters={'parameter_1': [0,2]}) 87 | 88 | 89 | For journals generated with sequential algorithms, we provide a way to check the convergence by plotting the estimated 90 | Effective Sample Size (ESS) at each iteration, as well as an estimate of the Wasserstein distance between the empirical 91 | distributions defined by the samples and weights at subsequent iterations: 92 | 93 | .. code-block:: python 94 | 95 | journal.plot_ESS() 96 | journal.Wass_convergence_plot() 97 | 98 | 99 | Instead, for journals generated by MCMC, we provide way to plot the traceplot for the required parameters: 100 | 101 | .. code-block:: python 102 | 103 | journal.traceplot() 104 | 105 | 106 | 107 | Posterior resampling and predictive check 108 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 109 | 110 | In some cases, you may want to resample (for instance, bootstrapping or subsampling) the posterior samples stored in a 111 | Journal, by tacking into account the posterior weights. 112 | This can be done using the :py:meth:`resample()` method. Behind the scenes, 113 | this uses the numpy.random.choice method, and it inherits arguments from it. It allows to do different things, 114 | for instance: 115 | 116 | 117 | * if the set of posterior samples (weighted or unweighted) is too large, you can obtained a subsampled (without replacement) set by doing: 118 | 119 | .. code-block:: python 120 | 121 | new_journal = journal.resample(n_samples=100, replace=False) 122 | 123 | 124 | * Alternatively, if the used algorithm returns weighted posterior samples, you may want instead an unweighted set of samples obtained by sampling with replacement (commonly called bootstrapping); this can be done with the following line (where the number of required bootstrapped samples in the new journal is unspecified and therefore corresponding to the number of samples in the old ``journal``): 125 | 126 | .. code-block:: python 127 | 128 | new_journal = journal.resample() 129 | 130 | 131 | Finally, in some cases you may want to generate simulations from the model for parameter values sampled from the posterior, 132 | for instance in order to check similarity with the original observation (predictive check). ABCpy provides the 133 | :py:class:`output.GenerateFromJournal` to do that. 
This class needs to be instanstiated 134 | by providing to it the model and the backend which you want to use for the simulation; then, you can pass a Journal as argument to the 135 | :py:meth:`generate()` method in order to generate simulations from the 136 | posterior samples contained there: 137 | 138 | .. code-block:: python 139 | 140 | generate_from_journal = GenerateFromJournal([model], backend=backend) 141 | parameters, simulations, normalized_weights = generate_from_journal.generate(journal) 142 | 143 | -------------------------------------------------------------------------------- /doc/source/rtfd_requirements.txt: -------------------------------------------------------------------------------- 1 | cloudpickle 2 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | - defaults 5 | dependencies: 6 | - numpy 7 | - scipy 8 | - scikit-learn>=0.23.1 9 | - glmnet>=2.2.1 10 | - openmpi 11 | - mpi4py 12 | - cloudpickle 13 | - matplotlib 14 | - tqdm 15 | - pot 16 | - pip 17 | - pip: 18 | - abcpy -------------------------------------------------------------------------------- /examples/approx_lhd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/approx_lhd/__init__.py -------------------------------------------------------------------------------- /examples/approx_lhd/pmc_hierarchical_models.py: -------------------------------------------------------------------------------- 1 | """An example showing how to implement a bayesian network in ABCpy. We consider here a model of school grades which 2 | depend on some variables.""" 3 | import logging 4 | 5 | 6 | def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 7 | """Perform inference for this example. 8 | 9 | Parameters 10 | ---------- 11 | steps : integer, optional 12 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 13 | n_samples : integer, optional 14 | Number of posterior samples to generate. The default value is 250. 15 | n_samples_per_param : integer, optional 16 | Number of data points in each simulated data set. The default value is 10. 17 | 18 | Returns 19 | ------- 20 | abcpy.output.Journal 21 | A journal containing simulation results, metadata and optionally intermediate results. 
22 | """ 23 | logging.basicConfig(level=logging_level) 24 | # Observed data corresponding to model_1 defined below 25 | grades_obs = [3.872486707973337, 4.6735380808674405, 3.9703538990858376, 4.11021272048805, 4.211048655421368, 26 | 4.154817956586653, 4.0046893064392695, 4.01891381384729, 4.123804757702919, 4.014941267301294, 27 | 3.888174595940634, 4.185275142948246, 4.55148774469135, 3.8954427675259016, 4.229264035335705, 28 | 3.839949451328312, 4.039402553532825, 4.128077814241238, 4.361488645531874, 4.086279074446419, 29 | 4.370801602256129, 3.7431697332475466, 4.459454162392378, 3.8873973643008255, 4.302566721487124, 30 | 4.05556051626865, 4.128817316703757, 3.8673704442215984, 4.2174459453805015, 4.202280254493361, 31 | 4.072851400451234, 3.795173229398952, 4.310702877332585, 4.376886328810306, 4.183704734748868, 32 | 4.332192463368128, 3.9071312388426587, 4.311681374107893, 3.55187913252144, 3.318878360783221, 33 | 4.187850500877817, 4.207923106081567, 4.190462065625179, 4.2341474252986036, 4.110228694304768, 34 | 4.1589891480847765, 4.0345604687633045, 4.090635481715123, 3.1384654393449294, 4.20375641386518, 35 | 4.150452690356067, 4.015304457401275, 3.9635442007388195, 4.075915739179875, 3.5702080541929284, 36 | 4.722333310410388, 3.9087618197155227, 4.3990088006390735, 3.968501165774181, 4.047603645360087, 37 | 4.109184340976979, 4.132424805281853, 4.444358334346812, 4.097211737683927, 4.288553086265748, 38 | 3.8668863066511303, 3.8837108501541007] 39 | 40 | # The prior information changing the class size and social background, depending on school location 41 | from abcpy.continuousmodels import Uniform, Normal 42 | school_location = Uniform([[0.2], [0.3]], name="school_location") 43 | 44 | # The average class size of a certain school 45 | class_size = Normal([[school_location], [0.1]], name="class_size") 46 | 47 | # The social background of a student 48 | background = Normal([[school_location], [0.1]], name="background") 49 | 50 | # The grade a student would receive without any bias 51 | grade_without_additional_effects = Normal([[4.5], [0.25]], name="grade_without_additional_effects") 52 | 53 | # The grade a student of a certain school receives; this defined a new random variable by subtraction 54 | final_grade = grade_without_additional_effects - class_size - background 55 | 56 | # Observed data corresponding to model_2 defined below 57 | scholarship_obs = [2.7179657436207805, 2.124647285937229, 3.07193407853297, 2.335024761813643, 2.871893855192, 58 | 3.4332002458233837, 3.649996835818173, 3.50292335102711, 2.815638168018455, 2.3581613289315992, 59 | 2.2794821846395568, 2.8725835459926503, 3.5588573782815685, 2.26053126526137, 1.8998143530749971, 60 | 2.101110815311782, 2.3482974964831573, 2.2707679029919206, 2.4624550491079225, 2.867017757972507, 61 | 3.204249152084959, 2.4489542437714213, 1.875415915801106, 2.5604889644872433, 3.891985093269989, 62 | 2.7233633223405205, 2.2861070389383533, 2.9758813233490082, 3.1183403287267755, 63 | 2.911814060853062, 2.60896794303205, 3.5717098647480316, 3.3355752461779824, 1.99172284546858, 64 | 2.339937680892163, 2.9835630207301636, 2.1684912355975774, 3.014847335983034, 2.7844122961916202, 65 | 2.752119871525148, 2.1567428931391635, 2.5803629307680644, 2.7326646074552103, 2.559237193255186, 66 | 3.13478196958166, 2.388760269933492, 3.2822443541491815, 2.0114405441787437, 3.0380056368041073, 67 | 2.4889680313769724, 2.821660164621084, 3.343985964873723, 3.1866861970287808, 4.4535037154856045, 68 | 3.0026333138006027, 
2.0675706089352612, 2.3835301730913185, 2.584208398359566, 3.288077633446465, 69 | 2.6955853384148183, 2.918315169739928, 3.2464814419322985, 2.1601516779909433, 3.231003347780546, 70 | 1.0893224045062178, 0.8032302688764734, 2.868438615047827] 71 | 72 | # A quantity that determines whether a student will receive a scholarship 73 | scholarship_without_additional_effects = Normal([[2], [0.5]], name="scholarship_without_additional_effects") 74 | 75 | # A quantity determining whether a student receives a scholarship, including his social background 76 | final_scholarship = scholarship_without_additional_effects + 3 * background 77 | 78 | # Define a summary statistics for final grade and final scholarship 79 | from abcpy.statistics import Identity 80 | statistics_calculator_final_grade = Identity(degree=2, cross=False) 81 | statistics_calculator_final_scholarship = Identity(degree=3, cross=False) 82 | 83 | # Define an approximate likelihood for final grade and final scholarship 84 | from abcpy.approx_lhd import SynLikelihood 85 | approx_lhd_final_grade = SynLikelihood(statistics_calculator_final_grade) 86 | approx_lhd_final_scholarship = SynLikelihood(statistics_calculator_final_scholarship) 87 | 88 | # Define a backend 89 | from abcpy.backends import BackendDummy as Backend 90 | backend = Backend() 91 | 92 | # Define a perturbation kernel to explore parameter space 93 | from abcpy.perturbationkernel import DefaultKernel 94 | kernel = DefaultKernel([school_location, class_size, grade_without_additional_effects, 95 | background, scholarship_without_additional_effects]) 96 | 97 | # Define sampler to use with the 98 | from abcpy.inferences import PMC 99 | sampler = PMC([final_grade, final_scholarship], 100 | [approx_lhd_final_grade, approx_lhd_final_scholarship], backend, kernel, seed=1) 101 | 102 | # Sample 103 | journal = sampler.sample([grades_obs, scholarship_obs], steps, n_sample, n_samples_per_param) 104 | return journal 105 | 106 | 107 | def analyse_journal(journal): 108 | # output parameters and weights 109 | print(journal.get_parameters()) 110 | print(journal.weights) 111 | 112 | # do post analysis 113 | print(journal.posterior_mean()) 114 | print(journal.posterior_cov()) 115 | 116 | # print configuration 117 | print(journal.configuration) 118 | 119 | # plot posterior 120 | journal.plot_posterior_distr(path_to_save="posterior.png") 121 | 122 | # save and load journal 123 | journal.save("experiments.jnl") 124 | 125 | from abcpy.output import Journal 126 | new_journal = Journal.fromFile('experiments.jnl') 127 | 128 | 129 | if __name__ == "__main__": 130 | journal = infer_parameters(logging_level=logging.INFO) 131 | analyse_journal(journal) 132 | -------------------------------------------------------------------------------- /examples/backends/README.md: -------------------------------------------------------------------------------- 1 | # Parallelization Backends 2 | We showcase here how to use the different parallelization backends with the same inference problem. See [here](https://abcpy.readthedocs.io/en/latest/parallelization.html#) for more information. 3 | 4 | ## Apache Spark 5 | 6 | This uses the Apache Spark backend for parallelization. It relies on the `pyspark` and `findspark` library. 
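Both can be installed with the provided requirements file (see also the installation documentation):

    pip install -r requirements/backend-spark.txt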
7 | 8 | In this setup, the number of parallel processes is defined inside the Python code, with the following lines: 9 | 10 | import pyspark 11 | sc = pyspark.SparkContext() 12 | from abcpy.backends import BackendSpark as Backend 13 | backend = Backend(sc, parallelism=4) 14 | 15 | Then, the parallel script can be run with: 16 | 17 | PYSPARK_PYTHON=python3 spark-submit apache_spark/pmcabc_gaussian.py 18 | 19 | where the environment variable `PYSPARK_PYTHON` is set as often Spark installations use Python2 by default. 20 | 21 | 22 | ## Dummy 23 | 24 | This is a dummy backend which does not parallelize; it is useful for debug and testing purposes. Simply run the Python file as normal. 25 | 26 | ## MPI 27 | 28 | This used MPI to distribute the inference task; we exploit the `mpi4py` Python library for using MPI from Python. 29 | 30 | Mainly, we distribute data generation from the model, which is usually the most expensive part in ABC inference. 31 | 32 | We have two files in `mpi` folder: 33 | 1. `pmcabc_gaussian.py` performs a simple inference experiment on a gaussian model with PMCABC; this is the same as in the other two backends 34 | 2. `mpi_model_inferences.py` showcases how to use nested MPI parallelization with a model which already has some level of parallelization with MPI. That is done with several ABC algorithms. See below to understand how to run this file correctly. 35 | 36 | To run the files with MPI, the following command is required: 37 | 38 | mpirun -n python3 39 | 40 | For instance, to run `pmcabc_gaussian.py` with 4 tasks, we can run: 41 | 42 | mpirun -n 4 python3 mpi/pmcabc_gaussian.py 43 | 44 | ### Nested parallelization with MPI 45 | 46 | In `mpi_model_inferences.py`, the model itself is parallelized with MPI. We can run nested parallelized inference by considering _n_ independent model instances (ie we simulate _n_ independent copies of the model at once) each of which is assigned _m_ MPI tasks. Moreover, we also require one additional MPI task to work as a master in this setup. Therefore, in total we need _(n * m) + 1_ MPI tasks. In this case, we have set _m=2_ in the Python code via the lines: 47 | 48 | ``` 49 | from abcpy.backends import BackendMPI as Backend 50 | backend = Backend(process_per_model=2) 51 | ``` 52 | 53 | Let's say we want to parallelize the model _n=3_ times. Therefore, we use the following command: 54 | 55 | mpirun -n 7 python3 mpi/mpi_model_inferences.py 56 | 57 | as _(3*2) + 1 = 7_. Note that, in this scenario, using only 6 tasks overall leads to failure of the script due to how the tasks are assigned to the model instances. 
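More generally, the required task count follows the rule *total MPI tasks = n * m + 1*. Another worked instance of the same rule (numbers chosen only for illustration):

    # n = 5 model copies with m = 2 tasks each (process_per_model=2): 5*2 + 1 = 11
    mpirun -n 11 python3 mpi/mpi_model_inferences.py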
58 | -------------------------------------------------------------------------------- /examples/backends/apache_spark/pmcabc_gaussian.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | 5 | 6 | def setup_backend(): 7 | import pyspark 8 | sc = pyspark.SparkContext() 9 | from abcpy.backends import BackendSpark as Backend 10 | backend = Backend(sc, parallelism=4) 11 | return backend 12 | 13 | 14 | def infer_parameters(backend, steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 15 | logging.basicConfig(level=logging_level) 16 | # define observation for true parameters mean=170, std=15 17 | height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 18 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 19 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 20 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 21 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 22 | 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 23 | 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 24 | 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 25 | 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 26 | 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 27 | 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 28 | 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] 29 | 30 | # define prior 31 | from abcpy.continuousmodels import Uniform 32 | mu = Uniform([[150], [200]], name='mu') 33 | sigma = Uniform([[5], [25]], name='sigma') 34 | 35 | # define the model 36 | from abcpy.continuousmodels import Normal 37 | height = Normal([mu, sigma], name='height') 38 | 39 | # define statistics 40 | from abcpy.statistics import Identity 41 | statistics_calculator = Identity(degree=2, cross=False) 42 | 43 | # define distance 44 | from abcpy.distances import LogReg 45 | distance_calculator = LogReg(statistics_calculator, seed=42) 46 | 47 | # define sampling scheme 48 | from abcpy.inferences import PMCABC 49 | sampler = PMCABC([height], [distance_calculator], backend, seed=1) 50 | 51 | # sample from scheme 52 | eps_arr = np.array([.75]) 53 | epsilon_percentile = 10 54 | journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 55 | 56 | return journal 57 | 58 | 59 | def analyse_journal(journal): 60 | # output parameters and weights 61 | print(journal.get_parameters()) 62 | print(journal.get_weights()) 63 | 64 | # do post analysis 65 | print(journal.posterior_mean()) 66 | print(journal.posterior_cov()) 67 | 68 | # print configuration 69 | print(journal.configuration) 70 | 71 | # plot posterior 72 | journal.plot_posterior_distr(path_to_save="posterior.png") 73 | 74 | # save and load journal 75 | journal.save("experiments.jnl") 76 | 77 | from abcpy.output import Journal 78 | new_journal = Journal.fromFile('experiments.jnl') 79 | 80 | 81 | if __name__ == "__main__": 82 | backend = setup_backend() 83 | journal = 
infer_parameters(backend, steps=1, logging_level=logging.INFO) 84 | analyse_journal(journal) 85 | -------------------------------------------------------------------------------- /examples/backends/dummy/pmcabc_gaussian.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | 5 | 6 | def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 7 | """Perform inference for this example. 8 | 9 | Parameters 10 | ---------- 11 | steps : integer, optional 12 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 13 | n_samples : integer, optional 14 | Number of posterior samples to generate. The default value is 250. 15 | n_samples_per_param : integer, optional 16 | Number of data points in each simulated data set. The default value is 10. 17 | 18 | Returns 19 | ------- 20 | abcpy.output.Journal 21 | A journal containing simulation results, metadata and optionally intermediate results. 22 | """ 23 | logging.basicConfig(level=logging_level) 24 | # define observation for true parameters mean=170, std=15 25 | height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 26 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 27 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 28 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 29 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 30 | 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 31 | 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 32 | 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 33 | 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 34 | 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 35 | 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 36 | 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] 37 | 38 | # define prior 39 | from abcpy.continuousmodels import Uniform 40 | mu = Uniform([[150], [200]], name='mu') 41 | sigma = Uniform([[5], [25]], name='sigma') 42 | 43 | # define the model 44 | from abcpy.continuousmodels import Normal 45 | height = Normal([mu, sigma], name='height') 46 | 47 | # define statistics 48 | from abcpy.statistics import Identity 49 | statistics_calculator = Identity(degree=2, cross=False) 50 | 51 | # define distance 52 | from abcpy.distances import LogReg 53 | distance_calculator = LogReg(statistics_calculator, seed=42) 54 | 55 | # define kernel 56 | from abcpy.perturbationkernel import DefaultKernel 57 | kernel = DefaultKernel([mu, sigma]) 58 | 59 | # define backend 60 | # Note, the dummy backend does not parallelize the code! 
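# (It is mainly useful for debugging and testing; for parallel runs one could instead
# use BackendMPI or BackendSpark, as shown in the mpi/ and apache_spark/ variants of
# this script; see examples/backends/README.md.)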
61 | from abcpy.backends import BackendDummy as Backend 62 | backend = Backend() 63 | 64 | # define sampling scheme 65 | from abcpy.inferences import PMCABC 66 | sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1) 67 | 68 | eps_arr = np.array([.75]) 69 | epsilon_percentile = 10 70 | journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 71 | 72 | return journal 73 | 74 | 75 | def analyse_journal(journal): 76 | # output parameters and weights 77 | print(journal.get_parameters()) 78 | print(journal.get_weights()) 79 | 80 | # do post analysis 81 | print(journal.posterior_mean()) 82 | print(journal.posterior_cov()) 83 | 84 | # print configuration 85 | print(journal.configuration) 86 | 87 | # plot posterior 88 | journal.plot_posterior_distr(path_to_save="posterior.png") 89 | 90 | # save and load journal 91 | journal.save("experiments.jnl") 92 | 93 | from abcpy.output import Journal 94 | new_journal = Journal.fromFile('experiments.jnl') 95 | 96 | 97 | if __name__ == "__main__": 98 | journal = infer_parameters(logging_level=logging.INFO) 99 | analyse_journal(journal) 100 | -------------------------------------------------------------------------------- /examples/backends/mpi/pmcabc_gaussian.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | 5 | 6 | def setup_backend(): 7 | from abcpy.backends import BackendMPI as Backend 8 | backend = Backend() 9 | # The above line is equivalent to: 10 | # backend = Backend(process_per_model=1) 11 | # Notice: Models not parallelized by MPI should not be given process_per_model > 1 12 | return backend 13 | 14 | 15 | def infer_parameters(backend, steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 16 | """Perform inference for this example. 17 | 18 | Parameters 19 | ---------- 20 | backend 21 | The parallelization backend 22 | steps : integer, optional 23 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 24 | n_samples : integer, optional 25 | Number of posterior samples to generate. The default value is 250. 26 | n_samples_per_param : integer, optional 27 | Number of data points in each simulated data set. The default value is 10. 28 | 29 | Returns 30 | ------- 31 | abcpy.output.Journal 32 | A journal containing simulation results, metadata and optionally intermediate results. 
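Notes
-----
This script is meant to be launched under MPI, e.g. ``mpirun -n 4 python3 pmcabc_gaussian.py``
(see ``examples/backends/README.md``); the number of tasks shown there is only an example.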
33 | """ 34 | logging.basicConfig(level=logging_level) 35 | # define observation for true parameters mean=170, std=15 36 | height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 37 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 38 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 39 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 40 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 41 | 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 42 | 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 43 | 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 44 | 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 45 | 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 46 | 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 47 | 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] 48 | 49 | # define prior 50 | from abcpy.continuousmodels import Uniform 51 | mu = Uniform([[150], [200]], name='mu') 52 | sigma = Uniform([[5], [25]], name='sigma') 53 | 54 | # define the model 55 | from abcpy.continuousmodels import Normal 56 | height = Normal([mu, sigma], name='height') 57 | 58 | # define statistics 59 | from abcpy.statistics import Identity 60 | statistics_calculator = Identity(degree=2, cross=False) 61 | 62 | # define distance 63 | from abcpy.distances import LogReg 64 | distance_calculator = LogReg(statistics_calculator, seed=42) 65 | 66 | # define sampling scheme 67 | from abcpy.inferences import PMCABC 68 | sampler = PMCABC([height], [distance_calculator], backend, seed=1) 69 | 70 | # sample from scheme 71 | eps_arr = np.array([.75]) 72 | epsilon_percentile = 10 73 | journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 74 | 75 | return journal 76 | 77 | 78 | def analyse_journal(journal): 79 | # output parameters and weights 80 | print(journal.get_parameters()) 81 | print(journal.get_weights()) 82 | 83 | # do post analysis 84 | print(journal.posterior_mean()) 85 | print(journal.posterior_cov()) 86 | 87 | # print configuration 88 | print(journal.configuration) 89 | 90 | # plot posterior 91 | journal.plot_posterior_distr(path_to_save="posterior.png") 92 | 93 | # save and load journal 94 | journal.save("experiments.jnl") 95 | 96 | from abcpy.output import Journal 97 | new_journal = Journal.fromFile('experiments.jnl') 98 | 99 | 100 | def setUpModule(): 101 | ''' 102 | If an exception is raised in a setUpModule then none of 103 | the tests in the module will be run. 104 | 105 | This is useful because the slaves run in a while loop on initialization 106 | only responding to the master's commands and will never execute anything else. 107 | 108 | On termination of master, the slaves call quit() that raises a SystemExit(). 109 | Because of the behaviour of setUpModule, it will not run any unit tests 110 | for the slave and we now only need to write unit-tests from the master's 111 | point of view. 
112 | ''' 113 | setup_backend() 114 | 115 | 116 | if __name__ == "__main__": 117 | backend = setup_backend() 118 | journal = infer_parameters(backend, logging_level=logging.INFO) 119 | analyse_journal(journal) 120 | -------------------------------------------------------------------------------- /examples/extensions/distances/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/extensions/distances/__init__.py -------------------------------------------------------------------------------- /examples/extensions/distances/default_distance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from abcpy.distances import Distance, Euclidean 4 | 5 | 6 | class DefaultJointDistance(Distance): 7 | """ 8 | This class showcases how to implement a distance. It is actually a wrapper of the Euclidean distance, which is 9 | applied on each component of the provided datasets and summed. 10 | 11 | Parameters 12 | ---------- 13 | statistics: abcpy.statistics object 14 | The statistics calculator to be used 15 | """ 16 | 17 | def __init__(self, statistics): 18 | self.statistics_calc = statistics 19 | self.distance_calc = Euclidean(self.statistics_calc) 20 | 21 | def distance(self, d1, d2): 22 | total_distance = 0 23 | for observed_data, simulated_data in zip(d1, d2): 24 | total_distance += self.distance_calc.distance([observed_data], [simulated_data]) 25 | total_distance /= len(d2) 26 | return total_distance 27 | 28 | def dist_max(self): 29 | return np.inf 30 | -------------------------------------------------------------------------------- /examples/extensions/models/README.md: -------------------------------------------------------------------------------- 1 | # Wrapping models written in external code 2 | 3 | In this folder we showcase how to wrap models written in C++, R and FORTRAN. We use the same model in all cases (a simple gaussian one) and we also provide the corresponding Python implementation for the sake of reference. 4 | 5 | ## C++ 6 | 7 | We use [Swig](http://www.swig.org/) here to interface C++ with Python. In order to use that, an interface file has to be created correctly, which specifies how to interface C++ with Python. 8 | 9 | Check [here](https://abcpy.readthedocs.io/en/latest/user_customization.html#wrap-a-model-written-in-c) for more detailed explanation. 10 | 11 | ### Instructions 12 | 13 | 1. Go inside the `gaussian_cpp` folder. 14 | 2. Run `make` (requires a C++ compiler, eg `g++`). This automatically creates an additional Python file (`gaussian_model_simple.py`) and a compiled file (`_gaussian_model_simple.so`). 15 | 3. Run the `pmcabc-gaussian_model_simple.py` file. 16 | 17 | 18 | ### Common issues 19 | 20 | You may encounter some issue with the `boost` library which can be solved by installing it and putting it into the correct search path; in Ubuntu, install it with: 21 | 22 | ```sudo apt-get install libboost-all-dev``` 23 | 24 | ### Link Time Optimization (LTO): 25 | 26 | For more efficient compilation, usually C++ compilers use LTO to link previously compiled libraries to the currently compiled code. That can lead to issues however in this case, if for instance the Python3 executable was compiled with another version of compiler than the one currently installed. For this reason, Makefile here disables LTO by adding the flag `-fno-lto` to the two lines calling the C++ compiler. 
27 | 28 | In case your C++ code is large and compilation takes long, you can remove those flags, even if that may break the compilation for the reasons outlined above. 29 | 30 | Check [here](https://github.com/ContinuumIO/anaconda-issues/issues/6619) for more information. 31 | 32 | ## FORTRAN 33 | 34 | We can use easily the [F2PY](https://numpy.org/doc/stable/f2py/) tool to connect FORTRAN code to Python. This is part of Numpy. 35 | 36 | ### Instructions 37 | 38 | 1. Go inside the `gaussian_f90` folder. 39 | 2. Run `make`; (requires a FORTRAN compiler, eg `F90`); this will produce a compiled file. 40 | 3. Run the `pmcabc-gaussian_model_simple.py` file. 41 | 42 | ## R 43 | 44 | We use here the `rpy2` Python package to import R code in Python. 45 | 46 | Check [here](https://abcpy.readthedocs.io/en/latest/user_customization.html#wrap-a-model-written-in-r) for more detailed explanation. 47 | 48 | ### Instructions 49 | 50 | This does not require any compilation, as R is not a compiled language. 51 | 52 | 1. Go inside the `gaussian_R` folder. 53 | 2. Run the `pmcabc-gaussian_model_simple.py` file, which includes code to import the corresponding R code. 54 | -------------------------------------------------------------------------------- /examples/extensions/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/extensions/models/__init__.py -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_R/gaussian_model.R: -------------------------------------------------------------------------------- 1 | simple_gaussian <- function(mu, sigma, k = 1, seed = seed) { 2 | set.seed(seed) 3 | output <- rnorm(k, mu, sigma) 4 | return(output) 5 | } -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_R/pmcabc_gaussian_model_simple.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from numbers import Number 3 | 4 | import numpy as np 5 | import rpy2.robjects as robjects 6 | import rpy2.robjects.numpy2ri 7 | 8 | from abcpy.probabilisticmodels import ProbabilisticModel, Continuous, InputConnector 9 | 10 | rpy2.robjects.numpy2ri.activate() 11 | try: 12 | robjects.r(''' 13 | source('examples/extensions/models/gaussian_R/gaussian_model.R') 14 | ''') 15 | except: 16 | robjects.r(''' 17 | source('gaussian_model.R') 18 | ''') 19 | 20 | r_simple_gaussian = robjects.globalenv['simple_gaussian'] 21 | 22 | 23 | class Gaussian(ProbabilisticModel, Continuous): 24 | 25 | def __init__(self, parameters, name='Gaussian'): 26 | # We expect input of type parameters = [mu, sigma] 27 | if not isinstance(parameters, list): 28 | raise TypeError('Input of Normal model is of type list') 29 | 30 | if len(parameters) != 2: 31 | raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') 32 | 33 | input_connector = InputConnector.from_list(parameters) 34 | super().__init__(input_connector, name) 35 | 36 | def _check_input(self, input_values): 37 | # Check whether input has correct type or format 38 | if len(input_values) != 2: 39 | raise ValueError('Number of parameters of Normal model must be 2.') 40 | 41 | # Check whether input is from correct domain 42 | mu = input_values[0] 43 | sigma = input_values[1] 44 | if sigma < 0: 45 | return False 46 | 47 | return True 48 | 49 | def _check_output(self, 
values): 50 | if not isinstance(values, Number): 51 | raise ValueError('Output of the normal distribution is always a number.') 52 | 53 | # At this point values is a number (int, float); full domain for Normal is allowed 54 | return True 55 | 56 | def get_output_dimension(self): 57 | return 1 58 | 59 | def forward_simulate(self, input_values, k, rng=np.random.RandomState()): 60 | # Extract the input parameters 61 | mu = input_values[0] 62 | sigma = input_values[1] 63 | seed = rng.randint(np.iinfo(np.int32).max) 64 | 65 | # Do the actual forward simulation 66 | vector_of_k_samples = list(r_simple_gaussian(mu, sigma, k, seed=seed)) 67 | 68 | # Format the output to obey API 69 | result = [np.array([x]) for x in vector_of_k_samples] 70 | return result 71 | 72 | def pdf(self, input_values, x): 73 | mu = input_values[0] 74 | sigma = input_values[1] 75 | pdf = np.norm(mu, sigma).pdf(x) 76 | return pdf 77 | 78 | 79 | def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 80 | """Perform inference for this example. 81 | 82 | Parameters 83 | ---------- 84 | steps : integer, optional 85 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 86 | n_samples : integer, optional 87 | Number of posterior samples to generate. The default value is 250. 88 | n_samples_per_param : integer, optional 89 | Number of data points in each simulated data set. The default value is 10. 90 | 91 | Returns 92 | ------- 93 | abcpy.output.Journal 94 | A journal containing simulation results, metadata and optionally intermediate results. 95 | """ 96 | logging.basicConfig(level=logging_level) 97 | # define observation for true parameters mean=170, std=15 98 | y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 99 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 100 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 101 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 102 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 103 | 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 104 | 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 105 | 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 106 | 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 107 | 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 108 | 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 109 | 202.67075179617672, 211.75963110985992, 217.45423324370509] 110 | 111 | # define prior 112 | from abcpy.continuousmodels import Uniform 113 | mu = Uniform([[150], [200]], name="mu") 114 | sigma = Uniform([[5], [25]], name="sigma") 115 | 116 | # define the model 117 | model = Gaussian([mu, sigma], name='height') 118 | 119 | # define statistics 120 | from abcpy.statistics import Identity 121 | statistics_calculator = Identity(degree=2, cross=False) 122 | 123 | # define distance 124 | from abcpy.distances import LogReg 125 | distance_calculator = LogReg(statistics_calculator, seed=42) 126 | 127 | # define backend 128 | from abcpy.backends 
import BackendDummy as Backend 129 | backend = Backend() 130 | 131 | # define sampling scheme 132 | from abcpy.inferences import PMCABC 133 | sampler = PMCABC([model], [distance_calculator], backend, seed=1) 134 | 135 | # sample from scheme 136 | eps_arr = np.array([.75]) 137 | epsilon_percentile = 10 138 | journal = sampler.sample([y_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 139 | 140 | return journal 141 | 142 | 143 | def analyse_journal(journal): 144 | # output parameters and weights 145 | print(journal.get_parameters()) 146 | print(journal.get_weights()) 147 | 148 | # do post analysis 149 | print(journal.posterior_mean()) 150 | print(journal.posterior_cov()) 151 | 152 | # print configuration 153 | print(journal.configuration) 154 | 155 | # plot posterior 156 | journal.plot_posterior_distr(path_to_save="posterior.png") 157 | 158 | # save and load journal 159 | journal.save("experiments.jnl") 160 | 161 | from abcpy.output import Journal 162 | new_journal = Journal.fromFile('experiments.jnl') 163 | 164 | 165 | if __name__ == "__main__": 166 | journal = infer_parameters(logging_level=logging.INFO) 167 | analyse_journal(journal) 168 | -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_cpp/Makefile: -------------------------------------------------------------------------------- 1 | SWIG=swig 2 | SWIGFLAGS=-python -c++ 3 | WGET=wget -q 4 | 5 | CC=g++ 6 | CPPFLAGS=-fPIC 7 | INCLUDEPATH=$(shell python3-config --includes) 8 | INCLUDEPATHNUMPY=$(shell python3 -c 'import numpy as np; print(np.get_include())') 9 | PYTHONLINKERSETTINGS=$(shell python3-config --ldflags | cut -d" " -f 1) 10 | PYTHONLIBS=$(shell python3-config --libs) 11 | 12 | cpp_simple: _gaussian_model_simple.so gaussian_model_simple.py 13 | 14 | clean: 15 | rm _gaussian_model_simple.so gaussian_model_simple.py 16 | 17 | %.py: %.i 18 | $(SWIG) $(SWIGFLAGS) -o $@ $< 19 | 20 | %_wrap.cpp: %.i numpy.i 21 | $(SWIG) $(SWIGFLAGS) -o $@ $< 22 | 23 | %.o: %.cpp 24 | $(CC) $(CPPFLAGS) -I $(INCLUDEPATHNUMPY) $(INCLUDEPATH) -fno-lto -c $< -o $@ 25 | 26 | _%.so: %.o %_wrap.o 27 | $(CC) -shared $^ $(PYTHONLINKERSETTINGS) $(PYTHONLIBS) -fno-lto -o $@ 28 | 29 | %.i: 30 | $(WGET) "https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i" 31 | -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_cpp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/extensions/models/gaussian_cpp/__init__.py -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_cpp/gaussian_model_simple.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | 7 | 8 | // Simulation function of the gaussian model 9 | void gaussian_model(double* result, unsigned int k, double mu, double sigma, int seed) { 10 | boost::mt19937 rng(seed); 11 | boost::normal_distribution<> nd(mu, sigma); 12 | boost::variate_generator > sampler(rng, nd); 13 | 14 | for (int i=0; i 6 | #include 7 | #include 8 | 9 | extern void gaussian_model(double* result, unsigned int k, double mu, double sigma, int seed); 10 | %} 11 | 12 | %include "numpy.i" 13 | 14 | %init %{ 15 | import_array(); 16 | %} 17 | 18 | %apply (double* ARGOUT_ARRAY1, 
int DIM1 ) {(double* result, unsigned int k)}; 19 | 20 | extern void gaussian_model(double* result, unsigned int k, double mu, double sigma, int seed); 21 | 22 | -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_cpp/pmcabc_gaussian_model_simple.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from numbers import Number 3 | 4 | import numpy as np 5 | from examples.extensions.models.gaussian_cpp.gaussian_model_simple import \ 6 | gaussian_model # this is the file produced upon compiling 7 | 8 | from abcpy.probabilisticmodels import ProbabilisticModel, Continuous, InputConnector 9 | 10 | 11 | class Gaussian(ProbabilisticModel, Continuous): 12 | 13 | def __init__(self, parameters, name='Gaussian'): 14 | # We expect input of type parameters = [mu, sigma] 15 | if not isinstance(parameters, list): 16 | raise TypeError('Input of Normal model is of type list') 17 | 18 | if len(parameters) != 2: 19 | raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') 20 | 21 | input_connector = InputConnector.from_list(parameters) 22 | super().__init__(input_connector, name) 23 | 24 | def _check_input(self, input_values): 25 | # Check whether input has correct type or format 26 | if len(input_values) != 2: 27 | raise ValueError('Number of parameters of Normal model must be 2.') 28 | 29 | # Check whether input is from correct domain 30 | mu = input_values[0] 31 | sigma = input_values[1] 32 | if sigma < 0: 33 | return False 34 | 35 | return True 36 | 37 | def _check_output(self, values): 38 | if not isinstance(values, Number): 39 | raise ValueError('Output of the normal distribution is always a number.') 40 | 41 | # At this point values is a number (int, float); full domain for Normal is allowed 42 | return True 43 | 44 | def get_output_dimension(self): 45 | return 1 46 | 47 | def forward_simulate(self, input_values, k, rng=np.random.RandomState()): 48 | # Extract the input parameters 49 | mu = input_values[0] 50 | sigma = input_values[1] 51 | seed = rng.randint(np.iinfo(np.int32).max) 52 | 53 | # Do the actual forward simulation 54 | vector_of_k_samples = gaussian_model(k, mu, sigma, seed) # call the C++ code 55 | 56 | # Format the output to obey API 57 | result = [np.array([x]) for x in vector_of_k_samples] 58 | return result 59 | 60 | def pdf(self, input_values, x): 61 | mu = input_values[0] 62 | sigma = input_values[1] 63 | pdf = np.norm(mu, sigma).pdf(x) 64 | return pdf 65 | 66 | 67 | def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 68 | """Perform inference for this example. 69 | 70 | Parameters 71 | ---------- 72 | steps : integer, optional 73 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 74 | n_samples : integer, optional 75 | Number of posterior samples to generate. The default value is 250. 76 | n_samples_per_param : integer, optional 77 | Number of data points in each simulated data set. The default value is 10. 78 | 79 | Returns 80 | ------- 81 | abcpy.output.Journal 82 | A journal containing simulation results, metadata and optionally intermediate results. 
83 | """ 84 | logging.basicConfig(level=logging_level) 85 | # define observation for true parameters mean=170, std=15 86 | y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 87 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 88 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 89 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 90 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 91 | 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 92 | 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 93 | 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 94 | 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 95 | 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 96 | 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 97 | 202.67075179617672, 211.75963110985992, 217.45423324370509] 98 | 99 | # define prior 100 | from abcpy.continuousmodels import Uniform 101 | mu = Uniform([[150], [200]], name="mu") 102 | sigma = Uniform([[5], [25]], name="sigma") 103 | 104 | # define the model 105 | model = Gaussian([mu, sigma], name='height') 106 | 107 | # define statistics 108 | from abcpy.statistics import Identity 109 | statistics_calculator = Identity(degree=2, cross=False) 110 | 111 | # define distance 112 | from abcpy.distances import LogReg 113 | distance_calculator = LogReg(statistics_calculator, seed=42) 114 | 115 | # define backend 116 | from abcpy.backends import BackendDummy as Backend 117 | backend = Backend() 118 | 119 | # define sampling scheme 120 | from abcpy.inferences import PMCABC 121 | sampler = PMCABC([model], [distance_calculator], backend, seed=1) 122 | 123 | # sample from scheme 124 | eps_arr = np.array([.75]) 125 | epsilon_percentile = 10 126 | journal = sampler.sample([y_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 127 | 128 | return journal 129 | 130 | 131 | def analyse_journal(journal): 132 | # output parameters and weights 133 | print(journal.get_parameters()) 134 | print(journal.get_weights()) 135 | 136 | # do post analysis 137 | print(journal.posterior_mean()) 138 | print(journal.posterior_cov()) 139 | 140 | # print configuration 141 | print(journal.configuration) 142 | 143 | # plot posterior 144 | journal.plot_posterior_distr(path_to_save="posterior.png") 145 | 146 | # save and load journal 147 | journal.save("experiments.jnl") 148 | 149 | from abcpy.output import Journal 150 | new_journal = Journal.fromFile('experiments.jnl') 151 | 152 | 153 | if __name__ == "__main__": 154 | journal = infer_parameters(logging_level=logging.INFO) 155 | analyse_journal(journal) 156 | -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_f90/Makefile: -------------------------------------------------------------------------------- 1 | F2PY=python -m numpy.f2py 2 | EXT_SUFFIX := $(shell python3-config --extension-suffix) 3 | 4 | default: gaussian_model_simple$(EXT_SUFFIX) 5 | 6 | %$(EXT_SUFFIX): %.f90 7 | echo $(F2PY) 8 | $(F2PY) -c -m $* $< 9 | 10 | 
-------------------------------------------------------------------------------- /examples/extensions/models/gaussian_f90/gaussian_model_simple.f90: -------------------------------------------------------------------------------- 1 | module gaussian_model 2 | contains 3 | subroutine gaussian(output, mu, sigma, k, seed) 4 | integer, intent(in) :: k, seed 5 | real(8), intent(in) :: mu, sigma 6 | real(8), intent(out) :: output(k) 7 | 8 | integer :: i, n 9 | real(8) :: r, theta 10 | real(8), dimension(:), allocatable :: temp 11 | integer(4), dimension(:), allocatable :: seed_arr 12 | 13 | ! get random seed array size and fill seed_arr with provided seed 14 | call random_seed(size = n) 15 | allocate(seed_arr(n)) 16 | seed_arr = seed 17 | call random_seed(put = seed_arr) 18 | 19 | ! create 2k random numbers uniformly from [0,1] 20 | if(allocated(temp)) then 21 | deallocate(temp) 22 | end if 23 | allocate(temp(k * 2)) 24 | call random_number(temp) 25 | 26 | ! Use Box-Muller transfrom to create normally distributed variables 27 | do i = 1, k 28 | r = (-2.0 * log(temp(2 * i - 1)))**0.5 29 | theta = 2 * 3.1415926 * temp(2 * i) 30 | output(i) = mu + sigma * r * sin(theta) 31 | end do 32 | end subroutine gaussian 33 | end module gaussian_model 34 | 35 | program main 36 | use gaussian_model 37 | implicit none 38 | 39 | integer, parameter :: k = 100 40 | integer :: seed = 9, i 41 | real(8) :: mu = 10.0, sigma = 2.0 42 | real(8) :: output(k) 43 | 44 | call gaussian(output, mu, sigma, k, seed) 45 | 46 | do i = 1, k 47 | write(*, *) output(i) 48 | end do 49 | end program main 50 | -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_f90/pmcabc_gaussian_model_simple.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from numbers import Number 3 | 4 | import numpy as np 5 | from examples.extensions.models.gaussian_f90.gaussian_model_simple import gaussian_model 6 | 7 | from abcpy.probabilisticmodels import ProbabilisticModel, Continuous, InputConnector 8 | 9 | 10 | class Gaussian(ProbabilisticModel, Continuous): 11 | def __init__(self, parameters, seed=None, name="gaussian"): 12 | if not isinstance(parameters, list): 13 | raise TypeError('Input of Normal model is of type list') 14 | 15 | if len(parameters) != 2: 16 | raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') 17 | 18 | input_connector = InputConnector.from_list(parameters) 19 | super().__init__(input_connector, name) 20 | 21 | def _check_input(self, input_values): 22 | # Check whether input has correct type or format 23 | if len(input_values) != 2: 24 | raise ValueError('Number of parameters of Normal model must be 2.') 25 | 26 | # Check whether input is from correct domain 27 | mu = input_values[0] 28 | sigma = input_values[1] 29 | if sigma < 0: 30 | return False 31 | 32 | return True 33 | 34 | def _check_output(self, values): 35 | if not isinstance(values, Number): 36 | raise ValueError('Output of the normal distribution is always a number.') 37 | 38 | # At this point values is a number (int, float); full domain for Normal is allowed 39 | return True 40 | 41 | def get_output_dimension(self): 42 | return 1 43 | 44 | def forward_simulate(self, input_values, k, rng=np.random.RandomState()): 45 | # Extract the input parameters 46 | mu = input_values[0] 47 | sigma = input_values[1] 48 | 49 | seed = rng.randint(100000) 50 | 51 | # Do the actual forward simulation 52 | vector_of_k_samples = 
np.array(gaussian_model(mu, sigma, k, seed)) 53 | 54 | # Format the output to obey API 55 | result = [np.array([x]) for x in vector_of_k_samples] 56 | return result 57 | 58 | def pdf(self, input_values, x): 59 | mu = input_values[0] 60 | sigma = input_values[1] 61 | pdf = np.norm(mu, sigma).pdf(x) 62 | return pdf 63 | 64 | 65 | def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 66 | """Perform inference for this example. 67 | 68 | Parameters 69 | ---------- 70 | steps : integer, optional 71 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 72 | n_samples : integer, optional 73 | Number of posterior samples to generate. The default value is 250. 74 | n_samples_per_param : integer, optional 75 | Number of data points in each simulated data set. The default value is 10. 76 | 77 | Returns 78 | ------- 79 | abcpy.output.Journal 80 | A journal containing simulation results, metadata and optionally intermediate results. 81 | """ 82 | logging.basicConfig(level=logging_level) 83 | # define observation for true parameters mean=170, std=15 84 | y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 85 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 86 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 87 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 88 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766, 89 | 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 171.63761180867033, 90 | 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 197.42448680731226, 91 | 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 185.30223966014586, 92 | 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803, 93 | 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 138.24716809523139, 94 | 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 188.27229523693822, 95 | 202.67075179617672, 211.75963110985992, 217.45423324370509] 96 | 97 | # define prior 98 | from abcpy.continuousmodels import Uniform 99 | mu = Uniform([[150], [200]], name="mu") 100 | sigma = Uniform([[5], [25]], name="sigma") 101 | 102 | # define the model 103 | model = Gaussian([mu, sigma], name='height') 104 | 105 | # define statistics 106 | from abcpy.statistics import Identity 107 | statistics_calculator = Identity(degree=2, cross=False) 108 | 109 | # define distance 110 | from abcpy.distances import LogReg 111 | distance_calculator = LogReg(statistics_calculator, seed=42) 112 | 113 | # define kernel 114 | from abcpy.perturbationkernel import DefaultKernel 115 | kernel = DefaultKernel([mu, sigma]) 116 | 117 | # define backend 118 | from abcpy.backends import BackendDummy as Backend 119 | backend = Backend() 120 | 121 | # define sampling scheme 122 | from abcpy.inferences import PMCABC 123 | sampler = PMCABC([model], [distance_calculator], backend, kernel, seed=1) 124 | 125 | # sample from scheme 126 | eps_arr = np.array([.75]) 127 | epsilon_percentile = 10 128 | journal = sampler.sample([y_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 129 | 130 | return journal 131 | 132 | 133 | def 
analyse_journal(journal): 134 | # output parameters and weights 135 | print(journal.get_parameters()) 136 | print(journal.get_weights()) 137 | 138 | # do post analysis 139 | print(journal.posterior_mean()) 140 | print(journal.posterior_cov()) 141 | 142 | # print configuration 143 | print(journal.configuration) 144 | 145 | # plot posterior 146 | journal.plot_posterior_distr(path_to_save="posterior.png") 147 | 148 | # save and load journal 149 | journal.save("experiments.jnl") 150 | 151 | from abcpy.output import Journal 152 | new_journal = Journal.fromFile('experiments.jnl') 153 | 154 | 155 | if __name__ == "__main__": 156 | journal = infer_parameters(logging_level=logging.INFO) 157 | analyse_journal(journal) 158 | -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/extensions/models/gaussian_python/__init__.py -------------------------------------------------------------------------------- /examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from numbers import Number 3 | 4 | import numpy as np 5 | from scipy.stats import norm 6 | from abcpy.probabilisticmodels import ProbabilisticModel, Continuous, InputConnector 7 | 8 | 9 | class Gaussian(ProbabilisticModel, Continuous): 10 | """ 11 | This class is a re-implementation of the `abcpy.continuousmodels.Normal` model, included for documentation purposes. 12 | 13 | """ 14 | 15 | def __init__(self, parameters, name='Gaussian'): 16 | # We expect input of type parameters = [mu, sigma] 17 | if not isinstance(parameters, list): 18 | raise TypeError('Input of Normal model must be of type list.') 19 | 20 | if len(parameters) != 2: 21 | raise RuntimeError('Input list must be of length 2, containing [mu, sigma].') 22 | 23 | input_connector = InputConnector.from_list(parameters) 24 | super().__init__(input_connector, name) 25 | 26 | def _check_input(self, input_values): 27 | # Check whether input has correct type or format 28 | if len(input_values) != 2: 29 | raise ValueError('Number of parameters of Normal model must be 2.') 30 | 31 | # Check whether input is from correct domain 32 | mu = input_values[0] 33 | sigma = input_values[1] 34 | if sigma < 0: 35 | return False 36 | 37 | return True 38 | 39 | def _check_output(self, values): 40 | if not isinstance(values, Number): 41 | raise ValueError('Output of the normal distribution is always a number.') 42 | 43 | # At this point values is a number (int, float); full domain for Normal is allowed 44 | return True 45 | 46 | def get_output_dimension(self): 47 | return 1 48 | 49 | def forward_simulate(self, input_values, k, rng=np.random.RandomState()): 50 | # Extract the input parameters 51 | mu = input_values[0] 52 | sigma = input_values[1] 53 | 54 | # Do the actual forward simulation 55 | vector_of_k_samples = np.array(rng.normal(mu, sigma, k)) 56 | 57 | # Format the output to obey API 58 | result = [np.array([x]) for x in vector_of_k_samples] 59 | return result 60 | 61 | def pdf(self, input_values, x): 62 | mu = input_values[0] 63 | sigma = input_values[1] 64 | pdf = norm(mu, sigma).pdf(x) 65 | return pdf 66 | 67 | 68 | def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 69 |
"""Perform inference for this example. 70 | 71 | Parameters 72 | ---------- 73 | steps : integer, optional 74 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 75 | n_samples : integer, optional 76 | Number of posterior samples to generate. The default value is 250. 77 | n_samples_per_param : integer, optional 78 | Number of data points in each simulated data set. The default value is 10. 79 | 80 | Returns 81 | ------- 82 | abcpy.output.Journal 83 | A journal containing simulation results, metadata and optionally intermediate results. 84 | """ 85 | logging.basicConfig(level=logging_level) 86 | # define observation for true parameters mean=170, std=15 87 | height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 88 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 89 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 90 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 91 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 92 | 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 93 | 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 94 | 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 95 | 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 96 | 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 97 | 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 98 | 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] 99 | # define prior 100 | from abcpy.continuousmodels import Uniform 101 | mu = Uniform([[150], [200]], name="mu") 102 | sigma = Uniform([[5], [25]], name="sigma") 103 | # define the model 104 | from abcpy.continuousmodels import Normal as Gaussian 105 | height = Gaussian([mu, sigma], name='height') 106 | 107 | # define statistics 108 | from abcpy.statistics import Identity 109 | statistics_calculator = Identity(degree=2, cross=False) 110 | 111 | # define distance 112 | from abcpy.distances import LogReg 113 | distance_calculator = LogReg(statistics_calculator, seed=42) 114 | 115 | # define kernel 116 | from abcpy.perturbationkernel import DefaultKernel 117 | kernel = DefaultKernel([mu, sigma]) 118 | 119 | # define backend 120 | # Note, the dummy backend does not parallelize the code! 
121 | from abcpy.backends import BackendDummy as Backend 122 | backend = Backend() 123 | 124 | # define sampling scheme 125 | from abcpy.inferences import PMCABC 126 | sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1) 127 | 128 | # sample from scheme 129 | eps_arr = np.array([.75]) 130 | epsilon_percentile = 10 131 | journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 132 | 133 | return journal 134 | 135 | 136 | def analyse_journal(journal): 137 | # output parameters and weights 138 | print(journal.get_parameters()) 139 | print(journal.get_weights()) 140 | 141 | # do post analysis 142 | print(journal.posterior_mean()) 143 | print(journal.posterior_cov()) 144 | 145 | # print configuration 146 | print(journal.configuration) 147 | 148 | # plot posterior 149 | journal.plot_posterior_distr(path_to_save="posterior.png") 150 | 151 | # save and load journal 152 | journal.save("experiments.jnl") 153 | 154 | from abcpy.output import Journal 155 | new_journal = Journal.fromFile('experiments.jnl') 156 | 157 | 158 | if __name__ == "__main__": 159 | journal = infer_parameters(logging_level=logging.INFO) 160 | analyse_journal(journal) 161 | -------------------------------------------------------------------------------- /examples/extensions/perturbationkernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/extensions/perturbationkernels/__init__.py -------------------------------------------------------------------------------- /examples/extensions/perturbationkernels/multivariate_normal_kernel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import multivariate_normal 3 | 4 | from abcpy.perturbationkernel import PerturbationKernel, ContinuousKernel 5 | 6 | 7 | class MultivariateNormalKernel(PerturbationKernel, ContinuousKernel): 8 | """This class defines a kernel perturbing the parameters using a multivariate normal distribution.""" 9 | 10 | def __init__(self, models): 11 | self.models = models 12 | 13 | def calculate_cov(self, accepted_parameters_manager, kernel_index): 14 | """ 15 | Calculates the covariance matrix relevant to this kernel. 16 | 17 | Parameters 18 | ---------- 19 | accepted_parameters_manager: abcpy.AcceptedParametersManager object 20 | AcceptedParametersManager to be used. 21 | kernel_index: integer 22 | The index of the kernel in the list of kernels of the joint kernel. 23 | 24 | Returns 25 | ------- 26 | list 27 | The covariance matrix corresponding to this kernel. 
28 | """ 29 | continuous_model = [[] for i in 30 | range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] 31 | for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): 32 | if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], 33 | (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): 34 | continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] 35 | else: 36 | continuous_model[i] = np.concatenate( 37 | accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) 38 | continuous_model = np.array(continuous_model).astype(float) 39 | 40 | if accepted_parameters_manager.accepted_weights_bds is not None: 41 | weights = accepted_parameters_manager.accepted_weights_bds.value() 42 | cov = np.cov(continuous_model, aweights=weights.reshape(-1).astype(float), rowvar=False) 43 | else: 44 | cov = np.cov(continuous_model, rowvar=False) 45 | return cov 46 | 47 | def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.random.RandomState()): 48 | """ 49 | Updates the parameter values contained in the accepted_paramters_manager using a multivariate normal distribution. 50 | 51 | Parameters 52 | ---------- 53 | accepted_parameters_manager: abcpy.AcceptedParametersManager object 54 | Defines the AcceptedParametersManager to be used. 55 | kernel_index: integer 56 | The index of the kernel in the list of kernels in the joint kernel. 57 | row_index: integer 58 | The index of the row that should be considered from the accepted_parameters_bds matrix. 59 | rng: random number generator 60 | The random number generator to be used. 61 | 62 | Returns 63 | ------- 64 | np.ndarray 65 | The perturbed parameter values. 66 | """ 67 | 68 | # Get all current parameter values relevant for this model and the structure 69 | continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] 70 | 71 | if isinstance(continuous_model_values[row_index][0], 72 | (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): 73 | # Perturb 74 | cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) 75 | continuous_model_values = np.array(continuous_model_values).astype(float) 76 | 77 | # Perturbed values anc split according to the structure 78 | perturbed_continuous_values = rng.multivariate_normal(continuous_model_values[row_index], cov) 79 | else: 80 | # print('Hello') 81 | # Learn the structure 82 | struct = [[] for i in range(len(continuous_model_values[row_index]))] 83 | for i in range(len(continuous_model_values[row_index])): 84 | struct[i] = continuous_model_values[row_index][i].shape[0] 85 | struct = np.array(struct).cumsum() 86 | continuous_model_values = np.concatenate(continuous_model_values[row_index]) 87 | 88 | # Perturb 89 | cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) 90 | continuous_model_values = np.array(continuous_model_values).astype(float) 91 | 92 | # Perturbed values anc split according to the structure 93 | perturbed_continuous_values = np.split(rng.multivariate_normal(continuous_model_values, cov), struct)[:-1] 94 | 95 | return perturbed_continuous_values 96 | 97 | def pdf(self, accepted_parameters_manager, kernel_index, mean, x): 98 | """Calculates the pdf of the kernel. 99 | Commonly used to calculate weights. 
100 | 101 | Parameters 102 | ---------- 103 | accepted_parameters_manager: abcpy.AcceptedParametersManager object 104 | The AcceptedParametersManager to be used. 105 | kernel_index: integer 106 | The index of the kernel in the list of kernels in the joint kernel. 107 | index: integer 108 | The row to be considered in the accepted_parameters_bds matrix. 109 | x: The point at which the pdf should be evaluated. 110 | 111 | Returns 112 | ------- 113 | float 114 | The pdf evaluated at point x. 115 | """ 116 | 117 | if isinstance(mean[0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): 118 | mean = np.array(mean).astype(float) 119 | cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) 120 | return multivariate_normal(mean, cov, allow_singular=True).pdf(np.array(x).astype(float)) 121 | else: 122 | mean = np.array(np.concatenate(mean)).astype(float) 123 | cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) 124 | return multivariate_normal(mean, cov, allow_singular=True).pdf(np.concatenate(x)) 125 | -------------------------------------------------------------------------------- /examples/hierarchicalmodels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/hierarchicalmodels/__init__.py -------------------------------------------------------------------------------- /examples/modelselection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/modelselection/__init__.py -------------------------------------------------------------------------------- /examples/modelselection/randomforest_modelselections.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from abcpy.modelselections import RandomForest 4 | 5 | 6 | def infer_model(logging_level=logging.WARN): 7 | logging.basicConfig(level=logging_level) 8 | # define observation for true parameters mean=170, std=15 9 | y_obs = [160.82499176] 10 | 11 | # Create a array of models 12 | from abcpy.continuousmodels import Uniform, Normal, StudentT 13 | model_array = [None] * 2 14 | 15 | # Model 1: Gaussian 16 | mu1 = Uniform([[150], [200]], name='mu1') 17 | sigma1 = Uniform([[5.0], [25.0]], name='sigma1') 18 | model_array[0] = Normal([mu1, sigma1]) 19 | 20 | # Model 2: Student t 21 | mu2 = Uniform([[150], [200]], name='mu2') 22 | sigma2 = Uniform([[1], [30.0]], name='sigma2') 23 | model_array[1] = StudentT([mu2, sigma2]) 24 | 25 | # define statistics 26 | from abcpy.statistics import Identity 27 | statistics_calculator = Identity(degree=2, cross=False) 28 | 29 | # define backend 30 | from abcpy.backends import BackendDummy as Backend 31 | backend = Backend() 32 | 33 | # Initiate the Model selection scheme 34 | modelselection = RandomForest(model_array, statistics_calculator, backend, seed=1) 35 | 36 | # Choose the correct model 37 | model = modelselection.select_model(y_obs, n_samples=100, n_samples_per_param=1, ) 38 | 39 | # Compute the posterior probability of the chosen model 40 | model_prob = modelselection.posterior_probability(y_obs) 41 | 42 | return model, model_prob 43 | 44 | 45 | if __name__ == "__main__": 46 | model, model_prob = infer_model(logging_level=logging.INFO) 47 | print(f"The correct model 
is {model.name} with estimated posterior probability {model_prob[0]}.") 48 | -------------------------------------------------------------------------------- /examples/statisticslearning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/examples/statisticslearning/__init__.py -------------------------------------------------------------------------------- /examples/statisticslearning/pmcabc_gaussian_statistics_learning.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | 5 | 6 | def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN): 7 | """Perform inference for this example. 8 | 9 | Parameters 10 | ---------- 11 | steps : integer, optional 12 | Number of iterations in the sequential PMCABC algoritm ("generations"). The default value is 3 13 | n_samples : integer, optional 14 | Number of posterior samples to generate. The default value is 250. 15 | n_samples_per_param : integer, optional 16 | Number of data points in each simulated data set. The default value is 10. 17 | 18 | Returns 19 | ------- 20 | abcpy.output.Journal 21 | A journal containing simulation results, metadata and optionally intermediate results. 22 | """ 23 | logging.basicConfig(level=logging_level) 24 | # define backend 25 | # Note, the dummy backend does not parallelize the code! 26 | from abcpy.backends import BackendDummy as Backend 27 | backend = Backend() 28 | 29 | # define observation for true parameters mean=170, std=15 30 | height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084, 31 | 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017, 32 | 183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515, 33 | 197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633, 34 | 165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537, 35 | 206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275, 36 | 171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235, 37 | 197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519, 38 | 185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741, 39 | 250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021, 40 | 138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626, 41 | 188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509] 42 | 43 | # define prior 44 | from abcpy.continuousmodels import Uniform 45 | mu = Uniform([[150], [200]], name="mu") 46 | sigma = Uniform([[5], [25]], name="sigma") 47 | 48 | # define the model 49 | from abcpy.continuousmodels import Normal 50 | height = Normal([mu, sigma], ) 51 | 52 | # define statistics 53 | from abcpy.statistics import Identity 54 | statistics_calculator = Identity(degree=3, cross=True) 55 | 56 | # Learn the optimal summary statistics using Semiautomatic summary selection 57 | from abcpy.statisticslearning import Semiautomatic 58 | statistics_learning = Semiautomatic([height], statistics_calculator, backend, 59 | 
n_samples=1000, n_samples_per_param=1, seed=1) 60 | 61 | # Redefine the statistics function 62 | new_statistics_calculator = statistics_learning.get_statistics() 63 | 64 | # Learn the optimal summary statistics using SemiautomaticNN summary selection; 65 | # we use 200 samples as a validation set for early stopping: 66 | from abcpy.statisticslearning import SemiautomaticNN 67 | statistics_learning = SemiautomaticNN([height], statistics_calculator, backend, 68 | n_samples=1000, n_samples_val=200, n_epochs=20, use_tqdm=False, 69 | n_samples_per_param=1, seed=1, early_stopping=True) 70 | 71 | # Redefine the statistics function 72 | new_statistics_calculator = statistics_learning.get_statistics() 73 | 74 | # define distance 75 | from abcpy.distances import Euclidean 76 | distance_calculator = Euclidean(new_statistics_calculator) 77 | 78 | # define kernel 79 | from abcpy.perturbationkernel import DefaultKernel 80 | kernel = DefaultKernel([mu, sigma]) 81 | 82 | # define sampling scheme 83 | from abcpy.inferences import PMCABC 84 | sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1) 85 | 86 | eps_arr = np.array([500]) # starting value of epsilon; the smaller, the slower the algorithm. 87 | # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by simulations at previous 88 | # iteration from the observation 89 | epsilon_percentile = 10 90 | journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) 91 | 92 | return journal 93 | 94 | 95 | def analyse_journal(journal): 96 | # output parameters and weights 97 | print(journal.opt_values) 98 | print(journal.get_weights()) 99 | 100 | # do post analysis 101 | print(journal.posterior_mean()) 102 | print(journal.posterior_cov()) 103 | 104 | # print configuration 105 | print(journal.configuration) 106 | 107 | # plot posterior 108 | journal.plot_posterior_distr(path_to_save="posterior.png") 109 | 110 | # save and load journal 111 | journal.save("experiments.jnl") 112 | 113 | from abcpy.output import Journal 114 | new_journal = Journal.fromFile('experiments.jnl') 115 | 116 | 117 | if __name__ == "__main__": 118 | journal = infer_parameters(logging_level=logging.INFO) 119 | analyse_journal(journal) 120 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | scikit-learn>=0.23.1 4 | glmnet>=2.2.1 5 | sphinx 6 | sphinx_rtd_theme 7 | coverage 8 | mpi4py 9 | cloudpickle 10 | matplotlib 11 | tqdm 12 | pot 13 | -------------------------------------------------------------------------------- /requirements/backend-mpi.txt: -------------------------------------------------------------------------------- 1 | mpi4py 2 | cloudpickle 3 | -------------------------------------------------------------------------------- /requirements/backend-spark.txt: -------------------------------------------------------------------------------- 1 | findspark 2 | pyspark -------------------------------------------------------------------------------- /requirements/coverage.txt: -------------------------------------------------------------------------------- 1 | codecov 2 | -------------------------------------------------------------------------------- /requirements/neural_networks_requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | 3 | 
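The `torch` requirement listed above is optional: it is only needed for the neural-network related functionality (for instance the `SemiautomaticNN` summary-statistics learning used in the example above). Assuming the command is run from the repository root, one way to install it is:

    pip install -r requirements/neural_networks_requirements.txt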
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # PyPI config file 2 | 3 | [metadata] 4 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Always prefer setuptools over distutils 2 | import sys 3 | from setuptools import setup, find_packages 4 | from os import path 5 | 6 | try: # for pip >= 10 7 | from pip._internal.req import parse_requirements 8 | except ImportError: # for pip <= 9.0.3 9 | from pip.req import parse_requirements 10 | 11 | try: # for pip >= 19.3 12 | from pip._internal.network.session import PipSession 13 | except ImportError: 14 | try: # for pip < 19.3 and >=10 15 | from pip._internal.download import PipSession 16 | except ImportError: # for pip <= 9.0.3 17 | from pip.download import PipSession 18 | 19 | here = path.abspath(path.dirname(__file__)) 20 | 21 | install_reqs_raw = parse_requirements('requirements.txt', session=PipSession()) 22 | 23 | try: 24 | install_reqs = [str(ir.req) for ir in install_reqs_raw] 25 | except AttributeError: 26 | install_reqs = [str(ir.requirement) for ir in install_reqs_raw] 27 | 28 | with open(path.join(here, 'VERSION')) as f: 29 | version = f.readline().strip() 30 | file_tgz = 'v' + version + '.tar.gz' 31 | 32 | setup( 33 | name='abcpy', 34 | 35 | # Versions should comply with PEP440. For a discussion on single-sourcing 36 | # the version across setup.py and the project code, see 37 | # https://packaging.python.org/en/latest/single_source_version.html 38 | version=version, 39 | 40 | description='A framework for approximate Bayesian computation (ABC) that speeds up inference by parallelizing computation on single computers or whole clusters.', 41 | long_description='ABCpy is a highly modular, scientific library for approximate Bayesian computation (ABC) written in Python. It is designed to run all included ABC algorithms in parallel, either using multiple cores of a single computer or using an Apache Spark or MPI enabled cluster. The modularity helps domain scientists to easily apply ABC to their research without being ABC experts; using ABCpy they can easily run large parallel simulations without much knowledge about parallelization, even without much additional effort to parallelize their code. Further, ABCpy enables ABC experts to easily develop new inference schemes and evaluate them in a standardized environment, and to extend the library with new algorithms. These benefits come mainly from the modularity of ABCpy.', 42 | 43 | # The project's main homepage. 44 | url='https://github.com/eth-cscs/abcpy', 45 | download_url = 'https://github.com/eth-cscs/abcpy/archive/' + file_tgz, 46 | 47 | # Author details 48 | author='The abcpy authors', 49 | author_email='', 50 | 51 | # Choose your license 52 | license='BSD-3', 53 | 54 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 55 | classifiers=[ 56 | # How mature is this project? Common values are 57 | # 3 - Alpha 58 | # 4 - Beta 59 | # 5 - Production/Stable 60 | 'Development Status :: 4 - Beta', 61 | 62 | 'Programming Language :: Python :: 3', 63 | 'Programming Language :: Python :: 3.6', 64 | 'Programming Language :: Python :: 3.7', 65 | 'Programming Language :: Python :: 3.8', 66 | 'Programming Language :: Python :: 3.9', 67 | ], 68 | 69 | # What does your project relate to? 
70 | keywords='abcpy', 71 | 72 | # You can just specify the packages manually here if your project is 73 | # simple. Or you can use find_packages(). 74 | packages=find_packages(), 75 | 76 | # Alternatively, if you want to distribute just a my_module.py, uncomment 77 | # this: 78 | # py_modules=["my_module"], 79 | 80 | # List run-time dependencies here. These will be installed by pip when 81 | # your project is installed. For an analysis of "install_requires" vs pip's 82 | # requirements files see: 83 | # https://packaging.python.org/en/latest/requirements.html 84 | # install_requires=['numpy', 'scipy'], 85 | install_requires=install_reqs, 86 | 87 | 88 | # List additional groups of dependencies here (e.g. development 89 | # dependencies). You can install these using the following syntax, 90 | # for example: 91 | # $ pip install -e .[dev,test] 92 | #extras_require={ 93 | # 'dev': ['check-manifest'], 94 | # 'test': ['coverage'], 95 | #}, 96 | 97 | # If there are data files included in your packages that need to be 98 | # installed, specify them here. If using Python 2.6 or less, then these 99 | # have to be included in MANIFEST.in as well. 100 | # package_data={ 101 | # 'sample': ['package_data.dat'], 102 | # }, 103 | 104 | # Although 'package_data' is the preferred approach, in some case you may 105 | # need to place data files outside of your packages. See: 106 | # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa 107 | # In this case, 'data_file' will be installed into '/my_data' 108 | #data_files=[('my_data', ['data/data_file'])], 109 | 110 | # To provide executable scripts, use entry points in preference to the 111 | # "scripts" keyword. Entry points provide cross-platform support and allow 112 | # pip to create the appropriate form of executable for the target platform. 
113 | #entry_points={ 114 | # 'console_scripts': [ 115 | # 'sample=sample:main', 116 | # ], 117 | #}, 118 | ) 119 | -------------------------------------------------------------------------------- /tests/NN_utilities_networks_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | try: 4 | import torch 5 | except ImportError: 6 | has_torch = False 7 | else: 8 | has_torch = True 9 | from abcpy.NN_utilities.networks import createDefaultNNWithDerivatives, createDefaultNN, DiscardLastOutputNet 10 | from abcpy.NN_utilities.utilities import jacobian_second_order, jacobian, jacobian_hessian 11 | 12 | 13 | class test_default_NN_with_derivatives(unittest.TestCase): 14 | 15 | def setUp(self): 16 | if has_torch: 17 | self.net = createDefaultNNWithDerivatives(5, 2, nonlinearity=torch.nn.Softplus)() 18 | self.net_first_der_only = createDefaultNNWithDerivatives(5, 2, nonlinearity=torch.nn.Softplus, 19 | first_derivative_only=True)() 20 | self.tensor = torch.randn((10, 5), requires_grad=True) 21 | 22 | def test_first_der(self): 23 | if has_torch: 24 | # compute derivative with forward pass 25 | y, f1 = self.net_first_der_only.forward_and_derivatives(self.tensor) 26 | f2 = jacobian(self.tensor, y) 27 | 28 | assert torch.allclose(f1, f2) 29 | 30 | def test_first_and_second_der(self): 31 | if has_torch: 32 | # compute derivative with forward pass 33 | y, f1, s1 = self.net.forward_and_derivatives(self.tensor) 34 | f2, s2 = jacobian_second_order(self.tensor, y) 35 | 36 | assert torch.allclose(f1, f2) 37 | assert torch.allclose(s1, s2) 38 | 39 | def test_first_der_and_hessian(self): 40 | if has_torch: 41 | # compute derivative with forward pass 42 | y, f1, H1 = self.net.forward_and_full_derivatives(self.tensor) 43 | f2, H2 = jacobian_hessian(self.tensor, y) 44 | 45 | assert torch.allclose(f1, f2) 46 | assert torch.allclose(H1, H2) 47 | 48 | def test_error(self): 49 | if has_torch: 50 | with self.assertRaises(RuntimeError): 51 | self.net = createDefaultNNWithDerivatives(5, 2, nonlinearity=torch.nn.Softsign)() 52 | 53 | 54 | class test_discard_last_output_wrapper(unittest.TestCase): 55 | 56 | def setUp(self): 57 | if has_torch: 58 | self.net = createDefaultNN(2, 3)() 59 | self.net_with_discard_wrapper = DiscardLastOutputNet(self.net) 60 | # reference input and output 61 | torch.random.manual_seed(1) 62 | self.tensor_1 = torch.randn(2) 63 | self.tensor_2 = torch.randn(1, 2) 64 | self.tensor_3 = torch.randn(1, 3, 2) 65 | 66 | def test_output(self): 67 | if has_torch: 68 | out = self.net(self.tensor_1) 69 | out_discard = self.net_with_discard_wrapper(self.tensor_1) 70 | self.assertTrue(torch.allclose(out[:-1], out_discard)) 71 | 72 | out = self.net(self.tensor_2) 73 | out_discard = self.net_with_discard_wrapper(self.tensor_2) 74 | self.assertTrue(torch.allclose(out[:, :-1], out_discard)) 75 | 76 | out = self.net(self.tensor_3) 77 | out_discard = self.net_with_discard_wrapper(self.tensor_3) 78 | self.assertTrue(torch.allclose(out[:, :, :-1], out_discard)) 79 | 80 | 81 | if __name__ == '__main__': 82 | unittest.main() 83 | -------------------------------------------------------------------------------- /tests/NN_utilities_utilities_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from abcpy.statistics import Identity, LinearTransformation, NeuralEmbedding 6 | 7 | try: 8 | import torch 9 | except ImportError: 10 | has_torch = False 11 | else: 12 | has_torch = 
True 13 | from abcpy.NN_utilities.utilities import jacobian_second_order, jacobian, jacobian_hessian 14 | from abcpy.NN_utilities.networks import createDefaultNN 15 | 16 | 17 | class test_jacobian_functions(unittest.TestCase): 18 | # it tests that this gives the correct errors and that the result is same if you put diffable=True or False. 19 | # it does not test that the derivatives numerical errors are correct but they are. 20 | 21 | def setUp(self): 22 | if has_torch: 23 | net = createDefaultNN(5, 2, nonlinearity=torch.nn.Softplus(), batch_norm_last_layer=False)() 24 | net_bn = createDefaultNN(5, 2, nonlinearity=torch.nn.Softplus(), batch_norm_last_layer=True)() 25 | self.tensor = torch.randn((10, 5), requires_grad=True) 26 | self.y = net(self.tensor) 27 | self.y_bn = net_bn(self.tensor) 28 | 29 | self.y_with_infinities = self.y.detach().clone() 30 | self.y_with_infinities[0, 0] = np.inf 31 | 32 | self.f, self.s = jacobian_second_order(self.tensor, self.y) # reference derivatives 33 | self.f_bn, self.s_bn = jacobian_second_order(self.tensor, self.y_bn) # reference derivatives 34 | 35 | def test_first_der(self): 36 | if has_torch: 37 | # compute derivative with forward pass 38 | f2 = jacobian(self.tensor, self.y, diffable=False) 39 | 40 | assert torch.allclose(self.f, f2) 41 | 42 | def test_first_and_second_der(self): 43 | if has_torch: 44 | # compute derivative with forward pass 45 | f2, s2 = jacobian_second_order(self.tensor, self.y, diffable=False) 46 | 47 | assert torch.allclose(self.f, f2) 48 | assert torch.allclose(self.s, s2) 49 | 50 | def test_first_der_and_hessian(self): 51 | if has_torch: 52 | # compute derivative with forward pass 53 | f1, H1 = jacobian_hessian(self.tensor, self.y) 54 | f2, H2 = jacobian_hessian(self.tensor, self.y, diffable=False) 55 | s2 = torch.einsum('biik->bik', H2) # obtain the second order jacobian from Hessian matrix 56 | 57 | assert torch.allclose(self.f, f2) 58 | assert torch.allclose(f1, f2) 59 | assert torch.allclose(H1, H2) 60 | assert torch.allclose(self.s, s2) 61 | 62 | def test_first_der_bn(self): 63 | if has_torch: 64 | # compute derivative with forward pass 65 | f2 = jacobian(self.tensor, self.y_bn, diffable=False) 66 | 67 | assert torch.allclose(self.f_bn, f2) 68 | 69 | def test_first_and_second_der_bn(self): 70 | if has_torch: 71 | # compute derivative with forward pass 72 | f2, s2 = jacobian_second_order(self.tensor, self.y_bn, diffable=False) 73 | 74 | assert torch.allclose(self.f_bn, f2) 75 | assert torch.allclose(self.s_bn, s2) 76 | 77 | def test_first_der_and_hessian_bn(self): 78 | if has_torch: 79 | # compute derivative with forward pass 80 | f1, H1 = jacobian_hessian(self.tensor, self.y_bn) 81 | f2, H2 = jacobian_hessian(self.tensor, self.y_bn, diffable=False) 82 | s2 = torch.einsum('biik->bik', H2) # obtain the second order jacobian from Hessian matrix 83 | 84 | assert torch.allclose(self.f_bn, f2) 85 | assert torch.allclose(f1, f2) 86 | assert torch.allclose(H1, H2) 87 | assert torch.allclose(self.s_bn, s2) 88 | 89 | def test_errors(self): 90 | if has_torch: 91 | with self.assertRaises(ValueError): 92 | f1 = jacobian(self.tensor, self.y_with_infinities) 93 | with self.assertRaises(ValueError): 94 | f1, s1 = jacobian_second_order(self.tensor, self.y_with_infinities) 95 | with self.assertRaises(ValueError): 96 | f1, H1 = jacobian_hessian(self.tensor, self.y_with_infinities) 97 | 98 | 99 | if __name__ == '__main__': 100 | unittest.main() 101 | -------------------------------------------------------------------------------- 
/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-cscs/abcpy/caf0fd899424da69c0ef0bcd499696c5a077cdb1/tests/__init__.py -------------------------------------------------------------------------------- /tests/acceptedparametersmanager_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from abcpy.acceptedparametersmanager import * 4 | from abcpy.backends import BackendDummy as Backend 5 | from abcpy.continuousmodels import Normal 6 | from abcpy.discretemodels import Binomial 7 | 8 | """Tests whether the methods defined for AcceptedParametersManager work as intended.""" 9 | 10 | 11 | class BroadcastTests(unittest.TestCase): 12 | """Tests whether observations can be broadcasted using broadcast.""" 13 | 14 | def test(self): 15 | model = Normal([1, 0.1]) 16 | Manager = AcceptedParametersManager([model]) 17 | backend = Backend() 18 | Manager.broadcast(backend, [1, 2, 3]) 19 | self.assertEqual(Manager.observations_bds.value(), [1, 2, 3]) 20 | 21 | 22 | class UpdateKernelValuesTests(unittest.TestCase): 23 | """Tests whether kernel_parameters_bds can be updated.""" 24 | 25 | def test(self): 26 | model = Normal([1, 0.1]) 27 | Manager = AcceptedParametersManager([model]) 28 | backend = Backend() 29 | Manager.update_kernel_values(backend, [1]) 30 | self.assertEqual(Manager.kernel_parameters_bds.value(), [1]) 31 | 32 | 33 | class UpdateBroadcastTests(unittest.TestCase): 34 | """Tests whether it is possible to update accepted_parameters_bds, accepted_weights_bds and accepted_cov_mats_bds through update_broadcast.""" 35 | 36 | def setUp(self): 37 | self.model = Normal([1, 0.1]) 38 | self.backend = Backend() 39 | self.Manager = AcceptedParametersManager([self.model]) 40 | 41 | def test_accepted_parameters(self): 42 | self.Manager.update_broadcast(self.backend, [1, 2, 3]) 43 | self.assertEqual(self.Manager.accepted_parameters_bds.value(), [1, 2, 3]) 44 | 45 | def test_accepted_weights(self): 46 | self.Manager.update_broadcast(self.backend, accepted_weights=[1, 2, 3]) 47 | self.assertEqual(self.Manager.accepted_weights_bds.value(), [1, 2, 3]) 48 | 49 | def test_accepted_cov_matsrix(self): 50 | self.Manager.update_broadcast(self.backend, accepted_cov_mats=[[1, 0], [0, 1]]) 51 | self.assertEqual(self.Manager.accepted_cov_mats_bds.value(), [[1, 0], [0, 1]]) 52 | 53 | 54 | class GetMappingTests(unittest.TestCase): 55 | """Tests whether the dfs mapping returned from get_mapping is in the correct order.""" 56 | 57 | def test(self): 58 | B1 = Binomial([10, 0.2]) 59 | N1 = Normal([0.1, 0.01]) 60 | N2 = Normal([0.3, N1]) 61 | graph = Normal([B1, N2]) 62 | 63 | Manager = AcceptedParametersManager([graph]) 64 | 65 | mapping, mapping_index = Manager.get_mapping([graph]) 66 | self.assertEqual(mapping, [(B1, 0), (N2, 1), (N1, 2)]) 67 | 68 | 69 | class GetAcceptedParametersBdsValuesTests(unittest.TestCase): 70 | """Tests whether get_accepted_parameters_bds_values returns the correct values.""" 71 | 72 | def test(self): 73 | B1 = Binomial([10, 0.2]) 74 | N1 = Normal([0.1, 0.01]) 75 | N2 = Normal([0.3, N1]) 76 | graph = Normal([B1, N2]) 77 | 78 | Manager = AcceptedParametersManager([graph]) 79 | backend = Backend() 80 | Manager.update_broadcast(backend, [[2, 3, 4], [0.27, 0.32, 0.28], [0.97, 0.12, 0.99]]) 81 | 82 | values = Manager.get_accepted_parameters_bds_values([B1, N2, N1]) 83 | values_expected = [np.array(x).reshape(-1, ) for x in [[2, 3, 4], [0.27, 0.32, 0.28], 
[0.97, 0.12, 0.99]]] 84 | 85 | self.assertTrue(all([all(a == b) for a, b in zip(values, values_expected)])) 86 | 87 | 88 | if __name__ == '__main__': 89 | unittest.main() 90 | -------------------------------------------------------------------------------- /tests/backend_tests_mpi.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from mpi4py import MPI 4 | 5 | from abcpy.backends import BackendMPI, BackendMPITestHelper 6 | 7 | 8 | def setUpModule(): 9 | ''' 10 | If an exception is raised in a setUpModule then none of 11 | the tests in the module will be run. 12 | 13 | This is useful because the teams run in a while loop on initialization 14 | only responding to the scheduler's commands and will never execute anything else. 15 | 16 | On termination of scheduler, the teams call quit() that raises a SystemExit(). 17 | Because of the behaviour of setUpModule, it will not run any unit tests 18 | for the team and we now only need to write unit-tests from the scheduler's 19 | point of view. 20 | ''' 21 | global rank, backend_mpi 22 | comm = MPI.COMM_WORLD 23 | rank = comm.Get_rank() 24 | backend_mpi = BackendMPI() 25 | 26 | 27 | class MPIBackendTests(unittest.TestCase): 28 | 29 | def test_parallelize(self): 30 | data = [0] * backend_mpi.size() 31 | pds = backend_mpi.parallelize(data) 32 | pds_map = backend_mpi.map(lambda x: x + MPI.COMM_WORLD.Get_rank(), pds) 33 | res = backend_mpi.collect(pds_map) 34 | 35 | for scheduler_index in backend_mpi.scheduler_node_ranks(): 36 | self.assertTrue(scheduler_index not in res, "Node in scheduler_node_ranks performed map.") 37 | 38 | def test_map(self): 39 | data = [1, 2, 3, 4, 5] 40 | pds = backend_mpi.parallelize(data) 41 | pds_map = backend_mpi.map(lambda x: x ** 2, pds) 42 | res = backend_mpi.collect(pds_map) 43 | assert res == list(map(lambda x: x ** 2, data)) 44 | 45 | def test_broadcast(self): 46 | data = [1, 2, 3, 4, 5] 47 | pds = backend_mpi.parallelize(data) 48 | 49 | bds = backend_mpi.broadcast(100) 50 | 51 | # Pollute the BDS values of the scheduler to confirm teams 52 | # use their broadcasted value 53 | for k, v in backend_mpi.bds_store.items(): 54 | backend_mpi.bds_store[k] = 99999 55 | 56 | def test_map(x): 57 | return x + bds.value() 58 | 59 | pds_m = backend_mpi.map(test_map, pds) 60 | self.assertTrue(backend_mpi.collect(pds_m) == [101, 102, 103, 104, 105]) 61 | 62 | def test_pds_delete(self): 63 | 64 | def check_if_exists(x): 65 | obj = BackendMPITestHelper() 66 | return obj.check_pds(x) 67 | 68 | data = [1, 2, 3, 4, 5] 69 | pds = backend_mpi.parallelize(data) 70 | 71 | # Check if the pds we just created exists in all the teams(+scheduler) 72 | 73 | id_check_pds = backend_mpi.parallelize([pds.pds_id] * 5) 74 | pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) 75 | self.assertTrue(False not in backend_mpi.collect(pds_check_result), "PDS was not created") 76 | 77 | # Delete the PDS on scheduler and try again 78 | del pds 79 | pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) 80 | 81 | self.assertTrue(True not in backend_mpi.collect(pds_check_result), "PDS was not deleted") 82 | 83 | def test_bds_delete(self): 84 | 85 | def check_if_exists(x): 86 | obj = BackendMPITestHelper() 87 | return obj.check_bds(x) 88 | 89 | data = [1, 2, 3, 4, 5] 90 | bds = backend_mpi.broadcast(data) 91 | 92 | # Check if the pds we just created exists in all the teams(+scheduler) 93 | id_check_bds = backend_mpi.parallelize([bds.bds_id] * 5) 94 | bds_check_result = 
backend_mpi.map(check_if_exists, id_check_bds) 95 | self.assertTrue(False not in backend_mpi.collect(bds_check_result), "BDS was not created") 96 | 97 | # Delete the PDS on scheduler and try again 98 | del bds 99 | bds_check_result = backend_mpi.map(check_if_exists, id_check_bds) 100 | self.assertTrue(True not in backend_mpi.collect(bds_check_result), "BDS was not deleted") 101 | 102 | def test_function_pickle(self): 103 | def square(x): 104 | return x ** 2 105 | 106 | class staticfunctest: 107 | @staticmethod 108 | def square(x): 109 | return x ** 2 110 | 111 | class nonstaticfunctest: 112 | def square(self, x): 113 | return x ** 2 114 | 115 | data = [1, 2, 3, 4, 5] 116 | expected_result = [1, 4, 9, 16, 25] 117 | pds = backend_mpi.parallelize(data) 118 | 119 | pds_map1 = backend_mpi.map(square, pds) 120 | pds_res1 = backend_mpi.collect(pds_map1) 121 | self.assertTrue(pds_res1 == expected_result, "Failed pickle test for general function") 122 | 123 | pds_map2 = backend_mpi.map(lambda x: x ** 2, pds) 124 | pds_res2 = backend_mpi.collect(pds_map2) 125 | self.assertTrue(pds_res2 == expected_result, "Failed pickle test for lambda function") 126 | 127 | pds_map3 = backend_mpi.map(staticfunctest.square, pds) 128 | pds_res3 = backend_mpi.collect(pds_map3) 129 | self.assertTrue(pds_res3 == expected_result, "Failed pickle test for static function") 130 | 131 | obj = nonstaticfunctest() 132 | pds_map4 = backend_mpi.map(obj.square, pds) 133 | pds_res4 = backend_mpi.collect(pds_map4) 134 | self.assertTrue(pds_res4 == expected_result, "Failed pickle test for non-static function") 135 | 136 | def test_exception_handling(self): 137 | 138 | def function_with_possible_zero_devision(i): 139 | return 1 / i 140 | 141 | data = [1, 2, 0] 142 | pds = backend_mpi.parallelize(data) 143 | 144 | pds_map = backend_mpi.map(function_with_possible_zero_devision, pds) 145 | with self.assertRaises(ZeroDivisionError): 146 | backend_mpi.collect(pds_map) 147 | -------------------------------------------------------------------------------- /tests/backend_tests_mpi_model_mpi.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy 4 | from mpi4py import MPI 5 | 6 | from abcpy.backends import BackendMPI, BackendMPITestHelper 7 | 8 | 9 | def setUpModule(): 10 | ''' 11 | If an exception is raised in a setUpModule then none of 12 | the tests in the module will be run. 13 | 14 | This is useful because the teams run in a while loop on initialization 15 | only responding to the scheduler's commands and will never execute anything else. 16 | 17 | On termination of scheduler, the teams call quit() that raises a SystemExit(). 18 | Because of the behaviour of setUpModule, it will not run any unit tests 19 | for the team and we now only need to write unit-tests from the scheduler's 20 | point of view. 
21 | ''' 22 | global rank, backend_mpi 23 | comm = MPI.COMM_WORLD 24 | rank = comm.Get_rank() 25 | backend_mpi = BackendMPI(process_per_model=2) 26 | 27 | 28 | class MPIBackendTests(unittest.TestCase): 29 | 30 | def test_parallelize(self): 31 | data = [0] * backend_mpi.size() 32 | pds = backend_mpi.parallelize(data) 33 | pds_map = backend_mpi.map(lambda x, npc=None: x + MPI.COMM_WORLD.Get_rank(), pds) 34 | res = backend_mpi.collect(pds_map) 35 | 36 | for scheduler_index in backend_mpi.scheduler_node_ranks(): 37 | self.assertTrue(scheduler_index not in res, "Node in scheduler_node_ranks performed map.") 38 | 39 | def test_map(self): 40 | def square_mpi(x, npc=None): 41 | local_res = numpy.array([2 * (x ** 2)], 'i') 42 | # global_res = numpy.array([0], 'i') 43 | # MPI.COMM_WORLD.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) 44 | return local_res[0] 45 | 46 | data = [1, 2, 3, 4, 5] 47 | pds = backend_mpi.parallelize(data) 48 | pds_map = backend_mpi.map(square_mpi, pds) 49 | res = backend_mpi.collect(pds_map) 50 | assert res == list(map(lambda x: 2 * (x ** 2), data)) 51 | 52 | def test_broadcast(self): 53 | data = [1, 2, 3, 4, 5] 54 | pds = backend_mpi.parallelize(data) 55 | 56 | bds = backend_mpi.broadcast(100) 57 | 58 | # Pollute the BDS values of the scheduler to confirm teams 59 | # use their broadcasted value 60 | for k, v in backend_mpi.bds_store.items(): 61 | backend_mpi.bds_store[k] = 99999 62 | 63 | def test_map(x, npc=None): 64 | return x + bds.value() 65 | 66 | pds_m = backend_mpi.map(test_map, pds) 67 | self.assertTrue(backend_mpi.collect(pds_m) == [101, 102, 103, 104, 105]) 68 | 69 | def test_pds_delete(self): 70 | 71 | def check_if_exists(x, npc): 72 | obj = BackendMPITestHelper() 73 | if npc.communicator().Get_rank() == 0: 74 | return obj.check_pds(x) 75 | return None 76 | 77 | data = [1, 2, 3, 4, 5] 78 | pds = backend_mpi.parallelize(data) 79 | 80 | # Check if the pds we just created exists in all the teams(+scheduler) 81 | 82 | id_check_pds = backend_mpi.parallelize([pds.pds_id] * 5) 83 | pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) 84 | self.assertTrue(False not in backend_mpi.collect(pds_check_result), "PDS was not created") 85 | 86 | # Delete the PDS on scheduler and try again 87 | del pds 88 | pds_check_result = backend_mpi.map(check_if_exists, id_check_pds) 89 | 90 | self.assertTrue(True not in backend_mpi.collect(pds_check_result), "PDS was not deleted") 91 | 92 | def test_bds_delete(self): 93 | 94 | def check_if_exists(x, npc=None): 95 | obj = BackendMPITestHelper() 96 | return obj.check_bds(x) 97 | 98 | data = [1, 2, 3, 4, 5] 99 | bds = backend_mpi.broadcast(data) 100 | 101 | # Check if the pds we just created exists in all the teams(+scheduler) 102 | id_check_bds = backend_mpi.parallelize([bds.bds_id] * 5) 103 | bds_check_result = backend_mpi.map(check_if_exists, id_check_bds) 104 | self.assertTrue(False not in backend_mpi.collect(bds_check_result), "BDS was not created") 105 | 106 | # Delete the PDS on scheduler and try again 107 | del bds 108 | bds_check_result = backend_mpi.map(check_if_exists, id_check_bds) 109 | self.assertTrue(True not in backend_mpi.collect(bds_check_result), "BDS was not deleted") 110 | 111 | def test_function_pickle(self): 112 | 113 | def square_mpi(x, npc=None): 114 | local_res = numpy.array([2 * (x ** 2)], 'i') 115 | # global_res = numpy.array([0], 'i') 116 | # model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) 117 | return local_res[0] 118 | 119 | class 
staticfunctest_mpi: 120 | @staticmethod 121 | def square_mpi(x, npc=None): 122 | local_res = numpy.array([2 * (x ** 2)], 'i') 123 | # global_res = numpy.array([0], 'i') 124 | # model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) 125 | return local_res[0] 126 | 127 | class nonstaticfunctest_mpi: 128 | def square_mpi(self, x, npc=None): 129 | local_res = numpy.array([2 * (x ** 2)], 'i') 130 | # global_res = numpy.array([0], 'i') 131 | # model_comm.Reduce([local_res,MPI.INT], [global_res,MPI.INT], op=MPI.SUM, root=0) 132 | return local_res[0] 133 | 134 | data = [1, 2, 3, 4, 5] 135 | expected_result = [2, 8, 18, 32, 50] 136 | 137 | pds = backend_mpi.parallelize(data) 138 | pds_map1 = backend_mpi.map(square_mpi, pds) 139 | pds_res1 = backend_mpi.collect(pds_map1) 140 | 141 | self.assertTrue(pds_res1 == expected_result, "Failed pickle test for general function") 142 | 143 | pds_map3 = backend_mpi.map(staticfunctest_mpi.square_mpi, pds) 144 | pds_res3 = backend_mpi.collect(pds_map3) 145 | self.assertTrue(pds_res3 == expected_result, "Failed pickle test for static function") 146 | 147 | obj = nonstaticfunctest_mpi() 148 | pds_map4 = backend_mpi.map(obj.square_mpi, pds) 149 | pds_res4 = backend_mpi.collect(pds_map4) 150 | self.assertTrue(pds_res4 == expected_result, "Failed pickle test for non-static function") 151 | -------------------------------------------------------------------------------- /tests/discretemodels_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from abcpy.discretemodels import * 4 | from tests.probabilisticmodels_tests import AbstractAPIImplementationTests 5 | 6 | """Tests whether the methods defined for discrete probabilistic models are working as intended.""" 7 | 8 | 9 | class BernoulliAPITests(AbstractAPIImplementationTests, unittest.TestCase): 10 | model_types = [Bernoulli] 11 | model_inputs = [[0.5]] 12 | 13 | 14 | class BinomialAPITests(AbstractAPIImplementationTests, unittest.TestCase): 15 | model_types = [Binomial] 16 | model_inputs = [[3, 0.5]] 17 | 18 | 19 | class PoissonAPITests(AbstractAPIImplementationTests, unittest.TestCase): 20 | model_types = [Poisson] 21 | model_inputs = [[3]] 22 | 23 | 24 | class DiscreteUniformTests(AbstractAPIImplementationTests, unittest.TestCase): 25 | model_types = [DiscreteUniform] 26 | model_inputs = [[10, 20]] 27 | 28 | 29 | class CheckParametersAtInitializationTests(unittest.TestCase): 30 | """Tests that no probabilistic model with invalid parameters can be initialized.""" 31 | 32 | # TODO: Test for all distributions the behaviour if input parameters are real distributions and not only 33 | # hyperparameters 34 | 35 | def test_Bernoulli(self): 36 | with self.assertRaises(TypeError): 37 | Bernoulli(np.array([1, 2, 3])) 38 | 39 | with self.assertRaises(ValueError): 40 | Bernoulli([[1], [4]]) 41 | 42 | def test_Binomial(self): 43 | with self.assertRaises(TypeError): 44 | Binomial(np.array([1, 2, 3])) 45 | 46 | with self.assertRaises(ValueError): 47 | Binomial([1, 2, 3]) 48 | 49 | def test_Poisson(self): 50 | with self.assertRaises(TypeError): 51 | Poisson(np.array([1, 2, 3])) 52 | 53 | with self.assertRaises(ValueError): 54 | Poisson([2, 3]) 55 | 56 | def test_DiscreteUniform(self): 57 | with self.assertRaises(TypeError): 58 | DiscreteUniform(np.array([1, 2, 3])) 59 | 60 | with self.assertRaises(ValueError): 61 | DiscreteUniform([2, 3, 4]) 62 | 63 | 64 | class DimensionTests(unittest.TestCase): 65 | """Tests whether the dimensions of
all discrete models are defined in the correct way.""" 66 | 67 | def test_Bernoulli(self): 68 | Bn = Bernoulli([0.5]) 69 | self.assertTrue(Bn.get_output_dimension() == 1) 70 | 71 | def test_Binomial(self): 72 | Bi = Binomial([1, 0.5]) 73 | self.assertTrue(Bi.get_output_dimension() == 1) 74 | 75 | def test_Poisson(self): 76 | Po = Poisson([3]) 77 | self.assertTrue(Po.get_output_dimension() == 1) 78 | 79 | def test_DiscreteUniform(self): 80 | Du = DiscreteUniform([10, 20]) 81 | self.assertTrue(Du.get_output_dimension() == 1) 82 | 83 | 84 | class SampleFromDistributionTests(unittest.TestCase): 85 | """Tests the return value of forward_simulate for all discrete distributions.""" 86 | 87 | def test_Bernoulli(self): 88 | Bn = Bernoulli([0.5]) 89 | samples = Bn.forward_simulate(Bn.get_input_values(), 3) 90 | self.assertTrue(isinstance(samples, list)) 91 | self.assertTrue(len(samples) == 3) 92 | 93 | def test_Binomial(self): 94 | Bi = Binomial([1, 0.1]) 95 | samples = Bi.forward_simulate(Bi.get_input_values(), 3) 96 | self.assertTrue(isinstance(samples, list)) 97 | self.assertTrue(len(samples) == 3) 98 | 99 | def test_Poisson(self): 100 | Po = Poisson([3]) 101 | samples = Po.forward_simulate(Po.get_input_values(), 3) 102 | self.assertTrue(isinstance(samples, list)) 103 | self.assertTrue(len(samples) == 3) 104 | 105 | def test_DiscreteUniform(self): 106 | Du = DiscreteUniform([10, 20]) 107 | samples = Du.forward_simulate(Du.get_input_values(), 3) 108 | self.assertTrue(isinstance(samples, list)) 109 | self.assertTrue(len(samples) == 3) 110 | 111 | 112 | class CheckParametersBeforeSamplingTests(unittest.TestCase): 113 | """Tests whether False will be returned if the input parameters of _check_parameters_before_sampling are not 114 | accepted.""" 115 | 116 | def test_Bernoulli(self): 117 | Bn = Bernoulli([0.5]) 118 | self.assertFalse(Bn._check_input([-.3])) 119 | self.assertFalse(Bn._check_input([1.2])) 120 | 121 | def test_Binomial(self): 122 | Bi = Binomial([1, 0.5]) 123 | with self.assertRaises(TypeError): 124 | self.assertFalse(Bi._check_input([3, .5, 5])) 125 | self.assertFalse(Bi._check_input([.3, .5])) 126 | self.assertFalse(Bi._check_input([-2, .5])) 127 | self.assertFalse(Bi._check_input([3, -.3])) 128 | self.assertFalse(Bi._check_input([3, 1.2])) 129 | 130 | def test_Poisson(self): 131 | Po = Poisson([3]) 132 | self.assertFalse(Po._check_input([3, 5])) 133 | self.assertFalse(Po._check_input([-1])) 134 | 135 | def test_DiscreteUniform(self): 136 | Du = DiscreteUniform([10, 20]) 137 | self.assertFalse(Du._check_input([3.0, 5])) 138 | self.assertFalse(Du._check_input([2, 6.0])) 139 | self.assertFalse(Du._check_input([5, 2])) 140 | 141 | 142 | if __name__ == '__main__': 143 | unittest.main() 144 | -------------------------------------------------------------------------------- /tests/jointapprox_lhd_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from abcpy.approx_lhd import SynLikelihood 6 | from abcpy.continuousmodels import Normal, Uniform 7 | from abcpy.jointapprox_lhd import ProductCombination 8 | from abcpy.statistics import Identity 9 | 10 | 11 | class ProductCombinationTests(unittest.TestCase): 12 | def setUp(self): 13 | self.stat_calc1 = Identity(degree=1, cross=0) 14 | self.stat_calc2 = Identity(degree=1, cross=0) 15 | self.likfun1 = SynLikelihood(self.stat_calc1) 16 | self.likfun2 = SynLikelihood(self.stat_calc2) 17 | ## Define Models 18 | # define a uniform prior distribution 19 | 
self.mu = Uniform([[-5.0], [5.0]], name='mu') 20 | self.sigma = Uniform([[0.0], [10.0]], name='sigma') 21 | # define a Gaussian model 22 | self.model1 = Normal([self.mu, self.sigma]) 23 | self.model2 = Normal([self.mu, self.sigma]) 24 | 25 | # Check whether wrong sized distnacefuncs gives an error 26 | self.assertRaises(ValueError, ProductCombination, [self.model1, self.model2], [self.likfun1]) 27 | 28 | self.jointapprox_lhd = ProductCombination([self.model1, self.model2], [self.likfun1, self.likfun2]) 29 | 30 | def test_likelihood(self): 31 | # test simple distance computation 32 | a = [[0, 0, 0], [0, 0, 0]] 33 | b = [[0, 0, 0], [0, 0, 0]] 34 | c = [[1, 1, 1], [1, 1, 1]] 35 | 36 | # Checks whether wrong input type produces error message 37 | self.assertRaises(TypeError, self.jointapprox_lhd.likelihood, 3.4, [[2, 1]]) 38 | self.assertRaises(TypeError, self.jointapprox_lhd.likelihood, [[2, 4]], 3.4) 39 | 40 | # test input has different dimensionality 41 | self.assertRaises(BaseException, self.jointapprox_lhd.likelihood, [a], [b, c]) 42 | self.assertRaises(BaseException, self.jointapprox_lhd.likelihood, [b, c], [a]) 43 | 44 | # test whether they compute correct values 45 | # create observed data 46 | y_obs = [[9.8], [9.8]] 47 | # create fake simulated data 48 | self.mu._fixed_values = [1.1] 49 | self.sigma._fixed_values = [1.0] 50 | y_sim_1 = self.model1.forward_simulate(self.model1.get_input_values(), 100, rng=np.random.RandomState(1)) 51 | y_sim_2 = self.model2.forward_simulate(self.model2.get_input_values(), 100, rng=np.random.RandomState(1)) 52 | # calculate the statistics of the observed data 53 | comp_likelihood = self.jointapprox_lhd.likelihood(y_obs, [y_sim_1, y_sim_2]) 54 | expected_likelihood = 8.612491843767518e-43 55 | # This checks whether it computes a correct value and dimension is right 56 | self.assertLess(comp_likelihood - expected_likelihood, 10e-2) 57 | 58 | 59 | if __name__ == '__main__': 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /tests/jointdistances_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from abcpy.continuousmodels import Normal, Uniform 6 | from abcpy.distances import Euclidean 7 | from abcpy.jointdistances import LinearCombination 8 | from abcpy.statistics import Identity 9 | 10 | 11 | class LinearCombinationTests(unittest.TestCase): 12 | def setUp(self): 13 | self.stat_calc1 = Identity(degree=1, cross=0) 14 | self.stat_calc2 = Identity(degree=1, cross=0) 15 | self.distancefunc1 = Euclidean(self.stat_calc1) 16 | self.distancefunc2 = Euclidean(self.stat_calc2) 17 | ## Define Models 18 | # define a uniform prior distribution 19 | mu = Uniform([[-5.0], [5.0]], name='mu') 20 | sigma = Uniform([[0.0], [10.0]], name='sigma') 21 | # define a Gaussian model 22 | self.model1 = Normal([mu, sigma]) 23 | self.model2 = Normal([mu, sigma]) 24 | 25 | # Check whether wrong sized distnacefuncs gives an error 26 | self.assertRaises(ValueError, LinearCombination, [self.model1, self.model2], [self.distancefunc1], [1.0, 1.0]) 27 | 28 | # Check whether wrong sized weights gives an error 29 | self.assertRaises(ValueError, LinearCombination, [self.model1, self.model2], 30 | [self.distancefunc1, self.distancefunc2], [1.0, 1.0, 1.0]) 31 | 32 | self.jointdistancefunc = LinearCombination([self.model1, self.model2], [self.distancefunc1, self.distancefunc2], 33 | [1.0, 1.0]) 34 | 35 | def test_distance(self): 36 | # test simple 
distance computation 37 | a = [[0, 0, 0], [0, 0, 0]] 38 | b = [[0, 0, 0], [0, 0, 0]] 39 | c = [[1, 1, 1], [1, 1, 1]] 40 | 41 | # Checks whether wrong input type produces error message 42 | self.assertRaises(TypeError, self.jointdistancefunc.distance, 3.4, [b]) 43 | self.assertRaises(TypeError, self.jointdistancefunc.distance, [a], 3.4) 44 | 45 | # test input has different dimensionality 46 | self.assertRaises(BaseException, self.jointdistancefunc.distance, [a], [b, c]) 47 | self.assertRaises(BaseException, self.jointdistancefunc.distance, [b, c], [a]) 48 | 49 | # test whether they compute correct values 50 | self.assertTrue(self.jointdistancefunc.distance([a, b], [a, b]) == np.array([0])) 51 | self.assertTrue(self.jointdistancefunc.distance([a, c], [c, b]) == np.array([1.7320508075688772])) 52 | 53 | def test_dist_max(self): 54 | self.assertTrue(self.jointdistancefunc.dist_max() == np.inf) 55 | 56 | 57 | if __name__ == '__main__': 58 | unittest.main() 59 | -------------------------------------------------------------------------------- /tests/modelselections_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from abcpy.backends import BackendDummy as Backend 4 | from abcpy.continuousmodels import Normal 5 | from abcpy.continuousmodels import StudentT 6 | from abcpy.continuousmodels import Uniform 7 | from abcpy.modelselections import RandomForest 8 | from abcpy.statistics import Identity 9 | 10 | 11 | class RandomForestTests(unittest.TestCase): 12 | def setUp(self): 13 | # define observation for true parameters mean=170, std=15 14 | self.y_obs = [160.82499176] 15 | self.model_array = [None] * 2 16 | # Model 1: Gaussian 17 | # define prior 18 | self.mu1 = Uniform([[150], [200]], name='mu1') 19 | self.sigma1 = Uniform([[5.0], [25.0]], name='sigma1') 20 | # define the model 21 | self.model_array[0] = Normal([self.mu1, self.sigma1]) 22 | # Model 2: Student t 23 | # define prior 24 | self.mu2 = Uniform([[150], [200]], name='mu2') 25 | self.sigma2 = Uniform([[1], [30.0]], name='sigma2') 26 | # define the model 27 | self.model_array[1] = StudentT([self.mu2, self.sigma2]) 28 | 29 | # define statistics 30 | self.statistics_calc = Identity(degree=2, cross=False) 31 | # define backend 32 | self.backend = Backend() 33 | 34 | def test_select_model(self): 35 | modelselection = RandomForest(self.model_array, self.statistics_calc, self.backend, seed=1) 36 | model = modelselection.select_model(self.y_obs, n_samples=100, n_samples_per_param=1) 37 | 38 | self.assertTrue(self.model_array[0] == model) 39 | 40 | def test_posterior_probability(self): 41 | modelselection = RandomForest(self.model_array, self.statistics_calc, self.backend, seed=1) 42 | model_prob = modelselection.posterior_probability(self.y_obs) 43 | 44 | self.assertTrue(model_prob > 0.7) 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /tests/perturbationkernel_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from abcpy.acceptedparametersmanager import AcceptedParametersManager 4 | from abcpy.backends import BackendDummy as Backend 5 | from abcpy.continuousmodels import Normal 6 | from abcpy.discretemodels import Binomial 7 | from abcpy.perturbationkernel import * 8 | 9 | """Tests whether the methods for each perturbation kernel are working as intended""" 10 | 11 | 12 | class 
JointCheckKernelsTests(unittest.TestCase): 13 | """Tests whether value errors are raised correctly during initialization.""" 14 | 15 | def test_Raises(self): 16 | N1 = Normal([0.1, 0.01]) 17 | N2 = Normal([0.3, N1]) 18 | kernel = MultivariateNormalKernel([N1, N2, N1]) 19 | with self.assertRaises(ValueError): 20 | JointPerturbationKernel([kernel]) 21 | 22 | def test_doesnt_raise(self): 23 | N1 = Normal([0.1, 0.01]) 24 | N2 = Normal([0.3, N1]) 25 | kernel = MultivariateNormalKernel([N1, N2]) 26 | try: 27 | JointPerturbationKernel([kernel]) 28 | except ValueError: 29 | self.fail("JointPerturbationKernel raises an exception") 30 | 31 | 32 | class CalculateCovTest(unittest.TestCase): 33 | """Tests whether the implementation of calculate_cov is working as intended.""" 34 | 35 | def test_default(self): 36 | B1 = Binomial([10, 0.2]) 37 | N1 = Normal([0.1, 0.01]) 38 | N2 = Normal([0.3, N1]) 39 | graph = Normal([B1, N2]) 40 | 41 | Manager = AcceptedParametersManager([graph]) 42 | backend = Backend() 43 | kernel = DefaultKernel([N1, N2, B1]) 44 | Manager.update_broadcast(backend, [[2, 0.27, 0.097], [3, 0.32, 0.012]], np.array([1, 1])) 45 | 46 | kernel_parameters = [] 47 | for krnl in kernel.kernels: 48 | kernel_parameters.append(Manager.get_accepted_parameters_bds_values(krnl.models)) 49 | Manager.update_kernel_values(backend, kernel_parameters) 50 | 51 | covs = kernel.calculate_cov(Manager) 52 | self.assertTrue(len(covs) == 2) 53 | 54 | self.assertTrue(len(covs[0]) == 2) 55 | 56 | self.assertTrue(not (covs[1])) 57 | 58 | def test_Student_T(self): 59 | N1 = Normal([0.1, 0.01]) 60 | N2 = Normal([0.3, N1]) 61 | graph = Normal([N1, N2]) 62 | 63 | Manager = AcceptedParametersManager([graph]) 64 | backend = Backend() 65 | kernel = JointPerturbationKernel([MultivariateStudentTKernel([N1, N2], df=2)]) 66 | Manager.update_broadcast(backend, [[0.27, 0.097], [0.32, 0.012]], np.array([1, 1])) 67 | 68 | kernel_parameters = [] 69 | for krnl in kernel.kernels: 70 | kernel_parameters.append(Manager.get_accepted_parameters_bds_values(krnl.models)) 71 | Manager.update_kernel_values(backend, kernel_parameters) 72 | 73 | covs = kernel.calculate_cov(Manager) 74 | print(covs) 75 | self.assertTrue(len(covs) == 1) 76 | 77 | self.assertTrue(len(covs[0]) == 2) 78 | 79 | 80 | class UpdateTests(unittest.TestCase): 81 | """Tests whether the values returned after perturbation are in the correct format for each perturbation kernel.""" 82 | 83 | def test_DefaultKernel(self): 84 | B1 = Binomial([10, 0.2]) 85 | N1 = Normal([0.1, 0.01]) 86 | N2 = Normal([0.3, N1]) 87 | graph = Normal([B1, N2]) 88 | 89 | Manager = AcceptedParametersManager([graph]) 90 | backend = Backend() 91 | kernel = DefaultKernel([N1, N2, B1]) 92 | Manager.update_broadcast(backend, [[2, 0.27, 0.097], [3, 0.32, 0.012]], np.array([1, 1]), 93 | accepted_cov_mats=[[[0.01, 0], [0, 0.01]], []]) 94 | 95 | kernel_parameters = [] 96 | for krnl in kernel.kernels: 97 | kernel_parameters.append( 98 | Manager.get_accepted_parameters_bds_values(krnl.models)) 99 | 100 | Manager.update_kernel_values(backend, kernel_parameters=kernel_parameters) 101 | 102 | rng = np.random.RandomState(1) 103 | perturbed_values_and_models = kernel.update(Manager, 1, rng) 104 | self.assertEqual(perturbed_values_and_models, 105 | [(N1, [0.17443453636632419]), (N2, [0.25882435863499248]), (B1, [3])]) 106 | 107 | def test_Student_T(self): 108 | N1 = Normal([0.1, 0.01]) 109 | N2 = Normal([0.3, N1]) 110 | graph = Normal([N1, N2]) 111 | 112 | Manager = AcceptedParametersManager([graph]) 113 | backend = 
Backend() 114 | kernel = JointPerturbationKernel([MultivariateStudentTKernel([N1, N2], df=2)]) 115 | Manager.update_broadcast(backend, [[0.27, 0.097], [0.32, 0.012]], np.array([1, 1]), 116 | accepted_cov_mats=[[[0.01, 0], [0, 0.01]], []]) 117 | 118 | kernel_parameters = [] 119 | for krnl in kernel.kernels: 120 | kernel_parameters.append( 121 | Manager.get_accepted_parameters_bds_values(krnl.models)) 122 | 123 | Manager.update_kernel_values(backend, kernel_parameters=kernel_parameters) 124 | 125 | rng = np.random.RandomState(1) 126 | perturbed_values_and_models = kernel.update(Manager, 1, rng) 127 | print(perturbed_values_and_models) 128 | self.assertEqual(perturbed_values_and_models, 129 | [(N1, [0.2107982411716391]), (N2, [-0.049106838502166614])]) 130 | 131 | 132 | class PdfTests(unittest.TestCase): 133 | """Tests whether the pdf returns the correct results.""" 134 | 135 | def test_return_value_default_kernel(self): 136 | B1 = Binomial([10, 0.2]) 137 | N1 = Normal([0.1, 0.01]) 138 | N2 = Normal([0.3, N1]) 139 | graph = Normal([B1, N2]) 140 | 141 | Manager = AcceptedParametersManager([graph]) 142 | backend = Backend() 143 | kernel = DefaultKernel([N1, N2, B1]) 144 | Manager.update_broadcast(backend, [[2, 0.4, 0.09], [3, 0.2, 0.008]], np.array([0.5, 0.2])) 145 | kernel_parameters = [] 146 | for krnl in kernel.kernels: 147 | kernel_parameters.append(Manager.get_accepted_parameters_bds_values(krnl.models)) 148 | Manager.update_kernel_values(backend, kernel_parameters) 149 | mapping, mapping_index = Manager.get_mapping(Manager.model) 150 | covs = [[[1, 0], [0, 1]], []] 151 | Manager.update_broadcast(backend, accepted_cov_mats=covs) 152 | pdf = kernel.pdf(mapping, Manager, Manager.accepted_parameters_bds.value()[1], [2, 0.3, 0.1]) 153 | self.assertTrue(isinstance(pdf, float)) 154 | 155 | def test_return_value_Student_T(self): 156 | N1 = Normal([0.1, 0.01]) 157 | N2 = Normal([0.3, N1]) 158 | graph = Normal([N1, N2]) 159 | 160 | Manager = AcceptedParametersManager([graph]) 161 | backend = Backend() 162 | kernel = JointPerturbationKernel([MultivariateStudentTKernel([N1, N2], df=2)]) 163 | Manager.update_broadcast(backend, [[0.4, 0.09], [0.2, 0.008]], np.array([0.5, 0.2])) 164 | kernel_parameters = [] 165 | for krnl in kernel.kernels: 166 | kernel_parameters.append(Manager.get_accepted_parameters_bds_values(krnl.models)) 167 | Manager.update_kernel_values(backend, kernel_parameters) 168 | mapping, mapping_index = Manager.get_mapping(Manager.model) 169 | covs = [[[1, 0], [0, 1]], []] 170 | Manager.update_broadcast(backend, accepted_cov_mats=covs) 171 | pdf = kernel.pdf(mapping, Manager, Manager.accepted_parameters_bds.value()[1], [0.3, 0.1]) 172 | self.assertTrue(isinstance(pdf, float)) 173 | 174 | 175 | if __name__ == '__main__': 176 | unittest.main() 177 | -------------------------------------------------------------------------------- /tests/pickle_tests.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import unittest 3 | 4 | import cloudpickle 5 | import numpy as np 6 | 7 | '''We use pickle in our MPI backend to send a method from the master to the workers. The object with which this method is associated cotains the backend as an attribute, while the backend itself contains the data on which the workers should work. Pickling the method results in pickling the backend, which results in the whole data being pickled and sent, which is undersirable. 8 | 9 | In pickle, the "getstate" method can be specified. 
When an object containing a "getstate" method is pickled, only the attributes specified within that method are pickled. 10 | 11 | This test checks whether everything is working correctly with cloudpickle. 12 | ''' 13 | 14 | 15 | class ToBePickled: 16 | def __init__(self): 17 | self.included = 5 18 | self.notIncluded = np.zeros(10 ** 5) 19 | 20 | def __getstate__(self): 21 | """Method that tells cloudpickle which attributes should be pickled 22 | Returns 23 | ------- 24 | state 25 | all the attributes that should be pickled 26 | """ 27 | state = self.__dict__.copy() 28 | del state['notIncluded'] 29 | return state 30 | 31 | 32 | class PickleTests(unittest.TestCase): 33 | def test_exclusion(self): 34 | """Tests that, after pickling and unpickling the object, the attribute which should be excluded is no longer present""" 35 | pickled_object = cloudpickle.dumps(ToBePickled(), pickle.HIGHEST_PROTOCOL) 36 | unpickled_object = cloudpickle.loads(pickled_object) 37 | self.assertFalse(hasattr(unpickled_object, 'notIncluded')) 38 | 39 | 40 | if __name__ == '__main__': 41 | unittest.main() 42 | -------------------------------------------------------------------------------- /tests/test_examples.py: -------------------------------------------------------------------------------- 1 | """Tests the examples which do not require parallelization.""" 2 | 3 | import unittest 4 | try: 5 | import torch 6 | except ImportError: 7 | has_torch = False 8 | else: 9 | has_torch = True 10 | 11 | 12 | class ExampleApproxLhdTest(unittest.TestCase): 13 | def test_pmc(self): 14 | from examples.approx_lhd.pmc_hierarchical_models import infer_parameters 15 | journal = infer_parameters(steps=1, n_sample=50) 16 | test_result = journal.posterior_mean()["school_location"] 17 | expected_result = 0.2569671145768137 18 | self.assertAlmostEqual(test_result, expected_result) 19 | 20 | 21 | class ExampleBackendsTest(unittest.TestCase): 22 | def test_dummy(self): 23 | from examples.backends.dummy.pmcabc_gaussian import infer_parameters 24 | journal = infer_parameters(steps=1, n_sample=50) 25 | test_result = journal.posterior_mean()["mu"] 26 | expected_result = 175.00683044068612 27 | self.assertAlmostEqual(test_result, expected_result) 28 | 29 | 30 | class ExampleExtensionsModelsTest(unittest.TestCase): 31 | def test_cpp(self): 32 | from examples.extensions.models.gaussian_cpp.pmcabc_gaussian_model_simple import infer_parameters 33 | journal = infer_parameters(steps=1, n_sample=50) 34 | test_result = journal.posterior_mean()["mu"] 35 | expected_result = 173.74453347475725 36 | self.assertAlmostEqual(test_result, expected_result) 37 | 38 | def test_f90(self): 39 | from examples.extensions.models.gaussian_f90.pmcabc_gaussian_model_simple import infer_parameters 40 | journal = infer_parameters(steps=1, n_sample=50) 41 | test_result = journal.posterior_mean()["mu"] 42 | # note that the f90 example does not always yield the same result on some machines, even if it uses a random seed 43 | expected_result = 173.84265330966315 44 | self.assertAlmostEqual(test_result, expected_result, delta=3) 45 | 46 | def test_python(self): 47 | from examples.extensions.models.gaussian_python.pmcabc_gaussian_model_simple import infer_parameters 48 | journal = infer_parameters(steps=1, n_sample=50) 49 | test_result = journal.posterior_mean()["mu"] 50 | expected_result = 175.00683044068612 51 | self.assertAlmostEqual(test_result, expected_result) 52 | 53 | def test_R(self): 54 | import os 55 | print(os.getcwd()) 56 | from
examples.extensions.models.gaussian_R.pmcabc_gaussian_model_simple import infer_parameters 57 | journal = infer_parameters(steps=1, n_sample=50) 58 | test_result = journal.posterior_mean()["mu"] 59 | expected_result = 173.4192372459506 60 | self.assertAlmostEqual(test_result, expected_result) 61 | 62 | 63 | class ExampleExtensionsPerturbationKernelsTest(unittest.TestCase): 64 | def test_pmcabc_perturbation_kernel(self): 65 | from examples.extensions.perturbationkernels.pmcabc_perturbation_kernels import infer_parameters 66 | journal = infer_parameters(steps=1, n_sample=50) 67 | test_result = journal.posterior_mean()["schol_without_additional_effects"] 68 | expected_result = 1.9492397683665226 69 | self.assertAlmostEqual(test_result, expected_result) 70 | 71 | 72 | class ExampleHierarchicalModelsTest(unittest.TestCase): 73 | def test_pmcabc(self): 74 | from examples.hierarchicalmodels.pmcabc_inference_on_multiple_sets_of_obs import infer_parameters 75 | journal = infer_parameters(steps=1, n_sample=50) 76 | test_result = journal.posterior_mean()["schol_without_additional_effects"] 77 | expected_result = 1.9492397683665226 78 | self.assertAlmostEqual(test_result, expected_result) 79 | 80 | 81 | class ExampleModelSelectionTest(unittest.TestCase): 82 | def test_random_forest(self): 83 | from examples.modelselection.randomforest_modelselections import infer_model 84 | model, model_prob = infer_model() 85 | expected_result = 0.8704000000000001 86 | # this is not fully reproducible, there are some fluctuations in the estimated value 87 | self.assertAlmostEqual(model_prob[0], expected_result, delta=0.05) 88 | 89 | 90 | class ExampleStatisticsLearningTest(unittest.TestCase): 91 | def test_pmcabc(self): 92 | if has_torch: 93 | from examples.statisticslearning.pmcabc_gaussian_statistics_learning import infer_parameters 94 | journal = infer_parameters(steps=1, n_sample=50) 95 | test_result = journal.posterior_mean()["mu"] 96 | expected_result = 172.52136853079725 97 | self.assertAlmostEqual(test_result, expected_result) 98 | 99 | def test_gaussian_statistics_learning_DrawFromPrior_reload_NNs(self): 100 | if has_torch: 101 | from examples.statisticslearning.gaussian_statistics_learning_DrawFromPrior_reload_NNs import infer_parameters 102 | journal = infer_parameters(steps=1, n_sample=50) 103 | test_result = journal.posterior_mean()["mu"] 104 | expected_result = 172.52136853079725 105 | self.assertAlmostEqual(test_result, expected_result) 106 | 107 | def test_gaussian_statistics_learning_exponential_family(self): 108 | if has_torch: 109 | from examples.statisticslearning.gaussian_statistics_learning_exponential_family import infer_parameters 110 | journal = infer_parameters(steps=1, n_sample=50) 111 | test_result = journal.posterior_mean()["mu"] 112 | expected_result = 172.52136853079725 113 | self.assertAlmostEqual(test_result, expected_result) 114 | 115 | 116 | if __name__ == '__main__': 117 | unittest.main() 118 | -------------------------------------------------------------------------------- /tests/test_examples_mpi.py: -------------------------------------------------------------------------------- 1 | """Tests here example with MPI.""" 2 | 3 | import unittest 4 | 5 | from abcpy.backends import BackendMPI 6 | 7 | 8 | def setUpModule(): 9 | ''' 10 | If an exception is raised in a setUpModule then none of 11 | the tests in the module will be run. 
12 | 13 | This is useful because the teams run in a while loop on initialization 14 | only responding to the scheduler's commands and will never execute anything else. 15 | 16 | On termination of the scheduler, the teams call quit(), which raises a SystemExit(). 17 | Because of the behaviour of setUpModule, it will not run any unit tests 18 | for the teams, and we now only need to write unit tests from the scheduler's 19 | point of view. 20 | ''' 21 | global backend_mpi 22 | backend_mpi = BackendMPI() 23 | 24 | 25 | class ExampleGaussianMPITest(unittest.TestCase): 26 | def test_example(self): 27 | from examples.backends.mpi.pmcabc_gaussian import infer_parameters 28 | journal = infer_parameters(backend_mpi, steps=3, n_sample=50) 29 | test_result = journal.posterior_mean()['mu'] 30 | expected_result = 174.94717012502286 31 | self.assertAlmostEqual(test_result, expected_result) 32 | 33 | 34 | if __name__ == '__main__': 35 | unittest.main() 36 | --------------------------------------------------------------------------------
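Both MPI test modules above follow the same pattern: `BackendMPI()` is created once in `setUpModule`, the team ranks block inside it serving the scheduler, and only the scheduler rank executes the test methods. Below is a minimal sketch of the `parallelize`/`map`/`collect` usage those tests exercise, assuming a working MPI installation with mpi4py as listed in the backend-mpi requirements; the exact launcher command and the minimum number of ranks required by `BackendMPI` are not specified here.

```python
# Minimal sketch, not part of the test suite: the parallelize/map/collect pattern
# used by the backend tests above. Must be launched under an MPI launcher, e.g. mpirun,
# with several processes; the scheduler rank does not execute map tasks itself.
from abcpy.backends import BackendMPI

backend = BackendMPI()                               # team ranks block here, serving the scheduler
pds = backend.parallelize([1, 2, 3, 4, 5])           # scatter the data to the team ranks
pds_squared = backend.map(lambda x: x ** 2, pds)     # run the function where the data lives
print(backend.collect(pds_squared))                  # gather the results on the scheduler
```

Such a script has to be started through an MPI launcher rather than as a plain Python process, since every rank must enter `BackendMPI()` for the scheduler to have teams to dispatch work to.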