├── .gitignore ├── .gitlab-ci.yml ├── .readthedocs.yml ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── PTS ├── README.md ├── data │ ├── BigTestData.str.tar.xz │ ├── Reduced_dataset.structure │ ├── extraparams │ └── mainparams ├── fastStructure │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure_threader-faststructure │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure_threader-structure │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure_threader-structure_1 │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure_threader-structure_16 │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure_threader-structure_2 │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure_threader-structure_4 │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── structure_threader-structure_6 │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml └── structure_threader-structure_8 │ ├── downloads.xml │ ├── install.sh │ ├── results-definition.xml │ └── test-definition.xml ├── README.md ├── TestData ├── BigTestData.str.tar.xz ├── README.md ├── SmallTestData.structure ├── extraparams ├── indfile.txt ├── joblist.txt ├── mainparams ├── mav_benchmark_parameters.txt └── parameters.txt ├── benchmarks ├── ParallelStructure_runner.R ├── README.md ├── Side_by_side.png ├── Side_by_side.svg ├── bar_plotter.py ├── benchmark.sh ├── benchmark_fast.sh ├── speedup_plotter.py └── system_speedup_plotter.py ├── docs ├── benchmark.md ├── binaries.md ├── citation.md ├── external.md ├── faq.md ├── future.md 
├── index.md ├── install.md ├── output.md ├── requirements.txt ├── test_data.md └── usage.md ├── helper_scripts ├── Dockerfile ├── README.md ├── install_faststructure.sh ├── install_maverick.sh ├── install_structure.sh └── structure.spec ├── mkdocs.yml ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py ├── structure_threader ├── __init__.py ├── argparser.py ├── bins │ ├── __init__.py │ ├── linux │ │ ├── MavericK │ │ ├── __init__.py │ │ ├── fastStructure │ │ └── structure │ └── osx │ │ ├── MavericK │ │ ├── fastStructure │ │ └── structure ├── colorer │ ├── __init__.py │ └── colorer.py ├── evanno │ ├── LICENSE │ ├── __init__.py │ ├── fastChooseK.py │ ├── harvesterCore.py │ └── structureHarvester.py ├── plotter │ ├── __init__.py │ ├── html_template.py │ └── structplot.py ├── sanity_checks │ ├── __init__.py │ └── sanity.py ├── skeletons │ ├── __init__.py │ └── stparams.py ├── structure_threader.py └── wrappers │ ├── __init__.py │ ├── alstructure_wrapper.R │ ├── alstructure_wrapper.py │ ├── faststructure_wrapper.py │ ├── maverick_wrapper.py │ └── structure_wrapper.py └── tests ├── alstructure_field_tests.sh ├── alstructure_function_tests.py ├── conftest.py ├── evanno_tests.py ├── fastStructure_field_test.sh ├── faststructure_function_tests.py ├── files ├── chooseK.txt ├── fS_run_K.1.log ├── fS_run_K.1.meanP ├── fS_run_K.1.meanQ ├── fS_run_K.2.log ├── fS_run_K.2.meanP ├── fS_run_K.2.meanQ ├── fS_run_K.3.log ├── fS_run_K.3.meanP ├── fS_run_K.3.meanQ ├── fS_run_K.4.log ├── fS_run_K.4.meanP ├── fS_run_K.4.meanQ ├── fS_run_K.5.log ├── fS_run_K.5.meanP ├── fS_run_K.5.meanQ ├── fS_run_K.6.log ├── fS_run_K.6.meanP ├── fS_run_K.6.meanQ ├── mav_K1 │ ├── outputEvidence.csv │ ├── outputEvidenceDetails.csv │ ├── outputEvidenceNormalised.csv │ ├── outputLikelihood.csv │ ├── outputLog.txt │ ├── outputQmatrixError_ind_K1.csv │ ├── outputQmatrixError_pop_K1.csv │ ├── outputQmatrix_gene_K1.csv │ ├── outputQmatrix_ind_K1.csv │ └── outputQmatrix_pop_K1.csv ├── mav_K2 │ ├── 
outputEvidence.csv │ ├── outputEvidenceDetails.csv │ ├── outputEvidenceNormalised.csv │ ├── outputLikelihood.csv │ ├── outputLog.txt │ ├── outputQmatrixError_ind_K2.csv │ ├── outputQmatrixError_pop_K2.csv │ ├── outputQmatrix_gene_K2.csv │ ├── outputQmatrix_ind_K2.csv │ └── outputQmatrix_pop_K2.csv ├── mav_K3 │ ├── outputEvidence.csv │ ├── outputEvidenceDetails.csv │ ├── outputEvidenceNormalised.csv │ ├── outputLikelihood.csv │ ├── outputLog.txt │ ├── outputQmatrixError_ind_K3.csv │ ├── outputQmatrixError_pop_K3.csv │ ├── outputQmatrix_gene_K3.csv │ ├── outputQmatrix_ind_K3.csv │ └── outputQmatrix_pop_K3.csv └── test_merged │ ├── outputEvidence.csv │ └── outputEvidenceDetails.csv ├── maverick_field_test.sh ├── maverick_functions_tests.py ├── mockups.py ├── sanity_tests.py ├── smalldata ├── BigTestData.bed.tar.xz ├── BigTestData.str.tar.xz ├── BigTestData.vcf.tar.xz ├── Reduced_dataset.structure ├── SmallTestData.vcf ├── SmallTestData_reference.tsv ├── extraparams ├── indfile.txt ├── mainparams ├── parameters.txt ├── parameters_a.txt ├── parameters_a_as.txt ├── parameters_as.txt └── parameters_f.txt ├── structure_field_test.sh ├── structure_function_tests.py └── wrapper_tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | .ropeproject/ 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | tests/files/merged/* 44 | tests/files/bestK/* 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # Eric6 project 60 | *.e4p 61 | .eric* 62 | 63 | # Dolphin breadcrumbs 64 | *.directory 65 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - tests 3 | - deployment 4 | 5 | image: python:3.11-slim 6 | 7 | # Change pip's cache directory to be inside the project directory since we can 8 | # only cache local items. 9 | variables: 10 | PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache" 11 | 12 | # Pip's cache doesn't store the python packages 13 | # https://pip.pypa.io/en/stable/reference/pip_install/#caching 14 | # 15 | # If you want to also cache the installed packages, you have to install 16 | # them in a virtualenv and cache it as well. 17 | cache: 18 | paths: 19 | - .cache/pip 20 | - venv/ 21 | 22 | tests: 23 | stage: tests 24 | script: 25 | - apt-get update 26 | - apt-get install -y xz-utils 27 | - apt-get install -y r-base r-cran-devtools r-cran-biocmanager 28 | - python -V # Print out python version for debugging 29 | - pip install virtualenv 30 | - virtualenv venv 31 | - source venv/bin/activate 32 | - pip install pytest 33 | - pip install . 
34 | - pytest tests/*.py -v 35 | - tests/structure_field_test.sh 36 | - tests/fastStructure_field_test.sh 37 | - tests/maverick_field_test.sh 38 | - tests/alstructure_field_tests.sh 39 | 40 | 41 | deployment: 42 | stage: deployment 43 | script: 44 | - python -V 45 | - pip install virtualenv 46 | - virtualenv venv 47 | - source venv/bin/activate 48 | - pip install twine 49 | - python setup.py sdist 50 | - twine upload dist/* -u __token__ -p $pypi_token 51 | only: 52 | - tags 53 | artifacts: 54 | name: "$CI_PROJECT_NAME-$CI_COMMIT_TAG" 55 | paths: 56 | - dist/ 57 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.8" 12 | 13 | # Build documentation with MkDocs 14 | mkdocs: 15 | configuration: mkdocs.yml 16 | 17 | python: 18 | install: 19 | - requirements: docs/requirements.txt 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | os: 4 | - linux 5 | 6 | dist: 7 | - xenial 8 | 9 | python: 10 | - "3.7" 11 | 12 | install: 13 | - pip install . 
14 | - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 15 | - echo "deb https://cran.r-project.org/bin/linux/ubuntu/ xenial-cran35/" | sudo tee -a /etc/apt/sources.list 16 | - sudo apt-get update 17 | - sudo apt-get install -y r-base libcurl4-openssl-dev libssl-dev gfortran liblapack-dev libblas-dev 18 | 19 | # Run tests 20 | script: 21 | - py.test tests/*.py -v 22 | - tests/structure_field_test.sh 23 | - tests/fastStructure_field_test.sh 24 | - tests/maverick_field_test.sh 25 | - travis_wait tests/alstructure_field_tests.sh 26 | 27 | # Other stuff 28 | notifications: 29 | email: 30 | - f.pinamartins@gmail.com 31 | - o.diogo.silva@gmail.com 32 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include structure_threader/bins * 2 | -------------------------------------------------------------------------------- /PTS/README.md: -------------------------------------------------------------------------------- 1 | # PTS 2 | 3 | These are the files used to create a [Phoronix Test Suite](http://www.phoronix-test-suite.com/) test profile. 4 | 5 | The objective is to have them upstreamed by PTS itself, as *Structure_threader* seems to be a very nice benchmark! 
6 | -------------------------------------------------------------------------------- /PTS/data/BigTestData.str.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/PTS/data/BigTestData.str.tar.xz -------------------------------------------------------------------------------- /PTS/data/Reduced_dataset.structure: -------------------------------------------------------------------------------- 1 | SNP_4 SNP_6 SNP_17 SNP_26 SNP_38 SNP_88 SNP_122 SNP_154 SNP_164 SNP_264 SNP_270 SNP_272 SNP_494 SNP_496 SNP_640 SNP_695 SNP_696 SNP_836 SNP_839 SNP_889 SNP_937 SNP_967 SNP_970 SNP_982 SNP_1059 SNP_1077 SNP_1083 SNP_1084 SNP_1117 2 | Pop1.19 1 -9 -9 3 2 -9 -9 -9 4 4 -9 4 3 -9 -9 -9 -9 -9 1 -9 1 2 -9 3 1 4 2 4 1 4 3 | Pop1.19 1 -9 -9 3 2 -9 -9 -9 2 4 -9 4 3 -9 -9 -9 -9 -9 3 -9 3 2 -9 3 2 2 2 4 1 2 4 | Pop1.5 1 2 1 3 2 3 4 2 4 4 2 4 3 4 4 2 2 1 3 2 1 4 2 3 1 4 2 4 1 2 5 | Pop1.5 1 2 1 3 2 3 4 2 4 4 2 4 3 4 4 2 2 1 3 2 1 2 2 3 2 4 2 4 1 2 6 | Pop2.37 2 2 -9 3 -9 3 4 4 2 4 2 4 -9 4 -9 2 2 1 -9 4 1 2 -9 3 1 -9 -9 4 1 2 7 | Pop2.37 2 2 -9 3 -9 3 2 2 2 3 2 4 -9 4 -9 3 2 1 -9 4 3 2 -9 3 1 -9 -9 4 1 2 8 | Pop2.5 2 2 1 3 2 -9 4 4 2 4 2 4 3 4 4 2 -9 -9 1 4 3 -9 4 3 1 2 2 4 1 2 9 | Pop2.5 2 2 1 3 2 -9 2 4 2 4 2 4 3 4 4 3 -9 -9 1 4 3 -9 4 3 1 2 2 4 1 2 10 | Pop3.6 3 2 1 3 2 -9 2 4 4 4 2 4 -9 4 4 2 -9 -9 1 4 1 -9 4 3 2 2 2 1 3 2 11 | Pop3.6 3 2 1 3 2 -9 2 2 2 4 2 4 -9 4 4 3 -9 -9 3 2 3 -9 4 3 2 2 2 1 3 2 12 | Pop3.7 3 2 1 2 2 1 2 2 -9 4 2 4 1 4 4 2 1 1 3 4 1 2 4 3 1 2 2 4 1 4 13 | Pop3.7 3 2 1 3 2 3 2 2 -9 4 2 4 3 4 4 3 2 3 3 4 1 2 4 3 2 2 2 4 1 4 14 | Pop4.11 4 2 1 3 -9 3 2 4 2 -9 2 -9 3 -9 4 -9 1 3 3 4 3 2 4 3 2 2 2 -9 -9 4 15 | Pop4.11 4 2 1 3 -9 1 2 4 4 -9 2 -9 3 -9 4 -9 1 3 1 4 1 2 4 3 2 2 2 -9 -9 4 16 | Pop4.7 4 2 1 3 2 3 2 2 2 4 2 1 3 4 4 2 2 1 3 4 1 4 4 3 1 2 2 1 1 4 17 | Pop4.7 4 2 1 3 2 3 2 2 2 4 2 4 3 4 2 2 2 1 3 4 3 2 4 3 1 2 2 4 3 4 18 | 
Pop5.16 5 2 1 3 2 1 -9 -9 -9 4 2 1 1 4 -9 -9 1 1 3 4 -9 2 -9 3 -9 -9 -9 1 1 -9 19 | Pop5.16 5 2 1 3 2 1 -9 -9 -9 4 2 4 3 4 -9 -9 2 3 3 4 -9 2 -9 3 -9 -9 -9 4 3 -9 20 | Pop5.9 5 1 4 3 2 1 2 2 2 3 2 4 1 4 2 3 2 1 3 4 3 2 4 3 1 -9 -9 1 3 2 21 | Pop5.9 5 1 4 3 2 1 4 2 2 4 2 4 1 4 4 3 2 1 3 4 1 2 4 3 1 -9 -9 1 3 2 22 | Pop6.11 6 2 1 2 2 3 2 4 2 4 4 1 -9 4 4 3 2 1 3 4 3 4 4 1 1 4 4 1 3 2 23 | Pop6.11 6 2 1 3 2 3 2 4 2 4 4 1 -9 4 4 3 2 1 3 4 3 2 4 3 2 2 2 1 3 2 24 | Pop6.6 6 2 1 3 2 3 2 4 4 4 -9 -9 1 4 4 3 2 1 3 4 3 2 4 3 2 2 4 1 3 2 25 | Pop6.6 6 2 1 3 2 3 2 4 2 4 -9 -9 1 4 4 3 2 1 3 4 3 2 4 3 2 2 4 1 3 2 26 | Pop7.10 7 1 1 2 4 1 2 4 -9 4 4 -9 3 4 -9 3 1 3 1 4 3 2 4 3 -9 2 2 4 1 2 27 | Pop7.10 7 2 4 2 4 3 2 2 -9 4 2 -9 3 3 -9 3 1 3 3 4 3 2 4 3 -9 2 2 4 1 2 28 | Pop7.8 7 1 1 2 2 -9 -9 4 -9 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 4 4 3 -9 2 4 4 1 4 29 | Pop7.8 7 2 1 3 2 -9 -9 2 -9 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 2 2 3 -9 2 2 4 1 4 30 | Pop8.12 8 1 4 2 2 1 -9 2 4 4 4 4 3 4 4 3 2 1 3 4 1 2 4 3 1 2 2 1 1 2 31 | Pop8.12 8 1 4 3 2 3 -9 2 2 4 4 4 3 4 2 3 2 1 3 4 1 2 2 3 1 2 2 4 3 2 32 | Pop8.9 8 1 4 2 2 1 2 2 4 4 2 1 1 4 4 2 1 1 3 4 1 2 2 1 1 2 2 4 1 2 33 | Pop8.9 8 2 4 3 2 1 2 2 2 4 2 4 3 4 2 3 2 3 3 4 3 2 2 3 1 2 2 4 1 2 34 | Pop9.10 9 2 1 2 2 3 4 4 4 4 2 4 3 4 4 2 2 1 3 4 1 4 2 3 1 2 4 4 1 4 35 | Pop9.10 9 2 1 3 2 3 2 2 4 4 2 4 3 3 2 2 2 1 3 4 1 2 2 3 1 2 2 4 1 4 36 | Pop9.9 9 2 1 3 -9 3 2 2 2 4 -9 4 3 4 4 -9 2 1 3 4 1 2 -9 1 1 2 2 4 1 4 37 | Pop9.9 9 2 1 3 -9 3 2 2 2 4 -9 4 3 4 4 -9 2 1 3 4 1 2 -9 3 1 2 2 4 1 4 38 | Pop10.16 10 2 1 3 2 1 2 2 -9 4 2 1 3 -9 2 -9 2 1 3 4 1 4 4 3 1 2 2 4 1 4 39 | Pop10.16 10 2 1 3 2 3 2 2 -9 4 2 4 3 -9 2 -9 2 1 3 4 1 4 4 3 1 2 2 4 1 4 40 | Pop10.9 10 2 -9 3 -9 1 -9 -9 -9 -9 -9 4 -9 -9 -9 -9 -9 -9 3 4 1 -9 -9 -9 -9 2 2 -9 -9 2 41 | Pop10.9 10 2 -9 3 -9 3 -9 -9 -9 -9 -9 4 -9 -9 -9 -9 -9 -9 3 4 1 -9 -9 -9 -9 2 2 -9 -9 2 42 | Pop11.13 11 2 1 3 2 3 2 4 2 4 2 1 3 4 2 3 2 1 3 -9 1 4 4 3 1 2 2 4 1 2 43 | Pop11.13 11 2 1 3 2 3 2 2 2 4 2 1 3 3 2 3 2 1 3 -9 1 2 4 3 2 2 
2 4 1 2 44 | Pop11.6 11 1 4 3 -9 1 2 2 4 -9 2 1 1 4 4 3 1 1 3 4 1 2 4 3 1 2 -9 1 1 4 45 | Pop11.6 11 1 4 3 -9 1 2 2 2 -9 2 1 1 4 2 3 2 3 3 4 1 2 4 3 1 2 -9 4 3 4 46 | Pop12.21 12 2 1 2 2 3 2 4 2 -9 2 -9 3 4 2 3 2 1 3 4 1 2 4 1 2 2 2 4 1 2 47 | Pop12.21 12 2 1 3 2 3 2 4 2 -9 2 -9 3 4 2 3 2 1 3 4 1 2 4 3 2 2 2 4 1 2 48 | Pop12.6 12 -9 -9 3 2 -9 -9 4 4 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 2 4 1 1 2 2 1 1 4 49 | Pop12.6 12 -9 -9 3 2 -9 -9 2 2 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 2 2 3 2 2 2 4 3 2 50 | Pop13.17 13 -9 -9 3 4 1 2 4 4 -9 -9 4 3 -9 -9 -9 1 1 3 -9 -9 2 4 -9 -9 2 2 -9 -9 -9 51 | Pop13.17 13 -9 -9 3 4 3 2 2 2 -9 -9 4 3 -9 -9 -9 2 3 3 -9 -9 2 4 -9 -9 2 2 -9 -9 -9 52 | Pop13.2 13 1 4 3 -9 1 2 4 2 -9 -9 -9 3 3 4 -9 2 1 3 -9 3 4 -9 1 -9 2 4 4 1 -9 53 | Pop13.2 13 1 4 3 -9 3 2 4 2 -9 -9 -9 3 3 4 -9 2 1 3 -9 3 2 -9 1 -9 2 2 4 1 -9 54 | Pop14.27 14 -9 -9 3 2 3 4 4 -9 4 -9 4 3 4 4 3 1 1 3 -9 -9 2 4 3 1 -9 2 4 1 2 55 | Pop14.27 14 -9 -9 3 2 3 2 4 -9 4 -9 4 3 4 4 3 2 3 3 -9 -9 2 2 3 1 -9 2 4 1 2 56 | Pop14.4 14 2 1 2 2 1 4 4 4 4 2 1 3 4 4 3 1 1 3 4 1 2 4 3 1 2 2 1 1 2 57 | Pop14.4 14 2 1 3 2 3 2 2 2 4 2 1 3 4 4 3 2 3 3 4 3 2 4 3 1 2 2 4 3 2 58 | Pop15.12 15 2 1 3 2 3 2 4 -9 4 2 4 -9 4 4 2 2 1 3 4 1 2 4 3 1 2 2 4 1 4 59 | Pop15.12 15 2 1 3 2 3 2 2 -9 4 2 4 -9 4 4 2 2 1 3 2 1 2 2 3 1 2 2 4 1 4 60 | Pop15.8 15 2 1 3 2 3 2 4 4 4 2 1 3 4 4 3 2 1 3 4 1 4 4 3 1 4 2 1 1 2 61 | Pop15.8 15 2 1 3 2 3 2 2 2 4 2 4 3 4 2 3 2 1 3 4 1 2 4 3 1 4 2 4 3 2 62 | Pop16.1 16 2 1 3 2 1 4 2 2 -9 2 1 3 4 2 2 1 1 3 4 1 2 4 3 1 2 2 4 1 4 63 | Pop16.1 16 2 1 3 2 1 4 2 2 -9 2 1 3 4 2 2 2 3 3 4 1 2 4 3 2 2 2 4 1 4 64 | Pop16.7 16 2 1 3 2 1 2 4 2 4 2 4 3 4 4 2 2 1 3 4 1 4 2 3 1 2 2 1 1 4 65 | Pop16.7 16 2 1 3 2 1 2 2 2 4 2 4 3 4 4 3 2 1 3 4 1 2 2 3 2 2 2 4 3 2 66 | Pop17.11 17 1 1 3 4 3 2 2 -9 4 -9 4 3 -9 4 -9 2 1 3 4 1 4 4 3 1 2 4 4 1 4 67 | Pop17.11 17 2 1 3 4 3 2 2 -9 4 -9 4 3 -9 4 -9 2 1 3 4 1 4 4 3 1 2 2 4 1 4 68 | Pop17.19 17 2 1 3 2 3 2 2 2 4 -9 4 -9 4 2 2 2 1 3 4 1 4 4 3 1 2 4 1 3 4 69 | Pop17.19 17 2 1 3 2 3 
2 2 2 4 -9 4 -9 4 2 2 2 1 3 4 3 4 4 3 2 2 4 1 3 4 70 | -------------------------------------------------------------------------------- /PTS/data/extraparams: -------------------------------------------------------------------------------- 1 | 2 | EXTRA PARAMS FOR THE PROGRAM structure. THESE PARAMETERS CONTROL HOW THE 3 | PROGRAM RUNS. ATTRIBUTES OF THE DATAFILE AS WELL AS K AND RUNLENGTH ARE 4 | SPECIFIED IN mainparams. 5 | 6 | "(int)" means that this takes an integer value. 7 | "(d)" means that this is a double (ie, a Real number such as 3.14). 8 | "(B)" means that this variable is Boolean 9 | (ie insert 1 for True, and 0 for False). 10 | 11 | PROGRAM OPTIONS 12 | 13 | #define NOADMIX 0 // (B) Use no admixture model (0=admixture model, 1=no-admix) 14 | #define LINKAGE 0 // (B) Use the linkage model model 15 | #define USEPOPINFO 0 // (B) Use prior population information to pre-assign individuals 16 | to clusters 17 | #define LOCPRIOR 0 //(B) Use location information to improve weak data 18 | 19 | #define FREQSCORR 0 // (B) allele frequencies are correlated among pops 20 | #define ONEFST 0 // (B) assume same value of Fst for all subpopulations. 21 | 22 | #define INFERALPHA 1 // (B) Infer ALPHA (the admixture parameter) 23 | #define POPALPHAS 0 // (B) Individual alpha for each population 24 | #define ALPHA 1.0 // (d) Dirichlet parameter for degree of admixture 25 | (this is the initial value if INFERALPHA==1). 26 | 27 | #define INFERLAMBDA 0 // (B) Infer LAMBDA (the allele frequencies parameter) 28 | #define POPSPECIFICLAMBDA 0 //(B) infer a separate lambda for each pop 29 | (only if INFERLAMBDA=1). 30 | #define LAMBDA 1.0 // (d) Dirichlet parameter for allele frequencies 31 | 32 | 33 | 34 | 35 | PRIORS 36 | 37 | #define FPRIORMEAN 0.01 // (d) Prior mean and SD of Fst for pops. 
38 | #define FPRIORSD 0.05 // (d) The prior is a Gamma distribution with these parameters 39 | 40 | #define UNIFPRIORALPHA 1 // (B) use a uniform prior for alpha; 41 | otherwise gamma prior 42 | #define ALPHAMAX 10.0 // (d) max value of alpha if uniform prior 43 | #define ALPHAPRIORA 1.0 // (only if UNIFPRIORALPHA==0): alpha has a gamma 44 | prior with mean A*B, and 45 | #define ALPHAPRIORB 2.0 // variance A*B^2. 46 | 47 | 48 | #define LOG10RMIN -4.0 //(d) Log10 of minimum allowed value of r under linkage model 49 | #define LOG10RMAX 1.0 //(d) Log10 of maximum allowed value of r 50 | #define LOG10RPROPSD 0.1 //(d) standard deviation of log r in update 51 | #define LOG10RSTART -2.0 //(d) initial value of log10 r 52 | 53 | 54 | USING PRIOR POPULATION INFO (USEPOPINFO) 55 | 56 | #define GENSBACK 2 //(int) For use when inferring whether an indiv- 57 | idual is an immigrant, or has an immigrant an- 58 | cestor in the past GENSBACK generations. eg, if 59 | GENSBACK==2, it tests for immigrant ancestry 60 | back to grandparents. 61 | #define MIGRPRIOR 0.01 //(d) prior prob that an individual is a migrant 62 | (used only when USEPOPINFO==1). This should 63 | be small, eg 0.01 or 0.1. 64 | #define PFROMPOPFLAGONLY 0 // (B) only use individuals with POPFLAG=1 to update P. 65 | This is to enable use of a reference set of 66 | individuals for clustering additional "test" 67 | individuals. 
68 | 69 | LOCPRIOR MODEL FOR USING LOCATION INFORMATION 70 | 71 | #define LOCISPOP 0 //(B) use POPDATA for location information 72 | #define LOCPRIORINIT 1.0 //(d) initial value for r, the location prior 73 | #define MAXLOCPRIOR 20.0 //(d) max allowed value for r 74 | 75 | 76 | 77 | 78 | OUTPUT OPTIONS 79 | 80 | #define PRINTNET 1 // (B) Print the "net nucleotide distance" to screen during the run 81 | #define PRINTLAMBDA 1 // (B) Print current value(s) of lambda to screen 82 | #define PRINTQSUM 1 // (B) Print summary of current population membership to screen 83 | 84 | #define SITEBYSITE 0 // (B) whether or not to print site by site results. 85 | (Linkage model only) This is a large file! 86 | #define PRINTQHAT 0 // (B) Q-hat printed to a separate file. Turn this 87 | on before using STRAT. 88 | #define UPDATEFREQ 100 // (int) frequency of printing update on the screen. 89 | Set automatically if this is 0. 90 | #define PRINTLIKES 0 // (B) print current likelihood to screen every rep 91 | #define INTERMEDSAVE 0 // (int) number of saves to file during run 92 | 93 | #define ECHODATA 1 // (B) Print some of data file to screen to check 94 | that the data entry is correct. 95 | (NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:) 96 | #define ANCESTDIST 0 // (B) collect data about the distribution of an- 97 | cestry coefficients (Q) for each individual 98 | #define NUMBOXES 1000 // (int) the distribution of Q values is stored as 99 | a histogram with this number of boxes. 100 | #define ANCESTPINT 0.90 // (d) the size of the displayed probability 101 | interval on Q (values between 0.0--1.0) 102 | 103 | 104 | 105 | MISCELLANEOUS 106 | 107 | #define COMPUTEPROB 1 // (B) Estimate the probability of the Data under 108 | the model. This is used when choosing the 109 | best number of subpopulations. 
110 | #define ADMBURNIN 500 // (int) [only relevant for linkage model]: 111 | Initial period of burnin with admixture model (see Readme) 112 | #define ALPHAPROPSD 0.025 // (d) SD of proposal for updating alpha 113 | #define STARTATPOPINFO 0 // Use given populations as the initial condition 114 | for population origins. (Need POPDATA==1). It 115 | is assumed that the PopData in the input file 116 | are between 1 and k where k<=MAXPOPS. 117 | #define RANDOMIZE 0 // (B) use new random seed for each run 118 | #define SEED 0 // (int) seed value for random number generator 119 | (must set RANDOMIZE=0) 120 | #define METROFREQ 10 // (int) Frequency of using Metropolis step to update 121 | Q under admixture model (ie use the metr. move every 122 | i steps). If this is set to 0, it is never used. 123 | (Proposal for each q^(i) sampled from prior. The 124 | goal is to improve mixing for small alpha.) 125 | #define REPORTHITRATE 0 // (B) report hit rate if using METROFREQ 126 | -------------------------------------------------------------------------------- /PTS/data/mainparams: -------------------------------------------------------------------------------- 1 | 2 | KEY PARAMETERS FOR THE PROGRAM structure. YOU WILL NEED TO SET THESE 3 | IN ORDER TO RUN THE PROGRAM. VARIOUS OPTIONS CAN BE ADJUSTED IN THE 4 | FILE extraparams. 5 | 6 | 7 | "(int)" means that this takes an integer value. 8 | "(B)" means that this variable is Boolean 9 | (ie insert 1 for True, and 0 for False) 10 | "(str)" means that this is a string (but not enclosed in quotes!) 
11 | 12 | 13 | Basic Program Parameters 14 | 15 | #define MAXPOPS KKK // (int) number of populations assumed 16 | #define BURNIN 5000 // (int) length of burnin period 17 | #define NUMREPS 100000 // (int) number of MCMC reps after burnin 18 | 19 | Input/Output files 20 | 21 | #define INFILE Reduced_dataset.structure // (str) name of input data file 22 | #define OUTFILE outfile //(str) name of output data file 23 | 24 | Data file format 25 | 26 | #define NUMINDS 34 // (int) number of diploid individuals in data file 27 | #define NUMLOCI 29 // (int) number of loci in data file 28 | #define PLOIDY 2 // (int) ploidy of data 29 | #define MISSING -9 // (int) value given to missing genotype data 30 | #define ONEROWPERIND 0 // (B) store data for individuals in a single line 31 | 32 | 33 | #define LABEL 1 // (B) Input file contains individual labels 34 | #define POPDATA 1 // (B) Input file contains a population identifier 35 | #define POPFLAG 0 // (B) Input file contains a flag which says 36 | whether to use popinfo when USEPOPINFO==1 37 | #define LOCDATA 0 // (B) Input file contains a location identifier 38 | 39 | #define PHENOTYPE 0 // (B) Input file contains phenotype information 40 | #define EXTRACOLS 0 // (int) Number of additional columns of data 41 | before the genotype data start. 42 | 43 | #define MARKERNAMES 1 // (B) data file contains row of marker names 44 | #define RECESSIVEALLELES 0 // (B) data file contains dominant markers (eg AFLPs) 45 | // and a row to indicate which alleles are recessive 46 | #define MAPDISTANCES 0 // (B) data file contains row of map distances 47 | // between loci 48 | 49 | 50 | Advanced data file options 51 | 52 | #define PHASED 0 // (B) Data are in correct phase (relevant for linkage model only) 53 | #define PHASEINFO 0 // (B) the data for each individual contains a line 54 | indicating phase (linkage model) 55 | #define MARKOVPHASE 0 // (B) the phase info follows a Markov model. 
56 | #define NOTAMBIGUOUS -999 // (int) for use in some analyses of polyploid data 57 | 58 | 59 | 60 | Command line options: 61 | 62 | -m mainparams 63 | -e extraparams 64 | -s stratparams 65 | -K MAXPOPS 66 | -L NUMLOCI 67 | -N NUMINDS 68 | -i input file 69 | -o output file 70 | -D SEED 71 | -------------------------------------------------------------------------------- /PTS/fastStructure/downloads.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | https://github.com/rajanil/fastStructure/archive/v1.0.tar.gz 8 | 5cbb76e7d49e27a57046ab641b666f97 9 | v1.0.tar.gz 10 | 11 | 12 | 13 | https://github.com/StuntsPT/Structure_threader/raw/master/PTS/data/BigTestData.str.tar.xz 14 | 1115759c154152b264a4ff5f87c34c1d 15 | BigTestData.str.tar.xz 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /PTS/fastStructure/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | tar -zxvf v1.0.tar.gz 4 | tar -xvfJ BigTestData.str.tar.xz 5 | 6 | cd fastStructure-1.0/vars 7 | python2 setup.py build-ext --inplace 8 | cd .. 9 | python2 setup.py build-ext --inplace 10 | 11 | cd .. 
12 | 13 | echo "#!/bin/bash 14 | 15 | for i in {1..4} 16 | do 17 | for j in {1..4} 18 | do 19 | python2 ./fastStructure-1.0/structure.py --input=BigTestData --output=TestBigData_out_K\${i}_R\${j} -K \${i} --format=str >> \$LOG_FILE 2>&1 20 | done 21 | done 22 | 23 | " > fastStructure 24 | chmod +x fastStructure 25 | -------------------------------------------------------------------------------- /PTS/fastStructure/results-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sys.time 6 | 7 | 8 | -------------------------------------------------------------------------------- /PTS/fastStructure/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 fastStructure runs 6 | 1.0 7 | Single-threaded Bayesian calculation of population attribution. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 8 | Seconds 9 | LIB 10 | 1000 genomes Chr22 clustering 11 | 3 12 | 13 | 14 | 0.1 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | python-numpy, cython, GSL, python-scipy 21 | 1.0 22 | https://rajanil.github.io/fastStructure 23 | Francisco Pina-Martins 24 | 25 | 26 | -------------------------------------------------------------------------------- /PTS/structure/downloads.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz 7 | 4e0591678cdbfe79347d272b5dceeda1 8 | 66375 9 | 10 | 11 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/Reduced_dataset.structure 12 | 1665111727e05d4ba3eea253a986c374 13 | 5317 14 | 15 | 16 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/mainparams 17 | 4e41f743c66c3dcd926463a831ab1cda 18 | 2596 19 | 20 | 21 
| https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/extraparams 22 | c4f5fe55b6a496b8c55a0538fcc173d7 23 | 6257 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /PTS/structure/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | tar -zxvf structure_kernel_source.tar.gz 4 | 5 | cd structure_kernel_src 6 | make 7 | cd .. 8 | 9 | echo "#!/bin/bash 10 | 11 | for i in {1..4} 12 | do 13 | for j in {1..4} 14 | do 15 | ./structure_kernel_src/structure -i Reduced_dataset.structure -o output_K\${i}_R\${j} -K \${i} >> \$LOG_FILE 2>&1 16 | done 17 | done 18 | 19 | " > structure 20 | chmod +x structure 21 | -------------------------------------------------------------------------------- /PTS/structure/results-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sys.time 6 | 7 | 8 | -------------------------------------------------------------------------------- /PTS/structure/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 Structure runs 6 | 2.3.4 7 | Single-threaded Bayesian calculation of population attribution. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 
8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 1.0.0 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-essential 21 | 1.0 22 | http://web.stanford.edu/group/pritchardlab/structure.html 23 | Francisco Pina-Martins 24 | 25 | 26 | -------------------------------------------------------------------------------- /PTS/structure_threader-faststructure/downloads.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | https://github.com/StuntsPT/Structure_threader/archive/v0.1-rc3.tar.gz 7 | ca5c134d8fc24760fd47d730adfde892 8 | 235897 9 | v0.1-rc3.tar.gz 10 | 11 | 12 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/BigTestData.str.tar.xz 13 | 1115759c154152b264a4ff5f87c34c1d 14 | 71044 15 | 16 | 17 | https://github.com/rajanil/fastStructure/archive/v1.0.tar.gz 18 | 5cbb76e7d49e27a57046ab641b666f97 19 | 100627 20 | v1.0.tar.gz 21 | 22 | 23 | http://pypi.python.org/packages/source/n/nose/nose-1.3.6.tar.gz 24 | 0ca546d81ca8309080fc80cb389e7a16 25 | nose-1.3.6.tar.gz 26 | 276273 27 | 28 | 29 | https://github.com/numpy/numpy/archive/v1.9.2.tar.gz 30 | 90f7434759088acccfddf5ba61b1f908 31 | v1.9.2.tar.gz 32 | 4230459 33 | 34 | 35 | https://github.com/scipy/scipy/archive/v0.16.0b2.tar.gz 36 | c2e7cf084b0242275ee54df1cf2e9c92 37 | v0.16.0b2.tar.gz 38 | 11608327 39 | 40 | 41 | http://cython.org/release/Cython-0.22.zip 42 | b22af7b964903ceff57167dc7d5b9b51 43 | Cython-0.22.zip 44 | 2095055 45 | 46 | 47 | http://gnu.mirror.vexxhost.com/gsl/gsl-latest.tar.gz 48 | e49a664db13d81c968415cd53f62bc8b 49 | gsl-latest.tar.gz 50 | 3534080 51 | 52 | 53 | http://www.netlib.org/lapack/lapack-3.5.0.tgz 54 | b1d3e3e425b2e44a06760ff173104bdf 55 | lapack-3.5.0.tgz 56 | 6313139 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /PTS/structure_threader-faststructure/install.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Define test dir 6 | testdir=$(pwd) 7 | 8 | # Get LDFAGS 9 | _OLD_LDFLAGS=$LDFLAGS 10 | 11 | # Create env dir for dependent libraries 12 | mkdir -p ${testdir}/env 13 | envdir=${testdir}/env 14 | 15 | # Install dependencies 16 | # LAPACK 17 | tar xvfz lapack-3.5.0.tgz 18 | mkdir -p build-lapack 19 | cd build-lapack 20 | cmake ../lapack-3.5.0 21 | make 22 | #mkdir -p ${envdir}/{bin,lib} 23 | mv bin ${envdir} 24 | mv lib ${envdir} 25 | 26 | # cython 27 | cd ${testdir} 28 | unzip Cython-0.22.zip 29 | cd Cython-0.22 30 | export PYTHONPATH=$PYTHONPATH:${envdir}/lib/python2.7/site-packages/ 31 | mkdir -p ${envdir}/lib/python2.7/site-packages/ 32 | python2 setup.py install --prefix=${envdir} 33 | 34 | # python-nose 35 | cd ${testdir} 36 | tar xvfz nose-1.3.6.tar.gz 37 | cd nose-1.3.6 38 | python2 setup.py install --prefix=${envdir} 39 | 40 | # numpy 41 | cd ${testdir} 42 | tar xvfz v1.9.2.tar.gz 43 | cd numpy-1.9.2 44 | sed -e "s|#![ ]*/usr/bin/python$|#!/usr/bin/python2|" \ 45 | -e "s|#![ ]*/usr/bin/env python$|#!/usr/bin/env python2|" \ 46 | -e "s|#![ ]*/bin/env python$|#!/usr/bin/env python2|" \ 47 | -i $(find . 
-name '*.py') 48 | export ATLAS=None 49 | export LDFLAGS="$LDFLAGS -shared" 50 | python2 setup.py install --prefix=${envdir} 51 | 52 | # scipy 53 | export PATH=$PATH:${envdir}/bin 54 | cd ${testdir} 55 | tar xvfz v0.16.0b2.tar.gz 56 | cd scipy-0.16.0b2 57 | python2 setup.py install --prefix=${envdir} 58 | 59 | # GNU scientific library 60 | cd ${testdir} 61 | tar xvzf gsl-latest.tar.gz 62 | cd gsl-1.16 63 | export LDFLAGS=${_OLD_LDFLAGS} 64 | ./configure --prefix=${envdir} 65 | make 66 | make install 67 | 68 | # fastStructure 69 | # Extract tarball, enter src dir, build binary and place it in the env dir 70 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${envdir}/lib 71 | export CFLAGS="-I${envdir}/include" 72 | export LDFLAGS="-L${envdir}/lib" 73 | cd ${testdir} 74 | tar xvfz v1.0.tar.gz 75 | cd fastStructure-1.0 76 | cd vars 77 | python2 setup.py build_ext --inplace 78 | cd .. 79 | python2 setup.py build_ext --inplace 80 | 81 | # Dataset 82 | cd ${testdir} 83 | tar xvfJ BigTestData.str.tar.xz 84 | 85 | # Structure_threader 86 | cd ${testdir} 87 | tar xfvz v0.1-rc3.tar.gz 88 | 89 | # Create launcher script 90 | 91 | echo "#!/bin/bash 92 | workdir=$(pwd) 93 | cd Structure_threader-0.1-rc3/ 94 | python3 structure_threader.py \$@ -fs \${workdir}/fastStructure-1.0/structure.py > \$LOG_FILE 2>&1 95 | " > structure_threader-faststructure 96 | chmod +x structure_threader-faststructure 97 | -------------------------------------------------------------------------------- /PTS/structure_threader-faststructure/results-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sys.time 6 | 7 | 8 | -------------------------------------------------------------------------------- /PTS/structure_threader-faststructure/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed wrapped fastStructure runs for 16 Ks 6 | 0.1-rc3 7 | Multi threaded bayesian 
calculation of population attribution, using the "fastStructure" algorithm. Will calculate clusterings for 16 values of "K". 8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 0.2.1 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-utils, python 21 | 341 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../BigTestData -o ../ -K 16 --no-plots 1 --no-tests 1 28 | 29 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure/downloads.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | https://github.com/StuntsPT/Structure_threader/archive/v0.1-rc3.tar.gz 7 | ca5c134d8fc24760fd47d730adfde892 8 | 235897 9 | v0.1-rc3.tar.gz 10 | 11 | 12 | http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz 13 | 4e0591678cdbfe79347d272b5dceeda1 14 | 66375 15 | 16 | 17 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/Reduced_dataset.structure 18 | 1665111727e05d4ba3eea253a986c374 19 | 5317 20 | 21 | 22 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/mainparams 23 | 4e41f743c66c3dcd926463a831ab1cda 24 | 2596 25 | 26 | 27 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/extraparams 28 | c4f5fe55b6a496b8c55a0538fcc173d7 29 | 6257 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Define test dir 4 | testdir=$(pwd) 5 | 6 | # Create env dir for dependent libraries 7 | mkdir -p ${testdir}/env 8 | 9 | # Install dependencies 10 | # Structure 11 | cd ${testdir} 12 | tar -zxvf 
structure_kernel_source.tar.gz 13 | cd structure_kernel_src 14 | make 15 | 16 | # Install Structure_threader 17 | # Structure_threader 18 | cd ${testdir} 19 | tar xfvz v0.1-rc3.tar.gz 20 | 21 | # Create launcher script 22 | 23 | echo "#!/bin/bash 24 | workdir=$(pwd) 25 | cd Structure_threader-0.1-rc3/ 26 | python3 structure_threader.py \$@ -st \${workdir}/structure_kernel_src/structure > \$LOG_FILE 2>&1 27 | " > structure_threader-structure 28 | chmod +x structure_threader-structure 29 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure/results-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sys.time 6 | 7 | 8 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 wrapped Structure runs 6 | 0.1-rc3 7 | Bayesian calculation of population attribution, using the "Structure" algorithm. Calculates clusterings for 4 different values of "K" with 4 replicates for each "K". 
8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 1.0.1 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-utils, python 21 | 2 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 --no-plots 1 --no-tests 1 28 | 29 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_1/downloads.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | https://github.com/StuntsPT/Structure_threader/archive/v0.1-rc3.tar.gz 7 | ca5c134d8fc24760fd47d730adfde892 8 | 235897 9 | v0.1-rc3.tar.gz 10 | 11 | 12 | http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz 13 | 4e0591678cdbfe79347d272b5dceeda1 14 | 66375 15 | 16 | 17 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/Reduced_dataset.structure 18 | 1665111727e05d4ba3eea253a986c374 19 | 5317 20 | 21 | 22 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/mainparams 23 | 4e41f743c66c3dcd926463a831ab1cda 24 | 2596 25 | 26 | 27 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/extraparams 28 | c4f5fe55b6a496b8c55a0538fcc173d7 29 | 6257 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_1/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Define test dir 4 | testdir=$(pwd) 5 | 6 | # Create env dir for dependent libraries 7 | mkdir -p ${testdir}/env 8 | 9 | # Install dependencies 10 | # Structure 11 | cd ${testdir} 12 | tar -zxvf structure_kernel_source.tar.gz 13 | cd structure_kernel_src 14 | make 15 | 16 | # Install Structure_threder 17 | # 
Structure_threader 18 | cd ${testdir} 19 | tar xfvz v0.1-rc3.tar.gz 20 | 21 | # Create launcher script 22 | 23 | echo "#!/bin/bash 24 | workdir=$(pwd) 25 | cd Structure_threader-0.1-rc3/ 26 | python3 structure_threader.py \$@ -st \${workdir}/structure_kernel_src/structure > \$LOG_FILE 2>&1 27 | " > structure_threader-structure_1 28 | chmod +x structure_threader-structure_1 29 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_1/results-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sys.time 6 | 7 | 8 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_1/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 Structure runs on a single thread 6 | 0.1-rc3 7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 
8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 0.2.0 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-essential 21 | 2 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 1 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_16/downloads.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/downloads.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_16/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Define test dir 4 | testdir=$(pwd) 5 | 6 | # Create env dir for dependent libraries 7 | mkdir -p ${testdir}/env 8 | 9 | # Install dependencies 10 | # Structure 11 | cd ${testdir} 12 | tar -zxvf structure_kernel_source.tar.gz 13 | cd structure_kernel_src 14 | make 15 | 16 | # Install Structure_threder 17 | # Structure_threader 18 | cd ${testdir} 19 | tar xfvz v0.1-rc3.tar.gz 20 | 21 | # Create launcher script 22 | 23 | echo "#!/bin/bash 24 | workdir=$(pwd) 25 | cd Structure_threader-0.1-rc3/ 26 | python3 structure_threader.py \$@ -st \${workdir}/structure_kernel_src/structure > \$LOG_FILE 2>&1 27 | " > structure_threader-structure_16 28 | chmod +x structure_threader-structure_16 29 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_16/results-definition.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/results-definition.xml -------------------------------------------------------------------------------- 
/PTS/structure_threader-structure_16/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 Structure runs on 16 threads 6 | 0.1-rc3 7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 0.2.0 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-essential 21 | 2 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 16 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_2/downloads.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/downloads.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_2/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Define test dir 4 | testdir=$(pwd) 5 | 6 | # Create env dir for dependent libraries 7 | mkdir -p ${testdir}/env 8 | 9 | # Install dependencies 10 | # Structure 11 | cd ${testdir} 12 | tar -zxvf structure_kernel_source.tar.gz 13 | cd structure_kernel_src 14 | make 15 | 16 | # Install Structure_threader 17 | # Structure_threader 18 | cd ${testdir} 19 | tar xfvz v0.1-rc3.tar.gz 20 | 21 | # Create launcher script 22 | 23 | echo "#!/bin/bash 24 | workdir=$(pwd) 25 | cd Structure_threader-0.1-rc3/ 26 | python3 structure_threader.py \$@ -st \${workdir}/structure_kernel_src/structure > \$LOG_FILE 2>&1 27 | " > structure_threader-structure_2 28 | chmod +x 
structure_threader-structure_2 29 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_2/results-definition.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/results-definition.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_2/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 Structure runs on 2 threads 6 | 0.1-rc3 7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 0.2.0 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-essential 21 | 2 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 2 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_4/downloads.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/downloads.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_4/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Define test dir 4 | testdir=$(pwd) 5 | 6 | # Create env dir for dependent libraries 7 | mkdir -p ${testdir}/env 8 | 9 | # Install dependencies 10 | # Structure 11 | cd ${testdir} 12 | tar -zxvf structure_kernel_source.tar.gz 13 | cd structure_kernel_src 14 | make 15 | 
16 | # Install Structure_threader 17 | # Structure_threader 18 | cd ${testdir} 19 | tar xfvz v0.1-rc3.tar.gz 20 | 21 | # Create launcher script 22 | 23 | echo "#!/bin/bash 24 | workdir=$(pwd) 25 | cd Structure_threader-0.1-rc3/ 26 | python3 structure_threader.py \$@ -st \${workdir}/structure_kernel_src/structure > \$LOG_FILE 2>&1 27 | " > structure_threader-structure_4 28 | chmod +x structure_threader-structure_4 29 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_4/results-definition.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/results-definition.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_4/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 Structure runs on 4 threads 6 | 0.1-rc3 7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 
8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 0.2.0 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-essential 21 | 2 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 4 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_6/downloads.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/downloads.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_6/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Define test dir 4 | testdir=$(pwd) 5 | 6 | # Create env dir for dependent libraries 7 | mkdir -p ${testdir}/env 8 | 9 | # Install dependencies 10 | # Structure 11 | cd ${testdir} 12 | tar -zxvf structure_kernel_source.tar.gz 13 | cd structure_kernel_src 14 | make 15 | 16 | # Install Structure_threder 17 | # Structure_threader 18 | cd ${testdir} 19 | tar xfvz v0.1-rc3.tar.gz 20 | 21 | # Create launcher script 22 | 23 | echo "#!/bin/bash 24 | workdir=$(pwd) 25 | cd Structure_threader-0.1-rc3/ 26 | python3 structure_threader.py \$@ -st \${workdir}/structure_kernel_src/structure > \$LOG_FILE 2>&1 27 | " > structure_threader-structure_6 28 | chmod +x structure_threader-structure_6 29 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_6/results-definition.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/results-definition.xml -------------------------------------------------------------------------------- 
/PTS/structure_threader-structure_6/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 Structure runs on 6 threads 6 | 0.1-rc3 7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 0.2.0 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-essential 21 | 2 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 6 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_8/downloads.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/downloads.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_8/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Define test dir 4 | testdir=$(pwd) 5 | 6 | # Create env dir for dependent libraries 7 | mkdir -p ${testdir}/env 8 | 9 | # Install dependencies 10 | # Structure 11 | cd ${testdir} 12 | tar -zxvf structure_kernel_source.tar.gz 13 | cd structure_kernel_src 14 | make 15 | 16 | # Install Structure_threader 17 | # Structure_threader 18 | cd ${testdir} 19 | tar xfvz v0.1-rc3.tar.gz 20 | 21 | # Create launcher script 22 | 23 | echo "#!/bin/bash 24 | workdir=$(pwd) 25 | cd Structure_threader-0.1-rc3/ 26 | python3 structure_threader.py \$@ -st \${workdir}/structure_kernel_src/structure > \$LOG_FILE 2>&1 27 | " > structure_threader-structure_8 28 | chmod +x 
structure_threader-structure_8 29 | -------------------------------------------------------------------------------- /PTS/structure_threader-structure_8/results-definition.xml: -------------------------------------------------------------------------------- 1 | ../structure_threader-structure_1/results-definition.xml -------------------------------------------------------------------------------- /PTS/structure_threader-structure_8/test-definition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Timed 16 Structure runs on 8 threads 6 | 0.1-rc3 7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K". 8 | Seconds 9 | LIB 10 | Cork Oak clustering 11 | 3 12 | 13 | 14 | 0.2.0 15 | Linux, Solaris, MacOSX, BSD 16 | Scientific 17 | Processor 18 | Free 19 | Verified 20 | build-essential 21 | 2 22 | http://github.com/StuntsPT/Structure_threader 23 | Francisco Pina-Martins 24 | 25 | 26 | 27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 8 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Structure_threader 2 | A program to parallelize the runs of [Structure](http://web.stanford.edu/group/pritchardlab/structure.html), [fastStructure](https://rajanil.github.io/fastStructure/), [MavericK](http://www.bobverity.com/home/maverick/what-is-maverick/) and [ALStructure](https://github.com/StoreyLab/alstructure) software. 
3 | 4 | [![Travis Status](https://travis-ci.org/StuntsPT/Structure_threader.svg?branch=master)](https://travis-ci.org/StuntsPT/Structure_threader) [![Documentation Status](https://readthedocs.org/projects/structure-threader/badge/?version=latest)](http://structure-threader.readthedocs.io/en/latest/?badge=latest) 5 | [![DOI](https://zenodo.org/badge/31598374.svg)](https://zenodo.org/badge/latestdoi/31598374) 6 | [![Codacy Badge](https://app.codacy.com/project/badge/Grade/becafd10f0bc4904b6d2857cf4c47ea4)](https://www.codacy.com/gh/StuntsPT/Structure_threader/dashboard?utm_source=github.com&utm_medium=referral&utm_content=StuntsPT/Structure_threader&utm_campaign=Badge_Grade) 7 | [![Pipeline Status](https://gitlab.com/StuntsPT/Structure_threader/badges/master/pipeline.svg)](https://gitlab.com/StuntsPT/Structure_threader/pipelines) 8 | 9 | 10 | ## Installation 11 | 12 | ```bash 13 | pip3 install structure_threader 14 | ``` 15 | 16 | *Structure_threader* is available on 17 | [Pypi](https://pypi.python.org/pypi/structure_threader/). It can be 18 | installed by simply running the above command. If you are on a \*nix like 19 | platform, you can use the `--user` option if you can't or don't want to install 20 | the program as `root` user. Binaries for STRUCTURE, fastStructure and 21 | *MavericK* are also distributed for GNU/Linux and Mac OS X. For more details, 22 | please [check the 23 | manual](http://structure-threader.readthedocs.io/en/latest/install/). 24 | 25 | 26 | ## Manual 27 | The complete documentation can be found on [readthedocs.org](http://structure-threader.readthedocs.io/en/latest/). 28 | 29 | 30 | ## Citation 31 | If you use *Structure_threader*, please cite our 32 | ~~[Zenodo DOI](https://zenodo.org/badge/latestdoi/31598374).~~ 33 | [Molecular Ecology Resources paper](http://doi.org/10.1111/1755-0998.12702) 34 | 35 | ### Full citation: 36 |
37 |
Pina-Martins, F., Silva, D. N., Fino, J., & Paulo, O. S. (2017). Structure_threader: An improved method for automation and parallelization of programs structure, fastStructure and MavericK on multicore CPU systems. Molecular Ecology Resources, n/a-n/a. doi:10.1111/1755-0998.12702
38 | 39 |
40 | 41 | ## License 42 | GPLv3 43 | -------------------------------------------------------------------------------- /TestData/BigTestData.str.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/TestData/BigTestData.str.tar.xz -------------------------------------------------------------------------------- /TestData/README.md: -------------------------------------------------------------------------------- 1 | ../docs/test_data.md -------------------------------------------------------------------------------- /TestData/extraparams: -------------------------------------------------------------------------------- 1 | 2 | EXTRA PARAMS FOR THE PROGRAM structure. THESE PARAMETERS CONTROL HOW THE 3 | PROGRAM RUNS. ATTRIBUTES OF THE DATAFILE AS WELL AS K AND RUNLENGTH ARE 4 | SPECIFIED IN mainparams. 5 | 6 | "(int)" means that this takes an integer value. 7 | "(d)" means that this is a double (ie, a Real number such as 3.14). 8 | "(B)" means that this variable is Boolean 9 | (ie insert 1 for True, and 0 for False). 10 | 11 | PROGRAM OPTIONS 12 | 13 | #define NOADMIX 0 // (B) Use no admixture model (0=admixture model, 1=no-admix) 14 | #define LINKAGE 0 // (B) Use the linkage model model 15 | #define USEPOPINFO 0 // (B) Use prior population information to pre-assign individuals 16 | to clusters 17 | #define LOCPRIOR 0 //(B) Use location information to improve weak data 18 | 19 | #define FREQSCORR 0 // (B) allele frequencies are correlated among pops 20 | #define ONEFST 0 // (B) assume same value of Fst for all subpopulations. 21 | 22 | #define INFERALPHA 1 // (B) Infer ALPHA (the admixture parameter) 23 | #define POPALPHAS 0 // (B) Individual alpha for each population 24 | #define ALPHA 1.0 // (d) Dirichlet parameter for degree of admixture 25 | (this is the initial value if INFERALPHA==1). 
26 | 27 | #define INFERLAMBDA 0 // (B) Infer LAMBDA (the allele frequencies parameter) 28 | #define POPSPECIFICLAMBDA 0 //(B) infer a separate lambda for each pop 29 | (only if INFERLAMBDA=1). 30 | #define LAMBDA 1.0 // (d) Dirichlet parameter for allele frequencies 31 | 32 | 33 | 34 | 35 | PRIORS 36 | 37 | #define FPRIORMEAN 0.01 // (d) Prior mean and SD of Fst for pops. 38 | #define FPRIORSD 0.05 // (d) The prior is a Gamma distribution with these parameters 39 | 40 | #define UNIFPRIORALPHA 1 // (B) use a uniform prior for alpha; 41 | otherwise gamma prior 42 | #define ALPHAMAX 10.0 // (d) max value of alpha if uniform prior 43 | #define ALPHAPRIORA 1.0 // (only if UNIFPRIORALPHA==0): alpha has a gamma 44 | prior with mean A*B, and 45 | #define ALPHAPRIORB 2.0 // variance A*B^2. 46 | 47 | 48 | #define LOG10RMIN -4.0 //(d) Log10 of minimum allowed value of r under linkage model 49 | #define LOG10RMAX 1.0 //(d) Log10 of maximum allowed value of r 50 | #define LOG10RPROPSD 0.1 //(d) standard deviation of log r in update 51 | #define LOG10RSTART -2.0 //(d) initial value of log10 r 52 | 53 | 54 | USING PRIOR POPULATION INFO (USEPOPINFO) 55 | 56 | #define GENSBACK 2 //(int) For use when inferring whether an indiv- 57 | idual is an immigrant, or has an immigrant an- 58 | cestor in the past GENSBACK generations. eg, if 59 | GENSBACK==2, it tests for immigrant ancestry 60 | back to grandparents. 61 | #define MIGRPRIOR 0.01 //(d) prior prob that an individual is a migrant 62 | (used only when USEPOPINFO==1). This should 63 | be small, eg 0.01 or 0.1. 64 | #define PFROMPOPFLAGONLY 0 // (B) only use individuals with POPFLAG=1 to update P. 65 | This is to enable use of a reference set of 66 | individuals for clustering additional "test" 67 | individuals. 
68 | 69 | LOCPRIOR MODEL FOR USING LOCATION INFORMATION 70 | 71 | #define LOCISPOP 0 //(B) use POPDATA for location information 72 | #define LOCPRIORINIT 1.0 //(d) initial value for r, the location prior 73 | #define MAXLOCPRIOR 20.0 //(d) max allowed value for r 74 | 75 | 76 | 77 | 78 | OUTPUT OPTIONS 79 | 80 | #define PRINTNET 1 // (B) Print the "net nucleotide distance" to screen during the run 81 | #define PRINTLAMBDA 1 // (B) Print current value(s) of lambda to screen 82 | #define PRINTQSUM 1 // (B) Print summary of current population membership to screen 83 | 84 | #define SITEBYSITE 0 // (B) whether or not to print site by site results. 85 | (Linkage model only) This is a large file! 86 | #define PRINTQHAT 0 // (B) Q-hat printed to a separate file. Turn this 87 | on before using STRAT. 88 | #define UPDATEFREQ 100 // (int) frequency of printing update on the screen. 89 | Set automatically if this is 0. 90 | #define PRINTLIKES 0 // (B) print current likelihood to screen every rep 91 | #define INTERMEDSAVE 0 // (int) number of saves to file during run 92 | 93 | #define ECHODATA 1 // (B) Print some of data file to screen to check 94 | that the data entry is correct. 95 | (NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:) 96 | #define ANCESTDIST 0 // (B) collect data about the distribution of an- 97 | cestry coefficients (Q) for each individual 98 | #define NUMBOXES 1000 // (int) the distribution of Q values is stored as 99 | a histogram with this number of boxes. 100 | #define ANCESTPINT 0.90 // (d) the size of the displayed probability 101 | interval on Q (values between 0.0--1.0) 102 | 103 | 104 | 105 | MISCELLANEOUS 106 | 107 | #define COMPUTEPROB 1 // (B) Estimate the probability of the Data under 108 | the model. This is used when choosing the 109 | best number of subpopulations. 
110 | #define ADMBURNIN 500 // (int) [only relevant for linkage model]: 111 | Initial period of burnin with admixture model (see Readme) 112 | #define ALPHAPROPSD 0.025 // (d) SD of proposal for updating alpha 113 | #define STARTATPOPINFO 0 // Use given populations as the initial condition 114 | for population origins. (Need POPDATA==1). It 115 | is assumed that the PopData in the input file 116 | are between 1 and k where k<=MAXPOPS. 117 | #define RANDOMIZE 1 // (B) use new random seed for each run 118 | #define SEED 0 // (int) seed value for random number generator 119 | (must set RANDOMIZE=0) 120 | #define METROFREQ 10 // (int) Frequency of using Metropolis step to update 121 | Q under admixture model (ie use the metr. move every 122 | i steps). If this is set to 0, it is never used. 123 | (Proposal for each q^(i) sampled from prior. The 124 | goal is to improve mixing for small alpha.) 125 | #define REPORTHITRATE 0 // (B) report hit rate if using METROFREQ 126 | -------------------------------------------------------------------------------- /TestData/joblist.txt: -------------------------------------------------------------------------------- 1 | T1 1 1 50000 1000000 2 | T2 1 1 50000 1000000 3 | T3 1 1 50000 1000000 4 | T4 1 1 50000 1000000 5 | T5 1 2 50000 1000000 6 | T6 1 2 50000 1000000 7 | T7 1 2 50000 1000000 8 | T8 1 2 50000 1000000 9 | T9 1 3 50000 1000000 10 | T10 1 3 50000 1000000 11 | T11 1 3 50000 1000000 12 | T12 1 3 50000 1000000 13 | T13 1 4 50000 1000000 14 | T14 1 4 50000 1000000 15 | T15 1 4 50000 1000000 16 | T16 1 4 50000 1000000 17 | -------------------------------------------------------------------------------- /TestData/mainparams: -------------------------------------------------------------------------------- 1 | 2 | KEY PARAMETERS FOR THE PROGRAM structure. YOU WILL NEED TO SET THESE 3 | IN ORDER TO RUN THE PROGRAM. VARIOUS OPTIONS CAN BE ADJUSTED IN THE 4 | FILE extraparams. 5 | 6 | 7 | "(int)" means that this takes an integer value. 
8 | "(B)" means that this variable is Boolean 9 | (ie insert 1 for True, and 0 for False) 10 | "(str)" means that this is a string (but not enclosed in quotes!) 11 | 12 | 13 | Basic Program Parameters 14 | 15 | #define MAXPOPS KKK // (int) number of populations assumed 16 | #define BURNIN 50000 // (int) length of burnin period 17 | #define NUMREPS 1000000 // (int) number of MCMC reps after burnin 18 | 19 | Input/Output files 20 | 21 | #define INFILE /home/francisco/structure/sobreiro_snps_structure_bin_pop.input // (str) name of input data file 22 | #define OUTFILE outfile //(str) name of output data file 23 | 24 | Data file format 25 | 26 | #define NUMINDS 100 // (int) number of diploid individuals in data file 27 | #define NUMLOCI 80 // (int) number of loci in data file 28 | #define PLOIDY 2 // (int) ploidy of data 29 | #define MISSING -9 // (int) value given to missing genotype data 30 | #define ONEROWPERIND 0 // (B) store data for individuals in a single line 31 | 32 | 33 | #define LABEL 1 // (B) Input file contains individual labels 34 | #define POPDATA 1 // (B) Input file contains a population identifier 35 | #define POPFLAG 0 // (B) Input file contains a flag which says 36 | whether to use popinfo when USEPOPINFO==1 37 | #define LOCDATA 0 // (B) Input file contains a location identifier 38 | 39 | #define PHENOTYPE 0 // (B) Input file contains phenotype information 40 | #define EXTRACOLS 0 // (int) Number of additional columns of data 41 | before the genotype data start. 
42 | 43 | #define MARKERNAMES 1 // (B) data file contains row of marker names 44 | #define RECESSIVEALLELES 0 // (B) data file contains dominant markers (eg AFLPs) 45 | // and a row to indicate which alleles are recessive 46 | #define MAPDISTANCES 0 // (B) data file contains row of map distances 47 | // between loci 48 | 49 | 50 | Advanced data file options 51 | 52 | #define PHASED 0 // (B) Data are in correct phase (relevant for linkage model only) 53 | #define PHASEINFO 0 // (B) the data for each individual contains a line 54 | indicating phase (linkage model) 55 | #define MARKOVPHASE 0 // (B) the phase info follows a Markov model. 56 | #define NOTAMBIGUOUS -999 // (int) for use in some analyses of polyploid data 57 | 58 | 59 | 60 | Command line options: 61 | 62 | -m mainparams 63 | -e extraparams 64 | -s stratparams 65 | -K MAXPOPS 66 | -L NUMLOCI 67 | -N NUMINDS 68 | -i input file 69 | -o output file 70 | -D SEED 71 | -------------------------------------------------------------------------------- /TestData/mav_benchmark_parameters.txt: -------------------------------------------------------------------------------- 1 | #### Data proprieties 2 | headerRow_on t 3 | popCol_on t 4 | ploidyCol_on f 5 | ploidy 2 6 | missingData -9 7 | 8 | 9 | #### Model parameters 10 | Kmin 1 11 | Kmax 16 12 | admix_on t 13 | fixAlpha_on f 14 | alpha 1.0 15 | alphaPropSD 0.10 16 | 17 | 18 | #### Simulation parameters 19 | exhaustive_on f 20 | mainRepeats 5 21 | mainBurnin 2500 22 | mainSamples 10000 23 | 24 | thermodynamic_on t 25 | thermodynamicRungs 20 26 | thermodynamicBurnin 200 27 | thermodynamicSamples 1000 28 | 29 | 30 | #### Basic output proprieties 31 | outputLog_on t 32 | outputLikelihood_on t 33 | outputQmatrix_ind_on t 34 | outputQmatrix_pop_on t 35 | outputEvidence_on t 36 | outputEvidenceDetails_on t 37 | 38 | 39 | #### Output location 40 | outputRoot default/ 41 | -------------------------------------------------------------------------------- 
/TestData/parameters.txt: -------------------------------------------------------------------------------- 1 | #### Data proprieties 2 | headerRow_on t 3 | popCol_on t 4 | ploidyCol_on f 5 | ploidy 2 6 | missingData -9 7 | 8 | 9 | #### Model parameters 10 | Kmin 1 11 | Kmax 4 12 | admix_on t 13 | fixAlpha_on f 14 | alpha 1.0 15 | alphaPropSD 0.10 16 | 17 | 18 | #### Simulation parameters 19 | exhaustive_on f 20 | mainRepeats 5 21 | mainBurnin 500 22 | mainSamples 4000 23 | 24 | thermodynamic_on t 25 | thermodynamicRungs 20 26 | thermodynamicBurnin 1000 27 | thermodynamicSamples 5000 28 | 29 | 30 | #### Basic output proprieties 31 | outputLog_on t 32 | outputLikelihood_on t 33 | outputQmatrix_ind_on t 34 | outputQmatrix_pop_on t 35 | outputEvidence_on t 36 | outputEvidenceDetails_on t 37 | 38 | 39 | #### Output location 40 | -------------------------------------------------------------------------------- /benchmarks/ParallelStructure_runner.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/Rscript 2 | # Copyright 2016 Francisco Pina Martins 3 | # This file is part of Structure_threader. 4 | # Structure_threader is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | 9 | # Structure_threader is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | 14 | # You should have received a copy of the GNU General Public License 15 | # along with Structure_threader. If not, see . 
16 | 17 | # Usage: Rscript ParallelStructure_runner.R "number_of_threads_to_use" 18 | 19 | library(ParallelStructure) 20 | 21 | ## Define variables 22 | # Get number of threads from CLI (commandArgs() returns character strings; NOTE(review): n_cpu below receives args[1] as a string -- confirm parallel_structure() accepts that, or wrap it in as.integer()) 23 | args <- commandArgs(trailingOnly = TRUE) 24 | 25 | # Joblist location 26 | joblist_location = "/home/francisco/bench/joblist.txt" 27 | # Location of STRUCTURE binary 28 | structure_bin = "/opt/structure/bin/" 29 | # Infile location 30 | infile_path = "/home/francisco/bench/SmallTestData.structure" 31 | # Outfile location 32 | outfile_path = "Results/" # Yes! This C**p program takes arguments in both full and relative path simultaneously! 33 | # Number of individuals 34 | n_inds = 100 35 | #Number of loci 36 | n_loci = 80 37 | 38 | parallel_structure(joblist = joblist_location, n_cpu = args[1], 39 | structure_path = structure_bin, infile = infile_path, 40 | outpath = outfile_path, numinds = n_inds, numloci = n_loci, 41 | plot_output = 0, label = 1, popdata = 1, popflag = 0, 42 | locdata = 0, phenotypes = 0, markernames = 1, 43 | mapdist = 0, onerowperind = 0, phaseinfo = 0, 44 | recessivealleles = 0, phased = 0, extracol = 0, missing = -9, 45 | ploidy = 2, noadmix = 0, linkage = 0, usepopinfo = 0, 46 | locprior = 0, inferalpha = 1, alpha = 1, popalphas = 0, 47 | unifprioralpha = 1, alphamax = 10, alphapropsd = 0.025, 48 | freqscorr = 0, onefst = 0, fpriormean = 0.01, 49 | fpriorsd = 0.05, inferlambda = 0, lambda = 1, 50 | computeprob = 1, pfromflagonly = 0, ancestdist = 0, 51 | startatpopinfo = 0, metrofreq = 10, updatefreq = 100, 52 | printqhat = 0,revert_convert=0, randomize=1) 53 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks for *Structure_threader* 2 | 3 | In this directory you will find the files used for benchmarking the single threaded runs of both STRUCTURE and fastStructure, as well as some results. 
4 | The scripts to draw the speedup plots and the barplots can be found here as well. 5 | 6 | 7 | ## Contents: 8 | 9 | * benchmark.sh 10 | * benchmark_fast.sh 11 | * speedup_plotter.py 12 | * bar_plotter.py 13 | 14 | 15 | ### benchmark.sh 16 | 17 | This is a [Zsh](http://www.zsh.org/) script to run STRUCTURE sequentially for 16 jobs, 4 jobs for each value of "K" (from 1 to 4). 18 | It does not log the runs, nor the results (everything is written into the same file). 19 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run. 20 | 21 | 22 | ### benchmark_fast.sh 23 | 24 | This is a [Zsh](http://www.zsh.org/) script to run fastStructure sequentially for 16 jobs, one job for each value of "K" (from 1 to 16). 25 | It does not log the runs, nor the results (everything is written into the same file). 26 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run. 27 | 28 | 29 | ### speedup_plotter.py 30 | 31 | This is the python script that was used to create the speedup plots for the generated data. 32 | 33 | ### bar_plotter.py 34 | 35 | This is the python script that was used to create the bar plots for the single threaded vs. multi-threaded run times. 36 | -------------------------------------------------------------------------------- /benchmarks/Side_by_side.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/benchmarks/Side_by_side.png -------------------------------------------------------------------------------- /benchmarks/bar_plotter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright 2015 Francisco Pina Martins 4 | # This file is part of speedup_plotter. 
5 | # speedup_plotter is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | 10 | # speedup_plotter is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | 15 | # You should have received a copy of the GNU General Public License 16 | # along with speedup_plotter. If not, see <http://www.gnu.org/licenses/>. 17 | 18 | import matplotlib.pyplot as plt 19 | import numpy 20 | 21 | from speedup_plotter import data_harverster 22 | 23 | 24 | def draw_bar_plot(dataframes): 25 | """ 26 | Draws a bar plot with the different times for single vs. multiple 27 | threads implementations. Column 0 of `dataframes` supplies the x tick labels (cast to int), column 1 the single thread times and column 2 the multiple threads times. The figure is saved as SVG to argv[1] + "_plot.svg"; `argv` is a global that is only imported under the `__main__` guard below, so calling this function from another module raises NameError.""" 28 | 29 | N = len(dataframes[:, 0]) 30 | single_times = dataframes[:, 1] 31 | threaded_times = dataframes[:, 2] 32 | 33 | locs = numpy.arange(N) # the x locations for the groups 34 | 35 | width = 0.35 # the width of the bars 36 | 37 | fig, ax = plt.subplots() 38 | rects1 = ax.bar(locs, single_times, width, color='grey') 39 | 40 | rects2 = ax.bar(locs+width, threaded_times, width, color='darkgrey') 41 | 42 | # add some text for labels, title and axes ticks 43 | ax.set_ylabel('Time (s)') 44 | ax.set_title('Time to calculate clustering for each value of "K", single, ' 45 | 'vs. 
multiple threading') 46 | ax.set_xticks(locs+width) 47 | 48 | ax.set_xticklabels(list(map(int, dataframes[:, 0]))) 49 | # NOTE: the legend label of the threaded series is hard-coded to '8 threads'. 50 | ax.legend((rects1[0], rects2[0]), ('Single thread', '8 threads'), loc="upper left") 51 | 52 | ax.grid(True, zorder=0) 53 | 54 | plt.savefig(argv[1] + "_plot.svg", format="svg") 55 | 56 | if __name__ == "__main__": 57 | from sys import argv 58 | # Usage: python3 bar_plotter.py K_times.csv 59 | dataframes = data_harverster(argv[1]) 60 | draw_bar_plot(dataframes) 61 | -------------------------------------------------------------------------------- /benchmarks/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | 3 | ks=( 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 ) 4 | 5 | for i in $ks 6 | do 7 | /opt/structure/bin/structure -K $i -i TestData.structure -o bench.txt 8 | done 9 | 10 | -------------------------------------------------------------------------------- /benchmarks/benchmark_fast.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | 3 | ks=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ) 4 | 5 | datafile=BigTestData 6 | 7 | for i in ${ks} 8 | do 9 | echo "Currently running K=${i}." 10 | /usr/bin/time -f %E python2 ~/Software/fastStructure/structure.py -K $i --input=../TestData/${datafile} --output=../TestData/${datafile}_out --format=str 11 | done 12 | -------------------------------------------------------------------------------- /benchmarks/speedup_plotter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright 2015 Francisco Pina Martins 4 | # This file is part of speedup_plotter. 5 | # speedup_plotter is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | 10 | # speedup_plotter is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | 15 | # You should have received a copy of the GNU General Public License 16 | # along with speedup_plotter. If not, see <http://www.gnu.org/licenses/>. 17 | 18 | import matplotlib.pyplot as plt 19 | import numpy 20 | 21 | def data_harverster(datafile_name): 22 | """Gather speedup data from a ";"-delimited csv file, skipping its first line, and return a numpy float array with it (missing entries are filled with `False`).""" 23 | timearray = numpy.genfromtxt(datafile_name, delimiter = ";", autostrip=True, 24 | dtype=float, skip_header=True, 25 | filling_values=False) 26 | 27 | return timearray 28 | 29 | 30 | def draw_plot(timearray): 31 | """Draw a line plot based on the speedup data. Columns 1 to 4 of `timearray` are plotted as the "i7-4700MQ", "E5-2609", "E5520" and "i5-3350P" series, each with a 1 prepended so it pairs with the first x value. The figure is saved as SVG to argv[1] + "_plot.svg"; `argv` is a global that is only imported under the `__main__` guard below.""" 32 | i7 = list(timearray[:, 1][:-4]) 33 | i7.insert(0, 1) 34 | e5 = list(timearray[:, 2][:-4]) 35 | e5.insert(0, 1) 36 | oldxeon = list(timearray[:, 3]) 37 | oldxeon.insert(0, 1) 38 | i5 = list(timearray[:, 4][:-6]) 39 | i5.insert(0, 1) 40 | # The hard-coded x values below match the series lengths only when timearray has 8 rows. 41 | plt.axis([0, 16, 0, 16]) 42 | plt.plot([1, 2, 4, 6, 8], i7, 'k-v', fillstyle="full", ms=7, 43 | label="i7-4700MQ") 44 | plt.plot([1, 2, 4], i5, 'k-^', fillstyle="full", ms=7, label="i5-3350P") 45 | plt.plot([1, 2, 4, 6, 8, 10, 12, 14, 16], oldxeon, 'k-x', fillstyle="full", 46 | ms=7, label="E5520") 47 | plt.plot([1, 2, 4, 6, 8], e5, 'k+-', fillstyle="full", ms=7, 48 | label="E5-2609") 49 | 50 | plt.plot(range(16), range(16), 'k-.', label="Linear scaling") 51 | 52 | plt.grid(True) 53 | plt.xlabel("Number of threads") 54 | plt.ylabel("Speed increase") 55 | plt.legend(loc=2, fontsize="small") 56 | plt.savefig(argv[1] + "_plot.svg", format="svg") 57 | #plt.show() 58 | 59 | if __name__ == "__main__": 60 | from sys import argv 61 | timearray = data_harverster(argv[1]) 62 | draw_plot(timearray) 63 | -------------------------------------------------------------------------------- 
/benchmarks/system_speedup_plotter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright 2015-2017 Francisco Pina Martins 4 | # This file is part of speedup_plotter. 5 | # speedup_plotter is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | 10 | # speedup_plotter is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | 15 | # You should have received a copy of the GNU General Public License 16 | # along with speedup_plotter. If not, see <http://www.gnu.org/licenses/>. 17 | 18 | import matplotlib.pyplot as plt 19 | import numpy 20 | 21 | def data_harverster(datafile_name): 22 | """ 23 | Gather speedup data from a ";"-delimited csv file and return a numpy structured array with it; the field names are read from the file's header row (names=True). 24 | """ 25 | timearray = numpy.genfromtxt(datafile_name, delimiter=";", autostrip=True, 26 | dtype=float, skip_header=False, names=True, 27 | filling_values=False) 28 | 29 | return timearray 30 | 31 | 32 | def draw_plot(timearray): 33 | """ 34 | Draw a line plot based on speedup data: one line per field other than "CPUs", plotted against the "CPUs" field, plus a "Linear scaling" reference line. The figure is saved as SVG to argv[1] + "_plot.svg"; `argv` is a global that is only imported under the `__main__` guard below. 
35 | """ 36 | system_cores = max(map(int, timearray["CPUs"])) 37 | names = [x for x in timearray.dtype.names if x != "CPUs"] 38 | linetypes = ("k-", "k:", "k--") # NOTE: only three styles; zip() below drops any further names, so lines[name] raises KeyError for a 4th data column 39 | lines = {k: v for k, v in zip(names, linetypes)} 40 | plt.axis([1, system_cores + 1, 1, system_cores + 1]) 41 | for name in names: 42 | plt.plot(list(map(int, timearray["CPUs"])), timearray[name], 43 | lines[name], 44 | fillstyle="full", ms=7, label=name) 45 | plt.plot(range(1, system_cores + 2), range(1, system_cores + 2), 'k-.', 46 | label="Linear scaling") 47 | 48 | plt.grid(True) 49 | plt.xlabel("Number of threads") 50 | plt.ylabel("Speed increase") 51 | plt.xticks(list(map(int, timearray["CPUs"]))) 52 | plt.legend(loc=2, fontsize="small") 53 | plt.savefig(argv[1] + "_plot.svg", format="svg") 54 | #plt.show() 55 | 56 | if __name__ == "__main__": 57 | from sys import argv 58 | TIMEARRAY = data_harverster(argv[1]) 59 | draw_plot(TIMEARRAY) 60 | -------------------------------------------------------------------------------- /docs/benchmark.md: -------------------------------------------------------------------------------- 1 | # Benchmarking process 2 | 3 | You can find some of the scripts used for the benchmarking process inside the [*benchmarks* directory](https://github.com/StuntsPT/Structure_threader/tree/master/benchmarks). Inside this directory you will find the files used for benchmarking the single threaded runs of both STRUCTURE and *fastStructure*, as well as some results. 4 | The scripts to draw the speedup plots and the barplots can be found there as well. 5 | You will also find relevant documentation, which is reproduced here. 6 | 7 | 8 | ## Directory contents: 9 | 10 | * benchmark.sh 11 | * benchmark_fast.sh 12 | * speedup_plotter.py 13 | * bar_plotter.py 14 | 15 | 16 | ### benchmark.sh 17 | 18 | This is a [Zsh](http://www.zsh.org/) script to run STRUCTURE sequentially for 16 jobs, 4 jobs for each value of "K" (from 1 to 4). 
19 | It does not log the runs, nor the results (everything is written into the same file). 20 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run. 21 | 22 | 23 | ### benchmark_fast.sh 24 | 25 | This is a [Zsh](http://www.zsh.org/) script to run fastStructure sequentially for 16 jobs, one job for each value of "K" (from 1 to 16). 26 | It does not log the runs, nor the results (everything is written into the same file). 27 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run. 28 | 29 | 30 | ### speedup_plotter.py 31 | 32 | This is the python script that was used to create the speedup plots for the generated data. 33 | 34 | 35 | ### bar_plotter.py 36 | 37 | This is the python script that was used to create the bar plots for the single threaded vs. multi-threaded run times. 38 | -------------------------------------------------------------------------------- /docs/binaries.md: -------------------------------------------------------------------------------- 1 | # Binary building 2 | 3 | For your convenience, we have pre-built binaries of STRUCTURE and 4 | fastStructure. They are provided with the package under 5 | `structure_threader/bins/$platform/`. 6 | Here is how they were built. 7 | 8 | ## The build system 9 | 10 | ### GNU/Linux binaries 11 | Binaries were built on a machine with an Intel Xeon E5-2609 0 @ 2.40GHz CPU. 12 | The OS under which the binaries were built is Ubuntu 12.04 64bit. This "old" 13 | OS was used since linux systems have backwards, but not forwards compatibility. 14 | This means that binaries built on older systems will run on newer systems, but 15 | the opposite may not be true. 16 | 17 | ### OSX binaries 18 | Binaries were built on a Mid 2013 MacBook Air with a "Haswell" based i5 CPU, running OSX 10.10 Yosemite. 19 | They should be forward compatible with later OSX releases. 
20 | 21 | ## STRUCTURE 22 | 23 | STRUCTURE is relatively simple to build. Source code can be obtained from the 24 | [STRUCTURE website](http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz). To build the binary, we used our helper 25 | script "install_structure.sh". The binary version is 2.3.4. 26 | 27 | 28 | ## fastStructure 29 | 30 | fastStructure requires a more involved process to build as it requires many 31 | dependencies. It can be obtained from 32 | [its own github repository](https://github.com/rajanil/fastStructure). 33 | Although fastStructure is written in python 2, it uses compiled code, by making 34 | use of `cython`. 35 | To build the binary, we installed fastStructure using our helper script 36 | "install_faststructure.sh" and then used 37 | [pyinstaller](http://www.pyinstaller.org/) to turn it into a binary. The 38 | "specfile" that was used can be found [here](https://github.com/StuntsPT/Structure_threader/tree/master/helper_scripts/structure.spec); it contains all the required information to reproduce the 39 | build. The binary version is 1.0. 40 | -------------------------------------------------------------------------------- /docs/citation.md: -------------------------------------------------------------------------------- 1 | # Citation 2 | 3 | If you use *Structure_threader*, please cite: 4 | 5 | ~~[Zenodo DOI](https://zenodo.org/badge/latestdoi/31598374).~~ 6 | [Molecular Ecology Resources paper](http://doi.org/10.1111/1755-0998.12702) 7 | 8 | ### Full citation: 9 | 
10 |
Pina-Martins, F., Silva, D. N., Fino, J., & Paulo, O. S. (2017). Structure_threader: An improved method for automation and parallelization of programs structure, fastStructure and MavericK on multicore CPU systems. Molecular Ecology Resources, 17(6), e268–e274. doi:10.1111/1755-0998.12702
11 | 12 |
13 | 14 | If you used STRUCTURE, please cite: 15 | 16 | * [Pritchard JK, Stephens M, Donnelly P (2000) Inference of population structure using multilocus genotype data. Genetics, 155, 945–959.](http://www.genetics.org/content/155/2/945) 17 | 18 | If you used fastStructure, please cite: 19 | 20 | * [Raj A, Stephens M, Pritchard JK (2014) fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets. Genetics, 197, 573–589.](http://www.genetics.org/content/197/2/573.long) 21 | 22 | If you used the Evanno test module, please cite: 23 | 24 | * [Earl DA, vonHoldt BM (2012) STRUCTURE HARVESTER: a website and program for visualizing STRUCTURE output and implementing the Evanno method. Conservation Genetics Resources, 4, 359–361.](http://link.springer.com/article/10.1007%2Fs12686-011-9548-7) 25 | 26 | If you used MavericK, please cite: 27 | 28 | * [Verity, R & Nichols, R.A (2016). Estimating the number of subpopulations (K) in structured populations. Genetics 203.4, 1827-1839.](https://doi.org/10.1534/genetics.115.180992) 29 | -------------------------------------------------------------------------------- /docs/external.md: -------------------------------------------------------------------------------- 1 | # Manually installing external programs 2 | 3 | If you wish to compile your own binaries for these programs, you may wish to 4 | rely on our 5 | ["helper_scripts"](https://github.com/StuntsPT/Structure_threader/tree/master/helper_scripts) 6 | which contain commands to compile and install *MavericK*, *Structure* **and** 7 | *fastStructure* (along with any required dependencies). For more details check 8 | the next few sections. 9 | 10 | ## Structure_threader helper scripts 11 | The directory "helper_scripts" contains three scripts that will install *STRUCTURE*, *fastStructure* and *MavericK* respectively in a *semi*-automatic way. 12 | 13 | All scripts default the programs' install locations to ~/Software/. 
You can change this in the scripts themselves should you wish to change this location. 14 | 15 | 16 | ### install_structure.sh 17 | This script will download and install STRUCTURE. 18 | 19 | 20 | #### Requirements: 21 | * a C compiler, such as GCC, with fortran support. 22 | * Cmake is required to build LAPACK 23 | 24 | This should be available in every HPC environment. 25 | 26 | In Ubuntu, all you should need is the package "build-essential" (if it is not 27 | already installed for some reason). It can be installed like this: 28 | 29 | ``` 30 | sudo apt-get install build-essential 31 | ``` 32 | 33 | In other distros, the package name should be similar. 34 | 35 | ### install_faststructure.sh 36 | This script will download and install fastStructure and its dependencies. 37 | 38 | fastStructure depends on quite a few software packages: 39 | * cython 40 | * numpy 41 | * scipy 42 | * GNU scientific library 43 | 44 | If these are already installed in your system, feel free to comment the script 45 | section that will install them. Otherwise it will install a new local copy of 46 | these programs. You can install these packages in Ubuntu with the following 47 | command: 48 | 49 | ``` 50 | sudo apt-get install cython python-numpy python-scipy gsl-bin 51 | ``` 52 | 53 | ### Important note: 54 | If you are relying on the GNU Scientific Library that was installed using the 55 | `install_faststructure` script, you will need to make your system aware of 56 | where these libraries are. 57 | for that, add the following to your `~/.bashrc`: 58 | 59 | ```bash 60 | LD_LIBRARY_PATH=$install_dir/lib 61 | export LD_LIBRARY_PATH 62 | ``` 63 | 64 | Where `$install_dir` is the directory defined in `install_faststructure.sh`. 65 | 66 | 67 | ### install_maverick.sh 68 | This script will download, compile and install MavericK. 69 | 70 | 71 | #### Requirements: 72 | * a recent C compiler, such as GCC 6.1 and above. 73 | 74 | This should be available in every HPC environment. 
75 | 76 | In Ubuntu, all you should need is the package "build-essential" (if it is not 77 | already installed for some reason). It can be installed like this: 78 | 79 | ``` 80 | sudo apt-get install build-essential 81 | ``` 82 | 83 | In other distros, the package name should be similar. 84 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | #FAQ 2 | 3 | Nothing so far. Will be updated as questions start to arise. 4 | -------------------------------------------------------------------------------- /docs/future.md: -------------------------------------------------------------------------------- 1 | # Future Plans 2 | Here is a list of the features that are currently planned to implement in *Structure_threader*. 3 | 4 | * ~~Add a "setup.py" installation process~~ 5 | * ~~Add unit tests~~ 6 | * ~~Allow the fastStructure wrapper to use the ".bed" format too.~~ 7 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # *Structure_threader* 2 | 3 | ## Description 4 | 5 | A program to parallelize and automate the runs of [Structure](http://web.stanford.edu/group/pritchardlab/structure.html), [fastStructure](https://rajanil.github.io/fastStructure/), [MavericK](http://www.bobverity.com/home/maverick/what-is-maverick/) and [ALStructure](https://github.com/StoreyLab/alstructure) software. 6 | 7 | 8 | ## Requirements 9 | 10 | Python 3. The main program only uses modules from the standard library. 11 | In order to draw the plots, matplotlib >= 1.4 is required (installed automatically as a dependency when installed via `pip`). 12 | To run "fastChooseK.py" (fastStructure wrapper only), numpy is also required (installed automatically as a dependency when installed via `pip`). 
13 | In order to use "ALStructure", you need to have [R](https://www.r-project.org/) installed too (must be installed manually, as `pip` can't handle installing R or its dependencies). 14 | 15 | 16 | ## Where to get it 17 | 18 | * Source code - [Structure_threader on gitlab](https://gitlab.com/StuntsPT/Structure_threader) 19 | * Source code - [Structure_threader on github](https://github.com/StuntsPT/Structure_threader) 20 | * Source distribution with platform binaries for wrapped programs - [Structure_threader on PyPI](https://pypi.python.org/pypi/structure_threader/) 21 | * You can easily install *Structure_threader* by issuing the command `pip3 install structure_threader` 22 | 23 | 24 | ## Contents 25 | 26 | * [Installation & dependencies](install.md) 27 | * [Binary building](binaries.md) 28 | * [Usage](usage.md) 29 | * [Output](output.md) 30 | * [Test Data](test_data.md) 31 | * [Benchmarking](benchmark.md) 32 | * [Citation](citation.md) 33 | * [Future Plans](future.md) 34 | * [FAQ](faq.md) 35 | 36 | 37 | ## A word of caution 38 | 39 | *Structure_threader* can be quite useful in automating and speeding up your analyses; however, in order to use it effectively you **really** should learn and understand how the wrapped programs work. It is **highly** recommended that you first learn to use the wrapped programs in their default implementations. And by "learning", we don't just mean "I know how to make it run.", but rather "I understand what each of the chosen parameters does, and why I selected each of them.". 40 | The paper [An overview of STRUCTURE: applications, parameter settings, and supporting software](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665925/) is an excellent guide for understanding the parameterization of *STRUCTURE*. 
41 | We do not know of a good "tutorial" for learning about *fastStructure*, and as such, the [original research paper](http://www.genetics.org/content/197/2/573) (paywalled), albeit a bit dense, is still the best place to learn about it. 42 | The [documentation for *MavericK*](http://www.bobverity.com/home/maverick/additional-files/), for instance, is quite comprehensive and a great resource to learn to use *MavericK* and consequently about the importance of proper MCMC chain mixing. 43 | 44 | 45 | ## Other works 46 | 47 | The script "fastChooseK.py" was taken from [the original fastStructure repository](https://github.com/rajanil/fastStructure), ported to python 3, largely modified to work as a module for the main script and re-licensed as GPLv3. 48 | 49 | The scripts "harvesterCore.py" and "structureHarvester.py" were taken from [the original structureHarverster repository](https://github.com/dentearl/structureHarvester), ported to python 3, and slightly modified to work as a module for the main script. Please see the "Citation" part of the README to know what to cite, should you use this module. 50 | 51 | Binaries for [fastStructure](https://github.com/rajanil/fastStructure), [STRUCTURE](http://web.stanford.edu/group/pritchardlab/structure.html) and [MavericK](https://github.com/bobverity/MavericK) are distributed in the pypi hosted version. 52 | 53 | 54 | ## Bug reporting 55 | 56 | Found a bug or would like a feature added? Or maybe drop some feedback? 57 | Just [open a new issue on gitlab](https://gitlab.com/StuntsPT/Structure_threader/issues/new) [or on github](https://github.com/StuntsPT/Structure_threader/issues/new). 
58 | 59 | 60 | ## License 61 | 62 | GPLv3 63 | -------------------------------------------------------------------------------- /docs/output.md: -------------------------------------------------------------------------------- 1 | # Output 2 | 3 | The program will inform the user of what run is currently being processed by 4 | outputting the command it is running to STDOUT, such as this: 5 | 6 | ``` 7 | Running: /opt/structure/bin/structure -K 1 -i input_file.structure -o results_admix/K1_rep10 8 | ``` 9 | 10 | After each run, the corresponding output file is saved to the location chosen in 11 | the *Output dir* argument. 12 | 13 | When all tasks are performed the program will exit with the message: 14 | "All jobs finished." 15 | After these jobs are run, the program will use [Structure Harvester](http://taylor0.biology.ucla.edu/struct_harvest/) (or "fastChooseK.py" if wrapping *fastStructure*) to infer the optimal value of "K". 16 | Finally, the program will create plots with the inferred clustering, one for each calculated value of "K". 17 | A "Thermodynamic Integration" test will be performed to infer the bestK if using *MavericK*. 18 | 19 | ## Results 20 | 21 | After a successful run, inside the directory you selected as "output directory" (let's call it "My_results" for the sake of the example) you will find the following: 22 | 23 | * In the root of "My_results" you will find the "results files" outputted by the wrapped program. One file (directory, in the case of *MavericK*) for each replicate of "K". 24 | * Under "My_results/bestK" you will find either the results of the "Evanno test", the results of "fastChooseK.py", or the results of "Thermodynamic Integration" test, depending on what program was wrapped. 25 | * Under "My_results/plots" you will find one plot for each value of "K" in [SVG format](https://www.w3.org/Graphics/SVG/). 26 | * If logging was turned on, you will also find a detailed log file for each run in the root of "My_results". 
27 | 28 | ## Obtaining STRUCTURE alpha values 29 | 30 | In order to obtain the alpha values of each STRUCTURE run, you can pass the option `--log 1` to *Structure_threader*. This will write a file named `K<knum>_rep<rnum>.stlog`, where `knum` and `rnum` are the K value and replicate number respectively. This file contains the STDOUT generated by STRUCTURE. These files can be used to draw alpha vs. iteration plots to help assess MCMC chain convergence. 31 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs==1.5.1 2 | -------------------------------------------------------------------------------- /docs/test_data.md: -------------------------------------------------------------------------------- 1 | # Test Data for *Structure_threader* 2 | In [this directory](https://github.com/StuntsPT/Structure_threader/tree/master/TestData) you will find the data that was used to benchmark *Structure_threader*. 3 | 4 | 5 | ## BigTestData.str.tar.xz 6 | This file is a *fastStructure* formatted input file which was used to benchmark *fastStructure*. This is a large SNP file (1000 SNPs across 1000 individuals) which was obtained from the [1000 genomes project](http://www.1000genomes.org). 
The file was downloaded from [chromosome 22](http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz), and was then filtered using [vcftools](https://github.com/vcftools/vcftools) with the following criteria: 7 | 8 | * only biallelic, non-singleton SNV sites 9 | * SNVs must be at least 2KB apart from each other 10 | * minor allele frequency < 0.05 11 | 12 | The command used was: 13 | 14 | ./vcftools --gzvcf \ 15 | ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz \ 16 | --maf 0.05 --thin 2000 --min-alleles 2 --max-alleles 2 --non-ref-ac 2 \ 17 | --recode --chr 22 --out Chr22 18 | 19 | These were the criteria used in the *admixture* [analysis of the 1000 genomes project](http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/admixture_files/README.admixture_20141217). 20 | 21 | The file was then converted to structure format with [PGDSpider](http://www.cmpg.unibe.ch/software/PGDSpider/). 22 | To further reduce the dataset (for faster benchmarking), the file was then processed with `cut` and `head` and finally compressed with xz. 23 | 24 | The commands used were: 25 | 26 | cut -d " " -f 1-1000 Chr22.recode.str | head -n 2000 > BigTestData.str 27 | tar cvfJ BigTestData.str.tar.xz BigTestData.str 28 | 29 | 30 | ## BigTestData.bed.tar.xz 31 | This file is a *PLINK* formatted `.bed`, `.bim` and `.fam` set of files. They were obtained in the exact same way as `BigTestData.str.tar.xz`, except for the conversion using *PGDSpider*, which was not used. 
Instead, the filtered VCF file was reduced to 501 individuals and 1000 SNPs with the following command: 32 | 33 | head -n 1253 Chr22.recode.vcf |cut -f 1-510 > Testdata.vcf 34 | 35 | This file was then converted to the *PLINK* format and compressed with the following commands: 36 | 37 | plink1.9 --vcf Testdata.vcf 38 | mv plink.bed BigTestData.bed 39 | mv plink.fam BigTestData.fam 40 | mv plink.bim BigTestData.bim 41 | tar cvfJ BigTestData.bed.tar.xz BigTestData.bed BigTestData.fam BigTestData.bim 42 | 43 | 44 | ## BigTestData.vcf.tar.xz 45 | This file is *VCF* formatted. It was obtained in the exact same way as `BigTestData.str.tar.xz`, except for the conversion using *PGDSPIDER*, which was not used. Instead, the filtered VCF file was reduced to 501 individuals and 1000 SNPs and compressed with the following command: 46 | 47 | head -n 1253 Chr22.recode.vcf |cut -f 1-510 > BigTestData.vcf 48 | tar cvfJ BigTestData.vcf.tar.xz BigTestData.vcf 49 | 50 | 51 | ## extraparams and mainparams 52 | The *STRUCTURE* parameter files that were used in the benchmarking process. 53 | 54 | 55 | ## joblist.txt 56 | The joblist used to benchmark *ParallelStructure*. Consists of 16 jobs, 4 values of "K" with 4 replicates each. 57 | 58 | 59 | ## SmallTestData.structure 60 | This file is a Structure formatted input file which was used to benchmark STRUCTURE and *MavericK*. This is a medium sized SNP file (80 SNPs) which was obtained from the [1000 genomes project](http://www.1000genomes.org). The file was downloaded from [chromossome 22](http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz), and was then filtered using vcftools following the same criteria and commands as the BigTestData.str file. 
61 | 62 | 63 | The commands used were: 64 | 65 | cut -d " " -f 1-80 SmallData.structure > SmallData302SNPs.structure 66 | head -n 201 SmallData302SNPs.structure > SmallTestData.structure 67 | 68 | 69 | ## parameters.txt 70 | The *MavericK* parameter file that is used in the unit tests. 71 | 72 | 73 | ## mav_benchmark_parameters.txt 74 | The file with the *MavericK* benchmark parameters. 75 | -------------------------------------------------------------------------------- /helper_scripts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | # apt stuff 4 | RUN apt update 5 | RUN apt install -y python3-pip parallel zip 6 | 7 | # Pypi stuff 8 | RUN pip install structure_threader 9 | 10 | RUN mkdir /analysis 11 | WORKDIR /analysis 12 | -------------------------------------------------------------------------------- /helper_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Structure_threader helper scripts 2 | 3 | This directory contains three scripts that will install *STRUCTURE*, *fastStructure* and *MavericK* respectively in a *semi*-automatic way. 4 | 5 | All scripts default the programs' install locations to ~/Software/. You can change this in the scripts themselves should you wish to change this location. 6 | 7 | ### install_structure.sh 8 | This script will download and install STRUCTURE. 9 | 10 | 11 | #### Requirements: 12 | * a C compiler, such as GCC, with fortran support. 13 | * CMake is required to build LAPACK 14 | 15 | This should be available in every HPC environment. 16 | 17 | In Ubuntu, all you should need is the package "build-essential" (if it is not 18 | already installed for some reason). It can be installed like this: 19 | 20 | ``` 21 | sudo apt-get install build-essential 22 | ``` 23 | 24 | In other distros, the package name should be similar. 
25 | 26 | ### install_faststructure.sh 27 | This script will download and install fastStructure and its dependencies. 28 | 29 | fastStructure depends on quite a few software packages: 30 | * cython 31 | * numpy 32 | * scipy 33 | * GNU Scientific Library 34 | 35 | If these are already installed in your system, feel free to comment out the script 36 | section that will install them. Otherwise it will install a new local copy of 37 | these programs. You can install these packages in Ubuntu with the following 38 | command: 39 | 40 | ``` 41 | sudo apt-get install cython python-numpy python-scipy gsl-bin 42 | ``` 43 | 44 | ### Important note: 45 | If you are relying on the GNU Scientific Library that was installed using the 46 | `install_faststructure` script, you will need to make your system aware of 47 | where these libraries are. 48 | For that, add the following to your `~/.bashrc`: 49 | 50 | ```bash 51 | LD_LIBRARY_PATH=$install_dir/lib 52 | export LD_LIBRARY_PATH 53 | ``` 54 | 55 | Where `$install_dir` is the directory defined in `install_faststructure.sh`. 56 | 57 | 58 | ### install_maverick.sh 59 | This script will download, compile and install MavericK. 60 | 61 | 62 | #### Requirements: 63 | * a recent C compiler, such as GCC 6.1 or above. 64 | 65 | This should be available in every HPC environment. 66 | 67 | In Ubuntu, all you should need is the package "build-essential" (if it is not 68 | already installed for some reason). It can be installed like this: 69 | 70 | ``` 71 | sudo apt-get install build-essential 72 | ``` 73 | 74 | In other distros, the package name should be similar. 75 | -------------------------------------------------------------------------------- /helper_scripts/install_faststructure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015-2019 Francisco Pina Martins 4 | # This file is part of structure_threader. 
# Semi-automatic installer for fastStructure and its build dependencies
# (LAPACK, Cython, nose, numpy, scipy, GSL). Everything is downloaded and
# built under ${tempdir}; libraries and the final fastStructure tree end up
# under ${install_dir}.

# Abort on the first failing command so a broken download or build step is
# never silently followed by the next one.
set -e

# Define and create installation location:
install_dir="${HOME}/Software/faststructure"
mkdir -p "${install_dir}"

# Define temp dir. $USER may be unset (e.g. inside containers); fall back to
# the effective user name so the directory is still per-user.
tempdir="/tmp/${USER:-$(id -un)}"
mkdir -p "${tempdir}"

# Remember the caller's LDFLAGS; they are temporarily extended for the numpy
# build and restored before GSL is configured.
_OLD_LDFLAGS="${LDFLAGS}"


# Download faststructure and deps. sources into temp dir
# (`wget -c` resumes partial downloads from a previous, interrupted run.)
# faststructure
wget -c https://github.com/rajanil/fastStructure/archive/v1.0.tar.gz -O "${tempdir}/fastStructure-1.0.tar.gz"
# python-nose
wget -c https://pypi.python.org/packages/source/n/nose/nose-1.3.6.tar.gz -O "${tempdir}/nose-1.3.6.tar.gz"
# numpy
wget -c https://github.com/numpy/numpy/archive/v1.9.2.tar.gz -O "${tempdir}/numpy.tar.gz"
# scipy
wget -c https://github.com/scipy/scipy/archive/v0.16.0b2.tar.gz -O "${tempdir}/scipy.tar.gz"
# cython
wget -c https://pypi.python.org/packages/f8/25/80f9ca7e31e2b68cc942ff1d6136588f33a7aef5e2d6abe3f2183cb9fad5/cython-0.22.tar.gz -O "${tempdir}/Cython-0.22.tar.gz"
# GNU scientific library
wget -c http://gnu.mirror.vexxhost.com/gsl/gsl-1.16.tar.gz -O "${tempdir}/gsl-1.16.tar.gz"
# LAPACK (required for scipy)
wget -c http://www.netlib.org/lapack/lapack-3.5.0.tgz -O "${tempdir}/lapack-3.5.0.tgz"


# Install dependencies
# LAPACK
cd "${tempdir}"
tar xvfz lapack-3.5.0.tgz
mkdir -p build-lapack
cd build-lapack
export FCFLAGS="-w -fallow-argument-mismatch -O2"
export FFLAGS="-w -fallow-argument-mismatch -O2"
cmake ../lapack-3.5.0
make
mkdir -p "${install_dir}"/{bin,lib}
mv bin/* "${install_dir}/bin"
mv lib/* "${install_dir}/lib"

# cython
cd "${tempdir}"
tar xvfz Cython-0.22.tar.gz
cd Cython-0.22
python2 setup.py install --user

# python-nose
cd "${tempdir}"
tar xvfz nose-1.3.6.tar.gz
cd nose-1.3.6
python2 setup.py install --user

# numpy
cd "${tempdir}"
tar xvfz numpy.tar.gz
cd numpy-1.9.2
# Point every "python" shebang at python2. `find -exec … {} +` hands the file
# names to sed directly, so paths containing whitespace are not word-split.
find . -name '*.py' -exec sed \
    -e "s|#![ ]*/usr/bin/python$|#!/usr/bin/python2|" \
    -e "s|#![ ]*/usr/bin/env python$|#!/usr/bin/env python2|" \
    -e "s|#![ ]*/bin/env python$|#!/usr/bin/env python2|" \
    -i {} +
export ATLAS=None
export LDFLAGS="${LDFLAGS} -shared"
python2 setup.py install --user

# scipy
cd "${tempdir}"
tar xvfz scipy.tar.gz
cd scipy-0.16.0b2
python2 setup.py install --user

# GNU scientific library
cd "${tempdir}"
tar xvzf gsl-1.16.tar.gz
cd gsl-1.16
# Drop the "-shared" flag that was only added for the numpy/scipy builds.
export LDFLAGS="${_OLD_LDFLAGS}"
./configure --prefix="${install_dir}"
make
make install

# Extract tarball, enter src dir, build binary and place it in the install dir
# Only prepend the previous LD_LIBRARY_PATH when it is non-empty: a leading
# ":" would add an empty entry, which the dynamic loader treats as the
# current directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${install_dir}/lib"
export CFLAGS="-I${install_dir}/include"
export LDFLAGS="-L${install_dir}/lib"
cd "${tempdir}"
tar xvfz fastStructure-1.0.tar.gz
cd fastStructure-1.0
cd vars
python2 setup.py build_ext --inplace
cd ..
python2 setup.py build_ext --inplace
cd ..
mv fastStructure-1.0 "${install_dir}"

echo ""
echo "Install succesfull. fastStructure is now ready to use."
echo ""
echo "In order to use the locally installed libraries, please add"
echo "the following to the end of your ~/.bashrc:"
echo ""
echo "######"
echo "LD_LIBRARY_PATH=${install_dir}/lib"
echo "export LD_LIBRARY_PATH"
echo "#####"
# Semi-automatic installer for MavericK: fetch the release tarball, build it
# with make and move the resulting binary into the install directory.

# Stop at the first command that fails.
set -o errexit

# Release to install (the tag on GitHub is "v<version>") and package name.
pkg_version=1.0.4
pkg_name=MavericK

# Final location of the binary; created if missing.
install_dir=~/Software/"${pkg_name}"
mkdir -p "${install_dir}"

# Per-user scratch directory used for the download and the build.
tempdir="/tmp/$USER"
mkdir -p "${tempdir}"

# Fetch the MavericK sources into the scratch directory.
tarball="${tempdir}/${pkg_name}.tar.gz"
wget -O "${tarball}" "https://github.com/bobverity/${pkg_name}/archive/v${pkg_version}.tar.gz"

# Unpack next to the tarball, build inside the source tree and install the
# binary.
tar -xvzf "${tarball}" -C "${tempdir}"
cd "${tempdir}/${pkg_name}-${pkg_version}/"
make
mv "${pkg_name}" "${install_dir}"

echo ""
echo "Install succesfull. MavericK is now ready to use."
echo ""
# Semi-automatic installer for STRUCTURE: fetch the kernel sources, patch the
# Makefile, build and move the binary into the install directory.

# Stop at the first command that fails.
set -o errexit

# Final location of the binary; created if missing.
install_dir=~/Software/structure
mkdir -p "${install_dir}"

# Per-user scratch directory used for the download and the build.
tempdir=/tmp/"${USER}"
mkdir -p "${tempdir}"

# Fetch the STRUCTURE kernel sources into the scratch directory.
tarball="${tempdir}"/structure_kernel_source.tar.gz
wget -O "${tarball}" http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz

# Unpack next to the tarball and enter the source tree.
tar -xvzf "${tarball}" -C "${tempdir}"
cd "${tempdir}"/structure_kernel_src/

# Append -fcommon to the optimisation flags in the Makefile before building.
sed -i 's/OPT = -O3/OPT = -O3 -fcommon/' Makefile
make
mv structure "${install_dir}"

echo ""
echo "Install succesfull. STRUCTURE is now ready to use."
echo ""
Francisco Pina-Martins 4 | repo_url: https://gitlab.com/StuntsPT/Structure_threader 5 | edit_uri: tree/master/docs 6 | 7 | nav: 8 | - 'Introduction': 'index.md' 9 | - 'Installation & dependencies': 'install.md' 10 | - 'External programs': 'external.md' 11 | - 'Usage': 'usage.md' 12 | - 'Output': 'output.md' 13 | - 'Test Data': 'test_data.md' 14 | - 'Building Binaries': 'binaries.md' 15 | - 'Benchmarking': 'benchmark.md' 16 | - 'Citation': 'citation.md' 17 | - 'Future Plans': 'future.md' 18 | - 'FAQ': 'faq.md' 19 | theme: readthedocs 20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # pyproject.toml 2 | [build-system] 3 | requires = ["setuptools >= 42.0.0"] 4 | build-backend = "setuptools.build_meta" 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Unversioned 2 | numpy 3 | pytest 4 | 5 | # Version required 6 | matplotlib >= 1.5 7 | plotly >= 4.1.1 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright 2016-2025 Francisco Pina Martins 4 | # This file is part of structure_threader. 5 | # structure_threader is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
class NotSupportedException(Exception):
    """Raised when setup.py is executed by an unsupported Python version.

    Derives from ``Exception`` (not ``BaseException``) so that it behaves
    like an ordinary error and is handled by generic ``except Exception``
    handlers instead of bypassing them.
    """


if sys.version_info.major < 3:
    raise NotSupportedException("Only Python 3.x Supported")


def platform_detection(install_binaries=True):
    """
    Detect the platform and adapt the binaries location.

    Arguments:

    install_binaries : bool
        Only the literal ``True`` enables installation of the bundled
        binaries; any other value disables it.

    Returns a ``data_files`` style list ``[('bin', [paths...])]`` with the
    fastStructure, structure and MavericK binaries for the current
    platform, or ``None`` when binaries are disabled or ``sys.platform`` is
    neither "linux" nor "darwin".
    """
    # Directory holding the bundled binaries, keyed by sys.platform value.
    bin_dirs = {
        "linux": "structure_threader/bins/linux",
        "darwin": "structure_threader/bins/osx",
    }

    if install_binaries is not True:
        return None

    bin_dir = bin_dirs.get(sys.platform)
    if bin_dir is None:
        return None

    # Order is kept as fastStructure, structure, MavericK.
    binaries = [bin_dir + "/" + name
                for name in ("fastStructure", "structure", "MavericK")]

    return [('bin', binaries)]
install_requires=["plotly>=4.1.1", 74 | "colorlover", 75 | "numpy>=1.12.1", 76 | "matplotlib"], 77 | description=("A program to parallelize runs of 'Structure', " 78 | "'fastStructure' and 'MavericK'."), 79 | url=URL, 80 | download_url="{0}/-/archive/{1}/Structure_threader-{1}.tar.gz".format(URL, VERSION), 81 | author="Francisco Pina-Martins", 82 | author_email="f.pinamartins@gmail.com", 83 | license="GPL3", 84 | classifiers=["Intended Audience :: Science/Research", 85 | "License :: OSI Approved :: GNU General Public License v3 (" 86 | "GPLv3)", 87 | "Natural Language :: English", 88 | "Operating System :: POSIX :: Linux", 89 | "Topic :: Scientific/Engineering :: Bio-Informatics", 90 | "Programming Language :: Python :: 3 :: Only", 91 | "Programming Language :: Python :: 3.4", 92 | "Programming Language :: Python :: 3.5", 93 | "Programming Language :: Python :: 3.6", 94 | "Programming Language :: Python :: 3.7"], 95 | data_files=DATA_FILES, 96 | entry_points={ 97 | "console_scripts": [ 98 | "structure_threader = structure_threader.structure_threader:main", 99 | ] 100 | }, 101 | ) 102 | -------------------------------------------------------------------------------- /structure_threader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/__init__.py -------------------------------------------------------------------------------- /structure_threader/bins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/__init__.py -------------------------------------------------------------------------------- /structure_threader/bins/linux/MavericK: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/MavericK -------------------------------------------------------------------------------- /structure_threader/bins/linux/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/__init__.py -------------------------------------------------------------------------------- /structure_threader/bins/linux/fastStructure: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/fastStructure -------------------------------------------------------------------------------- /structure_threader/bins/linux/structure: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/structure -------------------------------------------------------------------------------- /structure_threader/bins/osx/MavericK: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/osx/MavericK -------------------------------------------------------------------------------- /structure_threader/bins/osx/fastStructure: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/osx/fastStructure -------------------------------------------------------------------------------- 
# Code taken from http://stackoverflow.com/a/1336640/3091595.
# Thanks to @sorin for providing this coloring method!

# now we patch Python code to add color support to logging.StreamHandler
def add_coloring_to_emit_windows(fn):
    """Wrap an ``emit`` function so records are coloured via the Windows API.

    Installs a ``_set_color`` helper on ``logging.StreamHandler`` and returns
    a replacement for *fn* that sets the console text attribute according to
    the record's level, calls *fn*, and then resets the attribute to white.

    Arguments:

    fn : callable
        The original ``emit(handler, record)`` function.
    """
    # Console text attributes (wincon.h).
    FOREGROUND_GREEN = 0x0002
    FOREGROUND_RED = 0x0004
    FOREGROUND_MAGENTA = 0x0005
    FOREGROUND_YELLOW = 0x0006
    FOREGROUND_WHITE = 0x0007  # blue | green | red
    FOREGROUND_INTENSITY = 0x0008  # foreground color is intensified.
    BACKGROUND_YELLOW = 0x0060
    BACKGROUND_INTENSITY = 0x0080  # background color is intensified.

    # (minimum level, attribute), checked from the most severe level down.
    level_colors = (
        (50, BACKGROUND_YELLOW | FOREGROUND_RED
             | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY),
        (40, FOREGROUND_RED | FOREGROUND_INTENSITY),
        (30, FOREGROUND_YELLOW | FOREGROUND_INTENSITY),
        (20, FOREGROUND_GREEN),
        (10, FOREGROUND_MAGENTA),
    )

    def _set_color(self, code):
        # Imported lazily: ctypes.windll only exists on Windows.
        import ctypes
        # Constants from the Windows API
        self.STD_OUTPUT_HANDLE = -11
        hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE)
        ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code)

    setattr(logging.StreamHandler, '_set_color', _set_color)

    def new(*args):
        handler, record = args[0], args[1]
        color = FOREGROUND_WHITE
        for threshold, attribute in level_colors:
            if record.levelno >= threshold:
                color = attribute
                break
        handler._set_color(color)
        try:
            return fn(*args)
        finally:
            # Always restore the default colour, even when emit() raises.
            handler._set_color(FOREGROUND_WHITE)
    return new


def add_coloring_to_emit_ansi(fn):
    """Wrap an ``emit`` function so records are coloured with ANSI escapes.

    The returned function surrounds the record's message with the escape
    sequence matching its level, calls *fn*, and then restores the original
    ``record.msg``. Restoring matters because one record object is handed to
    every handler of a logger: without it each handler would add another
    layer of escape codes. Non-string messages are converted with ``str()``
    (as ``LogRecord.getMessage`` does) instead of raising ``TypeError``.

    Arguments:

    fn : callable
        The original ``emit(handler, record)`` function.
    """
    reset = '\x1b[0m'  # normal
    # (minimum level, escape sequence), checked from the most severe down.
    level_colors = (
        (50, '\x1b[31m'),  # red
        (40, '\x1b[31m'),  # red
        (30, '\x1b[33m'),  # yellow
        (20, '\x1b[32m'),  # green
        (10, '\x1b[35m'),  # pink
    )

    def new(*args):
        record = args[1]
        color = reset
        for threshold, sequence in level_colors:
            if record.levelno >= threshold:
                color = sequence
                break
        original_msg = record.msg
        record.msg = color + str(original_msg) + reset
        try:
            return fn(*args)
        finally:
            record.msg = original_msg
    return new


import platform
if platform.system() == 'Windows':
    # Windows does not support ANSI escapes and we are using API calls to set the console color
    logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit)
else:
    # all non-Windows platforms are supporting ANSI escapes so we use them
    logging.StreamHandler.emit = add_coloring_to_emit_ansi(logging.StreamHandler.emit)
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /structure_threader/evanno/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/evanno/__init__.py -------------------------------------------------------------------------------- /structure_threader/evanno/fastChooseK.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # The MIT License (MIT) 4 | # 5 | # Copyright (c) 2014 Anil 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
import glob

import numpy as np


def insum(x, axes):
    """Sum *x* over *axes*, keeping the reduced axes for broadcasting."""
    return np.apply_over_axes(np.sum, x, axes)


def parse_logs(files):
    """
    Parses through log files to extract marginal
    likelihood estimates from executing the
    variational inference algorithm on a dataset.

    Only the first line containing the exact text 'Marginal Likelihood'
    is used in each file; the value is whatever follows the '=' sign.
    A file with no such line contributes nothing to the result.

    Arguments:

        files : list
            list of .log file names

    Returns:

        list of floats, in the same order as `files`.
    """
    marginal_likelihood = []
    for log_name in files:
        # `with` guarantees the handle is released even if float() raises.
        with open(log_name, 'r') as handle:
            for line in handle:
                if 'Marginal Likelihood' in line:
                    value = float(line.strip().split('=')[1])
                    marginal_likelihood.append(value)
                    break

    return marginal_likelihood


def parse_varQs(files):
    """
    Parses through multiple .meanQ files to extract the mean
    admixture proportions estimated by executing the
    variational inference algorithm on a dataset. This is then used
    to identify the number of model components used to explain
    structure in the data, for each .meanQ file.

    Arguments:

        files : list
            list of .meanQ file names

    Returns:

        list with one component count per file, in the order of `files`.
    """
    bestKs = []

    for q_name in files:
        with open(q_name, 'r') as handle:
            Q = np.array([list(map(float, line.strip().split()))
                          for line in handle])
        # Normalise every row so that it sums to one.
        Q = Q / insum(Q, [1])

        N = Q.shape[0]
        # Cumulative column mass, largest component first.
        C = np.cumsum(np.sort(Q.sum(0))[::-1])
        # Components needed before the cumulative mass reaches N - 1.
        bestKs.append(np.sum(C < N - 1) + 1)

    return bestKs


def main(indir, outpath):
    """
    Main function that runs everything in order.

    Globs `<indir>/*.log` and `<indir>/*.meanQ`, writes a two line summary
    to `<outpath>/chooseK.txt` and returns the list of K values spanning the
    two estimates (both ends included).

    The K of each log file is read from the second to last '.'-separated
    field of its name (e.g. `fS_run_K.3.log` -> 3).

    Arguments:

        indir : str
            directory holding the fastStructure output files
        outpath : str
            directory where `chooseK.txt` is written

    Returns:

        list of int, sorted in ascending order.
    """
    if not indir.endswith("/"):
        indir = indir + "/"

    files = glob.glob('%s*.log' % indir)
    Ks = np.array([int(name.split('.')[-2]) for name in files])
    marginal_likelihoods = parse_logs(files)

    files = glob.glob('%s*.meanQ' % indir)
    bestKs = parse_varQs(files)

    max_ml_k = int(Ks[np.argmax(marginal_likelihoods)])
    components_k = int(np.argmax(np.bincount(bestKs)))

    with open(outpath + "/chooseK.txt", "w") as outfile:
        outfile.write("Model complexity that maximizes marginal likelihood"
                      " = %d\n" % max_ml_k)
        outfile.write("Model components used to explain structure in data"
                      " = %d\n" % components_k)

    # Retrieve list of bestk. The two estimates come in no guaranteed order,
    # so sort them; otherwise the range would be empty whenever the marginal
    # likelihood K is the larger one.
    low, high = sorted((max_ml_k, components_k))
    return list(range(low, high + 1))


if __name__ == "__main__":
    # Usage: python3 fastChooseK.py /path/to/faststructure_outdir \
    # /path/to/dir/where/results_file/is_written
    from sys import argv

    main(argv[1], argv[2])
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/skeletons/__init__.py -------------------------------------------------------------------------------- /structure_threader/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/wrappers/__init__.py -------------------------------------------------------------------------------- /structure_threader/wrappers/alstructure_wrapper.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | # Copyright 2019-2020 Francisco Pina Martins 3 | # This file is part of structure_threader. 4 | # structure_threader is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | 9 | # structure_threader is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | 14 | # You should have received a copy of the GNU General Public License 15 | # along with structure_threader. If not, see . 
## Default repo
## Set the CRAN mirror so that install.packages() below does not have to
## prompt for one.
local({r <- getOption("repos")
       r["CRAN"] <- "http://cran.r-project.org"
       options(repos=r)
})

## Locate the per-user library from the R_LIBS_USER environment variable and
## expand a leading "~" to the home directory.
## NOTE(review): if R_LIBS_USER is not set, `ll` is empty and the `if` below
## receives a zero-length condition - confirm this cannot happen in practice.
ll = Sys.getenv()[ grep("R_LIBS_USER", names(Sys.getenv())) ]
local_lib = gsub(".*~", path.expand('~'), as.character(ll), perl=T)

## Create the user library on first use.
if (dir.exists(local_lib) == FALSE) {
    dir.create(local_lib, showWarnings = TRUE, recursive = TRUE)
}

## Use the user library as the library search path for the installs/loads below.
.libPaths(c(local_lib))

## Install ALStructure from GitHub (pinned to ref e355411) when it is not
## available; devtools is installed first if needed because it provides
## install_github().
if(!require("alstructure")){
    if(!require("devtools")){
        install.packages("devtools")
    }
    library("devtools")
    install_github("storeylab/alstructure", build_vignettes=FALSE, ref="e355411")
    library(alstructure)
}

## Install lfa through BiocManager when it is not available.
if(!require(lfa)){
    if (!requireNamespace("BiocManager", quietly = TRUE))
        install.packages("BiocManager")

    BiocManager::install("lfa")
    library(lfa)
}

alstructure_wrapper = function(data_matrix, K) {
    #' ALStructure wrapper
    #'
    #' Small wrapper function that wraps ALStructure
    #' Takes a data matrix and value of K
    #' as arguments and returns a q-matrix
    #'
    #' K may be given as a string (it comes from the command line below) and
    #' is converted to numeric before being passed on as `d_hat`.
    #' The returned matrix is the transpose of the fit's `Q_hat`.

    K = as.numeric(K)

    fit <- alstructure(X = data_matrix, d_hat=K)
    q_matrix = t(fit$Q_hat)

    return(q_matrix)
}

data_to_matrix = function (ifile) {
    #' data_to_matrix
    #' Converts the data in an input file into a data matrix that can be read
    #' by alstructure
    #' Takes a tsv or a bed file as input and returns a data matrix
    #'
    #' The choice is made on the last four characters of `ifile`: ".tsv" is
    #' read as a headerless tab separated table, anything else is handed to
    #' lfa::read.bed().

    if (substring(ifile, nchar(ifile)-3) == ".tsv") {
        # Echo the name of the tsv file being read.
        print(ifile)
        input_data = as.matrix(read.csv(ifile, header=F, sep="\t"))
    } else {
        input_data = lfa::read.bed(ifile)
    }

    return(input_data)
}

## Positional arguments: 1 = input file, 2 = K, 3 = output file.
args = commandArgs(trailingOnly=TRUE)

## Only run the pipeline when there is no enclosing function frame
## (presumably: executed as a script rather than called from a function -
## TODO confirm behaviour under source()).
if (sys.nframe() == 0){
    data_matrix = data_to_matrix(args[1])
    Q_matrix = alstructure_wrapper(data_matrix, args[2])
    write.csv(Q_matrix, args[3])
}
import os


def alstr_cli_generator(arg, k_val):
    """
    Generates and returns command line for running ALStructure.

    The input handed to the R wrapper depends on the extension of
    `arg.infile`:

    * `.bed`, `.fam`, `.bim`: the extension is stripped (PLINK prefix);
    * `.vcf`: the file is converted with `vcf_to_matrix()` and the
      resulting `.tsv` is used;
    * anything else (e.g. an already converted `.tsv`): passed unchanged.
      Previously this case failed with an unbound local variable.

    Arguments:

        arg : object with `outpath`, `infile` and `external_prog` attributes
        k_val : value of K for this run

    Returns:

        tuple (cli, output_file), where `cli` is an argv style list.
    """
    output_file = os.path.join(arg.outpath, "alstr_K" + str(k_val))
    if arg.infile.endswith((".bed", ".fam", ".bim")):
        infile = arg.infile[:-4]
    elif arg.infile.endswith(".vcf"):
        vcf_to_matrix(arg.infile)
        # Must match the file name chosen by vcf_to_matrix().
        infile = os.path.splitext(arg.infile)[0] + ".tsv"
    else:
        infile = arg.infile

    cli = ["Rscript", arg.external_prog, infile, str(k_val), output_file]

    return cli, output_file


def vcf_to_matrix(vcf_file):
    """
    Parses a VCF file and converts it to a tsv matrix that can be read by
    ALStructure.
    Takes a VCF filename as input.
    Does not return anything.
    Writes a new file with the same name as the VCF but with .tsv extension

    Each output row holds one value per sample column (column 10 onwards):
    0, 1 or 2 for the biallelic genotypes listed in the conversion table and
    "NA" for everything else (missing or multi-allelic calls). Only the
    first ':'-separated field of every sample column is considered.

    Raises ValueError if the output name would be the input file itself.
    """
    conversion_table = {"0/0": "0", "0/1": "1", "1/0": "1", "1/1": "2",
                        "0|0": "0", "0|1": "1", "1|0": "1", "1|1": "2"}

    # Only swap the real extension: str.replace() would also rewrite any
    # ".vcf" that happens to appear in a directory name.
    tsv_file = os.path.splitext(vcf_file)[0] + ".tsv"
    if tsv_file == vcf_file:
        raise ValueError("Refusing to overwrite the input file: " + vcf_file)

    with open(vcf_file, "r") as infile, open(tsv_file, "w") as outfile:
        in_header = True
        for line in infile:
            # Skip initial comments that start with #
            if in_header and line.startswith("#"):
                continue
            in_header = False

            genotypes = (x.split(":")[0] for x in line.split()[9:])
            converted = [conversion_table.get(gt, "NA") for gt in genotypes]
            outfile.write("\t".join(converted) + "\n")
import os


def fs_cli_generator(k_val, arg):
    """
    Generates and returns command line for running fastStructure.

    Input handling:

    * `.bed`, `.fam`, `.bim`: format "bed", extension stripped;
    * `.str`: format "str", extension stripped;
    * anything else: assumed to be in 'STR' format. The name is passed as
      is and a relative symlink `<infile>.str` pointing at the file is
      created beside it (an already existing link is left alone).

    When `arg.external_prog` ends in ".py" the command is prefixed with
    "python2"; otherwise the program is invoked directly as a binary.

    Arguments:

        k_val : value of K for this run
        arg : object with `outpath`, `infile`, `external_prog`, `seed` and
              `extra_options` (whitespace separated string) attributes

    Returns:

        tuple (cli, output_file), where `cli` is an argv style list.
    """
    output_file = os.path.join(arg.outpath, "fS_run_K")
    if arg.infile.endswith((".bed", ".fam", ".bim")):
        file_format = "bed"
        infile = arg.infile[:-4]
    else:
        file_format = "str"  # Assume 'STR' format if plink is not specified
        if arg.infile.endswith(".str"):
            infile = arg.infile[:-4]
        else:  # We need a symlink carrying the `.str` extension
            infile = arg.infile
            try:
                os.symlink(os.path.basename(arg.infile), arg.infile + ".str")
            except FileExistsError:
                # Left over from a previous run; any other OSError propagates.
                pass

    cli = [arg.external_prog, "-K", str(k_val), "--input",
           infile, "--output", output_file, "--format", file_format,
           "--seed", str(arg.seed)] + arg.extra_options.split()

    # Are we using the python script or a binary?
    if arg.external_prog.endswith(".py"):
        cli = ["python2"] + cli

    return cli, output_file
import itertools
import logging
import os
import random


def str_cli_generator(arg, k_val, rep_num, seed):
    """
    Generates and returns command line for running STRUCTURE.

    Arguments:

        arg : object with `outpath`, `external_prog`, `infile` and `params`
              attributes. `params` is either None or a list of extra
              command line items (see `str_param_checker`).
        k_val : value of K for this run
        rep_num : replicate number, used in the output file name
        seed : random seed passed with `-D`, or None to omit the option

    Returns:

        tuple (cli, output_file), where `cli` is an argv style list.
    """
    output_file = os.path.join(arg.outpath, "str_K" + str(k_val) + "_rep" +
                               str(rep_num))
    cli = [arg.external_prog, "-K", str(k_val), "-i", arg.infile, "-o",
           output_file]

    if seed is not None:
        # argv entries must be strings, whatever type the seed came in as.
        cli += ["-D", str(seed)]

    if arg.params is not None:
        cli += arg.params

    return cli, output_file


def str_param_checker(arg):
    """
    Handles the parameter files for STRUCTURE (or lack thereof).

    Changes the working directory to the one holding `arg.infile` (when the
    path has a directory part). If `arg.params` is set it is taken as the
    `mainparams` path; an `extraparams` file is expected beside it, is
    created empty when missing, and otherwise has its RANDOMIZE option
    disabled. `arg.params` is then replaced, in place, by the list
    `["-m", mainparams, "-e", extraparams]`.
    """
    def _disable_structure_randomize(extraparams_file):
        """
        Checks if the RANDOMIZE option is set in the `extraparams` file.
        If it is, disable it (set to `0`). The file is only rewritten when
        something actually changed.
        """
        with open(extraparams_file, "r") as infile:
            original_lines = infile.readlines()

        params = []
        overwrite = False
        for line in original_lines:
            fields = line.split()
            if len(fields) > 2 and fields[1] == "RANDOMIZE" \
                    and fields[2] == "1":
                # Flip only the value that follows the keyword, leaving any
                # other "1" on the line (e.g. in a comment) untouched.
                head, keyword, tail = line.partition("RANDOMIZE")
                line = head + keyword + tail.replace("1", "0", 1)
                logging.warning("The RANDOMIZE option was activated in"
                                " the `extraparams` file. "
                                " *Structure_threader* has disabled it"
                                " since it handles this functionality "
                                "internallly (random seed setting).")
                overwrite = True
            params.append(line)

        if overwrite:
            with open(extraparams_file, "w") as outfile:
                outfile.writelines(params)

    # A bare file name has an empty dirname, and os.chdir("") raises.
    workdir = os.path.dirname(arg.infile)
    if workdir:
        os.chdir(workdir)

    if arg.params is not None:
        mainparams = arg.params
        extraparams = os.path.join(os.path.dirname(arg.params),
                                   "extraparams")
        if not os.path.isfile(extraparams):
            logging.warning("No 'extraparams' file was found. An empty one "
                            "was created, but it is highly recommended "
                            "that you fill one out.")
            with open(extraparams, 'w'):
                pass
        else:
            _disable_structure_randomize(extraparams)
        arg.params = ["-m", mainparams, "-e", extraparams]


def seed_generator(seed, k_list, replicates):
    """
    Uses a user input seed value to generate *N* seeds, one for each run.
    Takes a seed value, the K values and the replicates as input and
    returns a job list: [(seed, K, replicate), ...].

    Jobs are listed in reverse product order (last K and last replicate
    first) and every seed is a string. Note that this seeds the module
    level `random` generator.
    """
    jobs = list(itertools.product(k_list, replicates))[::-1]

    random.seed(seed)
    return [(str(random.randrange(10000000)),) + job for job in jobs]
9 | 10 | # structure_threader is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | 15 | # You should have received a copy of the GNU General Public License 16 | # along with structure_threader. If not, see . 17 | 18 | set -e 19 | 20 | LightGreen='\033[1;32m' 21 | NoColor='\033[0m' 22 | 23 | echo "Runnig ALStructure 'field test'. This will simulate a full wrapped run on small test data." 24 | 25 | git_dir=$(pwd) 26 | str_bin=$(which alstructure_wrapper.R) 27 | structure_threader_exec=$(which structure_threader) 28 | 29 | tar xvfJ "${git_dir}/tests/smalldata/BigTestData.bed.tar.xz" -C "${git_dir}/tests/smalldata/" 30 | ${structure_threader_exec} run -i "${git_dir}/tests/smalldata/BigTestData.bed" -o ~/results_als -als "${str_bin}" -K 4 -t 4 --ind "${git_dir}/tests/smalldata/indfile.txt" 31 | 32 | echo -e "${LightGreen}ALStructure 'Field test' ran successfully on the \`.bed\` file. Yay!${NoColor}" 33 | 34 | tar xvfJ "${git_dir}/tests/smalldata/BigTestData.vcf.tar.xz" -C "${git_dir}/tests/smalldata/" 35 | ${structure_threader_exec} run -i "${git_dir}/tests/smalldata/BigTestData.vcf" -o ~/results_als -als "${str_bin}" -K 4 -t 4 --ind "${git_dir}/tests/smalldata/indfile.txt" 36 | 37 | echo -e "${LightGreen}ALStructure 'Field test' ran successfully on the \`.vcf\` file. Yay!${NoColor}" 38 | -------------------------------------------------------------------------------- /tests/alstructure_function_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright 2019 Francisco Pina Martins 4 | # This file is part of structure_threader. 
def test_alstr_cli_generator():
    """
    Tests if alstr_cli_generator() is working correctly.

    Uses the mock arguments with a `.bed` extension appended to the input
    file and checks both the command line and the output file name.
    """
    # Define arguments
    arg = mockups.Arguments()
    arg.infile += ".bed"
    k_val = 4

    # Expected layout:
    # "Rscript", arg.external_prog, infile, str(k_val), output_file
    expected_cli = ["Rscript", "EP", "IF", str(k_val), "alstr_K4"]

    returned_cli, out_file = alsw.alstr_cli_generator(arg, k_val)
    assert returned_cli == expected_cli
    assert out_file == "alstr_K4"


def test_vcf_to_matrix():
    """
    Tests if vcf_to_matrix() is working correctly.
    Converts a known file, and compares the result with a known good conversion
    """
    vcf_file = "smalldata/SmallTestData.vcf"
    alsw.vcf_to_matrix(vcf_file)

    assert filecmp.cmp(vcf_file[:-4] + ".tsv",
                       "smalldata/SmallTestData_reference.tsv")
def test_parse_logs():
    """
    Tests the result of parse_logs().

    Both sides are sorted because glob() returns files in arbitrary order.
    """
    files = glob.glob("files/*.log")
    expected = [-0.9875020559, -0.978009636, -0.9721792877,
                -0.9768312088, -0.9806135049, -0.9825775986]
    assert sorted(fc.parse_logs(files)) == sorted(expected)


def test_parse_varQs():
    """
    Tests the result of parse_varQs().

    Both sides are sorted because glob() returns files in arbitrary order.
    """
    files = glob.glob("files/*.meanQ")
    assert sorted(fc.parse_varQs(files)) == sorted([5, 2, 3, 1, 3, 3])


def test_main():
    """
    Tests the result of main(): both the returned list of K values and the
    contents of the `chooseK.txt` file it writes.
    """
    indir = "files/"
    outdir = "files/"
    expected_lines = [
        'Model complexity that maximizes marginal likelihood = 2\n',
        'Model components used to explain structure in data = 3\n']

    assert fc.main(indir, outdir) == [2, 3]
    # `with` closes the handle even when the assertion fails.
    with open(outdir + "chooseK.txt", "r") as outfile:
        assert outfile.readlines() == expected_lines
def test_fs_cli_generator():
    """
    Tests if fs_cli_generator() is working correctly, both for a binary
    ("EP") and for a python script ("EP.py") as the external program.
    """
    k_val = 4
    seed = "1235813"
    arg = mockups.Arguments()

    # Everything that follows the program name is the same in both cases.
    common_tail = ["-K", str(k_val), "--input",
                   "IF", "--output", "fS_run_K", "--format", "str",
                   "--seed", seed, "--prior=logistic"]

    for prog in ("EP", "EP.py"):
        arg.external_prog = prog
        arg.seed = seed

        # Python scripts are expected to be launched through python2.
        if prog.endswith(".py"):
            head = ["python2", prog]
        else:
            head = [prog]
        mock_cli = head + common_tail

        returned_cli, returned_outdir = fsw.fs_cli_generator(k_val, arg)

        assert returned_cli == mock_cli
        assert returned_outdir == "fS_run_K"
1.000000 14 | 1.000000 15 | 1.000000 16 | 1.000000 17 | 1.000000 18 | 1.000000 19 | 1.000000 20 | 1.000000 21 | 1.000000 22 | 1.000000 23 | 1.000000 24 | 1.000000 25 | 1.000000 26 | 1.000000 27 | 1.000000 28 | 1.000000 29 | 1.000000 30 | 1.000000 31 | 1.000000 32 | 1.000000 33 | 1.000000 34 | 1.000000 35 | 1.000000 36 | 1.000000 37 | 1.000000 38 | 1.000000 39 | 1.000000 40 | 1.000000 41 | 1.000000 42 | 1.000000 43 | 1.000000 44 | 1.000000 45 | 1.000000 46 | 1.000000 47 | 1.000000 48 | 1.000000 49 | 1.000000 50 | 1.000000 51 | 1.000000 52 | 1.000000 53 | 1.000000 54 | 1.000000 55 | 1.000000 56 | 1.000000 57 | 1.000000 58 | 1.000000 59 | 1.000000 60 | 1.000000 61 | 1.000000 62 | 1.000000 63 | 1.000000 64 | 1.000000 65 | 1.000000 66 | 1.000000 67 | 1.000000 68 | 1.000000 69 | 1.000000 70 | 1.000000 71 | 1.000000 72 | 1.000000 73 | 1.000000 74 | 1.000000 75 | 1.000000 76 | 1.000000 77 | 1.000000 78 | 1.000000 79 | 1.000000 80 | 1.000000 81 | 1.000000 82 | 1.000000 83 | 1.000000 84 | 1.000000 85 | 1.000000 86 | 1.000000 87 | 1.000000 88 | 1.000000 89 | 1.000000 90 | 1.000000 91 | 1.000000 92 | 1.000000 93 | 1.000000 94 | 1.000000 95 | 1.000000 96 | 1.000000 97 | 1.000000 98 | 1.000000 99 | 1.000000 100 | 1.000000 101 | 1.000000 102 | 1.000000 103 | 1.000000 104 | 1.000000 105 | 1.000000 106 | 1.000000 107 | 1.000000 108 | 1.000000 109 | 1.000000 110 | 1.000000 111 | 1.000000 112 | 1.000000 113 | 1.000000 114 | 1.000000 115 | 1.000000 116 | 1.000000 117 | 1.000000 118 | 1.000000 119 | 1.000000 120 | 1.000000 121 | 1.000000 122 | 1.000000 123 | 1.000000 124 | 1.000000 125 | 1.000000 126 | 1.000000 127 | 1.000000 128 | 1.000000 129 | 1.000000 130 | 1.000000 131 | 1.000000 132 | 1.000000 133 | 1.000000 134 | 1.000000 135 | 1.000000 136 | 1.000000 137 | 1.000000 138 | 1.000000 139 | 1.000000 140 | 1.000000 141 | 1.000000 142 | 1.000000 143 | 1.000000 144 | 1.000000 145 | 1.000000 146 | 1.000000 147 | 1.000000 148 | 1.000000 149 | 1.000000 150 | 1.000000 151 | 1.000000 152 | 
1.000000 153 | 1.000000 154 | 1.000000 155 | 1.000000 156 | 1.000000 157 | 1.000000 158 | 1.000000 159 | 1.000000 160 | 1.000000 161 | 1.000000 162 | 1.000000 163 | 1.000000 164 | 1.000000 165 | 1.000000 166 | 1.000000 167 | 1.000000 168 | 1.000000 169 | 1.000000 170 | 1.000000 171 | 1.000000 172 | 1.000000 173 | 1.000000 174 | 1.000000 175 | 1.000000 176 | 1.000000 177 | 1.000000 178 | 1.000000 179 | 1.000000 180 | 1.000000 181 | 1.000000 182 | 1.000000 183 | 1.000000 184 | 1.000000 185 | 1.000000 186 | 1.000000 187 | 1.000000 188 | 1.000000 189 | 1.000000 190 | 1.000000 191 | 1.000000 192 | 1.000000 193 | 1.000000 194 | 1.000000 195 | 1.000000 196 | 1.000000 197 | 1.000000 198 | 1.000000 199 | 1.000000 200 | 1.000000 201 | -------------------------------------------------------------------------------- /tests/files/fS_run_K.2.log: -------------------------------------------------------------------------------- 1 | Marginal likelihood with initialization (1) = -0.9962448182 2 | Marginal likelihood with initialization (2) = -0.9962458914 3 | Marginal likelihood with initialization (3) = -0.9962442830 4 | Marginal likelihood with initialization (4) = -0.9962474286 5 | Marginal likelihood with initialization (5) = -0.9962488147 6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs) 7 | 0 -0.9962442830 -- 0.256 8 | 10 -0.9722112089 0.0240330742 0.133 9 | 20 -0.9721876216 0.0000235873 0.141 10 | 30 -0.9721816820 0.0000059396 0.140 11 | 40 -0.9721795377 0.0000021443 0.141 12 | 50 -0.9721792877 0.0000002500 0.140 13 | Marginal Likelihood = -0.9721792877 14 | Total time = 0.9769 seconds 15 | Total iterations = 50 16 | -------------------------------------------------------------------------------- /tests/files/fS_run_K.2.meanQ: -------------------------------------------------------------------------------- 1 | 0.331741 0.668259 2 | 0.892599 0.107401 3 | 0.228488 0.771512 4 | 0.856694 0.143306 5 | 0.699816 0.300184 6 | 0.456815 0.543185 7 | 
0.002000 0.998000 8 | 0.081283 0.918717 9 | 0.177865 0.822135 10 | 0.031425 0.968575 11 | 0.993659 0.006341 12 | 0.005221 0.994779 13 | 0.090059 0.909941 14 | 0.365875 0.634125 15 | 0.007618 0.992382 16 | 0.720843 0.279157 17 | 0.893861 0.106139 18 | 0.003167 0.996833 19 | 0.011088 0.988912 20 | 0.240903 0.759097 21 | 0.298820 0.701180 22 | 0.345545 0.654455 23 | 0.041422 0.958578 24 | 0.998039 0.001961 25 | 0.908162 0.091838 26 | 0.896575 0.103425 27 | 0.998298 0.001702 28 | 0.551780 0.448220 29 | 0.010520 0.989480 30 | 0.746138 0.253862 31 | 0.135904 0.864096 32 | 0.997515 0.002485 33 | 0.174546 0.825454 34 | 0.104552 0.895448 35 | 0.038609 0.961391 36 | 0.281483 0.718517 37 | 0.248786 0.751214 38 | 0.837123 0.162877 39 | 0.027292 0.972708 40 | 0.068598 0.931402 41 | 0.996376 0.003624 42 | 0.002387 0.997613 43 | 0.607574 0.392426 44 | 0.099170 0.900830 45 | 0.626031 0.373969 46 | 0.004283 0.995717 47 | 0.738307 0.261693 48 | 0.733526 0.266474 49 | 0.031206 0.968794 50 | 0.039766 0.960234 51 | 0.067336 0.932664 52 | 0.202952 0.797048 53 | 0.025617 0.974383 54 | 0.610240 0.389760 55 | 0.730133 0.269867 56 | 0.874680 0.125320 57 | 0.153469 0.846531 58 | 0.001666 0.998334 59 | 0.998214 0.001786 60 | 0.441552 0.558448 61 | 0.115335 0.884665 62 | 0.002956 0.997044 63 | 0.997504 0.002496 64 | 0.140582 0.859418 65 | 0.974193 0.025807 66 | 0.269150 0.730850 67 | 0.926503 0.073497 68 | 0.001841 0.998159 69 | 0.002722 0.997278 70 | 0.939099 0.060901 71 | 0.997309 0.002691 72 | 0.912466 0.087534 73 | 0.002411 0.997589 74 | 0.032776 0.967224 75 | 0.837659 0.162341 76 | 0.277290 0.722710 77 | 0.042524 0.957476 78 | 0.367006 0.632994 79 | 0.103201 0.896799 80 | 0.138195 0.861805 81 | 0.003942 0.996058 82 | 0.003629 0.996371 83 | 0.997654 0.002346 84 | 0.610618 0.389382 85 | 0.968560 0.031440 86 | 0.998089 0.001911 87 | 0.153792 0.846208 88 | 0.255452 0.744548 89 | 0.900993 0.099007 90 | 0.898977 0.101023 91 | 0.029303 0.970697 92 | 0.530885 0.469115 93 | 0.997547 0.002453 94 | 
0.997679 0.002321 95 | 0.288946 0.711054 96 | 0.217767 0.782233 97 | 0.282245 0.717755 98 | 0.997146 0.002854 99 | 0.409879 0.590121 100 | 0.552314 0.447686 101 | 0.088516 0.911484 102 | 0.054706 0.945294 103 | 0.002425 0.997575 104 | 0.271878 0.728122 105 | 0.004466 0.995534 106 | 0.002118 0.997882 107 | 0.005835 0.994165 108 | 0.166099 0.833901 109 | 0.030578 0.969422 110 | 0.796477 0.203523 111 | 0.997926 0.002074 112 | 0.201074 0.798926 113 | 0.998043 0.001957 114 | 0.211867 0.788133 115 | 0.028922 0.971078 116 | 0.975076 0.024924 117 | 0.996514 0.003486 118 | 0.002619 0.997381 119 | 0.232152 0.767848 120 | 0.143624 0.856376 121 | 0.989838 0.010162 122 | 0.158352 0.841648 123 | 0.058998 0.941002 124 | 0.593080 0.406920 125 | 0.997703 0.002297 126 | 0.069720 0.930280 127 | 0.886732 0.113268 128 | 0.739399 0.260601 129 | 0.001616 0.998384 130 | 0.070098 0.929902 131 | 0.453419 0.546581 132 | 0.992781 0.007219 133 | 0.573809 0.426191 134 | 0.008584 0.991416 135 | 0.050800 0.949200 136 | 0.732246 0.267754 137 | 0.989812 0.010188 138 | 0.219366 0.780634 139 | 0.006969 0.993031 140 | 0.185238 0.814762 141 | 0.998112 0.001888 142 | 0.339359 0.660641 143 | 0.939270 0.060730 144 | 0.633369 0.366631 145 | 0.245514 0.754486 146 | 0.248691 0.751309 147 | 0.263072 0.736928 148 | 0.003037 0.996963 149 | 0.901762 0.098238 150 | 0.224052 0.775948 151 | 0.997198 0.002802 152 | 0.167212 0.832788 153 | 0.301326 0.698674 154 | 0.272351 0.727649 155 | 0.077950 0.922050 156 | 0.243474 0.756526 157 | 0.163620 0.836380 158 | 0.371254 0.628746 159 | 0.997634 0.002366 160 | 0.049000 0.951000 161 | 0.541675 0.458325 162 | 0.876381 0.123619 163 | 0.530690 0.469310 164 | 0.251742 0.748258 165 | 0.945323 0.054677 166 | 0.996418 0.003582 167 | 0.975009 0.024991 168 | 0.004857 0.995143 169 | 0.998017 0.001983 170 | 0.001762 0.998238 171 | 0.115508 0.884492 172 | 0.921012 0.078988 173 | 0.998151 0.001849 174 | 0.646833 0.353167 175 | 0.984644 0.015356 176 | 0.997853 0.002147 177 | 0.997309 
0.002691 178 | 0.281990 0.718010 179 | 0.990500 0.009500 180 | 0.005388 0.994612 181 | 0.997445 0.002555 182 | 0.410226 0.589774 183 | 0.287826 0.712174 184 | 0.353331 0.646669 185 | 0.998224 0.001776 186 | 0.996591 0.003409 187 | 0.998192 0.001808 188 | 0.277430 0.722570 189 | 0.028117 0.971883 190 | 0.604150 0.395850 191 | 0.973441 0.026559 192 | 0.142860 0.857140 193 | 0.411118 0.588882 194 | 0.470591 0.529409 195 | 0.535374 0.464626 196 | 0.056266 0.943734 197 | 0.997050 0.002950 198 | 0.080356 0.919644 199 | 0.994845 0.005155 200 | 0.196205 0.803795 201 | -------------------------------------------------------------------------------- /tests/files/fS_run_K.3.log: -------------------------------------------------------------------------------- 1 | Marginal likelihood with initialization (1) = -1.0145917797 2 | Marginal likelihood with initialization (2) = -1.0145882446 3 | Marginal likelihood with initialization (3) = -1.0145827297 4 | Marginal likelihood with initialization (4) = -1.0145901376 5 | Marginal likelihood with initialization (5) = -1.0145910911 6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs) 7 | 0 -1.0145827297 -- 0.348 8 | 10 -0.9859337987 0.0286489310 0.194 9 | 20 -0.9822573922 0.0036764065 0.202 10 | 30 -0.9794985794 0.0027588128 0.211 11 | 40 -0.9787630021 0.0007355773 0.207 12 | 50 -0.9784822781 0.0002807240 0.204 13 | 60 -0.9783333715 0.0001489066 0.201 14 | 70 -0.9782232677 0.0001101038 0.205 15 | 80 -0.9781580266 0.0000652411 0.203 16 | 90 -0.9781153321 0.0000426946 0.204 17 | 100 -0.9780905558 0.0000247762 0.202 18 | 110 -0.9780713773 0.0000191785 0.201 19 | 120 -0.9780566238 0.0000147536 0.202 20 | 130 -0.9780388288 0.0000177949 0.204 21 | 140 -0.9780276914 0.0000111374 0.204 22 | 150 -0.9780104209 0.0000172705 0.205 23 | 160 -0.9780096360 0.0000007849 0.202 24 | Marginal Likelihood = -0.9780096360 25 | Total time = 3.6365 seconds 26 | Total iterations = 160 27 | 
-------------------------------------------------------------------------------- /tests/files/fS_run_K.4.log: -------------------------------------------------------------------------------- 1 | Marginal likelihood with initialization (1) = -1.0322754363 2 | Marginal likelihood with initialization (2) = -1.0322789256 3 | Marginal likelihood with initialization (3) = -1.0322756875 4 | Marginal likelihood with initialization (4) = -1.0322761998 5 | Marginal likelihood with initialization (5) = -1.0322750626 6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs) 7 | 0 -1.0322750626 -- 0.444 8 | 10 -0.9968657948 0.0354092679 0.228 9 | 20 -0.9927563614 0.0041094334 0.250 10 | 30 -0.9906067583 0.0021496031 0.242 11 | 40 -0.9892050089 0.0014017494 0.240 12 | 50 -0.9880193078 0.0011857011 0.250 13 | 60 -0.9869323420 0.0010869658 0.249 14 | 70 -0.9858803255 0.0010520165 0.261 15 | 80 -0.9844959958 0.0013843298 0.277 16 | 90 -0.9814235998 0.0030723959 0.482 17 | 100 -0.9809491806 0.0004744193 0.906 18 | 110 -0.9808581435 0.0000910371 0.915 19 | 120 -0.9808059295 0.0000522140 0.913 20 | 130 -0.9807703261 0.0000356034 0.921 21 | 140 -0.9807255141 0.0000448120 0.942 22 | 150 -0.9807141701 0.0000113440 0.997 23 | 160 -0.9806841895 0.0000299807 0.908 24 | 170 -0.9806591596 0.0000250299 0.901 25 | 180 -0.9806227676 0.0000363919 0.933 26 | 190 -0.9806137539 0.0000090137 0.912 27 | 200 -0.9806135049 0.0000002490 1.024 28 | Marginal Likelihood = -0.9806135049 29 | Total time = 13.2452 seconds 30 | Total iterations = 200 31 | -------------------------------------------------------------------------------- /tests/files/fS_run_K.5.log: -------------------------------------------------------------------------------- 1 | Marginal likelihood with initialization (1) = -1.0494952058 2 | Marginal likelihood with initialization (2) = -1.0494959772 3 | Marginal likelihood with initialization (3) = -1.0494950292 4 | Marginal likelihood with initialization (4) = 
-1.0494955249 5 | Marginal likelihood with initialization (5) = -1.0494938800 6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs) 7 | 0 -1.0494938800 -- 0.603 8 | 10 -1.0143674048 0.0351264752 0.324 9 | 20 -1.0080986272 0.0062687775 0.360 10 | 30 -1.0044323928 0.0036662344 0.353 11 | 40 -1.0018065336 0.0026258592 0.365 12 | 50 -0.9997522272 0.0020543064 0.380 13 | 60 -0.9981561486 0.0015960785 0.359 14 | 70 -0.9967190253 0.0014371233 0.376 15 | 80 -0.9950744351 0.0016445903 0.437 16 | 90 -0.9932921706 0.0017822644 0.555 17 | 100 -0.9905502256 0.0027419451 0.596 18 | 110 -0.9885425160 0.0020077096 1.076 19 | 120 -0.9874276668 0.0011148492 1.127 20 | 130 -0.9850634302 0.0023642366 1.140 21 | 140 -0.9829899842 0.0020734460 1.688 22 | 150 -0.9828916089 0.0000983753 1.762 23 | 160 -0.9828161668 0.0000754421 1.786 24 | 170 -0.9827707233 0.0000454435 1.756 25 | 180 -0.9826984185 0.0000723048 1.734 26 | 190 -0.9826596660 0.0000387525 1.724 27 | 200 -0.9827200318 -0.0000603657 1.766 28 | 210 -0.9826534179 0.0000666139 1.716 29 | 220 -0.9826403040 0.0000131139 1.716 30 | 230 -0.9826219649 0.0000183391 1.739 31 | 240 -0.9826069534 0.0000150115 1.732 32 | 250 -0.9826014574 0.0000054960 1.717 33 | 260 -0.9825777629 0.0000236945 1.710 34 | 270 -0.9825775986 0.0000001642 1.715 35 | Marginal Likelihood = -0.9825775986 36 | Total time = 32.3766 seconds 37 | Total iterations = 270 38 | -------------------------------------------------------------------------------- /tests/files/fS_run_K.6.log: -------------------------------------------------------------------------------- 1 | Marginal likelihood with initialization (1) = -1.0663514941 2 | Marginal likelihood with initialization (2) = -1.0663524294 3 | Marginal likelihood with initialization (3) = -1.0663511842 4 | Marginal likelihood with initialization (4) = -1.0663493670 5 | Marginal likelihood with initialization (5) = -1.0663537662 6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood 
Iteration_Time (secs) 7 | 0 -1.0663493670 -- 0.707 8 | 10 -1.0297408401 0.0366085269 0.375 9 | 20 -1.0209563255 0.0087845145 0.424 10 | 30 -1.0154511352 0.0055051903 0.482 11 | 40 -1.0122027626 0.0032483726 0.457 12 | 50 -1.0096371397 0.0025656228 0.454 13 | 60 -1.0076254619 0.0020116778 0.459 14 | 70 -1.0059788580 0.0016466039 0.442 15 | 80 -1.0043790133 0.0015998447 0.477 16 | 90 -1.0026262978 0.0017527155 0.513 17 | 100 -1.0002259160 0.0024003818 0.560 18 | 110 -0.9976624946 0.0025634214 0.688 19 | 120 -0.9960707049 0.0015917897 0.701 20 | 130 -0.9943839192 0.0016867857 0.722 21 | 140 -0.9919652260 0.0024186931 0.817 22 | 150 -0.9883990150 0.0035662110 1.220 23 | 160 -0.9879880261 0.0004109889 1.446 24 | 170 -0.9878610098 0.0001270163 1.472 25 | 180 -0.9877818167 0.0000791931 1.469 26 | 190 -0.9876545303 0.0001272864 1.462 27 | 200 -0.9875749509 0.0000795793 1.447 28 | 210 -0.9875508346 0.0000241164 1.450 29 | 220 -0.9875354411 0.0000153935 1.462 30 | 230 -0.9875174672 0.0000179739 1.460 31 | 240 -0.9875021183 0.0000153489 1.456 32 | 250 -0.9875020559 0.0000000623 1.440 33 | Marginal Likelihood = -0.9875020559 34 | Total time = 24.1385 seconds 35 | Total iterations = 250 36 | -------------------------------------------------------------------------------- /tests/files/mav_K1/outputEvidence.csv: -------------------------------------------------------------------------------- 1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE 2 | 
1,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,-4893.197483,-4895.314475,-4895.199946,-4896.925829,-4895.213564,-4895.170259,0.591499,-4950.068622,0.000000 3 | -------------------------------------------------------------------------------- /tests/files/mav_K1/outputEvidenceDetails.csv: -------------------------------------------------------------------------------- 1 | K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20 2 | 
1,-4837.237055,-4837.352752,-4837.247417,-4837.566108,-4837.195471,111.920856,115.923445,115.905059,118.719441,116.036186,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000 3 | -------------------------------------------------------------------------------- /tests/files/mav_K1/outputEvidenceNormalised.csv: -------------------------------------------------------------------------------- 1 | K,posterior_exhaustive,posterior_harmonic_mean,posterior_harmonic_LL,posterior_harmonic_UL,posterior_structure_mean,posterior_structure_LL,posterior_structure_UL,posterior_TI_mean,posterior_TI_LL,posterior_TI_UL 2 | 1,NA,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000 3 | -------------------------------------------------------------------------------- /tests/files/mav_K1/outputLog.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------ 2 | MAVERICK 3 | by Robert Verity and Richard A. 
Nichols 4 | Version 1.0.4 (30 November 2016) 5 | accessed Wed Mar 8 14:54:27 2017 6 | ------------------------------------------ 7 | 8 | Parameters file: //home/francisco/Programming/github/MavericK/parameters.txt 9 | 10 | Data file: //home/francisco/Programming/github/MavericK/Qsuber_sequenom.structure 11 | 12 | Parameters taking default values 13 | fixLabels_on = true 14 | mainThinning = 1 15 | outputComparisonStatistics_on = false 16 | outputEvanno_on = false 17 | outputEvidenceNormalised_on = true 18 | outputMaxLike_admixFreqs_on = false 19 | outputMaxLike_alleleFreqs_on = false 20 | outputPosteriorGrouping_on = false 21 | outputQmatrixError_gene_on = false 22 | outputQmatrix_structureFormat_on = false 23 | suppressWarning1_on = false 24 | thermodynamicThinning = 1 25 | 26 | Parameters read in from file 27 | EMalgorithm_on = f 28 | EMiterations = 100 29 | EMrepeats = 100 30 | admix_on = t 31 | alpha = 1.0 32 | alphaPropSD = 0.10 33 | exhaustive_on = f 34 | fixAlpha_on = f 35 | headerRow_on = t 36 | mainBurnin = 500 37 | mainRepeats = 5 38 | mainSamples = 4000 39 | missingData = -9 40 | outputEvidenceDetails_on = t 41 | outputEvidence_on = t 42 | outputLikelihood_on = t 43 | outputLog_on = t 44 | outputQmatrixError_ind_on = t 45 | outputQmatrixError_pop_on = t 46 | outputQmatrix_gene_on = t 47 | outputQmatrix_ind_on = t 48 | outputQmatrix_pop_on = t 49 | ploidy = 2 50 | ploidyCol_on = f 51 | popCol_on = t 52 | thermodynamicBurnin = 1000 53 | thermodynamicRungs = 20 54 | thermodynamicSamples = 5000 55 | thermodynamic_on = t 56 | 57 | Parameters defined on command line 58 | Kmax = 1 59 | Kmin = 1 60 | 61 | Data properties 62 | row 1 = header line 63 | column 1 = individual labels 64 | column 2 = population of origin 65 | unique populations = 19 66 | individuals = 375 67 | loci = 1 68 | alleles per locus = {597} 69 | missing observations = 0 of 750 70 | 71 | -- K=1 ---------------- 72 | 73 | Running exhaustive approach... 
74 | complete 75 | 76 | Running ordinary MCMC... 77 | analysis 1 of 5 78 | analysis 2 of 5 79 | analysis 3 of 5 80 | analysis 4 of 5 81 | analysis 5 of 5 82 | complete 83 | 84 | Carrying out thermodynamic integration... 85 | complete 86 | 87 | Estimates of (log) model evidence... 88 | 89 | Harmonic mean (averaged over 5 runs) 90 | estimate: -4950.068622 91 | standard error: 0.000000 92 | 93 | Structure estimator (averaged over 5 runs) 94 | estimate: -4895.170259 95 | standard error: 0.591499 96 | 97 | Thermodynamic integral estimator 98 | estimate: -4950.068622 99 | standard error: 0.000000 100 | 101 | Program completed in approximately 14 seconds 102 | Output written to: //home/francisco/aaa/K1/ 103 | ------------------------------------------ 104 | -------------------------------------------------------------------------------- /tests/files/mav_K1/outputQmatrixError_pop_K1.csv: -------------------------------------------------------------------------------- 1 | given_population,individuals,deme1 2 | 1,20,0.000000 3 | 2,20,0.000000 4 | 3,20,0.000000 5 | 4,19,0.000000 6 | 5,20,0.000000 7 | 6,20,0.000000 8 | 7,20,0.000000 9 | 8,20,0.000000 10 | 9,19,0.000000 11 | 10,20,0.000000 12 | 11,20,0.000000 13 | 12,19,0.000000 14 | 13,18,0.000000 15 | 14,20,0.000000 16 | 15,20,0.000000 17 | 16,20,0.000000 18 | 17,20,0.000000 19 | 18,20,0.000000 20 | 19,20,0.000000 21 | -------------------------------------------------------------------------------- /tests/files/mav_K1/outputQmatrix_pop_K1.csv: -------------------------------------------------------------------------------- 1 | given_population,members,deme1 2 | 1,20,1.000 3 | 2,20,1.000 4 | 3,20,1.000 5 | 4,19,1.000 6 | 5,20,1.000 7 | 6,20,1.000 8 | 7,20,1.000 9 | 8,20,1.000 10 | 9,19,1.000 11 | 10,20,1.000 12 | 11,20,1.000 13 | 12,19,1.000 14 | 13,18,1.000 15 | 14,20,1.000 16 | 15,20,1.000 17 | 16,20,1.000 18 | 17,20,1.000 19 | 18,20,1.000 20 | 19,20,1.000 21 | 
-------------------------------------------------------------------------------- /tests/files/mav_K2/outputEvidence.csv: -------------------------------------------------------------------------------- 1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE 2 | 2,NA,-4856.632930,-4848.323781,-4851.489523,-4848.881320,-4854.073068,-4851.880124,1.568297,-4899.476073,-4873.733718,-4968.378324,-4834.051540,-4888.401052,-4892.808141,21.898525,-4857.999632,0.186050 3 | -------------------------------------------------------------------------------- /tests/files/mav_K2/outputEvidenceDetails.csv: -------------------------------------------------------------------------------- 1 | 
K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20 2 | 2,-4642.025324,-4642.131637,-4646.324901,-4637.938826,-4642.016869,514.901498,463.204162,644.106847,392.225429,492.768366,-4871.698511,-4871.568007,-4870.901184,-4869.884613,-4869.304794,-4868.322469,-4866.607574,-4865.781276,-4865.899430,-4864.714164,-4861.072693,-4859.566166,-4856.120337,-4854.334843,-4858.785823,-4844.267226,-4838.273490,-4834.594181,-4831.627192,-4829.036598,0.059240,0.052458,0.053758,0.069849,0.055874,0.057787,0.838348,0.484864,0.061589,0.062912,0.580284,0.550482,0.867754,2.915845,0.075564,1.005823,0.454265,0.415732,0.478739,0.216293 3 | -------------------------------------------------------------------------------- /tests/files/mav_K2/outputEvidenceNormalised.csv: -------------------------------------------------------------------------------- 1 | 
K,posterior_exhaustive,posterior_harmonic_mean,posterior_harmonic_LL,posterior_harmonic_UL,posterior_structure_mean,posterior_structure_LL,posterior_structure_UL,posterior_TI_mean,posterior_TI_LL,posterior_TI_UL 2 | 2,NA,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000 3 | -------------------------------------------------------------------------------- /tests/files/mav_K2/outputLog.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------ 2 | MAVERICK 3 | by Robert Verity and Richard A. Nichols 4 | Version 1.0.4 (30 November 2016) 5 | accessed Wed Mar 8 14:54:27 2017 6 | ------------------------------------------ 7 | 8 | Parameters file: //home/francisco/Programming/github/MavericK/parameters.txt 9 | 10 | Data file: //home/francisco/Programming/github/MavericK/Qsuber_sequenom.structure 11 | 12 | Parameters taking default values 13 | fixLabels_on = true 14 | mainThinning = 1 15 | outputComparisonStatistics_on = false 16 | outputEvanno_on = false 17 | outputEvidenceNormalised_on = true 18 | outputMaxLike_admixFreqs_on = false 19 | outputMaxLike_alleleFreqs_on = false 20 | outputPosteriorGrouping_on = false 21 | outputQmatrixError_gene_on = false 22 | outputQmatrix_structureFormat_on = false 23 | suppressWarning1_on = false 24 | thermodynamicThinning = 1 25 | 26 | Parameters read in from file 27 | EMalgorithm_on = f 28 | EMiterations = 100 29 | EMrepeats = 100 30 | admix_on = t 31 | alpha = 1.0 32 | alphaPropSD = 0.10 33 | exhaustive_on = f 34 | fixAlpha_on = f 35 | headerRow_on = t 36 | mainBurnin = 500 37 | mainRepeats = 5 38 | mainSamples = 4000 39 | missingData = -9 40 | outputEvidenceDetails_on = t 41 | outputEvidence_on = t 42 | outputLikelihood_on = t 43 | outputLog_on = t 44 | outputQmatrixError_ind_on = t 45 | outputQmatrixError_pop_on = t 46 | outputQmatrix_gene_on = t 47 | outputQmatrix_ind_on = t 48 | outputQmatrix_pop_on = t 49 | ploidy = 2 50 | 
ploidyCol_on = f 51 | popCol_on = t 52 | thermodynamicBurnin = 1000 53 | thermodynamicRungs = 20 54 | thermodynamicSamples = 5000 55 | thermodynamic_on = t 56 | 57 | Parameters defined on command line 58 | Kmax = 2 59 | Kmin = 2 60 | 61 | Data properties 62 | row 1 = header line 63 | column 1 = individual labels 64 | column 2 = population of origin 65 | unique populations = 19 66 | individuals = 375 67 | loci = 1 68 | alleles per locus = {597} 69 | missing observations = 0 of 750 70 | 71 | -- K=2 ---------------- 72 | 73 | Running ordinary MCMC... 74 | analysis 1 of 5 75 | analysis 2 of 5 76 | analysis 3 of 5 77 | analysis 4 of 5 78 | analysis 5 of 5 79 | complete 80 | 81 | Carrying out thermodynamic integration... 82 | power = 0.00 83 | power = 0.05 84 | power = 0.11 85 | power = 0.16 86 | power = 0.21 87 | power = 0.26 88 | power = 0.32 89 | power = 0.37 90 | power = 0.42 91 | power = 0.47 92 | power = 0.53 93 | power = 0.58 94 | power = 0.63 95 | power = 0.68 96 | power = 0.74 97 | power = 0.79 98 | power = 0.84 99 | power = 0.89 100 | power = 0.95 101 | power = 1.00 102 | complete 103 | 104 | Estimates of (log) model evidence... 
105 | 106 | Harmonic mean (averaged over 5 runs) 107 | estimate: -4851.880124 108 | standard error: 1.568297 109 | 110 | Structure estimator (averaged over 5 runs) 111 | estimate: -4892.808141 112 | standard error: 21.898525 113 | 114 | Thermodynamic integral estimator 115 | estimate: -4857.999632 116 | standard error: 0.186050 117 | 118 | Program completed in approximately 76 seconds 119 | Output written to: //home/francisco/aaa/K2/ 120 | ------------------------------------------ 121 | -------------------------------------------------------------------------------- /tests/files/mav_K2/outputQmatrixError_pop_K2.csv: -------------------------------------------------------------------------------- 1 | given_population,individuals,deme1,deme2 2 | 1,20,0.001399,0.001399 3 | 2,20,0.002001,0.002001 4 | 3,20,0.001360,0.001360 5 | 4,19,0.001139,0.001139 6 | 5,20,0.000964,0.000964 7 | 6,20,0.002221,0.002221 8 | 7,20,0.002534,0.002534 9 | 8,20,0.001788,0.001788 10 | 9,19,0.002095,0.002095 11 | 10,20,0.001570,0.001570 12 | 11,20,0.003051,0.003051 13 | 12,19,0.001470,0.001470 14 | 13,18,0.001024,0.001024 15 | 14,20,0.002149,0.002149 16 | 15,20,0.001915,0.001915 17 | 16,20,0.002162,0.002162 18 | 17,20,0.001749,0.001749 19 | 18,20,0.007064,0.007064 20 | 19,20,0.006483,0.006483 21 | -------------------------------------------------------------------------------- /tests/files/mav_K2/outputQmatrix_pop_K2.csv: -------------------------------------------------------------------------------- 1 | given_population,members,deme1,deme2 2 | 1,20,0.515,0.485 3 | 2,20,0.514,0.486 4 | 3,20,0.518,0.482 5 | 4,19,0.514,0.486 6 | 5,20,0.515,0.485 7 | 6,20,0.514,0.486 8 | 7,20,0.520,0.480 9 | 8,20,0.516,0.484 10 | 9,19,0.514,0.486 11 | 10,20,0.516,0.484 12 | 11,20,0.520,0.480 13 | 12,19,0.514,0.486 14 | 13,18,0.515,0.485 15 | 14,20,0.513,0.487 16 | 15,20,0.521,0.479 17 | 16,20,0.515,0.485 18 | 17,20,0.514,0.486 19 | 18,20,0.211,0.789 20 | 19,20,0.232,0.768 21 | 
-------------------------------------------------------------------------------- /tests/files/mav_K3/outputEvidence.csv: -------------------------------------------------------------------------------- 1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE 2 | 3,NA,-4812.193788,-4811.617486,-4816.869374,-4811.804987,-4815.077819,-4813.512691,1.047962,-4805.251200,-4828.874173,-4877.958287,-4827.789834,-4812.903396,-4830.555378,12.667684,-4826.325657,0.233016 3 | -------------------------------------------------------------------------------- /tests/files/mav_K3/outputEvidenceDetails.csv: -------------------------------------------------------------------------------- 1 | 
K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20 2 | 3,-4559.252409,-4562.072614,-4563.546862,-4560.779604,-4559.438919,491.997583,533.603118,628.822851,534.020459,506.928953,-4844.015929,-4842.788256,-4841.668817,-4840.246323,-4840.619825,-4839.634357,-4837.084935,-4834.745895,-4833.497700,-4831.799614,-4828.558782,-4826.749753,-4826.112989,-4819.417419,-4812.081612,-4809.074651,-4803.286714,-4797.179265,-4820.155460,-4786.954286,0.056233,0.061409,0.097444,0.678973,0.060120,0.060439,0.690001,0.499692,1.448793,1.576579,1.533912,1.613603,1.840289,1.758141,0.373402,1.039759,0.565020,0.893233,0.108332,0.313486 3 | -------------------------------------------------------------------------------- /tests/files/mav_K3/outputEvidenceNormalised.csv: -------------------------------------------------------------------------------- 1 | 
K,posterior_exhaustive,posterior_harmonic_mean,posterior_harmonic_LL,posterior_harmonic_UL,posterior_structure_mean,posterior_structure_LL,posterior_structure_UL,posterior_TI_mean,posterior_TI_LL,posterior_TI_UL 2 | 3,NA,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000 3 | -------------------------------------------------------------------------------- /tests/files/mav_K3/outputLog.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------ 2 | MAVERICK 3 | by Robert Verity and Richard A. Nichols 4 | Version 1.0.4 (30 November 2016) 5 | accessed Wed Mar 8 14:54:27 2017 6 | ------------------------------------------ 7 | 8 | Parameters file: //home/francisco/Programming/github/MavericK/parameters.txt 9 | 10 | Data file: //home/francisco/Programming/github/MavericK/Qsuber_sequenom.structure 11 | 12 | Parameters taking default values 13 | fixLabels_on = true 14 | mainThinning = 1 15 | outputComparisonStatistics_on = false 16 | outputEvanno_on = false 17 | outputEvidenceNormalised_on = true 18 | outputMaxLike_admixFreqs_on = false 19 | outputMaxLike_alleleFreqs_on = false 20 | outputPosteriorGrouping_on = false 21 | outputQmatrixError_gene_on = false 22 | outputQmatrix_structureFormat_on = false 23 | suppressWarning1_on = false 24 | thermodynamicThinning = 1 25 | 26 | Parameters read in from file 27 | EMalgorithm_on = f 28 | EMiterations = 100 29 | EMrepeats = 100 30 | admix_on = t 31 | alpha = 1.0 32 | alphaPropSD = 0.10 33 | exhaustive_on = f 34 | fixAlpha_on = f 35 | headerRow_on = t 36 | mainBurnin = 500 37 | mainRepeats = 5 38 | mainSamples = 4000 39 | missingData = -9 40 | outputEvidenceDetails_on = t 41 | outputEvidence_on = t 42 | outputLikelihood_on = t 43 | outputLog_on = t 44 | outputQmatrixError_ind_on = t 45 | outputQmatrixError_pop_on = t 46 | outputQmatrix_gene_on = t 47 | outputQmatrix_ind_on = t 48 | outputQmatrix_pop_on = t 49 | ploidy = 2 50 | 
ploidyCol_on = f 51 | popCol_on = t 52 | thermodynamicBurnin = 1000 53 | thermodynamicRungs = 20 54 | thermodynamicSamples = 5000 55 | thermodynamic_on = t 56 | 57 | Parameters defined on command line 58 | Kmax = 3 59 | Kmin = 3 60 | 61 | Data properties 62 | row 1 = header line 63 | column 1 = individual labels 64 | column 2 = population of origin 65 | unique populations = 19 66 | individuals = 375 67 | loci = 1 68 | alleles per locus = {597} 69 | missing observations = 0 of 750 70 | 71 | -- K=3 ---------------- 72 | 73 | Running ordinary MCMC... 74 | analysis 1 of 5 75 | analysis 2 of 5 76 | analysis 3 of 5 77 | analysis 4 of 5 78 | analysis 5 of 5 79 | complete 80 | 81 | Carrying out thermodynamic integration... 82 | power = 0.00 83 | power = 0.05 84 | power = 0.11 85 | power = 0.16 86 | power = 0.21 87 | power = 0.26 88 | power = 0.32 89 | power = 0.37 90 | power = 0.42 91 | power = 0.47 92 | power = 0.53 93 | power = 0.58 94 | power = 0.63 95 | power = 0.68 96 | power = 0.74 97 | power = 0.79 98 | power = 0.84 99 | power = 0.89 100 | power = 0.95 101 | power = 1.00 102 | complete 103 | 104 | Estimates of (log) model evidence... 
105 | 106 | Harmonic mean (averaged over 5 runs) 107 | estimate: -4813.512691 108 | standard error: 1.047962 109 | 110 | Structure estimator (averaged over 5 runs) 111 | estimate: -4830.555378 112 | standard error: 12.667684 113 | 114 | Thermodynamic integral estimator 115 | estimate: -4826.325657 116 | standard error: 0.233016 117 | 118 | Program completed in approximately 96 seconds 119 | Output written to: //home/francisco/aaa/K3/ 120 | ------------------------------------------ 121 | -------------------------------------------------------------------------------- /tests/files/mav_K3/outputQmatrixError_pop_K3.csv: -------------------------------------------------------------------------------- 1 | given_population,individuals,deme1,deme2,deme3 2 | 1,20,0.002078,0.002111,0.001967 3 | 2,20,0.002416,0.002737,0.001468 4 | 3,20,0.002399,0.001352,0.002512 5 | 4,19,0.001307,0.001874,0.001004 6 | 5,20,0.001646,0.001803,0.001779 7 | 6,20,0.001633,0.002033,0.001200 8 | 7,20,0.001752,0.001634,0.002491 9 | 8,20,0.001616,0.001361,0.001761 10 | 9,19,0.002039,0.000818,0.002452 11 | 10,20,0.001869,0.001662,0.001961 12 | 11,20,0.001816,0.002076,0.003254 13 | 12,19,0.001825,0.001169,0.001118 14 | 13,18,0.002402,0.002587,0.001475 15 | 14,20,0.001994,0.001940,0.001300 16 | 15,20,0.001853,0.002396,0.001091 17 | 16,20,0.001925,0.001530,0.002277 18 | 17,20,0.002869,0.001445,0.002779 19 | 18,20,0.002704,0.002997,0.002013 20 | 19,20,0.002641,0.002179,0.000864 21 | -------------------------------------------------------------------------------- /tests/files/mav_K3/outputQmatrix_pop_K3.csv: -------------------------------------------------------------------------------- 1 | given_population,members,deme1,deme2,deme3 2 | 1,20,0.317,0.345,0.339 3 | 2,20,0.317,0.331,0.353 4 | 3,20,0.315,0.335,0.350 5 | 4,19,0.318,0.343,0.339 6 | 5,20,0.318,0.343,0.339 7 | 6,20,0.316,0.332,0.352 8 | 7,20,0.315,0.348,0.337 9 | 8,20,0.316,0.331,0.352 10 | 9,19,0.318,0.343,0.339 11 | 10,20,0.314,0.349,0.337 12 | 
11,20,0.311,0.308,0.381 13 | 12,19,0.319,0.342,0.339 14 | 13,18,0.317,0.345,0.338 15 | 14,20,0.318,0.343,0.338 16 | 15,20,0.312,0.336,0.353 17 | 16,20,0.316,0.348,0.337 18 | 17,20,0.318,0.342,0.340 19 | 18,20,0.705,0.149,0.146 20 | 19,20,0.680,0.158,0.163 21 | -------------------------------------------------------------------------------- /tests/files/test_merged/outputEvidence.csv: -------------------------------------------------------------------------------- 1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE 2 | 1,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,-4893.197483,-4895.314475,-4895.199946,-4896.925829,-4895.213564,-4895.170259,0.591499,-4950.068622,0.000000 3 | 2,NA,-4856.632930,-4848.323781,-4851.489523,-4848.881320,-4854.073068,-4851.880124,1.568297,-4899.476073,-4873.733718,-4968.378324,-4834.051540,-4888.401052,-4892.808141,21.898525,-4857.999632,0.186050 4 | 3,NA,-4812.193788,-4811.617486,-4816.869374,-4811.804987,-4815.077819,-4813.512691,1.047962,-4805.251200,-4828.874173,-4877.958287,-4827.789834,-4812.903396,-4830.555378,12.667684,-4826.325657,0.233016 5 | -------------------------------------------------------------------------------- /tests/files/test_merged/outputEvidenceDetails.csv: -------------------------------------------------------------------------------- 1 | 
K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20 2 | 1,-4837.237055,-4837.352752,-4837.247417,-4837.566108,-4837.195471,111.920856,115.923445,115.905059,118.719441,116.036186,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000 3 | 
2,-4642.025324,-4642.131637,-4646.324901,-4637.938826,-4642.016869,514.901498,463.204162,644.106847,392.225429,492.768366,-4871.698511,-4871.568007,-4870.901184,-4869.884613,-4869.304794,-4868.322469,-4866.607574,-4865.781276,-4865.899430,-4864.714164,-4861.072693,-4859.566166,-4856.120337,-4854.334843,-4858.785823,-4844.267226,-4838.273490,-4834.594181,-4831.627192,-4829.036598,0.059240,0.052458,0.053758,0.069849,0.055874,0.057787,0.838348,0.484864,0.061589,0.062912,0.580284,0.550482,0.867754,2.915845,0.075564,1.005823,0.454265,0.415732,0.478739,0.216293 4 | 3,-4559.252409,-4562.072614,-4563.546862,-4560.779604,-4559.438919,491.997583,533.603118,628.822851,534.020459,506.928953,-4844.015929,-4842.788256,-4841.668817,-4840.246323,-4840.619825,-4839.634357,-4837.084935,-4834.745895,-4833.497700,-4831.799614,-4828.558782,-4826.749753,-4826.112989,-4819.417419,-4812.081612,-4809.074651,-4803.286714,-4797.179265,-4820.155460,-4786.954286,0.056233,0.061409,0.097444,0.678973,0.060120,0.060439,0.690001,0.499692,1.448793,1.576579,1.533912,1.613603,1.840289,1.758141,0.373402,1.039759,0.565020,0.893233,0.108332,0.313486 5 | -------------------------------------------------------------------------------- /tests/maverick_field_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2017 Francisco Pina Martins 4 | # This file is part of structure_threader. 5 | # structure_threader is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | 10 | # structure_threader is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 
# MavericK "field test": runs structure_threader end-to-end, wrapping the
# MavericK binary found on PATH, against the small bundled dataset.
# Must be launched from the repository root, because the input paths below
# are built from the current working directory.

# Abort on the first failing command (this includes a failed `which` lookup).
set -e

# ANSI escape sequences used to colour the final success message.
LightGreen='\033[1;32m'
NoColor='\033[0m'

echo "Runnig MavericK 'field test'. This will simulate a full wrapped run on small test data."

git_dir=$(pwd)
maverick_bin=$(which MavericK)
structure_threader_exec=$(which structure_threader)

# Every expansion is quoted so that paths containing spaces are not word-split
# (same convention as tests/structure_field_test.sh).
"${structure_threader_exec}" run -i "${git_dir}/tests/smalldata/Reduced_dataset.structure" -o ~/results -mv "${maverick_bin}" -K 3 -t 4 --params "${git_dir}/tests/smalldata/parameters.txt"

echo -e "${LightGreen}MavericK 'Field test' ran successfully. Yay!${NoColor}"
def test_cpu_checker():
    """
    Checks that cpu_checker() accepts a valid thread count and caps an
    excessive request at the number of CPUs reported by the OS.
    """
    available = os.cpu_count()

    # A single thread is returned unchanged
    assert sc.cpu_checker(1) == 1
    # One thread more than the machine has is expected to be clamped
    assert sc.cpu_checker(available + 1) == available


def test_file_checker(tmpdir):
    """
    Checks that file_checker() returns None for paths of the expected kind
    and exits for paths of the wrong kind or paths that do not exist.
    """
    directory = tmpdir.mkdir("sub")
    regular_file = directory.join("filetest.txt")
    regular_file.write("content")

    file_path = str(regular_file)
    dir_path = str(directory)

    # Paths of the expected kind pass silently
    assert sc.file_checker(file_path) is None
    assert sc.file_checker(dir_path, is_file=False) is None

    # Each of these calls must abort with SystemExit
    failing_calls = (
        ((dir_path,), {}),                   # asked for a file, given a dir
        ((file_path,), {"is_file": False}),  # asked for a dir, given a file
        ((file_path + "a",), {}),            # asked for a file, wrong path
    )
    for args, kwargs in failing_calls:
        with pytest.raises(SystemExit):
            sc.file_checker(*args, **kwargs)
-------------------------------------------------------------------------------- 1 | ../../PTS/data/extraparams -------------------------------------------------------------------------------- /tests/smalldata/indfile.txt: -------------------------------------------------------------------------------- 1 | ../../TestData/indfile.txt -------------------------------------------------------------------------------- /tests/smalldata/mainparams: -------------------------------------------------------------------------------- 1 | ../../PTS/data/mainparams -------------------------------------------------------------------------------- /tests/smalldata/parameters.txt: -------------------------------------------------------------------------------- 1 | ../../TestData/parameters.txt -------------------------------------------------------------------------------- /tests/smalldata/parameters_a.txt: -------------------------------------------------------------------------------- 1 | #### Data proprieties 2 | headerRow_on t 3 | popCol_on t 4 | ploidyCol_on f 5 | ploidy 2 6 | missingData -9 7 | 8 | 9 | #### Model parameters 10 | Kmin 1 11 | Kmax 4 12 | admix_on t 13 | fixAlpha_on f 14 | alpha 0.9,0.8,0.7,0.6 15 | alphaPropSD 0.10 16 | 17 | 18 | #### Simulation parameters 19 | exhaustive_on f 20 | mainRepeats 5 21 | mainBurnin 500 22 | mainSamples 4000 23 | 24 | thermodynamic_on t 25 | thermodynamicRungs 20 26 | thermodynamicBurnin 1000 27 | thermodynamicSamples 5000 28 | 29 | 30 | #### Basic output proprieties 31 | outputLog_on t 32 | outputLikelihood_on t 33 | outputQmatrix_ind_on t 34 | outputQmatrix_pop_on t 35 | outputEvidence_on t 36 | outputEvidenceDetails_on t 37 | 38 | 39 | #### Output location 40 | -------------------------------------------------------------------------------- /tests/smalldata/parameters_a_as.txt: -------------------------------------------------------------------------------- 1 | #### Data proprieties 2 | headerRow_on t 3 | popCol_on t 4 | 
ploidyCol_on f 5 | ploidy 2 6 | missingData -9 7 | 8 | 9 | #### Model parameters 10 | Kmin 1 11 | Kmax 4 12 | admix_on t 13 | fixAlpha_on f 14 | alpha 0.9,0.8,0.7,0.6 15 | alphaPropSD 0.09,0.08,0.07,0.06 16 | 17 | 18 | #### Simulation parameters 19 | exhaustive_on f 20 | mainRepeats 5 21 | mainBurnin 500 22 | mainSamples 4000 23 | 24 | thermodynamic_on t 25 | thermodynamicRungs 20 26 | thermodynamicBurnin 1000 27 | thermodynamicSamples 5000 28 | 29 | 30 | #### Basic output proprieties 31 | outputLog_on t 32 | outputLikelihood_on t 33 | outputQmatrix_ind_on t 34 | outputQmatrix_pop_on t 35 | outputEvidence_on t 36 | outputEvidenceDetails_on t 37 | 38 | 39 | #### Output location 40 | -------------------------------------------------------------------------------- /tests/smalldata/parameters_as.txt: -------------------------------------------------------------------------------- 1 | #### Data proprieties 2 | headerRow_on t 3 | popCol_on t 4 | ploidyCol_on f 5 | ploidy 2 6 | missingData -9 7 | 8 | 9 | #### Model parameters 10 | Kmin 1 11 | Kmax 4 12 | admix_on t 13 | fixAlpha_on f 14 | alpha 1.0 15 | alphaPropSD 0.09,0.08,0.07,0.06 16 | 17 | 18 | #### Simulation parameters 19 | exhaustive_on f 20 | mainRepeats 5 21 | mainBurnin 500 22 | mainSamples 4000 23 | 24 | thermodynamic_on t 25 | thermodynamicRungs 20 26 | thermodynamicBurnin 1000 27 | thermodynamicSamples 5000 28 | 29 | 30 | #### Basic output proprieties 31 | outputLog_on t 32 | outputLikelihood_on t 33 | outputQmatrix_ind_on t 34 | outputQmatrix_pop_on t 35 | outputEvidence_on t 36 | outputEvidenceDetails_on t 37 | 38 | 39 | #### Output location 40 | -------------------------------------------------------------------------------- /tests/smalldata/parameters_f.txt: -------------------------------------------------------------------------------- 1 | #### Data proprieties 2 | headerRow_on t 3 | popCol_on t 4 | ploidyCol_on f 5 | ploidy 2 6 | missingData -9 7 | 8 | 9 | #### Model parameters 10 | Kmin 1 11 | Kmax 4 
12 | admix_on t 13 | fixAlpha_on f 14 | alpha 1.0 15 | alphaPropSD 0.10 16 | 17 | 18 | #### Simulation parameters 19 | exhaustive_on f 20 | mainRepeats 5 21 | mainBurnin 500 22 | mainSamples 4000 23 | 24 | thermodynamic_on f 25 | thermodynamicRungs 20 26 | thermodynamicBurnin 1000 27 | thermodynamicSamples 5000 28 | 29 | 30 | #### Basic output proprieties 31 | outputLog_on t 32 | outputLikelihood_on t 33 | outputQmatrix_ind_on t 34 | outputQmatrix_pop_on t 35 | outputEvidence_on t 36 | outputEvidenceDetails_on t 37 | 38 | 39 | #### Output location 40 | -------------------------------------------------------------------------------- /tests/structure_field_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016-2022 Francisco Pina Martins 4 | # This file is part of structure_threader. 5 | # structure_threader is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | 10 | # structure_threader is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | 15 | # You should have received a copy of the GNU General Public License 16 | # along with structure_threader. If not, see . 17 | 18 | set -e 19 | 20 | LightGreen='\033[1;32m' 21 | NoColor='\033[0m' 22 | 23 | echo "Runnig STRUCTURE 'field test'. This will simulate a full wrapped run on small test data." 
def test_str_cli_generator():
    """
    Tests if str_cli_generator() is working correctly.

    Three scenarios are checked in sequence, each building on the previous
    expected command line: no seed and no parameters, a seed, and a seed
    plus extra parameters.
    """
    # Define arguments
    arg = mockups.Arguments()
    k_val = 4
    outfile = "str_K4_rep1"
    arg.params = None
    seed = None

    mock_cli = ["EP", "-K", str(k_val), "-i", "IF", "-o", outfile]
    returned_cli, returned_outfile = sw.str_cli_generator(arg, k_val, 1, seed)

    assert returned_cli == mock_cli
    assert returned_outfile == outfile

    # Add a seed to the test
    seed = "1234"
    mock_cli += ["-D", "1234"]
    returned_cli, returned_outfile = sw.str_cli_generator(arg, k_val, 1, seed)

    assert returned_cli == mock_cli
    assert returned_outfile == outfile

    # Add an argument to the test
    arg.params = "test"

    # NOTE(review): `+=` with a str extends the list one character at a
    # time; presumably the wrapper extends its own list the same way, but
    # that cannot be seen from this file - confirm against the wrapper.
    mock_cli += arg.params
    returned_cli, returned_outfile = sw.str_cli_generator(arg, k_val, 1, seed)

    assert returned_cli == mock_cli
    assert returned_outfile == outfile


def test_str_param_checker():
    """
    Tests if the STRUCTURE parameter file checker is working.

    After the check, `arg.params` is expected to hold the "-m"/"-e" option
    pairs for the main and extra parameter files.
    """
    arg = mockups.Arguments()
    # Relative path: resolved against the directory the tests are run from
    arg.infile = "smalldata/Reduced_dataset.structure"
    arg.params = "mainparams"
    sw.str_param_checker(arg)
    assert arg.params == ["-m", "mainparams", "-e", "extraparams"]


def test_seed_generator():
    """
    Tests if the Seed generator is working as intended.

    Two different master seeds are tried against hard-coded expected job
    lists, so the generator must be deterministic for a given seed.
    """
    k_list = [1, 2]
    replicates = [1, 2, 3]
    seed = 1235813

    # Each expected job is a 3-tuple whose first element is a seed string;
    # the other two elements take their values from `k_list` and
    # `replicates` respectively, both in descending order.
    mock_jobs = [("2153978", 2, 3), ("940261", 2, 2), ("8867621", 2, 1),
                 ("786598", 1, 3), ("3922463", 1, 2), ("6870574", 1, 1)]
    returned_jobs = sw.seed_generator(seed, k_list, replicates)
    assert returned_jobs == mock_jobs

    # Use a different seed
    seed = 42

    mock_jobs = [("1867825", 2, 3), ("419610", 2, 2), ("4614226", 2, 1),
                 ("4108603", 1, 3), ("3744854", 1, 2), ("2341057", 1, 1)]
    returned_jobs = sw.seed_generator(seed, k_list, replicates)
    assert returned_jobs == mock_jobs