├── .gitignore
├── .gitlab-ci.yml
├── .readthedocs.yml
├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── PTS
├── README.md
├── data
│ ├── BigTestData.str.tar.xz
│ ├── Reduced_dataset.structure
│ ├── extraparams
│ └── mainparams
├── fastStructure
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure_threader-faststructure
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure_threader-structure
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure_threader-structure_1
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure_threader-structure_16
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure_threader-structure_2
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure_threader-structure_4
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── structure_threader-structure_6
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
└── structure_threader-structure_8
│ ├── downloads.xml
│ ├── install.sh
│ ├── results-definition.xml
│ └── test-definition.xml
├── README.md
├── TestData
├── BigTestData.str.tar.xz
├── README.md
├── SmallTestData.structure
├── extraparams
├── indfile.txt
├── joblist.txt
├── mainparams
├── mav_benchmark_parameters.txt
└── parameters.txt
├── benchmarks
├── ParallelStructure_runner.R
├── README.md
├── Side_by_side.png
├── Side_by_side.svg
├── bar_plotter.py
├── benchmark.sh
├── benchmark_fast.sh
├── speedup_plotter.py
└── system_speedup_plotter.py
├── docs
├── benchmark.md
├── binaries.md
├── citation.md
├── external.md
├── faq.md
├── future.md
├── index.md
├── install.md
├── output.md
├── requirements.txt
├── test_data.md
└── usage.md
├── helper_scripts
├── Dockerfile
├── README.md
├── install_faststructure.sh
├── install_maverick.sh
├── install_structure.sh
└── structure.spec
├── mkdocs.yml
├── pyproject.toml
├── requirements.txt
├── setup.cfg
├── setup.py
├── structure_threader
├── __init__.py
├── argparser.py
├── bins
│ ├── __init__.py
│ ├── linux
│ │ ├── MavericK
│ │ ├── __init__.py
│ │ ├── fastStructure
│ │ └── structure
│ └── osx
│ │ ├── MavericK
│ │ ├── fastStructure
│ │ └── structure
├── colorer
│ ├── __init__.py
│ └── colorer.py
├── evanno
│ ├── LICENSE
│ ├── __init__.py
│ ├── fastChooseK.py
│ ├── harvesterCore.py
│ └── structureHarvester.py
├── plotter
│ ├── __init__.py
│ ├── html_template.py
│ └── structplot.py
├── sanity_checks
│ ├── __init__.py
│ └── sanity.py
├── skeletons
│ ├── __init__.py
│ └── stparams.py
├── structure_threader.py
└── wrappers
│ ├── __init__.py
│ ├── alstructure_wrapper.R
│ ├── alstructure_wrapper.py
│ ├── faststructure_wrapper.py
│ ├── maverick_wrapper.py
│ └── structure_wrapper.py
└── tests
├── alstructure_field_tests.sh
├── alstructure_function_tests.py
├── conftest.py
├── evanno_tests.py
├── fastStructure_field_test.sh
├── faststructure_function_tests.py
├── files
├── chooseK.txt
├── fS_run_K.1.log
├── fS_run_K.1.meanP
├── fS_run_K.1.meanQ
├── fS_run_K.2.log
├── fS_run_K.2.meanP
├── fS_run_K.2.meanQ
├── fS_run_K.3.log
├── fS_run_K.3.meanP
├── fS_run_K.3.meanQ
├── fS_run_K.4.log
├── fS_run_K.4.meanP
├── fS_run_K.4.meanQ
├── fS_run_K.5.log
├── fS_run_K.5.meanP
├── fS_run_K.5.meanQ
├── fS_run_K.6.log
├── fS_run_K.6.meanP
├── fS_run_K.6.meanQ
├── mav_K1
│ ├── outputEvidence.csv
│ ├── outputEvidenceDetails.csv
│ ├── outputEvidenceNormalised.csv
│ ├── outputLikelihood.csv
│ ├── outputLog.txt
│ ├── outputQmatrixError_ind_K1.csv
│ ├── outputQmatrixError_pop_K1.csv
│ ├── outputQmatrix_gene_K1.csv
│ ├── outputQmatrix_ind_K1.csv
│ └── outputQmatrix_pop_K1.csv
├── mav_K2
│ ├── outputEvidence.csv
│ ├── outputEvidenceDetails.csv
│ ├── outputEvidenceNormalised.csv
│ ├── outputLikelihood.csv
│ ├── outputLog.txt
│ ├── outputQmatrixError_ind_K2.csv
│ ├── outputQmatrixError_pop_K2.csv
│ ├── outputQmatrix_gene_K2.csv
│ ├── outputQmatrix_ind_K2.csv
│ └── outputQmatrix_pop_K2.csv
├── mav_K3
│ ├── outputEvidence.csv
│ ├── outputEvidenceDetails.csv
│ ├── outputEvidenceNormalised.csv
│ ├── outputLikelihood.csv
│ ├── outputLog.txt
│ ├── outputQmatrixError_ind_K3.csv
│ ├── outputQmatrixError_pop_K3.csv
│ ├── outputQmatrix_gene_K3.csv
│ ├── outputQmatrix_ind_K3.csv
│ └── outputQmatrix_pop_K3.csv
└── test_merged
│ ├── outputEvidence.csv
│ └── outputEvidenceDetails.csv
├── maverick_field_test.sh
├── maverick_functions_tests.py
├── mockups.py
├── sanity_tests.py
├── smalldata
├── BigTestData.bed.tar.xz
├── BigTestData.str.tar.xz
├── BigTestData.vcf.tar.xz
├── Reduced_dataset.structure
├── SmallTestData.vcf
├── SmallTestData_reference.tsv
├── extraparams
├── indfile.txt
├── mainparams
├── parameters.txt
├── parameters_a.txt
├── parameters_a_as.txt
├── parameters_as.txt
└── parameters_f.txt
├── structure_field_test.sh
├── structure_function_tests.py
└── wrapper_tests.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | .ropeproject/
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .cache
41 | nosetests.xml
42 | coverage.xml
43 | tests/files/merged/*
44 | tests/files/bestK/*
45 |
46 | # Translations
47 | *.mo
48 | *.pot
49 |
50 | # Django stuff:
51 | *.log
52 |
53 | # Sphinx documentation
54 | docs/_build/
55 |
56 | # PyBuilder
57 | target/
58 |
59 | # Eric6 project
60 | *.e4p
61 | .eric*
62 |
63 | # Dolphin breadcrumbs
64 | *.directory
65 |
--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
1 | stages:
2 | - tests
3 | - deployment
4 |
5 | image: python:3.11-slim
6 |
7 | # Keep pip's cache inside the project directory, since only local paths can be
8 | # cached. It must resolve to the ".cache/pip" entry listed under "cache: paths".
9 | variables:
10 |   PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
11 |
12 | # Pip's cache doesn't store the python packages
13 | # https://pip.pypa.io/en/stable/reference/pip_install/#caching
14 | #
15 | # If you want to also cache the installed packages, you have to install
16 | # them in a virtualenv and cache it as well.
17 | cache:
18 | paths:
19 | - .cache/pip
20 | - venv/
21 |
22 | tests:
23 | stage: tests
24 | script:
25 | - apt-get update
26 | - apt-get install -y xz-utils
27 | - apt-get install -y r-base r-cran-devtools r-cran-biocmanager
28 | - python -V # Print out python version for debugging
29 | - pip install virtualenv
30 | - virtualenv venv
31 | - source venv/bin/activate
32 | - pip install pytest
33 | - pip install .
34 | - pytest tests/*.py -v
35 | - tests/structure_field_test.sh
36 | - tests/fastStructure_field_test.sh
37 | - tests/maverick_field_test.sh
38 | - tests/alstructure_field_tests.sh
39 |
40 |
41 | deployment:
42 | stage: deployment
43 | script:
44 | - python -V
45 | - pip install virtualenv
46 | - virtualenv venv
47 | - source venv/bin/activate
48 | - pip install twine
49 | - python setup.py sdist
50 | - twine upload dist/* -u __token__ -p $pypi_token
51 | only:
52 | - tags
53 | artifacts:
54 | name: "$CI_PROJECT_NAME-$CI_COMMIT_TAG"
55 | paths:
56 | - dist/
57 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | build:
9 | os: ubuntu-22.04
10 | tools:
11 | python: "3.8"
12 |
13 | # Build documentation with MkDocs
14 | mkdocs:
15 | configuration: mkdocs.yml
16 |
17 | python:
18 | install:
19 | - requirements: docs/requirements.txt
20 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | os:
4 | - linux
5 |
6 | dist:
7 | - xenial
8 |
9 | python:
10 | - "3.7"
11 |
12 | install:
13 | - pip install .
14 | - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
15 | - echo "deb https://cran.r-project.org/bin/linux/ubuntu/ xenial-cran35/" | sudo tee -a /etc/apt/sources.list
16 | - sudo apt-get update
17 | - sudo apt-get install -y r-base libcurl4-openssl-dev libssl-dev gfortran liblapack-dev libblas-dev
18 |
19 | # Run tests
20 | script:
21 | - py.test tests/*.py -v
22 | - tests/structure_field_test.sh
23 | - tests/fastStructure_field_test.sh
24 | - tests/maverick_field_test.sh
25 | - travis_wait tests/alstructure_field_tests.sh
26 |
27 | # Other stuff
28 | notifications:
29 | email:
30 | - f.pinamartins@gmail.com
31 | - o.diogo.silva@gmail.com
32 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include structure_threader/bins *
2 |
--------------------------------------------------------------------------------
/PTS/README.md:
--------------------------------------------------------------------------------
1 | # PTS
2 |
3 | These are the files used to create a [Phoronix Test Suite](http://www.phoronix-test-suite.com/) test profile.
4 |
5 | The objective is to have them upstreamed by PTS itself, as *Structure_threader* seems to be a very nice benchmark!
6 |
--------------------------------------------------------------------------------
/PTS/data/BigTestData.str.tar.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/PTS/data/BigTestData.str.tar.xz
--------------------------------------------------------------------------------
/PTS/data/Reduced_dataset.structure:
--------------------------------------------------------------------------------
1 | SNP_4 SNP_6 SNP_17 SNP_26 SNP_38 SNP_88 SNP_122 SNP_154 SNP_164 SNP_264 SNP_270 SNP_272 SNP_494 SNP_496 SNP_640 SNP_695 SNP_696 SNP_836 SNP_839 SNP_889 SNP_937 SNP_967 SNP_970 SNP_982 SNP_1059 SNP_1077 SNP_1083 SNP_1084 SNP_1117
2 | Pop1.19 1 -9 -9 3 2 -9 -9 -9 4 4 -9 4 3 -9 -9 -9 -9 -9 1 -9 1 2 -9 3 1 4 2 4 1 4
3 | Pop1.19 1 -9 -9 3 2 -9 -9 -9 2 4 -9 4 3 -9 -9 -9 -9 -9 3 -9 3 2 -9 3 2 2 2 4 1 2
4 | Pop1.5 1 2 1 3 2 3 4 2 4 4 2 4 3 4 4 2 2 1 3 2 1 4 2 3 1 4 2 4 1 2
5 | Pop1.5 1 2 1 3 2 3 4 2 4 4 2 4 3 4 4 2 2 1 3 2 1 2 2 3 2 4 2 4 1 2
6 | Pop2.37 2 2 -9 3 -9 3 4 4 2 4 2 4 -9 4 -9 2 2 1 -9 4 1 2 -9 3 1 -9 -9 4 1 2
7 | Pop2.37 2 2 -9 3 -9 3 2 2 2 3 2 4 -9 4 -9 3 2 1 -9 4 3 2 -9 3 1 -9 -9 4 1 2
8 | Pop2.5 2 2 1 3 2 -9 4 4 2 4 2 4 3 4 4 2 -9 -9 1 4 3 -9 4 3 1 2 2 4 1 2
9 | Pop2.5 2 2 1 3 2 -9 2 4 2 4 2 4 3 4 4 3 -9 -9 1 4 3 -9 4 3 1 2 2 4 1 2
10 | Pop3.6 3 2 1 3 2 -9 2 4 4 4 2 4 -9 4 4 2 -9 -9 1 4 1 -9 4 3 2 2 2 1 3 2
11 | Pop3.6 3 2 1 3 2 -9 2 2 2 4 2 4 -9 4 4 3 -9 -9 3 2 3 -9 4 3 2 2 2 1 3 2
12 | Pop3.7 3 2 1 2 2 1 2 2 -9 4 2 4 1 4 4 2 1 1 3 4 1 2 4 3 1 2 2 4 1 4
13 | Pop3.7 3 2 1 3 2 3 2 2 -9 4 2 4 3 4 4 3 2 3 3 4 1 2 4 3 2 2 2 4 1 4
14 | Pop4.11 4 2 1 3 -9 3 2 4 2 -9 2 -9 3 -9 4 -9 1 3 3 4 3 2 4 3 2 2 2 -9 -9 4
15 | Pop4.11 4 2 1 3 -9 1 2 4 4 -9 2 -9 3 -9 4 -9 1 3 1 4 1 2 4 3 2 2 2 -9 -9 4
16 | Pop4.7 4 2 1 3 2 3 2 2 2 4 2 1 3 4 4 2 2 1 3 4 1 4 4 3 1 2 2 1 1 4
17 | Pop4.7 4 2 1 3 2 3 2 2 2 4 2 4 3 4 2 2 2 1 3 4 3 2 4 3 1 2 2 4 3 4
18 | Pop5.16 5 2 1 3 2 1 -9 -9 -9 4 2 1 1 4 -9 -9 1 1 3 4 -9 2 -9 3 -9 -9 -9 1 1 -9
19 | Pop5.16 5 2 1 3 2 1 -9 -9 -9 4 2 4 3 4 -9 -9 2 3 3 4 -9 2 -9 3 -9 -9 -9 4 3 -9
20 | Pop5.9 5 1 4 3 2 1 2 2 2 3 2 4 1 4 2 3 2 1 3 4 3 2 4 3 1 -9 -9 1 3 2
21 | Pop5.9 5 1 4 3 2 1 4 2 2 4 2 4 1 4 4 3 2 1 3 4 1 2 4 3 1 -9 -9 1 3 2
22 | Pop6.11 6 2 1 2 2 3 2 4 2 4 4 1 -9 4 4 3 2 1 3 4 3 4 4 1 1 4 4 1 3 2
23 | Pop6.11 6 2 1 3 2 3 2 4 2 4 4 1 -9 4 4 3 2 1 3 4 3 2 4 3 2 2 2 1 3 2
24 | Pop6.6 6 2 1 3 2 3 2 4 4 4 -9 -9 1 4 4 3 2 1 3 4 3 2 4 3 2 2 4 1 3 2
25 | Pop6.6 6 2 1 3 2 3 2 4 2 4 -9 -9 1 4 4 3 2 1 3 4 3 2 4 3 2 2 4 1 3 2
26 | Pop7.10 7 1 1 2 4 1 2 4 -9 4 4 -9 3 4 -9 3 1 3 1 4 3 2 4 3 -9 2 2 4 1 2
27 | Pop7.10 7 2 4 2 4 3 2 2 -9 4 2 -9 3 3 -9 3 1 3 3 4 3 2 4 3 -9 2 2 4 1 2
28 | Pop7.8 7 1 1 2 2 -9 -9 4 -9 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 4 4 3 -9 2 4 4 1 4
29 | Pop7.8 7 2 1 3 2 -9 -9 2 -9 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 2 2 3 -9 2 2 4 1 4
30 | Pop8.12 8 1 4 2 2 1 -9 2 4 4 4 4 3 4 4 3 2 1 3 4 1 2 4 3 1 2 2 1 1 2
31 | Pop8.12 8 1 4 3 2 3 -9 2 2 4 4 4 3 4 2 3 2 1 3 4 1 2 2 3 1 2 2 4 3 2
32 | Pop8.9 8 1 4 2 2 1 2 2 4 4 2 1 1 4 4 2 1 1 3 4 1 2 2 1 1 2 2 4 1 2
33 | Pop8.9 8 2 4 3 2 1 2 2 2 4 2 4 3 4 2 3 2 3 3 4 3 2 2 3 1 2 2 4 1 2
34 | Pop9.10 9 2 1 2 2 3 4 4 4 4 2 4 3 4 4 2 2 1 3 4 1 4 2 3 1 2 4 4 1 4
35 | Pop9.10 9 2 1 3 2 3 2 2 4 4 2 4 3 3 2 2 2 1 3 4 1 2 2 3 1 2 2 4 1 4
36 | Pop9.9 9 2 1 3 -9 3 2 2 2 4 -9 4 3 4 4 -9 2 1 3 4 1 2 -9 1 1 2 2 4 1 4
37 | Pop9.9 9 2 1 3 -9 3 2 2 2 4 -9 4 3 4 4 -9 2 1 3 4 1 2 -9 3 1 2 2 4 1 4
38 | Pop10.16 10 2 1 3 2 1 2 2 -9 4 2 1 3 -9 2 -9 2 1 3 4 1 4 4 3 1 2 2 4 1 4
39 | Pop10.16 10 2 1 3 2 3 2 2 -9 4 2 4 3 -9 2 -9 2 1 3 4 1 4 4 3 1 2 2 4 1 4
40 | Pop10.9 10 2 -9 3 -9 1 -9 -9 -9 -9 -9 4 -9 -9 -9 -9 -9 -9 3 4 1 -9 -9 -9 -9 2 2 -9 -9 2
41 | Pop10.9 10 2 -9 3 -9 3 -9 -9 -9 -9 -9 4 -9 -9 -9 -9 -9 -9 3 4 1 -9 -9 -9 -9 2 2 -9 -9 2
42 | Pop11.13 11 2 1 3 2 3 2 4 2 4 2 1 3 4 2 3 2 1 3 -9 1 4 4 3 1 2 2 4 1 2
43 | Pop11.13 11 2 1 3 2 3 2 2 2 4 2 1 3 3 2 3 2 1 3 -9 1 2 4 3 2 2 2 4 1 2
44 | Pop11.6 11 1 4 3 -9 1 2 2 4 -9 2 1 1 4 4 3 1 1 3 4 1 2 4 3 1 2 -9 1 1 4
45 | Pop11.6 11 1 4 3 -9 1 2 2 2 -9 2 1 1 4 2 3 2 3 3 4 1 2 4 3 1 2 -9 4 3 4
46 | Pop12.21 12 2 1 2 2 3 2 4 2 -9 2 -9 3 4 2 3 2 1 3 4 1 2 4 1 2 2 2 4 1 2
47 | Pop12.21 12 2 1 3 2 3 2 4 2 -9 2 -9 3 4 2 3 2 1 3 4 1 2 4 3 2 2 2 4 1 2
48 | Pop12.6 12 -9 -9 3 2 -9 -9 4 4 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 2 4 1 1 2 2 1 1 4
49 | Pop12.6 12 -9 -9 3 2 -9 -9 2 2 4 2 4 3 4 -9 -9 -9 -9 3 4 -9 2 2 3 2 2 2 4 3 2
50 | Pop13.17 13 -9 -9 3 4 1 2 4 4 -9 -9 4 3 -9 -9 -9 1 1 3 -9 -9 2 4 -9 -9 2 2 -9 -9 -9
51 | Pop13.17 13 -9 -9 3 4 3 2 2 2 -9 -9 4 3 -9 -9 -9 2 3 3 -9 -9 2 4 -9 -9 2 2 -9 -9 -9
52 | Pop13.2 13 1 4 3 -9 1 2 4 2 -9 -9 -9 3 3 4 -9 2 1 3 -9 3 4 -9 1 -9 2 4 4 1 -9
53 | Pop13.2 13 1 4 3 -9 3 2 4 2 -9 -9 -9 3 3 4 -9 2 1 3 -9 3 2 -9 1 -9 2 2 4 1 -9
54 | Pop14.27 14 -9 -9 3 2 3 4 4 -9 4 -9 4 3 4 4 3 1 1 3 -9 -9 2 4 3 1 -9 2 4 1 2
55 | Pop14.27 14 -9 -9 3 2 3 2 4 -9 4 -9 4 3 4 4 3 2 3 3 -9 -9 2 2 3 1 -9 2 4 1 2
56 | Pop14.4 14 2 1 2 2 1 4 4 4 4 2 1 3 4 4 3 1 1 3 4 1 2 4 3 1 2 2 1 1 2
57 | Pop14.4 14 2 1 3 2 3 2 2 2 4 2 1 3 4 4 3 2 3 3 4 3 2 4 3 1 2 2 4 3 2
58 | Pop15.12 15 2 1 3 2 3 2 4 -9 4 2 4 -9 4 4 2 2 1 3 4 1 2 4 3 1 2 2 4 1 4
59 | Pop15.12 15 2 1 3 2 3 2 2 -9 4 2 4 -9 4 4 2 2 1 3 2 1 2 2 3 1 2 2 4 1 4
60 | Pop15.8 15 2 1 3 2 3 2 4 4 4 2 1 3 4 4 3 2 1 3 4 1 4 4 3 1 4 2 1 1 2
61 | Pop15.8 15 2 1 3 2 3 2 2 2 4 2 4 3 4 2 3 2 1 3 4 1 2 4 3 1 4 2 4 3 2
62 | Pop16.1 16 2 1 3 2 1 4 2 2 -9 2 1 3 4 2 2 1 1 3 4 1 2 4 3 1 2 2 4 1 4
63 | Pop16.1 16 2 1 3 2 1 4 2 2 -9 2 1 3 4 2 2 2 3 3 4 1 2 4 3 2 2 2 4 1 4
64 | Pop16.7 16 2 1 3 2 1 2 4 2 4 2 4 3 4 4 2 2 1 3 4 1 4 2 3 1 2 2 1 1 4
65 | Pop16.7 16 2 1 3 2 1 2 2 2 4 2 4 3 4 4 3 2 1 3 4 1 2 2 3 2 2 2 4 3 2
66 | Pop17.11 17 1 1 3 4 3 2 2 -9 4 -9 4 3 -9 4 -9 2 1 3 4 1 4 4 3 1 2 4 4 1 4
67 | Pop17.11 17 2 1 3 4 3 2 2 -9 4 -9 4 3 -9 4 -9 2 1 3 4 1 4 4 3 1 2 2 4 1 4
68 | Pop17.19 17 2 1 3 2 3 2 2 2 4 -9 4 -9 4 2 2 2 1 3 4 1 4 4 3 1 2 4 1 3 4
69 | Pop17.19 17 2 1 3 2 3 2 2 2 4 -9 4 -9 4 2 2 2 1 3 4 3 4 4 3 2 2 4 1 3 4
70 |
--------------------------------------------------------------------------------
/PTS/data/extraparams:
--------------------------------------------------------------------------------
1 |
2 | EXTRA PARAMS FOR THE PROGRAM structure. THESE PARAMETERS CONTROL HOW THE
3 | PROGRAM RUNS. ATTRIBUTES OF THE DATAFILE AS WELL AS K AND RUNLENGTH ARE
4 | SPECIFIED IN mainparams.
5 |
6 | "(int)" means that this takes an integer value.
7 | "(d)" means that this is a double (ie, a Real number such as 3.14).
8 | "(B)" means that this variable is Boolean
9 | (ie insert 1 for True, and 0 for False).
10 |
11 | PROGRAM OPTIONS
12 |
13 | #define NOADMIX 0 // (B) Use no admixture model (0=admixture model, 1=no-admix)
14 | #define LINKAGE 0 // (B) Use the linkage model model
15 | #define USEPOPINFO 0 // (B) Use prior population information to pre-assign individuals
16 | to clusters
17 | #define LOCPRIOR 0 //(B) Use location information to improve weak data
18 |
19 | #define FREQSCORR 0 // (B) allele frequencies are correlated among pops
20 | #define ONEFST 0 // (B) assume same value of Fst for all subpopulations.
21 |
22 | #define INFERALPHA 1 // (B) Infer ALPHA (the admixture parameter)
23 | #define POPALPHAS 0 // (B) Individual alpha for each population
24 | #define ALPHA 1.0 // (d) Dirichlet parameter for degree of admixture
25 | (this is the initial value if INFERALPHA==1).
26 |
27 | #define INFERLAMBDA 0 // (B) Infer LAMBDA (the allele frequencies parameter)
28 | #define POPSPECIFICLAMBDA 0 //(B) infer a separate lambda for each pop
29 | (only if INFERLAMBDA=1).
30 | #define LAMBDA 1.0 // (d) Dirichlet parameter for allele frequencies
31 |
32 |
33 |
34 |
35 | PRIORS
36 |
37 | #define FPRIORMEAN 0.01 // (d) Prior mean and SD of Fst for pops.
38 | #define FPRIORSD 0.05 // (d) The prior is a Gamma distribution with these parameters
39 |
40 | #define UNIFPRIORALPHA 1 // (B) use a uniform prior for alpha;
41 | otherwise gamma prior
42 | #define ALPHAMAX 10.0 // (d) max value of alpha if uniform prior
43 | #define ALPHAPRIORA 1.0 // (only if UNIFPRIORALPHA==0): alpha has a gamma
44 | prior with mean A*B, and
45 | #define ALPHAPRIORB 2.0 // variance A*B^2.
46 |
47 |
48 | #define LOG10RMIN -4.0 //(d) Log10 of minimum allowed value of r under linkage model
49 | #define LOG10RMAX 1.0 //(d) Log10 of maximum allowed value of r
50 | #define LOG10RPROPSD 0.1 //(d) standard deviation of log r in update
51 | #define LOG10RSTART -2.0 //(d) initial value of log10 r
52 |
53 |
54 | USING PRIOR POPULATION INFO (USEPOPINFO)
55 |
56 | #define GENSBACK 2 //(int) For use when inferring whether an indiv-
57 | idual is an immigrant, or has an immigrant an-
58 | cestor in the past GENSBACK generations. eg, if
59 | GENSBACK==2, it tests for immigrant ancestry
60 | back to grandparents.
61 | #define MIGRPRIOR 0.01 //(d) prior prob that an individual is a migrant
62 | (used only when USEPOPINFO==1). This should
63 | be small, eg 0.01 or 0.1.
64 | #define PFROMPOPFLAGONLY 0 // (B) only use individuals with POPFLAG=1 to update P.
65 | This is to enable use of a reference set of
66 | individuals for clustering additional "test"
67 | individuals.
68 |
69 | LOCPRIOR MODEL FOR USING LOCATION INFORMATION
70 |
71 | #define LOCISPOP 0 //(B) use POPDATA for location information
72 | #define LOCPRIORINIT 1.0 //(d) initial value for r, the location prior
73 | #define MAXLOCPRIOR 20.0 //(d) max allowed value for r
74 |
75 |
76 |
77 |
78 | OUTPUT OPTIONS
79 |
80 | #define PRINTNET 1 // (B) Print the "net nucleotide distance" to screen during the run
81 | #define PRINTLAMBDA 1 // (B) Print current value(s) of lambda to screen
82 | #define PRINTQSUM 1 // (B) Print summary of current population membership to screen
83 |
84 | #define SITEBYSITE 0 // (B) whether or not to print site by site results.
85 | (Linkage model only) This is a large file!
86 | #define PRINTQHAT 0 // (B) Q-hat printed to a separate file. Turn this
87 | on before using STRAT.
88 | #define UPDATEFREQ 100 // (int) frequency of printing update on the screen.
89 | Set automatically if this is 0.
90 | #define PRINTLIKES 0 // (B) print current likelihood to screen every rep
91 | #define INTERMEDSAVE 0 // (int) number of saves to file during run
92 |
93 | #define ECHODATA 1 // (B) Print some of data file to screen to check
94 | that the data entry is correct.
95 | (NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:)
96 | #define ANCESTDIST 0 // (B) collect data about the distribution of an-
97 | cestry coefficients (Q) for each individual
98 | #define NUMBOXES 1000 // (int) the distribution of Q values is stored as
99 | a histogram with this number of boxes.
100 | #define ANCESTPINT 0.90 // (d) the size of the displayed probability
101 | interval on Q (values between 0.0--1.0)
102 |
103 |
104 |
105 | MISCELLANEOUS
106 |
107 | #define COMPUTEPROB 1 // (B) Estimate the probability of the Data under
108 | the model. This is used when choosing the
109 | best number of subpopulations.
110 | #define ADMBURNIN 500 // (int) [only relevant for linkage model]:
111 | Initial period of burnin with admixture model (see Readme)
112 | #define ALPHAPROPSD 0.025 // (d) SD of proposal for updating alpha
113 | #define STARTATPOPINFO 0 // Use given populations as the initial condition
114 | for population origins. (Need POPDATA==1). It
115 | is assumed that the PopData in the input file
116 | are between 1 and k where k<=MAXPOPS.
117 | #define RANDOMIZE 0 // (B) use new random seed for each run
118 | #define SEED 0 // (int) seed value for random number generator
119 | (must set RANDOMIZE=0)
120 | #define METROFREQ 10 // (int) Frequency of using Metropolis step to update
121 | Q under admixture model (ie use the metr. move every
122 | i steps). If this is set to 0, it is never used.
123 | (Proposal for each q^(i) sampled from prior. The
124 | goal is to improve mixing for small alpha.)
125 | #define REPORTHITRATE 0 // (B) report hit rate if using METROFREQ
126 |
--------------------------------------------------------------------------------
/PTS/data/mainparams:
--------------------------------------------------------------------------------
1 |
2 | KEY PARAMETERS FOR THE PROGRAM structure. YOU WILL NEED TO SET THESE
3 | IN ORDER TO RUN THE PROGRAM. VARIOUS OPTIONS CAN BE ADJUSTED IN THE
4 | FILE extraparams.
5 |
6 |
7 | "(int)" means that this takes an integer value.
8 | "(B)" means that this variable is Boolean
9 | (ie insert 1 for True, and 0 for False)
10 | "(str)" means that this is a string (but not enclosed in quotes!)
11 |
12 |
13 | Basic Program Parameters
14 |
15 | #define MAXPOPS KKK // (int) number of populations assumed
16 | #define BURNIN 5000 // (int) length of burnin period
17 | #define NUMREPS 100000 // (int) number of MCMC reps after burnin
18 |
19 | Input/Output files
20 |
21 | #define INFILE Reduced_dataset.structure // (str) name of input data file
22 | #define OUTFILE outfile //(str) name of output data file
23 |
24 | Data file format
25 |
26 | #define NUMINDS 34 // (int) number of diploid individuals in data file
27 | #define NUMLOCI 29 // (int) number of loci in data file
28 | #define PLOIDY 2 // (int) ploidy of data
29 | #define MISSING -9 // (int) value given to missing genotype data
30 | #define ONEROWPERIND 0 // (B) store data for individuals in a single line
31 |
32 |
33 | #define LABEL 1 // (B) Input file contains individual labels
34 | #define POPDATA 1 // (B) Input file contains a population identifier
35 | #define POPFLAG 0 // (B) Input file contains a flag which says
36 | whether to use popinfo when USEPOPINFO==1
37 | #define LOCDATA 0 // (B) Input file contains a location identifier
38 |
39 | #define PHENOTYPE 0 // (B) Input file contains phenotype information
40 | #define EXTRACOLS 0 // (int) Number of additional columns of data
41 | before the genotype data start.
42 |
43 | #define MARKERNAMES 1 // (B) data file contains row of marker names
44 | #define RECESSIVEALLELES 0 // (B) data file contains dominant markers (eg AFLPs)
45 | // and a row to indicate which alleles are recessive
46 | #define MAPDISTANCES 0 // (B) data file contains row of map distances
47 | // between loci
48 |
49 |
50 | Advanced data file options
51 |
52 | #define PHASED 0 // (B) Data are in correct phase (relevant for linkage model only)
53 | #define PHASEINFO 0 // (B) the data for each individual contains a line
54 | indicating phase (linkage model)
55 | #define MARKOVPHASE 0 // (B) the phase info follows a Markov model.
56 | #define NOTAMBIGUOUS -999 // (int) for use in some analyses of polyploid data
57 |
58 |
59 |
60 | Command line options:
61 |
62 | -m mainparams
63 | -e extraparams
64 | -s stratparams
65 | -K MAXPOPS
66 | -L NUMLOCI
67 | -N NUMINDS
68 | -i input file
69 | -o output file
70 | -D SEED
71 |
--------------------------------------------------------------------------------
/PTS/fastStructure/downloads.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | https://github.com/rajanil/fastStructure/archive/v1.0.tar.gz
8 | 5cbb76e7d49e27a57046ab641b666f97
9 | v1.0.tar.gz
10 |
11 |
12 |
13 | https://github.com/StuntsPT/Structure_threader/raw/master/PTS/data/BigTestData.str.tar.xz
14 | 1115759c154152b264a4ff5f87c34c1d
15 | BigTestData.str.tar.xz
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/PTS/fastStructure/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Unpack the fastStructure sources and the benchmark dataset ('f' must be the last flag so that it takes the archive name).
3 | tar -zxvf v1.0.tar.gz
4 | tar -xJvf BigTestData.str.tar.xz
5 | # Build the extension modules in place: first in vars/, then in the top-level source directory.
6 | cd fastStructure-1.0/vars
7 | python2 setup.py build_ext --inplace
8 | cd ..
9 | python2 setup.py build_ext --inplace
10 |
11 | cd ..
12 | # Write the "fastStructure" launcher: K=1..4 with 4 runs each, all output appended to $LOG_FILE.
13 | echo "#!/bin/bash
14 |
15 | for i in {1..4}
16 | do
17 | for j in {1..4}
18 | do
19 | python2 ./fastStructure-1.0/structure.py --input=BigTestData --output=TestBigData_out_K\${i}_R\${j} -K \${i} --format=str >> \$LOG_FILE 2>&1
20 | done
21 | done
22 |
23 | " > fastStructure
24 | chmod +x fastStructure
25 |
--------------------------------------------------------------------------------
/PTS/fastStructure/results-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | sys.time
6 |
7 |
8 |
--------------------------------------------------------------------------------
/PTS/fastStructure/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 fastStructure runs
6 | 1.0
7 | Single-threaded Bayesian calculation of population attribution. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | 1000 genomes Chr22 clustering
11 | 3
12 |
13 |
14 | 0.1
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | python-numpy, cython, GSL, python-scipy
21 | 1.0
22 | https://rajanil.github.io/fastStructure
23 | Francisco Pina-Martins
24 |
25 |
26 |
--------------------------------------------------------------------------------
/PTS/structure/downloads.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz
7 | 4e0591678cdbfe79347d272b5dceeda1
8 | 66375
9 |
10 |
11 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/Reduced_dataset.structure
12 | 1665111727e05d4ba3eea253a986c374
13 | 5317
14 |
15 |
16 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/mainparams
17 | 4e41f743c66c3dcd926463a831ab1cda
18 | 2596
19 |
20 |
21 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/extraparams
22 | c4f5fe55b6a496b8c55a0538fcc173d7
23 | 6257
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/PTS/structure/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Unpack the STRUCTURE kernel sources.
3 | tar -zxvf structure_kernel_source.tar.gz
4 | # Run make inside the source directory, then return to the starting directory.
5 | cd structure_kernel_src
6 | make
7 | cd ..
8 | # Write the "structure" launcher: K=1..4 with 4 runs each, all output appended to $LOG_FILE.
9 | echo "#!/bin/bash
10 |
11 | for i in {1..4}
12 | do
13 | for j in {1..4}
14 | do
15 | ./structure_kernel_src/structure -i Reduced_dataset.structure -o output_K\${i}_R\${j} -K \${i} >> \$LOG_FILE 2>&1
16 | done
17 | done
18 |
19 | " > structure
20 | chmod +x structure
21 |
--------------------------------------------------------------------------------
/PTS/structure/results-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | sys.time
6 |
7 |
8 |
--------------------------------------------------------------------------------
/PTS/structure/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 Structure runs
6 | 2.3.4
7 | Single-threaded Bayesian calculation of population attribution. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 1.0.0
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-essential
21 | 1.0
22 | http://web.stanford.edu/group/pritchardlab/structure.html
23 | Francisco Pina-Martins
24 |
25 |
26 |
--------------------------------------------------------------------------------
/PTS/structure_threader-faststructure/downloads.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | https://github.com/StuntsPT/Structure_threader/archive/v0.1-rc3.tar.gz
7 | ca5c134d8fc24760fd47d730adfde892
8 | 235897
9 | v0.1-rc3.tar.gz
10 |
11 |
12 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/BigTestData.str.tar.xz
13 | 1115759c154152b264a4ff5f87c34c1d
14 | 71044
15 |
16 |
17 | https://github.com/rajanil/fastStructure/archive/v1.0.tar.gz
18 | 5cbb76e7d49e27a57046ab641b666f97
19 | 100627
20 | v1.0.tar.gz
21 |
22 |
23 | http://pypi.python.org/packages/source/n/nose/nose-1.3.6.tar.gz
24 | 0ca546d81ca8309080fc80cb389e7a16
25 | nose-1.3.6.tar.gz
26 | 276273
27 |
28 |
29 | https://github.com/numpy/numpy/archive/v1.9.2.tar.gz
30 | 90f7434759088acccfddf5ba61b1f908
31 | v1.9.2.tar.gz
32 | 4230459
33 |
34 |
35 | https://github.com/scipy/scipy/archive/v0.16.0b2.tar.gz
36 | c2e7cf084b0242275ee54df1cf2e9c92
37 | v0.16.0b2.tar.gz
38 | 11608327
39 |
40 |
41 | http://cython.org/release/Cython-0.22.zip
42 | b22af7b964903ceff57167dc7d5b9b51
43 | Cython-0.22.zip
44 | 2095055
45 |
46 |
47 | http://gnu.mirror.vexxhost.com/gsl/gsl-latest.tar.gz
48 | e49a664db13d81c968415cd53f62bc8b
49 | gsl-latest.tar.gz
50 | 3534080
51 |
52 |
53 | http://www.netlib.org/lapack/lapack-3.5.0.tgz
54 | b1d3e3e425b2e44a06760ff173104bdf
55 | lapack-3.5.0.tgz
56 | 6313139
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/PTS/structure_threader-faststructure/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | # Define test dir
6 | testdir=$(pwd)
7 |
8 | # Save the current LDFLAGS so they can be restored after the numpy/scipy builds
9 | _OLD_LDFLAGS=$LDFLAGS
10 |
11 | # Create env dir for dependent libraries
12 | mkdir -p ${testdir}/env
13 | envdir=${testdir}/env
14 |
15 | # Install dependencies
16 | # LAPACK
17 | tar xvfz lapack-3.5.0.tgz
18 | mkdir -p build-lapack
19 | cd build-lapack
20 | cmake ../lapack-3.5.0
21 | make
22 | #mkdir -p ${envdir}/{bin,lib}
23 | mv bin ${envdir}
24 | mv lib ${envdir}
25 |
26 | # cython
27 | cd ${testdir}
28 | unzip Cython-0.22.zip
29 | cd Cython-0.22
30 | export PYTHONPATH=$PYTHONPATH:${envdir}/lib/python2.7/site-packages/
31 | mkdir -p ${envdir}/lib/python2.7/site-packages/
32 | python2 setup.py install --prefix=${envdir}
33 |
34 | # python-nose
35 | cd ${testdir}
36 | tar xvfz nose-1.3.6.tar.gz
37 | cd nose-1.3.6
38 | python2 setup.py install --prefix=${envdir}
39 |
40 | # numpy
41 | cd ${testdir}
42 | tar xvfz v1.9.2.tar.gz
43 | cd numpy-1.9.2
44 | sed -e "s|#![ ]*/usr/bin/python$|#!/usr/bin/python2|" \
45 | -e "s|#![ ]*/usr/bin/env python$|#!/usr/bin/env python2|" \
46 | -e "s|#![ ]*/bin/env python$|#!/usr/bin/env python2|" \
47 | -i $(find . -name '*.py')
48 | export ATLAS=None
49 | export LDFLAGS="$LDFLAGS -shared"
50 | python2 setup.py install --prefix=${envdir}
51 |
52 | # scipy
53 | export PATH=$PATH:${envdir}/bin
54 | cd ${testdir}
55 | tar xvfz v0.16.0b2.tar.gz
56 | cd scipy-0.16.0b2
57 | python2 setup.py install --prefix=${envdir}
58 |
59 | # GNU scientific library
60 | cd ${testdir}
61 | tar xvzf gsl-latest.tar.gz
62 | cd gsl-1.16
63 | export LDFLAGS=${_OLD_LDFLAGS}
64 | ./configure --prefix=${envdir}
65 | make
66 | make install
67 |
68 | # fastStructure
69 | # Extract tarball, enter src dir, build binary and place it in the env dir
70 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${envdir}/lib
71 | export CFLAGS="-I${envdir}/include"
72 | export LDFLAGS="-L${envdir}/lib"
73 | cd ${testdir}
74 | tar xvfz v1.0.tar.gz
75 | cd fastStructure-1.0
76 | cd vars
77 | python2 setup.py build_ext --inplace
78 | cd ..
79 | python2 setup.py build_ext --inplace
80 |
81 | # Dataset
82 | cd ${testdir}
83 | tar xvfJ BigTestData.str.tar.xz
84 |
85 | # Structure_threader
86 | cd ${testdir}
87 | tar xfvz v0.1-rc3.tar.gz
88 |
89 | # Create launcher script
90 |
91 | echo "#!/bin/bash
92 | workdir=$(pwd)
93 | cd Structure_threader-0.1-rc3/
94 | python3 structure_threader.py \$@ -fs \${workdir}/fastStructure-1.0/structure.py > \$LOG_FILE 2>&1
95 | " > structure_threader-faststructure
96 | chmod +x structure_threader-faststructure
97 |
--------------------------------------------------------------------------------
/PTS/structure_threader-faststructure/results-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | sys.time
6 |
7 |
8 |
--------------------------------------------------------------------------------
/PTS/structure_threader-faststructure/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed wrapped fastStructure runs for 16 Ks
6 | 0.1-rc3
7 | Multi-threaded Bayesian calculation of population attribution, using the "fastStructure" algorithm. Will calculate clusterings for 16 values of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 0.2.1
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-utils, python
21 | 341
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../BigTestData -o ../ -K 16 --no-plots 1 --no-tests 1
28 |
29 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure/downloads.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | https://github.com/StuntsPT/Structure_threader/archive/v0.1-rc3.tar.gz
7 | ca5c134d8fc24760fd47d730adfde892
8 | 235897
9 | v0.1-rc3.tar.gz
10 |
11 |
12 | http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz
13 | 4e0591678cdbfe79347d272b5dceeda1
14 | 66375
15 |
16 |
17 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/Reduced_dataset.structure
18 | 1665111727e05d4ba3eea253a986c374
19 | 5317
20 |
21 |
22 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/mainparams
23 | 4e41f743c66c3dcd926463a831ab1cda
24 | 2596
25 |
26 |
27 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/extraparams
28 | c4f5fe55b6a496b8c55a0538fcc173d7
29 | 6257
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Install script for the PTS "structure_threader-structure" test profile:
4 | # builds the STRUCTURE kernel from source, unpacks Structure_threader and
5 | # writes the "structure_threader-structure" launcher script.
6 |
7 | # Abort on the first failing command so a broken build is not masked by a
8 | # launcher that points at a missing binary
9 | set -e
10 |
11 | # Define test dir
12 | testdir=$(pwd)
13 |
14 | # Create env dir for dependent libraries
15 | mkdir -p "${testdir}/env"
16 |
17 | # Install dependencies
18 | # Structure
19 | cd "${testdir}"
20 | tar -zxvf structure_kernel_source.tar.gz
21 | cd structure_kernel_src
22 | make
23 |
24 | # Install Structure_threader
25 | cd "${testdir}"
26 | tar xfvz v0.1-rc3.tar.gz
27 |
28 | # Create launcher script. ${testdir} is expanded now (install time); the
29 | # \$-escaped variables are expanded when the launcher runs.
30 | cat > structure_threader-structure <<EOF
31 | #!/bin/bash
32 | workdir="${testdir}"
33 | cd "\${workdir}/Structure_threader-0.1-rc3/"
34 | python3 structure_threader.py "\$@" -st "\${workdir}/structure_kernel_src/structure" > "\$LOG_FILE" 2>&1
35 | EOF
36 | chmod +x structure_threader-structure
37 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure/results-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | sys.time
6 |
7 |
8 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 wrapped Structure runs
6 | 0.1-rc3
7 | Bayesian calculation of population attribution, using the "Structure" algorithm. Calculates clusterings for 4 different values of "K" with 4 replicates for each "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 1.0.1
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-utils, python
21 | 2
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 --no-plots 1 --no-tests 1
28 |
29 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_1/downloads.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | https://github.com/StuntsPT/Structure_threader/archive/v0.1-rc3.tar.gz
7 | ca5c134d8fc24760fd47d730adfde892
8 | 235897
9 | v0.1-rc3.tar.gz
10 |
11 |
12 | http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz
13 | 4e0591678cdbfe79347d272b5dceeda1
14 | 66375
15 |
16 |
17 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/Reduced_dataset.structure
18 | 1665111727e05d4ba3eea253a986c374
19 | 5317
20 |
21 |
22 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/mainparams
23 | 4e41f743c66c3dcd926463a831ab1cda
24 | 2596
25 |
26 |
27 | https://raw.githubusercontent.com/StuntsPT/Structure_threader/master/PTS/data/extraparams
28 | c4f5fe55b6a496b8c55a0538fcc173d7
29 | 6257
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_1/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Install script for the PTS "structure_threader-structure_1" test profile:
4 | # builds the STRUCTURE kernel from source, unpacks Structure_threader and
5 | # writes the "structure_threader-structure_1" launcher script.
6 |
7 | # Abort on the first failing command so a broken build is not masked by a
8 | # launcher that points at a missing binary
9 | set -e
10 |
11 | # Define test dir
12 | testdir=$(pwd)
13 |
14 | # Create env dir for dependent libraries
15 | mkdir -p "${testdir}/env"
16 |
17 | # Install dependencies
18 | # Structure
19 | cd "${testdir}"
20 | tar -zxvf structure_kernel_source.tar.gz
21 | cd structure_kernel_src
22 | make
23 |
24 | # Install Structure_threader
25 | cd "${testdir}"
26 | tar xfvz v0.1-rc3.tar.gz
27 |
28 | # Create launcher script. ${testdir} is expanded now (install time); the
29 | # \$-escaped variables are expanded when the launcher runs.
30 | cat > structure_threader-structure_1 <<EOF
31 | #!/bin/bash
32 | workdir="${testdir}"
33 | cd "\${workdir}/Structure_threader-0.1-rc3/"
34 | python3 structure_threader.py "\$@" -st "\${workdir}/structure_kernel_src/structure" > "\$LOG_FILE" 2>&1
35 | EOF
36 | chmod +x structure_threader-structure_1
37 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_1/results-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | sys.time
6 |
7 |
8 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_1/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 Structure runs on a single thread
6 | 0.1-rc3
7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 0.2.0
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-essential
21 | 2
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 1 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_16/downloads.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/downloads.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_16/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Install script for the PTS "structure_threader-structure_16" test profile:
4 | # builds the STRUCTURE kernel from source, unpacks Structure_threader and
5 | # writes the "structure_threader-structure_16" launcher script.
6 |
7 | # Abort on the first failing command so a broken build is not masked by a
8 | # launcher that points at a missing binary
9 | set -e
10 |
11 | # Define test dir
12 | testdir=$(pwd)
13 |
14 | # Create env dir for dependent libraries
15 | mkdir -p "${testdir}/env"
16 |
17 | # Install dependencies
18 | # Structure
19 | cd "${testdir}"
20 | tar -zxvf structure_kernel_source.tar.gz
21 | cd structure_kernel_src
22 | make
23 |
24 | # Install Structure_threader
25 | cd "${testdir}"
26 | tar xfvz v0.1-rc3.tar.gz
27 |
28 | # Create launcher script. ${testdir} is expanded now (install time); the
29 | # \$-escaped variables are expanded when the launcher runs.
30 | cat > structure_threader-structure_16 <<EOF
31 | #!/bin/bash
32 | workdir="${testdir}"
33 | cd "\${workdir}/Structure_threader-0.1-rc3/"
34 | python3 structure_threader.py "\$@" -st "\${workdir}/structure_kernel_src/structure" > "\$LOG_FILE" 2>&1
35 | EOF
36 | chmod +x structure_threader-structure_16
37 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_16/results-definition.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/results-definition.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_16/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 Structure runs on 16 threads
6 | 0.1-rc3
7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 0.2.0
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-essential
21 | 2
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 16 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_2/downloads.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/downloads.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_2/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Install script for the PTS "structure_threader-structure_2" test profile:
4 | # builds the STRUCTURE kernel from source, unpacks Structure_threader and
5 | # writes the "structure_threader-structure_2" launcher script.
6 |
7 | # Abort on the first failing command so a broken build is not masked by a
8 | # launcher that points at a missing binary
9 | set -e
10 |
11 | # Define test dir
12 | testdir=$(pwd)
13 |
14 | # Create env dir for dependent libraries
15 | mkdir -p "${testdir}/env"
16 |
17 | # Install dependencies
18 | # Structure
19 | cd "${testdir}"
20 | tar -zxvf structure_kernel_source.tar.gz
21 | cd structure_kernel_src
22 | make
23 |
24 | # Install Structure_threader
25 | cd "${testdir}"
26 | tar xfvz v0.1-rc3.tar.gz
27 |
28 | # Create launcher script. ${testdir} is expanded now (install time); the
29 | # \$-escaped variables are expanded when the launcher runs.
30 | cat > structure_threader-structure_2 <<EOF
31 | #!/bin/bash
32 | workdir="${testdir}"
33 | cd "\${workdir}/Structure_threader-0.1-rc3/"
34 | python3 structure_threader.py "\$@" -st "\${workdir}/structure_kernel_src/structure" > "\$LOG_FILE" 2>&1
35 | EOF
36 | chmod +x structure_threader-structure_2
37 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_2/results-definition.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/results-definition.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_2/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 Structure runs on 2 threads
6 | 0.1-rc3
7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 0.2.0
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-essential
21 | 2
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 2 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_4/downloads.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/downloads.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_4/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Install script for the PTS "structure_threader-structure_4" test profile:
4 | # builds the STRUCTURE kernel from source, unpacks Structure_threader and
5 | # writes the "structure_threader-structure_4" launcher script.
6 |
7 | # Abort on the first failing command so a broken build is not masked by a
8 | # launcher that points at a missing binary
9 | set -e
10 |
11 | # Define test dir
12 | testdir=$(pwd)
13 |
14 | # Create env dir for dependent libraries
15 | mkdir -p "${testdir}/env"
16 |
17 | # Install dependencies
18 | # Structure
19 | cd "${testdir}"
20 | tar -zxvf structure_kernel_source.tar.gz
21 | cd structure_kernel_src
22 | make
23 |
24 | # Install Structure_threader
25 | cd "${testdir}"
26 | tar xfvz v0.1-rc3.tar.gz
27 |
28 | # Create launcher script. ${testdir} is expanded now (install time); the
29 | # \$-escaped variables are expanded when the launcher runs.
30 | cat > structure_threader-structure_4 <<EOF
31 | #!/bin/bash
32 | workdir="${testdir}"
33 | cd "\${workdir}/Structure_threader-0.1-rc3/"
34 | python3 structure_threader.py "\$@" -st "\${workdir}/structure_kernel_src/structure" > "\$LOG_FILE" 2>&1
35 | EOF
36 | chmod +x structure_threader-structure_4
37 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_4/results-definition.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/results-definition.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_4/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 Structure runs on 4 threads
6 | 0.1-rc3
7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 0.2.0
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-essential
21 | 2
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 4 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_6/downloads.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/downloads.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_6/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Install script for the PTS "structure_threader-structure_6" test profile:
4 | # builds the STRUCTURE kernel from source, unpacks Structure_threader and
5 | # writes the "structure_threader-structure_6" launcher script.
6 |
7 | # Abort on the first failing command so a broken build is not masked by a
8 | # launcher that points at a missing binary
9 | set -e
10 |
11 | # Define test dir
12 | testdir=$(pwd)
13 |
14 | # Create env dir for dependent libraries
15 | mkdir -p "${testdir}/env"
16 |
17 | # Install dependencies
18 | # Structure
19 | cd "${testdir}"
20 | tar -zxvf structure_kernel_source.tar.gz
21 | cd structure_kernel_src
22 | make
23 |
24 | # Install Structure_threader
25 | cd "${testdir}"
26 | tar xfvz v0.1-rc3.tar.gz
27 |
28 | # Create launcher script. ${testdir} is expanded now (install time); the
29 | # \$-escaped variables are expanded when the launcher runs.
30 | cat > structure_threader-structure_6 <<EOF
31 | #!/bin/bash
32 | workdir="${testdir}"
33 | cd "\${workdir}/Structure_threader-0.1-rc3/"
34 | python3 structure_threader.py "\$@" -st "\${workdir}/structure_kernel_src/structure" > "\$LOG_FILE" 2>&1
35 | EOF
36 | chmod +x structure_threader-structure_6
37 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_6/results-definition.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/results-definition.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_6/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 Structure runs on 6 threads
6 | 0.1-rc3
7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 0.2.0
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-essential
21 | 2
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 6 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_8/downloads.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/downloads.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_8/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Install script for the PTS "structure_threader-structure_8" test profile:
4 | # builds the STRUCTURE kernel from source, unpacks Structure_threader and
5 | # writes the "structure_threader-structure_8" launcher script.
6 |
7 | # Abort on the first failing command so a broken build is not masked by a
8 | # launcher that points at a missing binary
9 | set -e
10 |
11 | # Define test dir
12 | testdir=$(pwd)
13 |
14 | # Create env dir for dependent libraries
15 | mkdir -p "${testdir}/env"
16 |
17 | # Install dependencies
18 | # Structure
19 | cd "${testdir}"
20 | tar -zxvf structure_kernel_source.tar.gz
21 | cd structure_kernel_src
22 | make
23 |
24 | # Install Structure_threader
25 | cd "${testdir}"
26 | tar xfvz v0.1-rc3.tar.gz
27 |
28 | # Create launcher script. ${testdir} is expanded now (install time); the
29 | # \$-escaped variables are expanded when the launcher runs.
30 | cat > structure_threader-structure_8 <<EOF
31 | #!/bin/bash
32 | workdir="${testdir}"
33 | cd "\${workdir}/Structure_threader-0.1-rc3/"
34 | python3 structure_threader.py "\$@" -st "\${workdir}/structure_kernel_src/structure" > "\$LOG_FILE" 2>&1
35 | EOF
36 | chmod +x structure_threader-structure_8
37 |
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_8/results-definition.xml:
--------------------------------------------------------------------------------
1 | ../structure_threader-structure_1/results-definition.xml
--------------------------------------------------------------------------------
/PTS/structure_threader-structure_8/test-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Timed 16 Structure runs on 8 threads
6 | 0.1-rc3
7 | Multi-threaded Bayesian calculation of population attribution, using the "Structure" algorithm. Will calculate clusterings for 4 values of "K" with 4 replicates for each value of "K".
8 | Seconds
9 | LIB
10 | Cork Oak clustering
11 | 3
12 |
13 |
14 | 0.2.0
15 | Linux, Solaris, MacOSX, BSD
16 | Scientific
17 | Processor
18 | Free
19 | Verified
20 | build-essential
21 | 2
22 | http://github.com/StuntsPT/Structure_threader
23 | Francisco Pina-Martins
24 |
25 |
26 |
27 | -i ../Reduced_dataset.structure -o ../ -K 4 -R 4 -t 8 -st \${workdir}/structure_kernel_src/structure --no-plots 1 --no-tests 1
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Structure_threader
2 | A program to parallelize the runs of [Structure](http://web.stanford.edu/group/pritchardlab/structure.html), [fastStructure](https://rajanil.github.io/fastStructure/), [MavericK](http://www.bobverity.com/home/maverick/what-is-maverick/) and [ALStructure](https://github.com/StoreyLab/alstructure) software.
3 |
4 | [](https://travis-ci.org/StuntsPT/Structure_threader) [](http://structure-threader.readthedocs.io/en/latest/?badge=latest)
5 | [](https://zenodo.org/badge/latestdoi/31598374)
6 | [](https://www.codacy.com/gh/StuntsPT/Structure_threader/dashboard?utm_source=github.com&utm_medium=referral&utm_content=StuntsPT/Structure_threader&utm_campaign=Badge_Grade)
7 | [](https://gitlab.com/StuntsPT/Structure_threader/pipelines)
8 |
9 |
10 | ## Installation
11 |
12 | ```bash
13 | pip3 install structure_threader
14 | ```
15 |
16 | *Structure_threader* is available on
17 | [PyPI](https://pypi.python.org/pypi/structure_threader/). It can be
18 | installed by simply running the above command. If you are on a \*nix like
19 | platform, you can use the `--user` option if you can't or don't want to install
20 | the program as `root` user. Binaries for STRUCTURE, fastStructure and
21 | *MavericK* are also distributed for GNU/Linux and Mac OS X. For more details,
22 | please [check the
23 | manual](http://structure-threader.readthedocs.io/en/latest/install/).
24 |
25 |
26 | ## Manual
27 | The complete documentation can be found on [readthedocs.org](http://structure-threader.readthedocs.io/en/latest/).
28 |
29 |
30 | ## Citation
31 | If you use *Structure_threader*, please cite our
32 | ~~[Zenodo DOI](https://zenodo.org/badge/latestdoi/31598374).~~
33 | [Molecular Ecology Resources paper](http://doi.org/10.1111/1755-0998.12702)
34 |
35 | ### Full citation:
36 |
37 |
Pina-Martins, F., Silva, D. N., Fino, J., & Paulo, O. S. (2017). Structure_threader: An improved method for automation and parallelization of programs structure, fastStructure and MavericK on multicore CPU systems. Molecular Ecology Resources, n/a-n/a. doi:10.1111/1755-0998.12702
38 |
39 |
40 |
41 | ## License
42 | GPLv3
43 |
--------------------------------------------------------------------------------
/TestData/BigTestData.str.tar.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/TestData/BigTestData.str.tar.xz
--------------------------------------------------------------------------------
/TestData/README.md:
--------------------------------------------------------------------------------
1 | ../docs/test_data.md
--------------------------------------------------------------------------------
/TestData/extraparams:
--------------------------------------------------------------------------------
1 |
2 | EXTRA PARAMS FOR THE PROGRAM structure. THESE PARAMETERS CONTROL HOW THE
3 | PROGRAM RUNS. ATTRIBUTES OF THE DATAFILE AS WELL AS K AND RUNLENGTH ARE
4 | SPECIFIED IN mainparams.
5 |
6 | "(int)" means that this takes an integer value.
7 | "(d)" means that this is a double (ie, a Real number such as 3.14).
8 | "(B)" means that this variable is Boolean
9 | (ie insert 1 for True, and 0 for False).
10 |
11 | PROGRAM OPTIONS
12 |
13 | #define NOADMIX 0 // (B) Use no admixture model (0=admixture model, 1=no-admix)
14 | #define LINKAGE 0 // (B) Use the linkage model
15 | #define USEPOPINFO 0 // (B) Use prior population information to pre-assign individuals
16 | to clusters
17 | #define LOCPRIOR 0 //(B) Use location information to improve weak data
18 |
19 | #define FREQSCORR 0 // (B) allele frequencies are correlated among pops
20 | #define ONEFST 0 // (B) assume same value of Fst for all subpopulations.
21 |
22 | #define INFERALPHA 1 // (B) Infer ALPHA (the admixture parameter)
23 | #define POPALPHAS 0 // (B) Individual alpha for each population
24 | #define ALPHA 1.0 // (d) Dirichlet parameter for degree of admixture
25 | (this is the initial value if INFERALPHA==1).
26 |
27 | #define INFERLAMBDA 0 // (B) Infer LAMBDA (the allele frequencies parameter)
28 | #define POPSPECIFICLAMBDA 0 //(B) infer a separate lambda for each pop
29 | (only if INFERLAMBDA=1).
30 | #define LAMBDA 1.0 // (d) Dirichlet parameter for allele frequencies
31 |
32 |
33 |
34 |
35 | PRIORS
36 |
37 | #define FPRIORMEAN 0.01 // (d) Prior mean and SD of Fst for pops.
38 | #define FPRIORSD 0.05 // (d) The prior is a Gamma distribution with these parameters
39 |
40 | #define UNIFPRIORALPHA 1 // (B) use a uniform prior for alpha;
41 | otherwise gamma prior
42 | #define ALPHAMAX 10.0 // (d) max value of alpha if uniform prior
43 | #define ALPHAPRIORA 1.0 // (only if UNIFPRIORALPHA==0): alpha has a gamma
44 | prior with mean A*B, and
45 | #define ALPHAPRIORB 2.0 // variance A*B^2.
46 |
47 |
48 | #define LOG10RMIN -4.0 //(d) Log10 of minimum allowed value of r under linkage model
49 | #define LOG10RMAX 1.0 //(d) Log10 of maximum allowed value of r
50 | #define LOG10RPROPSD 0.1 //(d) standard deviation of log r in update
51 | #define LOG10RSTART -2.0 //(d) initial value of log10 r
52 |
53 |
54 | USING PRIOR POPULATION INFO (USEPOPINFO)
55 |
56 | #define GENSBACK 2 //(int) For use when inferring whether an indiv-
57 | idual is an immigrant, or has an immigrant an-
58 | cestor in the past GENSBACK generations. eg, if
59 | GENSBACK==2, it tests for immigrant ancestry
60 | back to grandparents.
61 | #define MIGRPRIOR 0.01 //(d) prior prob that an individual is a migrant
62 | (used only when USEPOPINFO==1). This should
63 | be small, eg 0.01 or 0.1.
64 | #define PFROMPOPFLAGONLY 0 // (B) only use individuals with POPFLAG=1 to update P.
65 | This is to enable use of a reference set of
66 | individuals for clustering additional "test"
67 | individuals.
68 |
69 | LOCPRIOR MODEL FOR USING LOCATION INFORMATION
70 |
71 | #define LOCISPOP 0 //(B) use POPDATA for location information
72 | #define LOCPRIORINIT 1.0 //(d) initial value for r, the location prior
73 | #define MAXLOCPRIOR 20.0 //(d) max allowed value for r
74 |
75 |
76 |
77 |
78 | OUTPUT OPTIONS
79 |
80 | #define PRINTNET 1 // (B) Print the "net nucleotide distance" to screen during the run
81 | #define PRINTLAMBDA 1 // (B) Print current value(s) of lambda to screen
82 | #define PRINTQSUM 1 // (B) Print summary of current population membership to screen
83 |
84 | #define SITEBYSITE 0 // (B) whether or not to print site by site results.
85 | (Linkage model only) This is a large file!
86 | #define PRINTQHAT 0 // (B) Q-hat printed to a separate file. Turn this
87 | on before using STRAT.
88 | #define UPDATEFREQ 100 // (int) frequency of printing update on the screen.
89 | Set automatically if this is 0.
90 | #define PRINTLIKES 0 // (B) print current likelihood to screen every rep
91 | #define INTERMEDSAVE 0 // (int) number of saves to file during run
92 |
93 | #define ECHODATA 1 // (B) Print some of data file to screen to check
94 | that the data entry is correct.
95 | (NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:)
96 | #define ANCESTDIST 0 // (B) collect data about the distribution of an-
97 | cestry coefficients (Q) for each individual
98 | #define NUMBOXES 1000 // (int) the distribution of Q values is stored as
99 | a histogram with this number of boxes.
100 | #define ANCESTPINT 0.90 // (d) the size of the displayed probability
101 | interval on Q (values between 0.0--1.0)
102 |
103 |
104 |
105 | MISCELLANEOUS
106 |
107 | #define COMPUTEPROB 1 // (B) Estimate the probability of the Data under
108 | the model. This is used when choosing the
109 | best number of subpopulations.
110 | #define ADMBURNIN 500 // (int) [only relevant for linkage model]:
111 | Initial period of burnin with admixture model (see Readme)
112 | #define ALPHAPROPSD 0.025 // (d) SD of proposal for updating alpha
113 | #define STARTATPOPINFO 0 // Use given populations as the initial condition
114 | for population origins. (Need POPDATA==1). It
115 | is assumed that the PopData in the input file
116 | are between 1 and k where k<=MAXPOPS.
117 | #define RANDOMIZE 1 // (B) use new random seed for each run
118 | #define SEED 0 // (int) seed value for random number generator
119 | (must set RANDOMIZE=0)
120 | #define METROFREQ 10 // (int) Frequency of using Metropolis step to update
121 | Q under admixture model (ie use the metr. move every
122 | i steps). If this is set to 0, it is never used.
123 | (Proposal for each q^(i) sampled from prior. The
124 | goal is to improve mixing for small alpha.)
125 | #define REPORTHITRATE 0 // (B) report hit rate if using METROFREQ
126 |
--------------------------------------------------------------------------------
/TestData/joblist.txt:
--------------------------------------------------------------------------------
1 | T1 1 1 50000 1000000
2 | T2 1 1 50000 1000000
3 | T3 1 1 50000 1000000
4 | T4 1 1 50000 1000000
5 | T5 1 2 50000 1000000
6 | T6 1 2 50000 1000000
7 | T7 1 2 50000 1000000
8 | T8 1 2 50000 1000000
9 | T9 1 3 50000 1000000
10 | T10 1 3 50000 1000000
11 | T11 1 3 50000 1000000
12 | T12 1 3 50000 1000000
13 | T13 1 4 50000 1000000
14 | T14 1 4 50000 1000000
15 | T15 1 4 50000 1000000
16 | T16 1 4 50000 1000000
17 |
--------------------------------------------------------------------------------
/TestData/mainparams:
--------------------------------------------------------------------------------
1 |
2 | KEY PARAMETERS FOR THE PROGRAM structure. YOU WILL NEED TO SET THESE
3 | IN ORDER TO RUN THE PROGRAM. VARIOUS OPTIONS CAN BE ADJUSTED IN THE
4 | FILE extraparams.
5 |
6 |
7 | "(int)" means that this takes an integer value.
8 | "(B)" means that this variable is Boolean
9 | (ie insert 1 for True, and 0 for False)
10 | "(str)" means that this is a string (but not enclosed in quotes!)
11 |
12 |
13 | Basic Program Parameters
14 |
15 | #define MAXPOPS KKK // (int) number of populations assumed
16 | #define BURNIN 50000 // (int) length of burnin period
17 | #define NUMREPS 1000000 // (int) number of MCMC reps after burnin
18 |
19 | Input/Output files
20 |
21 | #define INFILE /home/francisco/structure/sobreiro_snps_structure_bin_pop.input // (str) name of input data file
22 | #define OUTFILE outfile //(str) name of output data file
23 |
24 | Data file format
25 |
26 | #define NUMINDS 100 // (int) number of diploid individuals in data file
27 | #define NUMLOCI 80 // (int) number of loci in data file
28 | #define PLOIDY 2 // (int) ploidy of data
29 | #define MISSING -9 // (int) value given to missing genotype data
30 | #define ONEROWPERIND 0 // (B) store data for individuals in a single line
31 |
32 |
33 | #define LABEL 1 // (B) Input file contains individual labels
34 | #define POPDATA 1 // (B) Input file contains a population identifier
35 | #define POPFLAG 0 // (B) Input file contains a flag which says
36 | whether to use popinfo when USEPOPINFO==1
37 | #define LOCDATA 0 // (B) Input file contains a location identifier
38 |
39 | #define PHENOTYPE 0 // (B) Input file contains phenotype information
40 | #define EXTRACOLS 0 // (int) Number of additional columns of data
41 | before the genotype data start.
42 |
43 | #define MARKERNAMES 1 // (B) data file contains row of marker names
44 | #define RECESSIVEALLELES 0 // (B) data file contains dominant markers (eg AFLPs)
45 | // and a row to indicate which alleles are recessive
46 | #define MAPDISTANCES 0 // (B) data file contains row of map distances
47 | // between loci
48 |
49 |
50 | Advanced data file options
51 |
52 | #define PHASED 0 // (B) Data are in correct phase (relevant for linkage model only)
53 | #define PHASEINFO 0 // (B) the data for each individual contains a line
54 | indicating phase (linkage model)
55 | #define MARKOVPHASE 0 // (B) the phase info follows a Markov model.
56 | #define NOTAMBIGUOUS -999 // (int) for use in some analyses of polyploid data
57 |
58 |
59 |
60 | Command line options:
61 |
62 | -m mainparams
63 | -e extraparams
64 | -s stratparams
65 | -K MAXPOPS
66 | -L NUMLOCI
67 | -N NUMINDS
68 | -i input file
69 | -o output file
70 | -D SEED
71 |
--------------------------------------------------------------------------------
/TestData/mav_benchmark_parameters.txt:
--------------------------------------------------------------------------------
1 | #### Data properties
2 | headerRow_on t
3 | popCol_on t
4 | ploidyCol_on f
5 | ploidy 2
6 | missingData -9
7 |
8 |
9 | #### Model parameters
10 | Kmin 1
11 | Kmax 16
12 | admix_on t
13 | fixAlpha_on f
14 | alpha 1.0
15 | alphaPropSD 0.10
16 |
17 |
18 | #### Simulation parameters
19 | exhaustive_on f
20 | mainRepeats 5
21 | mainBurnin 2500
22 | mainSamples 10000
23 |
24 | thermodynamic_on t
25 | thermodynamicRungs 20
26 | thermodynamicBurnin 200
27 | thermodynamicSamples 1000
28 |
29 |
30 | #### Basic output properties
31 | outputLog_on t
32 | outputLikelihood_on t
33 | outputQmatrix_ind_on t
34 | outputQmatrix_pop_on t
35 | outputEvidence_on t
36 | outputEvidenceDetails_on t
37 |
38 |
39 | #### Output location
40 | outputRoot default/
41 |
--------------------------------------------------------------------------------
/TestData/parameters.txt:
--------------------------------------------------------------------------------
1 | #### Data properties
2 | headerRow_on t
3 | popCol_on t
4 | ploidyCol_on f
5 | ploidy 2
6 | missingData -9
7 |
8 |
9 | #### Model parameters
10 | Kmin 1
11 | Kmax 4
12 | admix_on t
13 | fixAlpha_on f
14 | alpha 1.0
15 | alphaPropSD 0.10
16 |
17 |
18 | #### Simulation parameters
19 | exhaustive_on f
20 | mainRepeats 5
21 | mainBurnin 500
22 | mainSamples 4000
23 |
24 | thermodynamic_on t
25 | thermodynamicRungs 20
26 | thermodynamicBurnin 1000
27 | thermodynamicSamples 5000
28 |
29 |
30 | #### Basic output properties
31 | outputLog_on t
32 | outputLikelihood_on t
33 | outputQmatrix_ind_on t
34 | outputQmatrix_pop_on t
35 | outputEvidence_on t
36 | outputEvidenceDetails_on t
37 |
38 |
39 | #### Output location
40 |
--------------------------------------------------------------------------------
/benchmarks/ParallelStructure_runner.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/Rscript
2 | # Copyright 2016 Francisco Pina Martins
3 | # This file is part of Structure_threader.
4 | # Structure_threader is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 |
9 | # Structure_threader is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 |
14 | # You should have received a copy of the GNU General Public License
15 | # along with Structure_threader. If not, see <http://www.gnu.org/licenses/>.
16 |
17 | # Usage: Rscript ParallelStructure_runner.R "number_of_threads_to_use"
18 |
library(ParallelStructure)

## Define variables
# Get number of threads from CLI
args <- commandArgs(trailingOnly = TRUE)

# Fail early, with a readable message, when the thread count is missing or is
# not a positive integer (command line arguments always arrive as strings).
if (length(args) < 1) {
  stop("Usage: Rscript ParallelStructure_runner.R \"number_of_threads_to_use\"")
}
n_threads <- suppressWarnings(as.integer(args[1]))
if (is.na(n_threads) || n_threads < 1) {
  stop("The number of threads must be a positive integer, got: ", args[1])
}

# Joblist location
joblist_location = "/home/francisco/bench/joblist.txt"
# Location of STRUCTURE binary
structure_bin = "/opt/structure/bin/"
# Infile location
infile_path = "/home/francisco/bench/SmallTestData.structure"
# Outfile location
outfile_path = "Results/" # Yes! This C**p program takes arguments in both full and relative path simultaneously!
# Number of individuals
n_inds = 100
# Number of loci
n_loci = 80

# Run every job listed in the joblist, using "n_threads" CPUs.
parallel_structure(joblist = joblist_location, n_cpu = n_threads,
                   structure_path = structure_bin, infile = infile_path,
                   outpath = outfile_path, numinds = n_inds, numloci = n_loci,
                   plot_output = 0, label = 1, popdata = 1, popflag = 0,
                   locdata = 0, phenotypes = 0, markernames = 1,
                   mapdist = 0, onerowperind = 0, phaseinfo = 0,
                   recessivealleles = 0, phased = 0, extracol = 0, missing = -9,
                   ploidy = 2, noadmix = 0, linkage = 0, usepopinfo = 0,
                   locprior = 0, inferalpha = 1, alpha = 1, popalphas = 0,
                   unifprioralpha = 1, alphamax = 10, alphapropsd = 0.025,
                   freqscorr = 0, onefst = 0, fpriormean = 0.01,
                   fpriorsd = 0.05, inferlambda = 0, lambda = 1,
                   computeprob = 1, pfromflagonly = 0, ancestdist = 0,
                   startatpopinfo = 0, metrofreq = 10, updatefreq = 100,
                   printqhat = 0, revert_convert = 0, randomize = 1)
53 |
--------------------------------------------------------------------------------
/benchmarks/README.md:
--------------------------------------------------------------------------------
1 | # Benchmarks for *Structure_threader*
2 |
3 | In this directory you will find the files used for benchmarking the single threaded runs of both STRUCTURE and fastStructure, as well as some results.
4 | The scripts to draw the speedup plots and the barplots can be found here as well.
5 |
6 |
7 | ## Contents:
8 |
9 | * benchmark.sh
10 | * benchmark_fast.sh
11 | * speedup_plotter.py
12 | * bar_plotter.py
13 |
14 |
15 | ### benchmark.sh
16 |
17 | This is a [Zsh](http://www.zsh.org/) script to run STRUCTURE sequentially for 16 jobs, 4 jobs for each value of "K" (from 1 to 4).
18 | It does not log the runs, nor the results (everything is written into the same file).
19 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run.
20 |
21 |
22 | ### benchmark_fast.sh
23 |
24 | This is a [Zsh](http://www.zsh.org/) script to run fastStructure sequentially for 16 jobs, one job for each value of "K" (from 1 to 16).
25 | It does not log the runs, nor the results (everything is written into the same file).
26 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run.
27 |
28 |
29 | ### speedup_plotter.py
30 |
31 | This is the python script that was used to create the speedup plots for the generated data.
32 |
33 | ### bar_plotter.py
34 |
35 | This is the python script that was used to create the bar plots for the single threaded vs. multi-threaded run times.
36 |
--------------------------------------------------------------------------------
/benchmarks/Side_by_side.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/benchmarks/Side_by_side.png
--------------------------------------------------------------------------------
/benchmarks/bar_plotter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2015 Francisco Pina Martins
4 | # This file is part of speedup_plotter.
5 | # speedup_plotter is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # speedup_plotter is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with speedup_plotter. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import matplotlib.pyplot as plt
19 | import numpy
20 |
21 | from speedup_plotter import data_harverster
22 |
23 |
def draw_bar_plot(dataframes, output_prefix=None):
    """
    Draws a bar plot with the different times for single vs. multiple
    threads implementations and saves it as an SVG file.

    "dataframes" is a 2-D array: column 0 holds the values of "K" (used as
    x tick labels), column 1 the single threaded times and column 2 the
    multi-threaded times.

    The plot is written to output_prefix + "_plot.svg". When "output_prefix"
    is not given, the first command line argument is used, which is what the
    script does when it is run directly.
    """
    if output_prefix is None:
        # Local import: the function used to depend on a global "argv" that
        # only exists when this file is run as a script.
        from sys import argv
        output_prefix = argv[1]

    group_count = len(dataframes[:, 0])
    single_times = dataframes[:, 1]
    threaded_times = dataframes[:, 2]

    locs = numpy.arange(group_count)  # the x locations for the groups
    width = 0.35  # the width of the bars

    fig, ax = plt.subplots()
    # align="edge" makes each bar start at its x location, so that the two
    # bars of a group touch at "locs + width", which is where the ticks are
    # placed below. Newer matplotlib releases centre bars by default, which
    # would leave the ticks off-centre.
    rects1 = ax.bar(locs, single_times, width, color='grey', align="edge")
    rects2 = ax.bar(locs + width, threaded_times, width, color='darkgrey',
                    align="edge")

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Time (s)')
    ax.set_title('Time to calculate clustering for each value of "K", single, '
                 'vs. multiple threading')
    ax.set_xticks(locs + width)
    ax.set_xticklabels(list(map(int, dataframes[:, 0])))

    ax.legend((rects1[0], rects2[0]), ('Single thread', '8 threads'),
              loc="upper left")

    ax.grid(True, zorder=0)

    plt.savefig(output_prefix + "_plot.svg", format="svg")
55 |
if __name__ == "__main__":
    # Usage: python3 bar_plotter.py K_times.csv
    # Imported here so that "argv" is a module-level name when the file is
    # run as a script.
    from sys import argv
    # Read the csv file given on the command line and plot it straight away.
    draw_bar_plot(data_harverster(argv[1]))
61 |
--------------------------------------------------------------------------------
/benchmarks/benchmark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/zsh
2 |
# Run STRUCTURE sequentially: 4 replicates for each value of K (1 to 4),
# 16 runs in total. Every run writes to the same output file.
for k in {1..4}
do
    for replicate in {1..4}
    do
        /opt/structure/bin/structure -K $k -i TestData.structure -o bench.txt
    done
done
9 |
10 |
--------------------------------------------------------------------------------
/benchmarks/benchmark_fast.sh:
--------------------------------------------------------------------------------
1 | #!/bin/zsh
2 |
# Base name of the input file (looked up under ../TestData/).
dataset=BigTestData

# Run fastStructure once for each value of K from 1 to 16, timing every run.
for k in {1..16}
do
    echo "Currently running K=${k}."
    /usr/bin/time -f %E python2 ~/Software/fastStructure/structure.py -K $k --input=../TestData/${dataset} --output=../TestData/${dataset}_out --format=str
done
12 |
--------------------------------------------------------------------------------
/benchmarks/speedup_plotter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2015 Francisco Pina Martins
4 | # This file is part of speedup_plotter.
5 | # speedup_plotter is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # speedup_plotter is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with speedup_plotter. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import matplotlib.pyplot as plt
19 | import numpy
20 |
def data_harverster(datafile_name):
    """
    Read the speedup data from a ";" separated csv file.

    The first line of the file is skipped (header), surrounding whitespace is
    stripped from every field and everything is parsed as float. Returns the
    resulting numpy array.
    """
    parsing_options = {
        "delimiter": ";",
        "autostrip": True,
        "dtype": float,
        "skip_header": True,
        "filling_values": False,
    }

    return numpy.genfromtxt(datafile_name, **parsing_options)
28 |
29 |
def draw_plot(timearray, output_prefix=None):
    """
    Draw a line plot based on the speedup data and save it as an SVG file.

    "timearray" is a 2-D array such as the one returned by data_harverster().
    Columns 1 to 4 are plotted as "i7-4700MQ", "E5-2609", "E5520" and
    "i5-3350P" respectively. The x values used below imply 8 data rows
    (2 to 16 threads in steps of 2); a speedup of 1 for a single thread is
    prepended to every series.

    The plot is written to output_prefix + "_plot.svg". When "output_prefix"
    is not given, the first command line argument is used, which is what the
    script does when it is run directly.
    """
    if output_prefix is None:
        # Local import: the function used to depend on a global "argv" that
        # only exists when this file is run as a script.
        from sys import argv
        output_prefix = argv[1]

    # Trailing rows are dropped for the CPUs that are plotted with fewer
    # thread counts (presumably they were not benchmarked beyond their core
    # count -- TODO confirm).
    i7 = [1] + list(timearray[:-4, 1])
    e5 = [1] + list(timearray[:-4, 2])
    oldxeon = [1] + list(timearray[:, 3])
    i5 = [1] + list(timearray[:-6, 4])

    plt.axis([0, 16, 0, 16])
    plt.plot([1, 2, 4, 6, 8], i7, 'k-v', fillstyle="full", ms=7,
             label="i7-4700MQ")
    plt.plot([1, 2, 4], i5, 'k-^', fillstyle="full", ms=7, label="i5-3350P")
    plt.plot([1, 2, 4, 6, 8, 10, 12, 14, 16], oldxeon, 'k-x', fillstyle="full",
             ms=7, label="E5520")
    plt.plot([1, 2, 4, 6, 8], e5, 'k+-', fillstyle="full", ms=7,
             label="E5-2609")

    # range(17) so that the reference line reaches the end of the axis (16).
    plt.plot(range(17), range(17), 'k-.', label="Linear scaling")

    plt.grid(True)
    plt.xlabel("Number of threads")
    plt.ylabel("Speed increase")
    plt.legend(loc=2, fontsize="small")
    plt.savefig(output_prefix + "_plot.svg", format="svg")
57 | #plt.show()
58 |
if __name__ == "__main__":
    # Usage: python3 speedup_plotter.py speedup_data.csv
    # Imported here so that "argv" is a module-level name when the file is
    # run as a script.
    from sys import argv
    # Read the csv file given on the command line and plot it straight away.
    draw_plot(data_harverster(argv[1]))
63 |
--------------------------------------------------------------------------------
/benchmarks/system_speedup_plotter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2015-2017 Francisco Pina Martins
4 | # This file is part of speedup_plotter.
5 | # speedup_plotter is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # speedup_plotter is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with speedup_plotter. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import matplotlib.pyplot as plt
19 | import numpy
20 |
def data_harverster(datafile_name):
    """
    Read the speedup data from a ";" separated csv file.

    The first line of the file provides the column names, surrounding
    whitespace is stripped from every field and all values are parsed as
    float. Returns the resulting numpy structured array.
    """
    return numpy.genfromtxt(
        datafile_name,
        names=True,
        dtype=float,
        delimiter=";",
        autostrip=True,
        skip_header=False,
        filling_values=False,
    )
30 |
31 |
def draw_plot(timearray, output_prefix=None):
    """
    Draw a line plot based on speedup data and save it as an SVG file.

    "timearray" is a structured array such as the one returned by
    data_harverster(): the "CPUs" field holds the number of threads (x axis)
    and every other field is plotted as one line, labelled with the field
    name.

    The plot is written to output_prefix + "_plot.svg". When "output_prefix"
    is not given, the first command line argument is used, which is what the
    script does when it is run directly.
    """
    from itertools import cycle

    if output_prefix is None:
        # Local import: the function used to depend on a global "argv" that
        # only exists when this file is run as a script.
        from sys import argv
        output_prefix = argv[1]

    # A file with a single data row is read as a 0-d array; make it iterable.
    timearray = numpy.atleast_1d(timearray)

    cpu_counts = list(map(int, timearray["CPUs"]))
    system_cores = max(cpu_counts)
    names = [x for x in timearray.dtype.names if x != "CPUs"]
    linetypes = ("k-", "k:", "k--")

    plt.axis([1, system_cores + 1, 1, system_cores + 1])
    # The line styles are cycled, so that files with more than three systems
    # can still be plotted (styles repeat from the fourth system onwards).
    for name, linetype in zip(names, cycle(linetypes)):
        plt.plot(cpu_counts, timearray[name], linetype,
                 fillstyle="full", ms=7, label=name)
    plt.plot(range(1, system_cores + 2), range(1, system_cores + 2), 'k-.',
             label="Linear scaling")

    plt.grid(True)
    plt.xlabel("Number of threads")
    plt.ylabel("Speed increase")
    plt.xticks(cpu_counts)
    plt.legend(loc=2, fontsize="small")
    plt.savefig(output_prefix + "_plot.svg", format="svg")
54 | #plt.show()
55 |
if __name__ == "__main__":
    # Usage: python3 system_speedup_plotter.py speedup_data.csv
    # Imported here so that "argv" is a module-level name when the file is
    # run as a script.
    from sys import argv
    # Read the csv file given on the command line and plot it straight away.
    draw_plot(data_harverster(argv[1]))
60 |
--------------------------------------------------------------------------------
/docs/benchmark.md:
--------------------------------------------------------------------------------
1 | # Benchmarking process
2 |
3 | You can find some of the scripts used for the benchmarking process inside the [*benchmarks* directory](https://github.com/StuntsPT/Structure_threader/tree/master/benchmarks). Inside this directory you will find the files used for benchmarking the single threaded runs of both STRUCTURE and *fastStructure*, as well as some results.
4 | The scripts to draw the speedup plots and the barplots can be found there as well.
5 | You will also find relevant documentation, which is reproduced here.
6 |
7 |
8 | ## Directory contents:
9 |
10 | * benchmark.sh
11 | * benchmark_fast.sh
12 | * speedup_plotter.py
13 | * bar_plotter.py
14 |
15 |
16 | ### benchmark.sh
17 |
18 | This is a [Zsh](http://www.zsh.org/) script to run STRUCTURE sequentially for 16 jobs, 4 jobs for each value of "K" (from 1 to 4).
19 | It does not log the runs, nor the results (everything is written into the same file).
20 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run.
21 |
22 |
23 | ### benchmark_fast.sh
24 |
25 | This is a [Zsh](http://www.zsh.org/) script to run fastStructure sequentially for 16 jobs, one job for each value of "K" (from 1 to 16).
26 | It does not log the runs, nor the results (everything is written into the same file).
27 | It was used with the Unix [time](http://linux.die.net/man/1/time) program to log the time it took to run.
28 |
29 |
30 | ### speedup_plotter.py
31 |
32 | This is the python script that was used to create the speedup plots for the generated data.
33 |
34 |
35 | ### bar_plotter.py
36 |
37 | This is the python script that was used to create the bar plots for the single threaded vs. multi-threaded run times.
38 |
--------------------------------------------------------------------------------
/docs/binaries.md:
--------------------------------------------------------------------------------
1 | # Binary building
2 |
3 | For your convenience, we have pre-built binaries of STRUCTURE and
4 | fastStructure. They are provided with the package under
5 | `structure_threader/bins/$platform/`.
6 | Here is how they were built.
7 |
8 | ## The build system
9 |
10 | ### GNU/Linux binaries
11 | Binaries were built on a machine with an Intel Xeon E5-2609 0 @ 2.40GHz CPU.
12 | The OS under which the binaries were built is Ubuntu 12.04 64bit. This "old"
13 | OS was used since linux systems have backwards, but not forwards compatibility.
14 | This means that binaries built on older systems will run on newer systems, but
15 | the opposite may not be true.
16 |
17 | ### OSX binaries
18 | Binaries were built on a Mid 2013 MacBook Air with a "Haswell" based i5 CPU, running OSX 10.10 Yosemite.
19 | They should be forward compatible with later OSX releases.
20 |
21 | ## STRUCTURE
22 |
23 | STRUCTURE is relatively simple to build. Source code can be obtained from the
24 | [STRUCTURE website](http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz). To build the binary, we used our helper
25 | script "install_structure.sh". The binary version is 2.3.4.
26 |
27 |
28 | ## fastStructure
29 |
30 | fastStructure requires a more involved process to build as it requires many
31 | dependencies. It can be obtained from
32 | [its own github repository](https://github.com/rajanil/fastStructure).
33 | Although fastStructure is written in python 2, it uses compiled code, by making
34 | use of `cython`.
35 | To build the binary, we have installed fastStructure using our helper script
36 | "install_faststructure.sh" and then we have used
37 | [pyinstaller](http://www.pyinstaller.org/) to turn it into a binary. The used
38 | "specfile" can be found [here](https://github.com/StuntsPT/Structure_threader/tree/master/helper_scripts/structure.spec) which contains all the required information to reproduce the
39 | build. The binary version is 1.0.
40 |
--------------------------------------------------------------------------------
/docs/citation.md:
--------------------------------------------------------------------------------
1 | # Citation
2 |
3 | If you use *Structure_threader*, please cite:
4 |
5 | ~~[Zenodo DOI](https://zenodo.org/badge/latestdoi/31598374).~~
6 | [Molecular Ecology Resources paper](http://doi.org/10.1111/1755-0998.12702)
7 |
8 | ### Full citation:
9 |
10 |
Pina-Martins, F., Silva, D. N., Fino, J., & Paulo, O. S. (2017). Structure_threader: An improved method for automation and parallelization of programs structure, fastStructure and MavericK on multicore CPU systems. Molecular Ecology Resources, n/a-n/a. doi:10.1111/1755-0998.12702
11 |
12 |
13 |
14 | If you used STRUCTURE, please cite:
15 |
16 | * [Pritchard JK, Stephens M, Donnelly P (2000) Inference of population structure using multilocus genotype data. Genetics, 155, 945–959.](http://www.genetics.org/content/155/2/945)
17 |
18 | If you used fastStructure, please cite:
19 |
20 | * [Raj A, Stephens M, Pritchard JK (2014) fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets. Genetics, 197, 573–589.](http://www.genetics.org/content/197/2/573.long)
21 |
22 | If you used the evanno test module, please cite:
23 |
24 | * [Earl DA, vonHoldt BM (2012) STRUCTURE HARVESTER: a website and program for visualizing STRUCTURE output and implementing the Evanno method. Conservation Genetics Resources, 4, 359–361.](http://link.springer.com/article/10.1007%2Fs12686-011-9548-7)
25 |
26 | If you used MavericK, please cite:
27 |
28 | * [Verity, R & Nichols, R.A (2016). Estimating the number of subpopulations (K) in structured populations. Genetics 203.4, 1827-1839.](https://doi.org/10.1534/genetics.115.180992)
29 |
--------------------------------------------------------------------------------
/docs/external.md:
--------------------------------------------------------------------------------
1 | # Manually installing external programs
2 |
3 | If you wish to compile your own binaries for these programs, you may wish to
4 | rely on our
5 | ["helper_scripts"](https://github.com/StuntsPT/Structure_threader/tree/master/helper_scripts)
6 | which contain commands to compile and install *MavericK*, *Structure* **and**
7 | *fastStructure* (along with any required dependencies). For more details check
8 | the next few sections.
9 |
10 | ## Structure_threader helper scripts
11 | The directory "helper_scripts" contains three scripts that will install *STRUCTURE*, *fastStructure* and *MavericK* respectively in a *semi* automatic way.
12 |
13 | All scripts default the programs' install locations to ~/Software/. You can change this in the scripts themselves should you wish to change this location.
14 |
15 |
16 | ### install_structure.sh
17 | This script will download and install STRUCTURE.
18 |
19 |
20 | #### Requirements:
21 | * a C compiler, such as GCC, with fortran support.
22 | * Cmake is required to build LAPACK
23 |
24 | This should be available in every HPC environment.
25 |
26 | In Ubuntu, all you should need is the package "build-essential" (if it is not
27 | already installed for some reason). It can be installed like this:
28 |
29 | ```
30 | sudo apt-get install build-essential
31 | ```
32 |
33 | In other distros, the package name should be similar.
34 |
35 | ### install_faststructure.sh
36 | This script will download and install fastStructure and its dependencies.
37 |
38 | fastStructure depends on quite a few software packages:
39 | * cython
40 | * numpy
41 | * scipy
42 | * GNU scientific library
43 |
44 | If these are already installed in your system, feel free to comment the script
45 | section that will install them. Otherwise it will install a new local copy of
46 | these programs. You can install these packages in Ubuntu with the following
47 | command:
48 |
49 | ```
50 | sudo apt-get install cython python-numpy python-scipy gsl-bin
51 | ```
52 |
53 | ### Important note:
54 | If you are relying on the GNU Scientific Library that was installed using the
55 | `install_faststructure` script, you will need to make your system aware of
56 | where these libraries are.
57 | For that, add the following to your `~/.bashrc`:
58 |
59 | ```bash
60 | LD_LIBRARY_PATH=$install_dir/lib
61 | export LD_LIBRARY_PATH
62 | ```
63 |
64 | Where `$install_dir` is the directory defined in `install_faststructure.sh`.
65 |
66 |
67 | ### install_maverick.sh
68 | This script will download, compile and install MavericK.
69 |
70 |
71 | #### Requirements:
72 | * a recent C compiler, such as GCC 6.1 and above.
73 |
74 | This should be available in every HPC environment.
75 |
76 | In Ubuntu, all you should need is the package "build-essential" (if it is not
77 | already installed for some reason). It can be installed like this:
78 |
79 | ```
80 | sudo apt-get install build-essential
81 | ```
82 |
83 | In other distros, the package name should be similar.
84 |
--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
1 | # FAQ
2 |
3 | Nothing so far. Will be updated as questions start to arise.
4 |
--------------------------------------------------------------------------------
/docs/future.md:
--------------------------------------------------------------------------------
1 | # Future Plans
2 | Here is a list of the features that are currently planned to implement in *Structure_threader*.
3 |
4 | * ~~Add a "setup.py" installation process~~
5 | * ~~Add unit tests~~
6 | * ~~Allow the fastStructure wrapper to use the ".bed" format too.~~
7 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # *Structure_threader*
2 |
3 | ## Description
4 |
5 | A program to parallelize and automate the runs of [Structure](http://web.stanford.edu/group/pritchardlab/structure.html), [fastStructure](https://rajanil.github.io/fastStructure/), [MavericK](http://www.bobverity.com/home/maverick/what-is-maverick/) and [ALStructure](https://github.com/StoreyLab/alstructure) software.
6 |
7 |
8 | ## Requirements
9 |
10 | Python 3. The main program only uses modules from the standard library.
11 | In order to draw the plots, matplotlib >= 1.4 is required (installed automatically as a dependency when installed via `pip`).
12 | To run "fastChooseK.py" (fastStructure wrapper only), numpy is also required (installed automatically as a dependency when installed via `pip`).
13 | In order to use "ALStructure", you need to have [R](https://www.r-project.org/) installed too (must be installed manually, as `pip` can't handle installing R or dependencies).
14 |
15 |
16 | ## Where to get it
17 |
18 | * Source code - [Structure_threader on gitlab](https://gitlab.com/StuntsPT/Structure_threader)
19 | * Source code - [Structure_threader on github](https://github.com/StuntsPT/Structure_threader)
20 | * Source distribution with platform binaries for wrapped programs - [Structure_threader on PyPI](https://pypi.python.org/pypi/structure_threader/)
21 | * You can easily install *Structure_threader* by issuing the command `pip3 install structure_threader`
22 |
23 |
24 | ## Contents
25 |
26 | * [Installation & dependencies](install.md)
27 | * [Binary building](binaries.md)
28 | * [Usage](usage.md)
29 | * [Output](output.md)
30 | * [Test Data](test_data.md)
31 | * [Benchmarking](benchmark.md)
32 | * [Citation](citation.md)
33 | * [Future Plans](future.md)
34 | * [FAQ](faq.md)
35 |
36 |
37 | ## A word of caution
38 |
39 | *Structure_threader* can be quite useful in automating and speeding up your analyses, however, in order to use it effectively you **really** should learn and understand how the wrapped programs work. It is **highly** recommended that you first learn to use the wrapped programs in their default implementations. And by "learning", we don't just mean "I know how to make it run.", but rather "I understand what each of the chosen parameters does, and why I selected each of them.".
40 | The paper [An overview of STRUCTURE: applications, parameter settings, and supporting software](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665925/) is an excellent guide for understanding the parameterization of *STRUCTURE*.
41 | We do not know of a good "tutorial" for learning about *fastStructure*, and as such, the [original research paper](http://www.genetics.org/content/197/2/573) (paywalled), albeit a bit dense, is still the best place to learn about it.
42 | The [documentation for *MavericK*](http://www.bobverity.com/home/maverick/additional-files/), for instance, is quite comprehensive and a great resource to learn to use *MavericK* and consequently about the importance of proper MCMC chain mixing.
43 |
44 |
45 | ## Other works
46 |
47 | The script "fastChooseK.py" was taken from [the original fastStructure repository](https://github.com/rajanil/fastStructure), ported to python 3, largely modified to work as a module for the main script and re-licensed as GPLv3.
48 |
49 | The scripts "harvesterCore.py" and "structureHarvester.py" were taken from [the original structureHarvester repository](https://github.com/dentearl/structureHarvester), ported to python 3, and slightly modified to work as a module for the main script. Please see the "Citation" part of the README to know what to cite, should you use this module.
50 |
51 | Binaries for [fastStructure](https://github.com/rajanil/fastStructure), [STRUCTURE](http://web.stanford.edu/group/pritchardlab/structure.html) and [MavericK](https://github.com/bobverity/MavericK) are distributed in the pypi hosted version.
52 |
53 |
54 | ## Bug reporting
55 |
56 | Found a bug or would like a feature added? Or maybe drop some feedback?
57 | Just [open a new issue on gitlab](https://gitlab.com/StuntsPT/Structure_threader/issues/new) [or on github](https://github.com/StuntsPT/Structure_threader/issues/new).
58 |
59 |
60 | ## License
61 |
62 | GPLv3
63 |
--------------------------------------------------------------------------------
/docs/output.md:
--------------------------------------------------------------------------------
1 | # Output
2 |
3 | The program will inform the user of what run is currently being processed by
4 | outputting the command it is running to STDOUT, such as this:
5 |
6 | ```
7 | Running: /opt/structure/bin/structure -K 1 -i input_file.structure -o results_admix/K1_rep10
8 | ```
9 |
10 | After each run, the corresponding output file is saved to the location chosen in
11 | the *Output dir* argument.
12 |
13 | When all tasks are performed the program will exit with the message:
14 | "All jobs finished."
15 | After these jobs are run, the program will use [Structure Harvester](http://taylor0.biology.ucla.edu/struct_harvest/) (or "fastChooseK.py" if wrapping *fastStructure*) to infer the optimal value of "K".
16 | Finally, the program will create plots with the inferred clustering, one for each calculated value of "K".
17 | A "Thermodynamic Integration" test will be performed to infer the bestK if using *MavericK*.
18 |
19 | ## Results
20 |
21 | After a successful run, inside the directory you selected as "output directory" (let's call it "My_results" for the sake of the example) you will find the following:
22 |
23 | * In the root of "My_results" you will find the "results files" outputted by the wrapped program. One file (directory, in the case of *MavericK*) for each replicate of "K".
24 | * Under "My_results/bestK" you will find either the results of the "Evanno test", the results of "fastChooseK.py", or the results of "Thermodynamic Integration" test, depending on what program was wrapped.
25 | * Under "My_results/plots" you will find one plot for each value of "K" in [SVG format](https://www.w3.org/Graphics/SVG/).
26 | * If logging was turned on, you will also find a detailed log file for each run in the root of "My_results".
27 |
28 | ## Obtaining STRUCTURE alpha values
29 |
30 | In order to obtain the alpha values of each STRUCTURE run, you can pass the option `--log 1` to *Structure_threader*. This will write a file named `K<knum>_rep<rnum>.stlog` for each run, where `knum` and `rnum` are the K value and replicate number respectively. This file contains the STDOUT generated by STRUCTURE. These files can be used to plot alpha vs. iteration plots to help assess MCMC chain convergence.
31 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs==1.5.1
2 |
--------------------------------------------------------------------------------
/docs/test_data.md:
--------------------------------------------------------------------------------
1 | # Test Data for *Structure_threader*
2 | In [this directory](https://github.com/StuntsPT/Structure_threader/tree/master/TestData) you will find the data that was used to benchmark *Structure_threader*.
3 |
4 |
5 | ## BigTestData.str.tar.xz
6 | This file is a *fastStructure* formatted input file which was used to benchmark *fastStructure*. This is a large SNP file (1000 SNPs across 1000 individuals) which was obtained from the [1000 genomes project](http://www.1000genomes.org). The file was downloaded from [chromosome 22](http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz), and was then filtered using [vcftools](https://github.com/vcftools/vcftools) with the following criteria:
7 |
8 | * only biallelic, non-singleton SNV sites
9 | * SNVs must be at least 2 kb apart from each other
10 | * minor allele frequency >= 0.05
11 |
12 | The used command was:
13 |
14 | ./vcftools --gzvcf \
15 | ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz \
16 | --maf 0.05 --thin 2000 --min-alleles 2 --max-alleles 2 --non-ref-ac 2 \
17 | --recode --chr 22 --out Chr22
18 |
19 | These were the criteria that were used on the *admixture* [analysis of the 1000 genomes project](http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/admixture_files/README.admixture_20141217).
20 |
21 | The file was then converted to structure format with [PGDSpider](http://www.cmpg.unibe.ch/software/PGDSpider/).
22 | To further reduce the dataset (for faster benchmarking), the file was then processed with `cut` and `head` and finally compressed with xz.
23 |
24 | The used commands were:
25 |
26 | cut -d " " -f 1-1000 Chr22.recode.str | head -n 2000 > BigTestData.str
27 | tar cvfJ BigTestData.str.tar.xz BigTestData.str
28 |
29 |
30 | ## BigTestData.bed.tar.xz
31 | This file is a *PLINK* formatted `.bed`, `.bim` and `.fam` set of files. They were obtained in the exact same way as `BigTestData.str.tar.xz`, except for the conversion using *PGDSPIDER*, which was not used. Instead, the filtered VCF file was reduced to 501 individuals and 1000 SNPs with the following command:
32 |
33 | head -n 1253 Chr22.recode.vcf |cut -f 1-510 > Testdata.vcf
34 |
35 | This file was then converted to the *PLINK* format and compressed with the following commands:
36 |
37 | plink1.9 --vcf Testdata.vcf
38 | mv plink.bed BigTestData.bed
39 | mv plink.fam BigTestData.fam
40 | mv plink.bim BigTestData.bim
41 | tar cvfJ BigTestData.bed.tar.xz BigTestData.bed BigTestData.fam BigTestData.bim
42 |
43 |
44 | ## BigTestData.vcf.tar.xz
45 | This file is *VCF* formatted. It was obtained in the exact same way as `BigTestData.str.tar.xz`, except for the conversion using *PGDSPIDER*, which was not used. Instead, the filtered VCF file was reduced to 501 individuals and 1000 SNPs and compressed with the following command:
46 |
47 | head -n 1253 Chr22.recode.vcf |cut -f 1-510 > BigTestData.vcf
48 | tar cvfJ BigTestData.vcf.tar.xz BigTestData.vcf
49 |
50 |
51 | ## extraparams and mainparams
52 | The *STRUCTURE* parameter files that were used in the benchmarking process.
53 |
54 |
55 | ## joblist.txt
56 | The joblist used to benchmark *ParallelStructure*. Consists of 16 jobs, 4 values of "K" with 4 replicates each.
57 |
58 |
59 | ## SmallTestData.structure
60 | This file is a Structure formatted input file which was used to benchmark STRUCTURE and *MavericK*. This is a medium sized SNP file (80 SNPs) which was obtained from the [1000 genomes project](http://www.1000genomes.org). The file was downloaded from [chromosome 22](http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz), and was then filtered using vcftools following the same criteria and commands as the BigTestData.str file.
61 |
62 |
63 | The used commands were:
64 |
65 | cut -d " " -f 1-80 SmallData.structure > SmallData302SNPs.structure
66 | head -n 201 SmallData302SNPs.structure > SmallTestData.structure
67 |
68 |
69 | ## parameters.txt
70 | The *MavericK* parameter file that is used in the unit tests.
71 |
72 |
73 | ## mav_benchmark_parameters.txt
74 | The file with the *MavericK* benchmark parameters.
75 |
--------------------------------------------------------------------------------
/helper_scripts/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:20.04

# apt stuff
# Refresh the package index and install in ONE layer: with separate RUN steps
# a cached (stale) "update" layer can be reused by a later "install", which
# then fails on packages that have moved. DEBIAN_FRONTEND=noninteractive keeps
# package configuration from blocking the build on a prompt.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip parallel zip \
    && rm -rf /var/lib/apt/lists/*

# Pypi stuff
RUN pip install structure_threader

# Working directory where the analysis data is expected to be mounted/copied
RUN mkdir /analysis
WORKDIR /analysis
12 |
--------------------------------------------------------------------------------
/helper_scripts/README.md:
--------------------------------------------------------------------------------
1 | # Structure_threader helper scripts
2 |
3 | This directory contains three scripts that will install *STRUCTURE*, *fastStructure* and *MavericK* respectively in a *semi* automatic way.
4 |
5 | All scripts default the programs' install locations to ~/Software/. You can change this in the scripts themselves should you wish to change this location.
6 |
7 | ### install_structure.sh
8 | This script will download and install STRUCTURE.
9 |
10 |
11 | #### Requirements:
12 | * a C compiler, such as GCC, with fortran support.
13 | * Cmake is required to build LAPACK
14 |
15 | This should be available in every HPC environment.
16 |
17 | In Ubuntu, all you should need is the package "build-essential" (if it is not
18 | already installed for some reason). It can be installed like this:
19 |
20 | ```
21 | sudo apt-get install build-essential
22 | ```
23 |
24 | In other distros, the package name should be similar.
25 |
26 | ### install_faststructure.sh
27 | This script will download and install fastStructure and its dependencies.
28 |
29 | fastStructure depends on quite a few software packages:
30 | * cython
31 | * numpy
32 | * scipy
33 | * GNU scientific library
34 |
35 | If these are already installed in your system, feel free to comment the script
36 | section that will install them. Otherwise it will install a new local copy of
37 | these programs. You can install these packages in Ubuntu with the following
38 | command:
39 |
40 | ```
41 | sudo apt-get install cython python-numpy python-scipy gsl-bin
42 | ```
43 |
44 | ### Important note:
45 | If you are relying on the GNU Scientific Library that was installed using the
46 | `install_faststructure` script, you will need to make your system aware of
47 | where these libraries are.
48 | For that, add the following to your `~/.bashrc`:
49 |
50 | ```bash
51 | LD_LIBRARY_PATH=$install_dir/lib
52 | export LD_LIBRARY_PATH
53 | ```
54 |
55 | Where `$install_dir` is the directory defined in `install_faststructure.sh`.
56 |
57 |
58 | ### install_maverick.sh
59 | This script will download, compile and install MavericK.
60 |
61 |
62 | #### Requirements:
63 | * a recent C compiler, such as GCC 6.1 and above.
64 |
65 | This should be available in every HPC environment.
66 |
67 | In Ubuntu, all you should need is the package "build-essential" (if it is not
68 | already installed for some reason). It can be installed like this:
69 |
70 | ```
71 | sudo apt-get install build-essential
72 | ```
73 |
74 | In other distros, the package name should be similar.
75 |
--------------------------------------------------------------------------------
/helper_scripts/install_faststructure.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015-2019 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
# Abort on the first failing command: every later step depends on the
# previous download/build having succeeded.
set -e

# Define and create installation location:
install_dir="${HOME}/Software/faststructure"
mkdir -p "${install_dir}"

# Define temp dir
tempdir="/tmp/${USER}"
mkdir -p "${tempdir}"

# Remember the caller's LDFLAGS: they are modified for the numpy/scipy builds
# below and restored before building GSL.
_OLD_LDFLAGS="${LDFLAGS}"


# Download faststructure and deps. sources into temp dir
# faststructure
wget -c https://github.com/rajanil/fastStructure/archive/v1.0.tar.gz -O "${tempdir}/fastStructure-1.0.tar.gz"
# python-nose
wget -c https://pypi.python.org/packages/source/n/nose/nose-1.3.6.tar.gz -O "${tempdir}/nose-1.3.6.tar.gz"
# numpy
wget -c https://github.com/numpy/numpy/archive/v1.9.2.tar.gz -O "${tempdir}/numpy.tar.gz"
# scipy
wget -c https://github.com/scipy/scipy/archive/v0.16.0b2.tar.gz -O "${tempdir}/scipy.tar.gz"
# cython
wget -c https://pypi.python.org/packages/f8/25/80f9ca7e31e2b68cc942ff1d6136588f33a7aef5e2d6abe3f2183cb9fad5/cython-0.22.tar.gz -O "${tempdir}/Cython-0.22.tar.gz"
# GNU scientific library
wget -c http://gnu.mirror.vexxhost.com/gsl/gsl-1.16.tar.gz -O "${tempdir}/gsl-1.16.tar.gz"
# LAPACK (required for scipy)
wget -c http://www.netlib.org/lapack/lapack-3.5.0.tgz -O "${tempdir}/lapack-3.5.0.tgz"


# Install dependencies
# LAPACK
cd "${tempdir}"
tar xvfz lapack-3.5.0.tgz
mkdir -p build-lapack
cd build-lapack
# -fallow-argument-mismatch / -w relax the Fortran compiler's checks for this
# LAPACK release (presumably needed with newer gfortran versions).
export FCFLAGS="-w -fallow-argument-mismatch -O2"
export FFLAGS="-w -fallow-argument-mismatch -O2"
cmake ../lapack-3.5.0
make
mkdir -p "${install_dir}"/{bin,lib}
mv bin/* "${install_dir}/bin"
mv lib/* "${install_dir}/lib"

# cython
cd "${tempdir}"
tar xvfz Cython-0.22.tar.gz
cd Cython-0.22
python2 setup.py install --user

# python-nose
cd "${tempdir}"
tar xvfz nose-1.3.6.tar.gz
cd nose-1.3.6
python2 setup.py install --user

# numpy
cd "${tempdir}"
tar xvfz numpy.tar.gz
cd numpy-1.9.2
# Point every "python" shebang at python2. Using find -exec (instead of an
# unquoted $(find ...)) keeps file names with whitespace intact.
find . -name '*.py' -exec sed -i \
    -e 's|#![ ]*/usr/bin/python$|#!/usr/bin/python2|' \
    -e 's|#![ ]*/usr/bin/env python$|#!/usr/bin/env python2|' \
    -e 's|#![ ]*/bin/env python$|#!/usr/bin/env python2|' \
    {} +
export ATLAS=None
export LDFLAGS="${LDFLAGS} -shared"
python2 setup.py install --user

# scipy
cd "${tempdir}"
tar xvfz scipy.tar.gz
cd scipy-0.16.0b2
python2 setup.py install --user

# GNU scientific library
cd "${tempdir}"
tar xvzf gsl-1.16.tar.gz
cd gsl-1.16
# Drop the "-shared" flag added for numpy/scipy before configuring GSL
export LDFLAGS="${_OLD_LDFLAGS}"
./configure --prefix="${install_dir}"
make
make install

# Extract tarball, enter src dir, build binary and place it in the install dir
# Only prepend the previous LD_LIBRARY_PATH when it is non-empty: a leading
# ":" would add the current directory to the library search path.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${install_dir}/lib"
export CFLAGS="-I${install_dir}/include"
export LDFLAGS="-L${install_dir}/lib"
cd "${tempdir}"
tar xvfz fastStructure-1.0.tar.gz
cd fastStructure-1.0
cd vars
python2 setup.py build_ext --inplace
cd ..
python2 setup.py build_ext --inplace
cd ..
mv fastStructure-1.0 "${install_dir}"

echo ""
echo "Install succesfull. fastStructure is now ready to use."
echo ""
echo "In order to use the locally installed libraries, please add"
echo "the following to the end of your ~/.bashrc:"
echo ""
echo "######"
echo "LD_LIBRARY_PATH=${install_dir}/lib"
echo "export LD_LIBRARY_PATH"
echo "#####"
126 |
--------------------------------------------------------------------------------
/helper_scripts/install_maverick.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2017-2022 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | set -e
19 |
20 | # Define MavericK version and package name:
21 | _version=1.0.4
22 | _name=MavericK
23 |
24 | # Define and create installation location:
25 | install_dir=~/Software/"${_name}"
26 | mkdir -p "${install_dir}"
27 |
28 | # Define temp dir
29 | tempdir="/tmp/$USER"
30 | mkdir -p "${tempdir}"
31 |
32 | # Download structure sources into temp dir
33 | wget "https://github.com/bobverity/${_name}/archive/v${_version}.tar.gz" -O "${tempdir}/${_name}.tar.gz"
34 |
35 | # Extract tarball, enter src dir, build binary and place it in the install dir
36 | cd "${tempdir}"
37 | tar xvfz "${_name}.tar.gz"
38 | cd "${_name}-${_version}/"
39 | make
40 | mv "${_name}" "${install_dir}"
41 |
42 | echo ""
43 | echo "Install succesfull. MavericK is now ready to use."
44 | echo ""
45 |
--------------------------------------------------------------------------------
/helper_scripts/install_structure.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015-2022 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | set -e
19 |
20 | # Define and create installation location:
21 | install_dir=~/Software/structure
22 | mkdir -p "${install_dir}"
23 |
24 | # Define temp dir
25 | tempdir=/tmp/"${USER}"
26 | mkdir -p "${tempdir}"
27 |
28 | # Download structure sources into temp dir
29 | wget http://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_kernel_source.tar.gz -O "${tempdir}"/structure_kernel_source.tar.gz
30 |
31 | # Extract tarball, enter src dir, build binary and place it in the install dir
32 | cd "${tempdir}"
33 | tar xvfz structure_kernel_source.tar.gz
34 | cd structure_kernel_src/
35 | sed -i 's/OPT = -O3/OPT = -O3 -fcommon/' Makefile
36 | make
37 | mv structure "${install_dir}"
38 |
39 | echo ""
40 | echo "Install succesfull. STRUCTURE is now ready to use."
41 | echo ""
42 |
--------------------------------------------------------------------------------
/helper_scripts/structure.spec:
--------------------------------------------------------------------------------
# -*- mode: python -*-

# PyInstaller spec file: bundles 'structure.py' (plus the compiled helper
# modules under ./vars) into a single executable named 'fastStructure'.
# Analysis, PYZ and EXE are not imported here; they are expected to be
# provided by PyInstaller when it executes this spec.

# No bytecode encryption is used.
block_cipher = None


# Collect the entry script and everything it imports. The 'vars.*' and
# 'scipy.*' modules are listed explicitly as hidden imports
# (presumably because the automatic import scan does not find them - confirm).
a = Analysis(['structure.py'],
             pathex=['.', './vars'],
             binaries=None,
             datas=None,
             hiddenimports=['vars.admixprop', 'vars.allelefreq', 'vars.utils', 'vars.marglikehood', 'scipy.special', 'scipy.optimize'],
             hookspath=[],
             runtime_hooks=[],
             excludes=[],
             win_no_prefer_redirects=False,
             win_private_assemblies=False,
             cipher=block_cipher)
# Archive of the pure-Python modules found by the analysis.
pyz = PYZ(a.pure, a.zipped_data,
          cipher=block_cipher)
# Scripts, binaries, zipfiles and datas are all passed to EXE, i.e. they are
# packed into the one output executable.
exe = EXE(pyz,
          a.scripts,
          a.binaries,
          a.zipfiles,
          a.datas,
          name='fastStructure',
          debug=False,
          strip=False,
          upx=True,
          console=True )
29 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: 'Structure_threader manual'
2 | #site_favicon: 'assets/Icon.png'
3 | site_author: Francisco Pina-Martins
4 | repo_url: https://gitlab.com/StuntsPT/Structure_threader
5 | edit_uri: tree/master/docs
6 |
7 | nav:
8 | - 'Introduction': 'index.md'
9 | - 'Installation & dependencies': 'install.md'
10 | - 'External programs': 'external.md'
11 | - 'Usage': 'usage.md'
12 | - 'Output': 'output.md'
13 | - 'Test Data': 'test_data.md'
14 | - 'Building Binaries': 'binaries.md'
15 | - 'Benchmarking': 'benchmark.md'
16 | - 'Citation': 'citation.md'
17 | - 'Future Plans': 'future.md'
18 | - 'FAQ': 'faq.md'
19 | theme: readthedocs
20 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # pyproject.toml
2 | [build-system]
3 | requires = ["setuptools >= 42.0.0"]
4 | build-backend = "setuptools.build_meta"
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Unversioned
2 | numpy
3 | pytest
4 |
5 | # Version required
6 | matplotlib >= 1.5
7 | plotly >= 4.1.1
8 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2016-2025 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 |
19 | import sys
20 | from setuptools import setup
21 |
22 |
class NotSupportedException(BaseException):
    """Raised when setup is run under an unsupported Python version.

    Derives from ``BaseException`` (not ``Exception``), so a generic
    ``except Exception`` handler will not swallow it.
    """


# Refuse to go any further on Python 2.
if sys.version_info[0] < 3:
    raise NotSupportedException("Only Python 3.x Supported")
29 |
30 |
def platform_detection(install_binaries=True):
    """
    Return the ``data_files`` entry for the bundled binaries of this platform.

    The result is ``[('bin', [fastStructure, structure, MavericK])]`` with
    paths below ``structure_threader/bins/linux`` or
    ``structure_threader/bins/osx``. ``None`` is returned when
    *install_binaries* is anything other than the literal ``True`` or when
    ``sys.platform`` is neither ``"linux"`` nor ``"darwin"``.
    """
    bin_dirs = {
        "linux": "structure_threader/bins/linux",
        "darwin": "structure_threader/bins/osx",
    }

    # Guard clause: binaries not requested, or no binaries for this platform.
    if install_binaries is not True or sys.platform not in bin_dirs:
        return None

    base_dir = bin_dirs[sys.platform]
    executables = [base_dir + "/" + program
                   for program in ("fastStructure", "structure", "MavericK")]

    return [('bin', executables)]
50 |
51 |
# Packaging constants (PKGBUILD inspired).
# The ALStructure R wrapper is always installed into 'bin'; the pre-built
# binaries are added only when platform_detection() found some.
DATA_FILES = platform_detection()
if DATA_FILES is None:
    # No bundled binaries for this platform: ship the wrapper on its own.
    DATA_FILES = [('bin',
                   ["structure_threader/wrappers/alstructure_wrapper.R"])]
else:
    DATA_FILES[0][1].append("structure_threader/wrappers/alstructure_wrapper.R")
VERSION = "1.3.11"
URL = "https://gitlab.com/StuntsPT/Structure_threader"
61 |
62 |
# Package definition. VERSION, URL and DATA_FILES are the module-level values
# computed above.
setup(
    name="structure_threader",
    version=VERSION,
    # Sub-packages are listed explicitly (no automatic package discovery).
    packages=["structure_threader",
              "structure_threader.evanno",
              "structure_threader.plotter",
              "structure_threader.sanity_checks",
              "structure_threader.colorer",
              "structure_threader.wrappers",
              "structure_threader.skeletons"],
    install_requires=["plotly>=4.1.1",
                      "colorlover",
                      "numpy>=1.12.1",
                      "matplotlib"],
    description=("A program to parallelize runs of 'Structure', "
                 "'fastStructure' and 'MavericK'."),
    url=URL,
    # Source tarball URL built from the project URL and the version string.
    download_url="{0}/-/archive/{1}/Structure_threader-{1}.tar.gz".format(URL, VERSION),
    author="Francisco Pina-Martins",
    author_email="f.pinamartins@gmail.com",
    license="GPL3",
    classifiers=["Intended Audience :: Science/Research",
                 "License :: OSI Approved :: GNU General Public License v3 ("
                 "GPLv3)",
                 "Natural Language :: English",
                 "Operating System :: POSIX :: Linux",
                 "Topic :: Scientific/Engineering :: Bio-Informatics",
                 "Programming Language :: Python :: 3 :: Only",
                 "Programming Language :: Python :: 3.4",
                 "Programming Language :: Python :: 3.5",
                 "Programming Language :: Python :: 3.6",
                 "Programming Language :: Python :: 3.7"],
    # Bundled binaries (when available) plus the ALStructure R wrapper.
    data_files=DATA_FILES,
    # Installs the 'structure_threader' command, which calls main() in
    # structure_threader/structure_threader.py.
    entry_points={
        "console_scripts": [
            "structure_threader = structure_threader.structure_threader:main",
        ]
    },
)
102 |
--------------------------------------------------------------------------------
/structure_threader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/__init__.py
--------------------------------------------------------------------------------
/structure_threader/bins/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/__init__.py
--------------------------------------------------------------------------------
/structure_threader/bins/linux/MavericK:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/MavericK
--------------------------------------------------------------------------------
/structure_threader/bins/linux/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/__init__.py
--------------------------------------------------------------------------------
/structure_threader/bins/linux/fastStructure:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/fastStructure
--------------------------------------------------------------------------------
/structure_threader/bins/linux/structure:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/linux/structure
--------------------------------------------------------------------------------
/structure_threader/bins/osx/MavericK:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/osx/MavericK
--------------------------------------------------------------------------------
/structure_threader/bins/osx/fastStructure:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/osx/fastStructure
--------------------------------------------------------------------------------
/structure_threader/bins/osx/structure:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/bins/osx/structure
--------------------------------------------------------------------------------
/structure_threader/colorer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/colorer/__init__.py
--------------------------------------------------------------------------------
/structure_threader/colorer/colorer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | # Code taken from http://stackoverflow.com/a/1336640/3091595.
5 | # Thanks to @sorin for providing this coloring method!
6 |
7 | import logging
8 | # now we patch Python code to add color support to logging.StreamHandler
def add_coloring_to_emit_windows(fn):
    """Wrap a ``StreamHandler.emit`` function with Windows console colouring.

    As a side effect a ``_set_color(code)`` method is attached to
    ``logging.StreamHandler``; it changes the console text attribute through
    the Win32 API (``ctypes.windll``), so it only works on Windows.

    :param fn: the original ``emit`` callable, invoked as ``fn(handler, record)``.
    :return: a replacement ``emit`` that picks a colour from
        ``record.levelno``, calls *fn* and then switches the console back to
        white, also when *fn* raises.
    """
    # Console text attribute bits (wincon.h).
    FOREGROUND_GREEN = 0x0002
    FOREGROUND_RED = 0x0004
    FOREGROUND_MAGENTA = 0x0005
    FOREGROUND_YELLOW = 0x0006
    FOREGROUND_WHITE = 0x0007       # blue | green | red
    FOREGROUND_INTENSITY = 0x0008   # foreground color is intensified.
    BACKGROUND_YELLOW = 0x0060
    BACKGROUND_INTENSITY = 0x0080   # background color is intensified.

    def _set_color(self, code):
        import ctypes
        # Constants from the Windows API (winbase.h)
        self.STD_OUTPUT_HANDLE = -11
        hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE)
        ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code)

    setattr(logging.StreamHandler, '_set_color', _set_color)

    def new(*args):
        # args is (handler, record), exactly what emit() receives.
        levelno = args[1].levelno
        if levelno >= 50:
            color = (BACKGROUND_YELLOW | FOREGROUND_RED
                     | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY)
        elif levelno >= 40:
            color = FOREGROUND_RED | FOREGROUND_INTENSITY
        elif levelno >= 30:
            color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY
        elif levelno >= 20:
            color = FOREGROUND_GREEN
        elif levelno >= 10:
            color = FOREGROUND_MAGENTA
        else:
            color = FOREGROUND_WHITE
        args[0]._set_color(color)

        try:
            return fn(*args)
        finally:
            # Always restore the default colour, otherwise an exception in
            # emit() would leave the console stuck in the level colour.
            args[0]._set_color(FOREGROUND_WHITE)
    return new
77 |
def add_coloring_to_emit_ansi(fn):
    """Wrap a ``StreamHandler.emit`` function with ANSI colour escapes.

    :param fn: the original ``emit`` callable, invoked as ``fn(handler, record)``.
    :return: a replacement ``emit`` that surrounds ``record.msg`` with the
        escape sequence for ``record.levelno`` while *fn* runs.

    The record is shared by every handler attached to a logger, so the
    original ``msg`` is put back afterwards; otherwise each handler would
    wrap the already coloured text again. ``msg`` is passed through ``str()``
    first because logging accepts arbitrary objects as messages.
    """
    def new(*args):
        record = args[1]
        levelno = record.levelno
        if levelno >= 50:
            color = '\x1b[31m'  # red
        elif levelno >= 40:
            color = '\x1b[31m'  # red
        elif levelno >= 30:
            color = '\x1b[33m'  # yellow
        elif levelno >= 20:
            color = '\x1b[32m'  # green
        elif levelno >= 10:
            color = '\x1b[35m'  # pink
        else:
            color = '\x1b[0m'  # normal

        original_msg = record.msg
        record.msg = color + str(original_msg) + '\x1b[0m'  # normal
        try:
            return fn(*args)
        finally:
            # Leave the shared record as we found it for the next handler.
            record.msg = original_msg
    return new
98 |
import platform
# Patch StreamHandler.emit once, at import time. Windows gets the console-API
# based wrapper; every other platform gets the ANSI escape based wrapper.
logging.StreamHandler.emit = (
    add_coloring_to_emit_windows
    if platform.system() == 'Windows'
    else add_coloring_to_emit_ansi
)(logging.StreamHandler.emit)
106 | #log = logging.getLogger()
107 | #log.addFilter(log_filter())
108 | #//hdlr = logging.StreamHandler()
109 | #//hdlr.setFormatter(formatter())
110 |
--------------------------------------------------------------------------------
/structure_threader/evanno/LICENSE:
--------------------------------------------------------------------------------
1 | LICENSE
2 |
3 | Copyright (C) 2007-2014 by
4 | Dent Earl (dearl (a) soe ucsc edu, dentearl (a) gmail com)
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 |
--------------------------------------------------------------------------------
/structure_threader/evanno/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/evanno/__init__.py
--------------------------------------------------------------------------------
/structure_threader/evanno/fastChooseK.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # The MIT License (MIT)
4 | #
5 | # Copyright (c) 2014 Anil
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | # SOFTWARE.
24 |
25 | # Copyright 2015-2016 Francisco Pina Martins
26 | # This file is part of structure_threader.
27 | # structure_threader is free software: you can redistribute it and/or modify
28 | # it under the terms of the GNU General Public License as published by
29 | # the Free Software Foundation, either version 3 of the License, or
30 | # (at your option) any later version.
31 |
32 | # structure_threader is distributed in the hope that it will be useful,
33 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
34 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 | # GNU General Public License for more details.
36 |
37 | # You should have received a copy of the GNU General Public License
38 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
39 |
40 |
41 | import glob
42 | import numpy as np
43 |
44 |
def insum(x, axes):
    """
    Sums `x` over the given axes while keeping the summed axes in the result.

    Arguments:

    x : array-like
    data to be summed
    axes : list
    axes over which the sum is applied, one after the other

    Returns an array with the same number of dimensions as `x`, in which
    every summed axis has length 1.
    """
    return np.apply_over_axes(np.sum, x, axes)
46 |
47 |
48 | # class Exception(Exception):
49 | # pass
50 |
51 |
def parse_logs(files):
    """
    Parses through log files to extract marginal
    likelihood estimates from executing the
    variational inference algorithm on a dataset.

    Only the first line containing 'Marginal Likelihood' is read from each
    file. Files without such a line contribute nothing, so the returned list
    may be shorter than `files`.

    Arguments:

    files : list
    list of .log file names

    Returns a list of floats, in the same order as `files`.
    """
    marginal_likelihood = []
    for log_name in files:
        # The context manager closes the handle even if float() raises.
        with open(log_name, 'r') as handle:
            for line in handle:
                if 'Marginal Likelihood' in line:
                    value = float(line.strip().split('=')[1])
                    marginal_likelihood.append(value)
                    break

    return marginal_likelihood
74 |
75 |
def parse_varQs(files):
    """
    Parses through multiple .meanQ files to extract the mean
    admixture proportions estimated by executing the
    variational inference algorithm on a dataset. This is then used
    to identify the number of model components used to explain
    structure in the data, for each .meanQ file.

    Arguments:

    files : list
    list of .meanQ file names

    Returns a list with one component count per input file, in the same
    order as `files`.
    """
    bestKs = []

    for q_name in files:
        # Blank lines (e.g. a trailing empty line) would produce a ragged
        # array, so they are skipped while reading.
        with open(q_name, 'r') as handle:
            rows = [list(map(float, line.split()))
                    for line in handle if line.strip()]

        Q = np.array(rows)
        # Normalise each row so that it sums to 1.
        Q = Q / Q.sum(axis=1, keepdims=True)

        N = Q.shape[0]
        # Cumulative column totals, largest column first.
        C = np.cumsum(np.sort(Q.sum(0))[::-1])
        # Count the columns needed before the running total reaches N - 1.
        bestKs.append(np.sum(C < N - 1) + 1)

    return bestKs
102 |
def main(indir, outpath):
    """
    Main function that runs everything in order.

    Reads every `*.log` and `*.meanQ` file found in `indir`, writes a
    two-line summary to `<outpath>/chooseK.txt` and returns the list of K
    values ranging from the K that maximizes the marginal likelihood up to
    (and including) the most frequent number of model components.

    Arguments:

    indir : str
    directory that holds the .log and .meanQ files
    outpath : str
    directory where `chooseK.txt` is written
    """
    if not indir.endswith("/"):
        indir = indir + "/"

    log_files = glob.glob('%s*.log' % indir)
    # K is taken from the second-to-last dot separated field of the name.
    Ks = np.array([int(name.split('.')[-2]) for name in log_files])
    # NOTE(review): parse_logs() is expected to return one value per log
    # file; otherwise the index below no longer lines up with `Ks` - confirm.
    marginal_likelihoods = parse_logs(log_files)

    bestKs = parse_varQs(glob.glob('%s*.meanQ' % indir))

    # Work out both values before touching the output file, so a failure
    # here does not leave an empty `chooseK.txt` behind.
    max_likelihood_k = Ks[np.argmax(marginal_likelihoods)]
    most_common_k = np.argmax(np.bincount(bestKs))

    with open(outpath + "/chooseK.txt", "w") as outfile:
        outfile.write("Model complexity that maximizes marginal likelihood "
                      "= %d\n" % max_likelihood_k)
        outfile.write("Model components used to explain structure in data "
                      "= %d\n" % most_common_k)

    # Retrieve list of bestk
    return list(range(max_likelihood_k, most_common_k + 1))
130 |
131 |
if __name__ == "__main__":
    # Usage: python3 fastChooseK.py /path/to/faststructure_outdir \
    #            /path/to/dir/where/results_file/is_written
    # The first argument is handed to main() as the input directory and the
    # second one as the directory that receives `chooseK.txt`.
    from sys import argv

    input_location, results_dir = argv[1], argv[2]
    main(input_location, results_dir)
142 |
--------------------------------------------------------------------------------
/structure_threader/plotter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/plotter/__init__.py
--------------------------------------------------------------------------------
/structure_threader/sanity_checks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/sanity_checks/__init__.py
--------------------------------------------------------------------------------
/structure_threader/skeletons/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/skeletons/__init__.py
--------------------------------------------------------------------------------
/structure_threader/wrappers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/structure_threader/wrappers/__init__.py
--------------------------------------------------------------------------------
/structure_threader/wrappers/alstructure_wrapper.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 | # Copyright 2019-2020 Francisco Pina Martins
3 | # This file is part of structure_threader.
4 | # structure_threader is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 |
9 | # structure_threader is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 |
14 | # You should have received a copy of the GNU General Public License
15 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
16 |
## Default repo: point the "CRAN" entry of the repository list at the main
## CRAN site, so install.packages() does not have to ask for a mirror.
local({
  repos <- getOption("repos")
  repos["CRAN"] <- "http://cran.r-project.org"
  options(repos = repos)
})

## Find the user library location among the environment variables and expand
## a leading "~" into the real home directory.
env_vars <- Sys.getenv()
user_lib_entry <- env_vars[grep("R_LIBS_USER", names(env_vars))]
local_lib <- gsub(".*~", path.expand("~"), as.character(user_lib_entry),
                  perl = TRUE)

## Create the library directory on first use.
if (!dir.exists(local_lib)) {
  dir.create(local_lib, showWarnings = TRUE, recursive = TRUE)
}

## Packages are looked up in (and installed to) this library from now on.
.libPaths(local_lib)
31 |
## ALStructure is installed from GitHub at a pinned commit when it cannot be
## loaded; devtools is installed first if it is missing as well.
if (!require("alstructure")) {
  if (!require("devtools")) {
    install.packages("devtools")
  }
  library("devtools")
  install_github("storeylab/alstructure",
                 build_vignettes = FALSE,
                 ref = "e355411")
  library(alstructure)
}

## lfa is installed through BiocManager when it cannot be loaded.
if (!require(lfa)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }

  BiocManager::install("lfa")
  library(lfa)
}
48 |
alstructure_wrapper <- function(data_matrix, K) {
  #' ALStructure wrapper
  #'
  #' Runs alstructure() on a data matrix for a given value of K.
  #' K is converted with as.numeric() first, so it may be given as text.
  #' Returns the transposed Q_hat element of the fit (the q-matrix).

  n_components <- as.numeric(K)
  als_fit <- alstructure(X = data_matrix, d_hat = n_components)

  t(als_fit$Q_hat)
}
63 |
data_to_matrix <- function(ifile) {
  #' data_to_matrix
  #' Loads an input file as a data matrix that can be handed to alstructure.
  #' A name ending in ".tsv" is read as a header-less, tab separated table;
  #' anything else is passed to lfa::read.bed().
  #' Returns the data matrix.

  is_tsv <- substring(ifile, nchar(ifile) - 3) == ".tsv"

  if (!is_tsv) {
    return(lfa::read.bed(ifile))
  }

  print(ifile)
  as.matrix(read.csv(ifile, header = FALSE, sep = "\t"))
}
79 |
args <- commandArgs(trailingOnly = TRUE)

## Only run when no function call is active on the stack, i.e. not from
## inside another function.
## Arguments: 1 = input file, 2 = value of K, 3 = output csv file.
if (sys.nframe() == 0) {
  q_matrix <- alstructure_wrapper(data_to_matrix(args[1]), args[2])
  write.csv(q_matrix, args[3])
}
87 |
--------------------------------------------------------------------------------
/structure_threader/wrappers/alstructure_wrapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2019 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import os
19 | import logging
20 |
21 |
22 | try:
23 | import colorer.colorer as colorer
24 | except ImportError:
25 | import structure_threader.colorer.colorer as colorer
26 |
27 |
def alstr_cli_generator(arg, k_val):
    """
    Generates and returns command line for running ALStructure.

    `arg` must provide the attributes `outpath`, `infile` and
    `external_prog`. The input file is handled according to its extension:

    * `.bed`, `.fam`, `.bim`: the extension is stripped and the remaining
      prefix is passed on.
    * `.vcf`: the file is converted with `vcf_to_matrix()` and the resulting
      `.tsv` file is passed on.
    * anything else: the path is passed on unchanged, instead of failing
      with an unbound `infile` variable.

    Returns a tuple `(cli, output_file)` where `cli` is the argument list
    and `output_file` is `<outpath>/alstr_K<k_val>`.
    """
    output_file = os.path.join(arg.outpath, "alstr_K" + str(k_val))

    if arg.infile.endswith((".bed", ".fam", ".bim")):
        infile = arg.infile[:-4]
    elif arg.infile.endswith(".vcf"):
        vcf_to_matrix(arg.infile)
        infile = arg.infile[:-4] + ".tsv"
    else:
        infile = arg.infile

    cli = ["Rscript", arg.external_prog, infile, str(k_val), output_file]

    return cli, output_file
42 |
43 |
def vcf_to_matrix(vcf_file):
    """
    Parses a VCF file and converts it to a tsv matrix that can be read by
    ALStructure.
    Takes a VCF filename as input.
    Does not return anything.
    Writes a new file with the same name as the VCF but with .tsv extension

    Each data line yields one output row with one value per sample column
    (column 10 onwards). The genotype is the part of the sample field that
    precedes the first ':'; it is converted through `conversion_table` and
    anything not found there is written as "NA".
    """
    conversion_table = {"0/0": "0", "0/1": "1", "1/0": "1", "1/1": "2",
                        "0|0": "0", "0|1": "1", "1|0": "1", "1|1": "2"}

    # Only swap the file extension; a plain str.replace() would also rewrite
    # any ".vcf" that happens to be part of a directory name.
    tsv_file = os.path.splitext(vcf_file)[0] + ".tsv"

    with open(vcf_file, "r") as infile, open(tsv_file, "w") as outfile:
        in_header = True
        for line in infile:
            # Skip initial comments that start with #
            if in_header and line.startswith('#'):
                continue
            in_header = False

            genotypes = [x.split(":")[0] for x in line.split()[9:]]
            converted = [conversion_table.get(gt, "NA") for gt in genotypes]
            outfile.write("\t".join(converted) + "\n")
78 |
--------------------------------------------------------------------------------
/structure_threader/wrappers/faststructure_wrapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2018 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import os
19 |
20 |
def fs_cli_generator(k_val, arg):
    """
    Generates and returns command line for running fastStructure.

    `arg` must provide the attributes `outpath`, `infile`, `external_prog`,
    `seed` and `extra_options` (a whitespace separated string).

    Input files ending in `.bed`, `.fam` or `.bim` are run with
    `--format bed`; everything else is run with `--format str`. In both
    cases a 4 character extension is stripped from the input path. When a
    `str` input does not end in `.str`, a `<infile>.str` symlink is created
    next to it and the path is passed on as is.

    Returns a tuple `(cli, output_file)`.
    """
    output_file = os.path.join(arg.outpath, "fS_run_K")

    if arg.infile.endswith((".bed", ".fam", ".bim")):
        file_format = "bed"
        infile = arg.infile[:-4]
    else:
        file_format = "str"  # Assume 'STR' format if plink is not specified
        if arg.infile.endswith(".str"):
            infile = arg.infile[:-4]
        else:  # Do we need a symlink?
            infile = arg.infile
            try:
                os.symlink(os.path.basename(arg.infile), arg.infile + ".str")
            except FileExistsError:
                # The link is already there (e.g. from a previous run).
                pass

    cli = [arg.external_prog, "-K", str(k_val), "--input", infile,
           "--output", output_file, "--format", file_format,
           "--seed", str(arg.seed)] + arg.extra_options.split()

    # Are we using the python script or a binary?
    if arg.external_prog.endswith(".py"):
        cli = ["python2"] + cli

    return cli, output_file
50 |
--------------------------------------------------------------------------------
/structure_threader/wrappers/structure_wrapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2017-2018 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import os
19 | import logging
20 | import itertools
21 | import random
22 |
23 |
24 | try:
25 | import colorer.colorer as colorer
26 | except ImportError:
27 | import structure_threader.colorer.colorer as colorer
28 |
29 |
def str_cli_generator(arg, k_val, rep_num, seed):
    """
    Builds the command line for a single STRUCTURE run.

    The output file is `<outpath>/str_K<k_val>_rep<rep_num>`. A `-D <seed>`
    pair is added when `seed` is not None, followed by the contents of
    `arg.params` when that is not None.

    Returns a tuple `(cli, output_file)`.
    """
    run_name = "str_K{}_rep{}".format(k_val, rep_num)
    output_file = os.path.join(arg.outpath, run_name)

    cli = [arg.external_prog]
    cli.extend(["-K", str(k_val)])
    cli.extend(["-i", arg.infile])
    cli.extend(["-o", output_file])

    if seed is not None:
        cli.extend(["-D", seed])

    if arg.params is not None:
        cli.extend(arg.params)

    return cli, output_file
46 |
47 |
def str_param_checker(arg):
    """
    Handles the parameter files for STRUCTURE (or lack thereof).

    Changes the working directory to the directory of `arg.infile` (when the
    path has a directory part). If `arg.params` is set, it is taken as the
    `mainparams` file and an `extraparams` file is looked for in the same
    directory:

    * when it is missing, an empty one is created and a warning is logged;
    * when it exists, an active RANDOMIZE option is switched off in place.

    `arg.params` is then replaced by the list
    `["-m", mainparams, "-e", extraparams]`. Nothing is returned.
    """
    def _disable_structure_randomize(extraparams_file):
        """
        Checks if the RANDOMIZE option is set in the `extraparams` file.
        If it is, disable it (set to `0`)
        """
        with open(extraparams_file, "r") as infile:
            original_lines = infile.readlines()

        updated_lines = []
        overwrite = False
        for line in original_lines:
            # Expected layout: <keyword> RANDOMIZE <value> [comment...]
            parts = line.split(None, 2)
            if (len(parts) == 3 and parts[1] == "RANDOMIZE"
                    and parts[2].split()[0] == "1"):
                # Only the value itself is rewritten; any other "1" on the
                # line (e.g. inside a trailing comment) is left alone.
                value_start = len(line) - len(parts[2])
                line = line[:value_start] + "0" + parts[2][1:]
                logging.warning("The RANDOMIZE option was activated in"
                                " the `extraparams` file. "
                                " *Structure_threader* has disabled it"
                                " since it handles this functionality "
                                "internallly (random seed setting).")
                overwrite = True
            updated_lines.append(line)

        if overwrite:
            with open(extraparams_file, "w") as outfile:
                outfile.writelines(updated_lines)

    # os.chdir("") raises, so a bare file name keeps the current directory.
    infile_dir = os.path.dirname(arg.infile)
    if infile_dir:
        os.chdir(infile_dir)

    if arg.params is not None:
        mainparams = arg.params
        # NOTE(review): a relative `arg.params` is resolved after the
        # directory change above - confirm this is what callers expect.
        extraparams = os.path.join(os.path.dirname(arg.params),
                                   "extraparams")
        if not os.path.isfile(extraparams):
            logging.warning("No 'extraparams' file was found. An empty one "
                            "was created, but it is highly recommended "
                            "that you fill one out.")
            with open(extraparams, 'w'):
                pass
        else:
            _disable_structure_randomize(extraparams)
        arg.params = ["-m", mainparams, "-e", extraparams]
95 |
96 |
def seed_generator(seed, k_list, replicates):
    """
    Derives one seed per run from a single user supplied seed value.

    Every (K, replicate) combination gets its own seed, drawn from the
    module level random number generator after seeding it with `seed`.
    The combinations are listed in reverse product order.

    Returns a job list: [(seed, K, replicate), ...], where each seed is a
    string.
    """
    combinations = list(itertools.product(k_list, replicates))
    combinations.reverse()

    random.seed(seed)
    jobs = []
    for k_val, replicate in combinations:
        run_seed = str(random.randrange(10000000))
        jobs.append((run_seed, k_val, replicate))

    return jobs
109 |
--------------------------------------------------------------------------------
/tests/alstructure_field_tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2019-2020 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
# Abort on the first failing command.
set -e

LightGreen='\033[1;32m'
NoColor='\033[0m'

echo "Runnig ALStructure 'field test'. This will simulate a full wrapped run on small test data."

# NOTE(review): the paths below are built from the current directory, so the
# script presumably has to be started from the repository root - confirm.
git_dir=$(pwd)
str_bin=$(which alstructure_wrapper.R)
structure_threader_exec=$(which structure_threader)

data_dir="${git_dir}/tests/smalldata"

# Unpacks BigTestData.<ext>.tar.xz and runs a wrapped ALStructure analysis
# on the extracted file. $1 is the file extension (without the dot).
run_field_test() {
    local ext="$1"

    tar xvfJ "${data_dir}/BigTestData.${ext}.tar.xz" -C "${data_dir}/"
    ${structure_threader_exec} run \
        -i "${data_dir}/BigTestData.${ext}" \
        -o ~/results_als \
        -als "${str_bin}" \
        -K 4 -t 4 \
        --ind "${data_dir}/indfile.txt"

    echo -e "${LightGreen}ALStructure 'Field test' ran successfully on the \`.${ext}\` file. Yay!${NoColor}"
}

run_field_test bed
run_field_test vcf
38 |
--------------------------------------------------------------------------------
/tests/alstructure_function_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2019 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import pytest
19 | import mockups
20 | import filecmp
21 | import structure_threader.wrappers.alstructure_wrapper as alsw
22 |
23 |
def test_alstr_cli_generator():
    """
    Checks the command line and output file built by alstr_cli_generator()
    for an input file with a `.bed` extension.
    """
    k_val = 4
    expected_outfile = "alstr_K4"

    # Mocked arguments, with a plink style input file
    arg = mockups.Arguments()
    arg.infile += ".bed"

    # Layout: "Rscript", arg.external_prog, infile, str(k_val), output_file
    expected_cli = ["Rscript", "EP", "IF", str(k_val), expected_outfile]

    returned_cli, out_file = alsw.alstr_cli_generator(arg, k_val)

    assert returned_cli == expected_cli
    assert out_file == expected_outfile
40 |
41 |
def test_vcf_to_matrix():
    """
    Tests if vcf_to_matrix() is working correctly.
    Converts a known file, and compares the result with a known good conversion
    """
    vcf_file = "smalldata/SmallTestData.vcf"
    alsw.vcf_to_matrix(vcf_file)

    # shallow=False forces a comparison of the file contents; the default
    # may accept two files based on their os.stat() signatures alone.
    assert filecmp.cmp(vcf_file[:-4] + ".tsv",
                       "smalldata/SmallTestData_reference.tsv",
                       shallow=False)
55 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2016 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import os
19 | import os.path
20 | import sys
21 |
# Make the starting directory and its parent importable, then move into the
# `tests` directory below the starting directory.
_start_dir = os.getcwd()
for _relative in ('.', '..'):
    sys.path.append(os.path.join(_start_dir, _relative))
print(sys.path)

os.chdir(_start_dir + "/tests")
27 |
--------------------------------------------------------------------------------
/tests/evanno_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2016 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 |
19 | import glob
20 |
21 | import structure_threader.evanno.fastChooseK as fc
22 |
def test_parse_logs():
    """
    Checks the marginal likelihoods that parse_logs() extracts from the
    `.log` files in `files/` (compared regardless of order).
    """
    expected = [-0.9875020559, -0.978009636, -0.9721792877,
                -0.9768312088, -0.9806135049, -0.9825775986]
    log_files = glob.glob("files/*.log")
    parsed = fc.parse_logs(log_files)
    assert sorted(parsed) == sorted(expected)
32 |
def test_parse_varQs():
    """
    Checks the component counts that parse_varQs() derives from the
    `.meanQ` files in `files/` (compared regardless of order).
    """
    expected = [5, 2, 3, 1, 3, 3]
    meanq_files = glob.glob("files/*.meanQ")
    parsed = fc.parse_varQs(meanq_files)
    assert sorted(parsed) == sorted(expected)
39 |
def test_main():
    """
    Tests the result of main(): both the returned list of K values and the
    contents of the `chooseK.txt` file it writes.
    """
    indir = "files/"
    outdir = "files/"
    expected_lines = [
        'Model complexity that maximizes marginal likelihood = 2\n',
        'Model components used to explain structure in data = 3\n',
    ]

    assert fc.main(indir, outdir) == list(range(2, 4))

    # Read the result back through a context manager so the handle is
    # closed even when the assertion fails.
    with open(outdir + "chooseK.txt", "r") as outfile:
        assert outfile.readlines() == expected_lines
52 |
--------------------------------------------------------------------------------
/tests/fastStructure_field_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2016-2022 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
# Abort on the first failing command.
set -e

LightGreen='\033[1;32m'
NoColor='\033[0m'

echo "Runnig fastStructure 'field test'. This will simulate a full wrapped run on small test data."

# NOTE(review): the paths below are built from the current directory, so the
# script presumably has to be started from the repository root - confirm.
git_dir=$(pwd)
str_bin=$(which fastStructure)
structure_threader_exec=$(which structure_threader)

data_dir="${git_dir}/tests/smalldata"

# Unpack the test data next to its archive and run the wrapped analysis.
tar xvfJ "${data_dir}/BigTestData.str.tar.xz" -C "${data_dir}/"
${structure_threader_exec} run \
    -i "${data_dir}/BigTestData.str" \
    -o ~/results_fs \
    -fs "${str_bin}" \
    -K 4 -t 4 \
    --ind "${data_dir}/indfile.txt"

echo -e "${LightGreen}fastStructure 'Field test' ran successfully. Yay!${NoColor}"
33 |
--------------------------------------------------------------------------------
/tests/faststructure_function_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2017 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 |
19 | import pytest
20 | import mockups
21 | import structure_threader.wrappers.faststructure_wrapper as fsw
22 |
23 |
def test_fs_cli_generator():
    """
    Checks the command line built by fs_cli_generator(), both for a binary
    and for a `.py` script (which must be prefixed with `python2`).
    """
    k_val = 4
    seed = "1235813"
    arg = mockups.Arguments()

    # (external program, expected command line prefix)
    cases = (("EP", []), ("EP.py", ["python2"]))

    for prog, prefix in cases:
        arg.external_prog = prog
        arg.seed = seed

        mock_cli = prefix + [prog, "-K", str(k_val), "--input", "IF",
                             "--output", "fS_run_K", "--format", "str",
                             "--seed", seed, "--prior=logistic"]

        returned_cli, returned_outdir = fsw.fs_cli_generator(k_val, arg)

        assert returned_cli == mock_cli
        assert returned_outdir == "fS_run_K"
46 |
--------------------------------------------------------------------------------
/tests/files/chooseK.txt:
--------------------------------------------------------------------------------
1 | Model complexity that maximizes marginal likelihood = 2
2 | Model components used to explain structure in data = 3
3 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.1.log:
--------------------------------------------------------------------------------
1 | Marginal likelihood with initialization (1) = -0.9768312088
2 | Marginal likelihood with initialization (2) = -0.9768312088
3 | Marginal likelihood with initialization (3) = -0.9768312088
4 | Marginal likelihood with initialization (4) = -0.9768312088
5 | Marginal likelihood with initialization (5) = -0.9768312088
6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs)
7 | 0 -0.9768312088 -- 0.129
8 | 10 -0.9768312088 0.0000000000 0.060
9 | Marginal Likelihood = -0.9768312088
10 | Total time = 0.2047 seconds
11 | Total iterations = 10
12 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.1.meanQ:
--------------------------------------------------------------------------------
1 | 1.000000
2 | 1.000000
3 | 1.000000
4 | 1.000000
5 | 1.000000
6 | 1.000000
7 | 1.000000
8 | 1.000000
9 | 1.000000
10 | 1.000000
11 | 1.000000
12 | 1.000000
13 | 1.000000
14 | 1.000000
15 | 1.000000
16 | 1.000000
17 | 1.000000
18 | 1.000000
19 | 1.000000
20 | 1.000000
21 | 1.000000
22 | 1.000000
23 | 1.000000
24 | 1.000000
25 | 1.000000
26 | 1.000000
27 | 1.000000
28 | 1.000000
29 | 1.000000
30 | 1.000000
31 | 1.000000
32 | 1.000000
33 | 1.000000
34 | 1.000000
35 | 1.000000
36 | 1.000000
37 | 1.000000
38 | 1.000000
39 | 1.000000
40 | 1.000000
41 | 1.000000
42 | 1.000000
43 | 1.000000
44 | 1.000000
45 | 1.000000
46 | 1.000000
47 | 1.000000
48 | 1.000000
49 | 1.000000
50 | 1.000000
51 | 1.000000
52 | 1.000000
53 | 1.000000
54 | 1.000000
55 | 1.000000
56 | 1.000000
57 | 1.000000
58 | 1.000000
59 | 1.000000
60 | 1.000000
61 | 1.000000
62 | 1.000000
63 | 1.000000
64 | 1.000000
65 | 1.000000
66 | 1.000000
67 | 1.000000
68 | 1.000000
69 | 1.000000
70 | 1.000000
71 | 1.000000
72 | 1.000000
73 | 1.000000
74 | 1.000000
75 | 1.000000
76 | 1.000000
77 | 1.000000
78 | 1.000000
79 | 1.000000
80 | 1.000000
81 | 1.000000
82 | 1.000000
83 | 1.000000
84 | 1.000000
85 | 1.000000
86 | 1.000000
87 | 1.000000
88 | 1.000000
89 | 1.000000
90 | 1.000000
91 | 1.000000
92 | 1.000000
93 | 1.000000
94 | 1.000000
95 | 1.000000
96 | 1.000000
97 | 1.000000
98 | 1.000000
99 | 1.000000
100 | 1.000000
101 | 1.000000
102 | 1.000000
103 | 1.000000
104 | 1.000000
105 | 1.000000
106 | 1.000000
107 | 1.000000
108 | 1.000000
109 | 1.000000
110 | 1.000000
111 | 1.000000
112 | 1.000000
113 | 1.000000
114 | 1.000000
115 | 1.000000
116 | 1.000000
117 | 1.000000
118 | 1.000000
119 | 1.000000
120 | 1.000000
121 | 1.000000
122 | 1.000000
123 | 1.000000
124 | 1.000000
125 | 1.000000
126 | 1.000000
127 | 1.000000
128 | 1.000000
129 | 1.000000
130 | 1.000000
131 | 1.000000
132 | 1.000000
133 | 1.000000
134 | 1.000000
135 | 1.000000
136 | 1.000000
137 | 1.000000
138 | 1.000000
139 | 1.000000
140 | 1.000000
141 | 1.000000
142 | 1.000000
143 | 1.000000
144 | 1.000000
145 | 1.000000
146 | 1.000000
147 | 1.000000
148 | 1.000000
149 | 1.000000
150 | 1.000000
151 | 1.000000
152 | 1.000000
153 | 1.000000
154 | 1.000000
155 | 1.000000
156 | 1.000000
157 | 1.000000
158 | 1.000000
159 | 1.000000
160 | 1.000000
161 | 1.000000
162 | 1.000000
163 | 1.000000
164 | 1.000000
165 | 1.000000
166 | 1.000000
167 | 1.000000
168 | 1.000000
169 | 1.000000
170 | 1.000000
171 | 1.000000
172 | 1.000000
173 | 1.000000
174 | 1.000000
175 | 1.000000
176 | 1.000000
177 | 1.000000
178 | 1.000000
179 | 1.000000
180 | 1.000000
181 | 1.000000
182 | 1.000000
183 | 1.000000
184 | 1.000000
185 | 1.000000
186 | 1.000000
187 | 1.000000
188 | 1.000000
189 | 1.000000
190 | 1.000000
191 | 1.000000
192 | 1.000000
193 | 1.000000
194 | 1.000000
195 | 1.000000
196 | 1.000000
197 | 1.000000
198 | 1.000000
199 | 1.000000
200 | 1.000000
201 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.2.log:
--------------------------------------------------------------------------------
1 | Marginal likelihood with initialization (1) = -0.9962448182
2 | Marginal likelihood with initialization (2) = -0.9962458914
3 | Marginal likelihood with initialization (3) = -0.9962442830
4 | Marginal likelihood with initialization (4) = -0.9962474286
5 | Marginal likelihood with initialization (5) = -0.9962488147
6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs)
7 | 0 -0.9962442830 -- 0.256
8 | 10 -0.9722112089 0.0240330742 0.133
9 | 20 -0.9721876216 0.0000235873 0.141
10 | 30 -0.9721816820 0.0000059396 0.140
11 | 40 -0.9721795377 0.0000021443 0.141
12 | 50 -0.9721792877 0.0000002500 0.140
13 | Marginal Likelihood = -0.9721792877
14 | Total time = 0.9769 seconds
15 | Total iterations = 50
16 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.2.meanQ:
--------------------------------------------------------------------------------
1 | 0.331741 0.668259
2 | 0.892599 0.107401
3 | 0.228488 0.771512
4 | 0.856694 0.143306
5 | 0.699816 0.300184
6 | 0.456815 0.543185
7 | 0.002000 0.998000
8 | 0.081283 0.918717
9 | 0.177865 0.822135
10 | 0.031425 0.968575
11 | 0.993659 0.006341
12 | 0.005221 0.994779
13 | 0.090059 0.909941
14 | 0.365875 0.634125
15 | 0.007618 0.992382
16 | 0.720843 0.279157
17 | 0.893861 0.106139
18 | 0.003167 0.996833
19 | 0.011088 0.988912
20 | 0.240903 0.759097
21 | 0.298820 0.701180
22 | 0.345545 0.654455
23 | 0.041422 0.958578
24 | 0.998039 0.001961
25 | 0.908162 0.091838
26 | 0.896575 0.103425
27 | 0.998298 0.001702
28 | 0.551780 0.448220
29 | 0.010520 0.989480
30 | 0.746138 0.253862
31 | 0.135904 0.864096
32 | 0.997515 0.002485
33 | 0.174546 0.825454
34 | 0.104552 0.895448
35 | 0.038609 0.961391
36 | 0.281483 0.718517
37 | 0.248786 0.751214
38 | 0.837123 0.162877
39 | 0.027292 0.972708
40 | 0.068598 0.931402
41 | 0.996376 0.003624
42 | 0.002387 0.997613
43 | 0.607574 0.392426
44 | 0.099170 0.900830
45 | 0.626031 0.373969
46 | 0.004283 0.995717
47 | 0.738307 0.261693
48 | 0.733526 0.266474
49 | 0.031206 0.968794
50 | 0.039766 0.960234
51 | 0.067336 0.932664
52 | 0.202952 0.797048
53 | 0.025617 0.974383
54 | 0.610240 0.389760
55 | 0.730133 0.269867
56 | 0.874680 0.125320
57 | 0.153469 0.846531
58 | 0.001666 0.998334
59 | 0.998214 0.001786
60 | 0.441552 0.558448
61 | 0.115335 0.884665
62 | 0.002956 0.997044
63 | 0.997504 0.002496
64 | 0.140582 0.859418
65 | 0.974193 0.025807
66 | 0.269150 0.730850
67 | 0.926503 0.073497
68 | 0.001841 0.998159
69 | 0.002722 0.997278
70 | 0.939099 0.060901
71 | 0.997309 0.002691
72 | 0.912466 0.087534
73 | 0.002411 0.997589
74 | 0.032776 0.967224
75 | 0.837659 0.162341
76 | 0.277290 0.722710
77 | 0.042524 0.957476
78 | 0.367006 0.632994
79 | 0.103201 0.896799
80 | 0.138195 0.861805
81 | 0.003942 0.996058
82 | 0.003629 0.996371
83 | 0.997654 0.002346
84 | 0.610618 0.389382
85 | 0.968560 0.031440
86 | 0.998089 0.001911
87 | 0.153792 0.846208
88 | 0.255452 0.744548
89 | 0.900993 0.099007
90 | 0.898977 0.101023
91 | 0.029303 0.970697
92 | 0.530885 0.469115
93 | 0.997547 0.002453
94 | 0.997679 0.002321
95 | 0.288946 0.711054
96 | 0.217767 0.782233
97 | 0.282245 0.717755
98 | 0.997146 0.002854
99 | 0.409879 0.590121
100 | 0.552314 0.447686
101 | 0.088516 0.911484
102 | 0.054706 0.945294
103 | 0.002425 0.997575
104 | 0.271878 0.728122
105 | 0.004466 0.995534
106 | 0.002118 0.997882
107 | 0.005835 0.994165
108 | 0.166099 0.833901
109 | 0.030578 0.969422
110 | 0.796477 0.203523
111 | 0.997926 0.002074
112 | 0.201074 0.798926
113 | 0.998043 0.001957
114 | 0.211867 0.788133
115 | 0.028922 0.971078
116 | 0.975076 0.024924
117 | 0.996514 0.003486
118 | 0.002619 0.997381
119 | 0.232152 0.767848
120 | 0.143624 0.856376
121 | 0.989838 0.010162
122 | 0.158352 0.841648
123 | 0.058998 0.941002
124 | 0.593080 0.406920
125 | 0.997703 0.002297
126 | 0.069720 0.930280
127 | 0.886732 0.113268
128 | 0.739399 0.260601
129 | 0.001616 0.998384
130 | 0.070098 0.929902
131 | 0.453419 0.546581
132 | 0.992781 0.007219
133 | 0.573809 0.426191
134 | 0.008584 0.991416
135 | 0.050800 0.949200
136 | 0.732246 0.267754
137 | 0.989812 0.010188
138 | 0.219366 0.780634
139 | 0.006969 0.993031
140 | 0.185238 0.814762
141 | 0.998112 0.001888
142 | 0.339359 0.660641
143 | 0.939270 0.060730
144 | 0.633369 0.366631
145 | 0.245514 0.754486
146 | 0.248691 0.751309
147 | 0.263072 0.736928
148 | 0.003037 0.996963
149 | 0.901762 0.098238
150 | 0.224052 0.775948
151 | 0.997198 0.002802
152 | 0.167212 0.832788
153 | 0.301326 0.698674
154 | 0.272351 0.727649
155 | 0.077950 0.922050
156 | 0.243474 0.756526
157 | 0.163620 0.836380
158 | 0.371254 0.628746
159 | 0.997634 0.002366
160 | 0.049000 0.951000
161 | 0.541675 0.458325
162 | 0.876381 0.123619
163 | 0.530690 0.469310
164 | 0.251742 0.748258
165 | 0.945323 0.054677
166 | 0.996418 0.003582
167 | 0.975009 0.024991
168 | 0.004857 0.995143
169 | 0.998017 0.001983
170 | 0.001762 0.998238
171 | 0.115508 0.884492
172 | 0.921012 0.078988
173 | 0.998151 0.001849
174 | 0.646833 0.353167
175 | 0.984644 0.015356
176 | 0.997853 0.002147
177 | 0.997309 0.002691
178 | 0.281990 0.718010
179 | 0.990500 0.009500
180 | 0.005388 0.994612
181 | 0.997445 0.002555
182 | 0.410226 0.589774
183 | 0.287826 0.712174
184 | 0.353331 0.646669
185 | 0.998224 0.001776
186 | 0.996591 0.003409
187 | 0.998192 0.001808
188 | 0.277430 0.722570
189 | 0.028117 0.971883
190 | 0.604150 0.395850
191 | 0.973441 0.026559
192 | 0.142860 0.857140
193 | 0.411118 0.588882
194 | 0.470591 0.529409
195 | 0.535374 0.464626
196 | 0.056266 0.943734
197 | 0.997050 0.002950
198 | 0.080356 0.919644
199 | 0.994845 0.005155
200 | 0.196205 0.803795
201 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.3.log:
--------------------------------------------------------------------------------
1 | Marginal likelihood with initialization (1) = -1.0145917797
2 | Marginal likelihood with initialization (2) = -1.0145882446
3 | Marginal likelihood with initialization (3) = -1.0145827297
4 | Marginal likelihood with initialization (4) = -1.0145901376
5 | Marginal likelihood with initialization (5) = -1.0145910911
6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs)
7 | 0 -1.0145827297 -- 0.348
8 | 10 -0.9859337987 0.0286489310 0.194
9 | 20 -0.9822573922 0.0036764065 0.202
10 | 30 -0.9794985794 0.0027588128 0.211
11 | 40 -0.9787630021 0.0007355773 0.207
12 | 50 -0.9784822781 0.0002807240 0.204
13 | 60 -0.9783333715 0.0001489066 0.201
14 | 70 -0.9782232677 0.0001101038 0.205
15 | 80 -0.9781580266 0.0000652411 0.203
16 | 90 -0.9781153321 0.0000426946 0.204
17 | 100 -0.9780905558 0.0000247762 0.202
18 | 110 -0.9780713773 0.0000191785 0.201
19 | 120 -0.9780566238 0.0000147536 0.202
20 | 130 -0.9780388288 0.0000177949 0.204
21 | 140 -0.9780276914 0.0000111374 0.204
22 | 150 -0.9780104209 0.0000172705 0.205
23 | 160 -0.9780096360 0.0000007849 0.202
24 | Marginal Likelihood = -0.9780096360
25 | Total time = 3.6365 seconds
26 | Total iterations = 160
27 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.4.log:
--------------------------------------------------------------------------------
1 | Marginal likelihood with initialization (1) = -1.0322754363
2 | Marginal likelihood with initialization (2) = -1.0322789256
3 | Marginal likelihood with initialization (3) = -1.0322756875
4 | Marginal likelihood with initialization (4) = -1.0322761998
5 | Marginal likelihood with initialization (5) = -1.0322750626
6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs)
7 | 0 -1.0322750626 -- 0.444
8 | 10 -0.9968657948 0.0354092679 0.228
9 | 20 -0.9927563614 0.0041094334 0.250
10 | 30 -0.9906067583 0.0021496031 0.242
11 | 40 -0.9892050089 0.0014017494 0.240
12 | 50 -0.9880193078 0.0011857011 0.250
13 | 60 -0.9869323420 0.0010869658 0.249
14 | 70 -0.9858803255 0.0010520165 0.261
15 | 80 -0.9844959958 0.0013843298 0.277
16 | 90 -0.9814235998 0.0030723959 0.482
17 | 100 -0.9809491806 0.0004744193 0.906
18 | 110 -0.9808581435 0.0000910371 0.915
19 | 120 -0.9808059295 0.0000522140 0.913
20 | 130 -0.9807703261 0.0000356034 0.921
21 | 140 -0.9807255141 0.0000448120 0.942
22 | 150 -0.9807141701 0.0000113440 0.997
23 | 160 -0.9806841895 0.0000299807 0.908
24 | 170 -0.9806591596 0.0000250299 0.901
25 | 180 -0.9806227676 0.0000363919 0.933
26 | 190 -0.9806137539 0.0000090137 0.912
27 | 200 -0.9806135049 0.0000002490 1.024
28 | Marginal Likelihood = -0.9806135049
29 | Total time = 13.2452 seconds
30 | Total iterations = 200
31 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.5.log:
--------------------------------------------------------------------------------
1 | Marginal likelihood with initialization (1) = -1.0494952058
2 | Marginal likelihood with initialization (2) = -1.0494959772
3 | Marginal likelihood with initialization (3) = -1.0494950292
4 | Marginal likelihood with initialization (4) = -1.0494955249
5 | Marginal likelihood with initialization (5) = -1.0494938800
6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs)
7 | 0 -1.0494938800 -- 0.603
8 | 10 -1.0143674048 0.0351264752 0.324
9 | 20 -1.0080986272 0.0062687775 0.360
10 | 30 -1.0044323928 0.0036662344 0.353
11 | 40 -1.0018065336 0.0026258592 0.365
12 | 50 -0.9997522272 0.0020543064 0.380
13 | 60 -0.9981561486 0.0015960785 0.359
14 | 70 -0.9967190253 0.0014371233 0.376
15 | 80 -0.9950744351 0.0016445903 0.437
16 | 90 -0.9932921706 0.0017822644 0.555
17 | 100 -0.9905502256 0.0027419451 0.596
18 | 110 -0.9885425160 0.0020077096 1.076
19 | 120 -0.9874276668 0.0011148492 1.127
20 | 130 -0.9850634302 0.0023642366 1.140
21 | 140 -0.9829899842 0.0020734460 1.688
22 | 150 -0.9828916089 0.0000983753 1.762
23 | 160 -0.9828161668 0.0000754421 1.786
24 | 170 -0.9827707233 0.0000454435 1.756
25 | 180 -0.9826984185 0.0000723048 1.734
26 | 190 -0.9826596660 0.0000387525 1.724
27 | 200 -0.9827200318 -0.0000603657 1.766
28 | 210 -0.9826534179 0.0000666139 1.716
29 | 220 -0.9826403040 0.0000131139 1.716
30 | 230 -0.9826219649 0.0000183391 1.739
31 | 240 -0.9826069534 0.0000150115 1.732
32 | 250 -0.9826014574 0.0000054960 1.717
33 | 260 -0.9825777629 0.0000236945 1.710
34 | 270 -0.9825775986 0.0000001642 1.715
35 | Marginal Likelihood = -0.9825775986
36 | Total time = 32.3766 seconds
37 | Total iterations = 270
38 |
--------------------------------------------------------------------------------
/tests/files/fS_run_K.6.log:
--------------------------------------------------------------------------------
1 | Marginal likelihood with initialization (1) = -1.0663514941
2 | Marginal likelihood with initialization (2) = -1.0663524294
3 | Marginal likelihood with initialization (3) = -1.0663511842
4 | Marginal likelihood with initialization (4) = -1.0663493670
5 | Marginal likelihood with initialization (5) = -1.0663537662
6 | Iteration Marginal_Likelihood delta_Marginal_Likelihood Iteration_Time (secs)
7 | 0 -1.0663493670 -- 0.707
8 | 10 -1.0297408401 0.0366085269 0.375
9 | 20 -1.0209563255 0.0087845145 0.424
10 | 30 -1.0154511352 0.0055051903 0.482
11 | 40 -1.0122027626 0.0032483726 0.457
12 | 50 -1.0096371397 0.0025656228 0.454
13 | 60 -1.0076254619 0.0020116778 0.459
14 | 70 -1.0059788580 0.0016466039 0.442
15 | 80 -1.0043790133 0.0015998447 0.477
16 | 90 -1.0026262978 0.0017527155 0.513
17 | 100 -1.0002259160 0.0024003818 0.560
18 | 110 -0.9976624946 0.0025634214 0.688
19 | 120 -0.9960707049 0.0015917897 0.701
20 | 130 -0.9943839192 0.0016867857 0.722
21 | 140 -0.9919652260 0.0024186931 0.817
22 | 150 -0.9883990150 0.0035662110 1.220
23 | 160 -0.9879880261 0.0004109889 1.446
24 | 170 -0.9878610098 0.0001270163 1.472
25 | 180 -0.9877818167 0.0000791931 1.469
26 | 190 -0.9876545303 0.0001272864 1.462
27 | 200 -0.9875749509 0.0000795793 1.447
28 | 210 -0.9875508346 0.0000241164 1.450
29 | 220 -0.9875354411 0.0000153935 1.462
30 | 230 -0.9875174672 0.0000179739 1.460
31 | 240 -0.9875021183 0.0000153489 1.456
32 | 250 -0.9875020559 0.0000000623 1.440
33 | Marginal Likelihood = -0.9875020559
34 | Total time = 24.1385 seconds
35 | Total iterations = 250
36 |
--------------------------------------------------------------------------------
/tests/files/mav_K1/outputEvidence.csv:
--------------------------------------------------------------------------------
1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE
2 | 1,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,-4893.197483,-4895.314475,-4895.199946,-4896.925829,-4895.213564,-4895.170259,0.591499,-4950.068622,0.000000
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K1/outputEvidenceDetails.csv:
--------------------------------------------------------------------------------
1 | K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20
2 | 1,-4837.237055,-4837.352752,-4837.247417,-4837.566108,-4837.195471,111.920856,115.923445,115.905059,118.719441,116.036186,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K1/outputEvidenceNormalised.csv:
--------------------------------------------------------------------------------
1 | K,posterior_exhaustive,posterior_harmonic_mean,posterior_harmonic_LL,posterior_harmonic_UL,posterior_structure_mean,posterior_structure_LL,posterior_structure_UL,posterior_TI_mean,posterior_TI_LL,posterior_TI_UL
2 | 1,NA,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K1/outputLog.txt:
--------------------------------------------------------------------------------
1 | ------------------------------------------
2 | MAVERICK
3 | by Robert Verity and Richard A. Nichols
4 | Version 1.0.4 (30 November 2016)
5 | accessed Wed Mar 8 14:54:27 2017
6 | ------------------------------------------
7 |
8 | Parameters file: //home/francisco/Programming/github/MavericK/parameters.txt
9 |
10 | Data file: //home/francisco/Programming/github/MavericK/Qsuber_sequenom.structure
11 |
12 | Parameters taking default values
13 | fixLabels_on = true
14 | mainThinning = 1
15 | outputComparisonStatistics_on = false
16 | outputEvanno_on = false
17 | outputEvidenceNormalised_on = true
18 | outputMaxLike_admixFreqs_on = false
19 | outputMaxLike_alleleFreqs_on = false
20 | outputPosteriorGrouping_on = false
21 | outputQmatrixError_gene_on = false
22 | outputQmatrix_structureFormat_on = false
23 | suppressWarning1_on = false
24 | thermodynamicThinning = 1
25 |
26 | Parameters read in from file
27 | EMalgorithm_on = f
28 | EMiterations = 100
29 | EMrepeats = 100
30 | admix_on = t
31 | alpha = 1.0
32 | alphaPropSD = 0.10
33 | exhaustive_on = f
34 | fixAlpha_on = f
35 | headerRow_on = t
36 | mainBurnin = 500
37 | mainRepeats = 5
38 | mainSamples = 4000
39 | missingData = -9
40 | outputEvidenceDetails_on = t
41 | outputEvidence_on = t
42 | outputLikelihood_on = t
43 | outputLog_on = t
44 | outputQmatrixError_ind_on = t
45 | outputQmatrixError_pop_on = t
46 | outputQmatrix_gene_on = t
47 | outputQmatrix_ind_on = t
48 | outputQmatrix_pop_on = t
49 | ploidy = 2
50 | ploidyCol_on = f
51 | popCol_on = t
52 | thermodynamicBurnin = 1000
53 | thermodynamicRungs = 20
54 | thermodynamicSamples = 5000
55 | thermodynamic_on = t
56 |
57 | Parameters defined on command line
58 | Kmax = 1
59 | Kmin = 1
60 |
61 | Data properties
62 | row 1 = header line
63 | column 1 = individual labels
64 | column 2 = population of origin
65 | unique populations = 19
66 | individuals = 375
67 | loci = 1
68 | alleles per locus = {597}
69 | missing observations = 0 of 750
70 |
71 | -- K=1 ----------------
72 |
73 | Running exhaustive approach...
74 | complete
75 |
76 | Running ordinary MCMC...
77 | analysis 1 of 5
78 | analysis 2 of 5
79 | analysis 3 of 5
80 | analysis 4 of 5
81 | analysis 5 of 5
82 | complete
83 |
84 | Carrying out thermodynamic integration...
85 | complete
86 |
87 | Estimates of (log) model evidence...
88 |
89 | Harmonic mean (averaged over 5 runs)
90 | estimate: -4950.068622
91 | standard error: 0.000000
92 |
93 | Structure estimator (averaged over 5 runs)
94 | estimate: -4895.170259
95 | standard error: 0.591499
96 |
97 | Thermodynamic integral estimator
98 | estimate: -4950.068622
99 | standard error: 0.000000
100 |
101 | Program completed in approximately 14 seconds
102 | Output written to: //home/francisco/aaa/K1/
103 | ------------------------------------------
104 |
--------------------------------------------------------------------------------
/tests/files/mav_K1/outputQmatrixError_pop_K1.csv:
--------------------------------------------------------------------------------
1 | given_population,individuals,deme1
2 | 1,20,0.000000
3 | 2,20,0.000000
4 | 3,20,0.000000
5 | 4,19,0.000000
6 | 5,20,0.000000
7 | 6,20,0.000000
8 | 7,20,0.000000
9 | 8,20,0.000000
10 | 9,19,0.000000
11 | 10,20,0.000000
12 | 11,20,0.000000
13 | 12,19,0.000000
14 | 13,18,0.000000
15 | 14,20,0.000000
16 | 15,20,0.000000
17 | 16,20,0.000000
18 | 17,20,0.000000
19 | 18,20,0.000000
20 | 19,20,0.000000
21 |
--------------------------------------------------------------------------------
/tests/files/mav_K1/outputQmatrix_pop_K1.csv:
--------------------------------------------------------------------------------
1 | given_population,members,deme1
2 | 1,20,1.000
3 | 2,20,1.000
4 | 3,20,1.000
5 | 4,19,1.000
6 | 5,20,1.000
7 | 6,20,1.000
8 | 7,20,1.000
9 | 8,20,1.000
10 | 9,19,1.000
11 | 10,20,1.000
12 | 11,20,1.000
13 | 12,19,1.000
14 | 13,18,1.000
15 | 14,20,1.000
16 | 15,20,1.000
17 | 16,20,1.000
18 | 17,20,1.000
19 | 18,20,1.000
20 | 19,20,1.000
21 |
--------------------------------------------------------------------------------
/tests/files/mav_K2/outputEvidence.csv:
--------------------------------------------------------------------------------
1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE
2 | 2,NA,-4856.632930,-4848.323781,-4851.489523,-4848.881320,-4854.073068,-4851.880124,1.568297,-4899.476073,-4873.733718,-4968.378324,-4834.051540,-4888.401052,-4892.808141,21.898525,-4857.999632,0.186050
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K2/outputEvidenceDetails.csv:
--------------------------------------------------------------------------------
1 | K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20
2 | 2,-4642.025324,-4642.131637,-4646.324901,-4637.938826,-4642.016869,514.901498,463.204162,644.106847,392.225429,492.768366,-4871.698511,-4871.568007,-4870.901184,-4869.884613,-4869.304794,-4868.322469,-4866.607574,-4865.781276,-4865.899430,-4864.714164,-4861.072693,-4859.566166,-4856.120337,-4854.334843,-4858.785823,-4844.267226,-4838.273490,-4834.594181,-4831.627192,-4829.036598,0.059240,0.052458,0.053758,0.069849,0.055874,0.057787,0.838348,0.484864,0.061589,0.062912,0.580284,0.550482,0.867754,2.915845,0.075564,1.005823,0.454265,0.415732,0.478739,0.216293
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K2/outputEvidenceNormalised.csv:
--------------------------------------------------------------------------------
1 | K,posterior_exhaustive,posterior_harmonic_mean,posterior_harmonic_LL,posterior_harmonic_UL,posterior_structure_mean,posterior_structure_LL,posterior_structure_UL,posterior_TI_mean,posterior_TI_LL,posterior_TI_UL
2 | 2,NA,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K2/outputLog.txt:
--------------------------------------------------------------------------------
1 | ------------------------------------------
2 | MAVERICK
3 | by Robert Verity and Richard A. Nichols
4 | Version 1.0.4 (30 November 2016)
5 | accessed Wed Mar 8 14:54:27 2017
6 | ------------------------------------------
7 |
8 | Parameters file: //home/francisco/Programming/github/MavericK/parameters.txt
9 |
10 | Data file: //home/francisco/Programming/github/MavericK/Qsuber_sequenom.structure
11 |
12 | Parameters taking default values
13 | fixLabels_on = true
14 | mainThinning = 1
15 | outputComparisonStatistics_on = false
16 | outputEvanno_on = false
17 | outputEvidenceNormalised_on = true
18 | outputMaxLike_admixFreqs_on = false
19 | outputMaxLike_alleleFreqs_on = false
20 | outputPosteriorGrouping_on = false
21 | outputQmatrixError_gene_on = false
22 | outputQmatrix_structureFormat_on = false
23 | suppressWarning1_on = false
24 | thermodynamicThinning = 1
25 |
26 | Parameters read in from file
27 | EMalgorithm_on = f
28 | EMiterations = 100
29 | EMrepeats = 100
30 | admix_on = t
31 | alpha = 1.0
32 | alphaPropSD = 0.10
33 | exhaustive_on = f
34 | fixAlpha_on = f
35 | headerRow_on = t
36 | mainBurnin = 500
37 | mainRepeats = 5
38 | mainSamples = 4000
39 | missingData = -9
40 | outputEvidenceDetails_on = t
41 | outputEvidence_on = t
42 | outputLikelihood_on = t
43 | outputLog_on = t
44 | outputQmatrixError_ind_on = t
45 | outputQmatrixError_pop_on = t
46 | outputQmatrix_gene_on = t
47 | outputQmatrix_ind_on = t
48 | outputQmatrix_pop_on = t
49 | ploidy = 2
50 | ploidyCol_on = f
51 | popCol_on = t
52 | thermodynamicBurnin = 1000
53 | thermodynamicRungs = 20
54 | thermodynamicSamples = 5000
55 | thermodynamic_on = t
56 |
57 | Parameters defined on command line
58 | Kmax = 2
59 | Kmin = 2
60 |
61 | Data properties
62 | row 1 = header line
63 | column 1 = individual labels
64 | column 2 = population of origin
65 | unique populations = 19
66 | individuals = 375
67 | loci = 1
68 | alleles per locus = {597}
69 | missing observations = 0 of 750
70 |
71 | -- K=2 ----------------
72 |
73 | Running ordinary MCMC...
74 | analysis 1 of 5
75 | analysis 2 of 5
76 | analysis 3 of 5
77 | analysis 4 of 5
78 | analysis 5 of 5
79 | complete
80 |
81 | Carrying out thermodynamic integration...
82 | power = 0.00
83 | power = 0.05
84 | power = 0.11
85 | power = 0.16
86 | power = 0.21
87 | power = 0.26
88 | power = 0.32
89 | power = 0.37
90 | power = 0.42
91 | power = 0.47
92 | power = 0.53
93 | power = 0.58
94 | power = 0.63
95 | power = 0.68
96 | power = 0.74
97 | power = 0.79
98 | power = 0.84
99 | power = 0.89
100 | power = 0.95
101 | power = 1.00
102 | complete
103 |
104 | Estimates of (log) model evidence...
105 |
106 | Harmonic mean (averaged over 5 runs)
107 | estimate: -4851.880124
108 | standard error: 1.568297
109 |
110 | Structure estimator (averaged over 5 runs)
111 | estimate: -4892.808141
112 | standard error: 21.898525
113 |
114 | Thermodynamic integral estimator
115 | estimate: -4857.999632
116 | standard error: 0.186050
117 |
118 | Program completed in approximately 76 seconds
119 | Output written to: //home/francisco/aaa/K2/
120 | ------------------------------------------
121 |
--------------------------------------------------------------------------------
/tests/files/mav_K2/outputQmatrixError_pop_K2.csv:
--------------------------------------------------------------------------------
1 | given_population,individuals,deme1,deme2
2 | 1,20,0.001399,0.001399
3 | 2,20,0.002001,0.002001
4 | 3,20,0.001360,0.001360
5 | 4,19,0.001139,0.001139
6 | 5,20,0.000964,0.000964
7 | 6,20,0.002221,0.002221
8 | 7,20,0.002534,0.002534
9 | 8,20,0.001788,0.001788
10 | 9,19,0.002095,0.002095
11 | 10,20,0.001570,0.001570
12 | 11,20,0.003051,0.003051
13 | 12,19,0.001470,0.001470
14 | 13,18,0.001024,0.001024
15 | 14,20,0.002149,0.002149
16 | 15,20,0.001915,0.001915
17 | 16,20,0.002162,0.002162
18 | 17,20,0.001749,0.001749
19 | 18,20,0.007064,0.007064
20 | 19,20,0.006483,0.006483
21 |
--------------------------------------------------------------------------------
/tests/files/mav_K2/outputQmatrix_pop_K2.csv:
--------------------------------------------------------------------------------
1 | given_population,members,deme1,deme2
2 | 1,20,0.515,0.485
3 | 2,20,0.514,0.486
4 | 3,20,0.518,0.482
5 | 4,19,0.514,0.486
6 | 5,20,0.515,0.485
7 | 6,20,0.514,0.486
8 | 7,20,0.520,0.480
9 | 8,20,0.516,0.484
10 | 9,19,0.514,0.486
11 | 10,20,0.516,0.484
12 | 11,20,0.520,0.480
13 | 12,19,0.514,0.486
14 | 13,18,0.515,0.485
15 | 14,20,0.513,0.487
16 | 15,20,0.521,0.479
17 | 16,20,0.515,0.485
18 | 17,20,0.514,0.486
19 | 18,20,0.211,0.789
20 | 19,20,0.232,0.768
21 |
--------------------------------------------------------------------------------
/tests/files/mav_K3/outputEvidence.csv:
--------------------------------------------------------------------------------
1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE
2 | 3,NA,-4812.193788,-4811.617486,-4816.869374,-4811.804987,-4815.077819,-4813.512691,1.047962,-4805.251200,-4828.874173,-4877.958287,-4827.789834,-4812.903396,-4830.555378,12.667684,-4826.325657,0.233016
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K3/outputEvidenceDetails.csv:
--------------------------------------------------------------------------------
1 | K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20
2 | 3,-4559.252409,-4562.072614,-4563.546862,-4560.779604,-4559.438919,491.997583,533.603118,628.822851,534.020459,506.928953,-4844.015929,-4842.788256,-4841.668817,-4840.246323,-4840.619825,-4839.634357,-4837.084935,-4834.745895,-4833.497700,-4831.799614,-4828.558782,-4826.749753,-4826.112989,-4819.417419,-4812.081612,-4809.074651,-4803.286714,-4797.179265,-4820.155460,-4786.954286,0.056233,0.061409,0.097444,0.678973,0.060120,0.060439,0.690001,0.499692,1.448793,1.576579,1.533912,1.613603,1.840289,1.758141,0.373402,1.039759,0.565020,0.893233,0.108332,0.313486
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K3/outputEvidenceNormalised.csv:
--------------------------------------------------------------------------------
1 | K,posterior_exhaustive,posterior_harmonic_mean,posterior_harmonic_LL,posterior_harmonic_UL,posterior_structure_mean,posterior_structure_LL,posterior_structure_UL,posterior_TI_mean,posterior_TI_LL,posterior_TI_UL
2 | 3,NA,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
3 |
--------------------------------------------------------------------------------
/tests/files/mav_K3/outputLog.txt:
--------------------------------------------------------------------------------
1 | ------------------------------------------
2 | MAVERICK
3 | by Robert Verity and Richard A. Nichols
4 | Version 1.0.4 (30 November 2016)
5 | accessed Wed Mar 8 14:54:27 2017
6 | ------------------------------------------
7 |
8 | Parameters file: //home/francisco/Programming/github/MavericK/parameters.txt
9 |
10 | Data file: //home/francisco/Programming/github/MavericK/Qsuber_sequenom.structure
11 |
12 | Parameters taking default values
13 | fixLabels_on = true
14 | mainThinning = 1
15 | outputComparisonStatistics_on = false
16 | outputEvanno_on = false
17 | outputEvidenceNormalised_on = true
18 | outputMaxLike_admixFreqs_on = false
19 | outputMaxLike_alleleFreqs_on = false
20 | outputPosteriorGrouping_on = false
21 | outputQmatrixError_gene_on = false
22 | outputQmatrix_structureFormat_on = false
23 | suppressWarning1_on = false
24 | thermodynamicThinning = 1
25 |
26 | Parameters read in from file
27 | EMalgorithm_on = f
28 | EMiterations = 100
29 | EMrepeats = 100
30 | admix_on = t
31 | alpha = 1.0
32 | alphaPropSD = 0.10
33 | exhaustive_on = f
34 | fixAlpha_on = f
35 | headerRow_on = t
36 | mainBurnin = 500
37 | mainRepeats = 5
38 | mainSamples = 4000
39 | missingData = -9
40 | outputEvidenceDetails_on = t
41 | outputEvidence_on = t
42 | outputLikelihood_on = t
43 | outputLog_on = t
44 | outputQmatrixError_ind_on = t
45 | outputQmatrixError_pop_on = t
46 | outputQmatrix_gene_on = t
47 | outputQmatrix_ind_on = t
48 | outputQmatrix_pop_on = t
49 | ploidy = 2
50 | ploidyCol_on = f
51 | popCol_on = t
52 | thermodynamicBurnin = 1000
53 | thermodynamicRungs = 20
54 | thermodynamicSamples = 5000
55 | thermodynamic_on = t
56 |
57 | Parameters defined on command line
58 | Kmax = 3
59 | Kmin = 3
60 |
61 | Data properties
62 | row 1 = header line
63 | column 1 = individual labels
64 | column 2 = population of origin
65 | unique populations = 19
66 | individuals = 375
67 | loci = 1
68 | alleles per locus = {597}
69 | missing observations = 0 of 750
70 |
71 | -- K=3 ----------------
72 |
73 | Running ordinary MCMC...
74 | analysis 1 of 5
75 | analysis 2 of 5
76 | analysis 3 of 5
77 | analysis 4 of 5
78 | analysis 5 of 5
79 | complete
80 |
81 | Carrying out thermodynamic integration...
82 | power = 0.00
83 | power = 0.05
84 | power = 0.11
85 | power = 0.16
86 | power = 0.21
87 | power = 0.26
88 | power = 0.32
89 | power = 0.37
90 | power = 0.42
91 | power = 0.47
92 | power = 0.53
93 | power = 0.58
94 | power = 0.63
95 | power = 0.68
96 | power = 0.74
97 | power = 0.79
98 | power = 0.84
99 | power = 0.89
100 | power = 0.95
101 | power = 1.00
102 | complete
103 |
104 | Estimates of (log) model evidence...
105 |
106 | Harmonic mean (averaged over 5 runs)
107 | estimate: -4813.512691
108 | standard error: 1.047962
109 |
110 | Structure estimator (averaged over 5 runs)
111 | estimate: -4830.555378
112 | standard error: 12.667684
113 |
114 | Thermodynamic integral estimator
115 | estimate: -4826.325657
116 | standard error: 0.233016
117 |
118 | Program completed in approximately 96 seconds
119 | Output written to: //home/francisco/aaa/K3/
120 | ------------------------------------------
121 |
--------------------------------------------------------------------------------
/tests/files/mav_K3/outputQmatrixError_pop_K3.csv:
--------------------------------------------------------------------------------
1 | given_population,individuals,deme1,deme2,deme3
2 | 1,20,0.002078,0.002111,0.001967
3 | 2,20,0.002416,0.002737,0.001468
4 | 3,20,0.002399,0.001352,0.002512
5 | 4,19,0.001307,0.001874,0.001004
6 | 5,20,0.001646,0.001803,0.001779
7 | 6,20,0.001633,0.002033,0.001200
8 | 7,20,0.001752,0.001634,0.002491
9 | 8,20,0.001616,0.001361,0.001761
10 | 9,19,0.002039,0.000818,0.002452
11 | 10,20,0.001869,0.001662,0.001961
12 | 11,20,0.001816,0.002076,0.003254
13 | 12,19,0.001825,0.001169,0.001118
14 | 13,18,0.002402,0.002587,0.001475
15 | 14,20,0.001994,0.001940,0.001300
16 | 15,20,0.001853,0.002396,0.001091
17 | 16,20,0.001925,0.001530,0.002277
18 | 17,20,0.002869,0.001445,0.002779
19 | 18,20,0.002704,0.002997,0.002013
20 | 19,20,0.002641,0.002179,0.000864
21 |
--------------------------------------------------------------------------------
/tests/files/mav_K3/outputQmatrix_pop_K3.csv:
--------------------------------------------------------------------------------
1 | given_population,members,deme1,deme2,deme3
2 | 1,20,0.317,0.345,0.339
3 | 2,20,0.317,0.331,0.353
4 | 3,20,0.315,0.335,0.350
5 | 4,19,0.318,0.343,0.339
6 | 5,20,0.318,0.343,0.339
7 | 6,20,0.316,0.332,0.352
8 | 7,20,0.315,0.348,0.337
9 | 8,20,0.316,0.331,0.352
10 | 9,19,0.318,0.343,0.339
11 | 10,20,0.314,0.349,0.337
12 | 11,20,0.311,0.308,0.381
13 | 12,19,0.319,0.342,0.339
14 | 13,18,0.317,0.345,0.338
15 | 14,20,0.318,0.343,0.338
16 | 15,20,0.312,0.336,0.353
17 | 16,20,0.316,0.348,0.337
18 | 17,20,0.318,0.342,0.340
19 | 18,20,0.705,0.149,0.146
20 | 19,20,0.680,0.158,0.163
21 |
--------------------------------------------------------------------------------
/tests/files/test_merged/outputEvidence.csv:
--------------------------------------------------------------------------------
1 | K,logEvidence_exhaustive,logEvidence_harmonic_rep1,logEvidence_harmonic_rep2,logEvidence_harmonic_rep3,logEvidence_harmonic_rep4,logEvidence_harmonic_rep5,logEvidence_harmonic_grandMean,logEvidence_harmonic_grandSE,logEvidence_structure_rep1,logEvidence_structure_rep2,logEvidence_structure_rep3,logEvidence_structure_rep4,logEvidence_structure_rep5,logEvidence_structure_grandMean,logEvidence_structure_grandSE,logEvidence_TI,logEvidence_TI_SE
2 | 1,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,-4893.197483,-4895.314475,-4895.199946,-4896.925829,-4895.213564,-4895.170259,0.591499,-4950.068622,0.000000
3 | 2,NA,-4856.632930,-4848.323781,-4851.489523,-4848.881320,-4854.073068,-4851.880124,1.568297,-4899.476073,-4873.733718,-4968.378324,-4834.051540,-4888.401052,-4892.808141,21.898525,-4857.999632,0.186050
4 | 3,NA,-4812.193788,-4811.617486,-4816.869374,-4811.804987,-4815.077819,-4813.512691,1.047962,-4805.251200,-4828.874173,-4877.958287,-4827.789834,-4812.903396,-4830.555378,12.667684,-4826.325657,0.233016
5 |
--------------------------------------------------------------------------------
/tests/files/test_merged/outputEvidenceDetails.csv:
--------------------------------------------------------------------------------
1 | K,structure_loglike_mean_rep1,structure_loglike_mean_rep2,structure_loglike_mean_rep3,structure_loglike_mean_rep4,structure_loglike_mean_rep5,structure_loglike_var_rep1,structure_loglike_var_rep2,structure_loglike_var_rep3,structure_loglike_var_rep4,structure_loglike_var_rep5,TIpoint_mean_rung1,TIpoint_mean_rung2,TIpoint_mean_rung3,TIpoint_mean_rung4,TIpoint_mean_rung5,TIpoint_mean_rung6,TIpoint_mean_rung7,TIpoint_mean_rung8,TIpoint_mean_rung9,TIpoint_mean_rung10,TIpoint_mean_rung11,TIpoint_mean_rung12,TIpoint_mean_rung13,TIpoint_mean_rung14,TIpoint_mean_rung15,TIpoint_mean_rung16,TIpoint_mean_rung17,TIpoint_mean_rung18,TIpoint_mean_rung19,TIpoint_mean_rung20,TIpoint_SE_rung1,TIpoint_SE_rung2,TIpoint_SE_rung3,TIpoint_SE_rung4,TIpoint_SE_rung5,TIpoint_SE_rung6,TIpoint_SE_rung7,TIpoint_SE_rung8,TIpoint_SE_rung9,TIpoint_SE_rung10,TIpoint_SE_rung11,TIpoint_SE_rung12,TIpoint_SE_rung13,TIpoint_SE_rung14,TIpoint_SE_rung15,TIpoint_SE_rung16,TIpoint_SE_rung17,TIpoint_SE_rung18,TIpoint_SE_rung19,TIpoint_SE_rung20
2 | 1,-4837.237055,-4837.352752,-4837.247417,-4837.566108,-4837.195471,111.920856,115.923445,115.905059,118.719441,116.036186,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,-4950.068622,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3 | 2,-4642.025324,-4642.131637,-4646.324901,-4637.938826,-4642.016869,514.901498,463.204162,644.106847,392.225429,492.768366,-4871.698511,-4871.568007,-4870.901184,-4869.884613,-4869.304794,-4868.322469,-4866.607574,-4865.781276,-4865.899430,-4864.714164,-4861.072693,-4859.566166,-4856.120337,-4854.334843,-4858.785823,-4844.267226,-4838.273490,-4834.594181,-4831.627192,-4829.036598,0.059240,0.052458,0.053758,0.069849,0.055874,0.057787,0.838348,0.484864,0.061589,0.062912,0.580284,0.550482,0.867754,2.915845,0.075564,1.005823,0.454265,0.415732,0.478739,0.216293
4 | 3,-4559.252409,-4562.072614,-4563.546862,-4560.779604,-4559.438919,491.997583,533.603118,628.822851,534.020459,506.928953,-4844.015929,-4842.788256,-4841.668817,-4840.246323,-4840.619825,-4839.634357,-4837.084935,-4834.745895,-4833.497700,-4831.799614,-4828.558782,-4826.749753,-4826.112989,-4819.417419,-4812.081612,-4809.074651,-4803.286714,-4797.179265,-4820.155460,-4786.954286,0.056233,0.061409,0.097444,0.678973,0.060120,0.060439,0.690001,0.499692,1.448793,1.576579,1.533912,1.613603,1.840289,1.758141,0.373402,1.039759,0.565020,0.893233,0.108332,0.313486
5 |
--------------------------------------------------------------------------------
/tests/maverick_field_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2017 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
# Abort on the first failing command (including a failed `which` lookup).
set -e

# ANSI escape sequences used to colour the final success message.
LightGreen='\033[1;32m'
NoColor='\033[0m'

echo "Runnig MavericK 'field test'. This will simulate a full wrapped run on small test data."

# The script expects to be launched from the repository root, since the test
# data paths below are built relative to the current directory.
git_dir=$(pwd)
maverick_bin=$(which MavericK)
structure_threader_exec=$(which structure_threader)

# Every expansion is quoted so that paths containing spaces survive word splitting.
"${structure_threader_exec}" run -i "${git_dir}/tests/smalldata/Reduced_dataset.structure" -o ~/results -mv "${maverick_bin}" -K 3 -t 4 --params "${git_dir}/tests/smalldata/parameters.txt"

echo -e "${LightGreen}MavericK 'Field test' ran successfully. Yay!${NoColor}"
32 |
--------------------------------------------------------------------------------
/tests/mockups.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2017 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 |
class Arguments:
    """
    Minimal stand-in for the "args" namespace that argparse would produce.

    Each attribute holds a fixed placeholder value; tests may overwrite
    individual attributes on an instance before using it.
    """
    def __init__(self):
        # Placeholder names for the wrapped program and its input file
        self.external_prog = "EP"
        self.infile = "IF"
        # Empty output path
        self.outpath = ""
        self.params = "smalldata/parameters.txt"
        self.notests = False
        self.k_list = [2, 3, 4, 5]
        self.extra_options = "--prior=logistic"
31 |
--------------------------------------------------------------------------------
/tests/sanity_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2016 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import os
19 | import pytest
20 | import structure_threader.sanity_checks.sanity as sc
21 |
22 |
def test_cpu_checker():
    """
    Checks that cpu_checker() returns a request of one thread unchanged and
    reduces a request above the CPU count to the CPU count.
    """
    available = os.cpu_count()

    # A request of a single thread comes back as-is
    assert sc.cpu_checker(1) == 1
    # One more thread than the machine reports comes back as the CPU count
    assert sc.cpu_checker(available + 1) == available
29 |
30 |
def test_file_checker(tmpdir):
    """
    Checks file_checker() with matching and mismatching file/directory paths.
    """
    existing_dir = tmpdir.mkdir("sub")
    existing_file = existing_dir.join("filetest.txt")
    existing_file.write("content")

    dir_path = str(existing_dir)
    file_path = str(existing_file)

    # An existing file checked as a file returns None
    assert sc.file_checker(file_path) is None
    # An existing directory checked as a directory returns None
    assert sc.file_checker(dir_path, is_file=False) is None

    # A directory given where a file is expected exits
    with pytest.raises(SystemExit):
        sc.file_checker(dir_path)

    # A file given where a directory is expected exits
    with pytest.raises(SystemExit):
        sc.file_checker(file_path, is_file=False)

    # A path that does not exist exits
    with pytest.raises(SystemExit):
        sc.file_checker(file_path + "a")
52 |
--------------------------------------------------------------------------------
/tests/smalldata/BigTestData.bed.tar.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/tests/smalldata/BigTestData.bed.tar.xz
--------------------------------------------------------------------------------
/tests/smalldata/BigTestData.str.tar.xz:
--------------------------------------------------------------------------------
1 | ../../PTS/data/BigTestData.str.tar.xz
--------------------------------------------------------------------------------
/tests/smalldata/BigTestData.vcf.tar.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuntsPT/Structure_threader/0ec858227d5670c36dc2cc924fd63bc868d4bf03/tests/smalldata/BigTestData.vcf.tar.xz
--------------------------------------------------------------------------------
/tests/smalldata/Reduced_dataset.structure:
--------------------------------------------------------------------------------
1 | ../../PTS/data/Reduced_dataset.structure
--------------------------------------------------------------------------------
/tests/smalldata/extraparams:
--------------------------------------------------------------------------------
1 | ../../PTS/data/extraparams
--------------------------------------------------------------------------------
/tests/smalldata/indfile.txt:
--------------------------------------------------------------------------------
1 | ../../TestData/indfile.txt
--------------------------------------------------------------------------------
/tests/smalldata/mainparams:
--------------------------------------------------------------------------------
1 | ../../PTS/data/mainparams
--------------------------------------------------------------------------------
/tests/smalldata/parameters.txt:
--------------------------------------------------------------------------------
1 | ../../TestData/parameters.txt
--------------------------------------------------------------------------------
/tests/smalldata/parameters_a.txt:
--------------------------------------------------------------------------------
1 | #### Data properties
2 | headerRow_on t
3 | popCol_on t
4 | ploidyCol_on f
5 | ploidy 2
6 | missingData -9
7 |
8 |
9 | #### Model parameters
10 | Kmin 1
11 | Kmax 4
12 | admix_on t
13 | fixAlpha_on f
14 | alpha 0.9,0.8,0.7,0.6
15 | alphaPropSD 0.10
16 |
17 |
18 | #### Simulation parameters
19 | exhaustive_on f
20 | mainRepeats 5
21 | mainBurnin 500
22 | mainSamples 4000
23 |
24 | thermodynamic_on t
25 | thermodynamicRungs 20
26 | thermodynamicBurnin 1000
27 | thermodynamicSamples 5000
28 |
29 |
30 | #### Basic output properties
31 | outputLog_on t
32 | outputLikelihood_on t
33 | outputQmatrix_ind_on t
34 | outputQmatrix_pop_on t
35 | outputEvidence_on t
36 | outputEvidenceDetails_on t
37 |
38 |
39 | #### Output location
40 |
--------------------------------------------------------------------------------
/tests/smalldata/parameters_a_as.txt:
--------------------------------------------------------------------------------
1 | #### Data properties
2 | headerRow_on t
3 | popCol_on t
4 | ploidyCol_on f
5 | ploidy 2
6 | missingData -9
7 |
8 |
9 | #### Model parameters
10 | Kmin 1
11 | Kmax 4
12 | admix_on t
13 | fixAlpha_on f
14 | alpha 0.9,0.8,0.7,0.6
15 | alphaPropSD 0.09,0.08,0.07,0.06
16 |
17 |
18 | #### Simulation parameters
19 | exhaustive_on f
20 | mainRepeats 5
21 | mainBurnin 500
22 | mainSamples 4000
23 |
24 | thermodynamic_on t
25 | thermodynamicRungs 20
26 | thermodynamicBurnin 1000
27 | thermodynamicSamples 5000
28 |
29 |
30 | #### Basic output properties
31 | outputLog_on t
32 | outputLikelihood_on t
33 | outputQmatrix_ind_on t
34 | outputQmatrix_pop_on t
35 | outputEvidence_on t
36 | outputEvidenceDetails_on t
37 |
38 |
39 | #### Output location
40 |
--------------------------------------------------------------------------------
/tests/smalldata/parameters_as.txt:
--------------------------------------------------------------------------------
1 | #### Data properties
2 | headerRow_on t
3 | popCol_on t
4 | ploidyCol_on f
5 | ploidy 2
6 | missingData -9
7 |
8 |
9 | #### Model parameters
10 | Kmin 1
11 | Kmax 4
12 | admix_on t
13 | fixAlpha_on f
14 | alpha 1.0
15 | alphaPropSD 0.09,0.08,0.07,0.06
16 |
17 |
18 | #### Simulation parameters
19 | exhaustive_on f
20 | mainRepeats 5
21 | mainBurnin 500
22 | mainSamples 4000
23 |
24 | thermodynamic_on t
25 | thermodynamicRungs 20
26 | thermodynamicBurnin 1000
27 | thermodynamicSamples 5000
28 |
29 |
30 | #### Basic output properties
31 | outputLog_on t
32 | outputLikelihood_on t
33 | outputQmatrix_ind_on t
34 | outputQmatrix_pop_on t
35 | outputEvidence_on t
36 | outputEvidenceDetails_on t
37 |
38 |
39 | #### Output location
40 |
--------------------------------------------------------------------------------
/tests/smalldata/parameters_f.txt:
--------------------------------------------------------------------------------
1 | #### Data properties
2 | headerRow_on t
3 | popCol_on t
4 | ploidyCol_on f
5 | ploidy 2
6 | missingData -9
7 |
8 |
9 | #### Model parameters
10 | Kmin 1
11 | Kmax 4
12 | admix_on t
13 | fixAlpha_on f
14 | alpha 1.0
15 | alphaPropSD 0.10
16 |
17 |
18 | #### Simulation parameters
19 | exhaustive_on f
20 | mainRepeats 5
21 | mainBurnin 500
22 | mainSamples 4000
23 |
24 | thermodynamic_on f
25 | thermodynamicRungs 20
26 | thermodynamicBurnin 1000
27 | thermodynamicSamples 5000
28 |
29 |
30 | #### Basic output properties
31 | outputLog_on t
32 | outputLikelihood_on t
33 | outputQmatrix_ind_on t
34 | outputQmatrix_pop_on t
35 | outputEvidence_on t
36 | outputEvidenceDetails_on t
37 |
38 |
39 | #### Output location
40 |
--------------------------------------------------------------------------------
/tests/structure_field_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2016-2022 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
# Abort on the first failing command (including a failed `which` lookup).
set -e

# ANSI escape sequences used to colour the final success message.
LightGreen='\033[1;32m'
NoColor='\033[0m'

echo "Runnig STRUCTURE 'field test'. This will simulate a full wrapped run on small test data."

# The script expects to be launched from the repository root, since the test
# data paths below are built relative to the current directory.
git_dir=$(pwd)
str_bin=$(which structure)
structure_threader_exec=$(which structure_threader)

# The executable path is quoted like every other expansion on this line so
# that an install location containing spaces is not word-split.
"${structure_threader_exec}" run -i "${git_dir}/tests/smalldata/Reduced_dataset.structure" -o ~/results -st "${str_bin}" -K 3 -t 4 -R 5 --params "${git_dir}/tests/smalldata/mainparams"

echo -e "${LightGreen}STRUCTURE 'Field test' ran successfully. Yay!${NoColor}"
32 |
--------------------------------------------------------------------------------
/tests/structure_function_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2017-2018 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | import pytest
19 | import mockups
20 | import structure_threader.wrappers.structure_wrapper as sw
21 |
22 |
def test_str_cli_generator():
    """
    Checks the command line and output file name returned by
    str_cli_generator(): first bare, then with a seed, then with parameters.
    """
    arg = mockups.Arguments()
    arg.params = None
    k_val = 4
    expected_outfile = "str_K4_rep1"
    expected_cli = ["EP", "-K", str(k_val), "-i", "IF", "-o",
                    expected_outfile]

    # Neither a seed nor parameters
    seed = None
    cli, outfile = sw.str_cli_generator(arg, k_val, 1, seed)
    assert cli == expected_cli
    assert outfile == expected_outfile

    # A seed is expected to show up as a trailing "-D <seed>" pair
    seed = "1234"
    expected_cli.extend(["-D", "1234"])
    cli, outfile = sw.str_cli_generator(arg, k_val, 1, seed)
    assert cli == expected_cli
    assert outfile == expected_outfile

    # Parameters are expected at the very end of the command line.
    # NOTE(review): arg.params is a plain string here, so extending the list
    # adds one element per character - presumably this mirrors how
    # str_cli_generator() appends arg.params; confirm against the wrapper.
    arg.params = "test"
    expected_cli.extend(arg.params)
    cli, outfile = sw.str_cli_generator(arg, k_val, 1, seed)
    assert cli == expected_cli
    assert outfile == expected_outfile
56 |
57 |
def test_str_param_checker():
    """
    Checks that str_param_checker() replaces arg.params with the expected
    list of STRUCTURE parameter-file flags.
    """
    expected_params = ["-m", "mainparams", "-e", "extraparams"]

    arg = mockups.Arguments()
    arg.infile = "smalldata/Reduced_dataset.structure"
    arg.params = "mainparams"

    # The checker rewrites arg.params in place; its return value is not used
    sw.str_param_checker(arg)

    assert arg.params == expected_params
67 |
68 |
def test_seed_generator():
    """
    Tests if the seed generator returns the expected (seed, K, replicate)
    jobs for two different starting seeds.
    """
    k_list = [1, 2]
    replicates = [1, 2, 3]
    seed = 1235813

    # Each job is a (seed string, K, replicate) tuple. The previous
    # seed-less placeholder list was overwritten before use, so it is gone.
    mock_jobs = [("2153978", 2, 3), ("940261", 2, 2), ("8867621", 2, 1),
                 ("786598", 1, 3), ("3922463", 1, 2), ("6870574", 1, 1)]
    returned_jobs = sw.seed_generator(seed, k_list, replicates)
    assert returned_jobs == mock_jobs

    # Use a different seed
    seed = 42

    mock_jobs = [("1867825", 2, 3), ("419610", 2, 2), ("4614226", 2, 1),
                 ("4108603", 1, 3), ("3744854", 1, 2), ("2341057", 1, 1)]
    returned_jobs = sw.seed_generator(seed, k_list, replicates)
    assert returned_jobs == mock_jobs
90 |
--------------------------------------------------------------------------------
/tests/wrapper_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright 2017 Francisco Pina Martins
4 | # This file is part of structure_threader.
5 | # structure_threader is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 |
10 | # structure_threader is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License
16 | # along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
17 |
--------------------------------------------------------------------------------