├── .devcontainer
├── DockerFile
└── devcontainer.json
├── .github
├── actions
│ ├── build-iqtree
│ │ └── action.yml
│ └── setup-piqtree
│ │ └── action.yml
├── dependabot.yml
└── workflows
│ ├── add_source_to_release.yml
│ ├── build_container.yml
│ ├── build_docs.yml
│ ├── build_wheels.yml
│ ├── ci.yml
│ └── release.yml
├── .gitignore
├── .gitmodules
├── .readthedocs.yaml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── build_tools
├── before_all_linux.sh
├── before_all_mac.sh
├── before_all_windows.sh
└── build_iqtree.sh
├── changelog.d
├── 20250521_122741_robert.mcarthur_windows_support.md
├── 20250522_113423_robert.mcarthur_nj_tree_nans.md
├── README.md
└── templates
│ ├── new.md.j2
│ ├── new.rst.j2
│ └── title.md.j2
├── changelog.md
├── docs
├── api
│ ├── genetic_distance
│ │ └── jc_distances.md
│ ├── index.md
│ ├── model
│ │ ├── FreqType.md
│ │ ├── Model.md
│ │ ├── ModelFinderResult.md
│ │ ├── RateModel.md
│ │ ├── SubstitutionModel.md
│ │ ├── make_model.md
│ │ └── model_finder.md
│ ├── tree
│ │ ├── build_tree.md
│ │ ├── fit_tree.md
│ │ ├── nj_tree.md
│ │ └── random_trees.md
│ └── tree_distance
│ │ └── robinson_foulds.md
├── apps
│ ├── app_pipeline.py
│ ├── available_help.py
│ ├── fit_tree.py
│ ├── model_finder.py
│ ├── nj.py
│ ├── options.py
│ ├── pairwise_dist.py
│ └── phylo.py
├── citation.md
├── developers
│ ├── contributing.md
│ ├── environment_setup.md
│ ├── index.md
│ ├── iqtree_submodule.md
│ ├── release.md
│ ├── testing.md
│ └── troubleshooting.md
├── index.md
└── quickstart
│ ├── calculate_jc_distances.md
│ ├── calculate_rf_distances.md
│ ├── construct_ml_tree.md
│ ├── construct_nj_tree.md
│ ├── fit_tree_topology.md
│ ├── index.md
│ ├── make_random_trees.md
│ ├── using_model_finder.md
│ └── using_substitution_models.md
├── mkdocs.yml
├── noxfile.py
├── pyproject.toml
├── rtd_get_docs.py
├── setup.py
├── src
└── piqtree
│ ├── __init__.py
│ ├── _app
│ └── __init__.py
│ ├── _data.py
│ ├── _libiqtree
│ ├── README.txt
│ ├── _piqtree.cpp
│ └── _piqtree.h
│ ├── exceptions.py
│ ├── iqtree
│ ├── __init__.py
│ ├── _decorator.py
│ ├── _jc_distance.py
│ ├── _model_finder.py
│ ├── _random_tree.py
│ ├── _robinson_foulds.py
│ └── _tree.py
│ ├── model
│ ├── __init__.py
│ ├── _freq_type.py
│ ├── _model.py
│ ├── _options.py
│ ├── _rate_type.py
│ └── _substitution_model.py
│ └── py.typed
└── tests
├── conftest.py
├── data
└── example.fasta
├── test_app
├── test_app.py
└── test_pickle.py
├── test_data.py
├── test_iqtree
├── test_build_tree.py
├── test_distance.py
├── test_fit_tree.py
├── test_model_finder.py
├── test_nj_tree.py
├── test_random_trees.py
├── test_robinson_foulds.py
├── test_segmentation_fault.py
└── test_tree_yaml.py
└── test_model
├── test_freq_type.py
├── test_model.py
├── test_options.py
├── test_rate_type.py
└── test_substitution_model.py
/.devcontainer/DockerFile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3
2 |
3 | # Install dependencies
4 | RUN apt-get update && apt-get install -y \
5 | build-essential \
6 | btop \
7 | cmake \
8 | gdb \
9 | gh \
10 | git \
11 | zsh \
12 | sudo \
13 | wget \
14 | libeigen3-dev \
15 | libboost-all-dev \
16 | binutils \
17 | && rm -rf /var/lib/apt/lists/*
18 |
19 | # Set up a non-root user with sudo
20 | RUN useradd -ms /bin/zsh user && echo "user ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/user
21 |
22 | USER user
23 | WORKDIR /home/user/repos
24 |
25 | # Install Oh My Zsh
26 | RUN sh -c "$(wget https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh -O -)" "" --unattended
27 |
28 | # Install zsh autosuggestions
29 | RUN git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions
30 |
31 | # Install zsh syntax highlighting
32 | RUN git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting
33 |
34 | # Configure zsh
35 | RUN sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions zsh-syntax-highlighting)/' ~/.zshrc
36 |
37 | # Create the piq-dev conda environment with Python 3.12; auto_activate_base is disabled to fix the VS Code terminal showing two conda env prompts
38 | RUN . /opt/conda/etc/profile.d/conda.sh && \
39 | conda config --set auto_activate_base False && \
40 | /opt/conda/bin/conda create -n piq-dev python=3.12 -y && \
41 | conda activate piq-dev
42 |
43 | # Initialize conda for zsh shell
44 | RUN echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.zshrc
45 |
46 | # Install pybind11 in the conda environment
47 | RUN . /opt/conda/etc/profile.d/conda.sh && \
48 | conda activate piq-dev && \
49 | pip install pybind11
50 |
51 | # Use zsh as the shell for subsequent shell-form instructions
52 | SHELL ["/bin/zsh", "-c"]
53 |
54 | # Run zsh by default when the container starts
55 | CMD [ "zsh" ]
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "piqtree devcontainer",
3 | "image": "ghcr.io/iqtree/piqtree:latest",
4 | "customizations": {
5 | "vscode": {
6 | "settings": {
7 | "terminal.integrated.shell.linux": "/bin/zsh",
8 | "terminal.integrated.profiles.linux": {
9 | "zsh": {
10 | "path": "zsh"
11 | }
12 | },
13 | "python.defaultInterpreterPath": "/home/user/.conda/envs/piq-dev/bin/python",
14 | "C_Cpp.default.includePath": [
15 | "${workspaceFolder}/**",
16 | "/home/user/.conda/envs/piq-dev/lib/python3.12/site-packages/pybind11/include/",
17 | "/home/user/.conda/envs/piq-dev/include/python3.12/"
18 | ]
19 | },
20 | "extensions": [
21 | "ms-python.python",
22 | "ms-toolsai.jupyter",
23 | "ms-vscode.cpptools",
24 | "ms-vscode.cpptools-extension-pack",
25 | "ms-vscode.makefile-tools",
26 | "charliermarsh.ruff",
27 | "njpwerner.autodocstring",
28 | "github.vscode-github-actions",
29 | "tamasfe.even-better-toml",
30 | "DavidAnson.vscode-markdownlint",
31 | "be5invis.toml",
32 | "mhutchie.git-graph"
33 | ]
34 | }
35 | },
36 | "remoteUser": "user",
37 | "postCreateCommand": "sudo chown -R user:user /home/user/.ssh && git config --global --add safe.directory ${containerWorkspaceFolder}"
38 | }
--------------------------------------------------------------------------------
/.github/actions/build-iqtree/action.yml:
--------------------------------------------------------------------------------
1 | name: Fetch or Build IQ-TREE Static Library
2 | description: "Checks if the IQ-TREE static library exists in the cache and builds it if it does not."
3 | inputs:
4 | os:
5 | description: "Runner OS Name."
6 | required: true
7 | outputs:
8 | iqtree-sha:
9 | description: "SHA for commit of IQ-TREE static library."
10 | value: ${{ steps.iqtree-sha.outputs.iqtree-sha }}
11 | runs:
12 | using: composite
13 | steps:
14 | - id: iqtree-sha
15 | name: Get IQ-TREE SHA
16 | shell: bash
17 | run: |
18 | cd iqtree2
19 | IQ_TREE_SHA=$(git rev-parse HEAD)
20 | echo "iqtree-sha=${IQ_TREE_SHA}" >> "$GITHUB_OUTPUT"
21 |
22 | - name: Cache IQ-TREE (Windows)
23 | if: runner.os == 'Windows'
24 | uses: actions/cache@v4
25 | id: cache-windows
26 | with:
27 | key: libiqtree-${{ inputs.os }}-${{ steps.iqtree-sha.outputs.iqtree-sha }}
28 | path: |
29 | src/piqtree/_libiqtree/iqtree2.lib
30 | src/piqtree/_libiqtree/iqtree2.dll
31 | lookup-only: true
32 |
33 | - name: Cache IQ-TREE (Linux/macOS)
34 | if: runner.os != 'Windows'
35 | uses: actions/cache@v4
36 | id: cache-unix
37 | with:
38 | key: libiqtree-${{ inputs.os }}-${{ steps.iqtree-sha.outputs.iqtree-sha }}
39 | path: |
40 | src/piqtree/_libiqtree/libiqtree2.a
41 | lookup-only: true
42 |
43 | - name: Combine Cache Hits
44 | id: cache
45 | shell: bash
46 | run: |
47 | if [[ "${{ steps.cache-windows.outputs.cache-hit }}" == 'true' || "${{ steps.cache-unix.outputs.cache-hit }}" == 'true' ]]; then
48 | echo "cache-hit=true" >> "$GITHUB_OUTPUT"
49 | else
50 | echo "cache-hit=false" >> "$GITHUB_OUTPUT"
51 | fi
52 |
53 | - name: Install Boost
54 | if: runner.os == 'Windows' && steps.cache.outputs.cache-hit != 'true'
55 | uses: MarkusJx/install-boost@v2.5.0
56 | id: install-boost
57 | with:
58 | boost_version: 1.84.0
59 | platform_version: 2022
60 | toolset: mingw
61 |
62 | - name: Set Boost Environment Variables
63 | if: runner.os == 'Windows' && steps.cache.outputs.cache-hit != 'true'
64 | shell: bash
65 | run: |
66 | echo "Boost_INCLUDE_DIR=${{ steps.install-boost.outputs.BOOST_ROOT }}/include" >> "$GITHUB_ENV"
67 | echo "Boost_LIBRARY_DIRS=${{ steps.install-boost.outputs.BOOST_ROOT }}/lib" >> "$GITHUB_ENV"
68 |
69 | - name: Setup MSVC Developer Command Prompt
70 | if: runner.os == 'Windows' && steps.cache.outputs.cache-hit != 'true'
71 | uses: ilammy/msvc-dev-cmd@v1
72 |
73 | - name: Build IQ-TREE
74 | shell: bash
75 | if: steps.cache.outputs.cache-hit != 'true'
76 | run: |
77 | if [[ "${{ runner.os }}" == "Linux" ]]; then
78 | sudo ./build_tools/before_all_linux.sh
79 | elif [[ "${{ runner.os }}" == "macOS" ]]; then
80 | ./build_tools/before_all_mac.sh
81 | elif [[ "${{ runner.os }}" == "Windows" ]]; then
82 | ./build_tools/before_all_windows.sh
83 | else
84 | echo "Unrecognized OS: '${{ inputs.os }}'."
85 | exit 1
86 | fi
--------------------------------------------------------------------------------
/.github/actions/setup-piqtree/action.yml:
--------------------------------------------------------------------------------
1 | name: Setup piqtree
2 | description: "Set up Python and fetch the IQ-TREE static library."
3 | inputs:
4 | python-version:
5 | description: "Python version."
6 | required: true
7 | cache-key:
8 | description: "Key for static library cache."
9 | required: true
10 | runs:
11 | using: composite
12 | steps:
13 | - uses: actions/setup-python@v5
14 | with:
15 | python-version: ${{ inputs.python-version }}
16 |
17 | - name: Cache IQ-TREE (Windows)
18 | if: runner.os == 'Windows'
19 | uses: actions/cache/restore@v4
20 | id: cache-windows
21 | with:
22 | key: ${{ inputs.cache-key }}
23 | path: |
24 | src/piqtree/_libiqtree/iqtree2.lib
25 | src/piqtree/_libiqtree/iqtree2.dll
26 | fail-on-cache-miss: true
27 |
28 | - name: Cache IQ-TREE (Linux/macOS)
29 | if: runner.os != 'Windows'
30 | uses: actions/cache/restore@v4
31 | id: cache-unix
32 | with:
33 | key: ${{ inputs.cache-key }}
34 | path: |
35 | src/piqtree/_libiqtree/libiqtree2.a
36 | fail-on-cache-miss: true
37 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "pip" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 | open-pull-requests-limit: 10
13 |
14 | - package-ecosystem: "github-actions"
15 | directory: "/"
16 | schedule:
17 | interval: "weekly"
18 | open-pull-requests-limit: 10
19 | - package-ecosystem: "github-actions"
20 | directory: "/.github/actions/build-iqtree/"
21 | schedule:
22 | interval: "weekly"
23 | open-pull-requests-limit: 10
24 | - package-ecosystem: "github-actions"
25 | directory: "/.github/actions/setup-piqtree/"
26 | schedule:
27 | interval: "weekly"
28 | open-pull-requests-limit: 10
29 |
30 | - package-ecosystem: "gitsubmodule"
31 | directory: "/"
32 | schedule:
33 | interval: "daily"
34 |
--------------------------------------------------------------------------------
/.github/workflows/add_source_to_release.yml:
--------------------------------------------------------------------------------
1 | name: Attach Full Source to Release
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | permissions:
8 | contents: write
9 |
10 | jobs:
11 | upload-source:
12 | runs-on: ubuntu-latest
13 |
14 | env:
15 | VERSION: ${{ github.ref_name }}
16 | TARBALL: piqtree-${{ github.ref_name }}.tar.gz
17 |
18 |
19 | steps:
20 | - uses: actions/checkout@v4
21 | with:
22 | fetch-depth: 0
23 | submodules: recursive
24 |
25 | - name: Make Tarball
26 | run: |
27 | tar czf "../$TARBALL" \
28 | --transform "s,^\.,piqtree-${VERSION}," \
29 | --exclude-vcs .
30 | mv "../$TARBALL" "$TARBALL"
31 |
32 | - name: Compute SHA256 Checksum
33 | run: sha256sum "$TARBALL" > "$TARBALL.sha256"
34 |
35 | - name: Upload Full Source and SHA256 to GitHub Release
36 | uses: softprops/action-gh-release@v2.2.2
37 | with:
38 | files: |
39 | ${{ env.TARBALL }}
40 | ${{ env.TARBALL }}.sha256
41 | env:
42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/build_container.yml:
--------------------------------------------------------------------------------
1 | name: Build and Push Docker Image to ghcr
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - main
8 | paths:
9 | - .devcontainer/DockerFile
10 |
11 | permissions:
12 | contents: read
13 | packages: write
14 |
15 | jobs:
16 | build-and-push:
17 | runs-on: ubuntu-latest
18 | steps:
19 | - name: Checkout Repository
20 | uses: actions/checkout@v4
21 |
22 | - name: Login to GitHub Container Registry
23 | run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u iqtree --password-stdin
24 |
25 | - name: Build and Push Base Docker Image
26 | run: |
27 | docker build -f .devcontainer/DockerFile . \
28 | -t ghcr.io/iqtree/piqtree:latest
29 | docker push ghcr.io/iqtree/piqtree:latest
30 |
31 |
--------------------------------------------------------------------------------
/.github/workflows/build_docs.yml:
--------------------------------------------------------------------------------
1 | name: Build Docs
2 |
3 | concurrency:
4 | group: docs-build-${{ github.ref }}
5 | cancel-in-progress: true
6 | on:
7 | workflow_dispatch:
8 | push:
9 | branches:
10 | - main
11 | paths:
12 | - 'docs/**'
13 | - '.readthedocs.yaml'
14 | - 'mkdocs.yml'
15 | - 'rtd_get_docs.py'
16 |
17 | permissions:
18 | contents: read
19 |
20 | jobs:
21 | build-iqtree:
22 | name: Fetch or Build IQ-TREE Static Library
23 | runs-on: ubuntu-latest
24 | steps:
25 | - uses: actions/checkout@v4
26 | with:
27 | fetch-depth: 0
28 | submodules: recursive
29 |
30 | - id: build
31 | uses: ./.github/actions/build-iqtree
32 | with:
33 | os: ubuntu-latest
34 |
35 | outputs:
36 | iqtree-sha: ${{steps.build.outputs.iqtree-sha}}
37 |
38 | build-docs:
39 | runs-on: ubuntu-latest
40 | needs: build-iqtree
41 | steps:
42 | - name: Checkout Repository
43 | uses: actions/checkout@v4
44 | with:
45 | fetch-depth: 0
46 | submodules: recursive
47 |
48 | - uses: ./.github/actions/setup-piqtree
49 | with:
50 | python-version: "3.13"
51 | cache-key: libiqtree-ubuntu-latest-${{ needs.build-iqtree.outputs.iqtree-sha }}
52 |
53 | - name: Install Docs Dependencies
54 | run: |
55 | pip install .[doc]
56 |
57 | - name: Build Documentation
58 | run: |
59 | mkdocs build
60 | working-directory: ${{ github.workspace }}
61 |
62 | - name: Upload Documentation Artifact
63 | uses: actions/upload-artifact@v4
64 | with:
65 | name: piqtree-docs-html
66 | path: site
67 |
--------------------------------------------------------------------------------
/.github/workflows/build_wheels.yml:
--------------------------------------------------------------------------------
1 | name: Build Wheels
2 |
3 | on: [workflow_dispatch]
4 |
5 | permissions:
6 | contents: read
7 |
8 | jobs:
9 | build:
10 | name: Build Wheels for ${{matrix.platform_id}} on ${{matrix.os}}
11 | runs-on: ${{ matrix.os }}
12 | strategy:
13 | fail-fast: false
14 | matrix:
15 | include:
16 | # manylinux x86_64
17 | - os: ubuntu-latest
18 | platform_id: manylinux_x86_64
19 |
20 | # MacOS x86_64
21 | - os: macos-13
22 | platform_id: macosx_x86_64
23 |
24 | # MacOS arm64
25 | - os: macos-14
26 | platform_id: macosx_arm64
27 |
28 | # Windows x86_64
29 | - os: windows-latest
30 | platform_id: win_amd64
31 |
32 | steps:
33 | - uses: actions/checkout@v4
34 | with:
35 | fetch-depth: 0
36 | submodules: recursive
37 |
38 | - name: Set up QEMU for Linux ARM builds
39 | if: ${{runner.os == 'Linux' && endsWith(matrix.platform_id, '_aarch64')}}
40 | uses: docker/setup-qemu-action@v3
41 | with:
42 | platforms: arm64
43 |
44 | - name: Set macOS Deployment Target
45 | if: runner.os == 'macOS'
46 | run: |
47 | if [[ "${{ matrix.os }}" == "macos-13" ]]; then
48 | echo "MACOSX_DEPLOYMENT_TARGET=13.0" >> $GITHUB_ENV
49 | elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
50 | echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> $GITHUB_ENV
51 | fi
52 |
53 | - name: Install Boost
54 | if: runner.os == 'Windows'
55 | uses: MarkusJx/install-boost@v2.5.0
56 | id: install-boost
57 | with:
58 | boost_version: 1.84.0
59 | platform_version: 2022
60 | toolset: mingw
61 |
62 | - name: Setup MSVC Developer Command Prompt
63 | if: runner.os == 'Windows'
64 | uses: ilammy/msvc-dev-cmd@v1
65 |
66 | - name: Build Wheels
67 | uses: pypa/cibuildwheel@v2.23.3
68 | env: # Can specify per os - e.g. CIBW_BEFORE_ALL_LINUX, CIBW_BEFORE_ALL_MACOS, CIBW_BEFORE_ALL_WINDOWS
69 | CIBW_BEFORE_ALL_LINUX: ./build_tools/before_all_linux.sh
70 | CIBW_BEFORE_ALL_MACOS: ./build_tools/before_all_mac.sh
71 | CIBW_BEFORE_ALL_WINDOWS: bash ./build_tools/before_all_windows.sh
72 | CIBW_ENVIRONMENT_WINDOWS: Boost_INCLUDE_DIR='${{ steps.install-boost.outputs.BOOST_ROOT }}/include' Boost_LIBRARY_DIRS='${{ steps.install-boost.outputs.BOOST_ROOT }}/lib'
73 | CIBW_ARCHS_LINUX: ${{endsWith(matrix.platform_id, '_x86_64') && 'x86_64' || 'aarch64'}}
74 | CIBW_ARCHS_MACOS: ${{endsWith(matrix.platform_id, 'universal2') && 'universal2' || 'auto'}}
75 | CIBW_ARCHS_WINDOWS: ${{endsWith(matrix.platform_id, '_amd64') && 'AMD64' || 'ARM64'}}
76 | CIBW_BUILD: "*${{matrix.platform_id}}"
77 | CIBW_TEST_REQUIRES: pytest
78 | CIBW_TEST_COMMAND: pytest {package}/tests
79 | CIBW_TEST_SKIP: "*-macosx_universal2:x86_64" # skip x86 on m1 mac
80 | CIBW_SKIP: pp* # Disable building PyPy wheels on all platforms
81 |
82 | - name: Upload Wheels
83 | uses: actions/upload-artifact@v4
84 | with:
85 | name: cibw-wheels-${{ matrix.platform_id }}
86 | path: ./wheelhouse/*.whl
87 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | concurrency:
4 | group: ${{ github.workflow }}-${{ github.ref }}
5 | cancel-in-progress: true
6 | on:
7 | workflow_dispatch:
8 | schedule:
9 | - cron: "35 4 */6 * *" # 04:35 UTC on days 1, 7, 13, 19, 25 and 31 of each month (roughly every 6 days).
10 | pull_request:
11 | push:
12 |
13 | permissions:
14 | contents: read
15 |
16 | jobs:
17 | build-iqtree:
18 | name: Fetch or Build IQ-TREE Static Library on ${{matrix.os}}
19 | runs-on: ${{ matrix.os }}
20 | strategy:
21 | matrix:
22 | os: [ubuntu-latest, macos-13, macos-14, windows-latest] # Intel linux, Intel Mac, ARM Mac, Windows
23 |
24 | steps:
25 | - uses: actions/checkout@v4
26 | with:
27 | fetch-depth: 0
28 | submodules: recursive
29 |
30 | - id: build
31 | uses: ./.github/actions/build-iqtree
32 | with:
33 | os: ${{ matrix.os }}
34 |
35 | outputs:
36 | iqtree-sha: ${{steps.build.outputs.iqtree-sha}}
37 |
38 | tests:
39 | name: Run Tests with Python ${{matrix.python-version}} on ${{matrix.os}}
40 | needs: build-iqtree
41 | runs-on: ${{ matrix.os }}
42 | strategy:
43 | matrix:
44 | os: [ubuntu-latest, macos-13, macos-14, windows-latest] # Intel linux, Intel Mac, ARM Mac, Windows
45 | python-version: ["3.11", "3.12", "3.13"]
46 | steps:
47 | - uses: actions/checkout@v4
48 | with:
49 | fetch-depth: 0
50 | submodules: recursive
51 |
52 | - uses: ./.github/actions/setup-piqtree
53 | with:
54 | python-version: ${{ matrix.python-version }}
55 | cache-key: libiqtree-${{ matrix.os }}-${{ needs.build-iqtree.outputs.iqtree-sha }}
56 |
57 | - name: Install llvm (macOS)
58 | if: runner.os == 'macOS'
59 | run: |
60 | brew install llvm
61 |
62 | - name: Install llvm (Windows)
63 | if: runner.os == 'Windows'
64 | run: |
65 | choco install -y llvm --version=14.0.6 --allow-downgrade
66 |
67 | - name: Run Nox Testing
68 | run: |
69 | pip install nox
70 | nox -s test-${{ matrix.python-version }} -- --cov --cov-report=lcov:${{matrix.os}}-${{matrix.python-version}}.lcov --cov-report=term --cov-append --durations=20
71 |
72 | - name: Coveralls Parallel
73 | if: matrix.os != 'macos-13'
74 | uses: coverallsapp/github-action@v2
75 | with:
76 | parallel: true
77 | github-token: ${{secrets.github_token}}
78 | flag-name: run-${{matrix.python-version}}-${{matrix.os}}
79 | file: "${{matrix.os}}-${{matrix.python-version}}.lcov"
80 |
81 | upload-coverage:
82 | name: Finish Coveralls
83 | needs: tests
84 | runs-on: ubuntu-latest
85 | steps:
86 | - name: Coveralls Finished
87 | uses: coverallsapp/github-action@v2
88 | with:
89 | github-token: ${{ secrets.github_token }}
90 | parallel-finished: true
91 |
92 | type_check:
93 | name: Type Check
94 | needs: build-iqtree
95 | runs-on: ${{ matrix.os }}
96 |
97 | strategy:
98 | matrix:
99 | python-version: ["3.13"]
100 | os: [ubuntu-latest]
101 |
102 | steps:
103 | - uses: actions/checkout@v4
104 | with:
105 | fetch-depth: 0
106 |
107 | - uses: ./.github/actions/setup-piqtree
108 | with:
109 | python-version: ${{ matrix.python-version }}
110 | cache-key: libiqtree-${{ matrix.os }}-${{ needs.build-iqtree.outputs.iqtree-sha }}
111 |
112 | - name: "Run Type Checking for ${{ matrix.python-version }}"
113 | run: |
114 | pip install nox
115 | nox -s type_check-${{ matrix.python-version }}
116 |
117 | lint:
118 | name: Linting
119 | needs: build-iqtree
120 | runs-on: ${{ matrix.os }}
121 |
122 | strategy:
123 | matrix:
124 | python-version: ["3.13"]
125 | os: [ubuntu-latest]
126 |
127 | steps:
128 | - uses: actions/checkout@v4
129 | with:
130 | fetch-depth: 0
131 |
132 | - uses: ./.github/actions/setup-piqtree
133 | with:
134 | python-version: ${{ matrix.python-version }}
135 | cache-key: libiqtree-${{ matrix.os }}-${{ needs.build-iqtree.outputs.iqtree-sha }}
136 |
137 | - name: "Run Linting for ${{ matrix.python-version }}"
138 | run: |
139 | pip install nox
140 | nox -s ruff-${{ matrix.python-version }}
141 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on: [workflow_dispatch]
4 |
5 | permissions:
6 | contents: read
7 |
8 | jobs:
9 | build-wheels:
10 | name: Build Wheels for ${{matrix.platform_id}} on ${{matrix.os}}
11 | runs-on: ${{ matrix.os }}
12 | strategy:
13 | fail-fast: false
14 | matrix:
15 | include:
16 | # manylinux x86_64
17 | - os: ubuntu-latest
18 | platform_id: manylinux_x86_64
19 |
20 | # MacOS x86_64
21 | - os: macos-13
22 | platform_id: macosx_x86_64
23 |
24 | # MacOS arm64
25 | - os: macos-14
26 | platform_id: macosx_arm64
27 |
28 | # Windows x86_64
29 | - os: windows-latest
30 | platform_id: win_amd64
31 |
32 | steps:
33 | - uses: actions/checkout@v4
34 | with:
35 | fetch-depth: 0
36 | submodules: recursive
37 |
38 | - name: Set up QEMU for Linux ARM builds
39 | if: ${{runner.os == 'Linux' && endsWith(matrix.platform_id, '_aarch64')}}
40 | uses: docker/setup-qemu-action@v3
41 | with:
42 | platforms: arm64
43 |
44 | - name: Set macOS Deployment Target
45 | if: runner.os == 'macOS'
46 | run: |
47 | if [[ "${{ matrix.os }}" == "macos-13" ]]; then
48 | echo "MACOSX_DEPLOYMENT_TARGET=13.0" >> $GITHUB_ENV
49 | elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
50 | echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> $GITHUB_ENV
51 | fi
52 |
53 | - name: Install Boost
54 | if: runner.os == 'Windows'
55 | uses: MarkusJx/install-boost@v2.5.0
56 | id: install-boost
57 | with:
58 | boost_version: 1.84.0
59 | platform_version: 2022
60 | toolset: mingw
61 |
62 | - name: Setup MSVC Developer Command Prompt
63 | if: runner.os == 'Windows'
64 | uses: ilammy/msvc-dev-cmd@v1
65 |
66 | - name: Build Wheels
67 | uses: pypa/cibuildwheel@v2.23.3
68 | env: # Can specify per os - e.g. CIBW_BEFORE_ALL_LINUX, CIBW_BEFORE_ALL_MACOS, CIBW_BEFORE_ALL_WINDOWS
69 | CIBW_BEFORE_ALL_LINUX: ./build_tools/before_all_linux.sh
70 | CIBW_BEFORE_ALL_MACOS: ./build_tools/before_all_mac.sh
71 | CIBW_BEFORE_ALL_WINDOWS: bash ./build_tools/before_all_windows.sh
72 | CIBW_ENVIRONMENT_WINDOWS: Boost_INCLUDE_DIR='${{ steps.install-boost.outputs.BOOST_ROOT }}/include' Boost_LIBRARY_DIRS='${{ steps.install-boost.outputs.BOOST_ROOT }}/lib'
73 | CIBW_ARCHS_LINUX: ${{endsWith(matrix.platform_id, '_x86_64') && 'x86_64' || 'aarch64'}}
74 | CIBW_ARCHS_MACOS: ${{endsWith(matrix.platform_id, 'universal2') && 'universal2' || 'auto'}}
75 | CIBW_ARCHS_WINDOWS: ${{endsWith(matrix.platform_id, '_amd64') && 'AMD64' || 'ARM64'}}
76 | CIBW_BUILD: "*${{matrix.platform_id}}"
77 | CIBW_TEST_REQUIRES: pytest
78 | CIBW_TEST_COMMAND: pytest {package}/tests
79 | CIBW_TEST_SKIP: "*-macosx_universal2:x86_64" # skip x86 on m1 mac
80 | CIBW_SKIP: pp* # Disable building PyPy wheels on all platforms
81 |
82 | - name: Upload Wheels
83 | uses: actions/upload-artifact@v4
84 | with:
85 | name: cibw-wheels-${{ matrix.platform_id }}
86 | path: ./wheelhouse/*.whl
87 |
88 | build-iqtree:
89 | name: Fetch or Build IQ-TREE Static Library
90 | runs-on: ubuntu-latest
91 | steps:
92 | - uses: actions/checkout@v4
93 | with:
94 | fetch-depth: 0
95 | submodules: recursive
96 |
97 | - id: build
98 | uses: ./.github/actions/build-iqtree
99 | with:
100 | os: ubuntu-latest
101 |
102 | outputs:
103 | iqtree-sha: ${{steps.build.outputs.iqtree-sha}}
104 |
105 | build-docs:
106 | runs-on: ubuntu-latest
107 | needs: build-iqtree
108 | steps:
109 | - name: Checkout Repository
110 | uses: actions/checkout@v4
111 | with:
112 | fetch-depth: 0
113 | submodules: recursive
114 |
115 | - uses: ./.github/actions/setup-piqtree
116 | with:
117 | python-version: "3.13"
118 | cache-key: libiqtree-ubuntu-latest-${{ needs.build-iqtree.outputs.iqtree-sha }}
119 |
120 | - name: Install Docs Dependencies
121 | run: |
122 | pip install .[doc]
123 |
124 | - name: Build Documentation
125 | run: |
126 | mkdocs build
127 | working-directory: ${{ github.workspace }}
128 |
129 | - name: Upload Documentation Artifact
130 | uses: actions/upload-artifact@v4
131 | with:
132 | name: piqtree-docs-html
133 | path: site
134 |
135 | release-test:
136 | name: Release to Test PyPI
137 | needs: [build-wheels, build-docs]
138 | environment: release
139 | runs-on: ubuntu-latest
140 | permissions:
141 | id-token: write
142 |
143 | steps:
144 | - name: Download Artifacts
145 | uses: actions/download-artifact@v4
146 | with:
147 | pattern: cibw-*
148 | path: dist
149 | merge-multiple: true
150 |
151 | - name: Publish Package Distributions to Test PyPI
152 | uses: pypa/gh-action-pypi-publish@release/v1
153 | with:
154 | repository-url: https://test.pypi.org/legacy/
155 |
156 | release:
157 | name: Release to PyPI
158 | needs: release-test
159 | environment: release
160 | runs-on: ubuntu-latest
161 | permissions:
162 | id-token: write
163 |
164 | steps:
165 | - name: Download Artifacts
166 | uses: actions/download-artifact@v4
167 | with:
168 | pattern: cibw-*
169 | path: dist
170 | merge-multiple: true
171 |
172 | - name: Publish Package Distributions to PyPI
173 | uses: pypa/gh-action-pypi-publish@release/v1
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # vscode
2 |
3 | .vscode
4 |
5 | # iqtree
6 | *.bionj
7 | *.ckp.gz
8 | *.iqtree
9 | *.log
10 | *.mldist
11 | *.treefile
12 |
13 | # piqtree specific ignores
14 | src/piqtree/_libiqtree/**/*.a
15 | src/piqtree/_libiqtree/**/*.dll
16 | src/piqtree/_libiqtree/**/*.lib
17 | src/*.dll
18 |
19 | # docs
20 | data
21 |
22 | # Byte-compiled / optimized / DLL files
23 | __pycache__/
24 | *.py[cod]
25 | *$py.class
26 |
27 | # C extensions
28 | *.so
29 |
30 | # Distribution / packaging
31 | .Python
32 | build/
33 | develop-eggs/
34 | dist/
35 | downloads/
36 | eggs/
37 | .eggs/
38 | lib/
39 | lib64/
40 | parts/
41 | sdist/
42 | var/
43 | wheels/
44 | share/python-wheels/
45 | *.egg-info/
46 | .installed.cfg
47 | *.egg
48 | MANIFEST
49 |
50 | # PyInstaller
51 | # Usually these files are written by a python script from a template
52 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
53 | *.manifest
54 | *.spec
55 |
56 | # Installer logs
57 | pip-log.txt
58 | pip-delete-this-directory.txt
59 |
60 | # Unit test / coverage reports
61 | htmlcov/
62 | .tox/
63 | .nox/
64 | .coverage
65 | .coverage.*
66 | .cache
67 | nosetests.xml
68 | coverage.xml
69 | *.cover
70 | *.py,cover
71 | .hypothesis/
72 | .pytest_cache/
73 | cover/
74 |
75 | # Translations
76 | *.mo
77 | *.pot
78 |
79 | # Django stuff:
80 | *.log
81 | local_settings.py
82 | db.sqlite3
83 | db.sqlite3-journal
84 |
85 | # Flask stuff:
86 | instance/
87 | .webassets-cache
88 |
89 | # Scrapy stuff:
90 | .scrapy
91 |
92 | # Sphinx documentation
93 | docs/_build/
94 |
95 | # PyBuilder
96 | .pybuilder/
97 | target/
98 |
99 | # Jupyter Notebook
100 | .ipynb_checkpoints
101 |
102 | # IPython
103 | profile_default/
104 | ipython_config.py
105 |
106 | # pyenv
107 | # For a library or package, you might want to ignore these files since the code is
108 | # intended to run in multiple environments; otherwise, check them in:
109 | # .python-version
110 |
111 | # pipenv
112 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
113 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
114 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
115 | # install all needed dependencies.
116 | #Pipfile.lock
117 |
118 | # poetry
119 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
120 | # This is especially recommended for binary packages to ensure reproducibility, and is more
121 | # commonly ignored for libraries.
122 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
123 | #poetry.lock
124 |
125 | # pdm
126 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
127 | #pdm.lock
128 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
129 | # in version control.
130 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
131 | .pdm.toml
132 | .pdm-python
133 | .pdm-build/
134 |
135 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
136 | __pypackages__/
137 |
138 | # Celery stuff
139 | celerybeat-schedule
140 | celerybeat.pid
141 |
142 | # SageMath parsed files
143 | *.sage.py
144 |
145 | # Environments
146 | .env
147 | .venv
148 | env/
149 | venv/
150 | ENV/
151 | env.bak/
152 | venv.bak/
153 |
154 | # Spyder project settings
155 | .spyderproject
156 | .spyproject
157 |
158 | # Rope project settings
159 | .ropeproject
160 |
161 | # mkdocs documentation
162 | /site
163 |
164 | # mypy
165 | .mypy_cache/
166 | .dmypy.json
167 | dmypy.json
168 |
169 | # Pyre type checker
170 | .pyre/
171 |
172 | # pytype static type analyzer
173 | .pytype/
174 |
175 | # Cython debug symbols
176 | cython_debug/
177 |
178 | # PyCharm
179 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
180 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
181 | # and can be added to the global gitignore or merged into this file. For a more nuclear
182 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
183 |
184 | #.idea/
185 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "iqtree2"]
2 | path = iqtree2
3 | url = https://github.com/iqtree/iqtree2.git
4 | branch = libiqtree
5 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file for MkDocs projects
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.12"
13 | commands:
14 | # Install the required dependencies
15 | - pip install requests
16 | # Run the script to download and extract the pre-built docs
17 | - python rtd_get_docs.py
18 | - echo "Documentation downloaded and extracted"
19 |
20 | # Disable the default build processes since we're using pre-built docs
21 | sphinx:
22 | configuration: null
23 |
24 | python:
25 | install: []
26 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement by emailing
63 | Gavin Huttley.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # piqtree
2 |
3 | [](https://pypi.org/project/piqtree/)
4 | [](https://pypi.org/project/piqtree/)
5 | [](https://github.com/iqtree/piqtree/blob/main/LICENSE)
6 |
7 | [](https://github.com/iqtree/piqtree/actions/workflows/ci.yml)
8 | [](https://coveralls.io/github/iqtree/piqtree?branch=main)
9 | [](https://piqtree.readthedocs.io/en/latest/?badge=latest)
10 | [](https://github.com/astral-sh/ruff)
11 |
12 | `piqtree` is a library which allows you to use IQ-TREE directly from Python! The interface with Python is through [cogent3](https://cogent3.org) objects.
13 | For usage, please refer to the [documentation](https://piqtree.readthedocs.io/) or the examples below.
14 |
15 | If you encounter any problems or have any feature requests feel free to raise an [issue](https://github.com/iqtree/piqtree/issues)!
16 |
17 | ## Contributing
18 |
19 | If you would like to help out by contributing to the piqtree project, please check out our [contributor guide!](https://piqtree.readthedocs.io/en/latest/developers/)
20 |
21 | ## Examples
22 |
23 | ### Phylogenetic Reconstruction
24 |
25 | ```python
26 | from piqtree import build_tree
27 | from cogent3 import load_aligned_seqs # Included with piqtree!
28 |
29 | # Load Sequences
30 | aln = load_aligned_seqs("tests/data/example.fasta", moltype="dna")
31 | aln = aln.take_seqs(["Human", "Chimpanzee", "Rhesus", "Mouse"])
32 |
33 | # Reconstruct a phylogenetic tree with IQ-TREE!
34 | tree = build_tree(aln, "JC", rand_seed=1) # Optionally specify a random seed.
35 |
36 | print("Tree topology:", tree) # A cogent3 tree object
37 | print("Log-likelihood:", tree.params["lnL"])
38 | # In a Jupyter notebook, try tree.get_figure() to see a dendrogram
39 | ```
40 |
41 | > **Note**
42 | > See the [cogent3 docs](https://cogent3.org) for examples on what you can do with cogent3 trees.
43 |
44 | ### Fit Branch Lengths to Tree Topology
45 |
46 | ```python
47 | from piqtree import fit_tree
48 | from cogent3 import load_aligned_seqs, make_tree
49 |
50 | # Load Sequences
51 | aln = load_aligned_seqs("tests/data/example.fasta", moltype="dna")
52 | aln = aln.take_seqs(["Human", "Chimpanzee", "Rhesus", "Mouse"])
53 |
54 | # Construct tree topology
55 | tree = make_tree("(Human, Chimpanzee, (Rhesus, Mouse));")
56 |
57 | # Fit branch lengths with IQ-TREE!
58 | tree = fit_tree(aln, tree, "JC", rand_seed=1) # Optionally specify a random seed.
59 |
60 | print("Tree with branch lengths:", tree)
61 | print("Log-likelihood:", tree.params["lnL"])
62 | ```
63 |
64 | ## More
65 |
66 | For more examples, ranging from using ModelFinder to making rapid neighbour-joining trees or randomly generated trees, be sure to check out the [documentation](https://piqtree.readthedocs.io/)!
--------------------------------------------------------------------------------
/build_tools/before_all_linux.sh:
--------------------------------------------------------------------------------
1 | if command -v apt-get &> /dev/null; then
2 | apt-get update -y && apt-get install -y libeigen3-dev libboost-all-dev
3 | elif command -v yum &> /dev/null; then
4 | yum update -y && yum install -y eigen3-devel boost-devel
5 | elif command -v apk &> /dev/null; then
6 | apk update && apk add eigen-dev boost-dev
7 | else
8 | echo "Unsupported package manager";
9 | exit 1;
10 | fi
11 |
12 | bash build_tools/build_iqtree.sh
--------------------------------------------------------------------------------
/build_tools/before_all_mac.sh:
--------------------------------------------------------------------------------
1 | # Check if running in GitHub Actions
2 | if [ "$GITHUB_ACTIONS" = "true" ]; then
3 | brew update
4 | fi
5 |
6 | brew install llvm eigen boost libomp make
7 |
8 | export LDFLAGS="-L$(brew --prefix libomp)/lib"
9 | export CPPFLAGS="-I$(brew --prefix libomp)/include"
10 | export CXXFLAGS="-I$(brew --prefix libomp)/include"
11 |
12 | bash build_tools/build_iqtree.sh
13 |
--------------------------------------------------------------------------------
/build_tools/before_all_windows.sh:
--------------------------------------------------------------------------------
1 | # Install dependencies using choco
2 |
3 | export Boost_INCLUDE_DIR=$(echo $Boost_INCLUDE_DIR | sed 's|\\|/|g')
4 | export Boost_LIBRARY_DIRS=$(echo $Boost_LIBRARY_DIRS | sed 's|\\|/|g')
5 |
6 | echo "Boost_INCLUDE_DIR: $Boost_INCLUDE_DIR"
7 | echo "Boost_LIBRARY_DIRS: $Boost_LIBRARY_DIRS"
8 |
9 | choco install -y llvm --version=14.0.6 --allow-downgrade
10 | choco install -y eigen
11 |
12 | # Build IQ-TREE
13 | bash build_tools/build_iqtree.sh
--------------------------------------------------------------------------------
/build_tools/build_iqtree.sh:
--------------------------------------------------------------------------------
1 | cd iqtree2
2 | rm -rf build
3 | mkdir build && cd build
4 |
5 | if [[ "$OSTYPE" == "darwin"* ]]; then
6 | echo "Building for macOS."
7 | echo $LDFLAGS
8 | echo $CPPFLAGS
9 | echo $CXXFLAGS
10 | cmake -DBUILD_LIB=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ ..
11 | gmake -j
12 | elif [[ "$OSTYPE" == "msys"* || "$OSTYPE" == "cygwin"* ]]; then
13 | echo "Building for Windows."
14 |
15 | if [[ -n "$BOOST_ROOT" ]]; then
16 | export Boost_INCLUDE_DIR="${BOOST_ROOT}"
17 | export Boost_LIBRARY_DIRS="${BOOST_ROOT}"
18 | fi
19 |
20 | cmake -G "MinGW Makefiles" \
21 | -DCMAKE_C_COMPILER=clang \
22 | -DCMAKE_CXX_COMPILER=clang++ \
23 | -DCMAKE_C_FLAGS=--target=x86_64-pc-windows-gnu \
24 | -DCMAKE_CXX_FLAGS=--target=x86_64-pc-windows-gnu \
25 | -DCMAKE_MAKE_PROGRAM=make \
26 | -DBoost_INCLUDE_DIR=$Boost_INCLUDE_DIR \
27 | -DBoost_LIBRARY_DIRS=$Boost_LIBRARY_DIRS \
28 | -DIQTREE_FLAGS="cpp14" \
29 | -DBUILD_LIB=ON \
30 | ..
31 | make -j
32 | else
33 | echo "Building for linux."
34 | cmake -DBUILD_LIB=ON -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ..
35 | make -j
36 | fi
37 |
38 | cd ../..
39 |
40 | if [[ "$OSTYPE" == "darwin"* || "$OSTYPE" == "linux"* ]]; then
41 | mv iqtree2/build/libiqtree2.a src/piqtree/_libiqtree/
42 | elif [[ "$OSTYPE" == "msys"* || "$OSTYPE" == "cygwin"* ]]; then
43 | mv iqtree2/build/iqtree2.lib src/piqtree/_libiqtree/
44 | mv iqtree2/build/iqtree2.dll src/piqtree/_libiqtree/
45 | fi
46 |
--------------------------------------------------------------------------------
/changelog.d/20250521_122741_robert.mcarthur_windows_support.md:
--------------------------------------------------------------------------------
1 |
6 |
7 |
8 | ### Contributors
9 |
10 | - @rmcar17 and @thomaskf enabled windows support for piqtree!
11 |
12 |
13 |
14 | ### ENH
15 |
16 | - Added windows support!
17 |
18 |
19 |
25 |
31 |
37 |
43 |
--------------------------------------------------------------------------------
/changelog.d/20250522_113423_robert.mcarthur_nj_tree_nans.md:
--------------------------------------------------------------------------------
1 |
6 |
7 |
8 | ### Contributors
9 |
10 | - @rmcar17 ensured `nj_tree` raises an appropriate error on NaN input.
11 |
12 |
13 |
19 |
20 | ### BUG
21 |
22 | - An error is now raised when calling `nj_tree` with NaN distances instead of failing silently.
23 |
24 |
25 |
31 |
37 |
43 |
--------------------------------------------------------------------------------
/changelog.d/README.md:
--------------------------------------------------------------------------------
1 | # Adding changelog entries
2 |
3 | This directory is used to store changelog fragments for the next release
4 | of the project. Each file should contain a small changelog fragment
5 | that will be added to the full changelog when the release is made.
6 | The file is created using
7 | ```
8 | scriv create --edit
9 | ```
10 | to create a change entry
11 |
12 | This will create a file with the correct name and format, containing commented-out sample categories, and open it in your `git` editor (the one specified by `git config --global core.editor`).
13 |
14 | Uncomment the category of change you are making and add a short description of the
15 | change as a markdown bullet point. For example:
16 |
17 | Contributors
18 | * khiron
19 |
20 | ENH
21 | * Added a new feature that allows users to do Y
22 |
23 | DEP
24 | * Removed deprecated feature Z
25 |
26 | BUG
27 | * Fixed a bug that caused the project to crash when a user did X by doing Y instead
28 |
29 | DOC
30 | * Documented feature Z
31 |
32 | Check the file in with your changes.
33 |
34 | ---
35 |
36 | # Building a changelog
37 |
38 | To build the changelog for the next release, run
39 |
40 | ```
41 | scriv collect
42 | ```
43 |
44 | This will update the file called `changelog.md` in the root of the project. This file contains the full changelog for the project; all the fragments that have been added to the `changelog.d` directory are merged into it and then removed. Note this requires that the `__version__` variable in the `__init__.py` file has been updated since the last time you ran collect, otherwise you will get a warning
45 |
46 | ```
47 | Entry 'Changes since release 0.0.1' already uses version '0.0.1'.
48 | ```
49 | ... and the fragments will not be collected. When the `__version__` variable in the project is updated the next time you run collect, the fragments will be collected and the changelog will be updated.
50 |
51 | You can override this to create a new collection of all available fragments that are not aligned with a `__version__` with
52 |
53 | ```
54 | scriv collect --version "description of a milestone not yet used"
55 | ```
56 |
57 | ---
58 |
59 | # Releasing a new version
60 |
61 | To release a version of the project, run
62 |
63 | ```
64 | scriv release
65 | ```
66 |
67 | This will create a new tag in the `git` repository, and create a new release on GitHub. The release will contain the full changelog for the project, and the tag will be annotated with the full changelog as well. Note you will need to have a GitHub classic personal access token set up in your environment (GITHUB_TOKEN) for this to work.
68 |
69 | If you are using VS Code and PowerShell you can add the following to your `settings.json` file to set the environment variable for the terminal. If you are using a different terminal you will need to set the environment variable in the appropriate way for your terminal.
70 |
71 | ```json
72 |
73 | "terminal.integrated.env.windows": {
74 |
75 | "GITHUB_TOKEN": "ghp_...",
76 | }
77 | ```
78 |
79 | ---
80 |
--------------------------------------------------------------------------------
/changelog.d/templates/new.md.j2:
--------------------------------------------------------------------------------
1 |
6 |
7 | {% for cat in config.categories -%}
8 |
14 | {% endfor -%}
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/changelog.d/templates/new.rst.j2:
--------------------------------------------------------------------------------
1 | .. A new scriv changelog fragment.
2 | {% if config.categories -%}
3 | ..
4 | .. Uncomment the header that is right (remove the leading dots).
5 | ..
6 | {% for cat in config.categories -%}
7 | .. {{ cat }}
8 | .. {{ config.rst_header_chars[1] * (cat|length) }}
9 | ..
10 | .. - A bullet item for the {{ cat }} category.
11 | ..
12 | {% endfor -%}
13 | {% else %}
14 | - A bullet item for this fragment. EDIT ME!
15 | {% endif -%}
--------------------------------------------------------------------------------
/changelog.d/templates/title.md.j2:
--------------------------------------------------------------------------------
1 | Changes in release "{{version}}"
--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Changes in release "0.5.0"
4 |
5 | ## Contributors
6 |
7 | - @GavinHuttley added `nj_tree` as a hook for `cogent3.Alignment.quick_tree`.
8 | - @YapengLang handled negative branch lengths from the rapidNJ tree.
9 | - @rmcar17, @thomaskf general maintenance on the piqtree/IQ-TREE sides including work on windows behind the scenes.
10 |
11 | ## ENH
12 |
13 | - Add support for Python 3.13, remove support for 3.10
14 | - IQ-TREE's rapidNJ implementation can be used as a hook for `quick_tree` on `cogent3` alignment objects. Try `Alignment.quick_tree(use_hook="piqtree")`.
15 | - `nj_tree` now by default does not allow negative branch lengths. Set `allow_negative=True` if this behaviour is desired.
16 | - Allow `str` to be used for `model` in `build_tree` and `fit_tree`. The value is automatically coerced into the `Model` class.
17 |
18 | ## API
19 |
20 | - Simplify API for `piqtree_phylo` and `piqtree_fit` apps. Both now take a single parameter for the model, matching the parameter for `model` in `build_tree` and `fit_tree`.
21 |
22 |
23 | # Changes in release "0.4.0"
24 |
25 | ## Contributors
26 |
27 | - @rmcar17 integrated new functions from IQ-TREE in Python, added multithreading support, and wrote the API reference and quickstart.
28 | - @thomaskf exposed all new functions from IQ-TREE to be available from Python, and worked on multithreading support.
29 | - @GavinHuttley worked on setting up and writing the documentation and associated devtools, ModelFinder, and integration with `cogent3` apps.
30 | - @YapengLang worked on bootstrapping support and extracting model parameters
31 | - @KatherineCaley worked on processing the ModelFinder results.
32 |
33 | ## ENH
34 |
35 | - piqtree2 renamed to piqtree to support future major releases of IQ-TREE.
36 | - piqtree now supports multithreading!
37 | - New function `nj_tree` constructs a rapid neighbour-joining tree from a pairwise distance matrix.
38 | - New function `model_finder` finds the best model for a given alignment.
39 | - New function `jc_distances` constructs a pairwise JC distance matrix from an alignment.
40 | - New function `make_model` allows converting an IQ-TREE string representation of a model to a `Model` class.
41 | - API for `random_trees` has changed - new order (`num_trees`, `num_taxa`, `tree_mode`, then `rand_seed`).
42 | - API for `robinson_foulds` has changed - now accepts a Sequence of trees.
43 | - Model parameters are now extracted from IQ-TREE where currently possible.
44 | - `build_tree` now supports ultrafast bootstrapping.
45 | - `Model` creation is now more robust.
46 | - Use `piqtree.__iqtree_version__` to see what version of IQ-TREE is being used.
47 | - See what can now be done in our [new documentation](https://piqtree.readthedocs.io)!
48 |
49 | ## DOC
50 |
51 | - [Documentation](https://piqtree.readthedocs.io) is now on readthedocs!
52 |
53 |
54 | # Changes in release "0.3.1"
55 |
56 | ## ENH
57 |
58 | - Add support for Lie Markov Models.
59 | - Base frequencies default to None (specified by model).
60 |
61 | ## BUG
62 |
63 | - `piqtree2` apps are now pickleable (they can now be run with `parallel=True` in the cogent3 app infrastructure)
64 |
65 |
66 | # Changes in release "0.3.0"
67 |
68 | ## Contributors
69 |
70 | - @rmcar17 Added new classes to enhance model specification when calling `build_tree` and `fit_tree`.
71 | - @thomaskf fixed a bug in IQ-TREE resulting in segmentation faults on some invalid arguments.
72 |
73 | ## ENH
74 |
75 | - `build_tree` and `fit_tree` now allow specifying base frequencies, invariable sites and rate heterogeneity options.
76 |
77 | ## BUG
78 |
79 | - Fixed a segmentation fault on repetitive calls to IQ-TREE with particular arguments.
80 |
81 |
82 | # Changes in release "0.2.0"
83 |
84 | ## Contributors
85 |
86 | - Richard Morris
87 | - Robert McArthur
88 |
89 | ## ENH
90 |
91 | - `build_tree` and `fit_tree` now use enums for the substitution model.
92 |
93 | ## BUG
94 |
95 | - Fixed an issue where calling `build_tree` or `fit_tree` twice, then another function with invalid input resulted in a segmentation fault.
96 |
97 | ## DOC
98 |
99 | - Implement scriv as a tool to manage collection of changes, and automated collation into the changelog
100 |
--------------------------------------------------------------------------------
/docs/api/genetic_distance/jc_distances.md:
--------------------------------------------------------------------------------
1 | # jc_distances
2 |
3 | ::: piqtree.jc_distances
4 |
5 | ## Usage
6 |
7 | For usage, see ["Calculate pairwise Jukes-Cantor distances"](../../quickstart/calculate_jc_distances.md).
8 |
--------------------------------------------------------------------------------
/docs/api/index.md:
--------------------------------------------------------------------------------
1 | # API Reference Index
2 |
3 | ## Trees
4 |
5 | | Name | Summary |
6 | |------|---------|
7 | | [build_tree](tree/build_tree.md) | Construct a maximum-likelihood phylogenetic tree. |
8 | | [fit_tree](tree/fit_tree.md) | Fit branch lengths to a phylogenetic tree. |
9 | | [nj_tree](tree/nj_tree.md) | Construct rapid neighbour-joining tree from pairwise distance matrix. |
10 | | [random_trees](tree/random_trees.md) | Create a collection of random phylogenetic trees. |
11 |
12 | ## Substitution Models
13 |
14 | | Name | Summary |
15 | |------|---------|
16 | | [model_finder](model/model_finder.md) | Determine the best-fit model for your data. |
17 | | [ModelFinderResult](model/ModelFinderResult.md) | Collection of data returned by IQ-TREE's ModelFinder. |
18 | | [Model](model/Model.md) | Class for substitution models. |
19 | | [make_model](model/make_model.md) | Function to construct Model classes from IQ-TREE strings. |
20 | | [SubstitutionModel](model/SubstitutionModel.md) | Enums for substitution models. |
21 | | [FreqType](model/FreqType.md) | Enum for base frequencies. |
22 | | [RateModel](model/RateModel.md) | Classes for rate heterogeneity. |
23 |
24 | ## Genetic Distances
25 |
26 | | Name | Summary |
27 | |------|---------|
28 | | [jc_distances](genetic_distance/jc_distances.md) | Pairwise Jukes-Cantor genetic distances. |
29 |
30 | ## Tree Distances
31 |
32 | | Name | Summary |
33 | |------|---------|
34 | | [robinson_foulds](tree_distance/robinson_foulds.md) | Pairwise Robinson-Foulds distances. |
35 |
--------------------------------------------------------------------------------
/docs/api/model/FreqType.md:
--------------------------------------------------------------------------------
1 | # FreqType
2 |
3 | ::: piqtree.model.FreqType
4 | options:
5 | show_if_no_docstring: true
6 |
7 | ## Usage
8 |
9 | For usage, see ["Use different kinds of substitution models"](../../quickstart/using_substitution_models.md#base-frequencies).
10 |
--------------------------------------------------------------------------------
/docs/api/model/Model.md:
--------------------------------------------------------------------------------
1 | # Model
2 |
3 | ::: piqtree.Model
4 |
5 | ## Usage
6 |
7 | For usage, see ["Use different kinds of substitution models"](../../quickstart/using_substitution_models.md).
8 |
--------------------------------------------------------------------------------
/docs/api/model/ModelFinderResult.md:
--------------------------------------------------------------------------------
1 | # ModelFinderResult
2 |
3 | ::: piqtree.ModelFinderResult
4 |
5 | ## Usage
6 |
7 | For usage, see ["Find the model of best fit with ModelFinder"](../../quickstart/using_model_finder.md).
8 |
--------------------------------------------------------------------------------
/docs/api/model/RateModel.md:
--------------------------------------------------------------------------------
1 | # RateModel
2 |
3 | ::: piqtree.model.DiscreteGammaModel
4 |
5 | ::: piqtree.model.FreeRateModel
6 |
7 | ## Usage
8 |
9 | For usage, see ["Use different kinds of substitution models"](../../quickstart/using_substitution_models.md#rate-heterogeneity).
10 |
--------------------------------------------------------------------------------
/docs/api/model/SubstitutionModel.md:
--------------------------------------------------------------------------------
1 | # SubstitutionModel
2 |
3 | Enums for available DNA and amino acid models. Strings may be used instead of the enums.
4 |
5 | ::: piqtree.model.DnaModel
6 | options:
7 | show_if_no_docstring: true
8 |
9 | ::: piqtree.model.AaModel
10 | options:
11 | show_if_no_docstring: true
12 |
13 | ## Usage
14 |
15 | For usage, see ["Use different kinds of substitution models"](../../quickstart/using_substitution_models.md#usage).
16 |
--------------------------------------------------------------------------------
/docs/api/model/make_model.md:
--------------------------------------------------------------------------------
1 | # make_model
2 |
3 | ::: piqtree.make_model
4 |
5 | ## Usage
6 |
7 | For usage, see ["Use different kinds of substitution models"](../../quickstart/using_substitution_models.md).
8 |
--------------------------------------------------------------------------------
/docs/api/model/model_finder.md:
--------------------------------------------------------------------------------
1 | # model_finder
2 |
3 | ::: piqtree.model_finder
4 |
5 | ## Usage
6 |
7 | For usage, see ["Find the model of best fit with ModelFinder"](../../quickstart/using_model_finder.md).
8 |
--------------------------------------------------------------------------------
/docs/api/tree/build_tree.md:
--------------------------------------------------------------------------------
1 | # build_tree
2 |
3 | ::: piqtree.build_tree
4 |
5 | ## Usage
6 |
7 | For usage, see ["Construct a maximum likelihood phylogenetic tree"](../../quickstart/construct_ml_tree.md).
8 |
--------------------------------------------------------------------------------
/docs/api/tree/fit_tree.md:
--------------------------------------------------------------------------------
1 | # fit_tree
2 |
3 | ::: piqtree.fit_tree
4 |
5 | ## Usage
6 |
7 | For usage, see ["Fit branch lengths to a tree topology from an alignment"](../../quickstart/fit_tree_topology.md).
8 |
--------------------------------------------------------------------------------
/docs/api/tree/nj_tree.md:
--------------------------------------------------------------------------------
1 | # nj_tree
2 |
3 | ::: piqtree.nj_tree
4 |
5 | ## Usage
6 |
7 | For usage, see ["Construct a rapid neighbour-joining tree from a distance matrix"](../../quickstart/construct_nj_tree.md).
8 |
--------------------------------------------------------------------------------
/docs/api/tree/random_trees.md:
--------------------------------------------------------------------------------
1 | # random_trees
2 |
3 | ::: piqtree.random_trees
4 |
5 | ::: piqtree.TreeGenMode
6 | options:
7 | show_if_no_docstring: true
8 |
9 | ## Usage
10 |
11 | For usage, see ["Make a collection of randomly generated trees"](../../quickstart/make_random_trees.md).
12 |
--------------------------------------------------------------------------------
/docs/api/tree_distance/robinson_foulds.md:
--------------------------------------------------------------------------------
1 | # robinson_foulds
2 |
3 | ::: piqtree.robinson_foulds
4 |
5 | ## Usage
6 |
7 | For usage, see ["Calculate pairwise Robinson-Foulds distances between trees"](../../quickstart/calculate_rf_distances.md).
8 |
--------------------------------------------------------------------------------
/docs/apps/app_pipeline.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # ## Building workflows using `piqtree` apps
3 | # > **WARNING**
4 | # > This page is under construction!
5 | #
6 | # We can combine `piqtree` apps with other cogent3 apps to develop a pipeline. There are multiple concepts involved here, particularly data stores, composed apps, parallel execution, log files etc... See the cogent3 [app documentation](https://cogent3.org/doc/app/index.html) for more details.
7 | #
8 | # To develop a pipeline efficiently we only need a subset of the sequences in an alignment. We will use the [diverse-seq](https://pypi.org/project/diverse-seq/) plugin for that purpose. This allows selecting a specified subset of sequences that capture the diversity in an alignment.
9 | #
10 | # But first, we need the data.
11 | # %%
12 | from piqtree import download_dataset
13 |
14 | alns_path = download_dataset("mammal-orths.zip", dest_dir="data", inflate_zip=False)
15 |
16 | # %% [markdown]
17 | # We open this directory as a cogent3 data store.
18 |
19 | # %%
20 | from cogent3 import open_data_store
21 |
22 | dstore = open_data_store(alns_path, suffix="fa")
23 | dstore.describe
24 |
25 | # %% [markdown]
26 | # We need to create some apps: a loader for the data, a divergent sequence selector, an app that drops alignment columns containing non-canonical nucleotides (so gaps and N's), an app that selects alignments with a minimum number of aligned columns, a "data store" to write results to and a writer. These will then be combined into a single composed app which will be applied to all the alignments in the data store.
27 |
28 | # %%
29 | import pathlib
30 | from collections import Counter
31 |
32 | from cogent3 import get_app
33 |
34 | outpath = pathlib.Path("data/delme.sqlitedb")
35 | outpath.unlink(missing_ok=True)
36 |
37 | loader = get_app("load_aligned", format="fasta", moltype="dna")
38 | divergent = get_app("dvs_nmost", n=10, k=6)
39 | just_nucs = get_app("omit_degenerates") # has to go after the divergent selector
40 | min_length = get_app("min_length", length=600)
41 | best_model = get_app("piqtree_mfinder")
42 | app = loader + divergent + min_length + best_model
43 | model_counts = Counter(
44 | str(result.best_aic)
45 | for result in app.as_completed(dstore, show_progress=True, parallel=True)
46 | if result
47 | )
48 | model_counts
49 |
--------------------------------------------------------------------------------
/docs/apps/available_help.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # To see the apps that `piqtree` makes available, use the cogent3 function `available_apps()` as follows.
3 |
4 | # %%
5 | from cogent3 import available_apps
6 |
7 | available_apps("piqtree")
8 |
9 | # %% [markdown]
10 | # For a particular app, use the cogent3 function `app_help()` to find out what the options are.
11 |
12 | # %%
13 | from cogent3 import app_help
14 |
15 | app_help("piqtree_phylo")
16 |
17 | # %%
18 | from cogent3 import app_help
19 |
20 | app_help("piqtree_fit")
21 |
--------------------------------------------------------------------------------
/docs/apps/fit_tree.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # We evaluate the support for a specific phylogeny using `piqtree_fit`.
3 | # For this simple case, we will assess the support for a specific phylogeny using the GTR model on one alignment.
4 |
5 | # %%
6 | import cogent3
7 |
8 | from piqtree import download_dataset
9 |
10 | aln_path = download_dataset("example.phy.gz", dest_dir="data")
11 | aln = cogent3.load_aligned_seqs(aln_path, moltype="dna", format="phylip")
12 | aln
13 |
14 | # %% [markdown]
15 | # We use a tree corresponding to the data in the `example.phy` file.
16 |
17 | # %%
18 | tree_path = download_dataset("example.tree.gz", dest_dir="data")
19 | tree = cogent3.load_tree(tree_path)
20 | tree.get_figure().show()
21 |
22 | # %% [markdown]
23 | # We now take a look at the help for the `piqtree_fit` app.
24 |
25 | # %%
26 |
27 | from cogent3 import app_help
28 |
29 | app_help("piqtree_fit")
30 |
31 | # %% [markdown]
32 | # We fit a GTR model and estimate the nucleotide frequencies by maximum likelihood.
33 |
34 | # %%
35 | from cogent3 import get_app
36 |
37 | fit_gtr = get_app("piqtree_fit", tree, submod_type="GTR", freq_type="FO")
38 | fit_gtr
39 |
40 | # %% [markdown]
41 | # ## Fit the model
42 |
43 | # %%
44 | fit = fit_gtr(aln)
45 |
46 | # %% [markdown]
47 | # The `fit` object is a cogent3 tree (`PhyloNode`) and the maximum likelihood estimated parameters are stored in the `params` attribute.
48 | #
49 | # > **Note**
50 | # > "lnL" is short for log likelihood and is the log likelihood of the model.
51 | # > "mprobs" is short for motif probabilities and are the estimated equilibrium frequencies of the nucleotides.
52 |
53 | # %%
54 | fit.params
55 |
--------------------------------------------------------------------------------
/docs/apps/model_finder.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # We use the `piqtree_mfinder` app to rank models. This is the python binding to the IQ-TREE ModelFinder tool.
3 |
4 | # %%
5 | from cogent3 import app_help, get_app, load_aligned_seqs
6 |
7 | from piqtree import download_dataset
8 |
9 | aln_path = download_dataset("example.phy.gz", dest_dir="data")
10 | # format argument not required after cogent3 2024.11 release
11 | aln = load_aligned_seqs(aln_path, moltype="dna", format="phylip")
12 |
13 | # %% [markdown]
14 | # Get help and then apply `piqtree_mfinder`.
15 |
16 | # %%
17 | app_help("piqtree_mfinder")
18 |
19 | # %%
20 | mfinder = get_app("piqtree_mfinder")
21 | ranked = mfinder(aln)
22 | ranked
23 |
24 | # %% [markdown]
25 | # ## Accessing the best model
26 | # The different measures used to select the best model, AIC, AICc, and BIC, are available as attributes of the result object. We'll select AICc as the measure for choosing the best model.
27 |
28 | # %%
29 | selected = ranked.best_aicc
30 |
31 | # %% [markdown]
32 | # You can inspect the statistics for one of these using the `model_stats` attribute.
33 |
34 | # %%
35 | ranked.model_stats[selected]
36 |
37 | # %% [markdown]
38 | # ## Using the best model
39 | # You can apply the selected model to a phylogenetic analysis.
40 | # > **Note**
41 | # > The process is the same for both the `piqtree_phylo` and the `piqtree_fit` apps.
42 |
43 | # %%
44 | fit = get_app(
45 | "piqtree_phylo",
46 | selected.submod_type,
47 | freq_type=selected.freq_type,
48 | rate_model=selected.rate_model,
49 | invariant_sites=selected.invariant_sites,
50 | )
51 | fitted = fit(aln)
52 | fitted
53 |
--------------------------------------------------------------------------------
/docs/apps/nj.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # The Neighbour-Joining method uses genetic distances to build a phylogenetic tree. `piqtree` provides only `piqtree_jc_dists` for this. `cogent3` includes many more methods. The results of either can be used to build a tree. For divergent sequences we will use Lake's paralinear measure as it accommodates divergent sequence compositions.
3 |
4 | # %%
5 | import cogent3
6 |
7 | from piqtree import download_dataset
8 |
9 | aln_path = download_dataset("example.phy.gz", dest_dir="data")
10 | aln = cogent3.load_aligned_seqs(aln_path, moltype="dna", format="phylip")
11 |
12 | # %% [markdown]
13 | # ## Getting a paralinear distance matrix
14 | # This can be obtained directly from the alignment object itself.
15 |
16 | # %%
17 | dists = aln.distance_matrix(calc="paralinear")
18 | dists
19 |
20 | # %% [markdown]
21 | # Get help on the `piqtree_nj` app.
22 |
23 | # %%
24 | cogent3.app_help("piqtree_nj")
25 |
26 | # %% [markdown]
27 | # Make an app and apply it to the distance matrix.
28 |
29 | # %%
30 | nj = cogent3.get_app("piqtree_nj")
31 | tree = nj(dists)
32 |
33 | # %% [markdown]
34 | # > **Warning**
35 | # > Branch lengths can be negative in the piqtree NJ tree. This manifests as branches going backwards!
36 |
37 | # %%
38 | tree.get_figure().show()
39 |
40 | # %% [markdown]
41 | # > **Note**
42 | # > To write the tree to a file, use the `write()` method.
43 |
44 | # %% [markdown]
45 | # ## Combining the piqtree dist and nj apps
46 | # We can combine the `piqtree_jc_dists` and `piqtree_nj` apps to build a tree from an alignment in one step.
47 |
48 | # %%
49 | jc = cogent3.get_app("piqtree_jc_dists")
50 | nj = cogent3.get_app("piqtree_nj")
51 | app = jc + nj
52 | tree = app(aln)
53 | tree.get_figure().show()
54 |
--------------------------------------------------------------------------------
/docs/apps/options.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # ## Site-heterogeneity model types
3 |
4 |
5 | # %%
6 | from piqtree import available_rate_type
7 |
8 | available_rate_type()
9 |
10 | # %% [markdown]
11 | # ## State frequency types
12 |
13 | # %% tags=hide_code
14 |
15 | from piqtree import available_freq_type
16 |
17 | available_freq_type()
18 |
19 | # %% [markdown]
20 | # ## Substitution model types
21 | # Assign the value in the "Abbreviation" column as a string to the `submod_type` parameter, e.g.
22 | # ```python
23 | # submod_type="GTR"
24 | # ```
25 |
26 |
27 | # %%
28 | from piqtree import available_models
29 |
30 | available_models()
31 |
--------------------------------------------------------------------------------
/docs/apps/pairwise_dist.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # We estimate pairwise distances via the Jukes-Cantor model with the `piqtree_jc_dists` app.
3 |
4 | # %%
5 | import cogent3
6 |
7 | from piqtree import download_dataset
8 |
9 | aln_path = download_dataset("example.phy.gz", dest_dir="data")
10 | aln = cogent3.load_aligned_seqs(aln_path, moltype="dna", format="phylip")
11 |
12 | # %% [markdown]
13 | # We get help on the `piqtree_jc_dists` app.
14 |
15 | # %%
16 | cogent3.app_help("piqtree_jc_dists")
17 |
18 | # %% [markdown]
19 | # Make an app and apply it to the alignment.
20 |
21 | # %%
22 | jc_dists = cogent3.get_app("piqtree_jc_dists")
23 | dists = jc_dists(aln)
24 | dists
25 |
--------------------------------------------------------------------------------
/docs/apps/phylo.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # We use the `piqtree_phylo` app to build phylogenies.
3 | # For this simple case, we will build a single phylogeny using the GTR model on one alignment.
4 | # We have a utility script for this documentation that provides the local path to that data. We will then load that data and, as it contains quite a few sequences, we will use a subset of it. We use methods on the cogent3 object to do that.
5 |
6 | # %%
7 | import cogent3
8 |
9 | from piqtree import download_dataset
10 |
11 | aln_path = download_dataset("example.phy.gz", dest_dir="data")
12 | aln = cogent3.load_aligned_seqs(aln_path, moltype="dna", format="phylip")
13 | aln
14 |
15 |
16 | # %% [markdown]
17 | # We now take a look at the help for the `piqtree_phylo` app.
18 |
19 | # %%
20 |
21 | from cogent3 import app_help
22 |
23 | app_help("piqtree_phylo")
24 |
25 | # %% [markdown]
26 | # We build an app for estimating phylogenies with a GTR model
27 |
28 | # %%
29 | from cogent3 import get_app
30 |
31 | phylo_gtr = get_app("piqtree_phylo", "GTR", bootstrap_reps=1000)
32 | phylo_gtr
33 |
34 | # %% [markdown]
35 | # ## Run the phylogeny estimation and display branches with support < 90%.
36 |
37 | # %%
38 | tree = phylo_gtr(aln)
39 | dnd = tree.get_figure(show_support=True, threshold=90)
40 | dnd.show(width=600, height=600)
41 |
--------------------------------------------------------------------------------
/docs/citation.md:
--------------------------------------------------------------------------------
1 | # Citation
2 |
3 | Please cite piqtree as follows:
4 |
5 | Bui Quang Minh, Heiko A Schmidt, Olga Chernomor, Dominik Schrempf, Michael D Woodhams, Arndt von Haeseler, Robert Lanfear, IQ-TREE 2: New Models and Efficient Methods for Phylogenetic Inference in the Genomic Era, Molecular Biology and Evolution, Volume 37, Issue 5, May 2020, Pages 1530–1534, [10.1093/molbev/msaa015](https://doi.org/10.1093/molbev/msaa015)
6 |
7 | ```bibtex
8 | @article{10.1093/molbev/msaa015,
9 | author = {Minh, Bui Quang and Schmidt, Heiko A and Chernomor, Olga and Schrempf, Dominik and Woodhams, Michael D and von Haeseler, Arndt and Lanfear, Robert},
10 | title = "{IQ-TREE 2: New Models and Efficient Methods for Phylogenetic Inference in the Genomic Era}",
11 | journal = {Molecular Biology and Evolution},
12 | volume = {37},
13 | number = {5},
14 | pages = {1530-1534},
15 | year = {2020},
16 | month = {02},
17 | }
18 | ```
19 |
20 | Please cite cogent3 as follows:
21 |
22 | Huttley, G. (2021). cogent3: comparative genomics toolkit (2020.12.21). [10.5281/zenodo.4542532](https://doi.org/10.5281/zenodo.4542532)
23 |
24 | ```bibtex
25 | @misc{cogent3,
26 | title = {cogent3: comparative genomics toolkit (2020.12.21)},
27 | author = {Huttley, Gavin A},
28 | year = {2021},
29 | doi = {10.5281/zenodo.4542532},
30 | }
31 | ```
32 |
33 |
34 |
--------------------------------------------------------------------------------
/docs/developers/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing to `piqtree`
2 |
3 | We welcome all contributors who would like to work on the `piqtree` project.
4 |
5 | ## Overview
6 |
7 | Contributions can be made through a GitHub pull request to the main repository.
8 | We recommend working off a new branch when contributing new features.
9 |
10 | After submitting a pull request, it will be reviewed by a member of the core dev team.
11 | It's rare for a contribution to be accepted without some edits.
12 | This is a constructive process and we hope you will find our remarks useful.
13 |
14 | Thanks for participating!
15 |
16 | ## Code formatting
17 |
18 | We use `ruff` to format the project's code. The formatter can be run in the terminal in the project's base directory:
19 |
20 | ```bash
21 | ruff format
22 | ```
23 |
24 | ## Docstrings
25 |
26 | All public-facing functions must be documented and we strongly encourage docstrings for internal functions,
27 | particularly those with wide usage. We follow the [NumPy docstring style guide](https://numpydoc.readthedocs.io/en/latest/format.html)
28 | for all docstrings.
29 |
--------------------------------------------------------------------------------
/docs/developers/environment_setup.md:
--------------------------------------------------------------------------------
1 | # Setting up your environment for development
2 |
3 | We currently only support x86-64 linux, and both x86-64 and ARM mac operating systems.
4 | For Windows development we recommend using Windows Subsystem for Linux. Alternatively,
5 | one can work through the provided dev container.
6 |
7 | ## Initial setup
8 |
9 | Fork and clone the [`piqtree` repository](https://github.com/iqtree/piqtree).
10 |
11 | ## Working with the `iqtree2` submodule
12 |
13 | We use a git submodule to keep track of the version of IQ-TREE that is being used.
14 | To initialise the submodule, or to get the latest version after an update, the
15 | following can be run:
16 |
17 | ```bash
18 | git submodule update --init --recursive
19 | ```
20 |
21 | ## Building the IQ-TREE library
22 |
23 | There are several build scripts used by the CI to install dependencies and build the
24 | IQ-TREE library.
25 |
26 | If you are working through the dev container, all dependencies should already be installed.
27 | The IQ-TREE library can be built with `./build_tools/build_iqtree.sh`.
28 |
29 | If you are working on a Mac, running `./build_tools/before_all_mac.sh` will install the
30 | required dependencies through homebrew, then build the IQ-TREE library. If you need to
31 | build the library again, running `./build_tools/build_iqtree.sh` will skip the dependency
32 | installation step.
33 |
34 | If you are working on linux, check the top of the `.devcontainer/DockerFile` for the list of
35 | dependencies. Once installed, the IQ-TREE library can be built with `./build_tools/build_iqtree.sh`.
36 |
37 | ## Installing `piqtree` for standard development
38 |
39 | After completing the above steps, in your preferred Python virtual environment run the following
40 | in the repository's directory:
41 |
42 | ```bash
43 | pip install -e ".[dev]"
44 | ```
45 |
46 | This will install `piqtree` in editable mode, alongside testing, linting and other dependencies.
47 |
48 | ## Installing `piqtree` for documentation development
49 |
50 | To contribute to the documentation, we recommend running the following after the above
51 | steps in a separate virtual environment:
52 |
53 | ```bash
54 | pip install -e ".[doc]"
55 | ```
56 |
57 | This will install `piqtree` in editable mode, along with dependencies for building the documentation.
58 |
59 | ## Running the tests
60 |
61 | To verify that installation has worked, using your chosen standard development environment run
62 | the following in the base directory of this repository.
63 |
64 | ```bash
65 | pytest
66 | ```
67 |
--------------------------------------------------------------------------------
/docs/developers/index.md:
--------------------------------------------------------------------------------
1 | # Contributor Guide
2 |
3 | This guide is intended for those who wish to contribute to the `piqtree` project.
4 |
5 | - See ["Setting up your environment for development"](./environment_setup.md) for setting up the `piqtree` development environment.
6 | - See ["Working with the IQ-TREE submodule"](./iqtree_submodule.md) for keeping the IQ-TREE library up to date.
7 | - See ["Contributing"](./contributing.md) for how to contribute to `piqtree`.
8 | - See ["Testing, linting, and type checking"](./testing.md) for checking your code.
9 | - See ["Troubleshooting"](./troubleshooting.md) for common problems.
10 | - See ["Release checklist"](./release.md) for releasing a new version of `piqtree`.
11 |
--------------------------------------------------------------------------------
/docs/developers/iqtree_submodule.md:
--------------------------------------------------------------------------------
1 | # Working with the IQ-TREE submodule
2 |
3 | We use a submodule to keep track of the version of IQ-TREE being used.
4 |
5 | Every so often, there is an update to the submodule.
6 | This most frequently happens when new features are being added from IQ-TREE, or
7 | upon a new release.
8 |
9 | When the submodule updates, to ensure the latest version of the IQ-TREE library is being used
10 | the submodule must be updated locally and the library rebuilt. This can be done by running the
11 | following two commands in the base directory of the `piqtree` repository:
12 |
13 | ```bash
14 | git submodule update --init --recursive
15 | ./build_tools/build_iqtree.sh
16 | ```
17 |
--------------------------------------------------------------------------------
/docs/developers/release.md:
--------------------------------------------------------------------------------
1 | # Release checklist
2 |
3 | These instructions are for `piqtree` release managers.
4 | The release is largely automated by the CI, but there are several things to confirm and do in this process.
5 |
6 | The release process is automated through the "Release" GitHub Action.
7 | The documentation is fetched by readthedocs from the "Build Docs" GitHub Action.
8 |
9 | - The `piqtree` version has been correctly bumped.
10 | - The testing, linting and type checking all pass on all supported platforms.
11 | - The code has been thoroughly tested (check what's been missed in the coverage report).
12 | - The documentation builds and appears correct.
13 | - The documentation has been updated on readthedocs (this must be triggered from readthedocs).
14 | - The "Release" GitHub Action has correctly uploaded to Test PyPI.
15 | - The "Release" GitHub Action has correctly uploaded to PyPI.
16 |
--------------------------------------------------------------------------------
/docs/developers/testing.md:
--------------------------------------------------------------------------------
1 | # Testing, linting, and type checking
2 |
3 | We use `pytest` for testing, `ruff` for linting and `mypy` for type checking. Pull requests require all three of these things to pass.
4 |
5 | ## Testing
6 |
7 | To run the tests, run the following in the base directory of the `piqtree` repository:
8 |
9 | ```bash
10 | pytest
11 | ```
12 |
13 | ## Linting
14 |
15 | To run the linting, run the following in the base directory of the `piqtree` repository:
16 |
17 | ```bash
18 | ruff check
19 | ```
20 |
21 | ## Type checking
22 |
23 | To run type checking, run the following in the base directory of the `piqtree` repository:
24 |
25 | ```bash
26 | mypy src tests
27 | ```
28 |
--------------------------------------------------------------------------------
/docs/developers/troubleshooting.md:
--------------------------------------------------------------------------------
1 | # Troubleshooting
2 |
3 | This is a guide for common problems that may occur when working on `piqtree` development. If you have an issue that isn't listed here,
4 | please feel more than welcome to [raise an issue](https://github.com/iqtree/piqtree/issues) and we'll look into it.
5 |
6 | ## `piqtree` doesn't install
7 |
8 | There could be several reasons `piqtree` doesn't install correctly.
9 |
10 | ### Check your operating system is supported
11 |
12 | At this stage, we currently only support x86-64 linux, and x86-64 and ARM macOS.
13 |
14 | ### Check the IQ-TREE library is up-to-date
15 |
16 | Try running the following to build the latest version of the IQ-TREE library.
17 |
18 | ```bash
19 | git submodule update --init --recursive
20 | ./build_tools/build_iqtree.sh
21 | ```
22 |
23 | If the above doesn't work, please re-try the guide on ["Setting up your environment for development"](./environment_setup.md).
24 |
25 | ## The IQ-TREE library doesn't build
26 |
27 | There could be several reasons the IQ-TREE library doesn't build.
28 |
29 | ### Check your operating system is supported
30 |
31 | At this stage, we currently only support x86-64 linux, and x86-64 and ARM macOS.
32 |
33 | ### Check dependencies have been installed
34 |
35 | - If on macOS, try running `./build_tools/before_all_mac.sh` which will also install dependencies.
36 | - If on linux, try installing the dependencies listed at the top of `.devcontainer/DockerFile`
37 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # piqtree documentation
2 |
3 | ## Overview
4 |
5 | `piqtree` is a Python package that exposes selected [IQ-TREE](http://www.iqtree.org) capabilities within Python, using the [cogent3](https://cogent3.org) library as the interface.
6 |
7 | `piqtree` is implemented with the goals of:
8 |
9 | - making the individual high-performance components of IQ-TREE available within Python, enabling the community to take advantage of these routines.
10 | - facilitating exploratory analyses by leveraging cogent3's capabilities to provide a rich user experience in interactive Jupyter notebooks, e.g. trivial parallelisation across collections of alignments.
11 | - code using piqtree apps should be easy to understand.
12 |
13 | In addition to the functions provided, `piqtree` provides mini-applications in the form of [cogent3 apps](https://cogent3.org/doc/app/index.html). These can interplay with other such apps, e.g. the [cogent3-ete3](https://pypi.org/project/cogent3-ete3/) tree conversion plugin and the [diverse-seq](https://pypi.org/project/diverse-seq/) sequence subsampling plugin.
14 |
15 | > **Note**
16 | > `piqtree` does not implement all of the capabilities of IQ-TREE!
17 |
18 | ## Installation
19 |
20 | You get the vanilla version of `piqtree` by running the following command.
21 |
22 | ```bash
23 | pip install piqtree
24 | ```
25 |
26 | To get visualisation support with plotly, use the `[extra]` option.
27 |
28 | ```bash
29 | pip install "piqtree[extra]"
30 | ```
31 |
32 | ## Supported platforms
33 |
34 | At present we do not provide native binaries for Windows. Windows users can run `piqtree` using the Windows Subsystem for Linux (WSL) which can be installed via the Windows Store.
35 |
--------------------------------------------------------------------------------
/docs/quickstart/calculate_jc_distances.md:
--------------------------------------------------------------------------------
1 | # Calculate pairwise Jukes-Cantor distances
2 |
3 | A pairwise Jukes-Cantor distance matrix can be constructed from a cogent3 alignment object using [`jc_distances`](../api/genetic_distance/jc_distances.md).
4 | The resulting distance matrix can be indexed by integer index, or by name.
5 |
6 | ## Usage
7 |
8 | ### Basic Usage
9 |
10 | Construct a `cogent3` alignment object, then calculate the pairwise JC distance matrix.
11 |
12 | ```python
13 | from cogent3 import load_aligned_seqs
14 | from piqtree import jc_distances
15 |
16 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
17 |
18 | distance_matrix = jc_distances(aln)
19 |
20 | distance_1 = distance_matrix[0, 1]
21 |
22 | distance_2 = distance_matrix["Human", "Chimpanzee"]
23 | ```
24 |
25 | ### Multithreading
26 |
27 | The number of threads to be used may be specified. By default, or if 0 is specified, all available threads are used.
28 |
29 | ```python
30 | from cogent3 import load_aligned_seqs
31 | from piqtree import jc_distances
32 |
33 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
34 |
35 | # Use only 4 threads
36 | distance_matrix = jc_distances(aln, num_threads=4)
37 | ```
38 |
39 | ## See also
40 |
41 | - For using the JC distance matrix to construct a rapid neighbour-joining tree, see ["Construct a rapid neighbour-joining tree from a distance matrix"](construct_nj_tree.md).
42 |
--------------------------------------------------------------------------------
/docs/quickstart/calculate_rf_distances.md:
--------------------------------------------------------------------------------
1 | # Calculate pairwise Robinson-Foulds distances between trees
2 |
3 | A pairwise Robinson-Foulds distance matrix can be constructed from a sequence of cogent3 tree objects using [`robinson_foulds`](../api/tree_distance/robinson_foulds.md).
4 |
5 | ## Usage
6 |
7 | ### Basic Usage
8 |
9 | Construct a collection of cogent3 tree objects, then use [`robinson_foulds`](../api/tree_distance/robinson_foulds.md) to find the pairwise distances.
10 |
11 | ```python
12 | from cogent3 import make_tree
13 | from piqtree import robinson_foulds
14 |
15 | tree1 = make_tree("(a,b,(c,(d,e)));")
16 | tree2 = make_tree("(e,b,(c,(d,a)));")
17 | tree3 = make_tree("(a,b,(d,(c,e)));")
18 |
19 | rf_distances = robinson_foulds([tree1, tree2, tree3])
20 | ```
21 |
22 | ## See also
23 |
24 | - For constructing a maximum likelihood tree, see ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md).
25 | - For making a collection of random trees, see ["Make a collection of randomly generated trees"](make_random_trees.md).
26 |
--------------------------------------------------------------------------------
/docs/quickstart/construct_ml_tree.md:
--------------------------------------------------------------------------------
1 | # Construct a maximum likelihood phylogenetic tree
2 |
3 | A maximum-likelihood phylogenetic tree can be constructed from
4 | cogent3 alignment objects using [`build_tree`](../api/tree/build_tree.md).
5 |
6 | ## Usage
7 |
8 | ### Basic Usage
9 |
10 | Construct a `cogent3` alignment object, then construct a maximum-likelihood tree.
11 |
12 | ```python
13 | from cogent3 import load_aligned_seqs
14 | from piqtree import Model, build_tree
15 |
16 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
17 |
18 | tree = build_tree(aln, Model("GTR"))
19 | log_likelihood = tree.params["lnL"]
20 | ```
21 |
22 | ### Ultrafast Bootstrap
23 |
24 | To perform ultrafast bootstrapping, the number of replicates can be specified. The number of replicates must be at least 1000.
25 | The support for each node in the tree object can be accessed from `#!py3 node.params["support"]`.
26 |
27 | ```python
28 | from cogent3 import load_aligned_seqs
29 | from piqtree import Model, build_tree
30 | from piqtree.model import FreqType
31 |
32 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
33 | tree = build_tree(aln, Model("K81", freq_type=FreqType.FO), bootstrap_replicates=2000)
34 | ```
35 |
36 | ### Reproducible Results
37 |
38 | For reproducible results, a random seed may be specified.
39 | > **Caution:** 0 and None are equivalent to no random seed being specified.
40 |
41 | ```python
42 | from cogent3 import load_aligned_seqs
43 | from piqtree import Model, build_tree
44 | from piqtree.model import AaModel
45 |
46 | aln = load_aligned_seqs("my_alignment.fasta", moltype="protein")
47 |
48 | tree = build_tree(aln, Model(AaModel.Dayhoff), rand_seed=3)
49 | ```
50 |
51 | ### Multithreading
52 |
53 | To speed up computation, the number of threads to be used may be specified.
54 | By default, the computation is done on a single thread. If 0 is specified,
55 | then IQ-TREE attempts to determine the optimal number of threads.
56 |
57 | > **Caution:** If 0 is specified with small datasets, the time to determine the
58 | > optimal number of threads may exceed the time to find the maximum likelihood
59 | > tree.
60 |
61 | ```python
62 | from cogent3 import load_aligned_seqs
63 | from piqtree import Model, build_tree
64 | from piqtree.model import DiscreteGammaModel, DnaModel
65 |
66 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
67 |
68 | model = Model(DnaModel.HKY, rate_model=DiscreteGammaModel(6), invariant_sites=True)
69 | tree = build_tree(aln, model, num_threads=4)
70 | ```
71 |
72 | ## See also
73 |
74 | - For how to specify a `Model`, see ["Use different kinds of substitution models"](using_substitution_models.md).
75 | - For selecting the best `Model`, see ["Find the model of best fit with ModelFinder"](using_model_finder.md).
76 | - For fitting branch lengths to a tree topology see ["Fit branch lengths to a tree topology from an alignment"](fit_tree_topology.md).
77 |
--------------------------------------------------------------------------------
/docs/quickstart/construct_nj_tree.md:
--------------------------------------------------------------------------------
1 | # Construct a rapid neighbour-joining tree from a distance matrix
2 |
3 | A rapid neighbour-joining tree can be constructed from a pairwise `DistanceMatrix` object with [`nj_tree`](../api/tree/nj_tree.md).
4 |
5 | ## Usage
6 |
7 | ### Basic Usage
8 |
9 | Make a pairwise distance matrix, then construct the rapid neighbour-joining tree.
10 |
11 | ```python
12 | from cogent3 import load_aligned_seqs
13 | from piqtree import jc_distances, nj_tree
14 |
15 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
16 |
17 | distance_matrix = jc_distances(aln)
18 |
19 | tree = nj_tree(distance_matrix)
20 | ```
21 |
22 | ### Other Distance Matrices
23 |
24 | `cogent3` supports the calculation of **paralinear**, **JC69**, **TN93**, **hamming** and **pdist** distance matrices from alignment objects.
25 |
26 | ```python
27 | from cogent3 import load_aligned_seqs
28 | from piqtree import nj_tree
29 |
30 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
31 |
32 | distance_matrix = aln.distance_matrix(calc="TN93") # calc is not case sensitive
33 |
34 | tree = nj_tree(distance_matrix)
35 | ```
36 |
37 | ## See also
38 |
39 | - For constructing the JC distance matrix with `piqtree`, see ["Calculate pairwise Jukes-Cantor distances"](calculate_jc_distances.md).
40 | - For constructing a maximum likelihood tree, see ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md).
41 |
--------------------------------------------------------------------------------
/docs/quickstart/fit_tree_topology.md:
--------------------------------------------------------------------------------
1 | # Fit branch lengths to a tree topology from an alignment
2 |
3 | Branch lengths can be fitted to a tree from a cogent3 alignment object
4 | using [`fit_tree`](../api/tree/fit_tree.md).
5 |
6 | ## Usage
7 |
8 | ### Basic Usage
9 |
10 | Construct `cogent3` alignment and tree objects, then fit branch lengths to a new tree.
11 |
12 | ```python
13 | from cogent3 import load_aligned_seqs, make_tree
14 | from piqtree import Model, fit_tree
15 |
16 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
17 | tree = make_tree("((Human, Chimpanzee), Rhesus, Mouse);")
18 |
19 | fitted_tree = fit_tree(aln, tree, model=Model("F81"))
20 | log_likelihood = fitted_tree.params["lnL"]
21 | ```
22 |
23 | ### Reproducible Results
24 |
25 | For reproducible results, a random seed may be specified.
26 | > **Caution:** 0 and None are equivalent to no random seed being specified.
27 |
28 | ```python
29 | from cogent3 import load_aligned_seqs, make_tree
30 | from piqtree import Model, fit_tree
31 | from piqtree.model import DnaModel
32 |
33 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
34 | tree = make_tree("((Human, Chimpanzee), Rhesus, Mouse);")
35 |
36 | fitted_tree = fit_tree(aln, tree, model=Model(DnaModel.SYM), rand_seed=42)
37 | ```
38 |
39 | ### Multithreading
40 |
41 | To speed up computation, the number of threads to be used may be specified.
42 | By default, the computation is done on a single thread. If 0 is specified,
43 | then IQ-TREE attempts to determine the optimal number of threads.
44 |
45 | > **Caution:** If 0 is specified with small datasets, the time to determine the
46 | > optimal number of threads may exceed the time taken to fit the branch
47 | > lengths.
48 |
49 | ```python
50 | from cogent3 import load_aligned_seqs, make_tree
51 | from piqtree import Model, fit_tree
52 | from piqtree.model import DnaModel, FreeRateModel
53 |
54 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
55 | tree = make_tree("((Human, Chimpanzee), Rhesus, Mouse);")
56 |
57 | model = Model(DnaModel.HKY, rate_model=FreeRateModel(3), invariant_sites=True)
58 | fitted_tree = fit_tree(aln, tree, model, num_threads=4)
59 | ```
60 |
61 | ## See also
62 |
63 | - For how to specify a `Model`, see ["Use different kinds of substitution models"](using_substitution_models.md).
64 | - For constructing a maximum likelihood tree, see ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md).
65 |
--------------------------------------------------------------------------------
/docs/quickstart/index.md:
--------------------------------------------------------------------------------
1 | # Quickstart
2 |
3 | Want to jump right in to using `piqtree`? This is the place to be! Here, you can find examples to:
4 |
5 | - [Construct a maximum likelihood phylogenetic tree.](construct_ml_tree.md)
6 | - [Fit branch lengths to a tree topology from an alignment.](fit_tree_topology.md)
7 | - [Use different kinds of substitution models.](using_substitution_models.md)
8 | - [Find the model of best fit with ModelFinder.](using_model_finder.md)
9 | - [Calculate pairwise Jukes-Cantor distances.](calculate_jc_distances.md)
10 | - [Construct a rapid neighbour-joining tree from a distance matrix.](construct_nj_tree.md)
11 | - [Make a collection of randomly generated trees.](make_random_trees.md)
12 | - [Calculate pairwise Robinson-Foulds distances between trees.](calculate_rf_distances.md)
13 |
14 | Be sure to check out the [API Reference](../api/index.md) for full usage!
15 |
--------------------------------------------------------------------------------
/docs/quickstart/make_random_trees.md:
--------------------------------------------------------------------------------
1 | # Make a collection of randomly generated trees
2 |
3 | A sequence of random trees can be generated using [`random_trees`](../api/tree/random_trees.md#piqtree.random_trees).
4 | Multiple tree generation modes are supported with the [`TreeGenMode`](../api/tree/random_trees.md#piqtree.TreeGenMode) including
5 | balanced, birth-death, caterpillar, star, uniform, and Yule-Harding trees.
6 |
7 | ## Usage
8 |
9 | ### Basic Usage
10 |
11 | Specify the number of trees to generate, the number of taxa, and under what mechanism the trees are to be generated.
12 | See the documentation for [`TreeGenMode`](../api/tree/random_trees.md#piqtree.TreeGenMode) for all available generation options.
13 |
14 | > **Note:** if star trees are generated the tree appears bifurcating, but branch lengths are set to zero where appropriate.
15 |
16 | ```python
17 | from piqtree import TreeGenMode, random_trees
18 |
19 | num_trees = 5
20 | num_taxa = 100
21 |
22 | trees = random_trees(num_trees, num_taxa, TreeGenMode.YULE_HARDING)
23 | ```
24 |
25 | ### Reproducible Results
26 |
27 | For reproducible results, a random seed may be specified.
28 | > **Caution:** 0 and None are equivalent to no random seed being specified.
29 |
30 | ```python
31 | from piqtree import TreeGenMode, random_trees
32 |
33 | num_trees = 5
34 | num_taxa = 100
35 |
36 | trees = random_trees(num_trees, num_taxa, TreeGenMode.UNIFORM, rand_seed=1)
37 | ```
38 |
39 | ## See also
40 |
41 | - For constructing a maximum likelihood tree, see ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md).
42 |
--------------------------------------------------------------------------------
/docs/quickstart/using_model_finder.md:
--------------------------------------------------------------------------------
1 | # Find the model of best fit with ModelFinder
2 |
3 | IQ-TREE's ModelFinder can be used to automatically find the model of best fit for an alignment using [`model_finder`](../api/model/model_finder.md).
4 | The best scoring model under the *Akaike information criterion* (AIC), the *corrected Akaike information criterion* (AICc), or the *Bayesian information criterion* (BIC) can be selected.
5 |
6 | ## Usage
7 |
8 | ### Basic Usage
9 |
10 | Construct a `cogent3` alignment object, then find the model of best fit.
11 |
12 | ```python
13 | from cogent3 import load_aligned_seqs
14 | from piqtree import model_finder
15 |
16 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
17 |
18 | result = model_finder(aln)
19 |
20 | best_aic_model = result.best_aic
21 | best_aicc_model = result.best_aicc
22 | best_bic_model = result.best_bic
23 | ```
24 |
25 | ### Specifying the Search Space
26 |
27 | We expose the `mset`, `mfreq` and `mrate` parameters from IQ-TREE's ModelFinder to specify the substitution model search space, base frequency search space, and rate heterogeneity search space respectively. They can be specified as a set of strings in either `model_set`, `freq_set` or `rate_set` respectively.
28 |
29 | ```python
30 | from cogent3 import load_aligned_seqs
31 | from piqtree import model_finder
32 |
33 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
34 |
35 | result = model_finder(aln, model_set={"HKY", "TIM"})
36 |
37 | best_aic_model = result.best_aic
38 | best_aicc_model = result.best_aicc
39 | best_bic_model = result.best_bic
40 | ```
41 |
42 | ### Reproducible Results
43 |
44 | For reproducible results, a random seed may be specified.
45 | > **Caution:** 0 and None are equivalent to no random seed being specified.
46 |
47 | ```python
48 | from cogent3 import load_aligned_seqs
49 | from piqtree import model_finder
50 |
51 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
52 |
53 | result = model_finder(aln, rand_seed=5)
54 |
55 | best_aic_model = result.best_aic
56 | best_aicc_model = result.best_aicc
57 | best_bic_model = result.best_bic
58 | ```
59 |
60 | ### Multithreading
61 |
62 | To speed up computation, the number of threads to be used may be specified.
63 | By default, the computation is done on a single thread. If 0 is specified,
64 | then IQ-TREE attempts to determine the optimal number of threads.
65 |
66 | > **Caution:** If 0 is specified with small datasets, the time to determine the
67 | > optimal number of threads may exceed the time to find the model of best
68 | > fit.
69 |
70 | ```python
71 | from cogent3 import load_aligned_seqs
72 | from piqtree import model_finder
73 |
74 | aln = load_aligned_seqs("my_alignment.fasta", moltype="dna")
75 |
76 | result = model_finder(aln, num_threads=4)
77 |
78 | best_aic_model = result.best_aic
79 | best_aicc_model = result.best_aicc
80 | best_bic_model = result.best_bic
81 | ```
82 |
83 | ## See also
84 |
85 | - For constructing a maximum likelihood tree, see ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md).
86 | - For how to specify a `Model`, see ["Use different kinds of substitution models"](using_substitution_models.md).
87 |
--------------------------------------------------------------------------------
/docs/quickstart/using_substitution_models.md:
--------------------------------------------------------------------------------
1 | # Use different kinds of substitution models
2 |
3 | piqtree currently supports all named IQ-TREE DNA models including Lie Markov models, empirical amino-acid exchange rate matrices,
4 | as well as specification for base frequencies and rate heterogeneity across sites.
5 |
6 | We use the [`Model`](../api/model/Model.md) class to represent the substitution model which can be constructed from strings, or
7 | using enums. Substitution models can be combined with specification for base frequencies, and rate heterogeneity across sites models.
8 |
9 | ## Usage
10 |
11 | ### DNA Models
12 |
13 | DNA models may be specified using the [`DnaModel`](../api/model/SubstitutionModel.md#piqtree.model.DnaModel) enum, or by using
14 | the IQ-TREE string representation. A full list of supported DNA models is available [here](../api/model/SubstitutionModel.md#piqtree.model.DnaModel).
15 |
16 | ```python
17 | from piqtree import Model
18 | from piqtree.model import DnaModel
19 |
20 | hky_model_1 = Model("HKY")
21 | hky_model_2 = Model(DnaModel.HKY)
22 |
23 | lie_6_6_model_1 = Model("6.6")
24 | lie_6_6_model_2 = Model(DnaModel.LIE_6_6)
25 | ```
26 |
27 | ### Amino-acid Models
28 |
29 | Amino-acid models may be specified using the [`AaModel`](../api/model/SubstitutionModel.md#piqtree.model.AaModel) enum, or by using
30 | the IQ-TREE string representation. A full list of supported amino-acid models is available [here](../api/model/SubstitutionModel.md#piqtree.model.AaModel).
31 |
32 | ```python
33 | from piqtree import Model
34 | from piqtree.model import AaModel
35 |
36 | dayhoff_model_1 = Model("Dayhoff")
37 | dayhoff_model_2 = Model(AaModel.Dayhoff)
38 |
39 | nq_yeast_model_1 = Model("NQ.yeast")
40 | nq_yeast_model_2 = Model(AaModel.NQ_yeast)
41 | ```
42 |
43 | ### Base Frequencies
44 |
45 | Three types of base frequencies can be specified using either the [`FreqType`](../api/model/FreqType.md) enum, or strings.
46 | If not specified, it uses the chosen model's default.
47 |
48 | - [`F`](../api/model/FreqType.md#piqtree.model.FreqType.F): Empirical base frequencies.
49 | - [`FQ`](../api/model/FreqType.md#piqtree.model.FreqType.FQ): Equal base frequencies.
50 | - [`FO`](../api/model/FreqType.md#piqtree.model.FreqType.FO): Optimised base frequencies by maximum-likelihood.
51 |
52 | ```python
53 | from piqtree import Model
54 | from piqtree.model import FreqType
55 |
56 | # Default for the GTR model
57 | empirical_freqs_1 = Model("GTR", freq_type="F")
58 | empirical_freqs_2 = Model("GTR", freq_type=FreqType.F)
59 |
60 | equal_freqs_1 = Model("GTR", freq_type="FQ")
61 | equal_freqs_2 = Model("GTR", freq_type=FreqType.FQ)
62 |
63 | opt_freqs_1 = Model("GTR", freq_type="FO")
64 | opt_freqs_2 = Model("GTR", freq_type=FreqType.FO)
65 | ```
66 |
67 | ### Rate Heterogeneity
68 |
69 | #### Invariable Sites
70 |
71 | A boolean flag can be specified when constructing the [`Model`](../api/model/Model.md) class to allow for a proportion of invariable sites.
72 |
73 | ```python
74 | from piqtree import Model
75 |
76 | without_invar_sites = Model("TIM", invariant_sites=False) # Default
77 | with_invar_sites = Model("TIM", invariant_sites=True)
78 | ```
79 |
80 | #### Discrete Gamma Model
81 |
82 | We support the [`DiscreteGammaModel`](../api/model/RateModel.md#piqtree.model.DiscreteGammaModel) allowing for a variable number of rate categories (by default 4).
83 |
84 | ```python
85 | from piqtree import Model
86 | from piqtree.model import DiscreteGammaModel
87 |
88 | # 4 rate categories, no invariable sites
89 | k81_discrete_gamma_4 = Model("K81", rate_model=DiscreteGammaModel())
90 |
91 | # 8 rate categories, with invariable sites
92 | k81_invar_discrete_gamma_8 = Model("K81", rate_model=DiscreteGammaModel(8), invariant_sites=True)
93 | ```
94 |
95 | #### FreeRate Model
96 |
97 | We support the [`FreeRateModel`](../api/model/RateModel.md#piqtree.model.FreeRateModel) allowing for a variable number of rate categories (by default 4).
98 |
99 | ```python
100 | from piqtree import Model
101 | from piqtree.model import FreeRateModel
102 |
103 | # 4 rate categories, no invariable sites
104 | sym_discrete_gamma_4 = Model("SYM", rate_model=FreeRateModel())
105 |
106 | # 8 rate categories, with invariable sites
107 | sym_invar_discrete_gamma_8 = Model("SYM", rate_model=FreeRateModel(8), invariant_sites=True)
108 | ```
109 |
110 | ### Making Model Classes from IQ-TREE Strings
111 |
112 | For the supported model types, the Model class can be created by using [`make_model`](../api/model/make_model.md) on the IQ-TREE string representation of the model.
113 |
114 | ```python
115 | from piqtree import make_model
116 |
117 | model = make_model("GTR+FQ+I+R3")
118 | ```
119 |
120 | ## See also
121 |
122 | - Use a [`Model`](../api/model/Model.md) to construct a maximum likelihood tree: ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md).
123 | - Use a [`Model`](../api/model/Model.md) to fit branch lengths to a tree topology: ["Fit branch lengths to a tree topology from an alignment"](fit_tree_topology.md).
124 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: piqtree
2 | site_url: "https://github.com/iqtree/piqtree"
3 | site_author: "Gavin Huttley"
4 | repo_url: "https://github.com/iqtree/piqtree"
5 | repo_name: 'GitHub'
6 | theme:
7 | name: material
8 | features:
9 | - navigation.indexes
10 | highlightjs: true
11 | hljs_languages:
12 | - yaml
13 | - python
14 | - text
15 | markdown_extensions:
16 | - pymdownx.highlight:
17 | anchor_linenums: true
18 | line_spans: __span
19 | pygments_lang_class: true
20 | - pymdownx.inlinehilite
21 | - pymdownx.snippets
22 | - pymdownx.superfences
23 | plugins:
24 | - search
25 | - mkdocstrings:
26 | handlers:
27 | python:
28 | paths: [src]
29 | options:
30 | show_root_heading: true
31 | docstring_style: numpy
32 | docstring_section_style: spacy
33 | docstring_options:
34 | ignore_init_summary: true
35 | merge_init_into_class: true
36 | - mkdocs-jupyter:
37 | include_requirejs: true
38 | execute: true
39 | remove_tag_config:
40 | remove_input_tags:
41 | - hide_code
42 |
43 | nav:
44 | - Introduction: index.md
45 | - Quickstart:
46 | - quickstart/index.md
47 | - quickstart/construct_ml_tree.md
48 | - quickstart/fit_tree_topology.md
49 | - quickstart/using_substitution_models.md
50 | - quickstart/using_model_finder.md
51 | - quickstart/calculate_jc_distances.md
52 | - quickstart/construct_nj_tree.md
53 | - quickstart/make_random_trees.md
54 | - quickstart/calculate_rf_distances.md
55 | - API reference:
56 | - api/index.md
57 | - Trees:
58 | - api/tree/build_tree.md
59 | - api/tree/fit_tree.md
60 | - api/tree/nj_tree.md
61 | - api/tree/random_trees.md
62 | - Substitution models:
63 | - api/model/model_finder.md
64 | - api/model/ModelFinderResult.md
65 | - api/model/Model.md
66 | - api/model/make_model.md
67 | - api/model/SubstitutionModel.md
68 | - api/model/FreqType.md
69 | - api/model/RateModel.md
70 | - Genetic distances:
71 | - api/genetic_distance/jc_distances.md
72 | - Tree distances:
73 | - api/tree_distance/robinson_foulds.md
74 | - Apps:
75 | - Available apps: apps/available_help.py
76 | - Selecting models for phylogenetic analysis: apps/model_finder.py
77 | - Estimate a phylogenetic tree: apps/phylo.py
78 | - Fit a model to one tree: apps/fit_tree.py
79 | - Phylogenetic model options: apps/options.py
80 | - Pairwise JC distances: apps/pairwise_dist.py
81 | - NJ tree: apps/nj.py
82 | - Parallel processing across alignments: apps/app_pipeline.py
83 | - Contributing:
84 | - developers/index.md
85 | - developers/environment_setup.md
86 | - developers/iqtree_submodule.md
87 | - developers/contributing.md
88 | - developers/testing.md
89 | - developers/troubleshooting.md
90 | - developers/release.md
91 | - Citation: citation.md
92 |
--------------------------------------------------------------------------------
/noxfile.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import nox
4 |
5 | _py_versions = range(11, 14)
6 |
7 |
@nox.session(python=[f"3.{minor}" for minor in _py_versions])
def test(session):
    """Run the pytest suite against an editable install of piqtree.

    Positional arguments given to nox are passed through to pytest, and a
    copy of the current process environment is handed to the pytest run.
    """
    forwarded_args = tuple(session.posargs)
    run_env = dict(os.environ)

    # Editable install including the `test` extra.
    session.install("-e.[test]")
    session.run("pytest", *forwarded_args, env=run_env)
16 |
17 |
@nox.session(python=[f"3.{minor}" for minor in _py_versions])
def type_check(session):
    """Run mypy over ``src`` and ``tests`` with the `typing` extra installed.

    Positional arguments given to nox are appended to the mypy command line.
    """
    forwarded_args = tuple(session.posargs)
    run_env = dict(os.environ)

    session.install(".[typing]")
    session.run("mypy", "src", "tests", *forwarded_args, env=run_env)
26 |
27 |
@nox.session(python=[f"3.{minor}" for minor in _py_versions])
def ruff(session):
    """Run ``ruff check`` with the `lint` extra installed.

    Positional arguments given to nox are appended to the ruff command line.
    """
    forwarded_args = tuple(session.posargs)
    run_env = dict(os.environ)

    session.install(".[lint]")
    session.run("ruff", "check", *forwarded_args, env=run_env)
36 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools >= 61.0", "pybind11 >= 2.12", "delvewheel >= 1.10"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "piqtree"
7 | dependencies = ["cogent3>=2025.5.8a2", "pyyaml", "requests"]
8 | requires-python = ">=3.11, <3.14"
9 |
10 | authors = [
11 | { name = "Gavin Huttley" },
12 | { name = "Robert McArthur" },
13 | { name = "Bui Quang Minh " },
14 | { name = "Richard Morris" },
15 | { name = "Thomas Wong" },
16 | ]
17 | description = "Python bindings for IQTree"
18 | readme = "README.md"
19 | dynamic = ["version"]
20 |
21 | classifiers = [
22 | "Development Status :: 2 - Pre-Alpha",
23 |
24 | "Intended Audience :: Science/Research",
25 | "Topic :: Scientific/Engineering :: Bio-Informatics",
26 | "Topic :: Software Development :: Libraries :: Python Modules",
27 |
28 | "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
29 |
30 | "Natural Language :: English",
31 |
32 | "Operating System :: MacOS",
33 | "Operating System :: POSIX :: Linux",
34 |
35 | "Programming Language :: C++",
36 | "Programming Language :: Python :: 3",
37 | "Programming Language :: Python :: 3.11",
38 | "Programming Language :: Python :: 3.12",
39 | "Programming Language :: Python :: 3.13",
40 |
41 | "Typing :: Typed",
42 | ]
43 |
44 | [project.urls]
45 | Repository = "https://github.com/cogent3/piqtree"
46 | Documentation = "https://piqtree.readthedocs.io"
47 |
48 |
49 | [project.optional-dependencies]
50 | dev = [
51 | "cibuildwheel",
52 | "pybind11",
53 | "delvewheel",
54 | "scriv",
55 | "piqtree[test]",
56 | "piqtree[lint]",
57 | "piqtree[typing]",
58 | ]
59 | test = ["pytest", "pytest-cov", "nox"]
60 | lint = ["ruff==0.11.12"]
61 | typing = ["mypy==1.16.0", "piqtree[stubs]", "piqtree[test]"]
62 | stubs = ["types-PyYAML", "types-requests"]
63 | extra = ["cogent3[extra]"]
64 | doc = [
65 | "mkdocs",
66 | "mkdocstrings[python]",
67 | "mkdocs-jupyter",
68 | "pymdown-extensions",
69 | "jupytext",
70 | "piqtree[test]",
71 | "cogent3[extra]",
72 | "diverse-seq",
73 | "jupyter",
74 | "ipywidgets",
75 | ]
76 |
77 | [project.entry-points."cogent3.app"]
78 | piqtree_phylo = "piqtree._app:piqtree_phylo"
79 | piqtree_fit = "piqtree._app:piqtree_fit"
80 | piqtree_random_trees = "piqtree._app:piqtree_random_trees"
81 | piqtree_jc_dists = "piqtree._app:piqtree_jc_dists"
82 | piqtree_nj = "piqtree._app:piqtree_nj"
83 | piqtree_mfinder = "piqtree._app:piqtree_mfinder"
84 |
85 | [project.entry-points."cogent3.hook"]
86 | quick_tree = "piqtree._app:piqtree_nj"
87 |
88 | [tool.setuptools.dynamic]
89 | version = { attr = "piqtree.__version__" }
90 |
91 | [tool.ruff]
92 | exclude = [
93 | "iqtree2",
94 | ".bzr",
95 | ".direnv",
96 | ".eggs",
97 | ".git",
98 | ".git-rewrite",
99 | ".hg",
100 | ".ipynb_checkpoints",
101 | ".mypy_cache",
102 | ".nox",
103 | ".pants.d",
104 | ".pyenv",
105 | ".pytest_cache",
106 | ".pytype",
107 | ".ruff_cache",
108 | ".svn",
109 | ".tox",
110 | ".venv",
111 | ".vscode",
112 | "__pypackages__",
113 | "_build",
114 | "buck-out",
115 | "build",
116 | "dist",
117 | "node_modules",
118 | "site-packages",
119 | "venv",
120 | ]
121 |
122 | # Same as Black.
123 | line-length = 88
124 | indent-width = 4
125 |
126 | target-version = "py310"
127 |
128 | [tool.ruff.lint]
129 | # Enable every available rule (`ALL`); individual rules are opted out of via
130 | # `ignore` below and `per-file-ignores`. (Ruff's own default would be only
131 | # Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes.)
132 | select = ["ALL"]
133 | ignore = ["EXE002", "FA100", "E501", "PLR0913", "PLR2004", "S603", "S607", "D"]
134 |
135 | # Allow fix for all enabled rules (when `--fix` is provided).
136 | fixable = ["ALL"]
137 | unfixable = []
138 |
139 | # Allow unused variables when underscore-prefixed.
140 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
141 |
142 | [tool.ruff.lint.per-file-ignores]
143 | "tests/**/*.py" = [
144 | "S101", # asserts allowed in tests...
145 | "INP001", # __init__.py files are not required...
146 | "N802", # allow non snake_case function names for fixtures
147 | "N803", # allow use of fixture constants
148 | "SLF001", # private member access is useful for testing
149 | "FBT001", # allow bool pos args for parameterisation
150 | "D", # don't require docstrings
151 | ]
152 | "docs/**/*.py" = ["B018", "E402", "ERA001", "INP001"]
153 | "noxfile.py" = [
154 | "S101", # asserts allowed in tests...
155 | "INP001", # __init__.py files are not required...
156 | "ANN",
157 | "N802",
158 | "N803",
159 | "D",
160 | ]
161 | "src/piqtree/_app/__init__.py" = [
162 | "N801", # apps follow function naming convention
163 | ]
164 | "src/piqtree/__init__.py" = [
165 | "E402", # handle DLLs before imports
166 | "PTH118", # os operations for DLL path
167 | "PTH120", # os operations for DLL path
168 | ]
169 | "src/piqtree/model/_substitution_model.py" = [
170 | "N815", # use IQ-TREE naming scheme
171 | ]
172 |
173 | [tool.ruff.format]
174 | # Like Black, use double quotes for strings.
175 | quote-style = "double"
176 |
177 | # Like Black, indent with spaces, rather than tabs.
178 | indent-style = "space"
179 |
180 | # Like Black, respect magic trailing commas.
181 | skip-magic-trailing-comma = false
182 |
183 | # Like Black, automatically detect the appropriate line ending.
184 | line-ending = "auto"
185 |
186 | # Enable auto-formatting of code examples in docstrings. Markdown,
187 | # reStructuredText code/literal blocks and doctests are all supported.
188 | #
189 | # This is currently disabled by default, but it is planned for this
190 | # to be opt-out in the future.
191 | docstring-code-format = false
192 |
193 | # Set the line length limit used when formatting code snippets in
194 | # docstrings.
195 | #
196 | # This only has an effect when the `docstring-code-format` setting is
197 | # enabled.
198 | docstring-code-line-length = "dynamic"
199 |
200 | [tool.scriv]
201 | format = "md"
202 | categories = [
203 | "Contributors",
204 | "ENH",
205 | "BUG",
206 | "DOC",
207 | "Deprecations",
208 | "Discontinued",
209 | ]
210 | output_file = "changelog.md"
211 | version = "literal: src/piqtree/__init__.py: __version__"
212 | skip_fragments = "README.*"
213 | new_fragment_template = "file: changelog.d/templates/new.md.j2"
214 | entry_title_template = "file: changelog.d/templates/title.md.j2"
215 |
216 | [[tool.mypy.overrides]]
217 | module = ['cogent3.*', "_piqtree"]
218 | ignore_missing_imports = true
219 |
--------------------------------------------------------------------------------
/rtd_get_docs.py:
--------------------------------------------------------------------------------
1 | # This file is used directly by .readthedocs.yaml.
2 | # It extracts the built docs from the GitHub artefact created by the
3 | # build_docs.yml GitHub action (for the "latest" version) or by the
4 | # release.yml GitHub action (for the "stable" version).
5 | import os
6 | import pathlib
7 | import time
8 | import zipfile
9 |
10 | import requests
11 |
12 | MAX_WAIT_TIME = 600 # Typically takes 5.5 minutes
13 | POLL_INTERVAL = 20
14 |
15 |
def get_rtd_version_name() -> str:
    """Return the lowercased ``READTHEDOCS_VERSION_NAME`` environment value.

    An unset variable yields the empty string.
    """
    raw_name = os.environ.get("READTHEDOCS_VERSION_NAME", "")
    return raw_name.lower()
18 |
19 |
def get_github_token() -> str:
    """Return the GitHub API token taken from the environment.

    ``GITHUB_TOKEN`` is preferred; ``GITHUB_TOKEN_PRIVATE`` is used when the
    former is unset *or empty*, so that a blank ``GITHUB_TOKEN`` does not hide
    a usable private token.

    Raises
    ------
    OSError
        If neither variable holds a non-empty value.
    """
    # `or` (rather than an `is None` check) treats an empty string as missing.
    token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GITHUB_TOKEN_PRIVATE")
    if not token:
        msg = "GitHub token not found."
        raise OSError(msg)
    return token
28 |
29 |
def get_latest_run(workflow_filename: str, headers: dict) -> dict:
    """Return the first workflow run GitHub lists for ``workflow_filename``.

    Parameters
    ----------
    workflow_filename
        Name of the workflow file in the iqtree/piqtree repository.
    headers
        HTTP headers sent with the request.

    Raises
    ------
    requests.HTTPError
        If the request returns an error status.
    ValueError
        If the response lists no runs for the workflow.
    """
    runs_url = (
        "https://api.github.com/repos/iqtree/piqtree/actions/workflows/"
        f"{workflow_filename}/runs"
    )

    reply = requests.get(runs_url, headers=headers, timeout=10)
    reply.raise_for_status()

    workflow_runs = reply.json()["workflow_runs"]
    if workflow_runs:
        # NOTE(review): presumably the API lists the newest run first — confirm.
        return workflow_runs[0]

    msg = f"No workflow runs found for: '{workflow_filename}'"
    raise ValueError(msg)
42 |
43 |
def wait_for_run_completion(run: dict, headers: dict) -> dict:
    """Poll a workflow run until it completes and return its final state.

    The run is re-fetched every ``POLL_INTERVAL`` seconds. Only the time spent
    sleeping is counted towards ``MAX_WAIT_TIME``.

    Raises
    ------
    requests.HTTPError
        If a status request returns an error status.
    RuntimeError
        If the run completes with a conclusion other than ``"success"``.
    TimeoutError
        If the run has not completed once ``MAX_WAIT_TIME`` has been waited.
    """
    status_url = (
        f"https://api.github.com/repos/iqtree/piqtree/actions/runs/{run['id']}"
    )

    elapsed = 0
    while elapsed < MAX_WAIT_TIME:
        reply = requests.get(status_url, headers=headers, timeout=10)
        reply.raise_for_status()
        run_status = reply.json()

        status = run_status["status"]
        conclusion = run_status["conclusion"]

        if status != "completed":
            # Still queued or running: wait before asking again.
            time.sleep(POLL_INTERVAL)
            elapsed += POLL_INTERVAL
            continue

        if conclusion == "success":
            return run_status

        msg = f"Latest workflow run failed with conclusion: '{conclusion}'"
        raise RuntimeError(msg)

    msg = "Timed out waiting for workflow run to complete."
    raise TimeoutError(msg)
68 |
69 |
def download_and_extract_artifact(run: dict, headers: dict) -> None:
    """Download the ``piqtree-docs-html`` artifact of ``run`` and unpack it.

    The archive is saved as ``piqtree-docs-html.zip`` in the current working
    directory and extracted into ``_readthedocs/html/``.

    Raises
    ------
    requests.HTTPError
        If listing or downloading the artifact returns an error status.
    ValueError
        If the run has no artifact with the expected name.
    """
    artifact_name = "piqtree-docs-html"

    listing = requests.get(run["artifacts_url"], headers=headers, timeout=10)
    listing.raise_for_status()

    # Pick the first artifact carrying the expected name.
    for candidate in listing.json()["artifacts"]:
        if candidate["name"] == artifact_name:
            artifact = candidate
            break
    else:
        msg = f"Artifact '{artifact_name}' not found in the run."
        raise ValueError(msg)

    download = requests.get(
        artifact["archive_download_url"],
        headers=headers,
        timeout=10,
    )
    download.raise_for_status()

    archive_path = pathlib.Path(f"{artifact_name}.zip")
    archive_path.write_bytes(download.content)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall("_readthedocs/html/")
93 |
94 |
def download_and_extract_docs() -> None:
    """Fetch and unpack the built docs for the current Read the Docs version.

    The ``stable`` version uses the ``release.yml`` workflow and ``latest``
    uses ``build_docs.yml``.

    Raises
    ------
    ValueError
        If the Read the Docs version is neither ``latest`` nor ``stable``.
    """
    workflow_by_version = {"stable": "release.yml", "latest": "build_docs.yml"}

    version = get_rtd_version_name()
    workflow_filename = workflow_by_version.get(version)
    if workflow_filename is None:
        msg = f"Unexpected version '{version}' for readthedocs."
        raise ValueError(msg)

    headers = {"Authorization": f"token {get_github_token()}"}

    completed_run = wait_for_run_completion(
        get_latest_run(workflow_filename, headers),
        headers,
    )
    download_and_extract_artifact(completed_run, headers)
109 |
110 |
111 | if __name__ == "__main__":
112 | download_and_extract_docs()
113 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """setup for piqtree."""
2 |
3 | import os
4 | import platform
5 | import subprocess
6 | from pathlib import Path
7 |
8 | from pybind11.setup_helpers import Pybind11Extension, build_ext
9 | from setuptools import setup
10 |
11 | LIBRARY_DIR = "src/piqtree/_libiqtree/"
12 | IQTREE_LIB_NAME = "iqtree2"
13 |
14 |
def get_brew_prefix(package: str) -> Path:
    """Return the path printed by ``brew --prefix <package>``."""
    raw_prefix = subprocess.check_output(["brew", "--prefix", package])
    return Path(raw_prefix.strip().decode("utf-8"))
20 |
21 |
22 | extra_libs = []
23 | extra_compile_args = []
24 | include_dirs = []
25 | library_dirs = []
26 |
27 |
def include_dlls() -> None:
    """Copy the DLLs reported for the IQ-TREE DLL into ``LIBRARY_DIR``."""
    import shutil

    from delvewheel._dll_utils import get_all_needed

    iqtree_dll = f"{LIBRARY_DIR}{IQTREE_LIB_NAME}.dll"

    # NOTE(review): `get_all_needed` is a private delvewheel helper; the
    # meaning of the positional arguments below is not visible here — confirm
    # against the pinned delvewheel version.
    dependency_paths, _, _, _ = get_all_needed(
        iqtree_dll,
        set(),
        None,
        "raise",
        False,  # noqa: FBT003
        False,  # noqa: FBT003
        0,
    )

    for dependency in dependency_paths:
        shutil.copy(dependency, LIBRARY_DIR)
45 |
46 |
def setup_windows() -> None:
    """Prepare the Windows build by copying the required DLLs into place."""
    include_dlls()
49 |
50 |
def setup_macos() -> None:
    """Configure compiler, OpenMP flags and search paths for a macOS build.

    Sets ``CC``/``CXX`` to Homebrew's clang and extends the module-level
    ``extra_compile_args``, ``extra_libs``, ``include_dirs`` and
    ``library_dirs`` lists in place.
    """
    llvm_root = get_brew_prefix("llvm")
    libomp_root = get_brew_prefix("libomp")

    # Use Homebrew's clang/clang++
    llvm_bin = llvm_root / "bin"
    os.environ["CC"] = str(llvm_bin / "clang")
    os.environ["CXX"] = str(llvm_bin / "clang++")

    # OpenMP flags and libraries for macOS
    extra_compile_args.extend(("-Xpreprocessor", "-fopenmp"))
    extra_libs.extend(("z", "omp"))

    # Header and library locations come from the Homebrew installs.
    include_dirs.append(str(libomp_root / "include"))
    library_dirs.extend(str(root / "lib") for root in (libomp_root, llvm_root))
71 |
72 |
def setup_linux() -> None:
    """Add the OpenMP compile flag and the ``z``/``gomp`` libraries for Linux."""
    extra_compile_args.append("-fopenmp")
    extra_libs.extend(("z", "gomp"))
76 |
77 |
# Apply the build configuration for the host platform; anything other than
# Windows, macOS (Darwin) or Linux is rejected.
system = platform.system()
if system == "Windows":
    setup_windows()
elif system == "Darwin":
    setup_macos()
elif system == "Linux":
    setup_linux()
else:
    msg = f"Unsupported platform: {system}"
    raise ValueError(msg)
88 |
89 | ext_modules = [
90 | Pybind11Extension(
91 | "_piqtree",
92 | ["src/piqtree/_libiqtree/_piqtree.cpp"],
93 | library_dirs=[
94 | *library_dirs,
95 | LIBRARY_DIR,
96 | ],
97 | libraries=[IQTREE_LIB_NAME, *extra_libs],
98 | extra_compile_args=extra_compile_args,
99 | include_dirs=include_dirs,
100 | ),
101 | ]
102 |
103 | setup(
104 | name="piqtree",
105 | ext_modules=ext_modules,
106 | cmdclass={"build_ext": build_ext},
107 | zip_safe=False,
108 | package_data={"piqtree": ["_libiqtree/*.dll"]},
109 | )
110 |
--------------------------------------------------------------------------------
/src/piqtree/__init__.py:
--------------------------------------------------------------------------------
1 | """piqtree - access the power of IQ-TREE within Python."""
2 |
3 |
def _add_dll_path() -> None:
    """Register the bundled ``_libiqtree`` folder as a DLL search directory.

    Does nothing when ``os`` has no ``add_dll_directory`` attribute.
    """
    import os

    if not hasattr(os, "add_dll_directory"):
        return

    package_dir = os.path.dirname(__file__)
    os.add_dll_directory(os.path.join(package_dir, "_libiqtree"))  # type: ignore[attr-defined]
10 |
11 |
12 | _add_dll_path()
13 | del _add_dll_path
14 |
15 |
16 | from _piqtree import __iqtree_version__
17 |
18 | from piqtree._data import dataset_names, download_dataset
19 | from piqtree.iqtree import (
20 | ModelFinderResult,
21 | TreeGenMode,
22 | build_tree,
23 | fit_tree,
24 | jc_distances,
25 | model_finder,
26 | nj_tree,
27 | random_trees,
28 | robinson_foulds,
29 | )
30 | from piqtree.model import (
31 | Model,
32 | available_freq_type,
33 | available_models,
34 | available_rate_type,
35 | make_model,
36 | )
37 |
38 | __version__ = "0.5.0"
39 |
40 | __all__ = [
41 | "Model",
42 | "ModelFinderResult",
43 | "TreeGenMode",
44 | "__iqtree_version__",
45 | "available_freq_type",
46 | "available_models",
47 | "available_rate_type",
48 | "build_tree",
49 | "dataset_names",
50 | "download_dataset",
51 | "fit_tree",
52 | "jc_distances",
53 | "make_model",
54 | "model_finder",
55 | "nj_tree",
56 | "random_trees",
57 | "robinson_foulds",
58 | ]
59 |
--------------------------------------------------------------------------------
/src/piqtree/_app/__init__.py:
--------------------------------------------------------------------------------
1 | """cogent3 apps for piqtree."""
2 |
3 | import cogent3
4 | import cogent3.app.typing as c3_types
5 | from cogent3.app import composable
6 | from cogent3.util.misc import extend_docstring_from
7 |
8 | from piqtree import (
9 | TreeGenMode,
10 | build_tree,
11 | fit_tree,
12 | jc_distances,
13 | model_finder,
14 | nj_tree,
15 | random_trees,
16 | )
17 | from piqtree.iqtree import ModelFinderResult
18 | from piqtree.model import Model
19 |
20 |
# cogent3 app wrapping build_tree: the model and run options are fixed at
# construction time and main() is then applied to an alignment.
@composable.define_app
class piqtree_phylo:
    # Notes are written as comments rather than a docstring because the
    # __init__ docstring is managed by extend_docstring_from(build_tree).
    @extend_docstring_from(build_tree)
    def __init__(
        self,
        model: Model | str,
        *,
        rand_seed: int | None = None,
        bootstrap_reps: int | None = None,
        num_threads: int | None = None,
    ) -> None:
        # Options are stored unchanged and only read again in main().
        self._model = model
        self._rand_seed = rand_seed
        self._bootstrap_reps = bootstrap_reps
        self._num_threads = num_threads

    def main(
        self,
        aln: c3_types.AlignedSeqsType,
    ) -> cogent3.PhyloNode | cogent3.app.typing.SerialisableType:
        # model and rand_seed go positionally; bootstrap_reps is forwarded
        # under build_tree's keyword name ``bootstrap_replicates``.
        return build_tree(
            aln,
            self._model,
            self._rand_seed,
            bootstrap_replicates=self._bootstrap_reps,
            num_threads=self._num_threads,
        )
48 |
49 |
# cogent3 app wrapping fit_tree: the tree, model and run options are fixed at
# construction time and main() is then applied to an alignment.
@composable.define_app
class piqtree_fit:
    # Notes are written as comments rather than a docstring because the
    # __init__ docstring is managed by extend_docstring_from(fit_tree).
    @extend_docstring_from(fit_tree)
    def __init__(
        self,
        tree: cogent3.PhyloNode,
        model: Model | str,
        *,
        rand_seed: int | None = None,
        num_threads: int | None = None,
    ) -> None:
        # Options are stored unchanged and only read again in main().
        self._tree = tree
        self._model = model
        self._rand_seed = rand_seed
        self._num_threads = num_threads

    def main(
        self,
        aln: c3_types.AlignedSeqsType,
    ) -> cogent3.PhyloNode | cogent3.app.typing.SerialisableType:
        # All stored options are passed positionally, in the order
        # (aln, tree, model, rand_seed, num_threads).
        # NOTE(review): fit_tree's signature is not visible here - confirm the
        # positional order matches it.
        return fit_tree(
            aln,
            self._tree,
            self._model,
            self._rand_seed,
            self._num_threads,
        )
77 |
78 |
# cogent3 function app wrapping random_trees. Notes are comments rather than a
# docstring because the docstring is managed by
# extend_docstring_from(random_trees).
@composable.define_app
@extend_docstring_from(random_trees)
def piqtree_random_trees(
    num_taxa: int,
    num_trees: int,
    tree_mode: TreeGenMode,
    rand_seed: int | None = None,
) -> tuple[cogent3.PhyloNode]:
    # The app takes num_taxa before num_trees, while the call below passes
    # num_trees first, so the two arguments are deliberately swapped here.
    return random_trees(num_trees, num_taxa, tree_mode, rand_seed)
88 |
89 |
# cogent3 app wrapping jc_distances: the thread count is fixed at construction
# time and main() is then applied to an alignment.
@composable.define_app
class piqtree_jc_dists:
    # Notes are written as comments rather than a docstring because the
    # __init__ docstring is managed by extend_docstring_from(jc_distances).
    @extend_docstring_from(jc_distances)
    def __init__(
        self,
        num_threads: int | None = None,
    ) -> None:
        self._num_threads = num_threads

    def main(
        self,
        aln: c3_types.AlignedSeqsType,
    ) -> c3_types.PairwiseDistanceType | cogent3.app.typing.SerialisableType:
        # jc_distances returns a pairwise distance matrix, not a tree; the
        # return hint must say so because define_app reads it to decide which
        # apps this one can be composed with (e.g. piqtree_nj, which accepts a
        # PairwiseDistanceType).
        return jc_distances(
            aln,
            num_threads=self._num_threads,
        )
107 |
108 |
# cogent3 function app wrapping nj_tree. Notes are comments rather than a
# docstring because the docstring is managed by extend_docstring_from(nj_tree).
@composable.define_app
@extend_docstring_from(nj_tree)
def piqtree_nj(
    dists: c3_types.PairwiseDistanceType,
    *,
    allow_negative: bool = False,
) -> cogent3.PhyloNode:
    tree = nj_tree(dists, allow_negative=allow_negative)
    # Tag the tree so downstream code can tell it was produced by piqtree.
    tree.params |= {"provenance": "piqtree"}
    return tree
119 |
120 |
# cogent3 function app wrapping model_finder. Only the alignment is exposed;
# every other model_finder option keeps its default. Notes are comments rather
# than a docstring because the docstring is managed by
# extend_docstring_from(model_finder).
@composable.define_app
@extend_docstring_from(model_finder)
def piqtree_mfinder(
    aln: c3_types.AlignedSeqsType,
) -> ModelFinderResult | c3_types.SerialisableType:
    return model_finder(aln)
127 |
128 |
# Names of every app defined in this module, in definition order.
# NOTE(review): presumably consumed by the package's app registration - keep
# it in sync when adding or removing an app.
_ALL_APP_NAMES = [
    "piqtree_phylo",
    "piqtree_fit",
    "piqtree_random_trees",
    "piqtree_jc_dists",
    "piqtree_nj",
    "piqtree_mfinder",
]
137 |
--------------------------------------------------------------------------------
/src/piqtree/_data.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import zipfile
3 |
4 | import requests
5 |
# Maps each downloadable dataset name to the URL it is fetched from. The keys
# are the names reported by dataset_names() and accepted by download_dataset().
_data_files = {
    "mammal-orths.zip": "https://github.com/user-attachments/files/17806562/mammal-orths.zip",
    "brca1.fasta.gz": "https://github.com/user-attachments/files/17806563/brca1.fasta.gz",
    "example.phy.gz": "https://github.com/user-attachments/files/17806561/example.phy.gz",
    "example.tree.gz": "https://github.com/user-attachments/files/17821150/example.tree.gz",
}
12 |
13 |
def _inflate_zip(zip_path: pathlib.Path, output_dir: pathlib.Path) -> pathlib.Path:
    """Extract every member of the archive at zip_path into output_dir.

    Returns output_dir so the caller can use it as the extracted location.
    """
    with zipfile.ZipFile(zip_path, mode="r") as archive:
        archive.extractall(path=output_dir)
    return output_dir
19 |
20 |
def _get_url(name: str) -> str:
    """Look up the download URL registered for a data file name.

    Raises
    ------
    ValueError
        If name is not one of the known data files.
    """
    url = _data_files.get(name)
    if url is None:
        msg = f"Unknown data file: {name}"
        raise ValueError(msg)
    return url
27 |
28 |
def dataset_names() -> list[str]:
    """List the dataset names that download_dataset accepts."""
    return [*_data_files]
32 |
33 |
def download_dataset(
    name: str,
    dest_dir: str | pathlib.Path,
    dest_name: str | None = None,
    *,
    inflate_zip: bool = True,
) -> pathlib.Path:
    """Download a data file used in the docs; requires an internet connection.

    Parameters
    ----------
    name : str
        data set name, see `dataset_names()`.
    dest_dir : str | pathlib.Path
        where to write a local copy.
    dest_name : str | None, optional
        name of the file to write, if None (default) uses name.
    inflate_zip : bool = True
        whether to unzip archives, by default True.

    Returns
    -------
    pathlib.Path
        path to the downloaded file, or dest_dir when a freshly downloaded
        zip archive was inflated.

    Raises
    ------
    ValueError
        If name is not a known data set.
    requests.HTTPError
        If the server answers with an error status.

    Notes
    -----
    Only downloads if dest_dir / dest_name does not exist. An existing file
    is returned as-is and is not inflated again.

    """
    dest_dir = pathlib.Path(dest_dir)

    url = _get_url(name)
    dest_name = dest_name or name
    outpath = dest_dir / dest_name
    outpath.parent.mkdir(parents=True, exist_ok=True)
    if outpath.exists():
        return outpath

    block_size = 4096
    # Stream into a sibling ".part" file and rename it only once the download
    # finished, so a failed or interrupted transfer can never be mistaken for
    # a cached dataset by the exists() check above.
    partial = outpath.with_name(outpath.name + ".part")
    try:
        with requests.get(url, stream=True, timeout=20) as response:
            # Without this an HTTP error page would be saved as the dataset.
            response.raise_for_status()
            with partial.open("wb") as out:
                for data in response.iter_content(block_size):
                    out.write(data)
        partial.replace(outpath)
    finally:
        # No-op after a successful rename; removes the fragment otherwise.
        partial.unlink(missing_ok=True)

    if inflate_zip and outpath.suffix == ".zip":
        outpath = _inflate_zip(outpath, dest_dir)
    return outpath
82 |
--------------------------------------------------------------------------------
/src/piqtree/_libiqtree/README.txt:
--------------------------------------------------------------------------------
1 | Place the IQ-TREE static library, `libiqtree2.a`, in this directory.
--------------------------------------------------------------------------------
/src/piqtree/_libiqtree/_piqtree.cpp:
--------------------------------------------------------------------------------
1 | #include "_piqtree.h"
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include "_piqtree.h"
9 |
10 | namespace py = pybind11;
11 |
// Takes ownership of an error string returned by the IQ-TREE library.
// The buffer is always released with free(); if it held a non-empty message,
// that message is rethrown as std::runtime_error. A null pointer is a no-op.
void checkError(char* errorStr) {
  if (errorStr == nullptr) {
    return;
  }

  const bool hasMessage = std::strlen(errorStr) > 0;
  // Copy before freeing so the message outlives the C buffer.
  const std::string message(errorStr);
  free(errorStr);

  if (hasMessage) {
    throw std::runtime_error(message);
  }
}
21 |
22 | namespace PYBIND11_NAMESPACE {
23 | namespace detail {
24 | template <>
25 | struct type_caster {
26 | public:
27 | PYBIND11_TYPE_CASTER(StringArray, const_name("StringArray"));
28 |
29 | // Conversion from Python to C++
30 | bool load(handle src, bool) {
31 | /* Extract PyObject from handle */
32 | PyObject* source = src.ptr();
33 | if (!py::isinstance(source)) {
34 | return false;
35 | }
36 |
37 | auto seq = reinterpret_borrow(src);
38 | value.length = seq.size();
39 |
40 | tmpStrings.reserve(value.length);
41 | tmpCStrs.reserve(value.length);
42 |
43 | for (size_t i = 0; i < seq.size(); ++i) {
44 | auto item = seq[i];
45 | if (!py::isinstance(item)) {
46 | return false;
47 | }
48 |
49 | tmpStrings.push_back(item.cast());
50 | tmpCStrs.push_back(tmpStrings[i].c_str());
51 | }
52 |
53 | value.strings = tmpCStrs.data();
54 |
55 | return true;
56 | }
57 |
58 | // Conversion from C++ to Python
59 | static handle cast(StringArray src, return_value_policy, handle) {
60 | throw std::runtime_error("Unsupported operation");
61 | }
62 |
63 | private:
64 | vector tmpStrings;
65 | vector tmpCStrs;
66 | };
67 |
68 | template <>
69 | struct type_caster {
70 | public:
71 | PYBIND11_TYPE_CASTER(DoubleArray, _("DoubleArray"));
72 |
73 | // Conversion from Python to C++
74 | bool load(handle src, bool) {
75 | if (!py::isinstance>(src)) {
76 | return false; // Only accept numpy arrays of float64
77 | }
78 |
79 | auto arr = py::cast>(src);
80 | if (arr.ndim() != 1) {
81 | return false; // Only accept 1D arrays
82 | }
83 |
84 | value.length = arr.size();
85 |
86 | tmpDoubles.assign(arr.data(), arr.data() + value.length);
87 | value.doubles = tmpDoubles.data();
88 |
89 | return true;
90 | }
91 |
92 | // Conversion from C++ to Python
93 | static handle cast(DoubleArray src, return_value_policy, handle) {
94 | throw std::runtime_error("Unsupported operation");
95 | }
96 |
97 | private:
98 | vector tmpDoubles;
99 | };
100 |
101 | template <>
102 | struct type_caster {
103 | public:
104 | PYBIND11_TYPE_CASTER(IntegerResult, _("IntegerResult"));
105 |
106 | // Conversion from Python to C++
107 | bool load(handle /* src */, bool /* convert */) {
108 | throw std::runtime_error("Unsupported operation");
109 | }
110 |
111 | // Conversion from C++ to Python
112 | static handle cast(const IntegerResult& src, return_value_policy, handle) {
113 | checkError(src.errorStr);
114 |
115 | return py::int_(src.value).release();
116 | }
117 | };
118 |
119 | template <>
120 | struct type_caster {
121 | public:
122 | // Indicate that this caster only supports conversion from C++ to Python
123 | PYBIND11_TYPE_CASTER(StringResult, _("StringResult"));
124 |
125 | // Reject Python to C++ conversion
126 | bool load(handle src, bool) {
127 | throw std::runtime_error("Unsupported operation");
128 | }
129 |
130 | // Conversion from C++ to Python
131 | static handle cast(const StringResult& src, return_value_policy, handle) {
132 | checkError(src.errorStr);
133 |
134 | PyObject* py_str = PyUnicode_FromString(src.value);
135 | if (!py_str)
136 | throw error_already_set();
137 |
138 | free(src.value);
139 |
140 | return handle(py_str);
141 | }
142 | };
143 |
144 | template <>
145 | struct type_caster {
146 | public:
147 | PYBIND11_TYPE_CASTER(DoubleArrayResult, _("DoubleArrayResult"));
148 |
149 | // Conversion from Python to C++
150 | bool load(handle src, bool) {
151 | throw std::runtime_error("Unsupported operation");
152 | }
153 |
154 | // Conversion from C++ to Python
155 | static handle cast(DoubleArrayResult src, return_value_policy, handle) {
156 | checkError(src.errorStr);
157 |
158 | auto result = py::array_t(src.length);
159 |
160 | std::memcpy(result.mutable_data(), src.value, src.length * sizeof(double));
161 | free(src.value);
162 |
163 | return result.release();
164 | }
165 | };
166 | } // namespace detail
167 | } // namespace PYBIND11_NAMESPACE
168 |
// Returns the constant 42; exposed to Python as `mine` by the module
// definition.
int mine() {
  constexpr int kAnswer = 42;
  return kAnswer;
}
172 |
// Defines the `_piqtree` extension module: sets its docstring, publishes the
// IQ-TREE version string and registers each library entry point under an
// `iq_`-prefixed Python name.
PYBIND11_MODULE(_piqtree, m) {
  m.doc() = "_piqtree - Linking IQ-TREE to Python!";

  // version() returns a StringResult, which the custom caster turns into a
  // Python str (or an exception if the library reported an error).
  m.attr("__iqtree_version__") = version();

  m.def("iq_robinson_fould", &robinson_fould,
        "Calculates the robinson fould distance between two trees");
  m.def("iq_random_tree", &random_tree,
        "Generates a set of random phylogenetic trees. tree_gen_mode "
        "allows:\"YULE_HARDING\", \"UNIFORM\", \"CATERPILLAR\", \"BALANCED\", "
        "\"BIRTH_DEATH\", \"STAR_TREE\".");
  m.def("iq_build_tree", &build_tree,
        "Perform phylogenetic analysis on the input alignment (in string "
        "format). With estimation of the best topology.");
  m.def("iq_fit_tree", &fit_tree,
        "Perform phylogenetic analysis on the input alignment (in string "
        "format). With restriction to the input toplogy.");
  m.def("iq_model_finder", &modelfinder,
        "Find optimal model for an alignment.");
  m.def("iq_jc_distances", &build_distmatrix,
        "Construct pairwise distance matrix for alignment.");
  m.def("iq_nj_tree", &build_njtree,
        "Build neighbour-joining tree from distance matrix.");
  // Binding for the local mine() function defined in this file.
  m.def("mine", &mine, "The meaning of life, the universe (and everything)!");
}
198 |
--------------------------------------------------------------------------------
/src/piqtree/_libiqtree/_piqtree.h:
--------------------------------------------------------------------------------
1 | #ifndef _PIQTREE_H
2 | #define _PIQTREE_H
3 |
4 | #include
5 | #include
6 |
7 | using namespace std;
8 |
// The structs below cross the boundary to the IQ-TREE library and are packed
// to 1-byte alignment. MSVC uses push/pop; other compilers set and then reset
// the packing.
// NOTE(review): presumably this must match the packing the IQ-TREE library
// itself was built with - confirm before changing.
#ifdef _MSC_VER
#pragma pack(push, 1)
#else
#pragma pack(1)
#endif

// Input: an array of `length` C strings.
typedef struct {
  const char** strings;
  size_t length;
} StringArray;

// Input: an array of `length` doubles.
typedef struct {
  double* doubles;
  size_t length;
} DoubleArray;

// Output: an integer value, plus an error message that is non-empty on
// failure.
typedef struct {
  int value;
  char* errorStr;
} IntegerResult;

// Output: a C string value, plus an error message that is non-empty on
// failure.
typedef struct {
  char* value;
  char* errorStr;
} StringResult;

// Output: `length` doubles, plus an error message that is non-empty on
// failure.
typedef struct {
  double* value;
  size_t length;
  char* errorStr;
} DoubleArrayResult;

#ifdef _MSC_VER
#pragma pack(pop)
#else
#pragma pack()
#endif
46 |
47 | /*
48 | * Calculates the robinson fould distance between two trees
49 | */
50 | extern "C" IntegerResult robinson_fould(const char* ctree1, const char* ctree2);
51 |
52 | /*
53 | * Generates a set of random phylogenetic trees
54 | * tree_gen_mode allows:"YULE_HARDING", "UNIFORM", "CATERPILLAR", "BALANCED",
55 | * "BIRTH_DEATH", "STAR_TREE" output: a newick tree (in string format)
56 | */
57 | extern "C" StringResult random_tree(int num_taxa,
58 | const char* tree_gen_mode,
59 | int num_trees,
60 | int rand_seed = 0);
61 |
62 | /*
63 | * Perform phylogenetic analysis on the input alignment
64 | * With estimation of the best topology
65 | * num_thres -- number of cpu threads to be used, default: 1; 0 - auto detection
66 | * of the optimal number of cpu threads output: results in YAML format with the
67 | * tree and the details of parameters
68 | */
69 | extern "C" StringResult build_tree(StringArray& names,
70 | StringArray& seqs,
71 | const char* model,
72 | int rand_seed = 0,
73 | int bootstrap_rep = 0,
74 | int num_thres = 1);
75 |
76 | /*
77 | * Perform phylogenetic analysis on the input alignment
78 | * With restriction to the input topology
79 | * num_thres -- number of cpu threads to be used, default: 1; 0 - auto detection
80 | * of the optimal number of cpu threads output: results in YAML format with the
81 | * details of parameters
82 | */
83 | extern "C" StringResult fit_tree(StringArray& names,
84 | StringArray& seqs,
85 | const char* model,
86 | const char* intree,
87 | int rand_seed = 0,
88 | int num_thres = 1);
89 |
90 | /*
91 | * Perform phylogenetic analysis with ModelFinder
92 | * on the input alignment (in string format)
93 | * model_set -- a set of models to consider
94 | * freq_set -- a set of frequency types
95 | * rate_set -- a set of RHAS models
96 | * rand_seed -- random seed, if 0, then will generate a new random seed
97 | * num_thres -- number of cpu threads to be used, default: 1; 0 - auto detection
98 | * of the optimal number of cpu threads output: modelfinder results in YAML
99 | * format
100 | */
101 | extern "C" StringResult modelfinder(StringArray& names,
102 | StringArray& seqs,
103 | int rand_seed = 0,
104 | const char* model_set = "",
105 | const char* freq_set = "",
106 | const char* rate_set = "",
107 | int num_thres = 1);
108 |
109 | /*
110 | * Build pairwise JC distance matrix
111 | * output: set of distances
112 | * (n * i + j)-th element of the list represents the distance between i-th and
113 | * j-th sequence, where n is the number of sequences num_thres -- number of cpu
114 | * threads to be used, default: 1; 0 - use all available cpu threads on the
115 | * machine
116 | */
117 | extern "C" DoubleArrayResult build_distmatrix(StringArray& names,
118 | StringArray& seqs,
119 | int num_thres = 1);
120 |
121 | /*
122 | * Using Rapid-NJ to build tree from a distance matrix
123 | * output: a newick tree (in string format)
124 | */
125 | extern "C" StringResult build_njtree(StringArray& names,
126 | DoubleArray& distances);
127 |
128 | /*
129 | * version number
130 | */
131 | extern "C" StringResult version();
132 |
133 | #endif /* _PIQTREE_H */
134 |
--------------------------------------------------------------------------------
/src/piqtree/exceptions.py:
--------------------------------------------------------------------------------
1 | """Contains piqtree exceptions."""
2 |
3 |
class IqTreeError(Exception):
    """An error reported by the IQ-TREE library.

    Raised in place of the ``RuntimeError`` that a wrapped IQ-TREE library
    call produces when it fails.
    """
6 |
7 |
class ParseIqTreeError(Exception):
    """A result returned by IQ-TREE could not be parsed."""
10 |
--------------------------------------------------------------------------------
/src/piqtree/iqtree/__init__.py:
--------------------------------------------------------------------------------
1 | """Functions for calling IQ-TREE as a library."""
2 |
3 | from ._jc_distance import jc_distances
4 | from ._model_finder import ModelFinderResult, ModelResultValue, model_finder
5 | from ._random_tree import TreeGenMode, random_trees
6 | from ._robinson_foulds import robinson_foulds
7 | from ._tree import build_tree, fit_tree, nj_tree
8 |
9 | __all__ = [
10 | "ModelFinderResult",
11 | "ModelResultValue",
12 | "TreeGenMode",
13 | "build_tree",
14 | "fit_tree",
15 | "jc_distances",
16 | "model_finder",
17 | "nj_tree",
18 | "random_trees",
19 | "robinson_foulds",
20 | ]
21 |
--------------------------------------------------------------------------------
/src/piqtree/iqtree/_decorator.py:
--------------------------------------------------------------------------------
1 | """Decorators for IQ-TREE functions."""
2 |
3 | import os
4 | import pathlib
5 | import sys
6 | import tempfile
7 | from collections.abc import Callable
8 | from functools import wraps
9 | from typing import TypeVar
10 |
11 | from typing_extensions import ParamSpec
12 |
13 | from piqtree.exceptions import IqTreeError
14 |
15 | Param = ParamSpec("Param")
16 | RetType = TypeVar("RetType")
17 |
18 |
def iqtree_func(
    func: Callable[Param, RetType],
    *,
    hide_files: bool | None = False,
) -> Callable[Param, RetType]:
    """IQ-TREE function wrapper.

    Hides stdout and stderr, as well as any output files.

    Parameters
    ----------
    func : Callable[Param, RetType]
        The IQ-TREE library function.
    hide_files : bool | None, optional
        Whether hiding output files is necessary, by default False.
        When set, the call runs inside a temporary working directory
        that is removed afterwards.

    Returns
    -------
    Callable[Param, RetType]
        The wrapped IQ-TREE function.

    Raises
    ------
    IqTreeError
        An error from the IQ-TREE library.

    """

    @wraps(func)
    def wrapper_iqtree_func(*args: Param.args, **kwargs: Param.kwargs) -> RetType:
        # Flush stdout and stderr so buffered Python output is not lost
        # or reordered by the redirection below.
        sys.stdout.flush()
        sys.stderr.flush()

        stdout_fd = sys.stdout.fileno()
        stderr_fd = sys.stderr.fileno()

        # Save original stdout and stderr file descriptors
        original_stdout_fd = os.dup(stdout_fd)
        original_stderr_fd = os.dup(stderr_fd)

        # Open /dev/null (or NUL on Windows) as destination for stdout and stderr
        devnull_fd = os.open(os.devnull, os.O_WRONLY)

        if hide_files:
            original_dir = pathlib.Path.cwd()
            tempdir = tempfile.TemporaryDirectory(prefix=f"piqtree_{func.__name__}")
            os.chdir(tempdir.name)

        try:
            # Replace stdout and stderr with /dev/null
            os.dup2(devnull_fd, stdout_fd)
            os.dup2(devnull_fd, stderr_fd)

            # Call the wrapped function
            return func(*args, **kwargs)
        except RuntimeError as e:
            raise IqTreeError(e) from None
        finally:
            # Flush stdout and stderr
            sys.stdout.flush()
            sys.stderr.flush()

            # Restore stdout and stderr
            os.dup2(original_stdout_fd, stdout_fd)
            os.dup2(original_stderr_fd, stderr_fd)

            # Close the saved duplicates as well as devnull; otherwise every
            # call would leak two file descriptors.
            os.close(original_stdout_fd)
            os.close(original_stderr_fd)
            os.close(devnull_fd)

            if hide_files:
                # Leave the temporary directory before deleting it.
                os.chdir(original_dir)
                tempdir.cleanup()

    return wrapper_iqtree_func
91 |
--------------------------------------------------------------------------------
/src/piqtree/iqtree/_jc_distance.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Sequence
2 |
3 | import cogent3.app.typing as c3_types
4 | import numpy as np
5 | from _piqtree import iq_jc_distances
6 | from cogent3.evolve.fast_distance import DistanceMatrix
7 |
8 | from piqtree.iqtree._decorator import iqtree_func
9 |
10 | iq_jc_distances = iqtree_func(iq_jc_distances, hide_files=True)
11 |
12 |
def _dists_to_distmatrix(
    distances: np.ndarray,
    names: Sequence[str],
) -> c3_types.PairwiseDistanceType:
    """Build a cogent3 distance matrix from a square numpy array.

    Parameters
    ----------
    distances : np.ndarray
        Square array of pairwise distances, indexed as ``distances[i, j]``.
    names : Sequence[str]
        Sequence names, in the same order as the rows of ``distances``.

    Returns
    -------
    c3_types.PairwiseDistanceType
        Distance matrix built from the entries below the diagonal.

    """
    # Only the strictly lower triangle is read: one entry per unordered pair.
    lower_triangle = {
        (names[row], names[col]): distances[row, col]
        for row in range(1, len(distances))
        for col in range(row)
    }
    return DistanceMatrix(lower_triangle)
37 |
38 |
def jc_distances(
    aln: c3_types.AlignedSeqsType,
    num_threads: int | None = None,
) -> c3_types.PairwiseDistanceType:
    """Calculate the pairwise JC distances between sequences of an alignment.

    Parameters
    ----------
    aln : c3_types.AlignedSeqsType
        Alignment whose sequences are compared.
    num_threads: int | None, optional
        Number of threads for IQ-TREE to use, by default None (all available threads).

    Returns
    -------
    c3_types.PairwiseDistanceType
        Pairwise JC distance matrix.

    """
    # None is forwarded to the library as 0.
    threads = 0 if num_threads is None else num_threads

    names = aln.names
    seqs = [str(seq) for seq in aln.iter_seqs(names)]

    # The library returns a flat array; fold it into a square matrix.
    num_seqs = len(names)
    flat_distances = np.array(iq_jc_distances(names, seqs, threads))
    square = flat_distances.reshape((num_seqs, num_seqs))
    return _dists_to_distmatrix(square, names)
68 |
--------------------------------------------------------------------------------
/src/piqtree/iqtree/_model_finder.py:
--------------------------------------------------------------------------------
1 | """Python wrapper for model finder in the IQ-TREE library."""
2 |
3 | import dataclasses
4 | from collections.abc import Iterable
5 | from typing import Any
6 |
7 | import yaml
8 | from _piqtree import iq_model_finder
9 | from cogent3.app import typing as c3_types
10 | from cogent3.util.misc import get_object_provenance
11 |
12 | from piqtree.iqtree._decorator import iqtree_func
13 | from piqtree.model import Model, make_model
14 |
15 | iq_model_finder = iqtree_func(iq_model_finder, hide_files=True)
16 |
17 |
18 | @dataclasses.dataclass(slots=True, frozen=True)
19 | class ModelResultValue:
20 | """Model statistics from IQ-TREE model_finder.
21 |
22 | Parameters
23 | ----------
24 | lnL
25 | Log likelihood of the model.
26 | nfp
27 | Number of free parameters in the model.
28 | tree_length
29 | Length of the tree (sum of branch lengths).
30 | """
31 |
32 | lnL: float # noqa: N815
33 | nfp: int
34 | tree_length: float
35 |
36 | @classmethod
37 | def from_string(cls, val: str) -> "ModelResultValue":
38 | """Parse the string produced by IQ-TREE model_finder for a given model."""
39 | try:
40 | lnL, nfp, tree_length = val.split() # noqa: N806
41 | return cls(lnL=float(lnL), nfp=int(nfp), tree_length=float(tree_length))
42 | except ValueError as e:
43 | msg = f"Error parsing string '{val}'"
44 | raise ValueError(msg) from e
45 |
46 |
@dataclasses.dataclass(slots=True)
class ModelFinderResult:
    """Result of a ModelFinder run.

    Attributes
    ----------
    source: str
        Source of the alignment.
    best_aic: Model
        The best AIC model.
    best_aicc: Model
        The best AICc model.
    best_bic: Model
        The best BIC model.
    model_stats:
        Per-model statistics parsed from the raw ModelFinder output.

    Notes
    -----
    ``raw_data`` (the raw mapping returned by ModelFinder) is accepted by the
    constructor only; it is consumed in ``__post_init__`` and not stored.
    """

    source: str
    raw_data: dataclasses.InitVar[dict[str, Any]]
    best_aic: Model = dataclasses.field(init=False)
    best_aicc: Model = dataclasses.field(init=False)
    best_bic: Model = dataclasses.field(init=False)
    model_stats: dict[Model | str, ModelResultValue] = dataclasses.field(
        init=False,
        repr=False,
        default_factory=dict,
    )

    def __post_init__(self, raw_data: dict[str, Any]) -> None:
        # Every string-valued entry that is not a "best_*" / "initTree*" key
        # is treated as the statistics line of one model.
        parsed: dict[Model | str, ModelResultValue] = {}
        for key, val in raw_data.items():
            if key.startswith(("best_", "initTree")) or not isinstance(val, str):
                continue
            parsed[key] = ModelResultValue.from_string(val)
        self.model_stats = parsed

        best_fields = {
            "best_aic": "best_model_AIC",
            "best_aicc": "best_model_AICc",
            "best_bic": "best_model_BIC",
        }
        # Resolve all three best models first ...
        for field_name, raw_key in best_fields.items():
            setattr(self, field_name, make_model(raw_data[raw_key]))

        # ... then also index their statistics by the Model objects.
        for field_name in best_fields:
            best_model = getattr(self, field_name)
            self.model_stats[best_model] = ModelResultValue.from_string(
                raw_data[str(best_model)],
            )

    def to_rich_dict(self) -> dict[str, Any]:
        """Serialise to a dict that from_rich_dict can rebuild the result from."""
        import piqtree

        raw_data = {
            str(model_): f"{stats.lnL} {stats.nfp} {stats.tree_length}"
            for model_, stats in self.model_stats.items()
        }
        raw_data["best_model_AIC"] = str(self.best_aic)
        raw_data["best_model_AICc"] = str(self.best_aicc)
        raw_data["best_model_BIC"] = str(self.best_bic)

        return {
            "version": piqtree.__version__,
            "type": get_object_provenance(self),
            "init_kwargs": {"raw_data": raw_data, "source": self.source},
        }

    @classmethod
    def from_rich_dict(cls, data: dict[str, Any]) -> "ModelFinderResult":
        """Rebuild a result from the dict produced by to_rich_dict."""
        init_kwargs = data["init_kwargs"]
        return cls(**init_kwargs)
115 |
116 |
def model_finder(
    aln: c3_types.AlignedSeqsType,
    model_set: Iterable[str] | None = None,
    freq_set: Iterable[str] | None = None,
    rate_set: Iterable[str] | None = None,
    rand_seed: int | None = None,
    num_threads: int | None = None,
) -> ModelFinderResult | c3_types.SerialisableType:
    """Run ModelFinder on an alignment to find its models of best fit.

    Parameters
    ----------
    aln : c3_types.AlignedSeqsType
        The alignment to find the model of best fit for.
    model_set : Iterable[str] | None, optional
        Search space for models.
        Equivalent to IQ-TREE's mset parameter, by default None
    freq_set : Iterable[str] | None, optional
        Search space for frequency types.
        Equivalent to IQ-TREE's mfreq parameter, by default None
    rate_set : Iterable[str] | None, optional
        Search space for rate heterogeneity types.
        Equivalent to IQ-TREE's mrate parameter, by default None
    rand_seed : int | None, optional
        The random seed - 0 or None means no seed, by default None.
    num_threads: int | None, optional
        Number of threads for IQ-TREE to use, by default None (single-threaded).
        If 0 is specified, IQ-TREE attempts to find the optimal number of threads.

    Returns
    -------
    ModelFinderResult | c3_types.SerialisableType
        Collection of data returned from IQ-TREE's ModelFinder.
    """
    source = aln.info.source

    # None maps onto the library defaults: seed 0 and a single thread.
    seed = 0 if rand_seed is None else rand_seed
    threads = 1 if num_threads is None else num_threads

    # Each search space is sent as a comma-separated string; an omitted
    # one becomes the empty string.
    models = ",".join(() if model_set is None else model_set)
    freqs = ",".join(() if freq_set is None else freq_set)
    rates = ",".join(() if rate_set is None else rate_set)

    names = aln.names
    seqs = [str(seq) for seq in aln.iter_seqs(names)]

    yaml_text = iq_model_finder(names, seqs, seed, models, freqs, rates, threads)
    return ModelFinderResult(raw_data=yaml.safe_load(yaml_text), source=source)
180 |
--------------------------------------------------------------------------------
/src/piqtree/iqtree/_random_tree.py:
--------------------------------------------------------------------------------
1 | """Python wrappers to random tree generation in the IQ-TREE library."""
2 |
3 | from enum import Enum, auto
4 |
5 | import cogent3
6 | from _piqtree import iq_random_tree
7 |
8 | from piqtree.iqtree._decorator import iqtree_func
9 |
10 | iq_random_tree = iqtree_func(iq_random_tree)
11 |
12 |
class TreeGenMode(Enum):
    """Setting under which to generate random trees.

    random_trees passes the member *name* (not its value) to IQ-TREE, so the
    names must stay in sync with the mode strings the library accepts.
    """

    # Values are assigned automatically and carry no meaning of their own.
    YULE_HARDING = auto()
    UNIFORM = auto()
    CATERPILLAR = auto()
    BALANCED = auto()
    BIRTH_DEATH = auto()
    STAR_TREE = auto()
22 |
23 |
def random_trees(
    num_trees: int,
    num_taxa: int,
    tree_mode: TreeGenMode,
    rand_seed: int | None = None,
) -> tuple[cogent3.PhyloNode, ...]:
    """Generate a collection of random trees.

    Generates a random collection of trees through IQ-TREE.

    Parameters
    ----------
    num_trees : int
        The number of trees to generate.
    num_taxa : int
        The number of taxa per tree.
    tree_mode : TreeGenMode
        How the trees are generated.
    rand_seed : int | None, optional
        The random seed - 0 or None means no seed, by default None.

    Returns
    -------
    tuple[cogent3.PhyloNode, ...]
        A collection of random trees.

    """
    # None maps onto 0, the default rand_seed in IQ-TREE.
    seed = 0 if rand_seed is None else rand_seed

    # IQ-TREE takes the taxa count first and returns one newick string per
    # line; empty lines (e.g. after a trailing newline) are skipped.
    newick_lines = iq_random_tree(num_taxa, tree_mode.name, num_trees, seed)
    return tuple(
        cogent3.make_tree(newick) for newick in newick_lines.split("\n") if newick
    )
57 |
--------------------------------------------------------------------------------
/src/piqtree/iqtree/_robinson_foulds.py:
--------------------------------------------------------------------------------
1 | """Python wrappers to RF distances in the IQ-TREE library."""
2 |
3 | from collections.abc import Sequence
4 |
5 | import cogent3
6 | import numpy as np
7 | from _piqtree import iq_robinson_fould
8 |
9 | from piqtree.iqtree._decorator import iqtree_func
10 |
11 | iq_robinson_fould = iqtree_func(iq_robinson_fould)
12 |
13 |
def robinson_foulds(trees: Sequence[cogent3.PhyloNode]) -> np.ndarray:
    """Compute all pairwise Robinson-Foulds distances for a sequence of trees.

    Parameters
    ----------
    trees : Sequence[cogent3.PhyloNode]
        The trees to compare with one another.

    Returns
    -------
    np.ndarray
        Square, symmetric array in which entry ``[i, j]`` is the
        Robinson-Foulds distance between ``trees[i]`` and ``trees[j]``;
        the diagonal is zero.
    """
    num_trees = len(trees)
    pairwise_distances = np.zeros((num_trees, num_trees))

    # Each unordered pair is evaluated once and mirrored across the diagonal.
    for row in range(1, num_trees):
        for col in range(row):
            distance = iq_robinson_fould(str(trees[row]), str(trees[col]))
            pairwise_distances[row, col] = pairwise_distances[col, row] = distance

    return pairwise_distances
38 |
--------------------------------------------------------------------------------
/src/piqtree/model/__init__.py:
--------------------------------------------------------------------------------
1 | """Models available in IQ-TREE."""
2 |
3 | from ._freq_type import FreqType, get_freq_type
4 | from ._model import Model, make_model
5 | from ._options import available_freq_type, available_models, available_rate_type
6 | from ._rate_type import (
7 | DiscreteGammaModel,
8 | FreeRateModel,
9 | RateModel,
10 | RateType,
11 | get_rate_type,
12 | )
13 | from ._substitution_model import (
14 | AaModel,
15 | DnaModel,
16 | SubstitutionModel,
17 | get_substitution_model,
18 | )
19 |
# Names re-exported as the public API of ``piqtree.model``. Entries are kept
# sorted (classes first, then functions).
__all__ = [
    "AaModel",
    "DiscreteGammaModel",
    "DnaModel",
    "FreeRateModel",
    "FreqType",
    "Model",
    "RateModel",
    "RateType",
    "SubstitutionModel",
    "available_freq_type",
    "available_models",
    "available_rate_type",
    "get_freq_type",
    "get_rate_type",
    "get_substitution_model",
    "make_model",
]
38 |
--------------------------------------------------------------------------------
/src/piqtree/model/_freq_type.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import functools
3 | from enum import Enum, unique
4 |
5 |
@unique
class FreqType(Enum):
    """State (base) frequency options, named by their IQ-TREE abbreviation."""

    F = "F"
    FO = "FO"
    FQ = "FQ"

    @staticmethod
    @functools.cache
    def _descriptions() -> dict["FreqType", str]:
        # Built on first use and cached: the members cannot be referenced
        # until the class body has finished executing.
        empirical = "Empirical state frequency observed from the data."
        optimized = "State frequency optimized by maximum-likelihood from the data. Note that this is with letter-O and not digit-0."
        equal = "Equal state frequency."
        return {
            FreqType.F: empirical,
            FreqType.FO: optimized,
            FreqType.FQ: equal,
        }

    @property
    def description(self) -> str:
        """Human readable explanation of this frequency type.

        Returns
        -------
        str
            The description of the FreqType.

        """
        lookup = self._descriptions()
        return lookup[self]

    def iqtree_str(self) -> str:
        """Return the abbreviation used for this frequency type in an IQ-TREE model string.

        Returns
        -------
        str
            The value of the enum member.

        """
        return self.value
37 |
38 |
def get_freq_type(name: str | FreqType) -> FreqType:
    """Resolve a frequency type name to its FreqType member.

    Parameters
    ----------
    name : str | FreqType
        Name of the frequency type. Leading ``+`` characters are ignored,
        and a FreqType is returned unchanged.

    Returns
    -------
    FreqType
        The resolved FreqType Enum.

    Raises
    ------
    ValueError
        If the FreqType name cannot be resolved. The message shows the
        name with leading ``+`` characters already removed.

    """
    if isinstance(name, FreqType):
        return name

    name = name.lstrip("+")

    # Look up by member name; unknown names fall through to the error.
    resolved = FreqType.__members__.get(name)
    if resolved is not None:
        return resolved

    msg = f"Unknown state frequency type: {name!r}"
    raise ValueError(msg)
68 |
--------------------------------------------------------------------------------
/src/piqtree/model/_model.py:
--------------------------------------------------------------------------------
1 | from piqtree.model._freq_type import FreqType, get_freq_type
2 | from piqtree.model._rate_type import RateModel, get_rate_type
3 | from piqtree.model._substitution_model import SubstitutionModel, get_substitution_model
4 |
5 |
class Model:
    """Specification for substitution models.

    Bundles the substitution model with its optional state frequency and
    rate heterogeneity settings.
    """

    def __init__(
        self,
        submod_type: str | SubstitutionModel,
        freq_type: str | FreqType | None = None,
        rate_model: str | RateModel | None = None,
        *,
        invariant_sites: bool = False,
    ) -> None:
        """Construct Model class.

        Parameters
        ----------
        submod_type : str | SubstitutionModel
            The substitution model to use
        freq_type : str | FreqType | None, optional
            State frequency specification, by default None. (defaults
            to empirical base frequencies if not specified by model).
        rate_model : str | RateModel | None, optional
            Rate heterogeneity across sites model, by default
            no Gamma, and no FreeRate.
        invariant_sites : bool, optional
            Invariable sites, by default False.

        """
        self.submod_type = get_substitution_model(submod_type)

        if freq_type:
            self.freq_type = get_freq_type(freq_type)
        else:
            self.freq_type = None

        # A rate type is only stored when there is something to describe:
        # a rate model, invariant sites, or both.
        if rate_model is None and not invariant_sites:
            self.rate_type = None
        else:
            self.rate_type = get_rate_type(
                rate_model,
                invariant_sites=invariant_sites,
            )

    def __hash__(self) -> int:
        # Hash follows the IQ-TREE string form of the model.
        return hash(str(self))

    def __repr__(self) -> str:
        field_names = ("submod_type", "freq_type", "rate_type")
        # Components without a `name` attribute (e.g. None) display as None.
        attrs = [
            f"{field}={getattr(getattr(self, field), 'name', None)}"
            for field in field_names
        ]
        return f"Model({', '.join(attrs)})"

    def __str__(self) -> str:
        """Convert the model into the IQ-TREE representation.

        Returns
        -------
        str
            The IQ-TREE representation of the model: the substitution
            model followed by any frequency type and rate type, joined
            by ``+``.

        """
        parts = [self.submod_type.iqtree_str()]
        for optional_part in (self.freq_type, self.rate_type):
            if optional_part is not None:
                parts.append(optional_part.iqtree_str())
        return "+".join(parts)

    @property
    def rate_model(self) -> RateModel | None:
        """The RateModel used, if one is chosen.

        Returns
        -------
        RateModel | None
            The RateModel used by the Model.

        """
        if not self.rate_type:
            return None
        return self.rate_type.rate_model

    @property
    def invariant_sites(self) -> bool:
        """Whether invariant sites are used.

        Returns
        -------
        bool
            True if invariant sites are used by the model, False otherwise.

        """
        if not self.rate_type:
            return False
        return self.rate_type.invariant_sites
92 |
93 |
def make_model(iqtree_str: str) -> Model:
    """Convert an IQ-TREE model specification into a Model class.

    The text before the first ``+`` is the substitution model. Every later
    ``+``-separated component is classified by its first letter: ``F`` for
    a frequency type, ``I`` for invariant sites, ``G`` or ``R`` for a rate
    heterogeneity model.

    Parameters
    ----------
    iqtree_str : str
        The IQ-TREE model string.

    Returns
    -------
    Model
        The equivalent Model class.

    Raises
    ------
    ValueError
        If a kind of component is given more than once, or a component
        starts with an unexpected letter.
    """
    sub_mod_str, plus, remainder = iqtree_str.partition("+")
    if not plus:
        return Model(iqtree_str)

    freq_type = None
    rate_model = None
    invariant_sites = False

    for part in remainder.split("+"):
        # Each branch either records the component and moves on, or
        # prepares the message for the error raised at the bottom.
        if part.startswith("F"):
            if freq_type is None:
                freq_type = part
                continue
            msg = f"Model {iqtree_str!r} contains multiple base frequency specifications."
        elif part.startswith("I"):
            if not invariant_sites:
                invariant_sites = True
                continue
            msg = f"Model {iqtree_str!r} contains multiple specifications for invariant sites."
        elif part.startswith(("G", "R")):
            if rate_model is None:
                rate_model = part
                continue
            msg = f"Model {iqtree_str!r} contains multiple rate heterogeneity specifications."
        else:
            msg = f"Model {iqtree_str!r} contains unexpected component."
        raise ValueError(msg)

    return Model(sub_mod_str, freq_type, rate_model, invariant_sites=invariant_sites)
137 |
--------------------------------------------------------------------------------
/src/piqtree/model/_options.py:
--------------------------------------------------------------------------------
1 | """Convenience functions for showing user facing options and their descriptions."""
2 |
3 | import functools
4 | from typing import Literal
5 |
6 | from cogent3.core.table import Table, make_table
7 |
8 | from piqtree.model._freq_type import FreqType
9 | from piqtree.model._rate_type import ALL_BASE_RATE_TYPES, get_description
10 | from piqtree.model._substitution_model import (
11 | ALL_MODELS_CLASSES,
12 | AaModel,
13 | DnaModel,
14 | SubstitutionModel,
15 | )
16 |
17 |
@functools.cache
def _make_models(model_type: type[SubstitutionModel]) -> dict[str, list[str]]:
    """Collect table columns describing the models of the requested type.

    Passing the SubstitutionModel base class selects every class in
    ALL_MODELS_CLASSES; any other class selects only itself. The result is
    cached per `model_type`.
    """
    if model_type == SubstitutionModel:
        model_classes = ALL_MODELS_CLASSES
    else:
        model_classes = [model_type]

    # One (type, abbreviation, description) row per model, in class order.
    rows = [
        (model.model_type(), model.value, model.description)
        for model_class in model_classes
        for model in model_class
    ]

    return {
        "Model Type": [row[0] for row in rows],
        "Abbreviation": [row[1] for row in rows],
        "Description": [row[2] for row in rows],
    }
37 |
38 |
def available_models(
    model_type: Literal["dna", "protein"] | None = None,
    *,
    show_all: bool = True,
) -> Table:
    """Return a table showing available substitution models.

    Parameters
    ----------
    model_type : Literal["dna", "protein"] | None, optional
        The models to fetch, by default None (all models). Any value other
        than "dna" or "protein" also selects all models.
    show_all : bool, optional
        if True, the representation of the table shows all records, by default True.

    Returns
    -------
    Table
        Table with all available models.

    """
    template = "Available {}substitution models"

    # Choose the model class and the qualifier inserted into the title.
    if model_type == "dna":
        model_class, qualifier = DnaModel, "nucleotide "
    elif model_type == "protein":
        model_class, qualifier = AaModel, "protein "
    else:
        model_class, qualifier = SubstitutionModel, ""

    table = make_table(
        data=_make_models(model_class),
        title=template.format(qualifier),
    )

    if show_all:
        # Raise the number of displayed head rows to the full row count.
        table.set_repr_policy(head=table.shape[0])
    return table
79 |
80 |
def available_freq_type() -> Table:
    """Return a table showing available freq type options.

    The table has one row per FreqType member, giving its value and its
    description.
    """
    members = list(FreqType)
    data: dict[str, list[str]] = {
        "Freq Type": [member.value for member in members],
        "Description": [member.description for member in members],
    }
    return make_table(data=data, title="Available frequency types")
90 |
91 |
def available_rate_type() -> Table:
    """Return a table showing available rate type options.

    The table has one row per entry of ALL_BASE_RATE_TYPES, giving its
    IQ-TREE string and its description.
    """
    # Build (abbreviation, description) pairs in a single pass.
    rows = [
        (rate_type.iqtree_str(), get_description(rate_type))
        for rate_type in ALL_BASE_RATE_TYPES
    ]
    data: dict[str, list[str]] = {
        "Rate Type": [abbreviation for abbreviation, _ in rows],
        "Description": [description for _, description in rows],
    }
    return make_table(data=data, title="Available rate heterogeneity types")
101 |
--------------------------------------------------------------------------------
/src/piqtree/model/_rate_type.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 |
class RateModel(ABC):
    """Abstract interface implemented by every rate heterogeneity model."""

    @abstractmethod
    def iqtree_str(self) -> str:
        """Render this rate model as an IQ-TREE settings string.

        Returns
        -------
        str
            String parsable by IQ-TREE for the rate heterogeneity model.

        """
17 |
18 |
class RateType:
    """Invariant sites setting combined with an optional rate model."""

    def __init__(
        self,
        *,
        invariant_sites: bool = False,
        rate_model: RateModel | None = None,
    ) -> None:
        """Rate heterogeneity across sites model.

        Parameters
        ----------
        invariant_sites : bool, optional
            Invariable Sites Model, by default False.
        rate_model : RateModel | None, optional
            Discrete Gamma Model or FreeRate Model, by default None.

        """
        self.invariant_sites = invariant_sites
        self.rate_model = rate_model

    def iqtree_str(self) -> str:
        """Convert to an iqtree settings string.

        Returns
        -------
        str
            String parsable by IQ-TREE for the rate heterogeneity model.
            This is the empty string when neither invariant sites nor a
            rate model is set.

        """
        components = []
        if self.invariant_sites:
            components.append("I")
        if self.rate_model is not None:
            components.append(self.rate_model.iqtree_str())
        # Invariant sites and model need to be joined by a '+'
        return "+".join(components)

    @property
    def name(self) -> str:
        """Alias for the IQ-TREE string of this rate type."""
        return self.iqtree_str()
59 |
60 |
class DiscreteGammaModel(RateModel):
    """Discrete Gamma rate heterogeneity model, written ``G`` in IQ-TREE strings."""

    def __init__(self, rate_categories: int | None = None) -> None:
        """Discrete Gamma Model.

        Parameters
        ----------
        rate_categories : int | None, optional
            The number of rate categories, by default None. With None no
            count is written to the model string, leaving the choice to
            IQ-TREE.

        References
        ----------
        .. [1] Yang, Ziheng. "Maximum likelihood phylogenetic estimation from
           DNA sequences with variable rates over sites: approximate methods."
           Journal of Molecular evolution 39 (1994): 306-314.

        """
        self.rate_categories = rate_categories

    def iqtree_str(self) -> str:
        """Return ``G``, followed by the category count when one was given."""
        return "G" if self.rate_categories is None else f"G{self.rate_categories}"
83 |
84 |
class FreeRateModel(RateModel):
    """FreeRate rate heterogeneity model, written ``R`` in IQ-TREE strings."""

    def __init__(self, rate_categories: int | None = None) -> None:
        """FreeRate Model.

        Parameters
        ----------
        rate_categories : int | None, optional
            The number of rate categories, by default None. With None no
            count is written to the model string, leaving the choice to
            IQ-TREE.

        References
        ----------
        .. [1] Yang, Ziheng. "A space-time process model for the evolution of
           DNA sequences." Genetics 139.2 (1995): 993-1005.
        .. [2] Soubrier, Julien, et al. "The influence of rate heterogeneity
           among sites on the time dependence of molecular rates." Molecular
           biology and evolution 29.11 (2012): 3345-3358.

        """
        self.rate_categories = rate_categories

    def iqtree_str(self) -> str:
        """Return ``R``, followed by the category count when one was given."""
        return "R" if self.rate_categories is None else f"R{self.rate_categories}"
109 |
110 |
# Every combination of invariant sites (off/on) with no rate model, a
# DiscreteGammaModel, or a FreeRateModel. The rate models are built without
# an explicit category count.
ALL_BASE_RATE_TYPES = [
    RateType(),
    RateType(invariant_sites=True),
    RateType(rate_model=DiscreteGammaModel()),
    RateType(invariant_sites=True, rate_model=DiscreteGammaModel()),
    RateType(rate_model=FreeRateModel()),
    RateType(invariant_sites=True, rate_model=FreeRateModel()),
]
119 |
# Description of each base rate type, keyed by its IQ-TREE string without a
# category count (e.g. "", "I", "G", "I+G", "R", "I+R"). `get_description`
# strips digits from a rate type's string before looking it up here.
_BASE_RATE_TYPE_DESCRIPTIONS = {
    RateType().iqtree_str(): "no invariable sites, no rate heterogeneity model.",
    RateType(
        invariant_sites=True,
    ).iqtree_str(): "allowing for a proportion of invariable sites.",
    RateType(
        rate_model=DiscreteGammaModel(),
    ).iqtree_str(): "discrete Gamma model (Yang, 1994) with default 4 rate categories. The number of categories can be changed with e.g. +G8.",
    RateType(
        invariant_sites=True,
        rate_model=DiscreteGammaModel(),
    ).iqtree_str(): "invariable site plus discrete Gamma model (Gu et al., 1995).",
    RateType(
        rate_model=FreeRateModel(),
    ).iqtree_str(): "FreeRate model (Yang, 1995; Soubrier et al., 2012) that generalizes the +G model by relaxing the assumption of Gamma-distributed rates. The number of categories can be specified with e.g. +R6 (default 4 categories if not specified). The FreeRate model typically fits data better than the +G model and is recommended for analysis of large data sets.",
    RateType(
        invariant_sites=True,
        rate_model=FreeRateModel(),
    ).iqtree_str(): "invariable site plus FreeRate model.",
}
140 |
141 |
def get_description(rate_type: RateType) -> str:
    """Return the description of the base rate type matching `rate_type`.

    Digits (category counts) are removed from the rate type's IQ-TREE
    string, so e.g. ``G8`` is described by the entry for ``G``.

    Raises
    ------
    KeyError
        If the digit-free string has no entry in the description table.
    """
    letters = [ch for ch in rate_type.iqtree_str() if not ch.isdigit()]
    base_key = "".join(letters)
    return _BASE_RATE_TYPE_DESCRIPTIONS[base_key]
145 |
146 |
def get_rate_type(
    rate_model: str | RateModel | None = None,
    *,
    invariant_sites: bool = False,
) -> RateType:
    """Make a RateType from a chosen rate model and invariant sites.

    Parameters
    ----------
    rate_model : str | RateModel | None, optional
        The chosen rate model, by default None. A string is ``G`` or ``R``
        optionally followed by the number of rate categories; leading
        ``+`` characters are ignored.
    invariant_sites : bool, optional
        Whether to use invariant sites, by default False.

    Returns
    -------
    RateType
        RateType generated from the rate model with invariant sites.

    Raises
    ------
    TypeError
        If `rate_model` is not a str, a RateModel or None.
    ValueError
        If a string `rate_model` cannot be parsed.

    """
    if rate_model is None:
        return RateType(invariant_sites=invariant_sites)

    if isinstance(rate_model, RateModel):
        return RateType(rate_model=rate_model, invariant_sites=invariant_sites)

    if not isinstance(rate_model, str):
        msg = f"Unexpected type for rate_model: {type(rate_model)}"
        raise TypeError(msg)

    bad_value_msg = f"Unexpected value for rate_model {rate_model!r}"

    spec = rate_model.lstrip("+")
    kind, count_text = spec[:1], spec[1:]

    # A lone letter means no explicit category count. Anything else must be
    # a letter followed by digits only (this also rejects an empty spec).
    if len(spec) == 1:
        rate_categories = None
    elif count_text.isdigit():
        rate_categories = int(count_text)
    else:
        raise ValueError(bad_value_msg)

    model_classes = {"G": DiscreteGammaModel, "R": FreeRateModel}
    model_class = model_classes.get(kind)
    if model_class is None:
        raise ValueError(bad_value_msg)

    return RateType(
        rate_model=model_class(rate_categories=rate_categories),
        invariant_sites=invariant_sites,
    )
202 |
--------------------------------------------------------------------------------
/src/piqtree/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iqtree/piqtree/bb5a1adde249d5e0dcf730924c2ae38230a7495d/src/piqtree/py.typed
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 |
3 | import pytest
4 | from cogent3 import Alignment, load_aligned_seqs
5 |
6 |
@pytest.fixture(scope="session")
def DATA_DIR() -> pathlib.Path:
    """Path of the ``data`` directory located next to this module."""
    tests_dir = pathlib.Path(__file__).parent
    return tests_dir / "data"
10 |
11 |
@pytest.fixture
def three_otu(DATA_DIR: pathlib.Path) -> Alignment:
    """Human, Rhesus and Mouse from example.fasta, after ``omit_gap_pos(allowed_gap_frac=0)``."""
    fasta_path = DATA_DIR / "example.fasta"
    full_aln = load_aligned_seqs(fasta_path, moltype="dna", new_type=True)
    subset = full_aln.take_seqs(["Human", "Rhesus", "Mouse"])
    return subset.omit_gap_pos(allowed_gap_frac=0)
17 |
18 |
@pytest.fixture
def four_otu(DATA_DIR: pathlib.Path) -> Alignment:
    """Human, Chimpanzee, Rhesus and Mouse from example.fasta, after ``omit_gap_pos(allowed_gap_frac=0)``."""
    fasta_path = DATA_DIR / "example.fasta"
    full_aln = load_aligned_seqs(fasta_path, moltype="dna", new_type=True)
    subset = full_aln.take_seqs(["Human", "Chimpanzee", "Rhesus", "Mouse"])
    return subset.omit_gap_pos(allowed_gap_frac=0)
24 |
25 |
@pytest.fixture
def five_otu(DATA_DIR: pathlib.Path) -> Alignment:
    """Human, Chimpanzee, Rhesus, Manatee and Dugong from example.fasta, after ``omit_gap_pos(allowed_gap_frac=0)``."""
    fasta_path = DATA_DIR / "example.fasta"
    full_aln = load_aligned_seqs(fasta_path, moltype="dna", new_type=True)
    subset = full_aln.take_seqs(["Human", "Chimpanzee", "Rhesus", "Manatee", "Dugong"])
    return subset.omit_gap_pos(allowed_gap_frac=0)
31 |
32 |
@pytest.fixture
def all_otu(DATA_DIR: pathlib.Path) -> Alignment:
    """Every sequence from example.fasta, after ``omit_gap_pos(allowed_gap_frac=0)``."""
    fasta_path = DATA_DIR / "example.fasta"
    full_aln = load_aligned_seqs(fasta_path, moltype="dna", new_type=True)
    return full_aln.omit_gap_pos(allowed_gap_frac=0)
37 |
--------------------------------------------------------------------------------
/tests/test_app/test_app.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from cogent3 import get_app, make_tree
3 | from cogent3.core.new_alignment import Alignment
4 |
5 | import piqtree
6 | from piqtree import jc_distances, make_model
7 |
8 |
def test_piqtree_phylo(four_otu: Alignment) -> None:
    """The piqtree_phylo app with a JC model gives the expected four-taxon topology."""
    phylo_app = get_app("piqtree_phylo", model="JC")
    inferred = phylo_app(four_otu)
    reference = make_tree("(Human,Chimpanzee,(Rhesus,Mouse));")
    assert reference.same_topology(inferred)
14 |
15 |
def test_piqtree_phylo_support(four_otu: Alignment) -> None:
    """With 1000 bootstrap replicates, internal non-root nodes carry a truthy support value."""
    phylo_app = get_app("piqtree_phylo", model=make_model("JC"), bootstrap_reps=1000)
    tree = phylo_app(four_otu)

    supports = []
    for node in tree.postorder():
        # Only internal nodes other than the root are inspected.
        if node.is_tip() or node.name == "root":
            continue
        supports.append(node.params.get("support", None))

    assert all(supports)
25 |
26 |
def test_piqtree_fit(three_otu: Alignment) -> None:
    """The lnL from the piqtree_fit app matches cogent3's JC69 model fit."""
    tree = make_tree(tip_names=three_otu.names)

    # Reference likelihood from cogent3's own model app.
    c3_app = get_app("model", "JC69", tree=tree)
    c3_result = c3_app(three_otu)

    fit_app = get_app("piqtree_fit", tree=tree, model="JC")
    fitted = fit_app(three_otu)

    assert fitted.params["lnL"] == pytest.approx(c3_result.lnL)
34 |
35 |
@pytest.mark.parametrize("num_trees", [1, 10, 20])
@pytest.mark.parametrize("num_taxa", [10, 50, 100])
@pytest.mark.parametrize("tree_mode", list(piqtree.TreeGenMode))
def test_piqtree_random_trees(
    num_trees: int,
    num_taxa: int,
    tree_mode: piqtree.TreeGenMode,
) -> None:
    """The random trees app returns the requested number of trees, each with `num_taxa` tips."""
    random_trees_app = get_app(
        "piqtree_random_trees",
        tree_mode=tree_mode,
        num_trees=num_trees,
        rand_seed=1,
    )
    generated = random_trees_app(num_taxa)

    assert len(generated) == num_trees
    assert all(len(tree.tips()) == num_taxa for tree in generated)
55 |
56 |
def test_piqtree_jc_distances(five_otu: Alignment) -> None:
    """Distances from the piqtree_jc_dists app order the taxon pairs as expected."""
    jc_app = get_app("piqtree_jc_dists")
    dists = jc_app(five_otu)

    # Each entry is (closer pair, more distant pair).
    expected_orderings = [
        (("Human", "Chimpanzee"), ("Human", "Dugong")),  # chimpanzee closer than dugong
        (("Human", "Rhesus"), ("Human", "Manatee")),  # rhesus closer than manatee
        (("Human", "Rhesus"), ("Human", "Dugong")),  # rhesus closer than dugong
        (("Chimpanzee", "Rhesus"), ("Chimpanzee", "Manatee")),  # rhesus closer than manatee
        (("Chimpanzee", "Rhesus"), ("Chimpanzee", "Dugong")),  # rhesus closer than dugong
        (("Manatee", "Dugong"), ("Manatee", "Rhesus")),  # dugong closer than rhesus
    ]

    for closer, farther in expected_orderings:
        assert 0 < dists[closer] < dists[farther]
81 |
82 |
def test_piqtree_nj(five_otu: Alignment) -> None:
    """The piqtree_nj app applied to JC distances gives the expected five-taxon topology."""
    distance_matrix = jc_distances(five_otu)
    reference = make_tree("(((Human, Chimpanzee), Rhesus), Manatee, Dugong);")

    nj_app = get_app("piqtree_nj")
    nj_tree = nj_app(distance_matrix)

    assert reference.same_topology(nj_tree)
93 |
94 |
def test_mfinder(five_otu: Alignment) -> None:
    """The piqtree_mfinder app returns a ModelFinderResult."""
    from piqtree.iqtree import ModelFinderResult

    mfinder_app = get_app("piqtree_mfinder")
    result = mfinder_app(five_otu)
    assert isinstance(result, ModelFinderResult)
101 |
102 |
def test_mfinder_result_roundtrip(five_otu: Alignment) -> None:
    """A ModelFinderResult keeps its best AICc model through to_rich_dict / from_rich_dict."""
    from piqtree.iqtree import ModelFinderResult

    mfinder_app = get_app("piqtree_mfinder")
    original = mfinder_app(five_otu)

    restored = ModelFinderResult.from_rich_dict(original.to_rich_dict())

    assert isinstance(restored, ModelFinderResult)
    assert str(original.best_aicc) == str(restored.best_aicc)
112 |
113 |
def test_quick_tree_hook(four_otu: Alignment) -> None:
    """`quick_tree(use_hook="piqtree")` records piqtree as the tree's provenance."""
    hooked_tree = four_otu.quick_tree(use_hook="piqtree")
    provenance = hooked_tree.params["provenance"]
    assert provenance == "piqtree"
117 |
--------------------------------------------------------------------------------
/tests/test_app/test_pickle.py:
--------------------------------------------------------------------------------
1 | """Apps must be pickleable to be able to be run with parallel=True"""
2 |
3 | import pickle
4 |
5 | from cogent3 import get_app, make_tree
6 |
7 | from piqtree._app import _ALL_APP_NAMES
8 |
9 |
def test_pickle() -> None:
    """Every app named in _ALL_APP_NAMES can be built and pickled."""
    # Positional constructor arguments for the apps built with them here;
    # all other apps are built with no arguments.
    app_args = {
        "piqtree_phylo": ("JC",),
        "piqtree_fit": (make_tree("(a,b,(c,d));"), "JC"),
    }
    for app_name in _ALL_APP_NAMES:
        positional = app_args.get(app_name, ())
        app = get_app(app_name, *positional)
        serialized = pickle.dumps(app)
        assert len(serialized) > 0
18 |
--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 |
3 | import piqtree
4 |
5 |
def test_dataset_names() -> None:
    """`piqtree.dataset_names()` returns at least one name."""
    available = piqtree.dataset_names()
    assert len(available) > 0
9 |
10 |
def test_download_dataset(tmp_path: pathlib.Path) -> None:
    """Downloading ``example.tree.gz`` into `tmp_path` returns a path that exists."""
    downloaded = piqtree.download_dataset("example.tree.gz", dest_dir=tmp_path)
    downloaded_path = pathlib.Path(downloaded)
    assert downloaded_path.exists()
14 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_build_tree.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import pytest
4 | from cogent3 import Alignment, make_tree
5 |
6 | import piqtree
7 | from piqtree.exceptions import IqTreeError
8 | from piqtree.model import (
9 | DiscreteGammaModel,
10 | DnaModel,
11 | FreeRateModel,
12 | FreqType,
13 | Model,
14 | RateModel,
15 | )
16 |
17 |
def check_build_tree(
    four_otu: Alignment,
    dna_model: DnaModel,
    freq_type: FreqType | None = None,
    rate_model: RateModel | None = None,
    *,
    invariant_sites: bool = False,
    coerce_str: bool = False,
) -> None:
    """Build trees for `four_otu` and check topology and branch lengths.

    A tree is built once with a fixed seed and once without a seed. Both
    must match the expected four-taxon topology and have a length on
    every edge.

    Parameters
    ----------
    four_otu : Alignment
        Alignment of Human, Chimpanzee, Rhesus and Mouse.
    dna_model : DnaModel
        Substitution model to build the tree with.
    freq_type : FreqType | None, optional
        State frequency type passed to the Model, by default None.
    rate_model : RateModel | None, optional
        Rate heterogeneity model passed to the Model, by default None.
    invariant_sites : bool, optional
        Whether the Model uses invariant sites, by default False.
    coerce_str : bool, optional
        If True, the fixed-seed build receives ``str(model)`` instead of
        the Model instance, by default False.
    """
    expected = make_tree("(Human,Chimpanzee,(Rhesus,Mouse));")

    model = Model(
        dna_model,
        freq_type=freq_type,
        invariant_sites=invariant_sites,
        rate_model=rate_model,
    )

    # First build uses a fixed seed (optionally with the model as a string).
    # The second build is unseeded: results should be similar for any seed.
    builds = (
        (str(model) if coerce_str else model, 1),
        (model, None),
    )
    for model_arg, seed in builds:
        got = piqtree.build_tree(four_otu, model_arg, rand_seed=seed).unrooted()
        # Check topology
        assert expected.same_topology(got)
        # Check if branch lengths exist
        assert all("length" in v.params for v in got.get_edge_vector())
52 |
53 |
@pytest.mark.parametrize("dna_model", list(DnaModel)[:22])
@pytest.mark.parametrize("freq_type", list(FreqType))
def test_non_lie_build_tree(
    four_otu: Alignment,
    dna_model: DnaModel,
    freq_type: FreqType,
) -> None:
    """Build trees for the first 22 DnaModel members with every FreqType.

    NOTE(review): index 22 presumably separates the non-Lie models from
    the Lie models, going by the test names -- confirm against DnaModel.
    """
    check_build_tree(four_otu, dna_model, freq_type=freq_type)
62 |
63 |
@pytest.mark.parametrize("dna_model", list(DnaModel)[22:])
def test_lie_build_tree(four_otu: Alignment, dna_model: DnaModel) -> None:
    """Build trees for the DnaModel members from index 22 onwards, without a frequency type."""
    check_build_tree(four_otu=four_otu, dna_model=dna_model)
67 |
68 |
@pytest.mark.parametrize("dna_model", list(DnaModel)[-3:])
def test_str_build_tree(four_otu: Alignment, dna_model: DnaModel) -> None:
    """Build trees for the last three DnaModel members with the model passed as a string."""
    check_build_tree(four_otu=four_otu, dna_model=dna_model, coerce_str=True)
72 |
73 |
@pytest.mark.parametrize("dna_model", list(DnaModel)[:5])
@pytest.mark.parametrize("invariant_sites", [False, True])
@pytest.mark.parametrize(
    "rate_model",
    [
        None,
        DiscreteGammaModel(),
        FreeRateModel(),
        DiscreteGammaModel(6),
        FreeRateModel(6),
    ],
)
def test_rate_model_build_tree(
    four_otu: Alignment,
    dna_model: DnaModel,
    invariant_sites: bool,
    rate_model: RateModel,
) -> None:
    """Build trees for the first five DnaModel members across rate model and invariant sites settings."""
    rate_settings = {
        "rate_model": rate_model,
        "invariant_sites": invariant_sites,
    }
    check_build_tree(four_otu, dna_model, **rate_settings)
98 |
99 |
def test_build_tree_inadequate_bootstrapping(four_otu: Alignment) -> None:
    """Requesting only 10 bootstrap replicates raises an IqTreeError."""
    # The message contains regex metacharacters, so escape it for `match`.
    expected_message = re.escape("#replicates must be >= 1000")
    gtr = Model(DnaModel.GTR)
    with pytest.raises(IqTreeError, match=expected_message):
        piqtree.build_tree(four_otu, gtr, bootstrap_replicates=10)
103 |
104 |
def test_build_tree_bootstrapping(four_otu: Alignment) -> None:
    """With 1000 bootstrap replicates, the root child with the most children has a support value."""
    gtr = Model(DnaModel.GTR)
    tree = piqtree.build_tree(four_otu, gtr, bootstrap_replicates=1000)

    def child_count(node):
        return len(node.children)

    supported_node = max(tree.children, key=child_count)
    assert "support" in supported_node.params
110 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_distance.py:
--------------------------------------------------------------------------------
1 | from cogent3 import Alignment
2 |
3 | from piqtree import jc_distances
4 |
5 |
def test_jc_distance(five_otu: Alignment) -> None:
    """`jc_distances` orders the taxon pairs as expected."""
    dists = jc_distances(five_otu)

    # Each entry is (closer pair, more distant pair).
    expected_orderings = [
        (("Human", "Chimpanzee"), ("Human", "Dugong")),  # chimpanzee closer than dugong
        (("Human", "Rhesus"), ("Human", "Manatee")),  # rhesus closer than manatee
        (("Human", "Rhesus"), ("Human", "Dugong")),  # rhesus closer than dugong
        (("Chimpanzee", "Rhesus"), ("Chimpanzee", "Manatee")),  # rhesus closer than manatee
        (("Chimpanzee", "Rhesus"), ("Chimpanzee", "Dugong")),  # rhesus closer than dugong
        (("Manatee", "Dugong"), ("Manatee", "Rhesus")),  # dugong closer than rhesus
    ]

    for closer, farther in expected_orderings:
        assert 0 < dists[closer] < dists[farther]
29 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_fit_tree.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from cogent3 import Alignment, get_app, make_tree
3 | from cogent3.app.result import model_result
4 | from cogent3.core.tree import PhyloNode
5 |
6 | import piqtree
7 | from piqtree.model import DnaModel, Model
8 |
9 |
def check_likelihood(got: PhyloNode, expected: model_result) -> None:
    """Assert the ``lnL`` param of `got` approximately equals ``expected.lnL``."""
    fitted_lnl = got.params["lnL"]
    assert fitted_lnl == pytest.approx(expected.lnL)
12 |
13 |
def check_motif_probs(got: PhyloNode, expected: PhyloNode) -> None:
    """Assert both trees carry approximately equal ``mprobs`` for the same keys."""
    expected_probs = expected.params["mprobs"]
    got_probs = got.params["mprobs"]

    # Check that the base characters are the same
    motifs = set(expected_probs.keys())
    assert motifs == set(got_probs.keys())

    # Check that the probs are the same
    for motif in motifs:
        assert got_probs[motif] == pytest.approx(expected_probs[motif])
31 |
32 |
def check_rate_parameters(got: PhyloNode, expected: PhyloNode) -> None:
    """Assert the first edge of each tree carries matching rate parameters.

    Rate parameters are every edge param except length, ENS, paralinear
    and mprobs. The parameter names must be identical and the values equal
    to within a relative tolerance of 1e-2.
    """
    exclude = {"length", "ENS", "paralinear", "mprobs"}

    # Use one edge per tree for both the names and the values, so the
    # values are read from the same edge the names were checked on.
    expected_params = expected.get_edge_vector()[0].params
    got_params = got.get_edge_vector()[0].params

    # Collect all rate parameters in got and expected
    expected_keys = {k for k in expected_params if k not in exclude}
    got_keys = {k for k in got_params if k not in exclude}

    # Check that the keys of rate are the same
    assert expected_keys == got_keys

    # Check that the values of rate are the same
    assert all(
        got_params[key] == pytest.approx(expected_params[key], rel=1e-2)
        for key in expected_keys
    )
52 |
53 |
def check_branch_lengths(got: PhyloNode, expected: PhyloNode) -> None:
    """Assert ``get_distances()`` of both trees has the same keys and matching values.

    Values are compared with a relative tolerance of 1e-2.
    """
    got_distances = got.get_distances()
    expected_distances = expected.get_distances()

    # Check that the keys of branch lengths are the same
    assert got_distances.keys() == expected_distances.keys()

    # Check that the branch lengths are the same
    for key in expected_distances:
        assert got_distances[key] == pytest.approx(expected_distances[key], rel=1e-2)
68 |
69 |
@pytest.mark.parametrize(
    ("iq_model", "c3_model"),
    [
        (DnaModel.JC, "JC69"),
        (DnaModel.K80, "K80"),
        (DnaModel.GTR, "GTR"),
        (DnaModel.TN, "TN93"),
        (DnaModel.HKY, "HKY85"),
        (DnaModel.F81, "F81"),
    ],
)
def test_fit_tree(three_otu: Alignment, iq_model: DnaModel, c3_model: str) -> None:
    """`piqtree.fit_tree` with a Model agrees with the matching cogent3 model fit."""
    tree_topology = make_tree(tip_names=three_otu.names)

    # Reference fit from cogent3 for the equivalent model.
    c3_app = get_app("model", c3_model, tree=tree_topology)
    expected = c3_app(three_otu)

    model = Model(iq_model)

    # Fixed seed first, then unseeded: the fit should be within an
    # approximation for any seed.
    for seed in (1, None):
        fitted = piqtree.fit_tree(three_otu, tree_topology, model, rand_seed=seed)
        check_likelihood(fitted, expected)
        check_motif_probs(fitted, expected.tree)
        check_rate_parameters(fitted, expected.tree)
        check_branch_lengths(fitted, expected.tree)
100 |
101 |
@pytest.mark.parametrize(
    ("iq_model", "c3_model"),
    [
        (DnaModel.JC, "JC69"),
        (DnaModel.K80, "K80"),
        (DnaModel.GTR, "GTR"),
        (DnaModel.TN, "TN93"),
        (DnaModel.HKY, "HKY85"),
        (DnaModel.F81, "F81"),
    ],
)
def test_fit_tree_str_model(
    three_otu: Alignment,
    iq_model: DnaModel,
    c3_model: str,
) -> None:
    """`piqtree.fit_tree` with the model given as a string agrees with the cogent3 fit."""
    tree_topology = make_tree(tip_names=three_otu.names)

    # Reference fit from cogent3 for the equivalent model.
    c3_app = get_app("model", c3_model, tree=tree_topology)
    expected = c3_app(three_otu)

    model = str(Model(iq_model))

    # Fixed seed first, then unseeded: the fit should be within an
    # approximation for any seed.
    for seed in (1, None):
        fitted = piqtree.fit_tree(three_otu, tree_topology, model, rand_seed=seed)
        check_likelihood(fitted, expected)
        check_motif_probs(fitted, expected.tree)
        check_rate_parameters(fitted, expected.tree)
        check_branch_lengths(fitted, expected.tree)
136 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_model_finder.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 |
3 | import pytest
4 | from cogent3 import Alignment
5 |
6 | from piqtree.iqtree import ModelFinderResult, ModelResultValue, model_finder
7 |
8 |
def test_model_result_value_from_string() -> None:
    """A well-formed three-field string is parsed into lnL, nfp and tree_length."""
    parsed = ModelResultValue.from_string("123.45 10 0.678")
    observed = (parsed.lnL, parsed.nfp, parsed.tree_length)
    assert observed == (123.45, 10, 0.678)
15 |
16 |
@pytest.mark.parametrize(
    "bad_val",
    [
        "123.45.00 10 0.678",  # invalid float
        "123.45 10 10 0.678",  # too many values
        "123.45 10",  # too few values
    ],
)
def test_model_result_value_from_bad_string(bad_val: str) -> None:
    """Malformed strings are rejected with a ValueError naming the parse failure."""
    expected_error = pytest.raises(ValueError, match="Error parsing string")
    with expected_error:
        ModelResultValue.from_string(bad_val)
28 |
29 |
@pytest.mark.parametrize("model", ["GTR+F", "GTR+F+G"])
def test_model_finder_result(model: str) -> None:
    """ModelFinderResult exposes the per-model statistics as a ModelResultValue."""
    newick = "((a,b),(c,d));"
    raw_data = {
        model: "123.45 10 0.678",
        "best_model_AIC": model,
        "best_model_AICc": model,
        "best_model_BIC": model,
        "best_tree_AIC": newick,  # ignored
        "initTree": newick,  # ignored
        "partition_type": 0,  # ignored
    }

    stats = ModelFinderResult("test", raw_data).model_stats[model]

    assert isinstance(stats, ModelResultValue)
    assert stats.lnL == 123.45
    assert stats.nfp == 10
    assert stats.tree_length == 0.678
48 |
49 |
def test_model_finder(five_otu: Alignment) -> None:
    """The best models agree whether or not ``num_threads`` is set to the CPU count."""
    default_threads = model_finder(five_otu, rand_seed=1)
    all_threads = model_finder(
        five_otu,
        num_threads=multiprocessing.cpu_count(),
        rand_seed=1,
    )
    for criterion in ("best_aic", "best_aicc", "best_bic"):
        first = str(getattr(default_threads, criterion))
        second = str(getattr(all_threads, criterion))
        assert first == second
60 |
61 |
def test_model_finder_restricted_submod(five_otu: Alignment) -> None:
    """With ``model_set`` restricted, every best model starts with an allowed name."""
    allowed_prefixes = ("HKY", "TIM")
    result = model_finder(five_otu, rand_seed=1, model_set={"HKY", "TIM"})
    for best in (result.best_aic, result.best_aicc, result.best_bic):
        assert str(best).startswith(allowed_prefixes)
67 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_nj_tree.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import numpy as np
4 | import pytest
5 | from cogent3 import Alignment, make_tree
6 |
7 | from piqtree import jc_distances, nj_tree
8 |
9 |
def test_nj_tree(five_otu: Alignment) -> None:
    """The NJ tree built from JC distances has the expected topology."""
    actual = nj_tree(jc_distances(five_otu))
    expected = make_tree("(((Human, Chimpanzee), Rhesus), Manatee, Dugong);")
    assert expected.same_topology(actual)
17 |
18 |
def test_nj_tree_allow_negative(all_otu: Alignment) -> None:
    """Negative branch lengths only appear when ``allow_negative=True``."""
    # a distance matrix can produce trees with negative branch lengths
    dists = jc_distances(all_otu)

    # by default, every branch length is non-negative
    default_tree = nj_tree(dists)
    default_lengths = [
        node.length for node in default_tree.traverse(include_self=False)
    ]
    assert all(length >= 0 for length in default_lengths)

    # with allow_negative=True, at least one branch length is negative
    negative_tree = nj_tree(dists, allow_negative=True)
    negative_lengths = [
        node.length for node in negative_tree.traverse(include_self=False)
    ]
    assert any(length < 0 for length in negative_lengths)
30 |
31 |
def test_nj_tree_nan(four_otu: Alignment) -> None:
    """A distance matrix containing NaN is rejected with a ValueError."""
    dists = jc_distances(four_otu)
    # Set both symmetric entries of one pair to NaN.
    dists[1, 0] = np.nan
    dists[0, 1] = np.nan

    message = "The pairwise distance matrix cannot contain NaN values."
    with pytest.raises(ValueError, match=re.escape(message)):
        nj_tree(dists, allow_negative=True)
41 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_random_trees.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import piqtree
4 | import piqtree.exceptions
5 |
6 |
@pytest.mark.parametrize("num_trees", [1, 10, 20])
@pytest.mark.parametrize("num_taxa", [10, 50, 100])
@pytest.mark.parametrize("tree_mode", list(piqtree.TreeGenMode))
def test_random_trees(
    num_trees: int,
    num_taxa: int,
    tree_mode: piqtree.TreeGenMode,
) -> None:
    """Seeded generation yields the requested number of trees and tips."""
    trees = piqtree.random_trees(num_trees, num_taxa, tree_mode, rand_seed=1)

    assert len(trees) == num_trees
    assert all(len(tree.tips()) == num_taxa for tree in trees)
25 |
26 |
@pytest.mark.parametrize("num_trees", [1, 10, 20])
@pytest.mark.parametrize("num_taxa", [10, 50, 100])
@pytest.mark.parametrize("tree_mode", list(piqtree.TreeGenMode))
def test_random_trees_no_seed(
    num_taxa: int,
    tree_mode: piqtree.TreeGenMode,
    num_trees: int,
) -> None:
    """Generation without ``rand_seed`` yields the requested number of trees and tips."""
    trees = piqtree.random_trees(num_trees, num_taxa, tree_mode)

    assert len(trees) == num_trees
    assert all(len(tree.tips()) == num_taxa for tree in trees)
44 |
45 |
@pytest.mark.parametrize("num_taxa", [-1, 0, 1, 2])
@pytest.mark.parametrize("tree_mode", list(piqtree.TreeGenMode))
def test_invalid_taxa(
    num_taxa: int,
    tree_mode: piqtree.TreeGenMode,
) -> None:
    """Requesting trees with each of these taxa counts raises IqTreeError."""
    expected_error = pytest.raises(piqtree.exceptions.IqTreeError)
    with expected_error:
        piqtree.random_trees(2, num_taxa, tree_mode, rand_seed=1)
59 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_robinson_foulds.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.testing import assert_array_equal
3 |
4 | import piqtree
5 |
6 |
def test_robinson_foulds() -> None:
    """Two four-taxon trees with different groupings are at pairwise distance 2."""
    newicks = ["(A,B,(C,D));", "(A,C,(B,D));"]
    expected = np.array([[0, 2], [2, 0]])
    assert_array_equal(piqtree.robinson_foulds(newicks), expected)
12 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_segmentation_fault.py:
--------------------------------------------------------------------------------
1 | """Test combinations of calls which under previous versions resulted in a segmentation fault."""
2 |
3 | import pytest
4 | from cogent3 import make_aligned_seqs, make_tree
5 |
6 | from piqtree import TreeGenMode, build_tree, fit_tree, random_trees
7 | from piqtree.exceptions import IqTreeError
8 | from piqtree.model import DiscreteGammaModel, DnaModel, FreeRateModel, Model
9 |
10 |
def test_two_build_random_trees() -> None:
    """
    Regression test: two build_tree calls followed by random_trees with a
    bad input resulted in a Segmentation Fault in a previous version.
    """
    aln = make_aligned_seqs({"a": "GGG", "b": "GGC", "c": "AAC", "d": "AAA"})

    # Two consecutive builds, using seeds 1 and 2.
    for seed in (1, 2):
        build_tree(aln, Model(DnaModel.JC), seed)

    with pytest.raises(IqTreeError):
        random_trees(3, 2, TreeGenMode.BALANCED, 1)
23 |
24 |
def test_two_fit_random_trees() -> None:
    """
    Regression test: two fit_tree calls followed by random_trees with a
    bad input resulted in a Segmentation Fault in a previous version.
    """
    aln = make_aligned_seqs({"a": "GGG", "b": "GGC", "c": "AAC", "d": "AAA"})
    tree = make_tree("(a,b,(c,d));")

    # Two consecutive fits, using seeds 1 and 2.
    for seed in (1, 2):
        fit_tree(aln, tree, Model(DnaModel.JC), seed)

    with pytest.raises(IqTreeError):
        random_trees(3, 2, TreeGenMode.BALANCED, 1)
38 |
39 |
@pytest.mark.parametrize("rate_model_class", [DiscreteGammaModel, FreeRateModel])
@pytest.mark.parametrize("categories", [0, -4])
def test_two_invalid_models(
    rate_model_class: type[DiscreteGammaModel] | type[FreeRateModel],
    categories: int,
) -> None:
    """
    Regression test: calling build_tree multiple times with an invalid
    model has resulted in a Segmentation Fault.
    """
    aln = make_aligned_seqs({"a": "GGG", "b": "GGC", "c": "AAC", "d": "AAA"})

    # Both attempts must fail with IqTreeError; a fresh model is built each time.
    for _attempt in range(2):
        with pytest.raises(IqTreeError):
            build_tree(
                aln,
                Model(DnaModel.JC, rate_model=rate_model_class(categories)),
            )
57 |
--------------------------------------------------------------------------------
/tests/test_iqtree/test_tree_yaml.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Any
3 |
4 | import pytest
5 | from cogent3 import make_tree
6 |
7 | from piqtree.exceptions import ParseIqTreeError
8 | from piqtree.iqtree._tree import _process_tree_yaml, _tree_equal
9 |
10 |
@pytest.fixture
def newick_not_in_candidates() -> list[dict[str, Any]]:
    """Return two YAML-like dicts whose PhyloTree newick matches no candidate.

    In the first entry the newick differs from candidate 0 in one branch
    length (0.0068955371 vs 0.0058955371). In the second entry candidate 0
    has tips 1 and 2 swapped relative to the newick.
    """
    return [
        {  # Newick string not in candidate set (different branch length)
            "CandidateSet": {
                0: "-6519.33018689 (0:0.0058955371,1:0.0026486308,(2:0.0230933557,3:0.3069062230):0.01387802789);",
                1: "-6540.1924365 (0:0.0059276645,(1:0.0026255655,2:0.0369876991):2.43436209e-06,3:0.3205542282);",
                2: "-6540.32542968 (0:0.0059468612,(1:0.0021841363,3:0.3203544844):2.076530752e-06,2:0.0369270512);",
            },
            "ModelDNA": {
                "rates": "1, 3.815110072, 1, 1, 3.815110072, 1",
                "state_freq": "0.3640205807, 0.1862366777, 0.217291437, 0.2324513047",
            },
            "PhyloTree": {
                "newick": "(0:0.0068955371,1:0.0026486308,(2:0.0230933557,3:0.3069062230):0.01387802789);",
            },
            "StopRule": {
                "curIteration": 101,
                "start_real_time": 1731027583,
                "time_vec": None,
            },
            "boot_consense_logl": 0,
            "contree_rfdist": -1,
            "finished": True,
            "finishedCandidateSet": True,
            "finishedModelFinal": True,
            "finishedModelInit": True,
            "initTree": "(0:0.0063001853,1:0.0022115739,(2:0.0203850510,3:0.3497395366):0.01883712169);",
            "iqtree": {
                "seed": 598834595,
                "start_time": 1731027582,
                "version": "2.3.6.lib",
            },
        },
        {  # Newick string not in candidate set (different names)
            "CandidateSet": {
                0: "-6519.33018689 (0:0.0058955371,2:0.0026486308,(1:0.0230933557,3:0.3069062230):0.01387802789);",
                1: "-6540.1924365 (0:0.0059276645,(1:0.0026255655,2:0.0369876991):2.43436209e-06,3:0.3205542282);",
                2: "-6540.32542968 (0:0.0059468612,(1:0.0021841363,3:0.3203544844):2.076530752e-06,2:0.0369270512);",
            },
            "ModelDNA": {
                "rates": "1, 3.815110072, 1, 1, 3.815110072, 1",
                "state_freq": "0.3640205807, 0.1862366777, 0.217291437, 0.2324513047",
            },
            "PhyloTree": {
                "newick": "(0:0.0058955371,1:0.0026486308,(2:0.0230933557,3:0.3069062230):0.01387802789);",
            },
            "StopRule": {
                "curIteration": 101,
                "start_real_time": 1731027583,
                "time_vec": None,
            },
            "boot_consense_logl": 0,
            "contree_rfdist": -1,
            "finished": True,
            "finishedCandidateSet": True,
            "finishedModelFinal": True,
            "finishedModelInit": True,
            "initTree": "(0:0.0063001853,1:0.0022115739,(2:0.0203850510,3:0.3497395366):0.01883712169);",
            "iqtree": {
                "seed": 598834595,
                "start_time": 1731027582,
                "version": "2.3.6.lib",
            },
        },
    ]
77 |
78 |
@pytest.fixture
def non_lie_dna_with_rate_model() -> dict[str, Any]:
    """Return a YAML-like dict with "ModelDNA" and "RateGammaInvar" sections.

    The PhyloTree newick is identical to the tree in CandidateSet entry 0.
    """
    return {
        "CandidateSet": {
            0: "-6736.94578464 (0:0.0063211201,1:0.0029675780,(2:0.0228519739,3:0.3072009029):0.01373649616);",
            1: "-6757.78815651 (0:0.0063607954,(1:0.0030079874,2:0.0365597715):2.296825575e-06,3:0.3208135518);",
            2: "-6758.07765021 (0:0.0063826033,(1:0.0021953253,3:0.3207201830):0.0001145372551,2:0.0365362763);",
        },
        "ModelDNA": {
            "rates": "1, 3.82025079, 1, 1, 3.82025079, 1",
            "state_freq": "0.3628523161, 0.1852938562, 0.2173913044, 0.2344625233",
        },
        "PhyloTree": {
            "newick": "(0:0.0063211201,1:0.0029675780,(2:0.0228519739,3:0.3072009029):0.01373649616);",
        },
        "RateGammaInvar": {"gamma_shape": 1.698497993, "p_invar": 1.002841144e-06},
        "StopRule": {
            "curIteration": 101,
            "start_real_time": 1724397157,
            "time_vec": None,
        },
        "boot_consense_logl": 0,
        "contree_rfdist": -1,
        "finished": True,
        "finishedCandidateSet": True,
        "finishedModelFinal": True,
        "finishedModelInit": True,
        "initTree": "(0:0.0063680036,1:0.0026681490,(2:0.0183861083,3:0.3034074996):0.01838610827);",
        "iqtree": {"seed": 95633264, "start_time": 1724397157, "version": "2.3.6.lib"},
    }
109 |
110 |
@pytest.fixture
def lie_dna_model() -> dict[str, Any]:
    """Return a YAML-like dict whose model section is "ModelLieMarkovRY2.2b".

    The PhyloTree newick is identical to the tree in CandidateSet entry 0.
    """
    return {
        "CandidateSet": {
            0: "-6606.82337571 ((0:0.0058632731,(2:0.0225450645,3:0.3055011501):0.01414755595):2.340509431e-06,1:0.0026433577):0;",
            1: "-6606.82337581 (0:0.0058609304,(1:0.0026456850,(2:0.0225450645,3:0.3055011501):0.01414755618):2.340509431e-06):0;",
            2: "-6606.82337581 ((0:0.0058632731,1:0.0026456850):2.340509431e-06,(2:0.0225450645,3:0.3055011501):0.01414520892):0;",
            3: "-6606.82337594 (((0:0.0058630055,1:0.0026461740):0.01414791335,2:0.0225451031):0.1178978025,3:0.1876035160):0;",
            4: "-6628.65519886 (0:0.0000023102,((1:0.0027556399,3:0.3193110364):2.07651076e-06,2:0.0366286909):0.005770021808):1e-06;",
            5: "-6628.70596445 (0:0.0000022647,((1:0.0027641463,2:0.0366284785):2.43434433e-06,3:0.3193357780):0.005401508376):1e-06;",
        },
        "ModelLieMarkovRY2.2b": {
            "model_parameters": 0.4841804549,
            "state_freq": "0.25, 0.25, 0.25, 0.25",
        },
        "PhyloTree": {
            "newick": "((0:0.0058632731,(2:0.0225450645,3:0.3055011501):0.01414755595):2.340509431e-06,1:0.0026433577):0;",
        },
        "StopRule": {
            "curIteration": 101,
            "start_real_time": 1729226726,
            "time_vec": None,
        },
        "boot_consense_logl": 0,
        "contree_rfdist": -1,
        "finished": True,
        "finishedCandidateSet": True,
        "finishedModelFinal": True,
        "finishedModelInit": True,
        "initTree": "(0:0.0063001853,1:0.0022115739,(2:0.0203850510,3:0.3497395366):0.01883712169);",
        "iqtree": {"seed": 1, "start_time": 1729226725, "version": "2.3.6.lib"},
    }
143 |
144 |
def test_newick_not_in_candidates(
    newick_not_in_candidates: list[dict[str, Any]],
) -> None:
    """Each fixture entry is rejected because its likelihood cannot be found."""
    message = "IQ-TREE output malformated, likelihood not found."
    tip_names = ["a", "b", "c", "d"]
    for tree_yaml in newick_not_in_candidates:
        with pytest.raises(ParseIqTreeError, match=re.escape(message)):
            _process_tree_yaml(tree_yaml, tip_names)
157 |
158 |
def test_non_lie_dna_with_rate_model(
    non_lie_dna_with_rate_model: dict[str, Any],
) -> None:
    """Parse yaml containing fits from a non-Lie DnaModel and a rate heterogeneity model."""
    tree = _process_tree_yaml(non_lie_dna_with_rate_model, ["a", "b", "c", "d"])

    expected_rates = {
        "A/C": 1.0,
        "A/G": 3.82025079,
        "A/T": 1.0,
        "C/G": 1.0,
        "C/T": 3.82025079,
        "G/T": 1,
    }
    expected_mprobs = {
        "A": 0.3628523161,
        "C": 0.1852938562,
        "G": 0.2173913044,
        "T": 0.2344625233,
    }
    assert tree.params["edge_pars"] == {
        "rates": expected_rates,
        "mprobs": expected_mprobs,
    }
    assert tree.params["RateGammaInvar"] == {
        "gamma_shape": 1.698497993,
        "p_invar": 1.002841144e-06,
    }
183 |
184 |
def test_non_lie_dna_model_motif_absent(
    non_lie_dna_with_rate_model: dict[str, Any],
) -> None:
    """Removing "state_freq" from "ModelDNA" makes parsing fail on motif parameters."""
    del non_lie_dna_with_rate_model["ModelDNA"]["state_freq"]

    message = "IQ-TREE output malformated, motif parameters not found."
    with pytest.raises(ParseIqTreeError, match=re.escape(message)):
        _process_tree_yaml(non_lie_dna_with_rate_model, ["a", "b", "c", "d"])
194 |
195 |
def test_non_lie_dna_model_rate_absent(
    non_lie_dna_with_rate_model: dict[str, Any],
) -> None:
    """Removing "rates" from "ModelDNA" makes parsing fail on rate parameters."""
    del non_lie_dna_with_rate_model["ModelDNA"]["rates"]

    message = "IQ-TREE output malformated, rate parameters not found."
    with pytest.raises(ParseIqTreeError, match=re.escape(message)):
        _process_tree_yaml(non_lie_dna_with_rate_model, ["a", "b", "c", "d"])
205 |
206 |
def test_lie_dna_model(
    lie_dna_model: dict[str, Any],
) -> None:
    """Parse yaml containing fits from a Lie DnaModel."""
    tree = _process_tree_yaml(lie_dna_model, ["a", "b", "c", "d"])

    uniform_mprobs = {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}
    assert tree.params["ModelLieMarkovRY2.2b"] == {
        "model_parameters": 0.4841804549,
        "mprobs": uniform_mprobs,
    }
217 |
218 |
def test_lie_dna_model_motif_absent(
    lie_dna_model: dict[str, Any],
) -> None:
    """Removing "state_freq" from the Lie model section fails on motif parameters."""
    del lie_dna_model["ModelLieMarkovRY2.2b"]["state_freq"]

    message = "IQ-TREE output malformated, motif parameters not found."
    with pytest.raises(ParseIqTreeError, match=re.escape(message)):
        _process_tree_yaml(lie_dna_model, ["a", "b", "c", "d"])
228 |
229 |
@pytest.mark.parametrize(
    ("candidate", "expected"),
    [
        ("((a:1.0,b:0.9),c:0.8);", True),
        ("((a:0.9,b:0.9),c:0.8);", False),
        ("((a:1.0,c:0.8),b:0.9);", False),
    ],
)
def test_tree_equal(candidate: str, expected: bool) -> None:
    """Check ``_tree_equal`` against an identical and two differing candidates.

    The first candidate is the same newick as the reference; the second
    changes one branch length and the third swaps tips ``b`` and ``c``.
    """
    tree = make_tree("((a:1.0,b:0.9),c:0.8);")
    # Keep the parsed tree in its own name: ``candidate`` is annotated ``str``
    # and must not be rebound to a tree object.
    candidate_tree = make_tree(candidate)
    assert _tree_equal(tree, candidate_tree) == expected
242 |
--------------------------------------------------------------------------------
/tests/test_model/test_freq_type.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from piqtree.model import FreqType, get_freq_type
4 |
5 |
def test_number_of_descriptions() -> None:
    """There are exactly as many descriptions as FreqType members."""
    n_descriptions = len(FreqType._descriptions())
    assert len(FreqType) == n_descriptions
8 |
9 |
def test_descriptions_exist() -> None:
    """Every FreqType member exposes a description."""
    # Accessing the attribute raises an error if the description is not present
    _ = [freq_type.description for freq_type in FreqType]
14 |
15 |
@pytest.mark.parametrize(
    ("freq_type", "iqtree_str"),
    [
        (FreqType.F, "F"),
        (FreqType.FO, "FO"),
        (FreqType.FQ, "FQ"),
        ("F", "F"),
        ("FO", "FO"),
        ("FQ", "FQ"),
        ("+F", "F"),
        ("+FO", "FO"),
        ("+FQ", "FQ"),
    ],
)
def test_get_freq_type(freq_type: FreqType | str, iqtree_str: str) -> None:
    """Enum members and strings (with or without a leading "+") resolve to a FreqType."""
    resolved = get_freq_type(freq_type)
    assert isinstance(resolved, FreqType)
    assert resolved.iqtree_str() == iqtree_str
34 |
35 |
@pytest.mark.parametrize(
    "freq_type",
    ["F0", "+F0", "+G", "+R9"],
)
def test_invalid_freq_type_name(freq_type: str) -> None:
    """Unknown frequency type names raise a ValueError."""
    # NOTE(review): the pattern is not passed through re.escape, so for the
    # parameters starting with "+" the "+" acts as a regex quantifier on the
    # preceding quote rather than a literal character. Confirm against the
    # message raised by get_freq_type before escaping it.
    pattern = f"Unknown state frequency type: {freq_type!r}"
    with pytest.raises(ValueError, match=pattern):
        get_freq_type(freq_type)
46 |
--------------------------------------------------------------------------------
/tests/test_model/test_model.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import pytest
4 |
5 | from piqtree.model import (
6 | AaModel,
7 | DiscreteGammaModel,
8 | DnaModel,
9 | FreeRateModel,
10 | FreqType,
11 | Model,
12 | RateModel,
13 | SubstitutionModel,
14 | make_model,
15 | )
16 |
17 |
@pytest.mark.parametrize("sub_mod", list(DnaModel) + list(AaModel))
@pytest.mark.parametrize("freq_type", [None, *list(FreqType)])
@pytest.mark.parametrize("invariant_sites", [False, True])
@pytest.mark.parametrize(
    "rate_model",
    [
        None,
        DiscreteGammaModel(),
        FreeRateModel(),
        DiscreteGammaModel(6),
        FreeRateModel(6),
    ],
)
def test_make_model(
    sub_mod: SubstitutionModel,
    freq_type: FreqType | None,
    invariant_sites: bool,
    rate_model: RateModel | None,
) -> None:
    """Round-trip every model combination through ``str`` and ``make_model``.

    ``freq_type`` and ``rate_model`` are parametrized with ``None`` as well as
    real values, so both are annotated as optional.
    """
    model = Model(sub_mod, freq_type, rate_model, invariant_sites=invariant_sites)
    expected = str(model)

    # Check the expected string is approximately generated correctly: each
    # component marker is present exactly when that component was requested.
    assert ("+I" in expected) == invariant_sites
    assert ("+G" in expected) == isinstance(rate_model, DiscreteGammaModel)
    assert ("+R" in expected) == isinstance(rate_model, FreeRateModel)
    assert ("+F" in expected) == (freq_type is not None)

    # Check make_model reproduces the same string
    got = str(make_model(expected))
    assert got == expected
64 |
65 |
def test_bad_sub_model() -> None:
    """An unknown substitution model name is rejected."""
    message = "Unknown substitution model: 'GYR'"
    with pytest.raises(ValueError, match=re.escape(message)):
        make_model("GYR")
72 |
73 |
def test_multiple_freq_type() -> None:
    """A model string with two base frequency components is rejected."""
    message = "Model 'GTR+FO+FO' contains multiple base frequency specifications."
    with pytest.raises(ValueError, match=re.escape(message)):
        make_model("GTR+FO+FO")
82 |
83 |
def test_multiple_invariant_sites() -> None:
    """A model string with two invariant-sites components is rejected."""
    message = "Model 'GTR+I+I' contains multiple specifications for invariant sites."
    with pytest.raises(ValueError, match=re.escape(message)):
        make_model("GTR+I+I")
92 |
93 |
def test_multiple_rate_het() -> None:
    """A model string with two rate heterogeneity components is rejected."""
    message = "Model 'GTR+G+R' contains multiple rate heterogeneity specifications."
    with pytest.raises(ValueError, match=re.escape(message)):
        make_model("GTR+G+R")
102 |
103 |
def test_unexpected_component() -> None:
    """A model string with an unrecognised component is rejected."""
    message = "Model 'GTR+Z' contains unexpected component."
    with pytest.raises(ValueError, match=re.escape(message)):
        make_model("GTR+Z")
112 |
--------------------------------------------------------------------------------
/tests/test_model/test_options.py:
--------------------------------------------------------------------------------
1 | # testing the display of functions
2 |
3 | from typing import Literal
4 |
5 | import pytest
6 |
7 | from piqtree import available_freq_type, available_models, available_rate_type
8 | from piqtree.model import AaModel, DnaModel, FreqType, SubstitutionModel
9 | from piqtree.model._rate_type import ALL_BASE_RATE_TYPES
10 |
11 |
@pytest.mark.parametrize(
    ("model_class", "model_type"),
    [(None, None), (DnaModel, "dna"), (AaModel, "protein")],
)
def test_num_available_models(
    model_class: type[SubstitutionModel] | None,
    model_type: Literal["dna", "protein"] | None,
) -> None:
    """The table has one row per model and its repr policy shows every row."""
    table = available_models(model_type)

    if model_class is None:
        # No filter: both DNA and protein models are counted.
        total_models = len(DnaModel) + len(AaModel)
    else:
        total_models = len(model_class)

    n_rows = table.shape[0]
    assert total_models > 0
    assert n_rows == total_models
    assert table._repr_policy["head"] == n_rows
27 |
28 |
def test_num_available_models_not_show_all() -> None:
    """With ``show_all=False`` the repr "head" setting differs from the row count."""
    table = available_models(show_all=False)
    n_rows = table.shape[0]
    assert table._repr_policy["head"] != n_rows
32 |
33 |
@pytest.mark.parametrize(
    ("model_fetch", "model_type"),
    [(None, None), ("dna", "nucleotide"), ("protein", "protein")],
)
def test_available_models_types(
    model_fetch: Literal["dna", "protein"] | None,
    model_type: str | None,
) -> None:
    """The first column holds only the model types permitted by the filter."""
    table = available_models(model_fetch)

    # Without a filter either type may appear; otherwise only the requested one.
    allowed = ["nucleotide", "protein"] if model_type is None else [model_type]

    for check_model_type in table[:, 0]:
        assert check_model_type[0] in allowed
50 |
51 |
def test_num_freq_type() -> None:
    """The frequency type table has one row per FreqType member."""
    n_freq_types = len(FreqType)
    n_rows = available_freq_type().shape[0]

    assert n_freq_types > 0
    assert n_rows == n_freq_types
58 |
59 |
def test_num_rate_type() -> None:
    """The rate type table has one row per entry of ALL_BASE_RATE_TYPES."""
    n_rate_types = len(ALL_BASE_RATE_TYPES)
    n_rows = available_rate_type().shape[0]

    assert n_rate_types > 0
    assert n_rows == n_rate_types
66 |
--------------------------------------------------------------------------------
/tests/test_model/test_rate_type.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from piqtree.model import (
4 | DiscreteGammaModel,
5 | FreeRateModel,
6 | RateModel,
7 | get_rate_type,
8 | )
9 |
10 |
def test_rate_model_uninstantiable() -> None:
    """Instantiating RateModel directly raises a TypeError."""
    expected_error = pytest.raises(TypeError)
    with expected_error:
        RateModel()  # type: ignore[abstract]
14 |
15 |
@pytest.mark.parametrize(
    ("invariant_sites", "rate_model", "iqtree_str"),
    [
        (False, None, ""),
        (True, None, "I"),
        (False, DiscreteGammaModel(), "G"),
        (True, DiscreteGammaModel(), "I+G"),
        (False, FreeRateModel(), "R"),
        (True, FreeRateModel(), "I+R"),
        (False, DiscreteGammaModel(8), "G8"),
        (True, DiscreteGammaModel(8), "I+G8"),
        (False, FreeRateModel(8), "R8"),
        (True, FreeRateModel(8), "I+R8"),
        (False, DiscreteGammaModel(42), "G42"),
        (True, DiscreteGammaModel(42), "I+G42"),
        (False, FreeRateModel(42), "R42"),
        (True, FreeRateModel(42), "I+R42"),
        (False, "G", "G"),
        (True, "+G", "I+G"),
        (False, "+R", "R"),
        (True, "R", "I+R"),
        (False, "G8", "G8"),
        (True, "+G8", "I+G8"),
        (False, "+R8", "R8"),
        (True, "R8", "I+R8"),
        (False, "+G42", "G42"),
        (True, "G42", "I+G42"),
        (False, "R42", "R42"),
        (True, "+R42", "I+R42"),
    ],
)
def test_get_rate_type(
    invariant_sites: bool,
    rate_model: RateModel | str | None,
    iqtree_str: str,
) -> None:
    """Check the IQ-TREE string for each ``invariant_sites`` / ``rate_model`` pair.

    ``rate_model`` is parametrized with ``RateModel`` instances, strings and
    ``None``, so the annotation covers all three.
    """
    model = get_rate_type(invariant_sites=invariant_sites, rate_model=rate_model)
    assert model.iqtree_str() == iqtree_str

    # Omitting rate_model must give the same result as passing None.
    if rate_model is None:
        model = get_rate_type(invariant_sites=invariant_sites)
        assert model.iqtree_str() == iqtree_str

    # Omitting invariant_sites must give the same result as passing False.
    if not invariant_sites:
        model = get_rate_type(rate_model=rate_model)
        assert model.iqtree_str() == iqtree_str
62 |
63 |
@pytest.mark.parametrize("invariant_sites", [True, False])
@pytest.mark.parametrize(
    "bad_rate_model",
    ["M", "T46", "R2D2"],
)
def test_invalid_rate_model_name(
    invariant_sites: bool,
    bad_rate_model: str,
) -> None:
    """Unrecognised rate model strings raise a ValueError naming the value."""
    pattern = f"Unexpected value for rate_model {bad_rate_model!r}"
    with pytest.raises(ValueError, match=pattern):
        get_rate_type(invariant_sites=invariant_sites, rate_model=bad_rate_model)
78 |
79 |
@pytest.mark.parametrize("invariant_sites", [True, False])
@pytest.mark.parametrize(
    "bad_rate_model",
    [4, 3.15, ["R3", "G"]],
)
def test_invalid_rate_model_type(
    invariant_sites: bool,
    bad_rate_model: float | list,
) -> None:
    """Values of an unsupported type raise a TypeError naming that type."""
    pattern = f"Unexpected type for rate_model: {type(bad_rate_model)}"
    with pytest.raises(TypeError, match=pattern):
        get_rate_type(
            invariant_sites=invariant_sites,
            rate_model=bad_rate_model,  # type: ignore[arg-type]
        )
97 |
--------------------------------------------------------------------------------
/tests/test_model/test_substitution_model.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import pytest
4 |
5 | from piqtree.model import AaModel, DnaModel, SubstitutionModel, get_substitution_model
6 |
7 |
@pytest.mark.parametrize("model_class", [DnaModel, AaModel])
def test_number_of_descriptions(
    model_class: type[DnaModel] | type[AaModel],
) -> None:
    """Each model class has exactly as many descriptions as members."""
    n_descriptions = len(model_class._descriptions())
    assert len(model_class) == n_descriptions
13 |
14 |
@pytest.mark.parametrize("model_class", [DnaModel, AaModel])
def test_descriptions_exist(model_class: type[DnaModel] | type[AaModel]) -> None:
    """Every member of each model class exposes a description."""
    # Accessing the attribute raises an error if the description is not present
    _ = [model.description for model in model_class]
20 |
21 |
@pytest.mark.parametrize(
    ("model_class", "model_type"),
    [(DnaModel, "nucleotide"), (AaModel, "protein")],
)
def test_model_type(
    model_class: type[DnaModel] | type[AaModel],
    model_type: str,
) -> None:
    """``model_type()`` gives the same answer on the class and on every member."""
    assert model_class.model_type() == model_type
    assert all(model.model_type() == model_type for model in model_class)
34 |
35 |
@pytest.mark.parametrize(
    ("submod_type", "iqtree_str"),
    [
        (DnaModel.F81, "F81"),
        (DnaModel.LIE_10_34, "10.34"),
        (AaModel.NQ_insect, "NQ.insect"),
        ("NQ.yeast", "NQ.yeast"),
        ("GTR", "GTR"),
        ("2.2b", "2.2b"),
    ],
)
def test_get_substitution_model(
    submod_type: SubstitutionModel | str,
    iqtree_str: str,
) -> None:
    """Enum members and strings both resolve to a SubstitutionModel."""
    resolved = get_substitution_model(submod_type)
    assert isinstance(resolved, SubstitutionModel)
    assert resolved.iqtree_str() == iqtree_str
54 |
55 |
@pytest.mark.parametrize(
    "submod_type",
    ["FQ", "F", "+GTR", "AA", "G8", ""],
)
def test_invalid_substitution_model(submod_type: str) -> None:
    """Strings that are not substitution model names raise a ValueError."""
    message = f"Unknown substitution model: {submod_type!r}"
    with pytest.raises(ValueError, match=re.escape(message)):
        get_substitution_model(submod_type)
66 |
--------------------------------------------------------------------------------