├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── RELEASING.md ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── newick.py └── tests ├── fixtures ├── ar53_r207.tree ├── mrbayes.nwk └── tree-glottolog-newick.txt ├── test_nescent.py └── test_newick.py /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [3.9, "3.10", 3.11, 3.12] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install .[test] 27 | - name: Test with pytest 28 | run: | 29 | pytest 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .pytest_cache 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | 56 | # Sphinx documentation 57 | docs/_build/ 58 | 59 | # PyBuilder 60 | target/ 61 | 62 | #Ipython Notebook 63 | .ipynb_checkpoints 64 | 65 | .idea 66 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changes 2 | 3 | The `newick` package adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 4 | 5 | ## Unreleased 6 | 7 | Dropped Python 3.7 compatibility. 8 | 9 | 10 | ## [v1.9.0] - 2023-03-14 11 | 12 | Support round-tripping Newick with two annotations per node, one before and one after the 13 | length separator. 14 | 15 | 16 | ## [v1.8.1] - 2023-03-05 17 | 18 | Bugfix: Make `unquoted_name` property work for nodes without names, too. 19 | 20 | 21 | ## [v1.8.0] - 2023-03-05 22 | 23 | - Added `rename` method - since that seems to be most common kind of node visitor. 24 | - Added `strip_comments` method to remove comments from a `Node`. While it is 25 | already possible to remove comments upon parsing, this method makes this possible 26 | for parsed trees. 27 | 28 | 29 | ## [v1.7.1] - 2023-03-03 30 | 31 | Bugfix: Quote node names containing whitespace, if `auto_quote` is set. 32 | 33 | 34 | ## [v1.7.0] - 2023-02-13 35 | 36 | Big performance improvement of parser by switching to accumulated tokens. 37 | 38 | 39 | ## [v1.6.0] - 2023-01-11 40 | 41 | Support reading key-value data from node comments. 42 | 43 | 44 | ## [v1.5.0] - 2023-01-09 45 | 46 | Full support for quoted labels and (nested) comments. 
47 | 48 | 49 | ## [v1.4.0] - 2022-12-06 50 | 51 | - Drop py3.6 compatibility 52 | - Run tests on py3.11 53 | - Added type hints. 54 | 55 | 56 | ## [v1.3.2] - 2021-12-14 57 | 58 | - Backwards incompatibility through bug-fix: newick will not (incorrectly) parse 59 | invalid newick trees anymore, but raise `ValueError`. 60 | - Run tests on py 3.10 as well. 61 | 62 | 63 | ## [v1.3.1] - 2021-10-14 64 | 65 | Fixed support for node annotations for the case when annotations are between `:` and length. 66 | 67 | 68 | ## [v1.3.0] - 2021-05-04 69 | 70 | Added support for reading and writing of node annotations (in comments). 71 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ------------ 3 | 4 | Fork dlce-eva/python-newick and install the development environment: 5 | 6 | ```sh 7 | $ pip install virtualenv # might require sudo/admin privileges 8 | $ git clone https://github.com/<your-username>/python-newick.git 9 | $ cd python-newick 10 | $ python -m virtualenv .venv 11 | $ source .venv/bin/activate # Windows: .venv\Scripts\activate.bat 12 | $ pip install -r requirements.txt # installs the cloned version with dev-tools in development mode 13 | ``` 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src 2 | global-exclude *.py[co] 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-newick 2 | 3 | > [!IMPORTANT] 4 | > This project has been moved to https://gitlab.mpcdf.mpg.de/dlce-eva/python-newick 5 | 6 | python package to read and write the 7 | [Newick format](https://en.wikipedia.org/wiki/Newick_format). 8 | 9 | 10 | ## Reading Newick 11 | 12 | Since Newick specifies a format for a **set of trees**, all functions to read Newick return 13 | a `list` of `newick.Node` objects. 
14 | 15 | - Reading from a string: 16 | ```python 17 | >>> from newick import loads 18 | >>> trees = loads('(A,B,(C,D)E)F;') 19 | >>> trees[0].name 20 | 'F' 21 | >>> [n.name for n in trees[0].descendants] 22 | ['A', 'B', 'E'] 23 | ``` 24 | 25 | - Reading from a `file`-like object: 26 | ```python 27 | >>> import io 28 | >>> from newick import load 29 | >>> with io.open('fname', encoding='utf8') as fp: 30 | ... trees = load(fp) 31 | ``` 32 | 33 | - Reading from a path: 34 | ```python 35 | >>> from newick import read 36 | >>> trees = read('fname') 37 | >>> import pathlib 38 | >>> trees = read(pathlib.Path('fname')) 39 | ``` 40 | 41 | ### Supported Newick dialects 42 | 43 | While the set of reserved characters in Newick (`;(),:`) is relatively small, it's still often 44 | seen as too restrictive, in particular when it comes to adding more data to tree nodes. Thus, Newick 45 | provides two mechanisms to overcome this restriction: 46 | - *quoted labels* to allow arbitrary text as node names, 47 | - *comments* enclosed in square brackets. 48 | 49 | 50 | #### Quoted node labels 51 | 52 | Node labels in Newick may be quoted (i.e. enclosed in single quotes `'`) to make it possible to 53 | add characters which are otherwise reserved. The `newick` package supports quoted labels. 54 | 55 | ```python 56 | >>> from newick import loads 57 | >>> print(loads("('A:B','C''D')'E(F)'")[0].ascii_art()) 58 | ┌─'A:B' 59 | ──'E(F)'─┤ 60 | └─'C''D' 61 | ``` 62 | 63 | When creating Newick trees programmatically, names can be quoted (if necessary) automatically: 64 | ```python 65 | >>> from newick import Node 66 | >>> print(Node("A(F')", auto_quote=True).name) 67 | 'A(F'')' 68 | >>> print(Node("A(F')", auto_quote=True).unquoted_name) 69 | A(F') 70 | ``` 71 | 72 | Note: `newick` provides no support to parse structured data from node labels (as it can be found 73 | in the trees distributed by the Genome Taxonomy Database). 
74 | 75 | 76 | #### Additional information in comments 77 | 78 | The ["Newick specification"](http://biowiki.org/wiki/index.php/Newick_Format) states 79 | 80 | > Comments are enclosed in square brackets and may appear anywhere 81 | 82 | This has spawned a host of ad-hoc mechanisms to insert additional data into Newick trees. 83 | 84 | The `newick` package can deal with comments in two ways. 85 | 86 | - Ignoring comments: 87 | ```python 88 | >>> newick.loads('[a comment](a,b)c;', strip_comments=True)[0].newick 89 | '(a,b)c' 90 | ``` 91 | - Reading comments as node annotations: Several software packages use Newick comments to 92 | store node annotations, e.g. *BEAST, MrBayes or TreeAnnotator. Provided there are no 93 | comments in places where they cannot be interpreted as node annotations, `newick` supports 94 | reading and writing these annotations: 95 | ```python 96 | >>> newick.loads('(a[annotation],b)c;')[0].descendants[0].name 97 | 'a' 98 | >>> newick.loads('(a[annotation],b)c;')[0].descendants[0].comment 99 | 'annotation' 100 | >>> newick.loads('(a[annotation],b)c;')[0].newick 101 | '(a[annotation],b)c' 102 | ``` 103 | Annotations may come before and/or after the `:` which separates node label and length: 104 | - ```python 105 | >>> newick.loads('(a[annotation]:2,b)c;')[0].descendants[0].length 106 | 2.0 107 | >>> newick.loads('(a:[annotation]2,b)c;')[0].descendants[0].length 108 | 2.0 109 | >>> newick.loads('(a[annotation1]:[annotation2]2,b)c;')[0].descendants[0].comments 110 | ['annotation1', 'annotation2'] 111 | ``` 112 | 113 | Note that square brackets inside *quoted labels* will **not** be interpreted as comments 114 | or annotations: 115 | ```python 116 | >>> newick.loads("('a[label]',b)c;")[0].descendants[0].name 117 | "'a[label]'" 118 | >>> newick.loads("('a[label]',b)c;")[0].newick 119 | "('a[label]',b)c" 120 | ``` 121 | 122 | Some support for reading key-value data from node comments is available as well. 
If the comment 123 | format follows the [NHX](https://en.wikipedia.org/wiki/Newick_format#New_Hampshire_X_format) spec 124 | or the `&<key>=<value>,...`-format used e.g. by the MrBayes or BEAST software, additional data 125 | can be accessed from the `dict` `Node.properties`: 126 | ```python 127 | >>> newick.loads('(A,B)C[&&NHX:k1=v1:k2=v2];')[0].properties 128 | {'k1': 'v1', 'k2': 'v2'} 129 | ``` 130 | 131 | **Limitations:** 132 | 133 | - **Typed** node properties are not supported. I.e. values in `Node.properties` are 134 | always strings. Since typed properties tend to be specific to the application writing the newick, 135 | this level of support would require more knowledge of the creation context of the tree than can 136 | safely be inferred from the Newick string alone. 137 | ```python 138 | >>> newick.loads('(A,B)C[&range={1,5},support="100"];')[0].properties 139 | {'range': '{1,5}', 'support': '"100"'} 140 | ``` 141 | - Node annotations in comments are not completely round-trip-safe. In particular multiple comments 142 | per node may be lumped together (using `|` as separator) when serializing a Newick node: 143 | ```python 144 | >>> newick.loads('(a,b)c[c1][c2]:3')[0].newick 145 | '(a,b)c[c1|c2]:3' 146 | ``` 147 | 148 | 149 | ## Writing Newick 150 | 151 | In parallel to the read operations there are three functions to serialize a single `Node` object or a `list` of `Node` 152 | objects to Newick format: 153 | - `dumps(trees) -> str` 154 | - `dump(trees, fp)` 155 | - `write(trees, 'fname')` 156 | 157 | A tree may be assembled using the factory methods of the `Node` class: 158 | - `Node.__init__` 159 | - `Node.create` 160 | - `Node.add_descendant` 161 | 162 | 163 | ## Manipulating trees 164 | 165 | - Displaying tree topology in the terminal: 166 | ```python 167 | >>> import newick 168 | >>> tree = newick.loads('(b,(c,(d,(e,(f,g))h)i)a)')[0] 169 | >>> print(tree.ascii_art()) 170 | ┌─b 171 | ────┤ 172 | │ ┌─c 173 | └─a─┤ 174 | │ ┌─d 175 | └─i─┤ 176 | │ ┌─e 177 | └─h─┤ 178 
| │ ┌─f 179 | └───┤ 180 | └─g 181 | ``` 182 | - Pruning trees: The example below prunes the tree such that `b`, `c` and `i` are the only 183 | remaining leaves. 184 | ```python 185 | >>> tree.prune_by_names(['b', 'c', 'i'], inverse=True) 186 | >>> print(tree.ascii_art()) 187 | ┌─b 188 | ────┤ 189 | │ ┌─c 190 | └─a─┤ 191 | └─i 192 | ``` 193 | - Running a callable on a filtered set of nodes: 194 | ```python 195 | >>> tree.visit(lambda n: setattr(n, 'name', n.name.upper()), lambda n: n.name in ['a', 'b']) 196 | >>> print(tree.ascii_art()) 197 | ┌─B 198 | ────┤ 199 | │ ┌─c 200 | └─A─┤ 201 | └─i 202 | ``` 203 | - Removing (topologically) redundant internal nodes: 204 | ```python 205 | >>> tree.prune_by_names(['B', 'c'], inverse=True) 206 | >>> print(tree.ascii_art()) 207 | ┌─B 208 | ────┤ 209 | └─A ──c 210 | >>> tree.remove_redundant_nodes(keep_leaf_name=True) 211 | >>> print(tree.ascii_art()) 212 | ┌─B 213 | ────┤ 214 | └─c 215 | ``` 216 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | 2 | Releasing python-newick 3 | ======================= 4 | 5 | Clone dlce-eva/python-newick and switch to the master branch. 
Then: 6 | 7 | - Do platform test via tox: 8 | ```shell 9 | $ tox -r 10 | ``` 11 | Make sure statement coverage is at 100% 12 | 13 | - Make sure flake8 passes: 14 | ```shell 15 | $ flake8 src 16 | ``` 17 | 18 | - Change the version to the new version number in 19 | - `setup.cfg` 20 | - `src/newick.py` 21 | - and note changes in `CHANGELOG.md` 22 | 23 | - Create the release commit: 24 | ```shell 25 | git commit -a -m "release <VERSION>" 26 | ``` 27 | 28 | - Create a release tag: 29 | ```shell 30 | git tag -a v<VERSION> -m "<VERSION> release" 31 | ``` 32 | 33 | - Release to PyPI: 34 | ```shell 35 | git checkout tags/v<VERSION> 36 | rm dist/* 37 | python -m build -n 38 | twine upload dist/* 39 | ``` 40 | 41 | - Push to GitHub: 42 | ```shell 43 | git push origin 44 | git push --tags origin 45 | ``` 46 | 47 | - Append `.dev0` to the version number for the new development cycle in 48 | - `setup.cfg` 49 | - `src/newick.py` 50 | 51 | - Commit/push the version change: 52 | ```shell 53 | git commit -a -m "bump version for development" 54 | git push origin 55 | ``` 56 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # development environment: install in development mode 2 | -e .[dev,test] 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = newick 3 | version = 1.9.1.dev0 4 | 5 | author = Robert Forkel 6 | author_email = robert_forkel@eva.mpg.de 7 | description = A python module to read and write the Newick format 8 | long_description = file: README.md 9 | 
long_description_content_type = text/markdown 10 | license = Apache 2.0 11 | license_files = LICENSE 12 | url = https://github.com/dlce-eva/python-newick 13 | project_urls = 14 | Bug Tracker = https://github.com/dlce-eva/python-newick/issues 15 | platforms = any 16 | classifiers = 17 | Development Status :: 5 - Production/Stable 18 | Intended Audience :: Developers 19 | Intended Audience :: Science/Research 20 | Natural Language :: English 21 | Operating System :: OS Independent 22 | Programming Language :: Python :: 3 23 | Programming Language :: Python :: 3.8 24 | Programming Language :: Python :: 3.9 25 | Programming Language :: Python :: 3.10 26 | Programming Language :: Python :: 3.11 27 | Programming Language :: Python :: 3.12 28 | Programming Language :: Python :: 3.13 29 | Programming Language :: Python :: Implementation :: CPython 30 | Programming Language :: Python :: Implementation :: PyPy 31 | License :: OSI Approved :: Apache Software License 32 | 33 | [options] 34 | zip_safe = False 35 | py_modules = 36 | newick 37 | packages = find: 38 | package_dir = 39 | = src 40 | python_requires = >=3.8 41 | install_requires = 42 | include_package_data = True 43 | 44 | [options.packages.find] 45 | where = src 46 | 47 | [options.extras_require] 48 | dev = 49 | build 50 | tox 51 | flake8 52 | wheel>=0.36 53 | twine 54 | test = 55 | pytest>=3.6 56 | pytest-mock 57 | pytest-cov 58 | coverage>=4.2 59 | 60 | [bdist_wheel] 61 | universal = 1 62 | 63 | [flake8] 64 | ignore = E711,E712,D100,D101,D103,D102,D301 65 | max-line-length = 100 66 | exclude = .tox 67 | 68 | [tool:pytest] 69 | minversion = 5 70 | testpaths = tests 71 | addopts = --cov 72 | markers = 73 | slow: mark test as slow. 
"""
Functionality to read and write the Newick serialization format for trees.

.. seealso:: https://en.wikipedia.org/wiki/Newick_format
"""
import re
import enum
import typing
import pathlib
import itertools
import dataclasses

__version__ = "1.9.1.dev0"

QUOTE = "'"
ESCAPE = {"'", "\\"}
COMMENT = {'[': 1, ']': -1}
WHITESPACE = '\t\r\n '


class TokenType(enum.Enum):
    WORD = 1
    QWORD = 2  # A quoted string
    COMMENT = 3
    WHITESPACE = 4
    OBRACE = 5  # Opening brace
    CBRACE = 6  # Closing brace
    COMMA = 7
    COLON = 8
    SEMICOLON = 9


RESERVED_PUNCTUATION = {
    ":": TokenType.COLON,
    ";": TokenType.SEMICOLON,
    ",": TokenType.COMMA,
    "(": TokenType.OBRACE,
    ")": TokenType.CBRACE,
}
RP_PATTERN = re.compile('|'.join(re.escape(c) for c in RESERVED_PUNCTUATION))

# One ":key=value" item of an NHX comment. Compiled once at import time.
_NHX_KV_PATTERN = re.compile(r':(?P<key>[^=]+)=(?P<value>[^:]+)')


def _iter_properties(c):
    """
    Parse key-value properties from known comment formats.

    Two formats are recognized by their prefix:

    - ``&&NHX`` followed by ``:key=value`` items,
    - ``&`` followed by comma-separated ``key=value`` items, where commas inside
      ``{...}`` or ``"..."`` do not separate items (MrBayes style).

    :param c: Comment text without the enclosing square brackets.
    :return: Generator of `(key, value)` pairs of strings; empty for other comments.
    """
    if c.startswith('&&NHX'):
        c = c[5:]
        m = _NHX_KV_PATTERN.match(c)
        while m:
            yield (m.group('key'), m.group('value'))
            c = c[m.end():]
            m = _NHX_KV_PATTERN.match(c)
    elif c.startswith('&'):
        # MrBayes comment.
        kv = []
        inquote, bracketlevel = False, 0
        for cc in c[1:]:
            if cc == ',':
                if not (inquote or bracketlevel != 0):
                    # A top-level comma terminates the current key-value pair.
                    assert kv
                    k, _, v = ''.join(kv).partition('=')
                    yield k, v
                    kv = []
                    inquote, bracketlevel = False, 0
                    continue
            elif cc == '{':
                bracketlevel += 1
            elif cc == '}':
                bracketlevel -= 1
            elif cc == '"':
                inquote = not inquote
            kv.append(cc)
        if kv:
            k, _, v = ''.join(kv).partition('=')
            yield k, v


def length_parser(x: str) -> float:
    """Default parser for branch lengths: a missing or empty length is read as `0.0`."""
    return float(x or 0.0)


def length_formatter(x: float) -> str:
    """Default formatter for branch lengths, using `str` conversion."""
    return '%s' % x


def check_string(n, type_):
    """
    Make sure a string can be written to Newick without quoting.

    :param n: The string to check.
    :param type_: Description of the role of the string, used in the error message.
    :raises ValueError: if `n` contains reserved punctuation or whitespace.
    """
    if RP_PATTERN.search(n) or any(c in n for c in WHITESPACE):
        raise ValueError('"{}" may not appear in {}'.format(RESERVED_PUNCTUATION.keys(), type_))


class Node:
    """
    A Node may be a tree, a subtree or a leaf.

    :ivar typing.Optional[Node] ancestor: `None` if the node is the root node of a tree.
    :ivar typing.List[Node] descendants: List of immediate children of the node.
    """
    def __init__(self,
                 name: typing.Optional[str] = None,
                 length: typing.Optional[typing.Union[str, float]] = None,
                 comment: typing.Optional[str] = None,
                 comments: typing.Optional[list] = None,
                 descendants: typing.Optional[typing.Iterable] = None,
                 auto_quote: bool = False,
                 **kw):
        """
        :param name: Node label.
        :param length: Branch length from the new node to its parent.
        :param comment: A single comment; only used if `comments` is empty.
        :param comments: List of comments (without the enclosing square brackets).
        :param descendants: Child nodes; each gets the new node assigned as ancestor.
        :param auto_quote: Optional flag specifying whether the node name should be quoted if \
        necessary.
        :param kw: Recognized keyword arguments:\
            `length_parser`: Custom parser for the `length` attribute of a Node.\
            `length_formatter`: Custom formatter for the branch length when formatting a\
            Node as Newick string.\
            `colon_before_comment`: Whether the length separator is written before the\
            comment when serializing.
        """
        # The name setter reads `_auto_quote`, so the flag must be set first.
        self._auto_quote = auto_quote
        self.name = name
        self.comments = comments or ([comment] if comment else [])
        self.descendants = descendants or []
        self.ancestor = None
        self._length_parser = kw.pop('length_parser', length_parser)
        self._length_formatter = kw.pop('length_formatter', length_formatter)
        self._colon_before_comment = kw.pop('colon_before_comment', False)
        # The length setter uses the formatter, so it must come last.
        self.length = length

    @property
    def properties(self) -> dict:
        """Key-value data parsed from all comments of the node; later comments win."""
        res = {}
        for comment in self.comments:
            res.update(list(_iter_properties(comment)))
        return res

    @property
    def name(self) -> str:
        return self._name

    @name.setter
    def name(self, n):
        """
        Set the node label, quoting it if `auto_quote` is set and quoting is necessary.

        :raises ValueError: if an unquoted name contains reserved punctuation or whitespace.
        """
        quoted = bool(n) and n.startswith(QUOTE) and n.endswith(QUOTE)

        # `n` may be `None` (e.g. from `remove_names`); there is nothing to quote then.
        if n and (not quoted) and self._auto_quote and \
                any(char in n for char in ''.join(RESERVED_PUNCTUATION) + QUOTE + WHITESPACE):
            n = "{}{}{}".format(QUOTE, n.replace("'", "''"), QUOTE)
            quoted = True

        if n and not quoted:
            check_string(n, 'unquoted string')
        self._name = n

    def __repr__(self):
        return 'Node("%s")' % self.name

    @property
    def unquoted_name(self) -> str:
        """The node name with enclosing quotes removed and escaped quotes unescaped."""
        n = self.name
        if n and n.startswith(QUOTE) and n.endswith(QUOTE):
            n = n[1:-1]
            for esc in ESCAPE:
                n = n.replace(esc + QUOTE, QUOTE)
        return n

    @property
    def length(self) -> float:
        """The branch length, converted from its stored string form by the length parser."""
        return self._length_parser(self._length)

    @length.setter
    def length(self, length_: typing.Optional[typing.Union[str, float]]):
        """
        Store the branch length as formatted string, or `None` for "no length".

        :raises ValueError: if a length given as string contains reserved punctuation or \
        whitespace.
        """
        if length_ is None:
            self._length = length_
        else:
            if isinstance(length_, str):
                length_ = length_.strip()
                check_string(length_, 'branch length')
            self._length = self._length_formatter(length_)

    @property
    def comment(self):  # Backwards compatibility.
        return self.comments[0] if self.comments else None

    @classmethod
    def create(cls, **kw) -> 'Node':  # Backwards compatibility.
        return cls(**kw)

    @property
    def descendants(self) -> typing.Iterable['Node']:
        return self._descendants

    @descendants.setter
    def descendants(self, nodes: typing.Iterable):
        self._descendants = []
        for node in nodes:
            self.add_descendant(node)

    def add_descendant(self, node: 'Node'):
        """Append `node` to the children of this node and make this node its ancestor."""
        node.ancestor = self
        self._descendants.append(node)

    @property
    def newick(self) -> str:
        """The representation of the Node in Newick format."""
        colon_done = False
        label = self.name or ''
        if self.comments:
            if self._length and len(self.comments) == 2 and not self._colon_before_comment:
                # We assume that's the variant where one comment comes before and one after
                # the ":".
                label += '[{}]:[{}]'.format(*self.comments)
                colon_done = True
            else:
                if self._length and self._colon_before_comment:
                    label += ':'
                    colon_done = True
                label += '[{}]'.format('|'.join(self.comments))
        if self._length:
            if not colon_done:
                label += ':'
            label += self._length
        descendants = ','.join([n.newick for n in self.descendants])
        if descendants:
            descendants = '(' + descendants + ')'
        return descendants + label

    def _ascii_art(self, char1='\u2500', show_internal=True, maxlen=None):
        """
        Render the subtree as list of text lines.

        :param char1: The character connecting this node to its parent's stem.
        :param show_internal: Whether to write labels of internal nodes on the stems.
        :param maxlen: Column width per tree level; computed from the labels if `None`.
        :return: Pair `(lines, index of the line holding this node's stem)`.
        """
        if maxlen is None:
            # A column is as wide as the longest displayed label plus 4 characters, which
            # gives the five-character "----|" stems for single-character labels.
            maxlen = max(
                len(n.name or '') + 4 for n in self.walk() if show_internal or n.is_leaf)
        pad = ' ' * (maxlen - 1)
        namestr = '\u2500' + (self.name or '')

        if self.descendants:
            mids = []
            result = []
            for i, c in enumerate(self.descendants):
                if len(self.descendants) == 1:
                    char2 = '\u2500'
                elif i == 0:
                    char2 = '\u250c'
                elif i == len(self.descendants) - 1:
                    char2 = '\u2514'
                else:
                    char2 = '\u2500'
                clines, mid = c._ascii_art(
                    char1=char2, show_internal=show_internal, maxlen=maxlen)
                mids.append(mid + len(result))
                result.extend(clines)
                result.append('')  # Spacer line between siblings.
            result.pop()
            lo, hi, end = mids[0], mids[-1], len(result)
            prefixes = [pad] * (lo + 1) + \
                [pad + '\u2502'] * (hi - lo - 1) + \
                [pad] * (end - hi)
            mid = (lo + hi) // 2
            prefixes[mid] = char1 + '\u2500' * (len(prefixes[mid]) - 2) + prefixes[mid][-1]
            result = [p + l for p, l in zip(prefixes, result)]
            if show_internal:
                stem = result[mid]
                result[mid] = stem[0] + namestr + stem[len(namestr) + 1:]
            return result, mid
        return [char1 + namestr], 0

    def ascii_art(self, strict: bool = False, show_internal: bool = True) -> str:
        r"""
        Return a unicode string representing a tree in ASCII art fashion.

        :param strict: Use ASCII characters strictly (for the tree symbols).
        :param show_internal: Show labels of internal nodes.
        :return: unicode string

        >>> node = loads('((A,B)C,((D,E)F,G,H)I)J;')[0]
        >>> print(node.ascii_art(show_internal=False, strict=True))
                /-A
            /---|
            |   \-B
        ----|       /-D
            |   /---|
            |   |   \-E
            \---|
                |-G
                \-H
        """
        cmap = {
            '\u2500': '-',
            '\u2502': '|',
            '\u250c': '/',
            '\u2514': '\\',
            '\u251c': '|',
            '\u2524': '|',
            '\u253c': '+',
        }
        # Whitespace between a vertical bar and the next tree symbol.
        gap = re.compile(r'(?<=\u2502)(?P<s>\s+)(?=[\u250c\u2514\u2502])')

        def normalize(line):
            line = gap.sub(lambda m: m.group('s')[1:], line)
            line = re.sub('\u2500\u2502', '\u2500\u2524', line)  # -|
            line = re.sub('\u2502\u2500', '\u251c', line)  # |-
            line = re.sub('\u2524\u2500', '\u253c', line)  # -|-
            if strict:
                for u, a in cmap.items():
                    line = line.replace(u, a)
            return line
        return '\n'.join(
            normalize(line) for line in self._ascii_art(show_internal=show_internal)[0]
            if set(line) != {' ', '\u2502'})  # remove lines of only spaces and pipes

    @property
    def is_leaf(self) -> bool:
        return not bool(self.descendants)

    @property
    def is_binary(self) -> bool:
        return all(len(n.descendants) in (0, 2) for n in self.walk())

    def walk(self, mode=None) -> typing.Generator['Node', None, None]:
        """
        Traverses the (sub)tree rooted at self, yielding each visited Node.

        .. seealso:: https://en.wikipedia.org/wiki/Tree_traversal

        :param mode: Specifies the algorithm to use when traversing the subtree rooted \
        at self. `None` for pre-order depth-first (a node is yielded before its \
        descendants), `'postorder'` for post-order depth-first search.
        :return: Generator of the visited Nodes.
        """
        if mode == 'postorder':
            yield from self._postorder()
        else:  # default to pre-order: the node itself, then each subtree in turn
            yield self
            for node in self.descendants:
                yield from node.walk()

    def visit(self,
              visitor: typing.Callable[['Node'], None],
              predicate: typing.Optional[typing.Callable[['Node'], bool]] = None,
              **kw):
        """
        Apply a function to matching nodes in the (sub)tree rooted at self.

        :param visitor: A callable accepting a Node object as single argument.
        :param predicate: A callable accepting a Node object as single argument and \
        returning a boolean signaling whether Node matches; if `None` all nodes match.
        :param kw: Additional keyword arguments are passed through to self.walk.
        """
        predicate = predicate or bool

        for n in self.walk(**kw):
            if predicate(n):
                visitor(n)

    def rename(self, auto_quote: bool = False, **names: str) -> 'Node':
        """
        Rename nodes according to the mapping `names`.

        :param auto_quote: Flag signaling whether to quote names if necessary before renaming.
        :param names: Mapping of old names to new names. Old names are matched against \
        the name and the unquoted name of a node.
        :return: The `Node` with updated names.
        """
        if auto_quote:
            names = {k: Node(v, auto_quote=True).name for k, v in names.items()}

        def visitor(node):
            if node.name in names:
                node.name = names[node.name]
            elif node.unquoted_name in names:
                node.name = names[node.unquoted_name]

        self.visit(visitor)
        return self

    def strip_comments(self) -> 'Node':
        """
        Remove comments from all nodes of a tree.

        .. code-block:: python

            >>> n = newick.loads("(a[comment],b)c;")[0]
            >>> n.strip_comments().newick
            '(a,b)c'
        """
        def strip(n):
            n.comments = []
        self.visit(strip)
        return self

    def _postorder(self):
        """Yield the nodes of the subtree in post-order without recursion."""
        stack = [self]
        # Per node, the list of descendants which have not been yielded yet. The lists are
        # copies, so the tree may be modified while the traversal is running.
        descendant_map = {id(node): [n for n in node.descendants] for node in self.walk()}

        while stack:
            node = stack[-1]
            descendants = descendant_map[id(node)]

            # if we are at a leaf node (or all descendants are done), we remove the item
            # from the stack
            if not descendants:
                stack.pop()
                yield node
                if stack:
                    descendant_map[id(stack[-1])].pop(0)
            else:
                stack.append(descendants[0])

    def get_leaves(self) -> typing.List['Node']:
        """
        Get all the leaf nodes of the subtree descending from this node.

        :return: List of Nodes with no descendants.
        """
        return [n for n in self.walk() if n.is_leaf]

    def get_node(self, label: str) -> typing.Optional['Node']:
        """
        Gets the specified node by name.

        :return: Node or None if name does not exist in tree
        """
        for n in self.walk():
            if n.name == label:
                return n
        return None

    def get_leaf_names(self) -> typing.List[str]:
        """
        Get the names of all the leaf nodes of the subtree descending from
        this node.

        :return: List of names of Nodes with no descendants.
        """
        return [n.name for n in self.get_leaves()]

    def prune(self, nodes: typing.List['Node'], inverse: bool = False):
        """
        Remove all those nodes in the specified list, or if inverse=True,
        remove all those nodes not in the specified list. The specified nodes
        must be distinct from the root node.

        :param nodes: A list of Node objects
        :param inverse: Specifies whether to remove nodes in the list or not in the list.
        """
        self.visit(
            lambda n: n.ancestor.descendants.remove(n),
            # We won't prune the root node, even if it is a leaf and requested to
            # be pruned!
            lambda n: ((not inverse and n in nodes) or  # noqa: W504
                       (inverse and n.is_leaf and n not in nodes)) and n.ancestor,
            mode="postorder")

    def prune_by_names(self, node_names: typing.List[str], inverse: bool = False):
        """
        Perform an (inverse) prune, with leaves specified by name.

        :param node_names: A list of Node names (strings)
        :param inverse: Specifies whether to remove nodes in the list or not in the list.
        """
        self.prune([n for n in self.walk() if n.name in node_names], inverse)

    def remove_redundant_nodes(self, preserve_lengths: bool = True, keep_leaf_name: bool = False):
        """
        Remove all nodes which have only a single child, and attach their
        grandchildren to their parent. The resulting tree has the minimum
        number of internal nodes required for the number of leaves.

        :param preserve_lengths: If `True`, branch lengths of removed nodes are \
        added to those of their children.
        :param keep_leaf_name: If `True`, the name of the leaf on a branch with redundant \
        nodes will be kept; otherwise, the name of the node closest to the root will be used.
        """
        for n in self.walk(mode='postorder'):
            while n.ancestor and len(n.ancestor.descendants) == 1:
                grandfather = n.ancestor.ancestor
                father = n.ancestor
                if preserve_lengths:
                    n.length += father.length
                if keep_leaf_name:
                    father.name = n.name

                if grandfather:
                    # Replace the redundant father with its only child.
                    for i, child in enumerate(grandfather.descendants):
                        if child is father:
                            del grandfather.descendants[i]
                            break  # Don't keep iterating over the list we just modified.
                    grandfather.add_descendant(n)
                    father.ancestor = None
                else:
                    # The father is the root: it stays and takes over the child's subtrees.
                    self.descendants = n.descendants
                    if preserve_lengths:
                        self.length = n.length

    def resolve_polytomies(self):
        """
        Insert additional nodes with length=0 into the subtree in such a way
        that all non-leaf nodes have only 2 descendants, i.e. the tree becomes
        a fully resolved binary tree.
        """
        def _resolve_polytomies(n):
            new = Node(length=self._length_formatter(self._length_parser('0')))
            while len(n.descendants) > 1:
                new.add_descendant(n.descendants.pop())
            # Attach via add_descendant, so the new node knows its ancestor.
            n.add_descendant(new)

        self.visit(_resolve_polytomies, lambda n: len(n.descendants) > 2)

    def remove_names(self):
        """
        Set the name of all nodes in the subtree to None.
        """
        self.visit(lambda n: setattr(n, 'name', None))

    def remove_internal_names(self):
        """
        Set the name of all non-leaf nodes in the subtree to None.
        """
        self.visit(lambda n: setattr(n, 'name', None), lambda n: not n.is_leaf)

    def remove_leaf_names(self):
        """
        Set the name of all leaf nodes in the subtree to None.
        """
        self.visit(lambda n: setattr(n, 'name', None), lambda n: n.is_leaf)

    def remove_lengths(self):
        """
        Set the length of all nodes in the subtree to None.
        """
        self.visit(lambda n: setattr(n, 'length', None))


def loads(s: typing.Union[str, typing.Iterable[str]], strip_comments: bool = False, **kw) \
        -> typing.List[Node]:
    """
    Load a list of trees from a Newick formatted string.

    :param s: Newick formatted string.
    :param strip_comments: Flag signaling whether to strip comments enclosed in square \
    brackets.
    :param kw: Keyword arguments are passed through to `Node.create`.
    :return: List of Node objects.
    """
    return [
        ns.to_node(**kw)
        for ns in NewickString(s).iter_subtrees(strip_comments=strip_comments)]


def dumps(trees: typing.Union[Node, typing.Iterable[Node]]) -> str:
    """
    Serialize a list of trees in Newick format.

    :param trees: List of Node objects or a single Node object.
    :return: Newick formatted string.
    """
    if isinstance(trees, Node):
        trees = [trees]
    return ';\n'.join([tree.newick for tree in trees]) + ';'


def load(fp, strip_comments=False, **kw) -> typing.List[Node]:
    """
    Load a list of trees from an open Newick formatted file.

    :param fp: open file handle.
    :param strip_comments: Flag signaling whether to strip comments enclosed in square \
    brackets.
    :param kw: Keyword arguments are passed through to `Node.create`.
    :return: List of Node objects.
    """
    kw['strip_comments'] = strip_comments
    # Chaining the lines gives a stream of single characters, as the parser expects.
    return loads(itertools.chain.from_iterable(fp), **kw)


def dump(tree: typing.Union[Node, typing.Iterable[Node]], fp):
    """Serialize `tree` in Newick format and write it to the open file handle `fp`."""
    fp.write(dumps(tree))


def read(fname, encoding='utf8', strip_comments=False, **kw) -> typing.List[Node]:
    """
    Load a list of trees from a Newick formatted file.

    :param fname: file path.
    :param encoding: Encoding of the file.
    :param strip_comments: Flag signaling whether to strip comments enclosed in square \
    brackets.
    :param kw: Keyword arguments are passed through to `Node.create`.
    :return: List of Node objects.
    """
    kw['strip_comments'] = strip_comments
    with pathlib.Path(fname).open(encoding=encoding) as fp:
        return load(fp, **kw)


def write(tree: typing.Union[Node, typing.Iterable[Node]], fname, encoding='utf8'):
    """Serialize `tree` in Newick format and write it to the file at path `fname`."""
    with pathlib.Path(fname).open(encoding=encoding, mode='w') as fp:
        dump(tree, fp)


@dataclasses.dataclass
class Token:
    """
    We parse Newick in one pass, storing the data as list of tokens with enough
    information to extract relevant parts from this list later on.
    """
    __slots__ = [
        'char',
        'level',
        'type',
    ]
    char: str  # The text of the token: one character, or a whole word, quoted string or comment.
    type: TokenType
    level: int  # How deep the character is nested in the tree.


class NewickString(list):
    """
    A list of tokens with methods to access newick constituents.
    """
    def __init__(self, s: typing.Union[str, typing.Iterable, typing.List[Token]]):
        """
        :param s: Either a list of `Token` objects, which is taken over as is, or a string \
        or iterable of single characters, which is tokenized.
        :raises ValueError: for invalid brace or comment nesting and for unterminated \
        comments.
        """
        list.__init__(self, s if isinstance(s, list) else [])

        if not isinstance(s, list):
            # Any iterable of characters will do; for iterators `iter` is a no-op.
            s = iter(s)
            word, lookahead, level, inquote, incomment = [], None, 0, False, False

            while 1:
                try:
                    c = lookahead or next(s)  # Read the data one character at a time.
                    lookahead = None

                    # An unparsed string. We must convert it to a list of tokens.
                    if c == QUOTE:  # Start of quoted string - we read to the end immediately.
                        inquote, doublequote = True, False
                        n = [c]  # Accumulate all characters within quotes.
                        while 1:
                            c = lookahead or next(s)
                            lookahead = None
                            while c not in ESCAPE:
                                n.append(c)
                                c = next(s)

                            n.append(c)
                            if doublequote and c == QUOTE:  # The escaped quote.
                                doublequote = False
                            else:
                                # Check if this is the escape character for a following quote:
                                try:
                                    lookahead = next(s)
                                except StopIteration:
                                    lookahead = None
                                if lookahead == QUOTE:
                                    doublequote = True  # Yep, mark it.
                                else:  # End of quoted string
                                    inquote = False
                                    self.append(Token(''.join(n), TokenType.QWORD, level))
                                    break
                        continue

                    if c == '[':  # Start of a comment - we read to the end immediately.
                        incomment, commentlevel = True, 1
                        n = [c]  # Accumulate all characters in the comment.
                        while 1:
                            c = next(s)
                            while c not in COMMENT:
                                n.append(c)
                                c = next(s)
                            n.append(c)
                            commentlevel += COMMENT[c]
                            if commentlevel == 0:  # End of comment.
                                incomment = False
                                self.append(Token(''.join(n), TokenType.COMMENT, level))
                                break
                        continue

                    if c in WHITESPACE:
                        # Outside of quotes and comments, whitespace splits words.
                        if word:
                            self.append(Token(''.join(word), TokenType.WORD, level))
                            word = []
                        self.append(Token(c, TokenType.WHITESPACE, level))
                        continue

                    if c == ']':
                        raise ValueError('invalid comment nesting')

                    if c in RESERVED_PUNCTUATION:
                        # Punctuation separates words:
                        if word:
                            self.append(Token(''.join(word), TokenType.WORD, level))
                            word = []

                        # Outside of quoted strings and comments we keep track of node
                        # nesting.
                        # Note: The enclosing brackets have lower level than the content.
                        if c == ')':
                            level -= 1
                            if level < 0:
                                raise ValueError('invalid brace nesting')
                            self.append(Token(c, TokenType.CBRACE, level))
                            continue

                        if c == '(':
                            self.append(Token(c, TokenType.OBRACE, level))
                            level += 1
                            continue

                        self.append(Token(c, RESERVED_PUNCTUATION[c], level))
                        continue

                    word.append(c)  # All other characters are just accumulated into a word.
                except StopIteration:
                    if inquote:
                        raise ValueError('Unterminated quote!')
                    if incomment:
                        raise ValueError('Unterminated comment!')
                    break
            if word:
                self.append(Token(''.join(word), TokenType.WORD, level))

        # The minimal bracket level of the list of tokens:
        # This becomes important when splitting a NewickString into nodes by - essentially -
        # subsetting the token list.
        self.minlevel = self[-1].level if self else 0

    def to_node(self, **kw) -> Node:
        """
        Convert the token list to a `Node`, recursively converting the descendants.

        :param kw: Additional keyword arguments for `Node.create`, e.g. `length_parser`; \
        arguments derived from the parsed tokens take precedence.
        :raises ValueError: if the node label consists of more than one word.
        """
        # Parse label and length of the root node:
        tokens = list(
            itertools.takewhile(lambda t: t.level == self.minlevel, reversed(self)))
        if tokens and tokens[-1].type == TokenType.CBRACE:
            tokens = tokens[:-1]
        tokens.reverse()

        name, length, comments = [], [], []
        # We store the index of the colon and of the first comment:
        icolon, icomment = -1, -1

        for i, t in enumerate(t for t in tokens if t.type != TokenType.WHITESPACE):
            if t.type == TokenType.COLON:
                icolon = i
            elif t.type == TokenType.COMMENT:
                comments.append(t.char)
                if icomment == -1:
                    icomment = i
            elif icolon == -1:
                name.append(t.char)
            else:
                length.append(t.char)
        if len(name) > 1:
            raise ValueError('Node names must not contain whitespace or punctuation')

        params = dict(kw)
        params.update(
            name=''.join(name).strip() or None,
            length=''.join(length) or None,
            comments=[c[1:-1] for c in comments],  # Strip the enclosing brackets.
            colon_before_comment=icolon < icomment,
            descendants=[d.to_node(**kw) for d in self.iter_descendants()])
        return Node.create(**params)

    def iter_descendants(self) -> typing.Generator['NewickString', None, None]:
        """Yield the token lists of the immediate descendants of the root node."""
        tokens, comma = [], False
        for t in self:
            if t.type == TokenType.COMMA and t.level == self.minlevel + 1:
                # A comma one level below the root separates descendants.
                comma = True
                yield NewickString(tokens)
                tokens = []
            elif t.level > self.minlevel:
                tokens.append(t)
        if comma or tokens:
            yield NewickString(tokens)

    def iter_subtrees(self, strip_comments=False) -> typing.Generator['NewickString', None, None]:
        """
        Yield the token lists of the semicolon-separated trees.

        :param strip_comments: Flag signaling whether to drop comment tokens.
        :raises ValueError: if the braces of a tree are not balanced.
        """
        def checked(t):
            if t:
                if t[0].level != t[-1].level:
                    raise ValueError('different number of opening and closing braces')
            return NewickString(t)

        tokens = []
        for t in self:
            if t.type == TokenType.SEMICOLON:
                yield checked(tokens)
                tokens = []
                continue
            if not (strip_comments and t.type == TokenType.COMMENT):
                tokens.append(t)

        if tokens:
            yield checked(tokens)
(1[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.322576e-02[&length_mean=1.32336084e-02,length_median=1.32257600e-02,length_95%HPD={1.25875600e-02,1.38462600e-02}],38[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.042862e-02[&length_mean=1.04380427e-02,length_median=1.04286200e-02,length_95%HPD={9.86784000e-03,1.09835100e-02}],((2[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.805922e-02[&length_mean=1.80632043e-02,length_median=1.80592200e-02,length_95%HPD={1.73201700e-02,1.88186900e-02}],((((3[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.048291e-02[&length_mean=1.04814432e-02,length_median=1.04829100e-02,length_95%HPD={9.87788200e-03,1.09683200e-02}],14[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.292925e-02[&length_mean=2.29403531e-02,length_median=2.29292500e-02,length_95%HPD={2.20497700e-02,2.38873300e-02}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.973374e-04[&length_mean=9.09345541e-04,length_median=8.97337400e-04,length_95%HPD={6.86192600e-04,1.13926900e-03}],((((4[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.414139e-03[&length_mean=4.42206340e-03,length_median=4.41413900e-03,length_95%HPD={4.10106000e-03,4.83418900e-03}],(11[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.352973e-03[&length_mean=1.35605558e-03,length_median=1.35297300e-03,length_95%HPD={1.15930500e-03,1.58211200e
-03}],12[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.142089e-03[&length_mean=2.14423965e-03,length_median=2.14208900e-03,length_95%HPD={1.90138300e-03,2.41811300e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.877575e-03[&length_mean=1.88696637e-03,length_median=1.87757500e-03,length_95%HPD={1.65189200e-03,2.13232700e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.150098e-03[&length_mean=1.15605113e-03,length_median=1.15009800e-03,length_95%HPD={9.33346600e-04,1.33154600e-03}],29[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.441717e-03[&length_mean=8.45061282e-03,length_median=8.44171700e-03,length_95%HPD={7.93418600e-03,8.93661800e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.195597e-03[&length_mean=4.18184831e-03,length_median=4.19559700e-03,length_95%HPD={3.80772900e-03,4.53813300e-03}],25[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.345199e-03[&length_mean=5.34523972e-03,length_median=5.34519900e-03,length_95%HPD={4.97863800e-03,5.75723100e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.308626e-03[&length_mean=1.31441717e-03,length_median=1.30862600e-03,length_95%HPD={1.07893600e-03,1.55931100e-03}],((((((((((((6[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:7.997664e-04[&length_mean=8.05280460e-04,length_median=7.99766400e-04,length_95%HPD={6.54704800e-04,9.656656
00e-04}],9[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.378679e-03[&length_mean=2.38636301e-03,length_median=2.37867900e-03,length_95%HPD={2.13390100e-03,2.69463100e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.072251e-04[&length_mean=6.09333459e-04,length_median=6.07225100e-04,length_95%HPD={4.75384500e-04,7.50724300e-04}],30[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.075382e-03[&length_mean=2.06860747e-03,length_median=2.07538200e-03,length_95%HPD={1.81880900e-03,2.31710900e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.506152e-05[&length_mean=8.62308455e-05,length_median=8.50615200e-05,length_95%HPD={3.35563500e-05,1.33873700e-04}],((8[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.372926e-03[&length_mean=1.37373203e-03,length_median=1.37292600e-03,length_95%HPD={1.18738400e-03,1.57559300e-03}],27[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.601160e-03[&length_mean=2.60487960e-03,length_median=2.60116000e-03,length_95%HPD={2.30852500e-03,2.86024300e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.509489e-04[&length_mean=6.52791198e-04,length_median=6.50948900e-04,length_95%HPD={5.09931000e-04,7.84005600e-04}],20[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.660163e-03[&length_mean=1.66117145e-03,length_median=1.66016300e-03,length_95%HPD={1.43747800e-03,1.85349800e-03
}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.258876e-05[&length_mean=8.53024373e-05,length_median=8.25887600e-05,length_95%HPD={3.40372300e-05,1.40183100e-04}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:9.954144e-04[&length_mean=9.91750831e-04,length_median=9.95414400e-04,length_95%HPD={8.03118700e-04,1.15567900e-03}],58[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.241178e-03[&length_mean=3.25238069e-03,length_median=3.24117800e-03,length_95%HPD={2.93880200e-03,3.55860200e-03}])[&prob=9.98002663e-01,prob_stddev=9.41553637e-04,prob_range={9.97336884e-01,9.98668442e-01},prob(percent)="100",prob+-sd="100+-0"]:1.139994e-04[&length_mean=1.16621065e-04,length_median=1.13999400e-04,length_95%HPD={5.71785600e-05,1.86962800e-04}],51[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.510251e-03[&length_mean=2.52117771e-03,length_median=2.51025100e-03,length_95%HPD={2.24026400e-03,2.78486500e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.753134e-04[&length_mean=4.79813638e-04,length_median=4.75313400e-04,length_95%HPD={3.44982500e-04,6.16510100e-04}],7[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.913954e-03[&length_mean=1.91377621e-03,length_median=1.91395400e-03,length_95%HPD={1.68336900e-03,2.16077600e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.249693e-03[&length_mean=1.25242188e-03,length_median=1.24969300e-03,length_95%HPD={1.07086500e-03,1.47725600e-03}],52[&prob
=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.261674e-03[&length_mean=4.26539478e-03,length_median=4.26167400e-03,length_95%HPD={3.93521500e-03,4.57050500e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.674826e-04[&length_mean=1.71125393e-04,length_median=1.67482600e-04,length_95%HPD={8.96063200e-05,2.67364200e-04}],(((18[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.602899e-06[&length_mean=5.95620423e-06,length_median=3.60289900e-06,length_95%HPD={1.16095600e-08,1.93716400e-05}],49[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.047785e-06[&length_mean=7.82486327e-06,length_median=5.04778500e-06,length_95%HPD={4.12616900e-08,2.44780600e-05}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.721420e-03[&length_mean=2.73146204e-03,length_median=2.72142000e-03,length_95%HPD={2.45057900e-03,3.03132900e-03}],19[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.970452e-03[&length_mean=4.95634659e-03,length_median=4.97045200e-03,length_95%HPD={4.56566100e-03,5.36221900e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.900571e-03[&length_mean=1.90626838e-03,length_median=1.90057100e-03,length_95%HPD={1.64471900e-03,2.19852700e-03}],47[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.018304e-03[&length_mean=2.01796387e-03,length_median=2.01830400e-03,length_95%HPD={1.75256100e-03,2.28866400e-03}])[&prob=1.00
000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.263857e-03[&length_mean=2.26407763e-03,length_median=2.26385700e-03,length_95%HPD={1.97599200e-03,2.56984900e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.161877e-04[&length_mean=5.17917749e-04,length_median=5.16187700e-04,length_95%HPD={3.12545300e-04,7.36318200e-04}],(((40[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.095280e-04[&length_mean=8.10668066e-04,length_median=8.09528000e-04,length_95%HPD={6.64837800e-04,9.57788400e-04}],46[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.821678e-04[&length_mean=6.88761975e-04,length_median=6.82167800e-04,length_95%HPD={5.45679000e-04,8.37722500e-04}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.314854e-04[&length_mean=1.33312721e-04,length_median=1.31485400e-04,length_95%HPD={6.62162900e-05,1.97541700e-04}],44[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.729620e-04[&length_mean=5.74374725e-04,length_median=5.72962000e-04,length_95%HPD={4.55883800e-04,7.15652400e-04}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.082015e-03[&length_mean=1.08544222e-03,length_median=1.08201500e-03,length_95%HPD={8.86882200e-04,1.26799700e-03}],(42[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:9.622473e-04[&length_mean=9.65181165e-04,length_median=9.62247300e-04,length_95%HPD={7.84902500e-04,1.13447100e-03}],43[&prob=1.0000
0000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.236725e-03[&length_mean=1.24940799e-03,length_median=1.23672500e-03,length_95%HPD={1.06052400e-03,1.44000600e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.456371e-03[&length_mean=1.46217205e-03,length_median=1.45637100e-03,length_95%HPD={1.26310100e-03,1.69575000e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.750341e-03[&length_mean=4.74611710e-03,length_median=4.75034100e-03,length_95%HPD={4.31345000e-03,5.13927900e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.384410e-03[&length_mean=4.38653051e-03,length_median=4.38441000e-03,length_95%HPD={3.92378200e-03,4.73773300e-03}],(31[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.717989e-04[&length_mean=4.70038369e-04,length_median=4.71798900e-04,length_95%HPD={3.48171000e-04,5.97915600e-04}],57[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:7.388340e-04[&length_mean=7.38362146e-04,length_median=7.38834000e-04,length_95%HPD={6.00734500e-04,8.96900300e-04}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.151312e-02[&length_mean=1.15073434e-02,length_median=1.15131200e-02,length_95%HPD={1.08953700e-02,1.21053900e-02}])[&prob=9.22103862e-01,prob_stddev=4.80192355e-02,prob_range={8.88149134e-01,9.56058589e-01},prob(percent)="92",prob+-sd="92+-5"]:3.662087e-04[&length_mean=3.76712870e-04,length_median=3.66208700e-04,length_95%HPD={1.74374300e-04,6.01967800e-04}],(((10[&prob=1.00000000e+00
,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.664350e-03[&length_mean=2.67128379e-03,length_median=2.66435000e-03,length_95%HPD={2.33493800e-03,3.00622300e-03}],(26[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.391074e-04[&length_mean=2.40027452e-04,length_median=2.39107400e-04,length_95%HPD={1.61865600e-04,3.17819700e-04}],56[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.791833e-04[&length_mean=1.82006017e-04,length_median=1.79183300e-04,length_95%HPD={1.09472000e-04,2.61625000e-04}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.734636e-03[&length_mean=2.73484295e-03,length_median=2.73463600e-03,length_95%HPD={2.44509500e-03,3.01449300e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.754844e-04[&length_mean=8.91604261e-04,length_median=8.75484400e-04,length_95%HPD={6.04226100e-04,1.18521200e-03}],(13[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.875633e-02[&length_mean=1.87394192e-02,length_median=1.87563300e-02,length_95%HPD={1.78836600e-02,1.95810000e-02}],32[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.639129e-03[&length_mean=6.63743939e-03,length_median=6.63912900e-03,length_95%HPD={6.11756300e-03,7.13799900e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.682043e-03[&length_mean=6.69071001e-03,length_median=6.68204300e-03,length_95%HPD={6.05214200e-03,7.29198700e-03}])[&prob=1.00000000e+00,prob_
stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.379281e-03[&length_mean=1.38378899e-03,length_median=1.37928100e-03,length_95%HPD={1.02559700e-03,1.78183500e-03}],(33[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.363296e-03[&length_mean=2.36403096e-03,length_median=2.36329600e-03,length_95%HPD={2.09971900e-03,2.70202300e-03}],((34[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.852793e-03[&length_mean=2.85985069e-03,length_median=2.85279300e-03,length_95%HPD={2.55837600e-03,3.15791800e-03}],36[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.724223e-03[&length_mean=2.72747965e-03,length_median=2.72422300e-03,length_95%HPD={2.43541700e-03,3.01920400e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.464235e-04[&length_mean=8.53458458e-04,length_median=8.46423500e-04,length_95%HPD={6.27179300e-04,1.06949400e-03}],35[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.223358e-03[&length_mean=5.22755849e-03,length_median=5.22335800e-03,length_95%HPD={4.75651000e-03,5.62566000e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.524110e-03[&length_mean=3.53416937e-03,length_median=3.52411000e-03,length_95%HPD={3.13358400e-03,3.92501600e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.725979e-03[&length_mean=3.71882977e-03,length_median=3.72597900e-03,length_95%HPD={3.34259600e-03,4.13867800e-03}])[&prob=1.00000000e+00,prob_stdde
v=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.765793e-04[&length_mean=5.82986430e-04,length_median=5.76579300e-04,length_95%HPD={3.14198600e-04,8.21673900e-04}])[&prob=9.22103862e-01,prob_stddev=4.80192355e-02,prob_range={8.88149134e-01,9.56058589e-01},prob(percent)="92",prob+-sd="92+-5"]:3.620286e-04[&length_mean=3.67906353e-04,length_median=3.62028600e-04,length_95%HPD={2.42759000e-04,4.97695900e-04}],45[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.511124e-03[&length_mean=3.50904089e-03,length_median=3.51112400e-03,length_95%HPD={3.15562300e-03,3.82638300e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.349789e-03[&length_mean=5.35707506e-03,length_median=5.34978900e-03,length_95%HPD={4.92356000e-03,5.82903300e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.367183e-03[&length_mean=1.38092252e-03,length_median=1.36718300e-03,length_95%HPD={1.14782800e-03,1.66787300e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.920152e-03[&length_mean=6.91465195e-03,length_median=6.92015200e-03,length_95%HPD={6.42659700e-03,7.35784500e-03}],((((((((5[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.722413e-04[&length_mean=5.77495624e-04,length_median=5.72241300e-04,length_95%HPD={4.61295100e-04,7.06104400e-04}],55[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.135506e-03[&length_mean=1.13896295e-03,length_median=1.13550600e-03,length_95%HPD={9.28234000e-04,1.31149200e-03}])[&prob=1.00000000e+00,prob_stddev=0.0
0000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.624592e-04[&length_mean=4.66338786e-04,length_median=4.62459200e-04,length_95%HPD={3.54902900e-04,5.84133400e-04}],37[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.759065e-03[&length_mean=2.76182430e-03,length_median=2.75906500e-03,length_95%HPD={2.51330900e-03,3.03925800e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.520913e-04[&length_mean=4.55180962e-04,length_median=4.52091300e-04,length_95%HPD={3.42383300e-04,5.71307600e-04}],17[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.941969e-03[&length_mean=1.95044207e-03,length_median=1.94196900e-03,length_95%HPD={1.71614700e-03,2.18955500e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:5.689619e-04[&length_mean=5.71469589e-04,length_median=5.68961900e-04,length_95%HPD={4.03148800e-04,7.30515200e-04}],50[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.310137e-03[&length_mean=3.31173024e-03,length_median=3.31013700e-03,length_95%HPD={2.99551400e-03,3.65566600e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.824175e-02[&length_mean=3.82429098e-02,length_median=3.82417500e-02,length_95%HPD={3.70725300e-02,3.95195100e-02}],28[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.758962e-02[&length_mean=4.75900425e-02,length_median=4.75896200e-02,length_95%HPD={4.61947400e-02,4.90192200e-02}])[&prob=1.00000000e+00,prob_stddev=0.00000000e
+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.458749e-03[&length_mean=3.45185517e-03,length_median=3.45874900e-03,length_95%HPD={2.89666300e-03,3.97080700e-03}],15[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.325038e-02[&length_mean=1.32480883e-02,length_median=1.32503800e-02,length_95%HPD={1.25459500e-02,1.38753300e-02}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.656753e-03[&length_mean=4.66390434e-03,length_median=4.65675300e-03,length_95%HPD={4.20215600e-03,5.10947400e-03}],((21[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.536922e-03[&length_mean=3.53561883e-03,length_median=3.53692200e-03,length_95%HPD={3.17400300e-03,3.86215800e-03}],(22[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.453448e-03[&length_mean=2.44454845e-03,length_median=2.45344800e-03,length_95%HPD={2.16587200e-03,2.69577200e-03}],(23[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.660074e-03[&length_mean=3.65204801e-03,length_median=3.66007400e-03,length_95%HPD={3.33868800e-03,4.00550900e-03}],54[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.723088e-03[&length_mean=1.72406660e-03,length_median=1.72308800e-03,length_95%HPD={1.46496400e-03,1.99199600e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.137337e-03[&length_mean=1.13948557e-03,length_median=1.13733700e-03,length_95%HPD={9.54110100e-04,1.36764900e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+0
0,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.901617e-03[&length_mean=1.91035770e-03,length_median=1.90161700e-03,length_95%HPD={1.63335800e-03,2.19400500e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:9.897917e-03[&length_mean=9.89905351e-03,length_median=9.89791700e-03,length_95%HPD={9.30956000e-03,1.04691800e-02}],59[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.871489e-02[&length_mean=1.87188244e-02,length_median=1.87148900e-02,length_95%HPD={1.80013500e-02,1.95146100e-02}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:4.514311e-03[&length_mean=4.51643510e-03,length_median=4.51431100e-03,length_95%HPD={4.08398900e-03,4.97056900e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.081903e-03[&length_mean=8.07290517e-03,length_median=8.08190300e-03,length_95%HPD={7.54718300e-03,8.59254900e-03}],(24[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.269126e-03[&length_mean=6.25930570e-03,length_median=6.26912600e-03,length_95%HPD={5.86850500e-03,6.67464600e-03}],(39[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.245534e-03[&length_mean=1.25031702e-03,length_median=1.24553400e-03,length_95%HPD={1.04779700e-03,1.44714800e-03}],48[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.313908e-03[&length_mean=1.31617558e-03,length_median=1.31390800e-03,length_95%HPD={1.12492900e-03,1.51015600e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob
_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:7.305975e-03[&length_mean=7.31184685e-03,length_median=7.30597500e-03,length_95%HPD={6.86779100e-03,7.84551400e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:8.266431e-03[&length_mean=8.26144685e-03,length_median=8.26643100e-03,length_95%HPD={7.80018100e-03,8.76236800e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:3.384288e-03[&length_mean=3.38100258e-03,length_median=3.38428800e-03,length_95%HPD={3.06790800e-03,3.74588100e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.946444e-03[&length_mean=1.95313681e-03,length_median=1.94644400e-03,length_95%HPD={1.67050900e-03,2.21376600e-03}],16[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.274745e-02[&length_mean=1.27331788e-02,length_median=1.27474500e-02,length_95%HPD={1.21146100e-02,1.33494100e-02}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.979368e-03[&length_mean=2.97942989e-03,length_median=2.97936800e-03,length_95%HPD={2.63485600e-03,3.28071200e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.079171e-03[&length_mean=1.07951114e-03,length_median=1.07917100e-03,length_95%HPD={8.72404600e-04,1.26990000e-03}],(41[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.446689e-03[&length_mean=6.45837957e-03,length_median=6.44668900e-03,length_95%HPD={6.00713200e-03,6.85584400e-03}],53[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1
.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:1.111522e-02[&length_mean=1.11083812e-02,length_median=1.11152200e-02,length_95%HPD={1.04509900e-02,1.16511400e-02}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:2.956290e-03[&length_mean=2.95991883e-03,length_median=2.95629000e-03,length_95%HPD={2.64881600e-03,3.32060400e-03}])[&prob=1.00000000e+00,prob_stddev=0.00000000e+00,prob_range={1.00000000e+00,1.00000000e+00},prob(percent)="100",prob+-sd="100+-0"]:6.794853e-03[&length_mean=6.80217575e-03,length_median=6.79485300e-03,length_95%HPD={6.40309100e-03,7.36139100e-03}]); 2 | end; -------------------------------------------------------------------------------- /tests/test_nescent.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from newick import loads 3 | 4 | # Run tests from NESCENT wiki: 5 | # http://informatics.nescent.org/wiki/Supporting_NEXUS#Test_files_for_NEXUS_parsers 6 | 7 | testdata = [ 8 | ('basal_trifurcation', 9 | '(((A:1,B:1):1,(C:1,D:1):1):1,(E:1,F:1):2,(G:1,H:1):2)'), 10 | ('bush', 11 | '(((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1)'), 12 | ('bush_branchlength_negative', 13 | '(((A:1,B:1):1,(C:1,D:1):_0.25):1,((E:1,F:1):1,(G:1,H:1):1):1)'), 14 | ('bush_branchlength_scientific', 15 | '(((A:1,B:2e+01):1,(C:9e_01,D:1):1):1,((E:1,F:9E_01):1,(G:2E+01,H:1):1):1)'), 16 | ('bush_branchlength_zero', 17 | '(((A:1,B:1):1,(C:0,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1)'), 18 | ('bush_cladogram', 19 | '(((A,B),(C,D)),((E,F),(G,H)))'), 20 | ('bush_extended_root_branch', 21 | '(((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1):1'), 22 | ('bush_inode_labels', 23 | '(((A:1,B:1)AB:1,(C:1,D:1)CD:1)ABCD:1,((E:1,F:1)EF:1,(G:1,H:1)GH:1)EFGH:1)'), 24 | ('bush_inode_labels_partial', 25 | '(((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1)EF:1,(G:1,H:1)GH:1)EFGH:1)'), 26 | ('bush_inode_labels_quoted2', 27 | 
"(((A:1,B:1)'inode AB':1,(C:1,D:1)'inode CD':1)'inode ABCD':1,((E:1,F:1)'inode EF':1," 28 | "(G:1,H:1)'inode GH':1)'inode EFGH':1)"), 29 | ('bush quoted string name2', 30 | '(((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1)'), 31 | ('bush_uneven', 32 | '(((A:1,B:2):1,(C:1,D:2):1):1,((E:1,F:2):1,(G:1,H:2):1):1)'), 33 | ('ladder', 34 | '(((((((A:1,B:1):1,C:2):1,D:3):1,E:4):1,F:5):1,G:6):1,H:7)'), 35 | ('ladder_cladogram', 36 | '(((((((A,B),C),D),E),F),G),H)'), 37 | ('ladder_uneven', 38 | '(((((((A:1,B:2):1,C:2):1,D:4):1,E:4):1,F:6):1,G:6):1,H:8)'), 39 | ('rake', 40 | '(A:1,B:1,C:1,D:1,E:1,F:1,G:1,H:1)'), 41 | ('rake_cladogram', 42 | '(A,B,C,D,E,F,G,H)'), 43 | ] 44 | 45 | 46 | @pytest.mark.parametrize("id,nwk", testdata) 47 | def test_nescent(id, nwk): 48 | tree = loads(nwk)[0] 49 | assert tree.newick == nwk, "%s\n%s\n%s\n" % (id, nwk, tree.newick) 50 | -------------------------------------------------------------------------------- /tests/test_newick.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pytest 4 | from newick import loads, dumps, Node, read, write 5 | 6 | 7 | @pytest.fixture 8 | def fixture_dir(): 9 | return pathlib.Path(__file__).parent / 'fixtures' 10 | 11 | 12 | def test_empty_node(): 13 | node = Node() 14 | assert node.name is None 15 | assert node.length == 0.0 16 | assert "" == node.newick 17 | assert [] == node.descendants 18 | 19 | 20 | def test_Node_name(): 21 | with pytest.raises(ValueError): 22 | Node("()") 23 | 24 | with pytest.raises(ValueError): 25 | Node(name='A)') 26 | 27 | n = Node() 28 | assert n.unquoted_name is None 29 | 30 | n = Node("a'b", auto_quote=True) 31 | assert n.name == "'a''b'" 32 | assert n.unquoted_name == "a'b" 33 | n.name = ":" 34 | assert n.name == "':'" 35 | n.name = 'A' 36 | assert n.name == n.unquoted_name 37 | assert repr(n) == 'Node("A")' 38 | assert Node("a b", auto_quote=True).name == "'a b'" 39 | 40 | 41 | def test_Node_length(): 42 | 
with pytest.raises(ValueError): 43 | Node(None, length=':') 44 | 45 | 46 | def test_Node_comments(): 47 | n = Node('A', comments=['first', 'second']) 48 | assert n.newick == "A[first|second]" 49 | 50 | n = Node('A', comment='first') 51 | n.comments.append('second') 52 | assert n.newick == "A[first|second]" 53 | 54 | 55 | def test_node_newick_representation_without_length(): 56 | test_obj = Node(name="A") 57 | assert test_obj.length == 0.0 58 | assert "A" == test_obj.newick 59 | 60 | 61 | def test_node_newick_representation_with_length(): 62 | test_obj = Node(name="A", length="3") 63 | assert pytest.approx(test_obj.length) == 3.0 64 | assert "A:3" == test_obj.newick 65 | 66 | 67 | def test_node_parameters_changeability(): 68 | test_obj = Node(name="A") 69 | assert "A" == test_obj.name 70 | test_obj.name = "B" 71 | assert "B" == test_obj.name 72 | 73 | 74 | def test_node_length_changeability(): 75 | test_obj = Node(length="10") 76 | assert 10 == test_obj.length 77 | test_obj.length = "12" 78 | assert 12 == test_obj.length 79 | 80 | 81 | @pytest.mark.parametrize( 82 | 'test_data', 83 | [["D1.1", "D1.2", "D1.3"], ["D", "", ""], ["", "", ""]] 84 | ) 85 | def test_node_representation_with_deeper_descendants(test_data): 86 | """ 87 | Procedure: 88 | 1. Make simple tree with one descendant having two more descendants inside 89 | 2. 
Verify if its newick representation is correct in comparison to parsed "proper_result" 90 | """ 91 | single_nodes_reprs = [ 92 | "{0}:{1}".format(name, length) 93 | for name, length in zip(test_data, ["2.0", "3.0", "4.0"])] 94 | proper_result = "(({1},{2}){0})A:1.0".format(*single_nodes_reprs) 95 | 96 | d1, d2, d3 = [Node(name, length) for name, length in zip(test_data, ["2.0", "3.0", "4.0"])] 97 | d1.add_descendant(d2) 98 | d1.add_descendant(d3) 99 | test_obj = Node("A", "1.0") 100 | test_obj.add_descendant(d1) 101 | assert proper_result == test_obj.newick 102 | 103 | 104 | def test_node_as_descendants_list(): 105 | test_obj = Node("A", "1.0") 106 | desc = Node("D", "2.0") 107 | test_obj.add_descendant(desc) 108 | assert [desc] == test_obj.descendants 109 | 110 | 111 | @pytest.mark.slow 112 | def test_read_write(tmp_path, fixture_dir): 113 | trees = read(fixture_dir / 'tree-glottolog-newick.txt') 114 | 115 | assert '[' in trees[0].descendants[0].name 116 | descs = [len(tree.descendants) for tree in trees] 117 | # The bookkeeping family has 391 languages 118 | assert descs[0] == 391 119 | tmp = tmp_path / 'test.txt' 120 | write(trees, tmp) 121 | assert tmp.exists() 122 | assert [len(tree.descendants) for tree in read(tmp)] == descs 123 | 124 | 125 | def test_Node(): 126 | root = loads('(A,B,(C,D)E)F;')[0] 127 | assert [n.name for n in root.walk()] == ['F', 'A', 'B', 'E', 'C', 'D'] 128 | assert [n.name for n in root.walk() if n.is_leaf] == ['A', 'B', 'C', 'D'] 129 | assert [n.name for n in root.walk(mode='postorder')] == ['A', 'B', 'C', 'D', 'E', 'F'] 130 | assert root.ancestor is None 131 | assert root.descendants[0].ancestor == root 132 | root = loads('(((a,b),(c,d)),e);')[0] 133 | assert [n.ancestor.newick for n in root.walk() if n.ancestor] == \ 134 | [ 135 | '(((a,b),(c,d)),e)', 136 | '((a,b),(c,d))', 137 | '(a,b)', 138 | '(a,b)', 139 | '((a,b),(c,d))', 140 | '(c,d)', 141 | '(c,d)', 142 | '(((a,b),(c,d)),e)'] 143 | 144 | 145 | @pytest.mark.parametrize( 146 | 
's,assertion', 147 | [ 148 | ("", lambda r: r == []), 149 | ("A", lambda r: r[0].name == 'A'), 150 | ("A;", lambda r: r[-1].name == 'A'), 151 | ("A-B.C;", lambda r: r[-1].name == 'A-B.C'), 152 | ("'A\\'C';", lambda r: r[-1].name == "'A\\'C'"), 153 | ("A ;", lambda r: r[-1].name == 'A'), 154 | ("'A[noc]'", lambda r: r[0].name == "'A[noc]'"), 155 | ("'A(B'", lambda r: r[0].name == "'A(B'"), 156 | ("'A[noc'[c]", lambda r: r[0].comment == "c"), 157 | ("'A[noc]'[c(a)]", lambda r: r[0].comment == "c(a)"), 158 | (r"(A,B)'C ,\':''D':1.3;", lambda r: r[0].unquoted_name == "C ,':'D"), 159 | ( 160 | '[&R] (A,B)C [% ] [% ] [% setBetweenBits = selected ];', 161 | lambda r: r[0].name == 'C' and r[0].comment == '% '), 162 | ( 163 | '[&R] (A,B)C [% ] [% ] [% setBetweenBits = selected ];', 164 | lambda r: r[0].comments == ['% ', '% ', '% setBetweenBits = selected ']), 165 | ( 166 | "(A,B)C[&k1=v1]:[&k2=v2]2.0;", 167 | lambda r: r[0].comments == ['&k1=v1', '&k2=v2'] and r[0].length == 2.0), 168 | ( 169 | "(A,B)C[&k1=v1]:[&k2=v2]2.0;", 170 | lambda r: r[0].properties == dict(k1='v1', k2='v2')), 171 | ("('A;B',C)D;", lambda r: len(r) == 1), 172 | ("('A:B':2,C:3)D:4;", lambda r: r[0].descendants[0].unquoted_name == 'A:B'), 173 | ("('A:B':2,C:3)D:4;", lambda r: pytest.approx(r[0].descendants[0].length) == 2.0), 174 | # parse examples from https://en.wikipedia.org/wiki/Newick_format 175 | ('(,,(,));', lambda r: r[0].name is None), 176 | ('(,,(,));', lambda r: r[0].descendants[0].length == 0.0), 177 | ('(,,(,));', lambda r: len(r[0].descendants) == 3), 178 | ('(A,B,(C,D));', lambda r: r[0].name is None), 179 | ('(A,B,(C,D));', lambda r: len(r[0].descendants) == 3), 180 | ('(A,B,(C,D)E)Fäß;', lambda r: r[0].name == 'Fäß'), 181 | ('(:0.1,:0.2,(:0.3,:0.4):0.5);', lambda r: r[0].descendants[0].length == 0.1), 182 | ('(:0.1,:0.2,(:0.3,:0.4):0.5);', lambda r: len(r[0].descendants) == 3), 183 | ('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;', lambda r: r[0].name == 'A'), 184 | 
('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;', lambda r: r[0].descendants[-1].length == 0.1), 185 | ('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;', lambda r: r[0].name == 'A'), 186 | ('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;', lambda r: len(r[0].descendants) == 1), 187 | # http://marvin.cs.uidaho.edu/Teaching/CS515/newickFormat.html 188 | ( 189 | "(ant:17, (bat:31, cow:22):7, dog:22, (elk:33, fox:12):40);", 190 | lambda r: set(r[0].get_leaf_names()) == {'ant', 'bat', 'cow', 'dog', 'elk', 'fox'}), 191 | ( 192 | """\ 193 | ( 194 | (raccoon:19.19959,bear:6.80041):0.84600, 195 | ( 196 | (sea_lion:11.99700, seal:12.00300):7.52973, 197 | ( 198 | (monkey:100.85930,cat:47.14069):20.59201, 199 | weasel:18.87953 200 | ):2.09460 201 | ):3.87382, 202 | dog:25.46154 203 | );""", 204 | lambda r: set(r[0].get_leaf_names()) == 205 | {'raccoon', 'bear', 'sea_lion', 'seal', 'monkey', 'cat', 'weasel', 'dog'}), 206 | # https://evolution.genetics.washington.edu/phylip/newicktree.html 207 | ( 208 | "(,(,,),);", 209 | lambda r: len(r[0].get_leaves()) == 5), 210 | ( 211 | "((a:3[&&NHX:name=a:support=100],b:2[&&NHX:name=b:support=100]):4[&&NHX:name=ab:support=60],c:5[&&NHX:name=c:support=100]);", 212 | lambda r: r[0].get_leaves()[0].properties['support'] == '100') 213 | ] 214 | ) 215 | def test_quoting_and_comments(s, assertion): 216 | assert assertion(loads(s)) 217 | 218 | 219 | def test_comments(): 220 | t = '[&R] (A,B)C [% ] [% ] [% setBetweenBits = selected ];' 221 | tree = loads(t, strip_comments=True)[0] 222 | assert len(list(tree.walk())) == 3 and tree.comment is None 223 | 224 | 225 | @pytest.mark.parametrize( 226 | 's', 227 | [ 228 | '((A)C;', 229 | "(A,B,C),D);", 230 | '((A)C;D)', 231 | '(),;', 232 | ');', 233 | '(A,B)C[abc', 234 | '(A,B)C[abc]]', 235 | "(A,B)'C", 236 | "(A B)C;" 237 | "('AB'G,D)C;" 238 | ] 239 | ) 240 | def test_invalid(s): 241 | with pytest.raises(ValueError): 242 | loads(s) 243 | 244 | 245 | def test_Node_custom_length(): 246 | root = Node.create(length='1e2', 
length_parser=lambda l: l + 'i') 247 | assert root.length == '1e2i' 248 | root = Node.create(length_formatter=lambda l: 5) 249 | root.length = 10 250 | assert root.length == pytest.approx(5) 251 | 252 | root = Node.create(length=100., length_formatter="{:0.1e}".format) 253 | assert root.newick == ':1.0e+02' 254 | 255 | weird_numbers_tree = "((a:1.e2,b:3j),(c:0x0BEFD6B0,d:003))" 256 | root = loads(weird_numbers_tree, length_parser=None)[0] 257 | assert weird_numbers_tree == root.newick 258 | 259 | 260 | def test_rename(): 261 | n = loads("('a 1',b)c;")[0] 262 | with pytest.raises(ValueError): 263 | n.rename(**{'a 1': 'x y'}) 264 | assert n.rename(**{'a 1': "'x y'", 'c': 'z'}).newick == "('x y',b)z" 265 | assert n.rename(auto_quote=True, **{'x y': "a b"}).newick == "('a b',b)z" 266 | 267 | 268 | def test_strip_comments(): 269 | n = loads("(a[c1]:2.0,b:[c2]1.0)c;")[0] 270 | assert '[c1]' in n.newick and ('[c2]' in n.newick) 271 | n.strip_comments() 272 | assert n.newick == '(a:2.0,b:1.0)c' 273 | 274 | 275 | @pytest.mark.parametrize( 276 | 'nwk,kw,art', 277 | [ 278 | ("(A,(B,C)D)Ex;", 279 | {}, 280 | """\ 281 | /-A 282 | --Ex-| 283 | | /-B 284 | \\-D--| 285 | \\-C"""), 286 | ("(A,(B,C)D)Ex;", 287 | dict(show_internal=False), 288 | """\ 289 | /-A 290 | ----| 291 | | /-B 292 | \\---| 293 | \\-C"""), 294 | ("(A,B,C)D;", 295 | dict(show_internal=False), 296 | """\ 297 | /-A 298 | ----+-B 299 | \\-C"""), 300 | ("((A,B)C)Ex;", 301 | {}, 302 | """\ 303 | /-A 304 | --Ex --C--| 305 | \\-B"""), 306 | ("(,(,,),);", 307 | {}, 308 | " /-\n | /-\n---+--+-\n | \\-\n \\-"), 309 | ("(((((A),B),(C,D))),E);", 310 | {'strict': False}, 311 | """\ 312 | ┌── ──A 313 | ┌───┤ 314 | │ └─B 315 | ┌── ────┤ 316 | │ │ ┌─C 317 | ────┤ └───┤ 318 | │ └─D 319 | └─E""") 320 | ] 321 | ) 322 | def test_Node_ascii_art(nwk, kw, art): 323 | kw.setdefault('strict', True) 324 | assert loads(nwk)[0].ascii_art(**kw) == art 325 | 326 | 327 | def test_dumps(*trees): 328 | for ex in [ 329 | '(,,(,));', 330 | 
'(A,B,(C,D));', 331 | '(A,B,(C,D)E)F;', 332 | '(:0.1,:0.2,(:0.3,:0.4):0.5);', 333 | '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;', 334 | ]: 335 | assert ex == dumps(loads(ex)[0]) 336 | 337 | 338 | def test_clone(): 339 | """ 340 | This test illustrates how a tree can be assembled programmatically. 341 | """ 342 | newick = '(A,B,(C,D)E)F' 343 | tree1 = loads(newick)[0] 344 | 345 | def clone_node(n): 346 | c = Node(name=n.name) 347 | for nn in n.descendants: 348 | c.add_descendant(clone_node(nn)) 349 | return c 350 | 351 | assert clone_node(tree1).newick == newick 352 | 353 | 354 | def test_leaf_functions(): 355 | tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0] 356 | assert set(tree.get_leaf_names()) == {"B", "C", "D"} 357 | 358 | 359 | @pytest.mark.parametrize( 360 | 'tree,nodes,inverse, result', 361 | [ 362 | ('(A,((B,C),(D,E)))', 'A C E', False, '(B,D)'), 363 | ('((A,B),((C,D),(E,F)))', 'A C E', True, '((C,E),A)'), 364 | ('(b,(c,(d,(e,(f,g))h)i)a)', 'b c i', True, '(b,(c,i)a)'), 365 | ('(b,(c,(d,(e,(f,g))h)i)a)', 'b c i', False, ''), 366 | ('(b,(c,(d,(e,(f,g))h)i)a)', 'c i', False, '(b,a)'), 367 | ] 368 | ) 369 | def test_prune(tree, nodes, inverse, result): 370 | tree = loads(tree)[0] 371 | tree.prune_by_names(nodes.split(), inverse=inverse) 372 | tree.remove_redundant_nodes(preserve_lengths=False) 373 | assert tree.newick == result 374 | 375 | 376 | def test_prune_single_node_tree(): 377 | tree = loads('A')[0] 378 | tree.prune(tree.get_leaves()) 379 | assert tree.newick == 'A' 380 | 381 | 382 | @pytest.mark.parametrize( 383 | 'newick,kw,result', 384 | [ 385 | ('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;', dict(), '(B:0.2,(C:0.3,D:0.4)E:0.5)A:0.1'), 386 | ('((C)B)A', dict(preserve_lengths=False), 'A'), 387 | ('((C)B)A', dict(preserve_lengths=False, keep_leaf_name=True), 'C'), 388 | ( 389 | '((aiw),((aas,(kbt)),((abg),abf)))', 390 | dict(preserve_lengths=False, keep_leaf_name=True), 391 | '(((aas,kbt),(abf,abg)),aiw)'), 392 | ] 393 | ) 394 | def 
test_redundant_node_removal(newick, kw, result):  # remove_redundant_nodes(**kw) must turn `newick` into `result`
395 | tree = loads(newick)[0] 396 | tree.remove_redundant_nodes(**kw) 397 | assert tree.newick == result 398 | 399 | 400 | def test_prune_and_node_removal():  # pruning A leaves a single-child node; collapsing it sums the two branch lengths (1 + 1 -> 2.0)
401 | tree2 = loads("((A:1,B:1):1,C:1)")[0] 402 | tree2.prune_by_names(['A']) 403 | assert tree2.newick == '((B:1):1,C:1)' 404 | tree2.remove_redundant_nodes() 405 | assert tree2.newick == '(C:1,B:2.0)' 406 | 407 | 408 | def test_stacked_redundant_node_removal():  # chains of single-child nodes collapse in one call; kept lengths are summed (1 + 2 -> 3.0)
409 | tree = loads("(((((A,B))),C))")[0] 410 | tree.remove_redundant_nodes(preserve_lengths=False) 411 | assert tree.newick == "(C,(A,B))" 412 | 413 | tree = loads("(((A,B):1):2)")[0] 414 | tree.remove_redundant_nodes() 415 | assert tree.newick == '(A,B):3.0' 416 | 417 | 418 | def test_polytomy_resolution():  # resolve_polytomies() makes the tree binary; the nodes it inserts show up with length 0.0
419 | tree = loads('(A,B,(C,D,(E,F)))')[0] 420 | assert not tree.is_binary 421 | tree.resolve_polytomies() 422 | assert tree.newick == '(A,((C,((E,F),D):0.0),B):0.0)' 423 | assert tree.is_binary 424 | 425 | tree = loads('(A,B,C,D,E,F)')[0] 426 | assert not tree.is_binary 427 | tree.resolve_polytomies() 428 | assert tree.newick == '(A,(F,(B,(E,(C,D):0.0):0.0):0.0):0.0)' 429 | assert tree.is_binary 430 | 431 | 432 | def test_name_removal():  # remove_names() clears every node name and keeps the lengths
433 | tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0] 434 | tree.remove_names() 435 | assert dumps(tree) == '((:0.2,(:0.3,:0.4):0.5):0.1);' 436 | 437 | 438 | def test_internal_name_removal():  # only the internal names E, F and A disappear; leaf names stay
439 | tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0] 440 | tree.remove_internal_names() 441 | assert dumps(tree) == '((B:0.2,(C:0.3,D:0.4):0.5):0.1);' 442 | 443 | 444 | def test_leaf_name_removal():  # only the leaf names B, C and D disappear; internal names stay
445 | tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0] 446 | tree.remove_leaf_names() 447 | assert dumps(tree) == '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;' 448 | 449 | 450 | def test_length_removal():  # remove_lengths() drops every branch length and keeps the names
451 | tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0] 452 | tree.remove_lengths() 453 | assert dumps(tree) == '((B,(C,D)E)F)A;' 454 | 455 | 456 | def
test_all_removal():  # removing names and then lengths leaves the bare topology
457 | tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0] 458 | tree.remove_names() 459 | tree.remove_lengths() 460 | topology_only = dumps(tree) 461 | assert topology_only == '((,(,)));' 462 | 463 | 464 | def test_singletons():  # remove_redundant_nodes() takes the node count from 11 down to 9
465 | tree = loads('(((((A), B), (C, D))), E);')[0] 466 | assert len(list(tree.walk())) == 11 467 | tree.remove_redundant_nodes() 468 | assert len(list(tree.walk())) == 9 469 | 470 | 471 | def test_get_node():  # get_node() looks a node up by name, leaves and internal nodes alike
472 | tree = loads('(A,B,(C,D)E)F;')[0] 473 | assert tree.get_node("A").name == 'A' 474 | assert len(tree.get_node('E').get_leaves()) == 2 475 | 476 | # rename the internal node by plain attribute assignment; the serialisation must pick it up
477 | tree.get_node('E').name = 'G' 478 | assert tree.newick == '(A,B,(C,D)G)F' 479 | 480 | 481 | def test_prune_node():  # pruning internal node E alone gives the same tree as pruning E together with its leaves C and D
482 | tree = '(A,(B,(C,D)E)F)G;' 483 | t1 = loads(tree)[0] 484 | t1.prune_by_names(["C", "D", "E"]) 485 | t2 = loads(tree)[0] 486 | t2.prune_by_names(["E"]) 487 | assert t1.newick == t2.newick 488 | 489 | 490 | def test_with_comments():  # long comments on both leaves and on the root must be parsed and written back unchanged
491 | nwk = "(1[x&dmv={1},dmv1=0.260,dmv1_95%_hpd={0.003,0.625},dmv1_median=0.216,dmv1_range=" \ 492 | "{0.001,1.336},height=1.310e-15,height_95%_hpd={0.0,3.552e-15},height_median=0.0," \ 493 | "height_range={0.0,7.105e-15},length=2.188,length_95%_hpd={1.725,2.634}," \ 494 | "length_median=2.182,length_range={1.307,3.236}]:1.14538397925438," \ 495 | "2[&dmv={1},dmv1=0.260,dmv1_95%_hpd={0.003,0.625},dmv1_median=0.216,dmv1_range=" \ 496 | "{0.001,1.336},height=1.310e-15,height_95%_hpd={0.0,3.552e-15},height_median=0.0," \ 497 | "height_range={0.0,7.105e-15},length=2.188,length_95%_hpd={1.725,2.634}," \ 498 | "length_median=2.182,length_range={1.307,3.236}]:1.14538397925438)[y&dmv={1}," \ 499 | "dmv1=0.260,dmv1_95%_hpd={0.003,0.625},dmv1_median=0.216,dmv1_range={0.001,1.336}," \ 500 | "height=1.310e-15,height_95%_hpd={0.0,3.552e-15},height_median=0.0," \ 501 | "height_range={0.0,7.105e-15},length=2.188,length_95%_hpd={1.725,2.634}," \ 502 | "length_median=2.182,length_range={1.307,3.236}]" 503 | tree =
loads(nwk)[0] 504 | assert tree.comment.startswith('y') 505 | assert tree.descendants[0].name == '1' 506 | assert tree.descendants[0].comment[:47] == 'x&dmv={1},dmv1=0.260,dmv1_95%_hpd={0.003,0.625}' 507 | assert tree.newick == nwk  # serialising must reproduce the input, comments included
508 | 509 | 510 | def test_with_comments_beast():  # here each [&rate=...] comment sits between the ':' and the branch length
511 | nwk = "((((20:[&rate=9.363171791537587E-5]1320.9341043566992,(21:[&rate=9.363171791537587E-5]" \ 512 | "1225.8822690335624,(((((15:[&rate=9.363171791537587E-5]638.1949811891477,16:[&rate=" \ 513 | "9.363171791537587E-5]638.1949811891477):[&rate=9.363171791537587E-5]257.76795318129564" \ 514 | ",8:[&rate=9.363171791537587E-5]895.9629343704433):[&rate=9.363171791537587E-5]" \ 515 | "41.795862802882425,12:[&rate=9.363171791537587E-5]937.7587971733258):" \ 516 | "[&rate=9.363171791537587E-5]95.6952785114238,14:[&rate=9.363171791537587E-5]" \ 517 | "1033.4540756847496):[&rate=9.363171791537587E-5]59.28887326566064,((25:" \ 518 | "[&rate=9.363171791537587E-5]368.1724945784702,28:[&rate=9.363171791537587E-" \ 519 | "5]368.1724945784702):[&rate=9.363171791537587E-5]618.1292632448451,(13:[&rate=" \ 520 | "9.363171791537587E-5]894.6169275367406,((22:[&rate=9.363171791537587E-5]532." \ 521 | "4463352965287,33:[&rate=9.363171791537587E-5]532.4463352965287):[&rate=9." \ 522 | "363171791537587E-5]124.75991679524702,19:[&rate=9.363171791537587E-5]657." \ 523 | "2062520917757):[&rate=9.363171791537587E-5]237.4106754449649):[&rate=9." \ 524 | "363171791537587E-5]91.68483028657465):[&rate=9.363171791537587E-5]106.44119112709495):" \ 525 | "[&rate=9.363171791537587E-5]133.13932008315214):[&rate=9.363171791537587E-5]95." \ 526 | "05183532313686):[&rate=9.363171791537587E-5]239.53051384576952,((23:[&rate=9." \ 527 | "363171791537587E-5]886.6590941437129,2:[&rate=9.363171791537587E-5]886.6590941437129):" \ 528 | "[&rate=9.363171791537587E-5]318.065540579532,((6:[&rate=9.363171791537587E-5]1128." \ 529 | "8289029154403,37:[&rate=9.363171791537587E-5]1128.8289029154403):[&rate=9."
\ 530 | "363171791537587E-5]17.349382774569676,((((((3:[&rate=9.363171791537587E-5]459." \ 531 | "5487115479798,36:[&rate=9.363171791537587E-5]459.5487115479798):[&rate=9." \ 532 | "363171791537587E-5]306.57918484718175,(31:[&rate=9.363171791537587E-5]485." \ 533 | "4575256190764,34:[&rate=9.363171791537587E-5]485.4575256190764):[&rate=9." \ 534 | "363171791537587E-5]280.6703707760851):[&rate=9.363171791537587E-5]15.246829791795335," \ 535 | "(30:[&rate=9.363171791537587E-5]543.1657161064542,1:[&rate=9.363171791537587E-5]543." \ 536 | "1657161064542):[&rate=9.363171791537587E-5]238.2090100805027):[&rate=9." \ 537 | "363171791537587E-5]118.69392508203657,((7:[&rate=9.363171791537587E-5]520." \ 538 | "3998734304117,35:[&rate=9.363171791537587E-5]520.3998734304117):[&rate=9." \ 539 | "363171791537587E-5]238.7668559806733,(32:[&rate=9.363171791537587E-5]720." \ 540 | "2892667226898,17:[&rate=9.363171791537587E-5]720.2892667226898):[&rate=9." \ 541 | "363171791537587E-5]38.87746268839521):[&rate=9.363171791537587E-5]140.9019218579084)" \ 542 | ":[&rate=9.363171791537587E-5]52.21797041264119,26:[&rate=9.363171791537587E-5]" \ 543 | "952.2866216816346):[&rate=9.363171791537587E-5]163.25701515522496,((18:[&rate=9." \ 544 | "363171791537587E-5]720.6233628054213,10:[&rate=9.363171791537587E-5]720.6233628054213):"\ 545 | "[&rate=9.363171791537587E-5]119.64362661776931,(29:[&rate=9.363171791537587E-5]617." \ 546 | "5158316030422,(9:[&rate=9.363171791537587E-5]593.9192324440043,(11:[&rate=9." \ 547 | "363171791537587E-5]472.3642192781455,27:[&rate=9.363171791537587E-5]472.3642192781455)" \ 548 | ":[&rate=9.363171791537587E-5]121.55501316585872):[&rate=9.363171791537587E-5]23." \ 549 | "596599159037964):[&rate=9.363171791537587E-5]222.75115782014836):[&rate=9." \ 550 | "363171791537587E-5]275.276647413669):[&rate=9.363171791537587E-5]30.63464885315034):" \ 551 | "[&rate=9.363171791537587E-5]58.54634903323495):[&rate=9.363171791537587E-5]355." 
\ 552 | "73998347922384):[&rate=9.363171791537587E-5]1186.6682306101936,24:[&rate=9." \ 553 | "363171791537587E-5]2747.1328488126624):[&rate=9.363171791537587E-5]301.4581721015056," \ 554 | "(38:[&rate=9.363171791537587E-5]963.0459960655501,(5:[&rate=9.363171791537587E-5]500." \ 555 | "66376645282014,4:[&rate=9.363171791537587E-5]500.66376645282014):[&rate=9." \ 556 | "363171791537587E-5]462.38222961272993):[&rate=9.363171791537587E-5]2085.5450248486177)" 557 | tree = loads(nwk)[0] 558 | assert tree.descendants[0].comment == '&rate=9.363171791537587E-5' 559 | assert tree.descendants[0].name is None 560 | assert tree.descendants[0].length == pytest.approx(301.4581721015056)  # the length is still read as a number although a comment precedes it
561 | assert tree.newick == nwk 562 | 563 | 564 | def test_roundtrip_two_comments():  # leaves here carry two comments, one after the name and one after the ':'; the tree must round-trip unchanged
565 | nwk = "((1[&height=9.687616008832612E-12,height_95%_HPD={0.0,2.9103830456733704E-11}," \ 566 | "height_median=0.0,height_range={0.0,3.725290298461914E-9},length=107922.03600478375," \ 567 | "length_95%_HPD={474.13147831140884,255028.5553480226},length_median=16766.239568341443," \ 568 | "length_range={474.13147831140884,3.1088350108564742E7}]:[&rate=0.10061354528306601]" \ 569 | "14581.043598225671,(7[&height=9.309604621069004E-12,height_95%_HPD=" \ 570 | "{0.0,2.9103830456733704E-11},height_median=0.0,height_range={0.0,3.725290298461914E-9}," \ 571 | "length=65008.773722909886,length_95%_HPD={232.35151363136288,185443.9546690862}," \ 572 | "length_median=11647.615898062268,length_range={232.35151363136288,1.150899255812396E7}]:" \ 573 | "[&rate=0.14084529313827582]10185.506184914375,(9[&height=9.127260087722948E-12," \ 574 | "height_95%_HPD={0.0,2.9103830456733704E-11},height_median=0.0," \ 575 | "height_range={0.0,3.725290298461914E-9},length=25379.761049583933,length_95%_HPD=" \ 576 | "{56.592660460186835,58976.07308471651},length_median=3942.1518558536773,length_range=" \ 577 | "{56.592660460186835,6951356.4983186275}]:[&rate=0.22402647395508335]3942.1518558536773,12" \ 578 |
"[&height=9.127260087722948E-12,height_95%_HPD={0.0,2.9103830456733704E-11},height_median=" \ 579 | "0.0,height_range={0.0,3.725290298461914E-9},length=25379.761049583933,length_95%_HPD=" \ 580 | "{56.592660460186835,58976.07308471651},length_median=3942.1518558536773,length_range=" \ 581 | "{56.592660460186835,6951356.4983186275}]:[&rate=0.22402647395508335]3942.1518558536773)" \ 582 | "[&height=25379.76104958394,height_95%_HPD={56.59266046018695,58976.07308471651}," \ 583 | "height_median=3942.1518558536773,height_range={56.59266046018695,6951356.498318631}," \ 584 | "length=38091.50530423341,length_95%_HPD={46.39747042888712,111881.67133971374}," \ 585 | "length_median=6404.173915508887,length_range={46.39747042888712,5175584.190381359}," \ 586 | "posterior=1.0]:[&rate=0.1764340946476647]6243.354329060698)[&height=63964.7133354256," \ 587 | "height_95%_HPD={232.35151363136288,174173.2306986642},height_median=10185.506184914375," \ 588 | "height_range={232.35151363136288,1.150899255812396E7},length=35902.53304149997," \ 589 | "length_95%_HPD={14.560332318756082,95384.63040570999},length_median=4975.027852972411," \ 590 | "length_range={14.560332318756082,7445332.392798465},posterior=0.8589521397147047]:" \ 591 | "[&rate=0.07923911606477686]4395.537413311296)[&height=80767.98182026888,height_95%_HPD=" \ 592 | "{529.6147479238796,233688.59740367957},height_median=14581.043598225671,height_range=" \ 593 | "{529.6147479238796,1.4524763136448236E7},length=31190.19922532072,length_95%_HPD=" \ 594 | "{3.6537288492327207,79714.48294750144},length_median=4454.437543634325,length_range=" \ 595 | "{3.6537288492327207,1.1428212448711276E7},posterior=0.5345953872816958]:[&rate=" \ 596 | "0.15749022880313052]9820.657628578989,(((2[&height=7.737585890856534E-12,height_95%_HPD=" \ 597 | "{0.0,2.9103830456733704E-11},height_median=0.0,height_range={0.0,3.725290298461914E-9}," \ 598 | "length=45609.46047931727,length_95%_HPD={206.83939466644824,123542.2654486959}," \ 599 | 
"length_median=8434.012912102502,length_range={206.83939466644824,1.6023311824151885E7}]:" \ 600 | "[&rate=0.3421656699625563]8434.18109992326,(3[&height=9.379050440351825E-12," \ 601 | "height_95%_HPD={0.0,2.9103830456733704E-11},height_median=0.0,height_range=" \ 602 | "{0.0,3.725290298461914E-9},length=29034.80730007669,length_95%_HPD={173.60894978441502," \ 603 | "80127.43307802954},length_median=5408.332869562197,length_range={173.60894978441502," \ 604 | "7565197.594627727}]:[&rate=0.40878488857536766]5408.332869562197,(((4[&height=" \ 605 | "9.533674239636711E-12,height_95%_HPD={0.0,2.9103830456733704E-11},height_median=0.0," \ 606 | "height_range={0.0,3.725290298461914E-9},length=12414.863740264116,length_95%_HPD=" \ 607 | "{66.08153995214036,31146.118985240348},length_median=2052.7139045146614,length_range=" \ 608 | "{66.08153995214036,4162747.7669137157}]:[&rate=0.40878488857536766]2052.713904514665," \ 609 | "(8[&height=8.76596601525156E-12,height_95%_HPD={0.0,2.9103830456733704E-11},height_median=" \ 610 | "0.0,height_range={0.0,3.725290298461914E-9},length=7495.104386864869,length_95%_HPD=" \ 611 | "{33.229724377705,18284.937529115006},length_median=1238.5410223305225,length_range=" \ 612 | "{33.229724377705,2652538.9755849764}]:[&rate=1.1616489920397255]1238.5410223305262,10" \ 613 | "[&height=8.76596601525156E-12,height_95%_HPD={0.0,2.9103830456733704E-11},height_median=" \ 614 | "0.0,height_range={0.0,3.725290298461914E-9},length=7495.104386864869,length_95%_HPD={" \ 615 | "33.229724377705,18284.937529115006},length_median=1238.5410223305225,length_range={" \ 616 | "33.229724377705,2652538.9755849764}]:[&rate=0.6807065555747316]1238.5410223305262)[" \ 617 | "&height=7495.104386864877,height_95%_HPD={33.229724377705,18284.937529115006}," \ 618 | "height_median=1238.5410223305262,height_range={33.229724377705,2652538.97558498}," \ 619 | "length=4919.759353399242,length_95%_HPD={11.135741831899963,13615.173191136855}," \ 620 | 
"length_median=727.9975598861292,length_range={11.135741831899963,1806011.4114757068}," \ 621 | "posterior=1.0]:[&rate=0.6807065555747316]814.1728821841389)[&height=12414.863740264125," \ 622 | "height_95%_HPD={66.08153995214036,31146.118985240348},height_median=2052.713904514665," \ 623 | "height_range={66.08153995214036,4162747.7669137195},length=4494.749985367252," \ 624 | "length_95%_HPD={7.997119765025445,13544.827830110327},length_median=743.8499422922905," \ 625 | "length_range={7.997119765025445,1079092.8689759858},posterior=1.0]:[&rate=1.1616489920397255" \ 626 | "]556.5552100585955,11[&height=8.828667314765534E-12,height_95%_HPD={" \ 627 | "0.0,2.9103830456733704E-11},height_median=0.0,height_range={0.0,3.725290298461914E-9}," \ 628 | "length=14628.206134120901,length_95%_HPD={94.62325431555337,39330.92845327759}," \ 629 | "length_median=2698.76238812456,length_range={84.29227912953411,3969567.691730257}]:" \ 630 | "[&rate=1.1616489920397255]2609.2691145732606)[&height=11972.847735373902,height_95%_HPD=" \ 631 | "{84.29227912953411,37091.0728580773},height_median=2609.2691145732606,height_range=" \ 632 | "{84.29227912953411,1575701.7833227757},length=2532.3352144701103,length_95%_HPD={" \ 633 | "0.004421725508564123,7620.200479995983},length_median=483.3085065544801,length_range={" \ 634 | "0.004421725508564123,488670.1513605751},posterior=0.674310091987735]:[&rate=" \ 635 | "0.14084529313827582]843.471284182473,13[&height=9.62737001763898E-12,height_95%_HPD={" \ 636 | "0.0,2.9103830456733704E-11},height_median=0.0,height_range={0.0,3.725290298461914E-9}," \ 637 | "length=16098.152887077857,length_95%_HPD={101.12152367712349,43253.00278986094}," \ 638 | "length_median=3021.1956203544178,length_range={87.52788789503245,3969567.691730257}]:" \ 639 | "[&rate=0.015281431734503132]3452.7403987557336)[&height=18854.825355500612,height_95%_HPD=" \ 640 | "{117.06952824319609,50863.21772980399},height_median=3452.7403987557336,height_range=" \ 641 | 
"{117.06952824319609,5241840.635889705},length=10179.981944576117,length_95%_HPD=" \ 642 | "{21.621494899670324,27419.87806782825},length_median=1701.737850154088,length_range=" \ 643 | "{21.621494899670324,2891063.76993455},posterior=1.0]:[&rate=0.22402647395508335]" \ 644 | "1955.592470806463)[&height=29034.807300076696,height_95%_HPD={173.60894978441502," \ 645 | "80127.4330780296},height_median=5408.332869562197,height_range={173.60894978441502," \ 646 | "7565197.594627731},length=16575.3792990243,length_95%_HPD={33.23044488203345,42503.93157259235}," \ 647 | "length_median=2600.4791495973714,length_range={33.23044488203345,9106986.442180987}," \ 648 | "posterior=1.0]:[&rate=0.40878488857536766]3025.8482303610635)[&height=45684.27028379785," \ 649 | "height_95%_HPD={206.83939466644847,123833.12145059655},height_median=8434.18109992326," \ 650 | "height_range={206.83939466644847,1.6023311824151888E7},length=18834.009102754226," \ 651 | "length_95%_HPD={12.551497256452194,44713.49388946092},length_median=2723.4761572134503," \ 652 | "length_range={12.551497256452194,8370840.365275718},posterior=0.9980002666311159]:[" \ 653 | "&rate=0.1260365850769451]3483.6026288094054,6[&height=8.559841366136245E-12," \ 654 | "height_95%_HPD={0.0,2.9103830456733704E-11},height_median=0.0,height_range={0.0," \ 655 | "3.725290298461914E-9},length=64263.38390382354,length_95%_HPD={242.88760407354857," \ 656 | "168923.3746203835},length_median=11621.922183927634,length_range={242.88760407354857," \ 657 | "2.4394152189427603E7}]:[&rate=0.15749022880313052]11917.783728732666)[&height=" \ 658 | "66462.97722328358,height_95%_HPD={242.8876040735488,172328.18950797373},height_median=" \ 659 | "11917.783728732666,height_range={242.8876040735488,2.4394152189427607E7},length=" \ 660 | "21074.24071127439,length_95%_HPD={9.33738591016754,65106.83779065257},length_median=" \ 661 | "3497.449465911657,length_range={9.33738591016754,3253753.156094387},posterior=" \ 662 | 
"0.9266764431409146]:[&rate=0.15749022880313052]1863.0826187841885,5[&height=" \ 663 | "8.909222646386704E-12,height_95%_HPD={0.0,2.9103830456733704E-11},height_median=0.0," \ 664 | "height_range={0.0,3.725290298461914E-9},length=86228.77947668775,length_95%_HPD={" \ 665 | "320.8279037754549,250680.98349718086},length_median=16191.236491985874,length_range={" \ 666 | "320.8279037754549,2.5385363199801262E7}]:[&rate=0.1764340946476647]13780.866347516854)[" \ 667 | "&height=68851.55714103008,height_95%_HPD={332.8967971018673,208360.19528088247}," \ 668 | "height_median=13780.866347516854,height_range={332.8967971018673,1.3209839631035523E7}," \ 669 | "length=36292.10334947041,length_95%_HPD={6.842269636452329,77005.81130131785}," \ 670 | "length_median=3986.097675025603,length_range={6.842269636452329,7690578.10700983}," \ 671 | "posterior=0.6665777896280496]:[&rate=0.06963566760197197]10620.834879287806)[&height=" \ 672 | "138844.64905308632,height_95%_HPD={474.13147831140884,367996.7683084475},height_median=" \ 673 | "24401.70122680466,height_range={474.13147831140884,3.1088350108564746E7},length=0.0,posterior=1.0]:0.0" 674 | tree = loads(nwk)[0] 675 | leafs = [n for n in tree.walk() if n.is_leaf] 676 | assert len(leafs[0].comments) == 2 677 | assert 'rate' in leafs[0].properties  # key=value data from the comments is reachable through .properties
678 | assert tree.newick == nwk 679 | 680 | 681 | @pytest.mark.slow 682 | def test_gtdb_tree(fixture_dir):  # a quoted name containing ':', ';' and spaces is returned with its quotes intact
683 | tree = read(fixture_dir / 'ar53_r207.tree')[0] 684 | nodes = [node.name for node in tree.walk() if node.name] 685 | assert nodes[-9] == "'100.0:p__Undinarchaeota; c__Undinarchaeia; o__Undinarchaeales'" 686 | 687 | 688 | def test_mrbayes_tree(fixture_dir):  # property values are compared as raw strings; embedded double quotes and braces are kept
689 | tree = read(fixture_dir / 'mrbayes.nwk')[0] 690 | nodes = {node.name: node.properties for node in tree.walk() if node.name} 691 | assert nodes['1'] == { 692 | 'prob': '1.00000000e+00', 693 | 'prob_stddev': '0.00000000e+00', 694 | 'prob_range': '{1.00000000e+00,1.00000000e+00}', 695 | 'prob(percent)': '"100"', 696 |
'prob+-sd': '"100+-0"', 697 | 'length_mean': '1.32336084e-02', 698 | 'length_median': '1.32257600e-02', 699 | 'length_95%HPD': '{1.25875600e-02,1.38462600e-02}', 700 | } 701 | 702 | 703 | def test_mesquite(): 704 | tree = loads('((1:15.3,4:15.3):4.5,(3:12.7,(2:8.2,5:8.2):4.5):7.1)[%selected = on ] [% ] [% setBetweenBits = selected ];')[0]; 705 | assert {'1', '2', '3', '4', '5'} == {n.name for n in tree.walk() if n.name} 706 | 707 | --------------------------------------------------------------------------------