├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── main.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE.txt ├── Makefile ├── NOTICE.txt ├── README.md ├── docker-compose.yml ├── docs └── images │ └── Bitcoin SpamScope.jpg ├── pyproject.toml ├── src └── mailparser │ ├── __init__.py │ ├── __main__.py │ ├── const.py │ ├── core.py │ ├── exceptions.py │ ├── utils.py │ └── version.py ├── tests ├── mails │ ├── mail_malformed_1 │ ├── mail_malformed_2 │ ├── mail_malformed_3 │ ├── mail_outlook_1 │ ├── mail_test_1 │ ├── mail_test_10 │ ├── mail_test_11 │ ├── mail_test_12 │ ├── mail_test_13 │ ├── mail_test_14 │ ├── mail_test_15 │ ├── mail_test_16 │ ├── mail_test_17 │ ├── mail_test_2 │ ├── mail_test_3 │ ├── mail_test_4 │ ├── mail_test_5 │ ├── mail_test_6 │ ├── mail_test_7 │ ├── mail_test_8 │ └── mail_test_9 ├── test_mail_parser.py └── test_main.py └── uv.lock /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [fedelemantuano] 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. `import mailparser` 13 | 2. `mail = mailparser.parse_from_file(f)` 14 | 3. '....' 15 | 4. See error 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Raw mail** 21 | The raw mail to reproduce the behavior. 22 | You can use a `gist` like [this](https://gist.github.com/fedelemantuano/5dd702004c25a46b2bd60de21e67458e). 
23 | The issues without raw mail will be closed. 24 | 25 | **Environment:** 26 | - OS: [e.g. Linux, Windows] 27 | - Docker: [yes or no] 28 | - mail-parser version [e.g. 3.6.0] 29 | 30 | **Additional context** 31 | Add any other context about the problem here (e.g. stack traceback error). 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Python application and Docker image CI 2 | 3 | on: 4 | push: 5 | branches: [ master, develop, feature/* ] 6 | tags: [ '*.*.*' ] # Enable pipeline on tag pushes 7 | pull_request: 8 | branches: [ master, develop ] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Install dependencies 28 | run: | 29 | curl -LsSf https://astral.sh/uv/install.sh | sh 30 | sudo apt-get -qq update 31 | sudo apt-get install -y libemail-outlook-message-perl 32 | uv sync 33 | export PERL_MM_USE_DEFAULT=1 34 | sudo cpan -f -i Email::Outlook::Message 35 | 36 | - name: Run tests 37 | env: 38 | PYTHONPATH: src 39 | run: | 40 | make test 41 | uv run mail-parser -v 42 | uv run mail-parser -h 43 | uv run mail-parser -f tests/mails/mail_malformed_3 -j 44 | cat tests/mails/mail_malformed_3 | uv run mail-parser -k -j 45 | 46 | - name: Run pre-commit 47 | run: | 48 | make pre-commit 49 | 50 | - name: Report to Coveralls 51 | if: matrix.python-version == '3.10' 52 | uses: coverallsapp/github-action@v2.2.3 53 | with: 54 | github-token: ${{ secrets.GITHUB_TOKEN }} 55 | 56 | - name: Build 57 | if: matrix.python-version == '3.10' 58 | run: | 59 | uv build 60 | 61 | - name: Upload artifacts 62 | if: matrix.python-version == '3.10' 63 | uses: actions/upload-artifact@v4 64 | with: 65 | name: build-artifacts 66 | path: | 67 | dist/mail_parser-* 68 | 69 | - name: Publish to PyPI 70 | if: matrix.python-version == '3.10' && startsWith(github.ref, 'refs/tags/') 
71 | uses: pypa/gh-action-pypi-publish@v1.5.1 72 | with: 73 | user: ${{ secrets.PYPI_USERNAME }} 74 | password: ${{ secrets.PYPI_PASSWORD }} 75 | 76 | docker: 77 | if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/develop' || startsWith(github.ref, 'refs/tags/')) 78 | runs-on: ubuntu-latest 79 | needs: build 80 | steps: 81 | - uses: actions/checkout@v4 82 | 83 | - name: Download build artifacts 84 | uses: actions/download-artifact@v4 85 | with: 86 | name: build-artifacts 87 | path: dist/ 88 | 89 | - name: Set up Docker Buildx 90 | uses: docker/setup-buildx-action@v3 91 | 92 | - name: Log in to GitHub Container Registry 93 | uses: docker/login-action@v3 94 | with: 95 | registry: ghcr.io 96 | username: ${{ github.actor }} 97 | password: ${{ secrets.GITHUB_TOKEN }} 98 | 99 | - name: Log in to Docker Hub 100 | uses: docker/login-action@v3 101 | with: 102 | registry: docker.io 103 | username: ${{ secrets.DOCKER_USERNAME }} 104 | password: ${{ secrets.DOCKER_PASSWORD }} 105 | 106 | - name: Extract branch or tag name 107 | id: extract_ref 108 | run: | 109 | if [ -n "${GITHUB_HEAD_REF}" ]; then 110 | REF_NAME=${GITHUB_HEAD_REF} 111 | else 112 | REF_NAME=$(git describe --tags --exact-match 2>/dev/null || git rev-parse --abbrev-ref HEAD) 113 | fi 114 | echo "REF_NAME=${REF_NAME,,}" >> $GITHUB_ENV 115 | 116 | - name: Build and push Docker image on GitHub Container Registry 117 | run: | 118 | IMAGE_NAME=ghcr.io/ghcr.io/spamscope/mail-parser/mailparser 119 | if [[ $GITHUB_REF == refs/tags/* ]]; then 120 | TAG=${GITHUB_REF#refs/tags/} 121 | docker build \ 122 | --label "org.opencontainers.image.source=${{ github.repositoryUrl }}" \ 123 | --label "org.opencontainers.image.description=Easy way to pass from raw mail to Python object" \ 124 | --label "org.opencontainers.image.licenses=Apache-2.0" \ 125 | -t $IMAGE_NAME:$TAG \ 126 | -t $IMAGE_NAME:latest . 
127 | docker push $IMAGE_NAME:$TAG 128 | docker push $IMAGE_NAME:latest 129 | else 130 | docker build \ 131 | --label "org.opencontainers.image.source=${{ github.repositoryUrl }}" \ 132 | --label "org.opencontainers.image.description=Easy way to pass from raw mail to Python object" \ 133 | --label "org.opencontainers.image.licenses=Apache-2.0" \ 134 | -t $IMAGE_NAME:develop . 135 | docker push $IMAGE_NAME:develop 136 | fi 137 | 138 | - name: Build and push Docker image on Docker Hub 139 | run: | 140 | IMAGE_NAME=docker.io/${{ secrets.DOCKER_USERNAME }}/spamscope-mail-parser 141 | if [[ $GITHUB_REF == refs/tags/* ]]; then 142 | TAG=${GITHUB_REF#refs/tags/} 143 | docker build \ 144 | --label "org.opencontainers.image.source=${{ github.repositoryUrl }}" \ 145 | --label "org.opencontainers.image.description=Easy way to pass from raw mail to Python object" \ 146 | --label "org.opencontainers.image.licenses=Apache-2.0" \ 147 | -t $IMAGE_NAME:$TAG \ 148 | -t $IMAGE_NAME:latest . 149 | docker push $IMAGE_NAME:$TAG 150 | docker push $IMAGE_NAME:latest 151 | else 152 | docker build \ 153 | --label "org.opencontainers.image.source=${{ github.repositoryUrl }}" \ 154 | --label "org.opencontainers.image.description=Easy way to pass from raw mail to Python object" \ 155 | --label "org.opencontainers.image.licenses=Apache-2.0" \ 156 | -t $IMAGE_NAME:develop . 
157 | docker push $IMAGE_NAME:develop 158 | fi 159 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # python_template defaults 2 | /env/ 3 | /build/ 4 | /dist/ 5 | *.egg-info 6 | *.pyc 7 | /pip-wheel-metadata/ 8 | .pytest_cache/ 9 | coverage.xml 10 | coverage.json 11 | .coverage 12 | htmlcov/ 13 | .mypy_cache/ 14 | .eggs/ 15 | .*.tgt 16 | .idea/ 17 | .vscode/ 18 | junit.xml 19 | /venv/ 20 | /__pycache__/ 21 | /*.egg-info/ 22 | .DS_Store 23 | VERSION 24 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | args: ['--maxkb=5000'] 12 | - id: check-case-conflict 13 | - id: check-json 14 | - id: check-merge-conflict 15 | - id: detect-aws-credentials 16 | args: ["--allow-missing-credentials"] 17 | - id: detect-private-key 18 | - id: mixed-line-ending 19 | - id: check-ast 20 | 21 | - repo: https://github.com/astral-sh/ruff-pre-commit 22 | # Ruff version. 23 | rev: v0.7.3 24 | hooks: 25 | # Run the linter. 26 | - id: ruff 27 | args: [ --fix ] 28 | # Run the formatter. 
29 | - id: ruff-format 30 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim-bookworm 2 | 3 | # Set environment variables 4 | 5 | # Don’t buffer stdout/stderr, don’t write .pyc files 6 | ENV PYTHONUNBUFFERED=1 7 | ENV PYTHONDONTWRITEBYTECODE=1 8 | 9 | ENV MAIL_PARSER_PATH=/app 10 | ENV BINARY_NAME="mail_parser-latest.tar.gz" 11 | 12 | # Copy the mail-parser binary from the build context 13 | COPY ./dist/*.tar.gz ${MAIL_PARSER_PATH}/${BINARY_NAME} 14 | 15 | # Install dependencies 16 | RUN apt-get -yqq update && \ 17 | apt-get -yqq --no-install-recommends install libemail-outlook-message-perl && \ 18 | apt-get clean && \ 19 | rm -rf /var/lib/apt/lists/* 20 | 21 | # Install the mail-parser package 22 | RUN useradd -m mailparser \ 23 | && chown mailparser:mailparser ${MAIL_PARSER_PATH} \ 24 | && pip install "${MAIL_PARSER_PATH}/${BINARY_NAME}" 25 | 26 | USER mailparser 27 | 28 | ENTRYPOINT ["mail-parser"] 29 | CMD ["-h"] 30 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help clean clean-build clean-test test lint format check install build release 2 | .DEFAULT_GOAL := help 3 | 4 | define PRINT_HELP_PYSCRIPT 5 | import re, sys 6 | 7 | for line in sys.stdin: 8 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 9 | if match: 10 | target, help = match.groups() 11 | print("%-20s %s" % (target, help)) 12 | endef 13 | export PRINT_HELP_PYSCRIPT 14 | 15 | help: ## show this help message 16 | @python3 -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 17 | 18 | install: ## install dependencies using uv 19 | uv sync 20 | 21 | clean-build: ## remove build artifacts 22 | find . -type d -name "build" -exec rm -rf {} + 23 | find . -type d -name "dist" -exec rm -rf {} + 24 | find . -type d -name "*.egg-info" -exec rm -rf {} + 25 | 26 | clean-test: ## remove test and coverage artifacts 27 | find . -type f -name "*.log" -delete 28 | find . -type f -name "coverage.xml" -delete 29 | find . 
-type f -name "junit.xml" -delete 30 | find . -type f -name ".coverage" -delete 31 | find . -type d -name ".pytest_cache" -exec rm -rf {} + 32 | find . -type d -name "htmlcov" -exec rm -rf {} + 33 | find . -type d -name ".mypy_cache" -exec rm -rf {} + 34 | find . -type d -name "__pycache__" -exec rm -rf {} + 35 | 36 | clean: clean-test clean-build ## remove all artifacts 37 | 38 | test: ## run tests 39 | uv run pytest 40 | 41 | lint: ## run linting with ruff 42 | uv run ruff check . 43 | 44 | format: ## format code with ruff 45 | uv run ruff format . 46 | 47 | check: lint test ## run linting and tests 48 | 49 | build: clean ## build package 50 | uv build 51 | 52 | pre-commit: ## run pre-commit hooks 53 | uv run pre-commit run --all-files 54 | 55 | release: build ## build and upload to PyPI 56 | uv run twine upload dist/* 57 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano) 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/) 2 | [![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop) 3 | [![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser) 4 | 5 | 6 | ![SpamScope](https://raw.githubusercontent.com/SpamScope/spamscope/develop/docs/logo/spamscope.png) 7 | 8 | # mail-parser 9 | mail-parser goes beyond being just a simple wrapper for the Python Standard Library's [email module](https://docs.python.org/2/library/email.message.html). It seamlessly transforms raw emails into versatile Python objects that you can integrate effortlessly into your projects. As the cornerstone of [SpamScope](https://github.com/SpamScope/spamscope), mail-parser empowers you to handle emails with ease and efficiency. 10 | 11 | Additionally, mail-parser supports the parsing of Outlook email formats (.msg). To enable this functionality on Debian-based systems, simply install the necessary package: 12 | 13 | ``` 14 | $ apt-get install libemail-outlook-message-perl 15 | ``` 16 | 17 | For further details about the package, you can run: 18 | 19 | ``` 20 | $ apt-cache show libemail-outlook-message-perl 21 | ``` 22 | 23 | mail-parser is fully compatible with Python 3, ensuring modern performance and reliability. 24 | 25 | 26 | # Apache 2 Open Source License 27 | mail-parser can be downloaded, used, and modified free of charge. It is available under the Apache 2 license. 28 | 29 | 30 | # Support the Future of mail-parser 31 | Every contribution fuels innovation! If you believe in a powerful and reliable email parsing tool, consider investing in mail-parser. 
Your donation directly supports ongoing development, ensuring that we continue providing a robust, cutting-edge solution for developers everywhere. 32 | 33 | **Invest in Innovation** 34 | By donating, you help us: 35 | - Enhance and expand features. 36 | - Maintain a secure and reliable project. 37 | - Continue offering a valuable tool to the community. 38 | 39 | [![Donate](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif "Donate")](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2) 40 | 41 | Or contribute with Bitcoin: 42 | 43 | 44 | Bitcoin 45 | 46 | 47 | **Bitcoin Address:** `bc1qxhz3tghztpjqdt7atey68s344wvmugtl55tm32` 48 | 49 | Thank you for supporting the evolution of mail-parser! 50 | 51 | 52 | # mail-parser on Web 53 | Explore mail-parser on these platforms: 54 | 55 | - **[FreeBSD port](https://www.freshports.org/mail/py-mail-parser/)** 56 | - **[Arch User Repository](https://aur.archlinux.org/packages/mailparser/)** 57 | - **[REMnux](https://docs.remnux.org/discover-the-tools/analyze+documents/email+messages#mail-parser)** 58 | 59 | 60 | # Description 61 | mail-parser takes a raw email as input and converts it into a comprehensive Python object that mirrors the structure of an email as defined by the relevant RFCs. Each property of this object directly maps to standard [RFC headers](https://www.iana.org/assignments/message-headers/message-headers.xhtml) such as "From", "To", "Cc", "Bcc", "Subject", and more. 62 | 63 | In addition, the parser extracts supplementary components including: 64 | - Plain text and HTML bodies for versatile processing. 65 | - Attachments along with their metadata (e.g., filename, content type, encoding, and more). 66 | - Detailed diagnostics like timestamp conversions, defects indicating non-compliant header formats, and custom header management (using underscore substitutions for hyphenated header names). 
67 | 68 | Moreover, each header and property is accessible in multiple formats: 69 | - A native Python value for immediate use. 70 | - A raw string to retain original formatting. 71 | - A JSON representation for simplified integration with other tools or services. 72 | 73 | This rich parsing capability makes mail-parser a robust tool for email processing, enabling developers to handle, analyze, and even troubleshoot raw email data with comprehensive detail. 74 | 75 | - bcc 76 | - cc 77 | - date 78 | - delivered_to 79 | - from\_ (not `from`, because `from` is a Python keyword) 80 | - message_id 81 | - received 82 | - reply_to 83 | - subject 84 | - to 85 | 86 | There are other properties to get: 87 | - body 88 | - body html 89 | - body plain 90 | - headers 91 | - attachments 92 | - sender IP address 93 | - to domains 94 | - timezone 95 | 96 | The `attachments` property is a list of objects. Every object has the following keys: 97 | - binary: it's true if the attachment is a binary 98 | - charset 99 | - content_transfer_encoding 100 | - content-disposition 101 | - content-id 102 | - filename 103 | - mail_content_type 104 | - payload: attachment payload in base64 105 | 106 | To get custom headers you should replace "-" with "\_". 107 | Example for header `X-MSMail-Priority`: 108 | 109 | ``` 110 | $ mail.X_MSMail_Priority 111 | ``` 112 | 113 | The `received` header is parsed and split into hops. The fields supported for each hop are: 114 | - by 115 | - date 116 | - date_utc 117 | - delay (between two hops) 118 | - envelope_from 119 | - envelope_sender 120 | - for 121 | - from 122 | - hop 123 | - with 124 | 125 | 126 | > **Important:** mail-parser can detect defects in mail. 
127 | - [defects](https://docs.python.org/2/library/email.message.html#email.message.Message.defects): mail with some parts that are not RFC-compliant 128 | 129 | All properties have a JSON and raw property that you can get with: 130 | - name_json 131 | - name_raw 132 | 133 | Example: 134 | 135 | ``` 136 | $ mail.to (Python object) 137 | $ mail.to_json (JSON) 138 | $ mail.to_raw (raw header) 139 | ``` 140 | 141 | The command line tool uses the JSON format. 142 | 143 | 144 | ## Defects and Their Impact on Email Security 145 | Email defects, such as malformed boundaries, can be exploited by malicious actors to bypass antispam filters. For instance, a poorly formatted boundary in an email might conceal an illegitimate epilogue that contains hidden malicious content, such as malware payloads or phishing links. 146 | 147 | mail-parser is built to detect these structural irregularities, ensuring that even subtle anomalies are captured and analyzed. By identifying these defects, the library provides an early warning system, allowing you to: 148 | 149 | - Uncover hidden parts of an email that may be deliberately obfuscated. 150 | - Diagnose potential security threats stemming from non-standard email formatting. 151 | - Facilitate deeper forensic analysis of suspicious emails where the epilogue might carry harmful code or deceitful information. 152 | 153 | This robust defect detection mechanism is essential for maintaining the integrity of your email processing systems and enhancing overall cybersecurity. 154 | 155 | 156 | # Authors 157 | 158 | ## Main Author 159 | **Fedele Mantuano**: [LinkedIn](https://www.linkedin.com/in/fmantuano/) 160 | 161 | 162 | # Installation 163 | To install mail-parser, follow these simple steps: 164 | 165 | 1. Make sure you have Python 3 installed on your system. 166 | 2. Open your terminal or command prompt. 167 | 3. Run the following command to install mail-parser from PyPI: 168 | 169 | ```bash 170 | $ pip install mail-parser 171 | ``` 172 | 173 | 4. 
(Optional) To verify the installation, you can run: 174 | 175 | ```bash 176 | $ pip show mail-parser 177 | ``` 178 | 179 | If you plan to contribute or develop further, consider setting up a `uv` environment and syncing all development dependencies: 180 | 181 | ```bash 182 | $ git clone https://github.com/SpamScope/mail-parser.git 183 | $ cd mail-parser 184 | $ uv sync 185 | ``` 186 | 187 | With these commands, you’ll have all dependencies installed inside your virtual environment. 188 | 189 | For more detailed instructions about `uv`, please refer to the [uv documentation](https://docs.astral.sh/uv/). 190 | 191 | 192 | # Usage in a project 193 | Import the `mailparser` module: 194 | 195 | ``` 196 | import mailparser 197 | 198 | mail = mailparser.parse_from_bytes(byte_mail) 199 | mail = mailparser.parse_from_file(f) 200 | mail = mailparser.parse_from_file_msg(outlook_mail) 201 | mail = mailparser.parse_from_file_obj(fp) 202 | mail = mailparser.parse_from_string(raw_mail) 203 | ``` 204 | 205 | Then you can get all parts: 206 | 207 | ``` 208 | mail.attachments: list of all attachments 209 | mail.body 210 | mail.date: datetime object in UTC 211 | mail.defects: defects (parts that are not RFC compliant) 212 | mail.defects_categories: only defects categories 213 | mail.delivered_to 214 | mail.from_ 215 | mail.get_server_ipaddress(trust="my_server_mail_trust") 216 | mail.headers 217 | mail.mail: tokenized mail in an object 218 | mail.message: email.message.Message object 219 | mail.message_as_string: message as string 220 | mail.message_id 221 | mail.received 222 | mail.subject 223 | mail.text_plain: only text plain mail parts in a list 224 | mail.text_html: only text html mail parts in a list 225 | mail.text_not_managed: all not managed text (check the warning logs to find content subtype) 226 | mail.to 227 | mail.to_domains 228 | mail.timezone: returns the timezone, offset from UTC 229 | mail.mail_partial: returns only the main parts of the email 230 | ``` 231 | 232 | It's possible to write
the attachments on disk with the method: 233 | 234 | ``` 235 | mail.write_attachments(base_path) 236 | ``` 237 | 238 | # Usage from command-line 239 | If you installed mailparser with `pip` or `setup.py` you can use it from the command line. 240 | 241 | These are all the switches: 242 | 243 | ``` 244 | usage: mailparser [-h] (-f FILE | -s STRING | -k) 245 | [-l {CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET}] [-j] [-b] 246 | [-a] [-r] [-t] [-dt] [-m] [-u] [-c] [-d] [-o] 247 | [-i Trust mail server string] [-p] [-z] [-v] 248 | 249 | Wrapper for email Python Standard Library 250 | 251 | optional arguments: 252 | -h, --help show this help message and exit 253 | -f FILE, --file FILE Raw email file (default: None) 254 | -s STRING, --string STRING 255 | Raw email string (default: None) 256 | -k, --stdin Enable parsing from stdin (default: False) 257 | -l {CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET}, --log-level {CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET} 258 | Set log level (default: WARNING) 259 | -j, --json Show the JSON of parsed mail (default: False) 260 | -b, --body Print the body of mail (default: False) 261 | -a, --attachments Print the attachments of mail (default: False) 262 | -r, --headers Print the headers of mail (default: False) 263 | -t, --to Print the to of mail (default: False) 264 | -dt, --delivered-to Print the delivered-to of mail (default: False) 265 | -m, --from Print the from of mail (default: False) 266 | -u, --subject Print the subject of mail (default: False) 267 | -c, --receiveds Print all receiveds of mail (default: False) 268 | -d, --defects Print the defects of mail (default: False) 269 | -o, --outlook Analyze Outlook msg (default: False) 270 | -i Trust mail server string, --senderip Trust mail server string 271 | Extract a reliable sender IP address heuristically 272 | (default: None) 273 | -p, --mail-hash Print mail fingerprints without headers (default: 274 | False) 275 | -z, --attachments-hash 276 | Print attachments with fingerprints (default: False) 277
| -sa, --store-attachments 278 | Store attachments on disk (default: False) 279 | -ap ATTACHMENTS_PATH, --attachments-path ATTACHMENTS_PATH 280 | Path where store attachments (default: /tmp) 281 | -v, --version show program's version number and exit 282 | 283 | It takes as input a raw mail and generates a parsed object. 284 | ``` 285 | 286 | Example: 287 | 288 | ```shell 289 | $ mailparser -f example_mail -j 290 | ``` 291 | 292 | This example will show you the tokenized mail in a JSON pretty format. 293 | 294 | From [raw mail](https://gist.github.com/fedelemantuano/5dd702004c25a46b2bd60de21e67458e) to 295 | [parsed mail](https://gist.github.com/fedelemantuano/e958aa2813c898db9d2d09469db8e6f6). 296 | 297 | 298 | # Exceptions 299 | Exceptions hierarchy of mail-parser: 300 | 301 | ``` 302 | MailParserError: Base MailParser Exception 303 | | 304 | \── MailParserOutlookError: Raised with Outlook integration errors 305 | | 306 | \── MailParserEnvironmentError: Raised when the environment is not correct 307 | | 308 | \── MailParserOSError: Raised when there is an OS error 309 | | 310 | \── MailParserReceivedParsingError: Raised when a received header cannot be parsed 311 | ``` 312 | 313 | # fmantuano/spamscope-mail-parser 314 | This Docker image encapsulates the functionality of `mail-parser`. You can find the [official image on Docker Hub](https://hub.docker.com/r/fmantuano/spamscope-mail-parser/). 315 | 316 | ## Running the Docker Image 317 | 318 | After installing Docker, you can run the container with the following command: 319 | 320 | ```shell 321 | sudo docker run -it --rm -v ~/mails:/mails fmantuano/spamscope-mail-parser 322 | ``` 323 | 324 | This command mounts your local `~/mails` directory into the container at `/mails`. The image runs `mail-parser` in its default mode, but you can pass any additional options as needed. 325 | 326 | ## Using docker-compose 327 | 328 | A `docker-compose.yml` file is also provided. 
From the directory containing the file, run: 329 | 330 | ```shell 331 | sudo docker-compose up 332 | ``` 333 | 334 | The configuration in the `docker-compose.yml` file includes: 335 | - Mounting your local `~/mails` directory (read-only) into the container at `/mails`. 336 | - Running a command-line test example to verify functionality. 337 | 338 | Review the `docker-compose.yml` file to customize the launch parameters to suit your needs. 339 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.1' 2 | 3 | services: 4 | 5 | mailparser: 6 | image: fmantuano/spamscope-mail-parser:develop 7 | command: --json -f /mails/mail_test_1 8 | container_name: mailparser 9 | volumes: 10 | - ~/mails/:/mails/:ro 11 | -------------------------------------------------------------------------------- /docs/images/Bitcoin SpamScope.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpamScope/mail-parser/1782bf724190d15c5063b6cc079637599ceb61ca/docs/images/Bitcoin SpamScope.jpg -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "mail-parser" 3 | dynamic = ["version"] 4 | description = "A tool that parses emails by enhancing the Python standard library, extracting all details into a comprehensive object." 
5 | license = "Apache-2.0" 6 | readme = "README.md" 7 | requires-python = ">=3.9,<3.14" 8 | keywords = ["email", "mail", "parser", "security", "forensics", "threat detection", "phishing", "malware", "spam"] 9 | classifiers = [ 10 | "Natural Language :: English", 11 | "Operating System :: Unix", 12 | "Operating System :: MacOS", 13 | "Operating System :: Microsoft :: Windows", 14 | "Programming Language :: Python", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.9", 17 | "Programming Language :: Python :: 3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Programming Language :: Python :: 3.12", 20 | "Programming Language :: Python :: 3.13", 21 | ] 22 | authors = [ 23 | { name = "Fedele Mantuano", email = "mantuano.fedele@gmail.com" } 24 | ] 25 | maintainers = [ 26 | { name = "Fedele Mantuano", email = "mantuano.fedele@gmail.com" } 27 | ] 28 | dependencies = [ 29 | "six>=1.17.0", 30 | ] 31 | 32 | [dependency-groups] 33 | dev = [ 34 | "build>=1.2.2.post1", 35 | "hatch>=1.14.0", 36 | "pre-commit>=4.0.1", 37 | "ruff>=0.9.9", 38 | "twine>=6.0.1", 39 | "wheel>=0.45.1", 40 | ] 41 | test = [ 42 | "coverage>=7.6.10", 43 | "pytest>=8.3.4", 44 | "pytest-cov>=6.0.0", 45 | "pytest-mock>=3.14.0", 46 | "pytest-ordering>=0.6", 47 | ] 48 | 49 | [build-system] 50 | requires = ["hatchling"] 51 | build-backend = "hatchling.build" 52 | 53 | [tool.uv] 54 | default-groups = ["dev", "test"] 55 | 56 | [tool.hatch.build.targets.wheel] 57 | packages = ["src/mailparser"] 58 | 59 | [tool.hatch.version] 60 | path = "src/mailparser/version.py" 61 | 62 | [project.scripts] 63 | mail-parser = "mailparser.__main__:main" 64 | 65 | [tool.pytest.ini_options] 66 | testpaths = "tests" 67 | markers = [ 68 | "integration: integration tests", 69 | ] 70 | addopts = """ 71 | --strict-markers 72 | --strict-config 73 | -ra 74 | --cov=src 75 | --cov=tests 76 | --cov-report=term 77 | --cov-branch 78 | --cov-report=xml 79 | --cov-report=html 80 | --junitxml=junit.xml 
81 | --verbose 82 | """ 83 | 84 | [tool.ruff.lint] 85 | select = [ 86 | # pycodestyle 87 | "E", 88 | # pyflakes 89 | "F", 90 | # pyupgrade 91 | # "UP", 92 | # flake8-bugbear 93 | # "B", 94 | # flake8-simplify 95 | # "SIM", 96 | # isort 97 | "I", 98 | # flake8-bandit 99 | # "S", 100 | # flake8-pytest-style 101 | # "PT", 102 | # flake8-annotations 103 | # "ANN", 104 | ] 105 | -------------------------------------------------------------------------------- /src/mailparser/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License.
17 | """ 18 | 19 | from mailparser.core import ( 20 | MailParser, 21 | parse_from_bytes, 22 | parse_from_file, 23 | parse_from_file_msg, 24 | parse_from_file_obj, 25 | parse_from_string, 26 | ) 27 | from mailparser.utils import get_header 28 | 29 | __all__ = [ 30 | "MailParser", 31 | "parse_from_bytes", 32 | "parse_from_file", 33 | "parse_from_file_msg", 34 | "parse_from_file_obj", 35 | "parse_from_string", 36 | "get_header", 37 | ] 38 | -------------------------------------------------------------------------------- /src/mailparser/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | """ 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | import mailparser 24 | from mailparser.exceptions import MailParserOutlookError 25 | from mailparser.utils import ( 26 | custom_log, 27 | print_attachments, 28 | print_mail_fingerprints, 29 | safe_print, 30 | write_attachments, 31 | ) 32 | from mailparser.version import __version__ 33 | 34 | log = logging.getLogger("mailparser") 35 | 36 | 37 | def get_args(): 38 | """ 39 | Get arguments from command line. 
40 | :return: argparse.ArgumentParser 41 | :rtype: argparse.ArgumentParser 42 | """ 43 | parser = argparse.ArgumentParser( 44 | description="Wrapper for email Python Standard Library", 45 | epilog="It takes as input a raw mail and generates a parsed object.", 46 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 47 | ) 48 | 49 | parsing_group = parser.add_mutually_exclusive_group(required=True) 50 | parsing_group.add_argument("-f", "--file", dest="file", help="Raw email file") 51 | parsing_group.add_argument("-s", "--string", dest="string", help="Raw email string") 52 | parsing_group.add_argument( 53 | "-k", 54 | "--stdin", 55 | dest="stdin", 56 | action="store_true", 57 | help="Enable parsing from stdin", 58 | ) 59 | 60 | parser.add_argument( 61 | "-l", 62 | "--log-level", 63 | dest="log_level", 64 | default="WARNING", 65 | choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"], 66 | help="Set log level", 67 | ) 68 | 69 | parser.add_argument( 70 | "-j", 71 | "--json", 72 | dest="json", 73 | action="store_true", 74 | help="Show the JSON of parsed mail", 75 | ) 76 | 77 | parser.add_argument( 78 | "-b", "--body", dest="body", action="store_true", help="Print the body of mail" 79 | ) 80 | 81 | parser.add_argument( 82 | "-a", 83 | "--attachments", 84 | dest="attachments", 85 | action="store_true", 86 | help="Print the attachments of mail", 87 | ) 88 | 89 | parser.add_argument( 90 | "-r", 91 | "--headers", 92 | dest="headers", 93 | action="store_true", 94 | help="Print the headers of mail", 95 | ) 96 | 97 | parser.add_argument( 98 | "-t", "--to", dest="to", action="store_true", help="Print the to of mail" 99 | ) 100 | 101 | parser.add_argument( 102 | "-dt", 103 | "--delivered-to", 104 | dest="delivered_to", 105 | action="store_true", 106 | help="Print the delivered-to of mail", 107 | ) 108 | 109 | parser.add_argument( 110 | "-m", "--from", dest="from_", action="store_true", help="Print the from of mail" 111 | ) 112 | 113 | parser.add_argument( 114 | 
"-u", 115 | "--subject", 116 | dest="subject", 117 | action="store_true", 118 | help="Print the subject of mail", 119 | ) 120 | 121 | parser.add_argument( 122 | "-c", 123 | "--receiveds", 124 | dest="receiveds", 125 | action="store_true", 126 | help="Print all receiveds of mail", 127 | ) 128 | 129 | parser.add_argument( 130 | "-d", 131 | "--defects", 132 | dest="defects", 133 | action="store_true", 134 | help="Print the defects of mail", 135 | ) 136 | 137 | parser.add_argument( 138 | "-o", 139 | "--outlook", 140 | dest="outlook", 141 | action="store_true", 142 | help="Analyze Outlook msg", 143 | ) 144 | 145 | parser.add_argument( 146 | "-i", 147 | "--senderip", 148 | dest="senderip", 149 | metavar="Trust mail server string", 150 | help="Extract a reliable sender IP address heuristically", 151 | ) 152 | 153 | parser.add_argument( 154 | "-p", 155 | "--mail-hash", 156 | dest="mail_hash", 157 | action="store_true", 158 | help="Print mail fingerprints without headers", 159 | ) 160 | 161 | parser.add_argument( 162 | "-z", 163 | "--attachments-hash", 164 | dest="attachments_hash", 165 | action="store_true", 166 | help="Print attachments with fingerprints", 167 | ) 168 | 169 | parser.add_argument( 170 | "-sa", 171 | "--store-attachments", 172 | dest="store_attachments", 173 | action="store_true", 174 | help="Store attachments on disk", 175 | ) 176 | 177 | parser.add_argument( 178 | "-ap", 179 | "--attachments-path", 180 | dest="attachments_path", 181 | default="/tmp", 182 | help="Path where store attachments", 183 | ) 184 | 185 | parser.add_argument( 186 | "-v", "--version", action="version", version=f"%(prog)s {__version__}" 187 | ) 188 | 189 | return parser 190 | 191 | 192 | def main(): 193 | """ 194 | Main function. 
195 | """ 196 | args = get_args().parse_args() 197 | log = custom_log(level=args.log_level, name="mailparser") 198 | 199 | try: 200 | parser = get_parser(args) 201 | process_output(args, parser) 202 | except Exception as e: 203 | log.exception(f"An error occurred: {e}") 204 | sys.exit(1) 205 | 206 | 207 | def get_parser(args): 208 | """ 209 | Get the correct parser based on the input source. 210 | :param args: argparse.Namespace 211 | :type args: argparse.Namespace 212 | :return: MailParser 213 | :rtype: mailparser.core.MailParser 214 | """ 215 | if args.file: 216 | return parse_file(args) 217 | elif args.string: 218 | log.debug("Start analysis by string mail") 219 | return mailparser.parse_from_string(args.string) 220 | elif args.stdin: 221 | return parse_stdin(args) 222 | else: 223 | raise ValueError("No input source provided") 224 | 225 | 226 | def parse_file(args): 227 | """ 228 | Parse the file based on the arguments provided. 229 | :param args: argparse.Namespace 230 | :type args: argparse.Namespace 231 | :return: MailParser 232 | :rtype: mailparser.core.MailParser 233 | """ 234 | log.debug("Start analysis by file mail") 235 | if args.outlook: 236 | log.debug("Start analysis by Outlook msg") 237 | return mailparser.parse_from_file_msg(args.file) 238 | else: 239 | log.debug("Start analysis by raw mail") 240 | return mailparser.parse_from_file(args.file) 241 | 242 | 243 | def parse_stdin(args): 244 | """ 245 | Parse the stdin based on the arguments provided. 246 | :param args: argparse.Namespace 247 | :type args: argparse.Namespace 248 | :return: MailParser 249 | :rtype: mailparser.core.MailParser 250 | """ 251 | log.debug("Start analysis by stdin mail") 252 | if args.outlook: 253 | raise MailParserOutlookError("You can't use stdin with msg Outlook") 254 | return mailparser.parse_from_file_obj(sys.stdin) 255 | 256 | 257 | def process_output(args, parser): 258 | """ 259 | Process the output based on the arguments provided. 
260 | :param args: argparse.Namespace 261 | :type args: argparse.Namespace 262 | :param parser: MailParser 263 | :type parser: mailparser.core.MailParser 264 | :param log: logger 265 | :type log: logging.Logger 266 | """ 267 | if args.json: 268 | safe_print(parser.mail_json) 269 | 270 | if args.body: 271 | safe_print(parser.body) 272 | 273 | if args.headers: 274 | safe_print(parser.headers_json) 275 | 276 | if args.to: 277 | safe_print(parser.to_json) 278 | 279 | if args.delivered_to: 280 | safe_print(parser.delivered_to_json) 281 | 282 | if args.from_: 283 | safe_print(parser.from_json) 284 | 285 | if args.subject: 286 | safe_print(parser.subject) 287 | 288 | if args.receiveds: 289 | safe_print(parser.received_json) 290 | 291 | if args.defects: 292 | print_defects(parser) 293 | 294 | if args.senderip: 295 | print_sender_ip(parser, args) 296 | 297 | if args.attachments or args.attachments_hash: 298 | print_attachments_details(parser, args) 299 | 300 | if args.mail_hash: 301 | log.debug("Printing also mail fingerprints") 302 | print_mail_fingerprints(parser.body.encode("utf-8")) 303 | 304 | if args.store_attachments: 305 | log.debug("Store attachments on disk") 306 | write_attachments(parser.attachments, args.attachments_path) 307 | 308 | 309 | def print_defects(parser): 310 | """ 311 | Print email defects. 312 | :param parser: MailParser 313 | :type parser: mailparser.core.MailParser 314 | """ 315 | log.debug("Printing defects") 316 | for defect in parser.defects_categories: 317 | safe_print(defect) 318 | 319 | 320 | def print_sender_ip(parser, args): 321 | """ 322 | Print sender IP address. 
323 | :param parser: MailParser 324 | :type parser: mailparser.core.MailParser 325 | :param args: argparse.Namespace 326 | :type args: argparse.Namespace 327 | """ 328 | log.debug("Printing sender IP") 329 | sender_ip = parser.get_server_ipaddress(args.senderip) 330 | safe_print(sender_ip if sender_ip else "Not Found") 331 | 332 | 333 | def print_attachments_details(parser, args): 334 | """ 335 | Print attachments details. 336 | :param parser: MailParser 337 | :type parser: mailparser.core.MailParser 338 | :param args: argparse.Namespace 339 | :type args: argparse.Namespace 340 | """ 341 | log.debug("Printing attachments details") 342 | print_attachments(parser.attachments, args.attachments_hash) 343 | 344 | 345 | if __name__ == "__main__": # pragma: no cover 346 | main() 347 | -------------------------------------------------------------------------------- /src/mailparser/const.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2018 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | """ 18 | 19 | import re 20 | 21 | REGXIP = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}") 22 | 23 | JUNK_PATTERN = r"[ \(\)\[\]\t\n]+" 24 | 25 | # Patterns for receiveds 26 | RECEIVED_PATTERNS = [ 27 | # each pattern handles matching a single clause 28 | # need to exclude withs followed by cipher (e.g., google); (?! 
cipher) 29 | # TODO: ideally would do negative matching for with in parens 30 | # need the beginning or space to differentiate from envelope-from 31 | ( 32 | r"(?:(?:^|\s)from\s+(?P<from>.+?)(?:\s*[(]?" 33 | r"envelope-from|\s*[(]?envelope-sender|\s+" 34 | r"by|\s+with(?! cipher)|\s+id|\s+for|\s+via|;))" 35 | ), 36 | # need to make sure envelope-from comes before from to prevent mismatches 37 | # envelope-from and -sender seem to optionally have space and/or 38 | # ( before them other clauses must have whitespace before 39 | ( 40 | r"(?:[^-\.]by\s+(?P<by>.+?)(?:\s*[(]?envelope-from|\s*" 41 | r"[(]?envelope-sender|\s+from|\s+with" 42 | r"(?! cipher)|\s+id|\s+for|\s+via|;))" 43 | ), 44 | ( 45 | r"(?:with(?! cipher)\s+(?P<with>.+?)(?:\s*[(]?envelope-from|\s*[(]?" 46 | r"envelope-sender|\s+from|\s+by|\s+id|\s+for|\s+via|;))" 47 | ), 48 | ( 49 | r"[^\w\.](?:id\s+(?P<id>.+?)(?:\s*[(]?envelope-from|\s*" 50 | r"[(]?envelope-sender|\s+from|\s+by|\s+with" 51 | r"(?! cipher)|\s+for|\s+via|;))" 52 | ), 53 | ( 54 | r"(?:for\s+(?P<for>.+?)(?:\s*[(]?envelope-from|\s*[(]?" 55 | r"envelope-sender|\s+from|\s+by|\s+with" 56 | r"(?! cipher)|\s+id|\s+via|;))" 57 | ), 58 | ( 59 | r"(?:via\s+(?P<via>.+?)(?:\s*[(]?" 60 | r"envelope-from|\s*[(]?envelope-sender|\s+" 61 | r"from|\s+by|\s+id|\s+for|\s+with(?!
cipher)|;))" 62 | ), 63 | # assumes emails are always inside <> 64 | r"(?:envelope-from\s+<(?P<envelope_from>.+?)>)", 65 | r"(?:envelope-sender\s+<(?P<envelope_sender>.+?)>)", 66 | # datetime comes after ; at the end 67 | r";\s*(?P<date>.*)", 68 | # sendgrid datetime 69 | ( 70 | r"(?P<date>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:" 71 | r"\d{2}\.\d{9} \+0000 UTC) m=\+\d+\.\d+" 72 | ), 73 | ] 74 | 75 | RECEIVED_COMPILED_LIST = [re.compile(i, re.I | re.DOTALL) for i in RECEIVED_PATTERNS] 76 | 77 | EPILOGUE_DEFECTS = {"StartBoundaryNotFoundDefect"} 78 | 79 | ADDRESSES_HEADERS = set(["bcc", "cc", "delivered-to", "from", "reply-to", "to"]) 80 | 81 | # These parts are always returned 82 | OTHERS_PARTS = set( 83 | [ 84 | "attachments", 85 | "body", 86 | "date", 87 | "message-id", 88 | "received", 89 | "subject", 90 | "timezone", 91 | "to_domains", 92 | "user-agent", 93 | "x-mailer", 94 | "x-original-to", 95 | ] 96 | ) 97 | -------------------------------------------------------------------------------- /src/mailparser/core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License.
17 | """ 18 | 19 | import base64 20 | import email 21 | import ipaddress 22 | import json 23 | import logging 24 | import os 25 | 26 | import six 27 | 28 | from mailparser.const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP 29 | from mailparser.exceptions import MailParserEnvironmentError 30 | from mailparser.utils import ( 31 | convert_mail_date, 32 | decode_header_part, 33 | find_between, 34 | get_header, 35 | get_mail_keys, 36 | get_to_domains, 37 | msgconvert, 38 | ported_open, 39 | ported_string, 40 | random_string, 41 | receiveds_parsing, 42 | write_attachments, 43 | ) 44 | 45 | log = logging.getLogger(__name__) 46 | 47 | 48 | def parse_from_file_obj(fp): 49 | """ 50 | Parsing email from a file-like object. 51 | 52 | Args: 53 | fp (file-like object): file-like object of raw email 54 | 55 | Returns: 56 | Instance of MailParser with raw email parsed 57 | """ 58 | return MailParser.from_file_obj(fp) 59 | 60 | 61 | def parse_from_file(fp): 62 | """ 63 | Parsing email from file. 64 | 65 | Args: 66 | fp (string): file path of raw email 67 | 68 | Returns: 69 | Instance of MailParser with raw email parsed 70 | """ 71 | return MailParser.from_file(fp) 72 | 73 | 74 | def parse_from_file_msg(fp): 75 | """ 76 | Parsing email from file Outlook msg. 77 | 78 | Args: 79 | fp (string): file path of raw Outlook email 80 | 81 | Returns: 82 | Instance of MailParser with raw email parsed 83 | """ 84 | return MailParser.from_file_msg(fp) 85 | 86 | 87 | def parse_from_string(s): 88 | """ 89 | Parsing email from string. 90 | 91 | Args: 92 | s (string): raw email 93 | 94 | Returns: 95 | Instance of MailParser with raw email parsed 96 | """ 97 | return MailParser.from_string(s) 98 | 99 | 100 | def parse_from_bytes(bt): 101 | """ 102 | Parsing email from bytes. 
Only for Python 3 103 | 104 | Args: 105 | bt (bytes-like object): raw email as bytes-like object 106 | 107 | Returns: 108 | Instance of MailParser with raw email parsed 109 | """ 110 | return MailParser.from_bytes(bt) 111 | 112 | 113 | class MailParser: 114 | """ 115 | MailParser package provides a standard parser that understands 116 | most email document structures like official email package. 117 | MailParser handles the encoding of email and split the raw email for you. 118 | 119 | Headers: 120 | https://www.iana.org/assignments/message-headers/message-headers.xhtml 121 | """ 122 | 123 | def __init__(self, message=None): 124 | """ 125 | Init a new object from a message object structure. 126 | """ 127 | self._message = message 128 | log.debug("All headers of emails: {}".format(", ".join(message.keys()))) 129 | self.parse() 130 | 131 | def __str__(self): 132 | if self.message: 133 | return self.subject 134 | else: 135 | return six.text_type() 136 | 137 | @classmethod 138 | def from_file_obj(cls, fp): 139 | """ 140 | Init a new object from a file-like object. 141 | Not for Outlook msg. 142 | 143 | Args: 144 | fp (file-like object): file-like object of raw email 145 | 146 | Returns: 147 | Instance of MailParser 148 | """ 149 | log.debug("Parsing email from file object") 150 | try: 151 | fp.seek(0) 152 | except OSError: 153 | # When stdout is a TTY it's a character device 154 | # and it's not seekable, you cannot seek in a TTY. 155 | pass 156 | finally: 157 | s = fp.read() 158 | 159 | return cls.from_string(s) 160 | 161 | @classmethod 162 | def from_file(cls, fp, is_outlook=False): 163 | """ 164 | Init a new object from a file path. 
165 | 166 | Args: 167 | fp (string): file path of raw email 168 | is_outlook (boolean): if True is an Outlook email 169 | 170 | Returns: 171 | Instance of MailParser 172 | """ 173 | log.debug(f"Parsing email from file {fp!r}") 174 | 175 | with ported_open(fp) as f: 176 | message = email.message_from_file(f) 177 | 178 | if is_outlook: 179 | log.debug(f"Removing temp converted Outlook email {fp!r}") 180 | os.remove(fp) 181 | 182 | return cls(message) 183 | 184 | @classmethod 185 | def from_file_msg(cls, fp): 186 | """ 187 | Init a new object from a Outlook message file, 188 | mime type: application/vnd.ms-outlook 189 | 190 | Args: 191 | fp (string): file path of raw Outlook email 192 | 193 | Returns: 194 | Instance of MailParser 195 | """ 196 | log.debug("Parsing email from file Outlook") 197 | f, _ = msgconvert(fp) 198 | return cls.from_file(f, True) 199 | 200 | @classmethod 201 | def from_string(cls, s): 202 | """ 203 | Init a new object from a string. 204 | 205 | Args: 206 | s (string): raw email 207 | 208 | Returns: 209 | Instance of MailParser 210 | """ 211 | 212 | log.debug("Parsing email from string") 213 | message = email.message_from_string(s) 214 | return cls(message) 215 | 216 | @classmethod 217 | def from_bytes(cls, bt): 218 | """ 219 | Init a new object from bytes. 220 | 221 | Args: 222 | bt (bytes-like object): raw email as bytes-like object 223 | 224 | Returns: 225 | Instance of MailParser 226 | """ 227 | log.debug("Parsing email from bytes") 228 | if six.PY2: 229 | raise MailParserEnvironmentError( 230 | "Parsing from bytes is valid only for Python 3.x version" 231 | ) 232 | message = email.message_from_bytes(bt) 233 | return cls(message) 234 | 235 | def _reset(self): 236 | """ 237 | Reset the state of mail object. 
238 | """ 239 | log.debug("Reset all variables") 240 | 241 | self._attachments = [] 242 | self._text_plain = [] 243 | self._text_html = [] 244 | self._text_not_managed = [] 245 | self._defects = [] 246 | self._defects_categories = set() 247 | self._has_defects = False 248 | 249 | def _append_defects(self, part, part_content_type): 250 | """ 251 | Add new defects and defects categories to object attributes. 252 | 253 | The defects are a list of all the problems found 254 | when parsing this message. 255 | 256 | Args: 257 | part (string): mail part 258 | part_content_type (string): content type of part 259 | """ 260 | 261 | part_defects = {} 262 | 263 | for e in part.defects: 264 | defects = f"{e.__class__.__name__}: {e.__doc__}" 265 | self._defects_categories.add(e.__class__.__name__) 266 | part_defects.setdefault(part_content_type, []).append(defects) 267 | log.debug(f"Added defect {defects!r}") 268 | 269 | # Tag mail with defect 270 | if part_defects: 271 | self._has_defects = True 272 | 273 | # Save all defects 274 | self._defects.append(part_defects) 275 | 276 | def _make_mail(self, complete=True): 277 | """ 278 | This method assigns the right values to all tokens of email. 279 | Returns a parsed object 280 | 281 | Keyword Arguments: 282 | complete {bool} -- If True returns all mails parts 283 | (default: {True}) 284 | 285 | Returns: 286 | dict -- Parsed email object 287 | """ 288 | 289 | mail = {} 290 | keys = get_mail_keys(self.message, complete) 291 | 292 | for i in keys: 293 | log.debug(f"Getting header or part {i!r}") 294 | value = getattr(self, i) 295 | if value: 296 | mail[i] = value 297 | 298 | # add defects 299 | mail["has_defects"] = self.has_defects 300 | if self.has_defects: 301 | mail["defects"] = self.defects 302 | mail["defects_categories"] = list(self.defects_categories) 303 | 304 | return mail 305 | 306 | def parse(self): 307 | """ 308 | This method parses the raw email and makes the tokens. 
309 | 310 | Returns: 311 | Instance of MailParser with raw email parsed 312 | """ 313 | 314 | if not self.message: 315 | return self 316 | 317 | # reset and start parsing 318 | self._reset() 319 | parts = [] # Normal parts plus defects 320 | 321 | # walk all mail parts to search defects 322 | for p in self.message.walk(): 323 | part_content_type = p.get_content_type() 324 | self._append_defects(p, part_content_type) 325 | parts.append(p) 326 | 327 | # If defects are in epilogue defects get epilogue 328 | if self.defects_categories & EPILOGUE_DEFECTS: 329 | log.debug("Found defects in emails") 330 | epilogue = find_between( 331 | self.message.epilogue, 332 | "{}".format("--" + self.message.get_boundary()), 333 | "{}".format("--" + self.message.get_boundary() + "--"), 334 | ) 335 | 336 | try: 337 | p = email.message_from_string(epilogue) 338 | parts.append(p) 339 | except TypeError: 340 | log.debug("Failed to get epilogue part for TypeError") 341 | except Exception: 342 | log.error("Failed to get epilogue part. 
Check raw mail.") 343 | 344 | # walk all mail parts 345 | for i, p in enumerate(parts): 346 | if ( 347 | not p.is_multipart() 348 | or ported_string(p.get_content_disposition()).lower() == "attachment" 349 | ): 350 | charset = p.get_content_charset("utf-8") 351 | charset_raw = p.get_content_charset() 352 | log.debug(f"Charset {charset!r} part {i!r}") 353 | content_disposition = ported_string(p.get_content_disposition()).lower() 354 | log.debug(f"content-disposition {content_disposition!r} part {i!r}") 355 | content_id = ported_string(p.get("content-id")) 356 | log.debug(f"content-id {content_id!r} part {i!r}") 357 | content_subtype = ported_string(p.get_content_subtype()) 358 | log.debug(f"content subtype {content_subtype!r} part {i!r}") 359 | filename = decode_header_part(p.get_filename()) 360 | 361 | is_attachment = False 362 | if filename: 363 | is_attachment = True 364 | else: 365 | if content_id and content_subtype not in ("html", "plain"): 366 | is_attachment = True 367 | filename = content_id 368 | elif content_subtype in ("rtf"): 369 | is_attachment = True 370 | filename = f"{random_string()}.rtf" 371 | elif content_disposition == "attachment": 372 | is_attachment = True 373 | filename = f"{random_string()}.txt" 374 | 375 | # this is an attachment 376 | if is_attachment: 377 | log.debug(f"Email part {i!r} is an attachment") 378 | log.debug(f"Filename {filename!r} part {i!r}") 379 | binary = False 380 | mail_content_type = ported_string(p.get_content_type()) 381 | log.debug(f"Mail content type {mail_content_type!r} part {i!r}") 382 | transfer_encoding = ported_string( 383 | p.get("content-transfer-encoding", "") 384 | ).lower() 385 | log.debug(f"Transfer encoding {transfer_encoding!r} part {i!r}") 386 | content_disposition = ported_string(p.get("content-disposition")) 387 | log.debug(f"content-disposition {content_disposition!r} part {i!r}") 388 | 389 | if p.is_multipart(): 390 | payload = "".join( 391 | [m.as_string() for m in p.get_payload(decode=False)] 
392 | ) 393 | binary = False 394 | log.debug(f"Filename {filename!r} part {i!r} is multipart") 395 | elif transfer_encoding == "base64" or ( 396 | transfer_encoding 397 | == "quoted-\ 398 | printable" 399 | and "application" in mail_content_type 400 | ): 401 | payload = p.get_payload(decode=False) 402 | binary = True 403 | log.debug(f"Filename {filename!r} part {i!r} is binary") 404 | elif "uuencode" in transfer_encoding: 405 | # Re-encode in base64 406 | payload = base64.b64encode(p.get_payload(decode=True)).decode( 407 | "ascii" 408 | ) 409 | binary = True 410 | transfer_encoding = "base64" 411 | log.debug( 412 | f"Filename {filename!r} part {i!r} is binary (uuencode" 413 | " re-encoded to base64)" 414 | ) 415 | else: 416 | payload = ported_string( 417 | p.get_payload(decode=True), encoding=charset 418 | ) 419 | log.debug(f"Filename {filename!r} part {i!r} is not binary") 420 | 421 | self._attachments.append( 422 | { 423 | "filename": filename, 424 | "payload": payload, 425 | "binary": binary, 426 | "mail_content_type": mail_content_type, 427 | "content-id": content_id, 428 | "content-disposition": content_disposition, 429 | "charset": charset_raw, 430 | "content_transfer_encoding": transfer_encoding, 431 | } 432 | ) 433 | 434 | # this isn't an attachment 435 | else: 436 | log.debug(f"Email part {i!r} is not an attachment") 437 | 438 | # Get the payload using the get_payload method with decode=True. 439 | # Python truly decodes only 'base64', 440 | # 'quoted-printable', 'x-uuencode', 441 | # 'uuencode', 'uue', 'x-uue'; 442 | # for other encodings it breaks the characters, so 443 | # we need to decode them with the encoding Python is applying 444 | # to maintain the characters. 445 | payload = p.get_payload(decode=True) 446 | cte = p.get("Content-Transfer-Encoding") 447 | if cte: 448 | cte = cte.lower() 449 | 450 | if not cte or cte in ["7bit", "8bit"]: 451 | try: 452 | payload = payload.decode("raw-unicode-escape") 453 | except UnicodeDecodeError: 454 | payload = 
ported_string(payload, encoding=charset) 455 | else: 456 | payload = ported_string(payload, encoding=charset) 457 | 458 | if payload: 459 | if p.get_content_subtype() == "html": 460 | self._text_html.append(payload) 461 | elif p.get_content_subtype() == "plain": 462 | self._text_plain.append(payload) 463 | else: 464 | log.warning( 465 | f"Email content {p.get_content_subtype()!r} not handled" 466 | ) 467 | self._text_not_managed.append(payload) 468 | 469 | # Parsed object mail with all parts 470 | self._mail = self._make_mail() 471 | 472 | # Parsed object mail with mains parts 473 | self._mail_partial = self._make_mail(complete=False) 474 | 475 | def get_server_ipaddress(self, trust): 476 | """ 477 | Return the ip address of sender 478 | 479 | Overview: 480 | Extract a reliable sender IP address heuristically for each message. 481 | Although the message format dictates a chain of relaying IP 482 | addresses in each message, a malicious relay can easily alter that. 483 | Therefore we cannot simply take the first IP in 484 | the chain. Instead, our method is as follows. 485 | First we trust the sender IP reported by our mail server in the 486 | Received headers, and if the previous relay IP address is on our trust 487 | list (e.g. other well-known mail services), we continue to 488 | follow the previous Received line, till we reach the first unrecognized 489 | IP address in the email header. 490 | 491 | From article Characterizing Botnets from Email Spam Records: 492 | Li Zhuang, J. D. Tygar 493 | 494 | In our case we trust only our mail server with the trust string. 
495 | 496 | Args: 497 | trust (string): String that identify our mail server 498 | 499 | Returns: 500 | string with the ip address 501 | """ 502 | log.debug(f"Trust string is {trust!r}") 503 | 504 | if not trust.strip(): 505 | return 506 | 507 | received = self.message.get_all("received", []) 508 | 509 | for i in received: 510 | i = ported_string(i) 511 | if trust in i: 512 | log.debug(f"Trust string {trust!r} is in {i!r}") 513 | ip_str = self._extract_ip(i) 514 | if ip_str: 515 | return ip_str 516 | 517 | def _extract_ip(self, received_header): 518 | """ 519 | Extract the IP address from the received header if it is not private. 520 | 521 | Args: 522 | received_header (string): The received header string 523 | 524 | Returns: 525 | string with the ip address or None 526 | """ 527 | check = REGXIP.findall(received_header[0 : received_header.find("by")]) 528 | if check: 529 | try: 530 | ip_str = six.text_type(check[-1]) 531 | log.debug(f"Found sender IP {ip_str!r} in {received_header!r}") 532 | ip = ipaddress.ip_address(ip_str) 533 | except ValueError: 534 | return None 535 | else: 536 | if not ip.is_private: 537 | log.debug(f"IP {ip_str!r} not private") 538 | return ip_str 539 | return None 540 | 541 | def write_attachments(self, base_path): 542 | """This method writes the attachments of mail on disk 543 | 544 | Arguments: 545 | base_path {str} -- Base path where write the attachments 546 | """ 547 | write_attachments(attachments=self.attachments, base_path=base_path) 548 | 549 | def __getattr__(self, name): 550 | name = name.strip("_").lower() 551 | name_header = name.replace("_", "-") 552 | 553 | # json headers 554 | if name.endswith("_json"): 555 | name = name[:-5] 556 | return json.dumps(getattr(self, name), ensure_ascii=False) 557 | 558 | # raw headers 559 | elif name.endswith("_raw"): 560 | name = name[:-4] 561 | raw = self.message.get_all(name) 562 | return json.dumps(raw, ensure_ascii=False) 563 | 564 | # object headers 565 | elif name_header in 
ADDRESSES_HEADERS: 566 | h = decode_header_part(self.message.get(name_header, six.text_type())) 567 | h_parsed = email.utils.getaddresses([h], strict=True) 568 | return ( 569 | h_parsed 570 | if h_parsed != [("", "")] 571 | else email.utils.getaddresses([h], strict=False) 572 | ) 573 | 574 | # others headers 575 | else: 576 | return get_header(self.message, name_header) 577 | 578 | @property 579 | def attachments(self): 580 | """ 581 | Return a list of all attachments in the mail 582 | """ 583 | return self._attachments 584 | 585 | @property 586 | def received(self): 587 | """ 588 | Return a list of all received headers parsed 589 | """ 590 | output = self.received_raw 591 | return receiveds_parsing(output) 592 | 593 | @property 594 | def received_json(self): 595 | """ 596 | Return a JSON of all received headers 597 | """ 598 | return json.dumps(self.received, ensure_ascii=False, indent=2) 599 | 600 | @property 601 | def received_raw(self): 602 | """ 603 | Return a list of all received headers in raw format 604 | """ 605 | output = [] 606 | for i in self.message.get_all("received", []): 607 | output.append(decode_header_part(i)) 608 | return output 609 | 610 | @property 611 | def body(self): 612 | """ 613 | Return all text plain and text html parts of mail delimited from string 614 | "--- mail_boundary ---" 615 | """ 616 | return "\n--- mail_boundary ---\n".join( 617 | self.text_plain + self.text_html + self.text_not_managed 618 | ) 619 | 620 | @property 621 | def headers(self) -> dict: 622 | """ 623 | Return only the headers as Python object 624 | """ 625 | all_headers = set(self.message.keys()) - set(["headers"]) 626 | return {i: getattr(self, i) for i in all_headers} 627 | 628 | @property 629 | def headers_json(self): 630 | """ 631 | Return the JSON of headers 632 | """ 633 | return json.dumps(self.headers, ensure_ascii=False, indent=2) 634 | 635 | @property 636 | def text_plain(self): 637 | """ 638 | Return a list of all text plain parts of email. 
639 | """ 640 | return self._text_plain 641 | 642 | @property 643 | def text_html(self): 644 | """ 645 | Return a list of all text html parts of email. 646 | """ 647 | return self._text_html 648 | 649 | @property 650 | def text_not_managed(self): 651 | """ 652 | Return a list of all text not managed of email. 653 | """ 654 | return self._text_not_managed 655 | 656 | @property 657 | def date(self): 658 | """ 659 | Return the mail date in datetime.datetime format and UTC. 660 | """ 661 | date = self.message.get("date") 662 | conv = None 663 | 664 | try: 665 | conv, _ = convert_mail_date(date) 666 | except Exception: 667 | pass 668 | return conv 669 | 670 | @property 671 | def timezone(self): 672 | """ 673 | Return timezone. Offset from UTC. 674 | """ 675 | date = self.message.get("date") 676 | timezone = 0 677 | 678 | try: 679 | _, timezone = convert_mail_date(date) 680 | except Exception: 681 | pass 682 | return timezone 683 | 684 | @property 685 | def date_json(self): 686 | """ 687 | Return the JSON of date 688 | """ 689 | if self.date: 690 | return json.dumps(self.date.isoformat(), ensure_ascii=False) 691 | 692 | @property 693 | def mail(self): 694 | """ 695 | Return the Python object of mail parsed 696 | """ 697 | return self._mail 698 | 699 | @property 700 | def mail_json(self): 701 | """ 702 | Return the JSON of mail parsed 703 | """ 704 | if self.mail.get("date"): 705 | self._mail["date"] = self.date.isoformat() 706 | return json.dumps(self.mail, ensure_ascii=False, indent=2) 707 | 708 | @property 709 | def mail_partial(self): 710 | """ 711 | Return the Python object of mail parsed 712 | with only the mains headers 713 | """ 714 | return self._mail_partial 715 | 716 | @property 717 | def mail_partial_json(self): 718 | """ 719 | Return the JSON of mail parsed partial 720 | """ 721 | if self.mail_partial.get("date"): 722 | self._mail_partial["date"] = self.date.isoformat() 723 | return json.dumps(self.mail_partial, ensure_ascii=False, indent=2) 724 | 725 | 
@property 726 | def defects(self): 727 | """ 728 | The defects property contains a list of 729 | all the problems found when parsing this message. 730 | """ 731 | return self._defects 732 | 733 | @property 734 | def defects_categories(self): 735 | """ 736 | Return a set with only defects categories. 737 | """ 738 | return self._defects_categories 739 | 740 | @property 741 | def has_defects(self): 742 | """ 743 | Return a boolean: True if mail has defects. 744 | """ 745 | return self._has_defects 746 | 747 | @property 748 | def message(self): 749 | """ 750 | email.message.Message class. 751 | """ 752 | return self._message 753 | 754 | @property 755 | def message_as_string(self): 756 | """ 757 | Return the entire message flattened as a string. 758 | """ 759 | return self.message.as_string() 760 | 761 | @property 762 | def to_domains(self): 763 | """ 764 | Return all domain of 'to' and 'reply-to' email addresses 765 | """ 766 | return get_to_domains(self.to, self.reply_to) 767 | -------------------------------------------------------------------------------- /src/mailparser/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2018 Fedele Mantuano (https://www.linkedin.com/in/fmantuano/) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | """ 18 | 19 | __all__ = ( 20 | "MailParserError", 21 | "MailParserOutlookError", 22 | "MailParserEnvironmentError", 23 | "MailParserOSError", 24 | "MailParserReceivedParsingError", 25 | ) 26 | 27 | 28 | class MailParserError(Exception): 29 | """ 30 | Base MailParser Exception 31 | """ 32 | 33 | pass 34 | 35 | 36 | class MailParserOutlookError(MailParserError): 37 | """ 38 | Raised when there is an error with Outlook integration 39 | """ 40 | 41 | pass 42 | 43 | 44 | class MailParserEnvironmentError(MailParserError): 45 | """ 46 | Raised when the environment is not correct 47 | """ 48 | 49 | pass 50 | 51 | 52 | class MailParserOSError(MailParserError): 53 | """ 54 | Raised when there is an OS error 55 | """ 56 | 57 | pass 58 | 59 | 60 | class MailParserReceivedParsingError(MailParserError): 61 | """ 62 | Raised when a received header cannot be parsed 63 | """ 64 | 65 | pass 66 | -------------------------------------------------------------------------------- /src/mailparser/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | """ 18 | 19 | import base64 20 | import datetime 21 | import email 22 | import functools 23 | import hashlib 24 | import json 25 | import logging 26 | import os 27 | import random 28 | import re 29 | import string 30 | import subprocess 31 | import sys 32 | import tempfile 33 | from collections import Counter, namedtuple 34 | from email.errors import HeaderParseError 35 | from email.header import decode_header 36 | from unicodedata import normalize 37 | 38 | import six 39 | 40 | from mailparser.const import ( 41 | ADDRESSES_HEADERS, 42 | JUNK_PATTERN, 43 | OTHERS_PARTS, 44 | RECEIVED_COMPILED_LIST, 45 | ) 46 | from mailparser.exceptions import MailParserOSError, MailParserReceivedParsingError 47 | 48 | log = logging.getLogger(__name__) 49 | 50 | 51 | def custom_log(level="WARNING", name=None): # pragma: no cover 52 | """ 53 | This function returns a custom logger. 54 | :param level: logging level 55 | :type level: str 56 | :param name: logger name 57 | :type name: str 58 | :return: logger 59 | """ 60 | if name: 61 | log = logging.getLogger(name) 62 | else: 63 | log = logging.getLogger() 64 | log.setLevel(level) 65 | ch = logging.StreamHandler(sys.stdout) 66 | formatter = logging.Formatter( 67 | "%(asctime)s | " 68 | "%(name)s | " 69 | "%(module)s | " 70 | "%(funcName)s | " 71 | "%(lineno)d | " 72 | "%(levelname)s | " 73 | "%(message)s" 74 | ) 75 | ch.setFormatter(formatter) 76 | log.addHandler(ch) 77 | return log 78 | 79 | 80 | def sanitize(func): 81 | """NFC is the normalization form recommended by W3C.""" 82 | 83 | @functools.wraps(func) 84 | def wrapper(*args, **kwargs): 85 | return normalize("NFC", func(*args, **kwargs)) 86 | 87 | return wrapper 88 | 89 | 90 | @sanitize 91 | def ported_string(raw_data, encoding="utf-8", errors="ignore"): 92 | """ 93 | Give as input raw data and output a str in Python 3 94 | and unicode in Python 2. 
95 | 96 | Args: 97 | raw_data: Python 2 str, Python 3 bytes or str to port 98 | encoding: string giving the name of an encoding 99 | errors: this specifies the treatment of characters 100 | which are invalid in the input encoding 101 | 102 | Returns: 103 | str (Python 3) or unicode (Python 2) 104 | """ 105 | 106 | if not raw_data: 107 | return six.text_type() 108 | 109 | if isinstance(raw_data, six.text_type): 110 | return raw_data 111 | 112 | if six.PY2: 113 | try: 114 | return six.text_type(raw_data, encoding, errors) 115 | except LookupError: 116 | return six.text_type(raw_data, "utf-8", errors) 117 | 118 | if six.PY3: 119 | try: 120 | return six.text_type(raw_data, encoding) 121 | except (LookupError, UnicodeDecodeError): 122 | return six.text_type(raw_data, "utf-8", errors) 123 | 124 | 125 | def decode_header_part(header): 126 | """ 127 | Given a raw header, returns the decoded header 128 | 129 | Args: 130 | header (string): header to decode 131 | 132 | Returns: 133 | str (Python 3) or unicode (Python 2) 134 | """ 135 | if not header: 136 | return six.text_type() 137 | 138 | output = six.text_type() 139 | 140 | try: 141 | for d, c in decode_header(header): 142 | c = c if c else "utf-8" 143 | output += ported_string(d, c, "ignore") 144 | 145 | # Header parsing failed, when header has charset Shift_JIS 146 | except (HeaderParseError, UnicodeError): 147 | log.error(f"Failed decoding header part: {header}") 148 | output += header 149 | 150 | return output.strip() 151 | 152 | 153 | def ported_open(file_): 154 | if six.PY2: 155 | return open(file_) 156 | elif six.PY3: 157 | return open(file_, encoding="utf-8", errors="ignore") 158 | 159 | 160 | def find_between(text, first_token, last_token): 161 | try: 162 | start = text.index(first_token) + len(first_token) 163 | end = text.index(last_token, start) 164 | return text[start:end].strip() 165 | except ValueError: 166 | return 167 | 168 | 169 | def fingerprints(data): 170 | """ 171 | This function returns the 
fingerprints of data. 172 | 173 | Args: 174 | data (string): raw data 175 | 176 | Returns: 177 | namedtuple: fingerprints md5, sha1, sha256, sha512 178 | """ 179 | 180 | hashes = namedtuple("Hashes", "md5 sha1 sha256 sha512") 181 | 182 | if not isinstance(data, six.binary_type): 183 | data = data.encode("utf-8") 184 | 185 | # md5 186 | md5 = hashlib.md5() 187 | md5.update(data) 188 | md5 = md5.hexdigest() 189 | 190 | # sha1 191 | sha1 = hashlib.sha1() 192 | sha1.update(data) 193 | sha1 = sha1.hexdigest() 194 | 195 | # sha256 196 | sha256 = hashlib.sha256() 197 | sha256.update(data) 198 | sha256 = sha256.hexdigest() 199 | 200 | # sha512 201 | sha512 = hashlib.sha512() 202 | sha512.update(data) 203 | sha512 = sha512.hexdigest() 204 | 205 | return hashes(md5, sha1, sha256, sha512) 206 | 207 | 208 | def msgconvert(email): 209 | """ 210 | Exec msgconvert tool, to convert msg Outlook 211 | mail in eml mail format 212 | 213 | Args: 214 | email (string): file path of Outlook msg mail 215 | 216 | Returns: 217 | tuple with file path of mail converted and 218 | standard output data (unicode Python 2, str Python 3) 219 | """ 220 | log.debug("Started converting Outlook email") 221 | temph, temp = tempfile.mkstemp(prefix="outlook_") 222 | command = ["msgconvert", "--outfile", temp, email] 223 | 224 | try: 225 | if six.PY2: 226 | with open(os.devnull, "w") as devnull: 227 | out = subprocess.Popen( 228 | command, 229 | stdin=subprocess.PIPE, 230 | stdout=subprocess.PIPE, 231 | stderr=devnull, 232 | ) 233 | elif six.PY3: 234 | out = subprocess.Popen( 235 | command, 236 | stdin=subprocess.PIPE, 237 | stdout=subprocess.PIPE, 238 | stderr=subprocess.DEVNULL, 239 | ) 240 | 241 | except OSError as e: 242 | message = f"Check if 'msgconvert' tool is installed / {e!r}" 243 | log.exception(message) 244 | raise MailParserOSError(message) 245 | 246 | else: 247 | stdoutdata, _ = out.communicate() 248 | return temp, stdoutdata.decode("utf-8").strip() 249 | 250 | finally: 251 | os.close(temph) 
252 | 253 | 254 | def parse_received(received): 255 | """ 256 | Parse a single received header. 257 | Return a dictionary of values by clause. 258 | 259 | Arguments: 260 | received {str} -- single received header 261 | 262 | Raises: 263 | MailParserReceivedParsingError -- Raised when a 264 | received header cannot be parsed 265 | 266 | Returns: 267 | dict -- values by clause 268 | """ 269 | 270 | values_by_clause = {} 271 | for pattern in RECEIVED_COMPILED_LIST: 272 | matches = [match for match in pattern.finditer(received)] 273 | 274 | if len(matches) == 0: 275 | # no matches for this clause, but it's ok! keep going! 276 | log.debug("No matches found for %s in %s" % (pattern.pattern, received)) 277 | elif len(matches) > 1: 278 | # uh, can't have more than one of each clause in a received. 279 | # so either there's more than one or the current regex is wrong 280 | msg = "More than one match found for %s in %s" % (pattern.pattern, received) 281 | log.error(msg) 282 | raise MailParserReceivedParsingError(msg) 283 | else: 284 | # otherwise we have one matching clause! 285 | log.debug("Found one match for %s in %s" % (pattern.pattern, received)) 286 | match = matches[0].groupdict() 287 | if six.PY2: 288 | values_by_clause[match.keys()[0]] = match.values()[0] 289 | elif six.PY3: 290 | key = list(match.keys())[0] 291 | value = list(match.values())[0] 292 | values_by_clause[key] = value 293 | 294 | if len(values_by_clause) == 0: 295 | # we weren't able to match anything... 
296 | msg = "Unable to match any clauses in %s" % (received) 297 | 298 | # Modification #1: the log call below is commented out because 299 | # the exception raised here is caught by the caller, which 300 | # then puts the raw header in its response. 301 | # We don't want to get so many errors in our error 302 | # logger as we are not even trying to parse the 303 | # received headers. 304 | # We wanted to make it configurable via settings, 305 | # but this package does not depend on Django and 306 | # making it a configurable setting 307 | # would make it Django-dependent, 308 | # so it is better to keep it depending only on Python 309 | # and working with any Python framework; 310 | # it is commented out just for our use. 311 | 312 | # log.error(msg) 313 | 314 | raise MailParserReceivedParsingError(msg) 315 | return values_by_clause 316 | 317 | 318 | def receiveds_parsing(receiveds): 319 | """ 320 | This function parses the receiveds headers. 321 | 322 | Args: 323 | receiveds (list): list of raw receiveds headers 324 | 325 | Returns: 326 | a list of parsed receiveds headers with first hop in first position 327 | """ 328 | 329 | parsed = [] 330 | receiveds = [re.sub(JUNK_PATTERN, " ", i).strip() for i in receiveds] 331 | n = len(receiveds) 332 | log.debug(f"Nr. of receiveds. {n}") 333 | 334 | for idx, received in enumerate(receiveds): 335 | log.debug(f"Parsing received {idx + 1}/{n}") 336 | log.debug(f"Try to parse {received!r}") 337 | try: 338 | # try to parse the current received header... 
339 | values_by_clause = parse_received(received) 340 | except MailParserReceivedParsingError: 341 | # if we can't, let's append the raw 342 | parsed.append({"raw": received}) 343 | else: 344 | # otherwise append the full values_by_clause dict 345 | parsed.append(values_by_clause) 346 | 347 | log.debug("len(receiveds) %s, len(parsed) %s" % (len(receiveds), len(parsed))) 348 | 349 | if len(receiveds) != len(parsed): 350 | # something really bad happened, 351 | # so just return raw receiveds with hop indices 352 | log.error( 353 | "len(receiveds): %s, len(parsed): %s, receiveds: %s, \ 354 | parsed: %s" 355 | % (len(receiveds), len(parsed), receiveds, parsed) 356 | ) 357 | return receiveds_not_parsed(receiveds) 358 | 359 | else: 360 | # all's good! we have parsed or raw receiveds for each received header 361 | return receiveds_format(parsed) 362 | 363 | 364 | def convert_mail_date(date): 365 | """ 366 | Convert a mail date in a datetime object. 367 | """ 368 | log.debug(f"Date to parse: {date!r}") 369 | d = email.utils.parsedate_tz(date) 370 | log.debug(f"Date parsed: {d!r}") 371 | t = email.utils.mktime_tz(d) 372 | log.debug(f"Date parsed in timestamp: {t!r}") 373 | date_utc = datetime.datetime.fromtimestamp(t, datetime.timezone.utc) 374 | timezone = d[9] / 3600.0 if d[9] else 0 375 | timezone = f"{timezone:+.1f}" 376 | log.debug(f"Calculated timezone: {timezone!r}") 377 | return date_utc, timezone 378 | 379 | 380 | def receiveds_not_parsed(receiveds): 381 | """ 382 | If receiveds are not parsed, makes a new structure with raw 383 | field. It's useful to have the same structure of receiveds 384 | parsed. 
385 | 386 | Args: 387 | receiveds (list): list of raw receiveds headers 388 | 389 | Returns: 390 | a list of unparsed receiveds headers with first hop in first position 391 | """ 392 | log.debug("Receiveds for this email are not parsed") 393 | 394 | output = [] 395 | counter = Counter() 396 | 397 | for i in receiveds[::-1]: 398 | j = {"raw": i.strip()} 399 | j["hop"] = counter["hop"] + 1 400 | counter["hop"] += 1 401 | output.append(j) 402 | 403 | return output 404 | 405 | 406 | def receiveds_format(receiveds): 407 | """ 408 | Given a list of receiveds hops, adds metadata and reformats 409 | field values 410 | 411 | Args: 412 | receiveds (list): list of receiveds hops already formatted 413 | 414 | Returns: 415 | list of receiveds reformatted and with new fields 416 | """ 417 | log.debug("Receiveds for this email are parsed") 418 | 419 | output = [] 420 | counter = Counter() 421 | 422 | for i in receiveds[::-1]: 423 | # Clean strings 424 | j = {k: v.strip() for k, v in i.items() if v} 425 | 426 | # Add hop 427 | j["hop"] = counter["hop"] + 1 428 | 429 | # Add UTC date 430 | if i.get("date"): 431 | # Modify date to manage strange header like: 432 | # "for ; Tue, 7 Mar 2017 14:29:24 -0800", 433 | i["date"] = i["date"].split(";")[-1] 434 | try: 435 | j["date_utc"], _ = convert_mail_date(i["date"]) 436 | except TypeError: 437 | j["date_utc"] = None 438 | 439 | # Add delay 440 | size = len(output) 441 | now = j.get("date_utc") 442 | 443 | if size and now: 444 | before = output[counter["hop"] - 1].get("date_utc") 445 | if before: 446 | j["delay"] = (now - before).total_seconds() 447 | else: 448 | j["delay"] = 0 449 | else: 450 | j["delay"] = 0 451 | 452 | # append result 453 | output.append(j) 454 | 455 | # new hop 456 | counter["hop"] += 1 457 | 458 | for i in output: 459 | if i.get("date_utc"): 460 | i["date_utc"] = i["date_utc"].isoformat() 461 | return output 462 | 463 | 464 | def get_to_domains(to=[], reply_to=[]): 465 | domains = set() 466 | for i in to + reply_to: 
467 | try: 468 | domains.add(i[1].split("@")[-1].lower().strip()) 469 | except KeyError: 470 | pass 471 | 472 | return list(domains) 473 | 474 | 475 | def get_header(message, name): 476 | """ 477 | Gets an email.message.Message and a header name and returns 478 | the mail header decoded with the correct charset. 479 | 480 | Args: 481 | message (email.message.Message): email message object 482 | name (string): header to get 483 | 484 | Returns: 485 | str if there is an header 486 | list if there are more than one 487 | """ 488 | 489 | headers = message.get_all(name) 490 | log.debug(f"Getting header {name!r}: {headers!r}") 491 | if headers: 492 | headers = [decode_header_part(i) for i in headers] 493 | if len(headers) == 1: 494 | # in this case return a string 495 | return headers[0].strip() 496 | # in this case return a list 497 | return headers 498 | return six.text_type() 499 | 500 | 501 | def get_mail_keys(message, complete=True): 502 | """ 503 | Given an email.message.Message, return a set with all email parts to get 504 | 505 | Args: 506 | message (email.message.Message): email message object 507 | complete (bool): if True returns all email headers 508 | 509 | Returns: 510 | set with all email parts 511 | """ 512 | 513 | if complete: 514 | log.debug("Get all headers") 515 | all_headers_keys = {i.lower() for i in message.keys()} 516 | all_parts = ADDRESSES_HEADERS | OTHERS_PARTS | all_headers_keys 517 | else: 518 | log.debug("Get only mains headers") 519 | all_parts = ADDRESSES_HEADERS | OTHERS_PARTS 520 | 521 | log.debug("All parts to get: {}".format(", ".join(all_parts))) 522 | return all_parts 523 | 524 | 525 | def safe_print(data): # pragma: no cover 526 | try: 527 | print(data) 528 | except UnicodeEncodeError: 529 | print(data.encode("utf-8")) 530 | 531 | 532 | def print_mail_fingerprints(data): # pragma: no cover 533 | md5, sha1, sha256, sha512 = fingerprints(data) 534 | print(f"md5:\t{md5}") 535 | print(f"sha1:\t{sha1}") 536 | print(f"sha256:\t{sha256}") 
537 | print(f"sha512:\t{sha512}") 538 | 539 | 540 | def print_attachments(attachments, flag_hash): # pragma: no cover 541 | if flag_hash: 542 | for i in attachments: 543 | if i.get("content_transfer_encoding") == "base64": 544 | payload = base64.b64decode(i["payload"]) 545 | else: 546 | payload = i["payload"] 547 | 548 | i["md5"], i["sha1"], i["sha256"], i["sha512"] = fingerprints(payload) 549 | 550 | for i in attachments: 551 | safe_print(json.dumps(i, ensure_ascii=False, indent=4)) 552 | 553 | 554 | def write_attachments(attachments, base_path): # pragma: no cover 555 | for a in attachments: 556 | write_sample( 557 | binary=a["binary"], 558 | payload=a["payload"], 559 | path=base_path, 560 | filename=a["filename"], 561 | ) 562 | 563 | 564 | def write_sample(binary, payload, path, filename): # pragma: no cover 565 | """ 566 | This function writes a sample on file system. 567 | 568 | Args: 569 | binary (bool): True if it's a binary file 570 | payload: payload of sample, in base64 if it's a binary 571 | path (string): path of file 572 | filename (string): name of file 573 | hash_ (string): file hash 574 | """ 575 | if not os.path.exists(path): 576 | os.makedirs(path) 577 | sample = os.path.join(path, filename) 578 | 579 | if binary: 580 | with open(sample, "wb") as f: 581 | f.write(base64.b64decode(payload)) 582 | else: 583 | with open(sample, "w") as f: 584 | f.write(payload) 585 | 586 | 587 | def random_string(string_length=10): 588 | """Generate a random string of fixed length 589 | 590 | Keyword Arguments: 591 | string_length {int} -- String length (default: {10}) 592 | 593 | Returns: 594 | str -- Random string 595 | """ 596 | letters = string.ascii_lowercase 597 | return "".join(random.choice(letters) for _ in range(string_length)) 598 | -------------------------------------------------------------------------------- /src/mailparser/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 
4 | Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | """ 18 | 19 | __version__ = "4.1.4" 20 | -------------------------------------------------------------------------------- /tests/mails/mail_malformed_2: -------------------------------------------------------------------------------- 1 | Return-Path: <> 2 | Delivered-To: tanya@infospacemail.com 3 | Received: (qmail 32129 invoked from network); 22 Aug 2016 09:22:13 -0000 4 | Received: from c.netpar.com.br (HELO apus.netpar.com.br) (189.125.104.100) 5 | by smtp.customers.net with SMTP; 22 Aug 2016 09:22:13 -0000 6 | To: tanya@infospacemail.com 7 | Subject: Delivery failure (abh@netpar.com.br) 8 | From: postmaster@netpar.com.br 9 | Message-Id: 10 | Date: Mon, 22 Aug 2016 06:23:36 -0300 11 | MIME-Version: 1.0 12 | Content-Type: multipart/report; report-type=delivery-status; boundary="48461/50/1471857816/MailSite/apus.netpar.com.br" 13 | 14 | --48461/50/1471857816/MailSite/apus.netpar.com.br 15 | Content-Type: text/plain 16 | 17 | Your message has encountered delivery problems 18 | to the following recipient(s): 19 | 20 | abh 21 | (Was addressed to abh@netpar.com.br) 22 | Delivery failed 23 | User's mailbox quota exceeded 24 | 25 | 26 | 27 | 28 | --48461/50/1471857816/MailSite/apus.netpar.com.br 29 | Content-Type: message/delivery-status 30 | Content-Disposition: attachment; filename="DSN57BAC498.txt" 31 | 32 | Reporting-MTA: dns; 
apus.netpar.com.br 33 | Arrival-Date: Mon, 22 Aug 2016 06:23:36 -0300 34 | 35 | Final-Recipient: rfc822; abh@netpar.com.br 36 | Action: failed 37 | Status: 4.2.2 (Persistent transient failure - mailbox: mailbox full) 38 | 39 | --48461/50/1471857816/MailSite/apus.netpar.com.br 40 | Content-Type: message/rfc822-headers 41 | 42 | Received: from cygnus.netpar.com.br (unverified [189.125.104.24]) by apus.netpar.com.br 43 | (Rockliffe SMTPRA 4.5.6) with ESMTP id for ; 44 | Mon, 22 Aug 2016 06:23:36 -0300 45 | Received: from [117.201.229.175] (unknown [117.201.229.175]) 46 | by cygnus.netpar.com.br (Postfix) with ESMTP id 9B8AE886048E 47 | for ; Mon, 22 Aug 2016 09:22:04 +0000 (UTC) 48 | From: "Reynaldo Stevens" 49 | To: 50 | Subject: Trabajo perfecto a distancia 51 | Date: 22 Aug 2016 18:37:49 +0400 52 | Message-ID: <004801d1fc84$01c934ac$b247f0b7$@infospacemail.com> 53 | MIME-Version: 1.0 54 | Content-Type: multipart/alternative; 55 | boundary="----=_NextPart_000_0045_01D1FC84.01C36A99" 56 | X-Mailer: Microsoft Outlook 14.0 57 | Thread-Index: Ac1doyjrxh5tdsxl1doyjrxh5tdsxl== 58 | Content-Language: en 59 | 60 | --48461/50/1471857816/MailSite/apus.netpar.com.br-- 61 | -------------------------------------------------------------------------------- /tests/mails/mail_malformed_3: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | Delivered-To: luzina@test.it 3 | Received: (qmail 24134 invoked from network); 8 Mar 2017 01:21:00 -0000 4 | Received: from unknown (HELO 192.168.15.15) (189.207.175.29) 5 | by smtp.test.net with SMTP; 8 Mar 2017 01:21:00 -0000 6 | Received: from 139.88.66.159 by ; Mon, 20 Mar 2017 03:11:54 +0400 7 | Message-ID: 8 | From: "El Punto de los Frutos & Verdes" 9 | Reply-To: "VICTORIA Souvenirs" 10 | To: luzina@test.it 11 | Subject: Ahorre Dinero en su Recibo de Luz 12 | Date: Mon, 20 Mar 2017 05:12:54 +0600 13 | X-Mailer: guardian angel 9 dilettantes 14 | MIME-Version: 1.0 15 | Content-Type: 
multipart/alternative; 16 | boundary="--=_HsWzsKVZkho" 17 | X-Priority: 1 18 | X-MSMail-Priority: High 19 | 20 | ----=_GlSzQZUbRbpVOpy 21 | Content-Type: text/html; 22 | Content-Transfer-Encoding: base64 23 | 24 | PGh0bWw+PGhlYWQ+PC9oZWFkPjxib2R5Pjxicj4NCjx0YWJsZSANCnN0eWxlPSJGT05ULVNJ 25 | WkU6IDEycHg7IEZPTlQtRkFNSUxZOiBBcmlhbCwgSGVsdmV0aWNhLCBzYW5zLXNlcmlmOyBD 26 | T0xPUjogI2ZmZmZmZjsgVEVYVC1BTElHTjogY2VudGVyIiANCmNlbGxTcGFjaW5nPTAgY2Vs 27 | bFBhZGRpbmc9MCB3aWR0aD0iMTAwJSIgYWxpZ249Y2VudGVyIGJnQ29sb3I9I2IxMDEwMT4N 28 | CiAgPHRib2R5Pg0KICA8dHI+DQogICAgPHRkPg0KICAgICAgPHRhYmxlIHN0eWxlPSJGT05U 29 | LVNJWkU6IDEycHgiIGNlbGxTcGFjaW5nPTAgY2VsbFBhZGRpbmc9MCB3aWR0aD02MDAgDQog 30 | ICAgICBhbGlnbj1jZW50ZXI+DQogICAgICAgIDx0Ym9keT4NCiAgICAgICAgPHRyPg0KICAg 31 | ICAgICAgIDx0ZCBzdHlsZT0iVEVYVC1BTElHTjogY2VudGVyIiBoZWlnaHQ9MzQgYWxpZ249 32 | Y2VudGVyPjxzcGFuIA0KICAgICAgICAgICAgc3R5bGU9IkZPTlQtU0laRTogbGFyZ2UiPjxh 33 | IA0KICAgICAgICAgICAgaHJlZj0iaHR0cDovL2NhbG9yc29sYXIudGsvP3Y9MWZkYTRmYTU2 34 | MDVkIiB0YXJnZXQ9X2JsYW5rPjxzcGFuIA0KICAgICAgICAgICAgc3R5bGU9IkNPTE9SOiAj 35 | MDAwMDAwIj5Qcm9ibGVtYXMgcGFyYSB2aXN1YWxpemFyIHwgPC9zcGFuPjxmb250IA0KICAg 36 | ICAgICAgICAgY29sb3I9IzAwY2NmZj5WZXIgDQogIG9uLWxpbmU8L2ZvbnQ+PC9hPjwvc3Bh 37 | bj48L3RkPjwvdHI+PC90Ym9keT48L3RhYmxlPjwvdGQ+PC90cj48L3Rib2R5PjwvdGFibGU+ 38 | PGEgDQpocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIHRhcmdl 39 | dD1fYmxhbms+PGJyPjwvYT4NCjx0YWJsZSBzdHlsZT0iVEVYVC1BTElHTjogY2VudGVyIiBo 40 | ZWlnaHQ9Mzc0MiBjZWxsU3BhY2luZz0wIGNlbGxQYWRkaW5nPTAgDQp3aWR0aD04NzkgYWxp 41 | Z249Y2VudGVyIGJnQ29sb3I9IzAwMWUzMj4NCiAgPHRib2R5Pg0KICA8dHI+DQogICAgPHRk 42 | Pg0KICAgICAgPHRhYmxlIGNlbGxTcGFjaW5nPTAgY2VsbFBhZGRpbmc9MCB3aWR0aD02MDAg 43 | YWxpZ249Y2VudGVyIA0KICAgICAgICBiZ0NvbG9yPSNmZmZmZmY+PHRib2R5Pg0KICAgICAg 44 | ICA8dHI+DQogICAgICAgICAgPHRkIGhlaWdodD0zOTA+PGEgaHJlZj0iaHR0cDovL2NhbG9y 45 | c29sYXIudGsvP3Y9MWZkYTRmYTU2MDVkIj48YSANCiAgICAgICAgICAgIGhyZWY9Imh0dHA6 46 | 
Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCI+PGltZyANCiAgICAgICAgICAgIHN0 47 | eWxlPSJCT1JERVItVE9QOiAwcHggc29saWQ7IEhFSUdIVDogMzc1cHg7IEJPUkRFUi1SSUdI 48 | VDogMHB4IHNvbGlkOyBXSURUSDogNjAwcHg7IEJPUkRFUi1CT1RUT006IDBweCBzb2xpZDsg 49 | Qk9SREVSLUxFRlQ6IDBweCBzb2xpZDsgRElTUExBWTogYmxvY2siIA0KICAgICAgICAgICAg 50 | YWx0PSKhRmVsaXogTmF2aWRhZCEiIA0KICAgICAgICAgICAgc3JjPSJodHRwOi8vY2FsZWZh 51 | Y2Npb25zaW5saW1pdGVzLmNvbS9tb2R1bGVzL3RoZW1lY29uZmlndXJhdG9yL2ltZy9kNzJl 52 | ZDcwNWNjODVhNTRjMGU2YTY3ODdjZjdiNWVmMzdhMGRlNTU2X3NsaWRlLTE1LmpwZyI+PC9h 53 | PjwvYT4gDQoNCiAgICAgICAgICAgIDxwIGFsaWduPWNlbnRlcj48YSBocmVmPSJodHRwOi8v 54 | Y2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFyZ2V0PV9i 55 | bGFuaz48YnI+PC9hPjwvcD48L3RkPjwvdHI+PC90Ym9keT48L3RhYmxlPjwvdGQ+PC90cj4N 56 | CiAgPHRyPg0KICAgIDx0ZD4NCiAgICAgIDx0YWJsZSBjZWxsU3BhY2luZz0wIGNlbGxQYWRk 57 | aW5nPTAgd2lkdGg9NTMwIGFsaWduPWNlbnRlciANCiAgICAgICAgYmdDb2xvcj0jZmZmZmZm 58 | Pjx0Ym9keT4NCiAgICAgICAgPHRyPg0KICAgICAgICAgIDx0ZCBoZWlnaHQ9MjU+DQogICAg 59 | ICAgICAgICA8cCBhbGlnbj1jZW50ZXI+PGEgaHJlZj0iaHR0cDovL2NhbG9yc29sYXIudGsv 60 | P3Y9MWZkYTRmYTU2MDVkIiANCiAgICAgICAgICAgIHRhcmdldD1fYmxhbms+Jm5ic3A7PHNw 61 | YW4gY2xhc3M9c3Q+kzxlbT5icm9jaHVyZTwvZW0+lCBwdWVkZSB0ZW5lciANCiAgICAgICAg 62 | ICAgIDxlbT5pbmZvcm1hY2nzbjwvZW0+IGRlIGxhIGNvbXBh8e1hJm5ic3A7Li4uPC9zcGFu 63 | PjwvYT48L3A+DQogICAgICAgICAgICA8cCBhbGlnbj1jZW50ZXI+PHNwYW4gY2xhc3M9c3Q+ 64 | PHN0cm9uZz48YSANCiAgICAgICAgICAgIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92 65 | PTFmZGE0ZmE1NjA1ZCIgdGFyZ2V0PV9ibGFuaz5DTElDSyANCiAgICAgICAgICAgIEFRVUk8 66 | L2E+PC9zdHJvbmc+PC9zcGFuPjwvcD4NCiAgICAgICAgICAgIDxwIGFsaWduPWNlbnRlcj48 67 | YSBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAg 68 | ICAgICAgdGFyZ2V0PV9ibGFuaz4tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 69 | LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS08L2E+PC9wPjwvdGQ+PC90cj4NCiAg 70 | ICAgICAgPHRyPg0KICAgICAgICAgIDx0ZCBoZWlnaHQ9MT48aW1nIA0KICAgICAgICAgICAg 71 | 
c3R5bGU9IkJPUkRFUi1UT1A6IDBweCBzb2xpZDsgQk9SREVSLVJJR0hUOiAwcHggc29saWQ7 72 | IEJPUkRFUi1CT1RUT006IDBweCBzb2xpZDsgQk9SREVSLUxFRlQ6IDBweCBzb2xpZDsgRElT 73 | UExBWTogYmxvY2siIA0KICAgICAgICAgICAgc3JjPSJodHRwOi8vdmlhamUtb212LmlwLXpv 74 | bmUuY29tL2NjbS90ZW1wbGF0ZXMvbmF2aWRhZC0zL2ltYWdlcy9saW5lLmdpZiI+PC90ZD48 75 | L3RyPg0KICAgICAgICA8dHI+DQogICAgICAgICAgPHRkIHN0eWxlPSJGT05ULVNJWkU6IDE4 76 | cHg7IENPTE9SOiAjMzMzMzMzIiBoZWlnaHQ9MTgwIA0KICAgICAgICAgICAgYWxpZ249Y2Vu 77 | dGVyPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQog 78 | ICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPjxzcGFuIGNsYXNzPXN0PmNhbGVmYWN0b3JlcyBk 79 | ZSBuYW5vdGVjbm9sb2dpYSBtYXMgDQogICAgICAgICAgICBjb21wbGV0YSBkZWwgbWVyY2Fk 80 | bywgY2FsZWZhY3RvciBlY29sb2dpY28gZGUgYmFqbyBjb25zdW1vIA0KICAgICAgICAgICAg 81 | ZWxlY3RyaWNvLDEwMCUgc2FsdWRhYmxlLCBhaG9ycmEgZW5lcmdpYSBlbiB0dSZuYnNwOy4u 82 | Ljwvc3Bhbj4mbmJzcDsgDQogICAgICAgICAgICA8L2E+DQogICAgICAgICAgICA8ZGl2IGFs 83 | aWduPWxlZnQ+DQogICAgICAgICAgICA8ZGl2IGNsYXNzPWNvbnRhaW5lcj48YSBocmVmPSJo 84 | dHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFy 85 | Z2V0PV9ibGFuaz48L2E+Jm5ic3A7PC9kaXY+DQogICAgICAgICAgICA8ZGl2IGNsYXNzPSJw 86 | Yi1sZWZ0LWNvbHVtbiBjb2wteHMtMTIgY29sLXNtLTQgY29sLW1kLTUiPg0KICAgICAgICAg 87 | ICAgPGRpdiBpZD1pbWFnZS1ibG9jayBjbGFzcz1jbGVhcmZpeD4NCiAgICAgICAgICAgIDxk 88 | aXYgY2xhc3M9em9vbVBhZD48YSBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRh 89 | NGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFuaz48c3BhbiBpZD12aWV3X2Z1 90 | bGxfc2l6ZT48c3BhbiBjbGFzcz1qcXpvb20gDQogICAgICAgICAgICBzdHlsZT0iVEVYVC1E 91 | RUNPUkFUSU9OOiBub25lOyBPVVRMSU5FLVNUWUxFOiBub25lIj48aW1nIA0KICAgICAgICAg 92 | ICAgdGl0bGU9MUNTVFQtQmwgc3R5bGU9Im9wYWNpdHk6IDEiIGFsdD0xQ1NUVC1CbCANCiAg 93 | ICAgICAgICAgIHNyYz0iaHR0cDovL2NhbGVmYWNjaW9uc2lubGltaXRlcy5jb20vMjQtbGFy 94 | Z2VfZGVmYXVsdC8xY3N0dC1ibC5qcGciIA0KICAgICAgICAgICAgaXRlbXByb3A9ImltYWdl 95 | Ij48L3NwYW4+PC9zcGFuPiA8L2E+PC9kaXY+PC9kaXY+PC9kaXY+DQogICAgICAgICAgICA8 96 | 
ZGl2IGNsYXNzPSJwYi1jZW50ZXItY29sdW1uIGNvbC14cy0xMiBjb2wtc20tNCI+DQogICAg 97 | ICAgICAgICA8aDEgaXRlbXByb3A9Im5hbWUiPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFy 98 | LnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPlNlY2Fk 99 | b3IgVG9hbGxlcm8gVOlybWljbzwvYT48L2gxPg0KICAgICAgICAgICAgPHAgaWQ9cHJvZHVj 100 | dF9yZWZlcmVuY2U+PGEgDQogICAgICAgICAgICBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50 101 | ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFuaz48bGFiZWw+ 102 | UmVmZXJlbmNpYSA8L2xhYmVsPjxzcGFuIGNsYXNzPWVkaXRhYmxlIA0KICAgICAgICAgICAg 103 | aXRlbXByb3A9InNrdSIgY29udGVudD0iVG9hbGxlcm9zIFTpcm1pY29zIj5Ub2FsbGVyb3Mg 104 | DQogICAgICAgICAgICBU6XJtaWNvczwvc3Bhbj48L2E+PC9wPg0KICAgICAgICAgICAgPHAg 105 | aWQ9cHJvZHVjdF9jb25kaXRpb24+PGEgDQogICAgICAgICAgICBocmVmPSJodHRwOi8vY2Fs 106 | b3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFu 107 | az48bGFiZWw+Q29uZGljafNuOiA8L2xhYmVsPjxzcGFuIGNsYXNzPWVkaXRhYmxlPk51ZXZv 108 | IA0KICAgICAgICAgICAgcHJvZHVjdG88L3NwYW4+PC9hPjwvcD4NCiAgICAgICAgICAgIDxk 109 | aXYgaWQ9c2hvcnRfZGVzY3JpcHRpb25fYmxvY2s+DQogICAgICAgICAgICA8ZGl2IGlkPXNo 110 | b3J0X2Rlc2NyaXB0aW9uX2NvbnRlbnQgY2xhc3M9InJ0ZSBhbGlnbl9qdXN0aWZ5IiANCiAg 111 | ICAgICAgICAgIGl0ZW1wcm9wPSJkZXNjcmlwdGlvbiI+DQogICAgICAgICAgICA8aDI+PGEg 112 | aHJlZj0iaHR0cDovL2NhbG9yc29sYXIudGsvP3Y9MWZkYTRmYTU2MDVkIiANCiAgICAgICAg 113 | ICAgIHRhcmdldD1fYmxhbms+RGVzY3JpcGNp824gZGVsIFByb2R1Y3RvPC9hPjwvaDI+DQog 114 | ICAgICAgICAgICA8cD48YSBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZh 115 | NTYwNWQiIA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFuaz5Ub2FsbGVybyBU6XJtaWNvIGRl 116 | IFBhcmVkIGNvbiAzIGNvbGdhbnRlcyBwbGF0ZWFkb3MgLSANCiAgICAgICAgICAgIENyaXN0 117 | YWwgVGVtcGxhZG8gUmFkaWFudGUgQ2FsZWZhY2Npb25hZG8gLSBTaXN0ZW1hIGRlIENhbGVm 118 | YWNjafNuIGRlIA0KICAgICAgICAgICAgT25kYSBJbmZyYXJyb2phIGRlIE5hbm90ZWNub2xv 119 | Z+1hIHkgQmFqbyBDb25zdW1vIEVs6WN0cmljby4gU2VjYSBsYXMgDQogICAgICAgICAgICB0 120 | b2FsbGFzIHkgbGEgaHVtZWRhZCBkZWwgYmHxbzwvYT48L3A+PC9kaXY+PC9kaXY+PC9kaXY+ 121 | 
DQogICAgICAgICAgICA8ZGl2IGNsYXNzPWNvbnRhaW5lcj48YSBocmVmPSJodHRwOi8vY2Fs 122 | b3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFu 123 | az4tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 124 | LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 125 | LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLTwvYT48L2Rpdj4NCiAgICAgICAgICAg 126 | IDxkaXYgY2xhc3M9InBiLWxlZnQtY29sdW1uIGNvbC14cy0xMiBjb2wtc20tNCBjb2wtbWQt 127 | NSI+DQogICAgICAgICAgICA8ZGl2IGlkPWltYWdlLWJsb2NrIGNsYXNzPWNsZWFyZml4Pg0K 128 | ICAgICAgICAgICAgPGRpdiBjbGFzcz16b29tUGFkPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNv 129 | bGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPjxz 130 | cGFuIGlkPXZpZXdfZnVsbF9zaXplPjxzcGFuIGNsYXNzPWpxem9vbSANCiAgICAgICAgICAg 131 | IHN0eWxlPSJURVhULURFQ09SQVRJT046IG5vbmU7IE9VVExJTkUtU1RZTEU6IG5vbmUiPjxp 132 | bWcgDQogICAgICAgICAgICB0aXRsZT0iQ2FsZWZhY3RvciBVbHRyYWRlbGdhZG8gZGUgUG9y 133 | Y2VsYW5hdG8iIHN0eWxlPSJvcGFjaXR5OiAxIiANCiAgICAgICAgICAgIGFsdD0iQ2FsZWZh 134 | Y3RvciBVbHRyYWRlbGdhZG8gZGUgUG9yY2VsYW5hdG8iIA0KICAgICAgICAgICAgc3JjPSJo 135 | dHRwOi8vY2FsZWZhY2Npb25zaW5saW1pdGVzLmNvbS8zMS1sYXJnZV9kZWZhdWx0L2NhbGVm 136 | YWN0b3ItdWx0cmFkZWxnYWRvLWRlLXBvcmNlbGFuYXRvLmpwZyIgDQogICAgICAgICAgICBp 137 | dGVtcHJvcD0iaW1hZ2UiPjwvc3Bhbj48L3NwYW4+IDwvYT48L2Rpdj48L2Rpdj4NCiAgICAg 138 | ICAgICAgIDxkaXYgaWQ9dmlld3NfYmxvY2sgY2xhc3M9ImNsZWFyZml4ICI+PGEgDQogICAg 139 | ICAgICAgICBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIHRh 140 | cmdldD1fYmxhbms+PHNwYW4gDQogICAgICAgICAgICBjbGFzcz12aWV3X3Njcm9sbF9zcGFj 141 | ZXI+PHNwYW4gDQogICAgICAgICAgICBzdHlsZT0iQ1VSU09SOiBkZWZhdWx0OyBvcGFjaXR5 142 | OiAwIj5QcmV2aTwvc3Bhbj48L3NwYW4+IA0KICAgICAgICAgICAgPC9hPjwvZGl2PjwvZGl2 143 | Pg0KICAgICAgICAgICAgPGRpdiBjbGFzcz0icGItY2VudGVyLWNvbHVtbiBjb2wteHMtMTIg 144 | Y29sLXNtLTQiPg0KICAgICAgICAgICAgPGgxIGl0ZW1wcm9wPSJuYW1lIj48YSBocmVmPSJo 145 | dHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFy 146 | 
Z2V0PV9ibGFuaz5DYWxlZmFjdG9yIFVsdHJhZGVsZ2FkbyBkZSBQb3JjZWxhbmF0bzwvYT48 147 | L2gxPg0KICAgICAgICAgICAgPHAgaWQ9cHJvZHVjdF9yZWZlcmVuY2U+PGEgDQogICAgICAg 148 | ICAgICBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAg 149 | ICAgICAgICAgdGFyZ2V0PV9ibGFuaz48bGFiZWw+UmVmZXJlbmNpYSA8L2xhYmVsPjxzcGFu 150 | IGNsYXNzPWVkaXRhYmxlIA0KICAgICAgICAgICAgaXRlbXByb3A9InNrdSIgY29udGVudD0i 151 | Q2FtYmVycmEgLSA1NXg1NWNtIC0gMzMwdyI+Q2FtYmVycmEgLSANCiAgICAgICAgICAgIDU1 152 | eDU1Y20gLSAzMzB3PC9zcGFuPjwvYT48L3A+DQogICAgICAgICAgICA8cCBpZD1wcm9kdWN0 153 | X2NvbmRpdGlvbj48YSANCiAgICAgICAgICAgIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRr 154 | Lz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPjxsYWJlbD5D 155 | b25kaWNp8246IDwvbGFiZWw+PHNwYW4gY2xhc3M9ZWRpdGFibGU+TnVldm8gDQogICAgICAg 156 | ICAgICBwcm9kdWN0bzwvc3Bhbj48L2E+PC9wPg0KICAgICAgICAgICAgPGRpdiBpZD1zaG9y 157 | dF9kZXNjcmlwdGlvbl9ibG9jaz4NCiAgICAgICAgICAgIDxkaXYgaWQ9c2hvcnRfZGVzY3Jp 158 | cHRpb25fY29udGVudCBjbGFzcz0icnRlIGFsaWduX2p1c3RpZnkiIA0KICAgICAgICAgICAg 159 | aXRlbXByb3A9ImRlc2NyaXB0aW9uIj4NCiAgICAgICAgICAgIDxwPjxhIGhyZWY9Imh0dHA6 160 | Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9 161 | X2JsYW5rPkNhbGVmYWN0b3IgZGUgUGFyZWQgLSBQb3JjZWxhbmF0byBSYWRpYW50ZSANCiAg 162 | ICAgICAgICAgIENhbGVmYWNjaW9uYWRvIC0gQ2FtYmVycmEgLSBTaXN0ZW1hIGRlIENhbGVm 163 | YWNjafNuIGRlIE9uZGEgDQogICAgICAgICAgICBJbmZyYXJyb2phIGRlIE5hbm90ZWNub2xv 164 | Z+1hIHkgQmFqbyBDb25zdW1vIEVs6WN0cmljby4gUmFkaWFjafNuIGRlIA0KICAgICAgICAg 165 | ICAgVGlwbyBTb2xhciBzaW4gUmF5b3MgVVYuPC9hPjwvcD48L2Rpdj48L2Rpdj48L2Rpdj48 166 | YSANCiAgICAgICAgICAgIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1 167 | NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPi0tLS0tLS0tLS0tLS0tLS0tLS0t 168 | LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 169 | LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 170 | LS0tLS0tLSANCiAgICAgICAgICAgIDwvYT4NCiAgICAgICAgICAgIDxkaXYgY2xhc3M9Y29u 171 | 
dGFpbmVyPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIg 172 | DQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPjwvYT4mbmJzcDs8L2Rpdj4NCiAgICAgICAg 173 | ICAgIDxkaXYgY2xhc3M9InBiLWxlZnQtY29sdW1uIGNvbC14cy0xMiBjb2wtc20tNCBjb2wt 174 | bWQtNSI+DQogICAgICAgICAgICA8ZGl2IGlkPWltYWdlLWJsb2NrIGNsYXNzPWNsZWFyZml4 175 | Pg0KICAgICAgICAgICAgPGRpdiBjbGFzcz16b29tUGFkPjxhIGhyZWY9Imh0dHA6Ly9jYWxv 176 | cnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5r 177 | PjxzcGFuIGlkPXZpZXdfZnVsbF9zaXplPjxzcGFuIGNsYXNzPWpxem9vbSANCiAgICAgICAg 178 | ICAgIHN0eWxlPSJURVhULURFQ09SQVRJT046IG5vbmU7IE9VVExJTkUtU1RZTEU6IG5vbmUi 179 | PjxpbWcgDQogICAgICAgICAgICB0aXRsZT0iRGVzY29uZ2VsYWRvcmVzIERlc2hpZHJhdGFk 180 | b3JlcyIgc3R5bGU9Im9wYWNpdHk6IDEiIA0KICAgICAgICAgICAgYWx0PSJEZXNjb25nZWxh 181 | ZG9yZXMgRGVzaGlkcmF0YWRvcmVzIiANCiAgICAgICAgICAgIHNyYz0iaHR0cDovL2NhbGVm 182 | YWNjaW9uc2lubGltaXRlcy5jb20vNDgtbGFyZ2VfZGVmYXVsdC9kZXNjb25nZWxhZG9yZXMt 183 | ZGVzaGlkcmF0YWRvcmVzLmpwZyIgDQogICAgICAgICAgICBpdGVtcHJvcD0iaW1hZ2UiPjwv 184 | c3Bhbj48L3NwYW4+IDwvYT48L2Rpdj48L2Rpdj4NCiAgICAgICAgICAgIDxkaXYgaWQ9dmll 185 | d3NfYmxvY2sgY2xhc3M9ImNsZWFyZml4ICI+PHNwYW4gDQogICAgICAgICAgICBzdHlsZT0i 186 | Rk9OVC1TSVpFOiAyNnB4Ij48c3Ryb25nPjxhIA0KICAgICAgICAgICAgaHJlZj0iaHR0cDov 187 | L2NhbG9yc29sYXIudGsvP3Y9MWZkYTRmYTU2MDVkIiANCiAgICAgICAgICAgIHRhcmdldD1f 188 | Ymxhbms+RGVzY29uZ2VsYWRvcmVzIA0KICAgICAgICAgICAgRGVzaGlkcmF0YWRvcmVzPC9h 189 | Pjwvc3Ryb25nPjwvc3Bhbj48L2Rpdj48L2Rpdj4NCiAgICAgICAgICAgIDxkaXYgY2xhc3M9 190 | InBiLWNlbnRlci1jb2x1bW4gY29sLXhzLTEyIGNvbC1zbS00Ij4NCiAgICAgICAgICAgIDxw 191 | IGlkPXByb2R1Y3RfcmVmZXJlbmNlPjxhIA0KICAgICAgICAgICAgaHJlZj0iaHR0cDovL2Nh 192 | bG9yc29sYXIudGsvP3Y9MWZkYTRmYTU2MDVkIiANCiAgICAgICAgICAgIHRhcmdldD1fYmxh 193 | bms+PGxhYmVsPlJlZmVyZW5jaWEgPC9sYWJlbD48c3BhbiBjbGFzcz1lZGl0YWJsZSANCiAg 194 | ICAgICAgICAgIGl0ZW1wcm9wPSJza3UiIGNvbnRlbnQ9Ik9yZ2FuaWMgRGVmcm9zdGVyIj5P 195 | cmdhbmljIA0KICAgICAgICAgICAgRGVmcm9zdGVyPC9zcGFuPjwvYT48L3A+DQogICAgICAg 196 | 
ICAgICA8cCBpZD1wcm9kdWN0X2NvbmRpdGlvbj48YSANCiAgICAgICAgICAgIGhyZWY9Imh0 197 | dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJn 198 | ZXQ9X2JsYW5rPjxsYWJlbD5Db25kaWNp8246IDwvbGFiZWw+PHNwYW4gY2xhc3M9ZWRpdGFi 199 | bGU+TnVldm8gDQogICAgICAgICAgICBwcm9kdWN0bzwvc3Bhbj48L2E+PC9wPg0KICAgICAg 200 | ICAgICAgPGRpdiBpZD1zaG9ydF9kZXNjcmlwdGlvbl9ibG9jaz4NCiAgICAgICAgICAgIDxk 201 | aXYgaWQ9c2hvcnRfZGVzY3JpcHRpb25fY29udGVudCBjbGFzcz0icnRlIGFsaWduX2p1c3Rp 202 | ZnkiIA0KICAgICAgICAgICAgaXRlbXByb3A9ImRlc2NyaXB0aW9uIj4NCiAgICAgICAgICAg 203 | IDxoMj48YSBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0K 204 | ICAgICAgICAgICAgdGFyZ2V0PV9ibGFuaz5EZXNjcmlwY2nzbiBkZWwgUHJvZHVjdG88L2E+ 205 | PC9oMj4NCiAgICAgICAgICAgIDxwPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92 206 | PTFmZGE0ZmE1NjA1ZCIgdGFyZ2V0PV9ibGFuaz5CYXNlIA0KICAgICAgICAgICAgZGUgMTd4 207 | NTZjbSBkZSBWaWRyaW8gVGVtcGxhZG8uIFJhZGlhY2nzbiBpbmZyYXJyb2phIGRlIG9uZGEg 208 | bGFyZ2EsIA0KICAgICAgICAgICAgRWNvbPNnaWNhIHkgYmFqbyBjb25zdW1vLCBwYXJhIGRl 209 | c2NvbmdlbGFyLCBkZXNoaWRyYXRhciwgeSBtYW50ZW5lciANCiAgICAgICAgICAgIGNhbGll 210 | bnRlIGxvcyBhbGltZW50b3MgZGUgbWFuZXJhIG9yZ+FuaWNhLiBDdWVudGEgY29uIDIgbml2 211 | ZWxlcyBkZSANCiAgICAgICAgICAgIHRlbXBlcmF0dXJhLiBDb25zdW1vIE3tbiA2MHcgeSBN 212 | 4XggDQogICAgICAgICAgICAxMjBXPGJyPjxicj4tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 213 | LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 214 | LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t 215 | LS0tLTwvYT48L3A+DQogICAgICAgICAgICA8ZGl2IGNsYXNzPWNvbnRhaW5lcj4NCiAgICAg 216 | ICAgICAgIDxkaXYgY2xhc3M9dG9wLWhyPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRr 217 | Lz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPjwvYT4mbmJz 218 | cDs8L2Rpdj48L2Rpdj4NCiAgICAgICAgICAgIDxkaXYgY2xhc3M9InBiLWxlZnQtY29sdW1u 219 | IGNvbC14cy0xMiBjb2wtc20tNCBjb2wtbWQtNSI+DQogICAgICAgICAgICA8ZGl2IGlkPWlt 220 | YWdlLWJsb2NrIGNsYXNzPWNsZWFyZml4Pg0KICAgICAgICAgICAgPGRpdiBjbGFzcz16b29t 221 | 
UGFkPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQog 222 | ICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPjxzcGFuIGlkPXZpZXdfZnVsbF9zaXplPjxzcGFu 223 | IGNsYXNzPWpxem9vbSANCiAgICAgICAgICAgIHN0eWxlPSJURVhULURFQ09SQVRJT046IG5v 224 | bmU7IE9VVExJTkUtU1RZTEU6IG5vbmUiPjxpbWcgDQogICAgICAgICAgICBzdHlsZT0ib3Bh 225 | Y2l0eTogMSIgYWx0PSJDZWxkYSBDRVJBVEkgcGFyYSBQaXNvcyIgDQogICAgICAgICAgICBz 226 | cmM9Imh0dHA6Ly9jYWxlZmFjY2lvbnNpbmxpbWl0ZXMuY29tLzU0LWxhcmdlX2RlZmF1bHQv 227 | Y2VsZGEtY2VyYXRpLXBhcmEtcGlzb3MuanBnIiANCiAgICAgICAgICAgIGl0ZW1wcm9wPSJp 228 | bWFnZSI+PC9zcGFuPjwvc3Bhbj4gPC9hPg0KICAgICAgICAgICAgPGRpdiBjbGFzcz16b29t 229 | V2luZG93IA0KICAgICAgICAgICAgc3R5bGU9IkNVUlNPUjogZGVmYXVsdDsgUE9TSVRJT046 230 | IGFic29sdXRlOyBMRUZUOiAwcHg7IFotSU5ERVg6IDUwMDE7IERJU1BMQVk6IGJsb2NrOyBU 231 | T1A6IDBweCI+DQogICAgICAgICAgICA8ZGl2IGNsYXNzPXpvb21XcmFwcGVyIA0KICAgICAg 232 | ICAgICAgc3R5bGU9IkJPUkRFUi1MRUZULVdJRFRIOiAxcHg7IENVUlNPUjogY3Jvc3NoYWly 233 | OyBCT1JERVItUklHSFQtV0lEVEg6IDFweDsgV0lEVEg6IDQ1OHB4OyBCT1JERVItQk9UVE9N 234 | LVdJRFRIOiAxcHg7IEJPUkRFUi1UT1AtV0lEVEg6IDFweCI+PC9kaXY+PC9kaXY+PGEgDQog 235 | ICAgICAgICAgICBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQi 236 | IA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFuaz48L2E+PC9kaXY+PC9kaXY+DQogICAgICAg 237 | ICAgICA8ZGl2IGlkPXZpZXdzX2Jsb2NrIGNsYXNzPSJjbGVhcmZpeCAiPjxzdHJvbmc+PHNw 238 | YW4gDQogICAgICAgICAgICBzdHlsZT0iRk9OVC1TSVpFOiAyNnB4Ij48YSANCiAgICAgICAg 239 | ICAgIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgdGFyZ2V0 240 | PV9ibGFuaz5DZWxkYSANCiAgICAgICAgICAgIENFUkFUSSBwYXJhIFBpc29zPC9hPjwvc3Bh 241 | bj48L3N0cm9uZz48L2Rpdj48L2Rpdj4NCiAgICAgICAgICAgIDxkaXYgY2xhc3M9InBiLWNl 242 | bnRlci1jb2x1bW4gY29sLXhzLTEyIGNvbC1zbS00Ij4NCiAgICAgICAgICAgIDxwIGlkPXBy 243 | b2R1Y3RfY29uZGl0aW9uPjxhIA0KICAgICAgICAgICAgaHJlZj0iaHR0cDovL2NhbG9yc29s 244 | YXIudGsvP3Y9MWZkYTRmYTU2MDVkIiANCiAgICAgICAgICAgIHRhcmdldD1fYmxhbms+PGxh 245 | YmVsPkNvbmRpY2nzbjogPC9sYWJlbD48c3BhbiBjbGFzcz1lZGl0YWJsZT5OdWV2byANCiAg 246 | 
ICAgICAgICAgIHByb2R1Y3RvPC9zcGFuPjwvYT48L3A+DQogICAgICAgICAgICA8ZGl2IGlk 247 | PXNob3J0X2Rlc2NyaXB0aW9uX2Jsb2NrPg0KICAgICAgICAgICAgPGRpdiBpZD1zaG9ydF9k 248 | ZXNjcmlwdGlvbl9jb250ZW50IGNsYXNzPSJydGUgYWxpZ25fanVzdGlmeSIgDQogICAgICAg 249 | ICAgICBpdGVtcHJvcD0iZGVzY3JpcHRpb24iPg0KICAgICAgICAgICAgPGg0PjxhIGhyZWY9 250 | Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0 251 | YXJnZXQ9X2JsYW5rPkNlbGRhIGRlIFJhZGlhY2nzbiBJbmZyYXJvamEgZGUgTmFub3RlY25v 252 | bG9n7WEgcGFyYSANCiAgICAgICAgICAgIGluc3RhbGFjafNuIHBvciBkZWJham8gZGUgUGlz 253 | b3MgTGFtaW5hZG9zIGRlIG1hZGVyYS4gUmFuZ28gZGUgVGVtcCANCiAgICAgICAgICAgIGFs 254 | IHRhY3RvOiA0MC02MCC6QyBQcm9tZWRpbyA1MLBDLCBDQSAxMDAtMTIwViwgNTAvNjAgSHos 255 | IGNvbnN1bW8gDQogICAgICAgICAgICA1MFcuIEVzdGUgcHJvZHVjdG8gbm8gaW5jbHV5ZSAN 256 | CiAgICAgICAgICAgIGNhYmxlPC9hPjwvaDQ+PC9kaXY+PC9kaXY+PC9kaXY+PC9kaXY+PC9k 257 | aXY+PC9kaXY+PC9kaXY+PC90ZD48L3RyPg0KICAgICAgICA8dHI+DQogICAgICAgICAgPHRk 258 | IGhlaWdodD0xPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1 259 | ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2JsYW5rPiZuYnNwOzwvYT48L3RkPjwvdHI+DQog 260 | ICAgICAgIDx0cj4NCiAgICAgICAgICA8dGQgaGVpZ2h0PTIwPjxhIGhyZWY9Imh0dHA6Ly9j 261 | YWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICB0YXJnZXQ9X2Js 262 | YW5rPiZuYnNwOzwvYT48L3RkPjwvdHI+DQogICAgICAgIDx0cj4NCiAgICAgICAgICA8dGQ+ 263 | DQogICAgICAgICAgICA8aDQ+PHNwYW4gc3R5bGU9IkZPTlQtU0laRTogMjBweCI+PGEgDQog 264 | ICAgICAgICAgICBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQi 265 | IA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFuaz5JbmZvcm1hY2nzbiBzb2JyZSBsYSB0aWVu 266 | ZGE8L2E+PC9zcGFuPjwvaDQ+PHNlY3Rpb24gDQogICAgICAgICAgICBpZD1ibG9ja19jb250 267 | YWN0X2luZm9zIGNsYXNzPSJmb290ZXItYmxvY2sgY29sLXhzLTEyIGNvbC1zbS00Ij4NCiAg 268 | ICAgICAgICAgIDxkaXY+DQogICAgICAgICAgICA8aDQ+PGEgaHJlZj0iaHR0cDovL2NhbG9y 269 | c29sYXIudGsvP3Y9MWZkYTRmYTU2MDVkIiANCiAgICAgICAgICAgIHRhcmdldD1fYmxhbms+ 270 | SW5mb3JtYWNp824gc29icmUgbGEgdGllbmRhPC9hPjwvaDQ+DQogICAgICAgICAgICA8dWwg 271 | 
Y2xhc3M9dG9nZ2xlLWZvb3Rlcj4NCiAgICAgICAgICAgICAgPGxpPjxhIGhyZWY9Imh0dHA6 272 | Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICAgIHRhcmdl 273 | dD1fYmxhbms+Y2FsZWZhY2Npb25zaW5saW1pdGVzLmNvbSwgTWlndWVsIEFsbGVuZGUgMjUs 274 | IA0KICAgICAgICAgICAgICBQcmFkZXJhcyBkZSBTYW4gTWF0ZW8sIE5hdWNhbHBhbiBkZSBq 275 | YXVyZXouIEVkbyBkZSBNZXhpY28gPC9hPg0KICAgICAgICAgICAgICA8bGk+PGEgaHJlZj0i 276 | aHR0cDovL2NhbG9yc29sYXIudGsvP3Y9MWZkYTRmYTU2MDVkIiANCiAgICAgICAgICAgICAg 277 | dGFyZ2V0PV9ibGFuaz5MbOFtYW5vcyBhaG9yYTogPHNwYW4+V2hhdHNhcHAgKDUyKSA1NTI1 278 | NTk2Mzk1IA0KICAgICAgICAgICAgICBPZmljaW5hLi4uIENETVg6ICgrNTI1NSkgNjcxMTk3 279 | NjA8L3NwYW4+IDwvYT4NCiAgICAgICAgICAgICAgPGxpPjxhIGhyZWY9Imh0dHA6Ly9jYWxv 280 | cnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAgICAgICAgIHRhcmdldD1fYmxh 281 | bms+RW1haWw6Jm5ic3A7dmVudGFzQGNhbG9yc29sYXIudGs8L2E+IA0KICAgICAgICAgICAg 282 | PC9saT48L3VsPjwvZGl2Pjwvc2VjdGlvbj48L3RkPjwvdHI+PC90Ym9keT48L3RhYmxlPjwv 283 | dGQ+PC90cj4NCiAgPHRyPg0KICAgIDx0ZD48YSBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50 284 | ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgIHRhcmdldD1fYmxhbms+Jm5ic3A7PC9hPjwvdGQ+ 285 | PC90cj48L3Rib2R5PjwvdGFibGU+DQo8cD48YSBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50 286 | ay8/dj0xZmRhNGZhNTYwNWQiIHRhcmdldD1fYmxhbms+PGJyPjwvYT48L3A+DQo8dGFibGUg 287 | aGVpZ2h0PTQ2IGNlbGxTcGFjaW5nPTAgY2VsbFBhZGRpbmc9MCB3aWR0aD04NDkgYWxpZ249 288 | Y2VudGVyIA0KYmdDb2xvcj0jMDAxZTMyPg0KICA8dGJvZHk+DQogIDx0cj4NCiAgICA8dGQg 289 | aGVpZ2h0PTI4PjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1 290 | ZCIgDQogICAgICB0YXJnZXQ9X2JsYW5rPiZuYnNwOzwvYT48L3RkPjwvdHI+DQogIDx0cj4N 291 | CiAgICA8dGQgYWxpZ249Y2VudGVyPjxhIGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92 292 | PTFmZGE0ZmE1NjA1ZCIgDQogICAgICB0YXJnZXQ9X2JsYW5rPiZuYnNwOzwvYT48L3RkPjwv 293 | dHI+PC90Ym9keT48L3RhYmxlPg0KPHA+PGEgaHJlZj0iaHR0cDovL2NhbG9yc29sYXIudGsv 294 | P3Y9MWZkYTRmYTU2MDVkIiB0YXJnZXQ9X2JsYW5rPjxicj48L2E+PC9wPg0KPHRhYmxlIA0K 295 | c3R5bGU9IkZPTlQtRkFNSUxZOiAnVHJlYnVjaGV0IE1TJywgQXJpYWwsIEhlbHZldGljYSwg 296 | 
c2Fucy1zZXJpZjsgQ09MT1I6ICNmZmZmZmYiIA0KaGVpZ2h0PTExNiBjZWxsU3BhY2luZz0w 297 | IGNlbGxQYWRkaW5nPTAgd2lkdGg9ODUyIGFsaWduPWNlbnRlciBiZ0NvbG9yPSNiMTAxMDE+ 298 | DQogIDx0Ym9keT4NCiAgPHRyPg0KICAgIDx0ZD4NCiAgICAgIDx0YWJsZSBjZWxsU3BhY2lu 299 | Zz0wIGNlbGxQYWRkaW5nPTAgd2lkdGg9NjAwIGFsaWduPWNlbnRlcj4NCiAgICAgICAgPHRi 300 | b2R5Pg0KICAgICAgICA8dHI+DQogICAgICAgICAgPHRkIGhlaWdodD0zMD48YSBocmVmPSJo 301 | dHRwOi8vY2Fsb3Jzb2xhci50ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFy 302 | Z2V0PV9ibGFuaz4mbmJzcDs8L2E+PC90ZD4NCiAgICAgICAgICA8dGQgaGVpZ2h0PTE1Pjxh 303 | IGhyZWY9Imh0dHA6Ly9jYWxvcnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgDQogICAgICAg 304 | ICAgICB0YXJnZXQ9X2JsYW5rPiZuYnNwOzwvYT48L3RkPg0KICAgICAgICAgIDx0ZCBoZWln 305 | aHQ9MTU+PGEgaHJlZj0iaHR0cDovL2NhbG9yc29sYXIudGsvP3Y9MWZkYTRmYTU2MDVkIiAN 306 | CiAgICAgICAgICAgIHRhcmdldD1fYmxhbms+Jm5ic3A7PC9hPjwvdGQ+PC90cj4NCiAgICAg 307 | ICAgPHRyPg0KICAgICAgICAgIDx0ZCB3aWR0aD0xMD48c3BhbiANCiAgICAgICAgICAgIHN0 308 | eWxlPSJGT05ULVNJWkU6IHNtYWxsOyBGT05ULUZBTUlMWTogYXJpYWwsIGhlbHZldGljYSwg 309 | c2Fucy1zZXJpZiI+PGEgDQogICAgICAgICAgICBocmVmPSJodHRwOi8vY2Fsb3Jzb2xhci50 310 | ay8/dj0xZmRhNGZhNTYwNWQiIA0KICAgICAgICAgICAgdGFyZ2V0PV9ibGFuaz4mbmJzcDs8 311 | L2E+PC9zcGFuPjwvdGQ+DQogICAgICAgICAgPHRkPg0KICAgICAgICAgICAgPHAgc3R5bGU9 312 | IkZPTlQtU0laRTogMTJweDsgVEVYVC1BTElHTjogY2VudGVyOyBNQVJHSU46IDBweCIgDQog 313 | ICAgICAgICAgICBhbGlnbj1jZW50ZXI+PHNwYW4gDQogICAgICAgICAgICBzdHlsZT0iRk9O 314 | VC1TSVpFOiBsYXJnZTsgRk9OVC1GQU1JTFk6IGFyaWFsLCBoZWx2ZXRpY2EsIHNhbnMtc2Vy 315 | aWY7IENPTE9SOiAjZmZmZmZmIj48YSANCiAgICAgICAgICAgIGhyZWY9Imh0dHA6Ly9jYWxv 316 | cnNvbGFyLnRrLz92PTFmZGE0ZmE1NjA1ZCIgdGFyZ2V0PV9ibGFuaz5FbiANCiAgICAgICAg 317 | ICAgIGN1bXBsaW1pZW50byBkZSBsbyBkaXNwdWVzdG8gZW4gbGEgUHJvdGVjY2nzbiBkZSBE 318 | YXRvcywgcGFyYSBlbCANCiAgICAgICAgICAgIGVqZXJjaWNpbyBkZSBzdXMgZGVyZWNob3Mg 319 | ZGUgYWNjZXNvLCByZWN0aWZpY2FjafNuLCBjYW5jZWxhY2nzbiB5IA0KICAgICAgICAgICAg 320 | b3Bvc2ljafNuIGFsIHRyYXRhbWllbnRvIGRlIHN1cyBkYXRvcyBwZXJzb25hbGVzLCBjb250 321 | 
ZW5pZG9zIGVuIA0KICAgICAgICAgICAgbnVlc3RyYXMgY29uZGljaW9uZXMgZGUgcHJvdGVj 322 | Y2nzbiBkZSBkYXRvcywgc29sYW1lbnRlIHRpZW5lIHF1ZSANCiAgICAgICAgICAgIGhhY2Vy 323 | IDxmb250IGNvbG9yPSMzMzk5Y2M+Y2xpYyBhcXXtPC9mb250Pi48L2E+PC9zcGFuPjwvcD48 324 | L3RkPg0KICAgICAgICAgIDx0ZCB3aWR0aD0xMD4mbmJzcDs8L3RkPjwvdHI+PC90Ym9keT48 325 | L3RhYmxlPjwvdGQ+PC90cj4NCiAgPHRyPg0KICAgIDx0ZCBoZWlnaHQ9MzA+Jm5ic3A7PC90 326 | ZD48L3RyPjwvdGJvZHk+PC90YWJsZT4NCjxwPjxicj48YnI+PC9wPjxzZWN0aW9uIGlkPWJs 327 | b2NrX2NvbnRhY3RfaW5mb3MgDQpjbGFzcz0iZm9vdGVyLWJsb2NrIGNvbC14cy0xMiBjb2wt 328 | c20tNCI+DQo8cD4mbmJzcDs8L3A+DQo8ZGl2PjwvZGl2Pg0KPHA+Jm5ic3A7PC9wPjwvc2Vj 329 | dGlvbj4NCjxwPiZuYnNwOzwvcD4NCjxoMSBzdHlsZT0iUEFERElORy1MRUZUOiAzMDBweCI+ 330 | PHNwYW4gDQpzdHlsZT0iRk9OVC1GQU1JTFk6IGFyaWFsLGhlbHZldGljYSxzYW5zLXNlcmlm 331 | Ij48c3Ryb25nPjxzcGFuIA0Kc3R5bGU9IkZPTlQtU0laRTogbGFyZ2UiPjxhIGhyZWY9Im1h 332 | aWx0bzpzbWdlc2k0QGdtYWlsLmNvbT9zdWJqZWN0PURhcm1lJTIwZGUlMjBCYWphJTIwZGUl 333 | MjBsYSUyMGxpc3RhJTIwZGUlMjBEaXN0cmlidWNpb24lMjAmYW1wO2JvZHk9Tm8lMjBtZSUy 334 | MGludGVyZXNhbiUyMG1hcyUyMFBVQkxJQ0lEQUQlMjBERSUyMElOVEVSTkVUISI+Y2xpY2sg 335 | DQphcXXtIHBhcmEgZGFyc2UgZGUgYmFqYTwvYT48L3NwYW4+PC9zdHJvbmc+PC9zcGFuPjwv 336 | aDE+DQo8aDEgc3R5bGU9IlBBRERJTkctTEVGVDogMzAwcHgiPjxzcGFuIA0Kc3R5bGU9IkZP 337 | TlQtRkFNSUxZOiBhcmlhbCxoZWx2ZXRpY2Esc2Fucy1zZXJpZiI+PHN0cm9uZz48c3BhbiAN 338 | CnN0eWxlPSJGT05ULVNJWkU6IGxhcmdlIj4mbmJzcDs8YSBocmVmPSJtYWlsdG86c21nZXNp 339 | NEBnbWFpbC5jb20/c3ViamVjdD1EYXJtZSUyMGRlJTIwQmFqYSUyMGRlJTIwbGElMjBsaXN0 340 | YSUyMGRlJTIwRGlzdHJpYnVjaW9uJTIwJmFtcDtib2R5PU5vJTIwbWUlMjBpbnRlcmVzYW4l 341 | MjBtYXMlMjBQVUJMSUNJREFEJTIwREUlMjBJTlRFUk5FVCEiPmNsaWNrIA0KYXF17SBwYXJh 342 | IGRhcnNlIGRlIGJhamE8L2E+PC9zcGFuPjwvc3Ryb25nPjwvc3Bhbj48L2gxPjwvYm9keT48 343 | L2h0bWw+ 344 | 345 | ----=_1wyyTH1wPm-- 346 | -------------------------------------------------------------------------------- /tests/mails/mail_outlook_1: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SpamScope/mail-parser/1782bf724190d15c5063b6cc079637599ceb61ca/tests/mails/mail_outlook_1 -------------------------------------------------------------------------------- /tests/mails/mail_test_12: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | X-Original-To: danglüe@email.com 3 | Delivered-To: root@localhost 4 | Received: from hotmail.com (unknown [96.202.181.20]) 5 | by localhost (Postfix) with ESMTP id 6EF7F2E0BAB 6 | for ; Sun, 13 May 2018 04:32:23 +0000 (UTC) 7 | From: =?GB2312?B?1cXPyMn6?= 8 | Subject: =?GB2312?B?tPq/qrj3tdjU9ta1y7C3osax?= 9 | To: danglüe@email.com 10 | Content-Type: text/plain;charset="GB2312" 11 | Content-Transfer-Encoding: 8bit 12 | Date: Sun, 13 May 2018 12:32:22 +0800 13 | X-Priority: 3 14 | X-Mailer: Microsoft Outlook Express 5.00.2919.6700 15 | 16 | 你好!我公司可代开各类增值税发票,17%专用发票,3%普通发票,点数优惠,所开发票均可查询验证后 17 | 付款,电话:13662615434张经理 (微信同号13662615434)此信息永久有效望存 18 | -------------------------------------------------------------------------------- /tests/mails/mail_test_14: -------------------------------------------------------------------------------- 1 | From: example@example.com 2 | Subject: Test 3 | Date: Wed, 24 Apr 2019 10:05:02 +0200 (CEST) 4 | Mime-Version: 1.0 5 | Content-Type: multipart/mixed; boundary="===============8544575414772382491==" 6 | To: rcpt@example.com 7 | Received-SPF: custom_header1 8 | Received-SPF: custom_header2 9 | 10 | --===============8544575414772382491== 11 | Content-Type: text/html; charset=UTF-8 12 | Content-Transfer-Encoding: 7bit 13 | 14 | 15 | Foo 16 | 17 | 18 | HTML here 19 | 20 | --===============8544575414772382491== 21 | Content-Type: image/png 22 | Content-Transfer-Encoding: base64 23 | Content-Disposition: inline 24 | 25 | UE5HIGhlcmU= 26 | --===============8544575414772382491== 27 | Content-Type: text/plain; charset="us-ascii" 28 | MIME-Version: 1.0 29 | Content-Transfer-Encoding: 7bit 30 | Content-Disposition: 
inline 31 | 32 | Plaintext here. 33 | --===============8544575414772382491==-- 34 | -------------------------------------------------------------------------------- /tests/mails/mail_test_16: -------------------------------------------------------------------------------- 1 | Subject: Test spam mail (GTUBE) 2 | Message-ID: 3 | Date: Wed, 23 Jul 2003 23:30:00 +0200 4 | From: Sender 5 | To: Recipient 6 | Precedence: junk 7 | MIME-Version: 1.0 8 | Content-Type: text/plain; charset=us-ascii 9 | Content-Transfer-Encoding: 7bit 10 | headers: hello-world 11 | 12 | This is the GTUBE, the 13 | Generic 14 | Test for 15 | Unsolicited 16 | Bulk 17 | Email 18 | 19 | If your spam filter supports it, the GTUBE provides a test by which you 20 | can verify that the filter is installed correctly and is detecting incoming 21 | spam. You can send yourself a test mail containing the following string of 22 | characters (in upper case and with no white spaces and line breaks): 23 | 24 | XJS*C4JDBQADN1.NSBN3*2IDNEN*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X 25 | 26 | You should send this test mail from an account outside of your network. 
27 | -------------------------------------------------------------------------------- /tests/mails/mail_test_17: -------------------------------------------------------------------------------- 1 | Received: from apiron13.triara.com (200.57.129.98) by APCNHUB11.correo.local 2 | (172.18.31.175) with Microsoft SMTP Server id 14.3.498.0; Mon, 10 Oct 2022 3 | 15:11:35 -0500 4 | Authentication-Results: mailb.triara.com; spf=None smtp.mailfrom=www-data@vps-051e4cda.vps.ovh.net 5 | Received-SPF: None (mailb.triara.com: no sender authenticity 6 | information available from domain of 7 | www-data@vps-051e4cda.vps.ovh.net) identity=mailfrom; 8 | client-ip=152.228.133.10; receiver=mailb.triara.com; 9 | envelope-from="www-data@vps-051e4cda.vps.ovh.net"; 10 | x-sender="www-data@vps-051e4cda.vps.ovh.net"; 11 | x-conformance=spf_only 12 | X-IPAS-Result: =?us-ascii?q?A0C0kgB4e0RjYQqF5JhaGgEBAQEBKwEBAQEDAQEBAQEBA?= 13 | =?us-ascii?q?QMBAQEBAgIBAQEBAgEJAYFmTFMbAgFlBzFJKwEpEB8EC0IDA4RLXIgihXEtg?= 14 | =?us-ascii?q?yULiwaKFWSFYyKBAgNfAQEBAQEBAQEBBAEEEgIRDwgGBAEBhVYBAQeEHAIjT?= 15 | =?us-ascii?q?QECBAEBAQEDAgMBAQEBAQEDAQEBBAEBAQIBAwQEB0kFEgorhWgNgiIBhEkag?= 16 | =?us-ascii?q?RkCBEobgweDDxEDBQulbodVgTKBAYNQAYELg1gngRgQgSYVAgEBAYFRhT4Fg?= 17 | =?us-ascii?q?TgBgwqCJ4IvJ4FnQhKBA4U4Q08CAQEBF4ITgwoYgk4EmTN7BFIDMxEdEyMBC?= 18 | =?us-ascii?q?QMLdxgDFAMFIQcDGQ8jDQ0EHQwDAwUlAwICGwcCAgMCBhMFAgJNNAgECAQrJ?= 19 | =?us-ascii?q?A8FAgcvBQQvAh4EBQYRCAIWAgYEBAQEFQIQCAIIJhcHEzMZAQVZDgkhHAkfD?= 20 | =?us-ascii?q?QUGEwMgbwVCDygvaSsdGweBDCokBBUDBAQDAgYTAyACDSkxFAQpEw8tBylxC?= 21 | =?us-ascii?q?QIDImoDAwQoLAMJQAcoJAQ4B1g6BQMCECI8BgMJAwIkWYEkDRkFAw0XJggFN?= 22 | =?us-ascii?q?xsECDwCBQZSEwIKEgMSDwkkSQ9KPjsXCB8PlVxigRE3AgEPCkFQLi6CR4EGg?= 23 | =?us-ascii?q?jiWPYZ2hSsJgjSNH49qB4JOJG+LPpUwgRWEMaNAlxCJXYNjhw0CAoFEjAqFU?= 24 | =?us-ascii?q?DWBWgyBW4FMWAEBggABCToOGQ+NfoMrhFqBI4R1VUQxAgEBGR4CBgEKAQEDC?= 25 | =?us-ascii?q?QGCOohaAQE?= 26 | IronPort-PHdr: A9a23:spSpFxSrnRjDE/aKheFpfAHQh9psomOdAWYlg6HP6ppLe6Wn9dL+J 27 | 
k3W//wrh1jMDs3A8/wRre3Qvuj7XHAYp5OIsXQMaptJAgAaloAalhApKNCdEgijcafnMyc2F 28 | YJcTEQj+G+7d05YBK4SfnX0pXu/pX4XExT7P0xpPOX8AIeUhMOyhai0/NXIbgNEiSD1aK5uL 29 | BiwsQTat9UH5OkqYqc3xBzTp3JUeuNQjWp2LFOXlhz464++5plmuyhXvvsg8YZHX8CYN+wnU 30 | qMdAD06Okgr/Nau7USaQFeE6nJaTH0N1B1WA07D4Q27Fpb9vy3mt/ZsjSyTPMn4V7cxClHAp 31 | +9gTB7ljjtCNiZsqTyNzJYq3OQC+0nn/kQsi5TZa4yUKvdkK6bUfNcQA3ZbX89LXmpKBYbvC 32 | ulHR+cHI+tcqJHw4lUUqh7rTxazG6bjwyNBrmHuxuhijr0sTQrK1Uo+B8lIt2rU6tP4KO1BN 33 | IL9hLmN1jjFY/5MjH3i95eNfhk9qNmSRak1KZeIxBUkGwqDkE2L74v7P3WT2/hH4A35p6JwE 34 | OmojWAgsQR4pDOik9w0hL7OgIYEorgr3Tth28NteIS1FEV3aJi6CIAWsD+acY17XpF6K1w= 35 | IronPort-Data: A9a23:P1Ts4KAPg74kWhVW/xXjw5YqxClBgxIJ4kV8jS/XYbTApDoj0DRTm 36 | 2AeXTvTO/uNM2v9e91wYIqzoEoOuZKBmIMyTANkpHpgZkwRpJueD7x1DKtR0wB+jCHnZBg6h 37 | ynLQoCYdKjYdleF+lH3dOCJQUBUjcmgXqD7BPPPJhd/TAplTDZJoR94kobVuKYw6TSCK17L6 38 | I6aT/H3Ygf/gWcsazpMscpvlTs21BjMkGJA1rABTa0T1LPuvyF9JI4SI6i3M0z5TuF8dgJtb 39 | 7+epF0R1jqxEyYFUrtJoJ6iGqE5auS60Ty1t5Zjc/PKbi6uCcAF+v1T2PI0MS+7gtgS9jx74 40 | I0lWZeYEW/FMkBQ8QgQe0EwLs1wAUFJ0IPuPWSdtcGP9HzfaULK+PJlPX4nAYJNr46bAUkWn 41 | RAZAC4VdVXb2ru7nLm3Q69ynt9lK9freoUSphmMzxmAUKxgG8qTBfyRvpkBhF/chegWdRraT 42 | 9UGdn9jZQjJSwNXIRJPVcszwOykjz/hayYer0yV46w6/wA/ySQoiOO1aYeOJ4HULSlTtkiiu 43 | TzlrlvTOx8HOte/1j+12Wqx2taayEsXX6pITOXlp5aGmma7zWsWEBkXXF2wrNGwiUe3XdlWM 44 | EUY9yApqbM7skesS5/4T3WFTGWsoAYHHoINSuBm4w2EjLTM+ECeGGhCSDNdADA7iCMobQADy 45 | kOrm971Pg12qoSzcnWWx4iFkTznbED5MlQ+TSMDSAIE5fzqr4cykg/DQ75f/Eid0YGd9dbYn 46 | GvikcQuu1kApZJbh/jrpzgrlxr2/cOZEWbZ8y2KBjrN0+9vWGKyT6KSgWU3DMqsz67AETFtW 47 | UTofeDDt4gz4WmlznDlfQn0NOjBCwy5GDPdm0VzOJIq6i6g/XWuFagJvm8jeB03bJ1cJWa0C 48 | KM2he+3zM8IVJdNRf8sC79d9+x7lPG4fTgYfquLMYARO8QZmPGvpXo0PhTJt4wSrKTcuftnY 49 | crKGSpdJXEECex8xSHeegvu+e9D+8zK/kuCFfjTlkz5uZLHPS79YelfYDOzghURqfjsTPP9r 50 | YoCaqNnCnx3C4XDX8Ug2ddMfABUdylgWMGeRg4+XrfrHzeK0VoJU5f5qY7NsaQ+90iJvo8kP 51 | 01RmGcAoLY8uZEDARuScTU7NeriAZ1zpDQlJTdqOkionX4ufe5DKUvZm4QfJdEayQC78cNJc 52 | g== 53 | IronPort-HdrOrdr: 
A9a23:IE0U3q0tRN9Bymkrd2qGBQqjBNUkLtp133Aq2lEZdPWPSL36qy 54 | nAppkmPHPP6Qr5O0tBpTn/AsW9qBrnnPYfi7X5W43SPjUO01HHEGgN1+Tf/wE= 55 | X-IronPort-Anti-Spam-Filtered: true 56 | Subject: =?UTF-8?B?VHJhbnNmZXJlbmNpYSBJbnRlcmJhbmNhcmlhIEJhbmNhIGVuIEzDrW5lYQ==?= 57 | X-IronPort-AV: E=Sophos;i="5.95,173,1661835600"; 58 | d="html'217?zip'217,48?scan'217,48,208,48,217";a="1030153556" 59 | X-MGA-submission: =?us-ascii?q?MDFfnUeOjYtx/kPz1rQrHfQrNIBGcOsaef6Pxd?= 60 | =?us-ascii?q?fYLajkW0aqpJS8e/pX+Kfz9sRkdcd5xyISxIOdF+YXiKE+ILG7RI8546?= 61 | =?us-ascii?q?G6AgtU3PrBER1OQ5hnnprxu8FcYwakAkY4OSc=3D?= 62 | Received: from vps-051e4cda.vps.ovh.net ([152.228.133.10]) by 63 | apiron13.triara.com with ESMTP/TLS/ECDHE-RSA-AES128-GCM-SHA256; 10 Oct 2022 64 | 15:11:34 -0500 65 | Received: from vps-051e4cda.vps.ovh.net (localhost [127.0.0.1]) by 66 | vps-051e4cda.vps.ovh.net (8.15.2/8.15.2/Debian-10) with ESMTP id 67 | 29AKBWdZ005349 for ; Mon, 10 Oct 2022 68 | 20:11:32 GMT 69 | Received: (from www-data@localhost) by vps-051e4cda.vps.ovh.net 70 | (8.15.2/8.15.2/Submit) id 29AKBWwn005317; Mon, 10 Oct 2022 20:11:32 GMT 71 | Date: Mon, 10 Oct 2022 20:11:32 +0000 72 | Message-ID: <202210102011.29AKBWwn005317@vps-051e4cda.vps.ovh.net> 73 | To: 74 | From: =?UTF-8?B?bm90aWZpY2FjY2lvbi1jbGllbnRlc0BiYnZhLm14?= 75 | 76 | Reply-To: 77 | Return-Path: www-data@vps-051e4cda.vps.ovh.net 78 | X-MS-Exchange-Organization-AuthSource: APCNHUB11.correo.local 79 | X-MS-Exchange-Organization-AuthAs: Anonymous 80 | Content-Type: multipart/mixed; boundary="B_3748259537_1769991493" 81 | MIME-Version: 1.0 82 | 83 | --B_3748259537_1769991493 84 | Content-Type: multipart/alternative; boundary="B_3748259537_911412125" 85 | 86 | --B_3748259537_911412125 87 | Content-Type: text/plain; charset="UTF-8" 88 | Content-Transfer-Encoding: quoted-printable 89 | 90 | 91 | 92 | --B_3748259537_911412125 93 | Content-Type: text/html; charset="UTF-8" 94 | Content-Transfer-Encoding: quoted-printable 95 | 96 | 97 | 98 | 99 | Test 100 | 101 
| 102 | --B_3748259537_911412125-- 103 | -------------------------------------------------------------------------------- /tests/mails/mail_test_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpamScope/mail-parser/1782bf724190d15c5063b6cc079637599ceb61ca/tests/mails/mail_test_2 -------------------------------------------------------------------------------- /tests/mails/mail_test_3: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | Delivered-To: kalinokyra@thismail.it 3 | Received: (qmail 13850 invoked from network); 22 Aug 2016 13:56:15 -0000 4 | Received: from 67.175.76.202.static.randtelekom.com.tr (67.175.76.202) 5 | by smtp.server.net with SMTP; 22 Aug 2016 13:56:15 -0000 6 | Date: Mon, 22 Aug 2016 16:56:09 +0300 7 | From: "Ava Oneil" 8 | To: kalinokyra@thismail.it 9 | Message-ID: <8AF5A2192C2A46BE.2743cdb5-7033-d969-fce8-c552542a49a3@mail.outlook.com> 10 | Subject: Hi there 11 | MIME-Version: 1.0 12 | Content-Type: multipart/mixed; 13 | boundary="----=_NextPart_000_0010_01D1FC96.14E54A00" 14 | X-Mailer: Outlook for iOS and Android 15 | 16 | ------=_NextPart_000_0010_01D1FC96.14E54A00 17 | Content-Type: multipart/alternative; 18 | boundary="----=_NextPart_001_0011_01D1FC96.14E54A00" 19 | 20 | ------=_NextPart_001_0011_01D1FC96.14E54A00 21 | Content-Type: text/plain; charset=UTF-8 22 | Content-Transfer-Encoding: quoted-printable 23 | X-WatchGuard-AntiVirus: part scanned. clean action=allow 24 | 25 | I am sending you the bills of the goods we delivered to you in the = 26 | attachment 27 | 28 | ------=_NextPart_001_0011_01D1FC96.14E54A00 29 | Content-Type: text/html; charset=utf-8 30 | Content-Transfer-Encoding: 7bit 31 | X-WatchGuard-AntiVirus: part scanned. clean action=allow 32 | 33 |

I am sending you the bills of the goods we delivered to you in the attachment

34 | 35 | ------=_NextPart_001_0011_01D1FC96.14E54A00-- 36 | 37 | ------=_NextPart_000_0010_01D1FC96.14E54A00 38 | Content-Type: text/plain; name="message.txt"; charset="iso-8859-1" 39 | Content-Transfer-Encoding: quoted-printable 40 | Content-Disposition: inline 41 | 42 | 43 | The WatchGuard Firebox that protects your network has detected a message = 44 | that may not be safe. 45 | 46 | Cause : The file type may not be safe. 47 | Content type : application/x-compressed 48 | File name : export_pdf_ 65acf801~.js 49 | Status : File Name violation 50 | Action : The Firebox deleted export_pdf_ 65acf801~.js. 51 | 52 | Your network administrator can not restore this attachment. 53 | 54 | 55 | ------=_NextPart_000_0010_01D1FC96.14E54A00-- 56 | -------------------------------------------------------------------------------- /tests/mails/mail_test_8: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | Delivered-To: eboktor@romolo.com 3 | Received: (qmail 23530 invoked from network); 7 Mar 2017 22:29:25 -0000 4 | Received: from host.colocrossing.com (HELO 04d930f1.moneytrack.top) (198.23.142.158) 5 | by smtp.dakota.net with SMTP; 7 Mar 2017 22:29:25 -0000 6 | Received: from 04d930f1.lyeo00hp.moneytrack.top ([127.0.0.1]:8615 helo=lyeo00hp.moneytrack.top) 7 | by lyeo00hp.moneytrack.top with ESMTP id 04EKOWYD930WHFJSF1; 8 | for ; Tue, 7 Mar 2017 14:29:24 -0800 9 | Message-ID: <3615343813390903615122237013831979@lyeo00hp.moneytrack.top> 10 | To: 11 | Date: Tue, 7 Mar 2017 14:29:24 -0800 12 | From: "Helicopter_flight_simulator" 13 | Subject: Have you ever wanted to land on an Aircraft Carrier 14 | Content-Language: en-us 15 | MIME-Version: 1.0 16 | Content-Transfer-Encoding: 8bit 17 | Content-Type: multipart/alternative; 18 | boundary="----=Part.335.4745.1488925764" 19 | 20 | ------=Part.335.4745.1488925764 21 | Content-Transfer-Encoding: 8bit 22 | Content-Type: text/plain; charset="UTF-8" 23 | 24 | "The Perfect Filler Between 
Real World Flying" 25 | Imagine "Real Life" Flying At The Comfort Of Your Home... 26 | Click here 27 | 28 | http://www.moneytrack.top/l/lt10VX3615QP370UC/1222A1383JJ1979TG249B81339090GF3323432606 29 | 30 | With 120+ Aircraft to Master, From the 1903 Wright Flyer to the Latest Military Fighter Jets. 31 | 20,000+ Real Airports With changeable Weather and NASA Flight Models. 32 | Realistic Worldwide Terrain Based On US Defense Mapping Agency + Lifetime FREE updates/upgrades. 33 | Used On Television Episodes & Professional Flight Schools - The Most Realistic Flight Sim To Date... 34 | 35 | http://www.moneytrack.top/l/lt10YC3615UB370FH/1222R1383JC1979CT249C81339090FD3323432606 36 | 37 | Meet up online with other ProFlightSimulator Users to fly in formation, put on an 38 | air show or just to have fun. You can easily locate other pilots or yourself with 39 | Google Map integration - Multiplayer map server shows all the active pilots 40 | superimposed on top of a Google map. 41 | Enjoy Real-Life Flying Today 42 | 43 | http://www.moneytrack.top/l/lt10SG3615LD370EN/1222O1383GD1979LL249Y81339090AK3323432606 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | If you do not want to receive any further mail click here. 53 | 82023 Peters Road, Suite 1000 Plantation, FL 33324 54 | 55 | http://www.moneytrack.top/l/lc13NE3615MS370NH/1222I1383VY1979GM249T81339090TL3323432606 56 | 57 | ------=Part.335.4745.1488925764 58 | Content-Transfer-Encoding: 8bit 59 | Content-Type: text/html; charset="UTF-8" 60 | 61 | 62 | 63 | 64 | 65 | 66 |
67 | 68 | 69 | 70 | 71 | 72 | 112 | 113 | 114 | 115 | 118 | 119 | 120 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 |
"The Perfect Filler Between Real World Flying"
73 | 74 | 75 | 77 |
76 | Imagine "Real Life" Flying At The Comfort Of Your Home...
78 | 79 | 80 | 81 | 84 | 85 | 109 | 110 | 111 |
82 | 83 | Click here 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 |
 With 120+ Aircraft to Master, From the 1903 Wright Flyer to the Latest Military Fighter Jets.
 20,000+ Real Airports With changeable Weather and NASA Flight Models.
 Realistic Worldwide Terrain Based On US Defense Mapping Agency + Lifetime FREE updates/upgrades.
 Used On Television Episodes & Professional Flight Schools - The Most Realistic Flight Sim To Date...
108 |
116 | Meet up online with other ProFlightSimulator Users to fly in formation, put on an
117 | air show or just to have fun. You can easily locate other pilots or yourself with
Google Map integration - Multiplayer map server shows all the active pilots
superimposed on top of a Google map.
121 | 125 | 126 | 127 |
122 | Enjoy Real-Life Flying Today 123 | 124 |

137 |

138 |

139 | 140 |
141 | 142 |
143 | 144 | If you do not want to receive any further mail click here.
82023 Peters Road, Suite 1000 Plantation, FL 33324 145 |
146 |

147 |

148 |

149 |
150 |
T%]/Earth /Wheel/Highway / 151 |
God /Guitar /Sandar /Sandpaper /Typewriter ar /Post-office /Pepper /Hat /Earth /Spiral /Garden /Tennis rectcity /Pants /Sphere /Table /Comet / /Bee /Button /Map /Dhroom /Eyes /Train /Hat /Planet /Web /Girl /Surveyor /Rocket /Slave /Software /Vampi /God /od /Guitar /Sandpaper /Typewriter ar /Post-office /Pepper /Hat /Earth /Spiral /Garden /Tennis ract 152 |
/Earth /Wheel/Highway / 153 |
God /Guitar /Sandpaper /Typewriter /Finpaper /Typewriter /Finuum /Sun /Egg /Leg /Ice /Guitar /Post-office /Pepper 154 |
/Hat /Eartricity /Pants /Sphere /Table /Comet / /Bee /Button /Map /Dhroom /Eyes /Train /Hat /Planet /Web /Girl /Surveyor /Rocket /Slave /Software /Vampi /God /Guitar /Sandpaper /Typewriter ar /Post-office /Pepper /Hat /Earth /Spiral /Garden /Tennis rectcity /Pants /Sphere /Table /Comet / /Bee /Button /Map /Dhroom /Eyes /Train /Hat /Planet /Web /Girl /Surveyor /Rocket /Slave /Software /Vampi /God /Guitar /Sandpaper /Typewriter ar /Post-office /Pepper /Hat /Earth /Spiral /Garden /Tennis ract 155 |
/Earth /Wheel/Highway / 156 |
God /Guitar /Sandpaper /Typewriter /Finger /Feather /Salt
157 | 158 | 159 | 160 | 161 | 162 | ------=Part.335.4745.1488925764-- 163 | -------------------------------------------------------------------------------- /tests/mails/mail_test_9: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | Delivered-To: sherlock@dada.com 3 | Received: (qmail 12733 invoked from network); 8 Mar 2017 05:57:05 -0000 4 | Received: from unknown (HELO sgis.com.cn) (218.15.33.11) 5 | by smtp.didi.net with SMTP; 8 Mar 2017 05:57:05 -0000 6 | Received: (qmail 11857 invoked by uid 8061); 26 Dec 2016 05:32:04 -0000 7 | Received: from 223.152.177.168 by mail.sgis.com.cn (envelope-from , uid 0) with qmail-scanner-1.24 8 | (4.4.4.56. 9 | Clear:RC:0(223.152.177.168):. 10 | Processed in 2.047196 secs); 26 Dec 2016 05:32:04 -0000 11 | Received: from unknown (HELO ljhw) (zyb@sgis.com.cn@[223.152.177.168]) 12 | (envelope-sender ) 13 | by 0 (magicmail) with SMTP 14 | for ; 26 Dec 2016 05:32:02 -0000 15 | X-Smtpd-Remotehost: unknown 16 | X-Smtpd-Helo: ljhw 17 | X-Smtpd-IP: 223.152.177.168 18 | X-Smtpd-From: zyb@sgis.com.cn 19 | X-Smtpd-To: webmaster@khaleejtimes.com 20 | Message-ID: <5AF385F77A53C2509D46E44914A76433@ljhw> 21 | From: "xwfcpggy" 22 | To: , 23 | , 24 | , 25 | , 26 | , 27 | , 28 | , 29 | , 30 | , 31 | , 32 | , 33 | , 34 | , 35 | , 36 | , 37 | , 38 | , 39 | , 40 | , 41 | , 42 | 43 | Subject: =?gb2312?B?ztKw0b61zbfArb380KmjrMTjw8e/tL+01eK49srHyrLDtIzCo6zV4g==?= 44 | =?gb2312?B?1tazobrP19TIu8nZsrvBy8TayeQ=?= 45 | Date: Mon, 26 Dec 2016 13:33:31 +0800 46 | MIME-Version: 1.0 47 | Content-Type: text/html; 48 | charset="gb2312" 49 | Content-Transfer-Encoding: base64 50 | X-Priority: 3 51 | X-MSMail-Priority: Normal 52 | X-Mailer: Microsoft Outlook Express 6.00.2900.5512 53 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.5512 54 | 55 | PCFET0NUWVBFIEhUTUwgUFVCTElDICItLy9XM0MvL0RURCBIVE1MIDQuMCBUcmFuc2l0aW9uYWwv 56 | L0VOIj4NCjxIVE1MPjxIRUFEPg0KPE1FVEEgY29udGVudD0idGV4dC9odG1sOyBjaGFyc2V0PWdi 57 | 
MjMxMiIgaHR0cC1lcXVpdj1Db250ZW50LVR5cGU+DQo8TUVUQSBuYW1lPUdFTkVSQVRPUiBjb250 58 | ZW50PSJNU0hUTUwgMTAuMDAuOTIwMC4xNzI5NiI+PC9IRUFEPg0KPEJPRFk+DQo8UD48QSBocmVm 59 | PSJodHRwOi8vZ28yYnV5LmNvbS5jbi8iPjxFTT48Rk9OVCANCnNpemU9NT5odHRwOi8vZ28yYnV5 60 | LmNvbS5jbi88L0ZPTlQ+PC9FTT48L0E+Jm5ic3A7Jm5ic3A7Jm5ic3A7Jm5ic3A7IA0KPFNUUk9O 61 | Rz48Rk9OVCBzdHlsZT0iQkFDS0dST1VORC1DT0xPUjogZGFya2JsdWUiIA0KY29sb3I9eWVsbG93 62 | PjkwNjUmbmJzcDsmbmJzcDs8L0ZPTlQ+PC9TVFJPTkc+PC9QPg0KPFA+PFNUUk9ORz48Rk9OVCBz 63 | dHlsZT0iQkFDS0dST1VORC1DT0xPUjogZGFya2JsdWUiIA0KY29sb3I9eWVsbG93PjEzOjMzOjM5 64 | Jm5ic3A7Jm5ic3A7Jm5ic3A7IA0KMTM6MzM6Mzk8L0ZPTlQ+PC9TVFJPTkc+PC9QPg0KPFA+PFNU 65 | Uk9ORz48Rk9OVCBzdHlsZT0iQkFDS0dST1VORC1DT0xPUjogZGFya2JsdWUiIA0KY29sb3I9eWVs 66 | bG93PiZuYnNwOyZuYnNwOyZuYnNwO87SsNG+tc23wK29/NCpo6zE48PHv7S/tNXiuPbKx8qyw7SM 67 | wqOs1eLW1rOhus/X1Mi7ydmyu8HLxNrJ5DwvRk9OVD48L1NUUk9ORz48L1A+PC9CT0RZPjwvSFRN 68 | TD4NCg== 69 | -------------------------------------------------------------------------------- /tests/test_mail_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | """ 18 | 19 | import datetime 20 | import hashlib 21 | import os 22 | import shutil 23 | import sys 24 | import tempfile 25 | import unittest 26 | from unittest.mock import patch 27 | 28 | import six 29 | 30 | import mailparser 31 | from mailparser.utils import ( 32 | convert_mail_date, 33 | fingerprints, 34 | get_header, 35 | get_mail_keys, 36 | get_to_domains, 37 | parse_received, 38 | ported_open, 39 | ported_string, 40 | random_string, 41 | receiveds_parsing, 42 | ) 43 | 44 | # base paths 45 | base_path = os.path.realpath(os.path.dirname(__file__)) 46 | root = os.path.join(base_path, "..") 47 | 48 | # raw mails to test 49 | mail_test_1 = os.path.join(base_path, "mails", "mail_test_1") 50 | mail_test_2 = os.path.join(base_path, "mails", "mail_test_2") 51 | mail_test_3 = os.path.join(base_path, "mails", "mail_test_3") 52 | mail_test_4 = os.path.join(base_path, "mails", "mail_test_4") 53 | mail_test_5 = os.path.join(base_path, "mails", "mail_test_5") 54 | mail_test_6 = os.path.join(base_path, "mails", "mail_test_6") 55 | mail_test_7 = os.path.join(base_path, "mails", "mail_test_7") 56 | mail_test_8 = os.path.join(base_path, "mails", "mail_test_8") 57 | mail_test_9 = os.path.join(base_path, "mails", "mail_test_9") 58 | mail_test_10 = os.path.join(base_path, "mails", "mail_test_10") 59 | mail_test_11 = os.path.join(base_path, "mails", "mail_test_11") 60 | mail_test_12 = os.path.join(base_path, "mails", "mail_test_12") 61 | mail_test_13 = os.path.join(base_path, "mails", "mail_test_13") 62 | mail_test_14 = os.path.join(base_path, "mails", "mail_test_14") 63 | mail_test_15 = os.path.join(base_path, "mails", "mail_test_15") 64 | mail_test_16 = os.path.join(base_path, "mails", "mail_test_16") 65 | mail_test_17 = os.path.join(base_path, "mails", "mail_test_17") 66 | mail_malformed_1 = os.path.join(base_path, "mails", "mail_malformed_1") 67 | mail_malformed_2 = os.path.join(base_path, "mails", "mail_malformed_2") 68 | mail_malformed_3 = os.path.join(base_path, 
"mails", "mail_malformed_3") 69 | mail_outlook_1 = os.path.join(base_path, "mails", "mail_outlook_1") 70 | 71 | 72 | class TestMailParser(unittest.TestCase): 73 | def setUp(self): 74 | self.all_mails = ( 75 | mail_test_1, 76 | mail_test_2, 77 | mail_test_3, 78 | mail_test_4, 79 | mail_test_5, 80 | mail_test_6, 81 | mail_test_7, 82 | mail_test_8, 83 | mail_test_9, 84 | mail_test_10, 85 | mail_test_11, 86 | mail_test_12, 87 | mail_test_13, 88 | mail_malformed_1, 89 | mail_malformed_2, 90 | mail_malformed_3, 91 | ) 92 | 93 | def test_write_attachments(self): 94 | attachments = [ 95 | "<_1_0B4E44A80B15F6FC005C1243C12580DD>", 96 | "<_1_0B4E420C0B4E3DD0005C1243C12580DD>", 97 | "<_1_0B4E24640B4E1564005C1243C12580DD>", 98 | "Move To Eight ZWEP6227F.pdf", 99 | ] 100 | random_path = os.path.join(root, "tests", random_string()) 101 | mail = mailparser.parse_from_file(mail_test_10) 102 | os.makedirs(random_path) 103 | mail.write_attachments(random_path) 104 | for i in attachments: 105 | self.assertTrue(os.path.exists(os.path.join(random_path, i))) 106 | shutil.rmtree(random_path) 107 | 108 | def test_issue62(self): 109 | mail = mailparser.parse_from_file(mail_test_14) 110 | received_spf = mail.Received_SPF 111 | self.assertIsInstance(received_spf, list) 112 | self.assertIn("custom_header1", received_spf) 113 | self.assertIn("custom_header2", received_spf) 114 | 115 | def test_html_field(self): 116 | mail = mailparser.parse_from_file(mail_malformed_1) 117 | self.assertIsInstance(mail.text_html, list) 118 | self.assertIsInstance(mail.text_html_json, six.text_type) 119 | self.assertEqual(len(mail.text_html), 1) 120 | 121 | def test_text_not_managed(self): 122 | mail = mailparser.parse_from_file(mail_test_14) 123 | self.assertIsInstance(mail.text_not_managed, list) 124 | self.assertIsInstance(mail.text_not_managed_json, six.text_type) 125 | self.assertEqual(len(mail.text_not_managed), 1) 126 | self.assertEqual("PNG here", mail.text_not_managed[0]) 127 | 128 | def 
test_get_mail_keys(self): 129 | mail = mailparser.parse_from_file(mail_test_11) 130 | all_parts = get_mail_keys(mail.message) 131 | mains_parts = get_mail_keys(mail.message, False) 132 | self.assertNotEqual(all_parts, mains_parts) 133 | self.assertIn("message-id", mains_parts) 134 | self.assertIn("x-filterd-recvd-size", all_parts) 135 | self.assertNotIn("x-filterd-recvd-size", mains_parts) 136 | 137 | def test_mail_partial(self): 138 | mail = mailparser.parse_from_file(mail_test_10) 139 | self.assertNotEqual(mail.mail, mail.mail_partial) 140 | self.assertIn("message-id", mail.mail_partial) 141 | self.assertIn("x-ibm-av-version", mail.mail) 142 | self.assertNotIn("x-ibm-av-version", mail.mail_partial) 143 | result = mail.mail_partial_json 144 | self.assertIsInstance(result, six.text_type) 145 | nr_attachments = len(mail._attachments) 146 | self.assertEqual(nr_attachments, 4) 147 | 148 | def test_not_parsed_received(self): 149 | mail = mailparser.parse_from_file(mail_test_9) 150 | for i in mail.received: 151 | self.assertNotIn("raw", i) 152 | self.assertIn("hop", i) 153 | 154 | def test_issue_received(self): 155 | mail = mailparser.parse_from_file(mail_test_8) 156 | for i in mail.received: 157 | self.assertIn("date_utc", i) 158 | self.assertIsNotNone(i["date_utc"]) 159 | 160 | def test_get_header(self): 161 | mail = mailparser.parse_from_file(mail_test_1) 162 | h1 = get_header(mail.message, "from") 163 | self.assertIsInstance(h1, six.text_type) 164 | 165 | def test_receiveds_parsing(self): 166 | for i in self.all_mails: 167 | mail = mailparser.parse_from_file(i) 168 | receiveds = mail.received_raw 169 | result = receiveds_parsing(receiveds) 170 | self.assertIsInstance(result, list) 171 | for j in result: 172 | self.assertIsInstance(j, dict) 173 | self.assertIn("hop", j) 174 | self.assertIn("delay", j) 175 | 176 | def test_ipaddress(self): 177 | mail = mailparser.parse_from_file(mail_test_2) 178 | trust = "smtp.customers.net" 179 | 180 | ip = "217.76.210.112" 181 | 
result = mail.get_server_ipaddress(trust) 182 | self.assertEqual(result, ip) 183 | 184 | trust = "" 185 | result = mail.get_server_ipaddress(trust) 186 | self.assertIsNone(result) 187 | 188 | trust = " " 189 | result = mail.get_server_ipaddress(trust) 190 | self.assertIsNone(result) 191 | 192 | def test_ipaddress_unicodeerror(self): 193 | mail = mailparser.parse_from_file(mail_test_12) 194 | trust = "localhost" 195 | result = mail.get_server_ipaddress(trust) 196 | self.assertEqual(result, "96.202.181.20") 197 | 198 | def test_fingerprints_body(self): 199 | mail = mailparser.parse_from_file(mail_test_1) 200 | md5, sha1, sha256, sha512 = fingerprints(mail.body.encode("utf-8")) 201 | self.assertEqual(md5, "55852a2efe95e7249887c92cc02123f8") 202 | self.assertEqual(sha1, "62fef1e38327ed09363624c3aff8ea11723ee05f") 203 | self.assertEqual( 204 | sha256, 205 | ("cd4af1017f2e623f6d38f691048b6a28d8b1f44a0478137b4337eac6de78f71a"), 206 | ) 207 | self.assertEqual( 208 | sha512, 209 | ( 210 | "4a573c7929b078f2a2c1c0f869d418b0c020d4" 211 | "d37196bd6dcc209f9ccb29ca67355aa5e47b97" 212 | "c8bf90377204f59efde7ba1fc071b6f250a665" 213 | "72f63b997e92e8" 214 | ), 215 | ) 216 | 217 | def test_fingerprints_unicodeencodeerror(self): 218 | mail = mailparser.parse_from_file(mail_test_7) 219 | for i in mail.attachments: 220 | fingerprints(i["payload"]) 221 | 222 | def test_malformed_mail(self): 223 | mail = mailparser.parse_from_file(mail_malformed_3) 224 | defects_categories = mail.defects_categories 225 | self.assertIn("StartBoundaryNotFoundDefect", defects_categories) 226 | self.assertIn("MultipartInvariantViolationDefect", defects_categories) 227 | self.assertIn("reply-to", mail.mail) 228 | self.assertNotIn("reply_to", mail.mail) 229 | reply_to = [("VICTORIA Souvenirs", "smgesi4@gmail.com")] 230 | self.assertEqual(mail.reply_to, reply_to) 231 | self.assertEqual(mail.fake_header, six.text_type()) 232 | 233 | # This email has header X-MSMail-Priority 234 | msmail_priority = 
mail.X_MSMail_Priority 235 | self.assertEqual(msmail_priority, "High") 236 | 237 | def test_type_error(self): 238 | mail = mailparser.parse_from_file(mail_test_5) 239 | self.assertEqual(len(mail.attachments), 5) 240 | for i in mail.attachments: 241 | self.assertIsInstance(i["filename"], six.text_type) 242 | 243 | def test_filename_decode(self): 244 | mail = mailparser.parse_from_file(mail_test_11) 245 | for i in mail.attachments: 246 | self.assertIsInstance(i["filename"], six.text_type) 247 | 248 | def test_valid_mail(self): 249 | m = mailparser.parse_from_string("fake mail") 250 | self.assertFalse(m.message) 251 | 252 | def test_receiveds(self): 253 | mail = mailparser.parse_from_file(mail_test_1) 254 | self.assertEqual(len(mail.received), 6) 255 | 256 | self.assertIsInstance(mail.received, list) 257 | for i in mail.received: 258 | self.assertIsInstance(i, dict) 259 | 260 | self.assertIsInstance(mail.received_raw, list) 261 | for i in mail.received_raw: 262 | self.assertIsInstance(i, six.text_type) 263 | 264 | self.assertIsInstance(mail.received_json, six.text_type) 265 | 266 | def test_parsing_know_values(self): 267 | mail = mailparser.parse_from_file(mail_test_2) 268 | trust = "smtp.customers.net" 269 | 270 | self.assertFalse(mail.has_defects) 271 | 272 | raw = "217.76.210.112" 273 | result = mail.get_server_ipaddress(trust) 274 | self.assertEqual(raw, result) 275 | 276 | raw = "<4516257BC5774408ADC1263EEBBBB73F@ad.regione.vda.it>" 277 | result = mail.message_id 278 | self.assertEqual(raw, result) 279 | 280 | raw = "echo@tu-berlin.de" 281 | result = mail.to 282 | self.assertEqual(len(result), 2) 283 | self.assertIsInstance(result, list) 284 | self.assertIsInstance(result[0], tuple) 285 | self.assertIsInstance(mail.to_json, six.text_type) 286 | self.assertIsInstance(mail.to_raw, six.text_type) 287 | self.assertEqual(raw, result[0][1]) 288 | 289 | raw = "meteo@regione.vda.it" 290 | result = mail.from_ 291 | self.assertEqual(raw, result[0][1]) 292 | 293 | raw = 
"Bollettino Meteorologico del 29/11/2015" 294 | result = mail.subject 295 | self.assertEqual(raw, result) 296 | 297 | result = mail.has_defects 298 | self.assertFalse(result) 299 | 300 | result = len(mail.attachments) 301 | self.assertEqual(3, result) 302 | 303 | self.assertIsInstance(mail.date_raw, six.text_type) 304 | self.assertIsInstance(mail.date_json, six.text_type) 305 | raw_utc = "2015-11-29T08:45:18+00:00" 306 | result = mail.date.isoformat() 307 | self.assertEqual(raw_utc, result) 308 | 309 | def test_types(self): 310 | mail = mailparser.parse_from_file(mail_test_2) 311 | trust = "smtp.customers.net" 312 | 313 | self.assertFalse(mail.has_defects) 314 | 315 | result = mail.mail 316 | self.assertIsInstance(result, dict) 317 | self.assertNotIn("defects", result) 318 | self.assertIn("has_defects", result) 319 | 320 | result = mail.get_server_ipaddress(trust) 321 | self.assertIsInstance(result, six.text_type) 322 | 323 | result = mail.mail_json 324 | self.assertIsInstance(result, six.text_type) 325 | 326 | result = mail.headers_json 327 | self.assertIsInstance(result, six.text_type) 328 | 329 | result = mail.headers 330 | self.assertIsInstance(result, dict) 331 | 332 | result = mail.body 333 | self.assertIsInstance(result, six.text_type) 334 | 335 | result = mail.date 336 | self.assertIsInstance(result, datetime.datetime) 337 | 338 | result = mail.from_ 339 | self.assertIsInstance(result, list) 340 | 341 | result = mail.to 342 | self.assertIsInstance(result, list) 343 | self.assertEqual(len(result), 2) 344 | self.assertIsInstance(result[0], tuple) 345 | self.assertEqual(len(result[0]), 2) 346 | 347 | result = mail.subject 348 | self.assertIsInstance(result, six.text_type) 349 | 350 | result = mail.message_id 351 | self.assertIsInstance(result, six.text_type) 352 | 353 | result = mail.attachments 354 | self.assertIsInstance(result, list) 355 | 356 | result = mail.date 357 | self.assertIsInstance(result, datetime.datetime) 358 | 359 | result = mail.defects 360 | 
self.assertIsInstance(result, list) 361 | 362 | def test_defects(self): 363 | mail = mailparser.parse_from_file(mail_malformed_1) 364 | 365 | self.assertTrue(mail.has_defects) 366 | self.assertEqual(1, len(mail.defects)) 367 | self.assertEqual(1, len(mail.defects_categories)) 368 | self.assertIn("defects", mail.mail) 369 | self.assertIn("StartBoundaryNotFoundDefect", mail.defects_categories) 370 | self.assertIsInstance(mail.mail_json, six.text_type) 371 | 372 | result = len(mail.attachments) 373 | self.assertEqual(1, result) 374 | 375 | mail = mailparser.parse_from_file(mail_test_1) 376 | if six.PY2: 377 | self.assertFalse(mail.has_defects) 378 | self.assertNotIn("defects", mail.mail) 379 | elif six.PY3: 380 | self.assertTrue(mail.has_defects) 381 | self.assertEqual(1, len(mail.defects)) 382 | self.assertEqual(1, len(mail.defects_categories)) 383 | self.assertIn("defects", mail.mail) 384 | self.assertIn("CloseBoundaryNotFoundDefect", mail.defects_categories) 385 | 386 | def test_defects_bug(self): 387 | mail = mailparser.parse_from_file(mail_malformed_2) 388 | 389 | self.assertTrue(mail.has_defects) 390 | self.assertEqual(1, len(mail.defects)) 391 | self.assertEqual(1, len(mail.defects_categories)) 392 | self.assertIn("defects", mail.mail) 393 | self.assertIn("StartBoundaryNotFoundDefect", mail.defects_categories) 394 | self.assertIsInstance(mail.parsed_mail_json, six.text_type) 395 | 396 | result = len(mail.attachments) 397 | self.assertEqual(1, result) 398 | 399 | def test_add_content_type(self): 400 | mail = mailparser.parse_from_file(mail_test_3) 401 | 402 | self.assertFalse(mail.has_defects) 403 | 404 | result = mail.mail 405 | 406 | self.assertEqual(len(result["attachments"]), 1) 407 | self.assertIsInstance( 408 | result["attachments"][0]["mail_content_type"], six.text_type 409 | ) 410 | self.assertFalse(result["attachments"][0]["binary"]) 411 | self.assertIsInstance(result["attachments"][0]["payload"], six.text_type) 412 | self.assertEqual( 413 | 
result["attachments"][0]["content_transfer_encoding"], "quoted-printable" 414 | ) 415 | self.assertEqual(result["attachments"][0]["charset"], "iso-8859-1") 416 | self.assertEqual(result["attachments"][0]["content-disposition"], "inline") 417 | 418 | mail = mailparser.parse_from_file(mail_malformed_1) 419 | attachments = mail.mail["attachments"] 420 | self.assertEqual(attachments[0]["content-disposition"], "") 421 | 422 | def test_classmethods(self): 423 | # MailParser.from_file 424 | m = mailparser.MailParser.from_file(mail_test_3) 425 | m.parse() 426 | result = m.mail 427 | self.assertEqual(len(result["attachments"]), 1) 428 | 429 | # MailParser.from_string 430 | m = mailparser.MailParser.from_string(m.message_as_string) 431 | m.parse() 432 | result = m.mail 433 | self.assertEqual(len(result["attachments"]), 1) 434 | 435 | def test_bug_UnicodeDecodeError(self): 436 | m = mailparser.parse_from_file(mail_test_6) 437 | self.assertIsInstance(m.mail, dict) 438 | self.assertIsInstance(m.mail_json, six.text_type) 439 | 440 | @patch("mailparser.core.os.remove") 441 | @patch("mailparser.core.msgconvert") 442 | def test_parse_from_file_msg(self, mock_msgconvert, mock_remove): 443 | """ 444 | Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0 445 | 446 | The email used for unittest were found randomly on VirusTotal and 447 | then already publicly available so can not be considered 448 | as privacy violation 449 | """ 450 | mock_msgconvert.return_value = (mail_test_2, None) 451 | m = mailparser.parse_from_file_msg(mail_outlook_1) 452 | mock_remove.assert_called_once_with(mail_test_2) 453 | email = m.mail 454 | self.assertIn("attachments", email) 455 | self.assertEqual(len(email["attachments"]), 3) 456 | self.assertIn("from", email) 457 | self.assertEqual(email["from"][0][1], "meteo@regione.vda.it") 458 | self.assertIn("subject", email) 459 | 460 | def test_from_file_obj(self): 461 | with ported_open(mail_test_2) as fp: 462 | mail = 
mailparser.parse_from_file_obj(fp) 463 | trust = "smtp.customers.net" 464 | 465 | self.assertFalse(mail.has_defects) 466 | 467 | result = mail.mail 468 | self.assertIsInstance(result, dict) 469 | self.assertNotIn("defects", result) 470 | self.assertNotIn("anomalies", result) 471 | self.assertIn("has_defects", result) 472 | 473 | result = mail.get_server_ipaddress(trust) 474 | self.assertIsInstance(result, six.text_type) 475 | 476 | result = mail.mail_json 477 | self.assertIsInstance(result, six.text_type) 478 | 479 | result = mail.headers 480 | self.assertIsInstance(result, dict) 481 | 482 | result = mail.headers_json 483 | self.assertIsInstance(result, six.text_type) 484 | 485 | result = mail.body 486 | self.assertIsInstance(result, six.text_type) 487 | 488 | result = mail.date 489 | self.assertIsInstance(result, datetime.datetime) 490 | 491 | result = mail.from_ 492 | self.assertIsInstance(result, list) 493 | 494 | result = mail.to 495 | self.assertIsInstance(result, list) 496 | self.assertEqual(len(result), 2) 497 | self.assertIsInstance(result[0], tuple) 498 | self.assertEqual(len(result[0]), 2) 499 | 500 | result = mail.subject 501 | self.assertIsInstance(result, six.text_type) 502 | 503 | result = mail.message_id 504 | self.assertIsInstance(result, six.text_type) 505 | 506 | result = mail.attachments 507 | self.assertIsInstance(result, list) 508 | 509 | result = mail.date 510 | self.assertIsInstance(result, datetime.datetime) 511 | 512 | result = mail.defects 513 | self.assertIsInstance(result, list) 514 | 515 | result = mail.timezone 516 | self.assertEqual(result, "+1.0") 517 | 518 | def test_get_to_domains(self): 519 | m = mailparser.parse_from_file(mail_test_6) 520 | 521 | domains_1 = get_to_domains(m.to, m.reply_to) 522 | self.assertIsInstance(domains_1, list) 523 | self.assertIn("test.it", domains_1) 524 | 525 | domains_2 = m.to_domains 526 | self.assertIsInstance(domains_2, list) 527 | self.assertIn("test.it", domains_2) 528 | 
self.assertEqual(domains_1, domains_2) 529 | 530 | self.assertIsInstance(m.to_domains_json, six.text_type) 531 | 532 | def test_convert_mail_date(self): 533 | s = "Mon, 20 Mar 2017 05:12:54 +0600" 534 | d, t = convert_mail_date(s) 535 | self.assertEqual(t, "+6.0") 536 | self.assertEqual(str(d), "2017-03-19 23:12:54+00:00") 537 | s = "Mon, 20 Mar 2017 05:12:54 -0600" 538 | d, t = convert_mail_date(s) 539 | self.assertEqual(t, "-6.0") 540 | s = "Mon, 11 Dec 2017 15:27:44 +0530" 541 | d, t = convert_mail_date(s) 542 | self.assertEqual(t, "+5.5") 543 | 544 | def test_ported_string(self): 545 | raw_data = "" 546 | s = ported_string(raw_data) 547 | self.assertEqual(s, six.text_type()) 548 | 549 | raw_data = "test" 550 | s = ported_string(raw_data) 551 | self.assertEqual(s, "test") 552 | 553 | def test_parse_domain_with_tld_dot_id(self): 554 | """Support for .id tld (Indonesia)""" 555 | received = """ 556 | from web.myhost.id 557 | by smtp.domain.id (Proxmox) with ESMTPS id SOMEIDHERE 558 | for ; Wed, 19 Feb 2025 15:00:00 +0700 (WIB) 559 | """.strip() 560 | 561 | expected = { 562 | "from": "web.myhost.id", 563 | "by": "smtp.domain.id (Proxmox)", 564 | "with": "ESMTPS", 565 | "id": "SOMEIDHERE", 566 | "for": "", 567 | "date": "Wed, 19 Feb 2025 15:00:00 +0700 (WIB)", 568 | } 569 | 570 | values_by_clause = parse_received(received) 571 | 572 | self.assertEqual(expected, values_by_clause) 573 | 574 | def test_parse_domain_with_tld_dot_by(self): 575 | """Support for .by tld (Belarus)""" 576 | received = """ 577 | from web.myhost.by 578 | by smtp.domain.by (Proxmox) with ESMTPS id SOMEIDHERE 579 | for ; Wed, 19 Feb 2025 15:00:00 +0700 (WIB) 580 | """.strip() 581 | 582 | expected = { 583 | "from": "web.myhost.by", 584 | "by": "smtp.domain.by (Proxmox)", 585 | "with": "ESMTPS", 586 | "id": "SOMEIDHERE", 587 | "for": "", 588 | "date": "Wed, 19 Feb 2025 15:00:00 +0700 (WIB)", 589 | } 590 | 591 | values_by_clause = parse_received(received) 592 | 593 | self.assertEqual(expected, 
values_by_clause) 594 | 595 | def test_standard_outlook(self): 596 | """Verify a basic outlook received header works.""" 597 | received = """ 598 | from DM3NAM03FT035 599 | by CY4PR0601CA0051.outlook.office365.com 600 | with Microsoft SMTP Server version=TLS1_2, cipher=TLS 601 | id 15.20.1185.23 602 | via Frontend Transport; Mon, 1 Oct 2018 09:49:21 +0000 603 | """.strip() 604 | 605 | expected = { 606 | "from": "DM3NAM03FT035", 607 | "by": "CY4PR0601CA0051.outlook.office365.com", 608 | "with": "Microsoft SMTP Server version=TLS1_2, cipher=TLS", 609 | "id": "15.20.1185.23", 610 | "via": "Frontend Transport", 611 | "date": "Mon, 1 Oct 2018 09:49:21 +0000", 612 | } 613 | values_by_clause = parse_received(received) 614 | 615 | self.assertEqual(expected, values_by_clause) 616 | 617 | def test_standard_google__with_cipher(self): 618 | """Verify that we don't match 'with cipher' a la google.""" 619 | received = """ 620 | from mail_yw1_f65.google.com 621 | by subdomain.domain.com Postfix with ESMTPS 622 | id abc123 for ; 623 | Tue, 25 Sep 2018 13:09:36 +0000 (UTC)""" 624 | 625 | expected = { 626 | "from": "mail_yw1_f65.google.com", 627 | "by": "subdomain.domain.com Postfix", 628 | "with": "ESMTPS", 629 | "id": "abc123", 630 | "for": "", 631 | "date": "Tue, 25 Sep 2018 13:09:36 +0000 (UTC)", 632 | } 633 | values_by_clause = parse_received(received) 634 | self.assertEqual(expected, values_by_clause) 635 | 636 | @unittest.skipIf(sys.version_info[0] < 3, "Must be using Python 3") 637 | def test_parse_from_bytes(self): 638 | with open(mail_test_2, "rb") as f: 639 | mail_bytes = f.read() 640 | 641 | mail = mailparser.parse_from_bytes(mail_bytes) 642 | trust = "smtp.customers.net" 643 | 644 | self.assertFalse(mail.has_defects) 645 | 646 | raw = "217.76.210.112" 647 | result = mail.get_server_ipaddress(trust) 648 | self.assertEqual(raw, result) 649 | 650 | raw = "<4516257BC5774408ADC1263EEBBBB73F@ad.regione.vda.it>" 651 | result = mail.message_id 652 | self.assertEqual(raw, 
result) 653 | 654 | raw = "echo@tu-berlin.de" 655 | result = mail.to 656 | self.assertEqual(len(result), 2) 657 | self.assertIsInstance(result, list) 658 | self.assertIsInstance(result[0], tuple) 659 | self.assertIsInstance(mail.to_json, six.text_type) 660 | self.assertIsInstance(mail.to_raw, six.text_type) 661 | self.assertEqual(raw, result[0][1]) 662 | 663 | raw = "meteo@regione.vda.it" 664 | result = mail.from_ 665 | self.assertEqual(raw, result[0][1]) 666 | 667 | raw = "Bollettino Meteorologico del 29/11/2015" 668 | result = mail.subject 669 | self.assertEqual(raw, result) 670 | 671 | result = mail.has_defects 672 | self.assertFalse(result) 673 | 674 | result = len(mail.attachments) 675 | self.assertEqual(3, result) 676 | 677 | self.assertIsInstance(mail.date_raw, six.text_type) 678 | self.assertIsInstance(mail.date_json, six.text_type) 679 | raw_utc = "2015-11-29T08:45:18+00:00" 680 | result = mail.date.isoformat() 681 | self.assertEqual(raw_utc, result) 682 | 683 | def test_write_uuencode_attachment(self): 684 | mail = mailparser.parse_from_file(mail_test_15) 685 | temp_dir = tempfile.mkdtemp() 686 | mail.write_attachments(temp_dir) 687 | md5 = hashlib.md5() 688 | with open(os.path.join(temp_dir, "REQUEST FOR QUOTE.zip"), "rb") as f: 689 | md5.update(f.read()) 690 | shutil.rmtree(temp_dir) 691 | self.assertEqual(md5.hexdigest(), "4f2cf891e7cfb349fca812091f184ecc") 692 | 693 | def test_issue_139(self): 694 | mail = mailparser.parse_from_file(mail_test_16) 695 | assert mail.headers == { 696 | "MIME-Version": "1.0", 697 | "Precedence": "junk", 698 | "Content-Type": "text/plain; charset=us-ascii", 699 | "From": [("Sender", "sender@example.net")], 700 | "Date": "Wed, 23 Jul 2003 23:30:00 +0200", 701 | "Content-Transfer-Encoding": "7bit", 702 | "Message-ID": "", 703 | "Subject": "Test spam mail (GTUBE)", 704 | "To": [("Recipient", "recipient@example.net")], 705 | } 706 | 707 | def test_issue_136(self): 708 | mail = mailparser.parse_from_file(mail_test_17) 709 | 
assert mail.from_ == [ 710 | ("", "notificaccion-clientes@bbva.mx"), 711 | ("", "notificaccion-clientes@bbva.mx"), 712 | ] 713 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2017 Fedele Mantuano (https://twitter.com/fedelemantuano) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | """ 18 | 19 | from unittest.mock import MagicMock, patch 20 | 21 | import pytest 22 | 23 | from mailparser.__main__ import get_args, process_output 24 | 25 | 26 | @pytest.fixture 27 | def parser(): 28 | return get_args() 29 | 30 | 31 | class TestMain: 32 | def test_required(self, parser): 33 | with pytest.raises(SystemExit): 34 | parser.parse_args(["--file", "test", "--string", "test"]) 35 | 36 | with pytest.raises(SystemExit): 37 | parser.parse_args(["--file", "test", "--stdin"]) 38 | 39 | with pytest.raises(SystemExit): 40 | parser.parse_args(["--file"]) 41 | 42 | with pytest.raises(SystemExit): 43 | parser.parse_args(["--string"]) 44 | 45 | def test_options(self, parser): 46 | args = parser.parse_args(["--file", "mail.eml"]) 47 | assert args.file == "mail.eml" 48 | 49 | args = parser.parse_args(["--string", "mail.str"]) 50 | assert args.string == "mail.str" 51 | 52 | args = parser.parse_args(["--file", "mail.eml", "--json"]) 53 | assert args.json 54 | 55 | args = 
parser.parse_args(["--file", "mail.eml", "--body"]) 56 | assert args.body 57 | 58 | args = parser.parse_args(["--file", "mail.eml", "-a"]) 59 | assert args.attachments 60 | 61 | args = parser.parse_args(["--file", "mail.eml", "-r"]) 62 | assert args.headers 63 | 64 | args = parser.parse_args(["--file", "mail.eml", "--to"]) 65 | assert args.to 66 | 67 | args = parser.parse_args(["--file", "mail.eml", "--from"]) 68 | assert args.from_ 69 | 70 | args = parser.parse_args(["--file", "mail.eml", "-u"]) 71 | assert args.subject 72 | 73 | args = parser.parse_args(["--file", "mail.eml", "-d"]) 74 | assert args.defects 75 | 76 | args = parser.parse_args(["--file", "mail.eml", "--senderip", "trust"]) 77 | assert args.senderip 78 | 79 | args = parser.parse_args(["--file", "mail.eml", "-p"]) 80 | assert args.mail_hash 81 | 82 | args = parser.parse_args(["--file", "mail.eml", "--attachments-hash"]) 83 | assert args.attachments_hash 84 | 85 | args = parser.parse_args(["--file", "mail.eml", "-c"]) 86 | assert args.receiveds 87 | 88 | @pytest.mark.parametrize( 89 | "args, patch_process_output, mocked", 90 | [ 91 | ( 92 | ["--file", "mail.eml", "--json"], 93 | "mailparser.__main__.safe_print", 94 | MagicMock(), 95 | ), 96 | ( 97 | ["--file", "mail.eml", "--body"], 98 | "mailparser.__main__.safe_print", 99 | MagicMock(), 100 | ), 101 | ( 102 | ["--file", "mail.eml", "--headers"], 103 | "mailparser.__main__.safe_print", 104 | MagicMock(), 105 | ), 106 | ( 107 | ["--file", "mail.eml", "--to"], 108 | "mailparser.__main__.safe_print", 109 | MagicMock(), 110 | ), 111 | ( 112 | ["--file", "mail.eml", "--delivered-to"], 113 | "mailparser.__main__.safe_print", 114 | MagicMock(), 115 | ), 116 | ( 117 | ["--file", "mail.eml", "--from"], 118 | "mailparser.__main__.safe_print", 119 | MagicMock(), 120 | ), 121 | ( 122 | ["--file", "mail.eml", "--subject"], 123 | "mailparser.__main__.safe_print", 124 | MagicMock(), 125 | ), 126 | ( 127 | ["--file", "mail.eml", "--receiveds"], 128 | 
"mailparser.__main__.safe_print", 129 | MagicMock(), 130 | ), 131 | ( 132 | ["--file", "mail.eml", "--defects"], 133 | "mailparser.__main__.print_defects", 134 | MagicMock(), 135 | ), 136 | ( 137 | ["--file", "mail.eml", "--senderip", "server"], 138 | "mailparser.__main__.print_sender_ip", 139 | MagicMock(), 140 | ), 141 | ( 142 | ["--file", "mail.eml", "--attachments"], 143 | "mailparser.__main__.print_attachments_details", 144 | MagicMock(), 145 | ), 146 | ( 147 | ["--file", "mail.eml", "--attachments-hash"], 148 | "mailparser.__main__.print_attachments_details", 149 | MagicMock(), 150 | ), 151 | ( 152 | ["--file", "mail.eml", "--mail-hash"], 153 | "mailparser.__main__.print_mail_fingerprints", 154 | MagicMock(), 155 | ), 156 | ( 157 | ["--file", "mail.eml", "--store-attachments"], 158 | "mailparser.__main__.write_attachments", 159 | MagicMock(), 160 | ), 161 | ], 162 | ) 163 | def test_process_output( 164 | self, 165 | args, 166 | patch_process_output, 167 | mocked, 168 | parser, 169 | ): 170 | args = parser.parse_args(args) 171 | with patch(patch_process_output) as mock: 172 | process_output(args, mocked) 173 | mock.assert_called_once() 174 | --------------------------------------------------------------------------------