├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.rst ├── CONTRIBUTING.rst ├── LICENSE ├── README.rst ├── pyproject.toml ├── script ├── chores ├── console ├── setup ├── smoke-test ├── static-analysis └── test ├── src └── pyisemail │ ├── __about__.py │ ├── __init__.py │ ├── diagnosis │ ├── __init__.py │ ├── base_diagnosis.py │ ├── cfws_diagnosis.py │ ├── deprecated_diagnosis.py │ ├── dns_diagnosis.py │ ├── gtld_diagnosis.py │ ├── invalid_diagnosis.py │ ├── rfc5321_diagnosis.py │ ├── rfc5322_diagnosis.py │ └── valid_diagnosis.py │ ├── email_validator.py │ ├── reference.py │ ├── utils.py │ └── validators │ ├── __init__.py │ ├── dns_validator.py │ ├── gtld_validator.py │ └── parser_validator.py └── tests ├── __init__.py ├── data └── tests.xml ├── diagnosis ├── __init__.py └── test_base_diagnosis.py ├── test_email_validator.py ├── test_is_email.py ├── test_reference.py └── validators ├── __init__.py ├── test_dns_validator.py └── test_parser_validator.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | - pull_request 5 | 6 | jobs: 7 | lint: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Setup Python 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: 3.x 17 | 18 | - name: Install dependencies 19 | run: | 20 | pipx install hatch==1.6.* 21 | hatch env create lint 22 | 23 | - name: Check formatting 24 | run: | 25 | script/static-analysis 26 | 27 | test: 28 | runs-on: ubuntu-latest 29 | strategy: 30 | matrix: 31 | python-version: 32 | - 3.7 33 | - 3.8 34 | - 3.9 35 | - "3.10" 36 | - pypy-3.7 37 | - pypy-3.8 38 | steps: 39 | - uses: actions/checkout@v3 40 | 41 | - name: Setup Python 42 | uses: actions/setup-python@v4 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | 46 | - name: Install dependencies 47 | run: | 48 | pipx install hatch==1.6.* 49 | hatch env create 50 | hatch env create ci 51 | 52 
| - name: Run tests 53 | run: | 54 | script/test ci 55 | 56 | - name: Report coverage 57 | env: 58 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 59 | run: | 60 | hatch run ci:coveralls --service=github 61 | 62 | smoke_test: 63 | runs-on: ubuntu-latest 64 | 65 | steps: 66 | - uses: actions/checkout@v3 67 | 68 | - name: Setup Python 69 | uses: actions/setup-python@v4 70 | with: 71 | python-version: 3.x 72 | 73 | - name: Install dependencies 74 | run: | 75 | pipx install hatch==1.6.* 76 | hatch env create 77 | 78 | - name: Ensure built wheel works properly 79 | run: | 80 | script/smoke-test 81 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | lib 16 | lib64 17 | 18 | # Installer logs 19 | pip-log.txt 20 | 21 | # Unit test / coverage reports 22 | .coverage 23 | .tox 24 | nosetests.xml 25 | htmlcov 26 | 27 | .idea 28 | .pypirc 29 | smoke_test/ 30 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | 2.0.1 (2022-10-24) 2 | ------------------ 3 | 4 | - Fix the packaging to use the correct package name so that the wheel works as expected [`d36f22e`_] - `@michaelherold`_. 5 | 6 | .. _d36f22e: https://github.com/michaelherold/pyIsEmail/commit/d36f22eac615f29ffa62fe13161244c5e1f7e906 7 | 8 | 2.0.0 (2022-10-17) **YANKED** 9 | ----------------------------- 10 | 11 | - **Breaking (Python 2):** Upgrade to the new ``dnspython`` resolve API for DNS checks for future-proofing [`229db4f`_] - `@moshfrid`_. 12 | - **Breaking (Python 2):** Drop support for Python versions prior to 3.6 [`635aff4`_] - `@michaelherold`_. 
13 | - Consider emails with domains that have null MX records, per RFC7505, invalid when checking DNS [`ebc4a2f`_] - `@bmcculley`_. 14 | 15 | .. _229db4f: https://github.com/michaelherold/pyIsEmail/commit/229db4fe4f790b5a95e1e85bffbdd42464472ea5 16 | .. _635aff4: https://github.com/michaelherold/pyIsEmail/commit/635aff42c3cd0a04f3bad8c79099cd5827fed74f 17 | .. _ebc4a2f: https://github.com/michaelherold/pyIsEmail/commit/ebc4a2f8120b02d488472c1f5bf293b155b58118 18 | .. _@moshfrid: https://github.com/moshfrid 19 | .. _@bmcculley: https://github.com/bmcculley 20 | 21 | 1.4.0 (2021-09-16) 22 | ------------------ 23 | 24 | - Allow limiting of email addresses on Generic Top-Level Domains (gTLDs) with the ``allow_gtld=False`` option [`bf13a6c`_] - `@michaelherold`_. 25 | 26 | .. _bf13a6c: https://github.com/michaelherold/pyIsEmail/commit/bf13a6cfe662e66c8c6a5a9228d80cacf901b1ba 27 | 28 | 1.3.2 (2018-07-05) 29 | ------------------ 30 | 31 | - Upgrade to universal dnspython version - `@peterdemin`_. 32 | 33 | .. _@peterdemin: https://github.com/peterdemin 34 | 35 | 1.3.1 (2015-09-18) 36 | ------------------ 37 | 38 | - Release as non-universal wheels because of the dnspython dependency. 39 | 40 | 1.3.0 (2015-04-18) 41 | ------------------ 42 | 43 | - Bugfix: Ensures that DNS checks fail context with or without 44 | a diagnosis [`c7b91f6`_] - `@michaelherold`_. 45 | - The DNSValidator now fails checks when a query times out or fails to 46 | return a response from any nameserver [`f8f4af7`_] - `@michaelherold`_. 47 | 48 | .. _c7b91f6: https://github.com/michaelherold/pyIsEmail/commit/c7b91f64b87b88a501628bb73cc6777b10e45ba5 49 | .. _f8f4af7: https://github.com/michaelherold/pyIsEmail/commit/f8f4af7b4b2441c81a442f41b977ce8780f129a4 50 | 51 | 1.2.0 (2015-03-13) 52 | ------------------ 53 | 54 | - Removed dependency on pypandoc for building. This should fix any user 55 | installation issues [`60a4d6`_] - `@michaelherold`_. 56 | 57 | .. 
_60a4d6: https://github.com/michaelherold/pyIsEmail/commit/60a4d65906736593a6c2547065ad0d5b0024aaec 58 | 59 | 1.1.0 (2014-07-14) 60 | ------------------ 61 | 62 | - Failed DNS checks now return DNSDiagnosis instead of RFC5322Diagnosis [`84d258`_] - `@michaelherold`_. 63 | 64 | .. _84d258: https://github.com/michaelherold/pyIsEmail/commit/84d2581ef7dd7b222ae21bee0692a618a073e9c2 65 | 66 | 1.0.1 (2014-01-27) 67 | ------------------ 68 | 69 | - Little bits of cleanup [`8044aa`_] `@michaelherold`_. 70 | 71 | .. _8044aa: https://github.com/michaelherold/pyIsEmail/commit/8044aa1132ecf7ebb6d7c72719d6ebb239cb3eba 72 | 73 | 1.0.0 (2013-10-30) 74 | ------------------ 75 | 76 | - Initial Release [`b8b885`_] - `@michaelherold`_. 77 | 78 | .. _@michaelherold: https://github.com/michaelherold 79 | .. _b8b885: https://github.com/michaelherold/pyIsEmail/commit/b8b88598a244a48db8f00ff7d9860f09f984b7e1 80 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | In the spirit of `free software `__, we encourage **everyone** to help improve this project. Here are some ways *you* can contribute: 5 | 6 | - Use alpha, beta, and pre-release versions. 7 | - Report bugs. 8 | - Suggest new features. 9 | - Write or edit documentation. 10 | - Write specifications. 11 | - Write code (**no patch is too small**: fix typos, add comments, clean up inconsistent whitespace). 12 | - Refactor code. 13 | - Fix `issues `__. 14 | - Review patches. 15 | 16 | Submitting an issue 17 | ------------------- 18 | 19 | We use the `GitHub issue tracker `__ to track bugs and features. Before submitting a bug report or feature request, check to make sure no one else has already submitted the same bug report. 
20 | 21 | When submitting a bug report, please include a `Gist `__ that includes a stack trace and any details that may be necessary to reproduce the bug, including your pyIsEmail version, Python version, and operating system. 22 | 23 | Ideally, a bug report should include a pull request with failing tests. 24 | 25 | Writing code 26 | ------------ 27 | 28 | There is a setup script that you can run directly on any platform that has a POSIX shell. Run ``script/setup`` to get started, then skip to the next section. For more information, read on in this section. 29 | 30 | We use `Hatch `__ to manage the project. It enables us to centralize the organization of our dependencies and development harness. 31 | 32 | To get started with Hatch, you can `install it `__ in a variety of ways. We recommend installing it via your operating system's package manager or with ``pipx`` instead of using ``pip``. 33 | 34 | Once you have installed Hatch, you are ready to start contributing code! 35 | 36 | Submitting a pull request 37 | ------------------------- 38 | 39 | 1. Fork the repository. 40 | 2. Create a topic branch. 41 | 3. Add tests for your unimplemented feature or bug fix. 42 | 4. Run ``script/test``. If your tests pass, return to step 3. 43 | 5. Implement your feature or bug fix. 44 | 6. Run ``script/chores``. If your tests or any of the linters fail, return to step 5. 45 | 7. Open ``coverage/index.html``. If your changes are not fully covered by your tests, return to step 3. 46 | 8. Add documentation for your feature or bug fix. 47 | 9. Commit and push your changes. 48 | 10. Submit a pull request. 49 | 50 | Tools to help you succeed 51 | ------------------------- 52 | 53 | After checking out the repository, run ``script/setup`` to install dependencies. Then, run ``script/test`` to run the tests. You can also run ``script/console`` for an interactive prompt that will allow you to experiment. 
54 | 55 | Before committing code, run ``script/chores`` to check that the code conforms to the style guidelines of the project, that all of the tests are green (if you’re writing a feature; if you’re only submitting a failing test, then it does not have to pass!), and that the changes are sufficiently documented. 56 | 57 | Releasing a new version (release maintainers only) 58 | -------------------------------------------------- 59 | 60 | Hatch has built-in support for managing releases. Use ``hatch build`` to build the wheel and source distribution. Then, run ``tar -tvf dist/pyisemail-.tar.gz`` to verify the contents of the archive. If everything looks good, publish with ``hatch publish``. 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2013-2022, Michael Herold 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pyIsEmail 2 | ========= 3 | 4 | |pypi| |ci| |coveralls| |downloads| 5 | 6 | Getting Started 7 | --------------- 8 | 9 | pyIsEmail is a no-nonsense approach for checking whether that 10 | user-supplied email address could be real. Sick of not being able to use 11 | `email address tagging`_ to sort through your `Bacn`_? We can fix that. 12 | 13 | Regular expressions are cheap to write, but often require maintenance when 14 | new top-level domains come out or don't conform to email addressing 15 | features that come back into vogue. pyIsEmail allows you to validate an 16 | email address -- and even check the domain, if you wish -- with one simple 17 | call, making your code more readable and faster to write. When you want to 18 | know why an email address doesn't validate, we even provide you with 19 | a diagnosis. 20 | 21 | .. _email address tagging: http://en.wikipedia.org/wiki/Email_address#Address_tags 22 | .. _Bacn: http://en.wikipedia.org/wiki/Bacn 23 | 24 | Install 25 | ------- 26 | 27 | Install from PyPI using `pip`_, a package manager for Python. 28 | 29 | .. code-block:: bash 30 | 31 | $ pip install pyIsEmail 32 | 33 | Don't have pip installed? Try installing it by running this from the 34 | command line: 35 | 36 | .. code-block:: bash 37 | 38 | $ curl https://raw.github.com/pypa/pip/master/contrib/get-pip.py | python 39 | 40 | Or you can `download the source code (zip)`_ for ``pyIsEmail`` and then 41 | run: 42 | 43 | .. 
code-block:: bash 44 | 45 | $ python setup.py install 46 | 47 | You may need to run the above commands with ``sudo``. 48 | 49 | .. _pip: http://www.pip-installer.org/en/latest/ 50 | .. _download the source code (zip): https://github.com/michaelherold/pyIsEmail/zipball/master 51 | 52 | Usage 53 | ----- 54 | 55 | For the simplest usage, import and use the ``is_email`` function: 56 | 57 | .. code-block:: python 58 | 59 | from pyisemail import is_email 60 | 61 | address = "test@example.com" 62 | bool_result = is_email(address) 63 | detailed_result = is_email(address, diagnose=True) 64 | 65 | You can also check whether the domain used in the email is a valid domain 66 | and whether or not it has a valid MX record: 67 | 68 | .. code-block:: python 69 | 70 | from pyisemail import is_email 71 | 72 | address = "test@example.com" 73 | bool_result_with_dns = is_email(address, check_dns=True) 74 | detailed_result_with_dns = is_email(address, check_dns=True, diagnose=True) 75 | 76 | These are primary indicators of whether an email address can even be 77 | issued at that domain. However, a valid response here *is not a guarantee 78 | that the email exists*, merely that it *can* exist. 79 | 80 | If you want to limit using a `gTLD`_ as the domain part of the email 81 | address, you can do so with a flag: 82 | 83 | .. code-block:: python 84 | 85 | from pyisemail import is_email 86 | 87 | address = "thiswont@workatall" 88 | bool_result_with_check = is_email(address, allow_gtld=False) 89 | detailed_result_with_check = is_email(address, allow_gtld=False, diagnose=True) 90 | 91 | In addition to the base ``is_email`` functionality, you can also use the 92 | validators by themselves. Check the validator source code to see how this 93 | works. 94 | 95 | .. _gTLD: https://en.wikipedia.org/wiki/Generic_top-level_domain 96 | 97 | Uninstall 98 | --------- 99 | 100 | Want to get rid of pyIsEmail? Did you install with pip? Here you go: 101 | 102 | .. 
code-block:: bash 103 | 104 | $ pip uninstall pyIsEmail 105 | 106 | Acknowledgements 107 | ---------------- 108 | 109 | The base ``ParserValidator`` is based off of `Dominic Sayers`_' `is_email 110 | script`_. I wanted the functionality in Python, so I ported it from the 111 | original PHP. 112 | 113 | .. _Dominic Sayers: https://github.com/dominicsayers 114 | .. _is_email script: https://github.com/dominicsayers/isemail 115 | 116 | Contributing 117 | ------------ 118 | 119 | So you're interested in contributing to pyIsEmail? Check out our `contributing guidelines <./CONTRIBUTING.rst>`__ for more information on how to do that. 120 | 121 | Versioning 122 | ---------- 123 | 124 | This library aims to adhere to `Semantic Versioning 2.0.0`_. Violations of 125 | this scheme should be reported as bugs. 126 | 127 | .. _Semantic Versioning 2.0.0: http://semver.org/ 128 | 129 | Copyright 130 | --------- 131 | 132 | Copyright (c) 2015 Michael Herold. Open sourced under the terms of the 133 | `MIT license`_. 134 | 135 | .. _MIT license: http://opensource.org/licenses/MIT 136 | 137 | 138 | .. |pypi| image:: https://img.shields.io/pypi/v/pyIsEmail.svg?style=flat-square 139 | :target: https://pypi.python.org/pypi/pyIsEmail 140 | :alt: Latest version released on PyPI 141 | .. |ci| image:: https://github.com/michaelherold/pyIsEmail/actions/workflows/ci.yml/badge.svg 142 | :target: https://github.com/michaelherold/pyIsEmail/actions/workflows/ci.yml 143 | .. |coveralls| image:: https://img.shields.io/coveralls/michaelherold/pyIsEmail/master.svg?style=flat-square 144 | :target: https://coveralls.io/r/michaelherold/pyIsEmail?branch=master 145 | :alt: Test coverage 146 | .. 
|downloads| image:: https://img.shields.io/pypi/dm/pyIsEmail.svg?style=flat-square 147 | :target: https://pypi.python.org/pypi/pyIsEmail/ 148 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pyIsEmail" 7 | dynamic = ["version"] 8 | description = "Simple, robust email validation" 9 | readme = "README.rst" 10 | license = "MIT" 11 | authors = [ 12 | { name = "Michael Herold", email = "opensource@michaeljherold.com" }, 13 | ] 14 | keywords = [ 15 | "email", 16 | "validation", 17 | ] 18 | classifiers = [ 19 | "Development Status :: 5 - Production/Stable", 20 | "Intended Audience :: Developers", 21 | "License :: OSI Approved :: MIT License", 22 | "Operating System :: OS Independent", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3 :: Only", 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "Programming Language :: Python :: 3.9", 28 | "Programming Language :: Python :: Implementation :: PyPy", 29 | "Programming Language :: Python :: 3.10", 30 | "Topic :: Communications :: Email", 31 | "Topic :: Software Development :: Libraries :: Python Modules", 32 | ] 33 | dependencies = [ 34 | "dnspython >= 2.0.0", 35 | ] 36 | requires-python = ">=3.7" 37 | 38 | [project.urls] 39 | Homepage = "https://github.com/michaelherold/pyIsEmail" 40 | Source = "https://github.com/michaelherold/pyIsEmail" 41 | Tracker = "https://github.com/michaelherold/pyIsEmail/issues" 42 | 43 | [tool.bandit] 44 | recursive = true 45 | exclude_dirs = ["tests"] 46 | skips = ["B105"] 47 | 48 | [tool.black] 49 | include = '\.pyi?$' 50 | skip-string-normalization = true 51 | target-version = ["py37"] 52 | 53 | [tool.coverage.report] 54 | omit = ["*test*"] 55 | 56 | [tool.coverage.run] 
57 | source = ["src/pyisemail"] 58 | 59 | [tool.isort] 60 | default_section = "THIRDPARTY" 61 | force_grid_wrap = 0 62 | include_trailing_comma = true 63 | known_first_party = ["hatch", "hatchling"] 64 | multi_line_output = 3 65 | use_parentheses = true 66 | 67 | [tool.hatch.version] 68 | path = "src/pyisemail/__about__.py" 69 | 70 | [tool.hatch.build.targets.sdist] 71 | include = [ 72 | "/CHANGELOG.rst", 73 | "/CONTRIBUTING.rst", 74 | "/src/pyisemail", 75 | ] 76 | 77 | [tool.hatch.build.targets.wheel] 78 | packages = ["src/pyisemail"] 79 | 80 | [tool.hatch.envs.default] 81 | dependencies = [ 82 | "coverage[toml]>=6.2", 83 | "pytest", 84 | "pytest-cov", 85 | "wheel-inspect", 86 | ] 87 | 88 | [tool.hatch.envs.default.scripts] 89 | cov = "python -m pytest --cov-report=html:coverage --cov-config=pyproject.toml --cov=src/pyisemail --cov=tests {args}" 90 | test = "cov --no-cov {args}" 91 | 92 | [tool.hatch.envs.ci] 93 | dependencies = [ 94 | "coverage[toml]>=6.2", 95 | "coveralls", 96 | "pytest", 97 | "pytest-cov", 98 | ] 99 | 100 | [tool.hatch.envs.lint] 101 | detached = true 102 | dependencies = [ 103 | "bandit[toml]==1.7.4", 104 | "black==22.10.0", 105 | "isort==5.10.1", 106 | ] 107 | 108 | [tool.hatch.envs.lint.scripts] 109 | all = [ 110 | "fmt", 111 | "security", 112 | ] 113 | fmt = [ 114 | "black --quiet {args:.}", 115 | "isort --quiet {args:.}", 116 | "style", 117 | ] 118 | lint = [ 119 | "security", 120 | "style", 121 | ] 122 | security = "bandit --configfile pyproject.toml --quiet --recursive {args:.}" 123 | style = [ 124 | "black --quiet --check --diff {args:.}", 125 | "isort --quiet --check-only --diff {args:.}" 126 | ] 127 | 128 | [[tool.hatch.envs.test.matrix]] 129 | python = ["37", "38", "39", "310"] 130 | -------------------------------------------------------------------------------- /script/chores: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | script/test 6 | 
#!/bin/bash

# Bootstrap a development checkout: make sure Hatch is installed, then create
# the default and lint Hatch environments.

set -euo pipefail

if ! command -v hatch >/dev/null; then
  # Print the instructions to stderr via a here-document and stop: nothing
  # below can succeed without Hatch on the PATH.
  cat <<MSG >&2
Whoops! You need to install Hatch first.

Install it with your package manager or pipx, then try again:
https://hatch.pypa.io/1.6/install/
MSG
  exit 1
fi

hatch env create
hatch env create lint
def is_email(address, check_dns=False, diagnose=False, allow_gtld=True):
    """Validate an email address.

    Keyword arguments:
    address     --- the email address as a string
    check_dns   --- when True, also run the DNS validator on the domain
    diagnose    --- when True, return the Diagnosis instead of True/False
    allow_gtld  --- when False, also run the gTLD validator on the domain

    """

    categories = BaseDiagnosis.CATEGORIES
    limit = categories["THRESHOLD"]
    diagnosis = ParserValidator().is_email(address, True)

    wants_dns = check_dns is True
    forbids_gtld = allow_gtld is False

    # Domain-level checks are only attempted when the parser result is
    # below the DNS warning category.
    if diagnosis < categories["DNSWARN"]:
        domain = address.split("@")[1]

        if wants_dns or forbids_gtld:
            # With an extra check requested, only a fully valid diagnosis
            # counts as a pass for the boolean result.
            limit = categories["VALID"]

            if wants_dns:
                diagnosis = max(diagnosis, DNSValidator().is_valid(domain, True))

            if forbids_gtld:
                diagnosis = max(diagnosis, GTLDValidator().is_valid(domain, True))

    if diagnose:
        return diagnosis
    return diagnosis < limit
class BaseDiagnosis(object):

    """Super class for an error diagnosis.

    You should rarely, i.e. only in testing, instantiate a BaseDiagnosis, as
    it does not provide any pertinent information. Always use one of its
    subclasses.

    Subclasses fill in DESCRIPTION, ERROR_CODES, MESSAGES and REFERENCES;
    each is keyed by the diagnosis type string passed to the constructor.
    Diagnoses order by their numeric ``code`` and may be compared either
    with another diagnosis or directly with a number.

    """

    # Numeric category boundaries that diagnosis codes are compared against.
    CATEGORIES = {
        "VALID": 1,
        "DNSWARN": 7,
        "RFC5321": 15,
        "THRESHOLD": 16,
        "CFWS": 31,
        "DEPREC": 63,
        "RFC5322": 127,
        "ERR": 255,
    }
    DESCRIPTION = ""
    ERROR_CODES = {}
    MESSAGES = {}
    REFERENCES = {}

    def __init__(self, diagnosis_type):
        """Look up message, references and code for *diagnosis_type*.

        Unknown types get an empty message, no references and code -1.
        """
        self.diagnosis_type = str(diagnosis_type)
        self.description = self.DESCRIPTION
        self.message = self.MESSAGES.get(diagnosis_type, "")
        self.references = self.get_references(diagnosis_type)
        self.code = self.ERROR_CODES.get(diagnosis_type, -1)

    def get_references(self, diagnosis_type):
        """Return a list of Reference objects registered for the type."""
        refs = self.REFERENCES.get(diagnosis_type, [])
        return [Reference(ref) for ref in refs]

    @staticmethod
    def _code_of(other):
        """Return the comparable value of *other*: its code, or itself."""
        if isinstance(other, BaseDiagnosis):
            return other.code
        return other

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.diagnosis_type)

    def __hash__(self):
        return hash((self.__class__.__name__, self.diagnosis_type))

    def __eq__(self, other):
        # Equality is identity of class name + diagnosis type, expressed
        # through the repr; it is intentionally not based on ``code``.
        return repr(self) == repr(other)

    def __lt__(self, other):
        return self.code < self._code_of(other)

    def __le__(self, other):
        # Defined explicitly: without it ``diagnosis <= x`` raises TypeError
        # even though ``<`` and ``>`` are supported.
        return self.code <= self._code_of(other)

    def __gt__(self, other):
        return self.code > self._code_of(other)

    def __ge__(self, other):
        return self.code >= self._code_of(other)
class CFWSDiagnosis(BaseDiagnosis):

    """A diagnosis indicating comments or folding white space in the address."""

    DESCRIPTION = (
        "Address is valid within the message "
        "but cannot be used unmodified for the envelope."
    )

    ERROR_CODES = {
        "COMMENT": 17,
        "FWS": 18,
    }

    MESSAGES = {
        # Previously read "Address contains messages", which does not match
        # the COMMENT diagnosis it describes.
        "COMMENT": "Address contains comments",
        "FWS": "Address contains Folding White Space",
    }

    REFERENCES = {
        "COMMENT": ["dot-atom"],
        "FWS": ["local-part"],
    }
class DNSDiagnosis(BaseDiagnosis):

    """A diagnosis indicating a lack of a DNS record for a domain."""

    DESCRIPTION = "Address is valid but a DNS check was not successful."

    ERROR_CODES = {
        "NO_NAMESERVERS": 3,
        "DNS_TIMEDOUT": 4,
        "NO_MX_RECORD": 5,
        "NO_RECORD": 6,
        "NULL_MX_RECORD": 7,
    }

    MESSAGES = {
        "NO_NAMESERVERS": "All nameservers failed to answer the query",
        # The key must match ERROR_CODES ("DNS_TIMEDOUT"); with the old
        # "DNS_TIMEOUT" spelling a timed-out diagnosis had an empty message.
        "DNS_TIMEDOUT": "The DNS query timed out",
        # Old spelling kept so existing lookups by that key still resolve.
        "DNS_TIMEOUT": "The DNS query timed out",
        "NO_MX_RECORD": (
            "Couldn't find an MX record for this domain " "but an A record does exist."
        ),
        "NO_RECORD": "Couldn't find an MX record or A record for this domain.",
        "NULL_MX_RECORD": "Domain does not support email service.",
    }
class InvalidDiagnosis(BaseDiagnosis):

    """A diagnosis for an address that contains a fatally invalid component.

    The three tables below are keyed by the same diagnosis-type names:
    ERROR_CODES holds the numeric code, MESSAGES the human-readable
    explanation and REFERENCES the reference names that apply to it.

    """

    DESCRIPTION = "Address is invalid for any purpose"

    # Numeric codes, one per diagnosis type (129 through 151).
    ERROR_CODES = {
        "EXPECTING_DTEXT": 129,
        "NOLOCALPART": 130,
        "NODOMAIN": 131,
        "CONSECUTIVEDOTS": 132,
        "ATEXT_AFTER_CFWS": 133,
        "ATEXT_AFTER_QS": 134,
        "ATEXT_AFTER_DOMLIT": 135,
        "EXPECTING_QPAIR": 136,
        "EXPECTING_ATEXT": 137,
        "EXPECTING_QTEXT": 138,
        "EXPECTING_CTEXT": 139,
        "BACKSLASHEND": 140,
        "DOT_START": 141,
        "DOT_END": 142,
        "DOMAINHYPHENSTART": 143,
        "DOMAINHYPHENEND": 144,
        "UNCLOSEDQUOTEDSTR": 145,
        "UNCLOSEDCOMMENT": 146,
        "UNCLOSEDDOMLIT": 147,
        "FWS_CRLF_X2": 148,
        "FWS_CRLF_END": 149,
        "CR_NO_LF": 150,
        "BAD_PARSE": 151,
    }

    # Human-readable explanation for each diagnosis type.
    MESSAGES = {
        "EXPECTING_DTEXT": (
            "Address contains a character that is not allowed "
            "in a domain literal."
        ),
        "NOLOCALPART": "Address has no local part.",
        "NODOMAIN": "Address has no domain part.",
        "CONSECUTIVEDOTS": "Address contains consecutive dots.",
        "ATEXT_AFTER_CFWS": (
            "Address contains text after a comment or "
            "Folding White Space."
        ),
        "ATEXT_AFTER_QS": "Address contains text after a quoted string.",
        "ATEXT_AFTER_DOMLIT": (
            "Address contains extra characters after "
            "the domain literal."
        ),
        "EXPECTING_QPAIR": (
            "Address contains a character that is not allowed "
            "in a quoted pair."
        ),
        "EXPECTING_ATEXT": "Address contains a character that is not allowed.",
        "EXPECTING_QTEXT": (
            "Address contains a character that is not allowed "
            "in a quoted string."
        ),
        "EXPECTING_CTEXT": (
            "Address contains a character that is not allowed "
            "in a comment."
        ),
        "BACKSLASHEND": "Address ends in a backslash.",
        "DOT_START": "Address has a local part or domain that begins with a dot.",
        "DOT_END": "Address has a local part or domain that ends with a dot.",
        "DOMAINHYPHENSTART": (
            "Address has a local part or domain that "
            "begins with a hyphen."
        ),
        "DOMAINHYPHENEND": (
            "Address has a local part or domain that "
            "ends with a hyphen."
        ),
        "UNCLOSEDQUOTEDSTR": "Address contains an unclosed quoted string.",
        "UNCLOSEDCOMMENT": "Address contains an unclosed comment.",
        "UNCLOSEDDOMLIT": (
            "Address contains a domain literal that is "
            "missing its closing bracket."
        ),
        "FWS_CRLF_X2": (
            "Address contains a Folding White Space that has "
            "consecutive CRLF sequences."
        ),
        "FWS_CRLF_END": (
            "Address contains a Folding White Space that "
            "ends with a CRLF sequence."
        ),
        "CR_NO_LF": (
            "Address contains a carriage return that is "
            "not followed by a line return."
        ),
        "BAD_PARSE": "Address is malformed.",
    }

    # Reference names relevant to each diagnosis type (BAD_PARSE has none).
    REFERENCES = {
        "EXPECTING_DTEXT": ["dtext"],
        "NOLOCALPART": ["local-part"],
        "NODOMAIN": ["addr-spec", "mailbox"],
        "CONSECUTIVEDOTS": ["local-part", "domain-RFC5322", "domain-RFC5321"],
        "ATEXT_AFTER_CFWS": ["local-part", "domain-RFC5322"],
        "ATEXT_AFTER_QS": ["local-part"],
        "ATEXT_AFTER_DOMLIT": ["domain-RFC5322"],
        "EXPECTING_QPAIR": ["quoted-pair"],
        "EXPECTING_ATEXT": ["atext"],
        "EXPECTING_QTEXT": ["qtext"],
        "EXPECTING_CTEXT": ["ctext"],
        "BACKSLASHEND": ["domain-RFC5322", "domain-RFC5321", "quoted-pair"],
        "DOT_START": ["local-part", "domain-RFC5322", "domain-RFC5321"],
        "DOT_END": ["local-part", "domain-RFC5322", "domain-RFC5321"],
        "DOMAINHYPHENSTART": ["sub-domain"],
        "DOMAINHYPHENEND": ["sub-domain"],
        "UNCLOSEDQUOTEDSTR": ["quoted-string"],
        "UNCLOSEDCOMMENT": ["CFWS"],
        "UNCLOSEDDOMLIT": ["domain-literal"],
        "FWS_CRLF_X2": ["CFWS"],
        "FWS_CRLF_END": ["CFWS"],
        "CR_NO_LF": ["CFWS", "CRLF"],
        "BAD_PARSE": [],
    }
class RFC5322Diagnosis(BaseDiagnosis):

    """A diagnosis indicating the address is only technically valid.

    The three tables below are keyed by the same diagnosis-type names:
    ERROR_CODES holds the numeric code, MESSAGES the human-readable
    explanation and REFERENCES the reference names that apply to it.

    """

    DESCRIPTION = (
        "Address is only valid according to the broad definition of RFC5322. "
        "It is otherwise invalid."
    )

    # Numeric codes, one per diagnosis type (65 through 77).
    ERROR_CODES = {
        "DOMAIN": 65,
        "TOOLONG": 66,
        "LOCAL_TOOLONG": 67,
        "DOMAIN_TOOLONG": 68,
        "LABEL_TOOLONG": 69,
        "DOMAINLITERAL": 70,
        "DOMLIT_OBSDTEXT": 71,
        "IPV6_GRPCOUNT": 72,
        "IPV6_2X2XCOLON": 73,
        "IPV6_BADCHAR": 74,
        "IPV6_MAXGRPS": 75,
        "IPV6_COLONSTRT": 76,
        "IPV6_COLONEND": 77,
    }

    # Human-readable explanation for each diagnosis type.
    MESSAGES = {
        "DOMAIN": (
            "Address is RFC5322 compliant but contains domain characters "
            "that are not allowed by DNS."
        ),
        "TOOLONG": "Address is too long.",
        "LOCAL_TOOLONG": "Address contains a local part that is too long.",
        "DOMAIN_TOOLONG": "Address contains a domain that is too long.",
        "LABEL_TOOLONG": (
            "Address contains a domain part with an element that is "
            "too long."
        ),
        "DOMAINLITERAL": (
            "Address contains a domain literal that is not a valid "
            "RFC5321 address literal."
        ),
        "DOMLIT_OBSDTEXT": (
            "Address contains a domain literal that is not a valid "
            "RFC5321 address literal and contains obsolete characters."
        ),
        "IPV6_GRPCOUNT": (
            "Address contains an IPv6 literal address with the wrong "
            "number of groups."
        ),
        "IPV6_2X2XCOLON": (
            "Address contains an IPv6 literal address with too many "
            ":: sequences."
        ),
        "IPV6_BADCHAR": (
            "Address contains an IPv6 literal address with an illegal "
            "group of characters."
        ),
        "IPV6_MAXGRPS": (
            "Address contains an IPv6 literal address with too "
            "many groups."
        ),
        "IPV6_COLONSTRT": (
            "Address contains an IPv6 literal address that starts "
            "with a single colon."
        ),
        "IPV6_COLONEND": (
            "Address contains an IPv6 literal address that ends "
            "with a single colon."
        ),
    }

    # Reference names relevant to each diagnosis type.
    REFERENCES = {
        "DOMAIN": ["domain-RFC5322"],
        "TOOLONG": ["mailbox-maximum"],
        "LOCAL_TOOLONG": ["local-part-maximum"],
        "DOMAIN_TOOLONG": ["domain-maximum"],
        "LABEL_TOOLONG": ["label"],
        "DOMAINLITERAL": ["domain-literal"],
        "DOMLIT_OBSDTEXT": ["obs-dtext"],
        "IPV6_GRPCOUNT": ["address-literal-IPv6"],
        "IPV6_2X2XCOLON": ["address-literal-IPv6"],
        "IPV6_BADCHAR": ["address-literal-IPv6"],
        "IPV6_MAXGRPS": ["address-literal-IPv6"],
        "IPV6_COLONSTRT": ["address-literal-IPv6"],
        "IPV6_COLONEND": ["address-literal-IPv6"],
    }
class Reference(object):

    """A container for diagnosis references, for more information.

    Holds the citation in the pertinent RFC, as well as a link to the specific
    section of the RFC being referred to.

    DATA maps a reference name to a dict with two string entries, "link"
    and "citation". Constructing a Reference with a name that is not in DATA
    yields empty strings for both attributes.

    """

    DATA = {
        "local-part": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC5322 section 3.4.1",
        },
        "local-part-maximum": {
            "link": "http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1",
            "citation": "RFC5321 section 4.5.3.1.1",
        },
        "obs-local-part": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC 5322 section 3.4.1",
        },
        "dot-atom": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC 5322 section 3.4.1",
        },
        "quoted-string": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC 5322 section 3.4.1",
        },
        "CFWS-near-at": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC 5322 section 3.4.1",
        },
        "SHOULD-NOT": {
            "link": "http://tools.ietf.org/html/rfc2119",
            "citation": "RFC2119 section 4",
        },
        "atext": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.2.3",
            "citation": "RFC5322 section 3.2.3",
        },
        "obs-domain": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC5322 section 3.4.1",
        },
        "domain-RFC5322": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC5322 section 3.4.1",
        },
        "domain-RFC5321": {
            "link": "http://tools.ietf.org/html/rfc5321#section-4.1.2",
            "citation": "RFC5321 section 4.1.2",
        },
        "label": {
            "link": "http://tools.ietf.org/html/rfc1035#section-2.3.4",
            "citation": "RFC1035 section 2.3.4",
        },
        "CRLF": {
            "link": "http://tools.ietf.org/html/rfc5234#section-2.3",
            "citation": "RFC5234 section 2.3",
        },
        "CFWS": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.2.2",
            "citation": "RFC5322 section 3.2.2",
        },
        "domain-literal": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC5322 section 3.4.1",
        },
        "address-literal": {
            "link": "http://tools.ietf.org/html/rfc5321#section-4.1.2",
            "citation": "RFC5321 section 4.1.2",
        },
        "address-literal-IPv4": {
            "link": "http://tools.ietf.org/html/rfc5321#section-4.1.3",
            "citation": "RFC5321 section 4.1.3",
        },
        "address-literal-IPv6": {
            "link": "http://tools.ietf.org/html/rfc5321#section-4.1.3",
            "citation": "RFC5321 section 4.1.3",
        },
        "dtext": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC5322 section 3.4.1",
        },
        "obs-dtext": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC5322 section 3.4.1",
        },
        "qtext": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.2.4",
            "citation": "RFC5322 section 3.2.4",
        },
        "obs-qtext": {
            "link": "http://tools.ietf.org/html/rfc5322#section-4.1",
            "citation": "RFC5322 section 4.1",
        },
        "ctext": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.2.3",
            "citation": "RFC5322 section 3.2.3",
        },
        "obs-ctext": {
            "link": "http://tools.ietf.org/html/rfc5322#section-4.1",
            "citation": "RFC5322 section 4.1",
        },
        "quoted-pair": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.2.1",
            "citation": "RFC5322 section 3.2.1",
        },
        "obs-qp": {
            "link": "http://tools.ietf.org/html/rfc5322#section-4.1",
            "citation": "RFC5322 section 4.1",
        },
        "TLD": {
            "link": "http://tools.ietf.org/html/rfc5321#section-2.3.5",
            "citation": "RFC5321 section 2.3.5",
        },
        "TLD-format": {
            "link": "http://www.rfc-editor.org/errata_search.php?eid=1353",
            "citation": "John Klensin, RFC 1123 erratum 1353",
        },
        "mailbox-maximum": {
            "link": "http://www.rfc-editor.org/errata_search.php?eid=1690",
            "citation": "Dominic Sayers, RFC 3696 erratum 1690",
        },
        "domain-maximum": {
            # The link must name the same RFC as the citation: section
            # 4.5.3.1.2 belongs to RFC 5321, not RFC 1035.
            "link": "http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2",
            "citation": "RFC 5321 section 4.5.3.1.2",
        },
        "mailbox": {
            "link": "http://tools.ietf.org/html/rfc5321#section-4.1.2",
            "citation": "RFC 5321 section 4.1.2",
        },
        "addr-spec": {
            "link": "http://tools.ietf.org/html/rfc5322#section-3.4.1",
            "citation": "RFC 5322 section 3.4.1",
        },
    }

    def __init__(self, name=""):
        """Look up *name* in DATA and store its link and citation.

        Keyword arguments:
        name -- the reference name; unknown names give empty strings

        """
        data = self.DATA.get(name, {"link": "", "citation": ""})
        self.link = data["link"]
        self.citation = data["citation"]

    def __repr__(self):
        """Return the class followed by the instance's attribute dict."""
        return "%s (%r)" % (self.__class__, self.__dict__)

    def __str__(self):
        """Return the citation followed by the link in angle brackets."""
        return "%s <%s>" % (self.citation, self.link)
class DNSValidator(object):

    """Validator that checks a domain's DNS records for mail deliverability."""

    def is_valid(self, domain, diagnose=False):

        """Check whether a domain has a valid MX or A record.

        Keyword arguments:
        domain   --- the domain to check
        diagnose --- flag to report a diagnosis or a boolean (default False)

        Every problem found is collected as a diagnosis; the maximum of the
        collected diagnoses is the final status. With ``diagnose`` set that
        status is returned, otherwise the result is True only when the final
        status equals ``ValidDiagnosis()``.

        DNS failures during either lookup (missing records, unreachable
        nameservers, timeouts) are reported as diagnoses rather than raised.

        """

        return_status = [ValidDiagnosis()]
        dns_checked = False

        # http://tools.ietf.org/html/rfc5321#section-2.3.5
        #   Names that can be resolved to MX RRs or address (i.e., A or AAAA)
        #   RRs (as discussed in Section 5) are permitted, as are CNAME RRs
        #   whose targets can be resolved, in turn, to MX or address RRs.
        #
        # http://tools.ietf.org/html/rfc5321#section-5.1
        #   The lookup first attempts to locate an MX record associated with
        #   the name. If a CNAME record is found, the resulting name is
        #   processed as if it were the initial name. ... If an empty list of
        #   MXs is returned, the address is treated as if it was associated
        #   with an implicit MX RR, with a preference of 0, pointing to that
        #   host.
        #
        # is_email() author's note: We will regard the existence of a CNAME to
        # be sufficient evidence of the domain's existence. For performance
        # reasons we will not repeat the DNS lookup for the CNAME's target, but
        # we will raise a warning because we didn't immediately find an MX
        # record.
        try:
            records = dns.resolver.resolve(domain, MX)
            dns_checked = True

            # Even if there's an MX record set we need to verify the preference
            # value and label length. If it's a single MX record with a
            # preference of 0 and an empty label it should return null MX.
            # https://www.rfc-editor.org/rfc/rfc7505.html#section-3
            if len(records) == 1:
                if records[0].preference == 0 and len(records[0].exchange) <= 1:
                    return_status.append(DNSDiagnosis("NULL_MX_RECORD"))
        except (dns.resolver.NXDOMAIN, dns.name.NameTooLong):
            # Domain can't be found in DNS
            return_status.append(DNSDiagnosis("NO_RECORD"))

            # Since dns.resolver gives more information than the PHP analog, we
            # can say that TLDs that throw an NXDOMAIN or NameTooLong error
            # have been checked
            if len(domain.split(".")) == 1:
                dns_checked = True
        except dns.resolver.NoAnswer:
            # MX-record for domain can't be found
            return_status.append(DNSDiagnosis("NO_MX_RECORD"))

            # Fall back to the default (address) lookup. Exceptions raised
            # inside this handler are NOT caught by the sibling ``except``
            # clauses of the outer ``try``, so every DNS failure mode has to
            # be handled here as well or it would escape to the caller.
            try:
                dns.resolver.resolve(domain)
            except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN):
                # No usable records for the domain can be found
                return_status.append(DNSDiagnosis("NO_RECORD"))
            except dns.resolver.NoNameservers:
                return_status.append(DNSDiagnosis("NO_NAMESERVERS"))
            except (dns.exception.Timeout, dns.resolver.Timeout):
                return_status.append(DNSDiagnosis("DNS_TIMEDOUT"))
        except dns.resolver.NoNameservers:
            return_status.append(DNSDiagnosis("NO_NAMESERVERS"))
        except (dns.exception.Timeout, dns.resolver.Timeout):
            return_status.append(DNSDiagnosis("DNS_TIMEDOUT"))

        # Check for TLD addresses
        # -----------------------
        # TLD addresses are specifically allowed in RFC 5321 but they are
        # unusual to say the least. We will allocate a separate
        # status to these addresses on the basis that they are more likely
        # to be typos than genuine addresses (unless we've already
        # established that the domain does have an MX record)
        #
        # http://tools.ietf.org/html/rfc5321#section-2.3.5
        #   In the case of a top-level domain used by itself in an address, a
        #   single string is used without any dots. This makes the requirement,
        #   described in more detail below, that only fully-qualified domain
        #   names appear in SMTP transactions on the public Internet,
        #   particularly important where top-level domains are involved.
        #
        # TLD format
        # ----------
        # The format of TLDs has changed a number of times. The standards
        # used by IANA have been largely ignored by ICANN, leading to
        # confusion over the standards being followed. These are not defined
        # anywhere, except as a general component of a DNS host name (a label).
        # However, this could potentially lead to 123.123.123.123 being a
        # valid DNS name (rather than an IP address) and thereby creating
        # an ambiguity. The most authoritative statement on TLD formats that
        # the author can find is in a (rejected!) erratum to RFC 1123
        # submitted by John Klensin, the author of RFC 5321:
        #
        # http://www.rfc-editor.org/errata_search.php?rfc=1123&eid=1353
        #   However, a valid host name can never have the dotted-decimal
        #   form #.#.#.#, since this change does not permit the highest-level
        #   component label to start with a digit even if it is not
        #   all-numeric.
        if not dns_checked:
            atom_list = domain.split(".")
            if len(atom_list) == 1:
                return_status.append(RFC5321Diagnosis("TLD"))

            # The last label is empty for a trailing dot or an empty domain;
            # slicing (rather than indexing) keeps that case from raising
            # IndexError. ``isdecimal`` accepts exactly the single characters
            # that numeric conversion would accept as a digit.
            if atom_list[-1][:1].isdecimal():
                return_status.append(RFC5321Diagnosis("TLDNUMERIC"))

        final_status = max(return_status)

        return final_status if diagnose else final_status == ValidDiagnosis()
def to_char(token):
    """Convert a control-character symbol into the character it stands for.

    Code points 0x2400 through 0x240D (decimal 9216-9229) are shifted down
    by 0x2400, giving the characters with code points 0x00 through 0x0D.
    Any other token is handed back untouched.

    Keyword arguments:
    token -- the single character to transform

    """
    codepoint = ord(token)

    if 0x2400 <= codepoint <= 0x240D:
        return chr(codepoint - 0x2400)

    return token
64 | 65 | More specifically, see the follow RFCs: 66 | * http://tools.ietf.org/html/rfc5321 67 | * http://tools.ietf.org/html/rfc5322 68 | * http://tools.ietf.org/html/rfc4291#section-2.2 69 | * http://tools.ietf.org/html/rfc1123#section-2.1 70 | * http://tools.ietf.org/html/rfc3696) (guidance only) 71 | 72 | Keyword arguments: 73 | address -- address to check. 74 | diagnose -- flag to report a diagnosis or a boolean (default False) 75 | 76 | """ 77 | 78 | threshold = BaseDiagnosis.CATEGORIES["VALID"] 79 | return_status = [ValidDiagnosis()] 80 | parse_data = {} 81 | 82 | # Parse the address into components, character by character 83 | raw_length = len(address) 84 | context = Context.LOCALPART # Where we are 85 | context_stack = [context] # Where we've been 86 | context_prior = Context.LOCALPART # Where we just came from 87 | token = "" # The current character 88 | token_prior = "" # The previous character 89 | parse_data[Context.LOCALPART] = "" # The address' components 90 | parse_data[Context.DOMAIN] = "" 91 | atom_list = { 92 | Context.LOCALPART: [""], 93 | Context.DOMAIN: [""], 94 | } # The address' dot-atoms 95 | element_count = 0 96 | element_len = 0 97 | hyphen_flag = False # Hyphen cannot occur at the end of a subdomain 98 | end_or_die = False # CFWS can only appear at the end of an element 99 | skip = False # Skip flag that simulates i++ 100 | crlf_count = -1 # crlf_count = -1 == !isset(crlf_count) 101 | 102 | for i in range(raw_length): 103 | 104 | # Skip simulates the use of ++ operator 105 | if skip: 106 | skip = False 107 | continue 108 | 109 | token = address[i] 110 | token = to_char(token) 111 | 112 | # Switch to simulate decrementing; needed for FWS 113 | repeat = True 114 | 115 | while repeat: 116 | repeat = False 117 | 118 | # ------------------------------------------------------- 119 | # Local part 120 | # ------------------------------------------------------- 121 | if context == Context.LOCALPART: 122 | # 
http://tools.ietf.org/html/rfc5322#section-3.4.1 123 | # local-part = dot-atom / quoted-string / 124 | # obs-local-part 125 | # 126 | # dot-atom = [CFWS] dot-atom-text [CFWS] 127 | # 128 | # dot-atom-text = 1*atext *("." 1*atext) 129 | # 130 | # quoted-string = [CFWS] 131 | # DQUOTE *([FWS] qcontent) [FWS] DQUOTE 132 | # [CFWS] 133 | # 134 | # obs-local-part = word *("." word) 135 | # 136 | # word = atom / quoted-string 137 | # 138 | # atom = [CFWS] 1*atext [CFWS] 139 | if token == Char.OPENPARENTHESIS: 140 | if element_len == 0: 141 | # Comments are OK at the beginning of an element 142 | if element_count == 0: 143 | return_status.append(CFWSDiagnosis("COMMENT")) 144 | else: 145 | return_status.append(DeprecatedDiagnosis("COMMENT")) 146 | else: 147 | return_status.append(CFWSDiagnosis("COMMENT")) 148 | # We can't start a comment in the middle of an 149 | # element, so this better be the end 150 | end_or_die = True 151 | 152 | context_stack.append(context) 153 | context = Context.COMMENT 154 | elif token == Char.DOT: 155 | if element_len == 0: 156 | # Another dot, already? Fatal error 157 | if element_count == 0: 158 | return_status.append(InvalidDiagnosis("DOT_START")) 159 | else: 160 | return_status.append( 161 | InvalidDiagnosis("CONSECUTIVEDOTS") 162 | ) 163 | else: 164 | # The entire local-part can be a quoted string for 165 | # RFC 5321. If it's just one atom that is quoted 166 | # then it's an RFC 5322 obsolete form 167 | if end_or_die: 168 | return_status.append(DeprecatedDiagnosis("LOCALPART")) 169 | 170 | # CFWS & quoted strings are OK again now we're at 171 | # the beginning of an element (although they are 172 | # obsolete forms) 173 | end_or_die = False 174 | element_len = 0 175 | element_count += 1 176 | parse_data[Context.LOCALPART] += token 177 | atom_list[Context.LOCALPART].append("") 178 | elif token == Char.DQUOTE: 179 | if element_len == 0: 180 | # The entire local-part can be a quoted string for 181 | # RFC 5321. 
If it's just one atom that is quoted 182 | # then it's an RFC 5322 obsolete form 183 | if element_count == 0: 184 | return_status.append(RFC5321Diagnosis("QUOTEDSTRING")) 185 | else: 186 | return_status.append(DeprecatedDiagnosis("LOCALPART")) 187 | 188 | parse_data[Context.LOCALPART] += token 189 | atom_list[Context.LOCALPART][element_count] += token 190 | element_len += 1 191 | end_or_die = True 192 | context_stack.append(context) 193 | context = Context.QUOTEDSTRING 194 | else: 195 | # Fatal error 196 | return_status.append(InvalidDiagnosis("EXPECTING_ATEXT")) 197 | # Folding White Space (FWS) 198 | elif token in [Char.CR, Char.SP, Char.HTAB]: 199 | # Skip simulates the use of ++ operator if the latter 200 | # check doesn't short-circuit 201 | if token == Char.CR: 202 | skip = True 203 | 204 | if ( 205 | i + 1 == raw_length 206 | or to_char(address[i + 1]) != Char.LF 207 | ): 208 | return_status.append(InvalidDiagnosis("CR_NO_LF")) 209 | break 210 | 211 | if element_len == 0: 212 | if element_count == 0: 213 | return_status.append(CFWSDiagnosis("FWS")) 214 | else: 215 | return_status.append(DeprecatedDiagnosis("FWS")) 216 | else: 217 | # We can't start FWS in the middle of an element, 218 | # so this better be the end 219 | end_or_die = True 220 | 221 | context_stack.append(context) 222 | context = Context.FWS 223 | token_prior = token 224 | # @ 225 | elif token == Char.AT: 226 | # At this point we should have a valid local-part 227 | if len(context_stack) != 1: # pragma: no cover 228 | if diagnose: 229 | return InvalidDiagnosis("BAD_PARSE") 230 | else: 231 | return False 232 | 233 | if parse_data[Context.LOCALPART] == "": 234 | # Fatal error 235 | return_status.append(InvalidDiagnosis("NOLOCALPART")) 236 | elif element_len == 0: 237 | # Fatal error 238 | return_status.append(InvalidDiagnosis("DOT_END")) 239 | # http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1 240 | # The maximum total length of a user name or other 241 | # local-part is 64 octets. 
242 | elif len(parse_data[Context.LOCALPART]) > 64: 243 | return_status.append(RFC5322Diagnosis("LOCAL_TOOLONG")) 244 | # http://tools.ietf.org/html/rfc5322#section-3.4.1 245 | # Comments and folding white space 246 | # SHOULD NOT be used around the "@" in the addr-spec. 247 | # 248 | # http://tools.ietf.org/html/rfc2119 249 | # 4. SHOULD NOT This phrase, or the phrase "NOT 250 | # RECOMMENDED" mean that there may exist valid 251 | # reasons in particular circumstances when the 252 | # particular behavior is acceptable or even useful, 253 | # but the full implications should be understood and 254 | # the case carefully weighed before implementing any 255 | # behavior described with this label. 256 | elif context_prior in [Context.COMMENT, Context.FWS]: 257 | return_status.append(DeprecatedDiagnosis("CFWS_NEAR_AT")) 258 | 259 | # Clear everything down for the domain parsing 260 | context = Context.DOMAIN 261 | context_stack = [] 262 | element_count = 0 263 | element_len = 0 264 | # CFWS can only appear at the end of the element 265 | end_or_die = False 266 | # atext 267 | else: 268 | # http://tools.ietf.org/html/rfc5322#section-3.2.3 269 | # atext = ALPHA / DIGIT / ; Printable US-ASCII 270 | # "!" / "#" / ; characters not 271 | # "$" / "%" / ; including specials. 272 | # "&" / "'" / ; Used for atoms. 273 | # "*" / "+" / 274 | # "-" / "/" / 275 | # "=" / "?" 
/ 276 | # "^" / "_" / 277 | # "`" / "{" / 278 | # "|" / "}" / 279 | # "~" 280 | if end_or_die: 281 | # We have encountered atext where it is no longer 282 | # valid 283 | if context_prior in [Context.COMMENT, Context.FWS]: 284 | return_status.append( 285 | InvalidDiagnosis("ATEXT_AFTER_CFWS") 286 | ) 287 | elif context_prior == Context.QUOTEDSTRING: 288 | return_status.append(InvalidDiagnosis("ATEXT_AFTER_QS")) 289 | else: # pragma: no cover 290 | if diagnose: 291 | return InvalidDiagnosis("BAD_PARSE") 292 | else: 293 | return False 294 | else: 295 | context_prior = context 296 | o = ord(token) 297 | 298 | if o < 33 or o > 126 or o == 10 or token in Char.SPECIALS: 299 | return_status.append( 300 | InvalidDiagnosis("EXPECTING_ATEXT") 301 | ) 302 | 303 | parse_data[Context.LOCALPART] += token 304 | atom_list[Context.LOCALPART][element_count] += token 305 | element_len += 1 306 | # ------------------------------------------------------- 307 | # Domain 308 | # ------------------------------------------------------- 309 | elif context == Context.DOMAIN: 310 | # http://tools.ietf.org/html/rfc5322#section-3.4.1 311 | # domain = dot-atom / domain-literal / obs-domain 312 | # 313 | # dot-atom = [CFWS] dot-atom-text [CFWS] 314 | # 315 | # dot-atom-text = 1*atext *("." 1*atext) 316 | # 317 | # domain-literal = [CFWS] 318 | # "[" *([FWS] dtext) [FWS] "]" 319 | # [CFWS] 320 | # 321 | # dtext = %d33-90 / ; Printable US-ASCII 322 | # %d94-126 / ; characters not 323 | # obs-dtext ; including [, ], or \ 324 | # 325 | # obs-domain = atom *("." atom) 326 | # 327 | # atom = [CFWS] 1*atext [CFWS] 328 | # 329 | # 330 | # http://tools.ietf.org/html/rfc5321#section-4.1.2 331 | # Mailbox = Local-part 332 | # "@" 333 | # ( Domain / address-literal ) 334 | # 335 | # Domain = sub-domain *("." 
sub-domain) 336 | # 337 | # address-literal = "[" ( IPv4-address-literal / 338 | # IPv6-address-literal / 339 | # General-address-literal ) "]" 340 | # ; See Section 4.1.3 341 | # 342 | # http://tools.ietf.org/html/rfc5322#section-3.4.1 343 | # Note: A liberal syntax for the domain portion of 344 | # addr-spec is given here. However, the domain portion 345 | # contains addressing information specified by and 346 | # used in other protocols (e.g., RFC 1034, RFC 1035, 347 | # RFC 1123, RFC5321). It is therefore incumbent upon 348 | # implementations to conform to the syntax of 349 | # addresse for the context in which they are used. 350 | # is_email() author's note: it's not clear how to interpret 351 | # this in the context of a general address address 352 | # validator. The conclusion I have reached is this: 353 | # "addressing information" must comply with RFC 5321 (and 354 | # in turn RFC 1035), anything that is "semantically 355 | # invisible" must comply only with RFC 5322. 356 | 357 | # Comment 358 | if token == Char.OPENPARENTHESIS: 359 | if element_len == 0: 360 | # Comments at the start of the domain are 361 | # deprecated in the text 362 | # Comments at the start of a subdomain are 363 | # obs-domain 364 | # (http://tools.ietf.org/html/rfc5322#section-3.4.1) 365 | if element_count == 0: 366 | return_status.append( 367 | DeprecatedDiagnosis("CFWS_NEAR_AT") 368 | ) 369 | else: 370 | return_status.append(DeprecatedDiagnosis("COMMENT")) 371 | else: 372 | return_status.append(CFWSDiagnosis("COMMENT")) 373 | # We can't start a comment in the middle of an 374 | # element, so this better be the end 375 | end_or_die = True 376 | 377 | context_stack.append(context) 378 | context = Context.COMMENT 379 | # Next dot-atom element 380 | elif token == Char.DOT: 381 | if element_len == 0: 382 | # Another dot, already? 
Fatal error 383 | if element_count == 0: 384 | return_status.append(InvalidDiagnosis("DOT_START")) 385 | else: 386 | return_status.append( 387 | InvalidDiagnosis("CONSECUTIVEDOTS") 388 | ) 389 | elif hyphen_flag: 390 | # Previous subdomain ended in a hyphen. Fatal error 391 | return_status.append(InvalidDiagnosis("DOMAINHYPHENEND")) 392 | else: 393 | # Nowhere in RFC 5321 does it say explicitly that 394 | # the domain part of a Mailbox must be a valid 395 | # domain according to the DNS standards set out in 396 | # RFC 1035, but this *is* implied in several 397 | # places. For instance, wherever the idea of host 398 | # routing is discussed the RFC says that the domain 399 | # must be looked up in the DNS. This would be 400 | # nonsense unless the domain was designed to be a 401 | # valid DNS domain. Hence we must conclude that the 402 | # RFC 1035 restriction on label length also applies 403 | # to RFC 5321 domains. 404 | # 405 | # http://tools.ietf.org/html/rfc1035#section-2.3.4 406 | # labels 63 octets or less 407 | if element_len > 63: 408 | return_status.append(RFC5322Diagnosis("LABEL_TOOLONG")) 409 | 410 | # CFWS is OK again now we're at the beginning of an 411 | # element (although it may be obsolete CFWS) 412 | end_or_die = False 413 | element_len = 0 414 | element_count += 1 415 | atom_list[Context.DOMAIN].append("") 416 | parse_data[Context.DOMAIN] += token 417 | # Domain literal 418 | elif token == Char.OPENSQBRACKET: 419 | if parse_data[Context.DOMAIN] == "": 420 | # Domain literal must be the only component 421 | end_or_die = True 422 | element_len += 1 423 | context_stack.append(context) 424 | context = Context.LITERAL 425 | parse_data[Context.DOMAIN] += token 426 | atom_list[Context.DOMAIN][element_count] += token 427 | parse_data["literal"] = "" 428 | else: 429 | # Fatal error 430 | return_status.append(InvalidDiagnosis("EXPECTING_ATEXT")) 431 | 432 | # Folding White Space (FWS) 433 | elif token in [Char.CR, Char.SP, Char.HTAB]: 434 | # Skip 
simulates the use of ++ operator if the latter 435 | # check doesn't short-circuit 436 | if token == Char.CR: 437 | skip = True 438 | 439 | if i + 1 == raw_length or ( 440 | to_char(address[i + 1]) != Char.LF 441 | ): 442 | # Fatal error 443 | return_status.append(InvalidDiagnosis("CR_NO_LF")) 444 | break 445 | 446 | if element_len == 0: 447 | if element_count == 0: 448 | return_status.append( 449 | DeprecatedDiagnosis("CFWS_NEAR_AT") 450 | ) 451 | else: 452 | return_status.append(DeprecatedDiagnosis("FWS")) 453 | else: 454 | return_status.append(CFWSDiagnosis("FWS")) 455 | # We can't start FWS in the middle of an element, 456 | # so this better be the end 457 | end_or_die = True 458 | 459 | context_stack.append(context) 460 | context = Context.FWS 461 | token_prior = token 462 | # atext 463 | else: 464 | # RFC 5322 allows any atext... 465 | # http://tools.ietf.org/html/rfc5322#section-3.2.3 466 | # atext = ALPHA / DIGIT / ; Printable US-ASCII 467 | # "!" / "#" / ; characters not 468 | # "$" / "%" / ; including specials. 469 | # "&" / "'" / ; Used for atoms. 470 | # "*" / "+" / 471 | # "-" / "/" / 472 | # "=" / "?" 
/ 473 | # "^" / "_" / 474 | # "`" / "{" / 475 | # "|" / "}" / 476 | # "~" 477 | 478 | # But RFC 5321 only allows letter-digit-hyphen to 479 | # comply with DNS rules (RFCs 1034 & 1123) 480 | # http://tools.ietf.org/html/rfc5321#section-4.1.2 481 | # sub-domain = Let-dig [Ldh-str] 482 | # 483 | # Let-dig = ALPHA / DIGIT 484 | # 485 | # Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig 486 | # 487 | if end_or_die: 488 | # We have encountered atext where it is no longer 489 | # valid 490 | if context_prior in [Context.COMMENT, Context.FWS]: 491 | return_status.append( 492 | InvalidDiagnosis("ATEXT_AFTER_CFWS") 493 | ) 494 | elif context_prior == Context.LITERAL: 495 | return_status.append( 496 | InvalidDiagnosis("ATEXT_AFTER_DOMLIT") 497 | ) 498 | else: # pragma: no cover 499 | if diagnose: 500 | return InvalidDiagnosis("BAD_PARSE") 501 | else: 502 | return False 503 | 504 | o = ord(token) 505 | # Assume this token isn't a hyphen unless we discover 506 | # it is 507 | hyphen_flag = False 508 | 509 | if o < 33 or o > 126 or token in Char.SPECIALS: 510 | # Fatal error 511 | return_status.append(InvalidDiagnosis("EXPECTING_ATEXT")) 512 | elif token == Char.HYPHEN: 513 | if element_len == 0: 514 | # Hyphens can't be at the beginning of a 515 | # subdomain 516 | # Fatal error 517 | return_status.append( 518 | InvalidDiagnosis("DOMAINHYPHENSTART") 519 | ) 520 | 521 | hyphen_flag = True 522 | elif not (47 < o < 58 or 64 < o < 91 or 96 < o < 123): 523 | # Not an RFC 5321 subdomain, but still OK by RFC 524 | # 5322 525 | return_status.append(RFC5322Diagnosis("DOMAIN")) 526 | 527 | parse_data[Context.DOMAIN] += token 528 | atom_list[Context.DOMAIN][element_count] += token 529 | element_len += 1 530 | # ------------------------------------------------------- 531 | # Domain literal 532 | # ------------------------------------------------------- 533 | elif context == Context.LITERAL: 534 | # http://tools.ietf.org/html/rfc5322#section-3.4.1 535 | # domain-literal = [CFWS] 536 | # "[" 
*([FWS] dtext) [FWS] "]" 537 | # [CFWS] 538 | # 539 | # dtext = %d33-90 / ; Printable US-ASCII 540 | # %d94-126 / ; characters not 541 | # obs-dtext ; including [, ], or \ 542 | # 543 | # obs-dtext = obs-NO-WS-CTL / quoted-pair 544 | 545 | # End of domain literal 546 | if token == Char.CLOSESQBRACKET: 547 | if max(return_status) < BaseDiagnosis.CATEGORIES["DEPREC"]: 548 | # Could be a valid RFC 5321 address literal, so 549 | # let's check 550 | # 551 | # http://tools.ietf.org/html/rfc5321#section-4.1.2 552 | # address-literal = "[" ( IPv4-address-literal / 553 | # IPv6-address-literal / 554 | # General-address-literal ) "]" 555 | # ; See Section 4.1.3 556 | # 557 | # http://tools.ietf.org/html/rfc5321#section-4.1.3 558 | # IPv4-address-literal = Snum 3("." Snum) 559 | # 560 | # IPv6-address-literal = "IPv6:" IPv6-addr 561 | # 562 | # General-address-literal = Standardized-tag ":" 563 | # 1*dcontent 564 | # 565 | # Standardized-tag = Ldh-str 566 | # ; Standardized-tag MUST be 567 | # ; specified in a 568 | # ; Standards-Track RFC and 569 | # ; registered with IANA 570 | # 571 | # dcontent = %d33-90 / ; Printable US-ASCII 572 | # %d94-126 ; excl. "[", "\", "]" 573 | # 574 | # Snum = 1*3DIGIT 575 | # ; representing a decimal integer 576 | # ; value in the range 0-255 577 | # 578 | # IPv6-addr = IPv6-full / IPv6-comp / 579 | # IPv6v4-full / IPv6v4-comp 580 | # 581 | # IPv6-hex = 1*4HEXDIG 582 | # 583 | # IPv6-full = IPv6-hex 7(":" IPv6-hex) 584 | # 585 | # IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] 586 | # "::" 587 | # [IPv6-hex *5(":" IPv6-hex)] 588 | # ; The "::" represents at least 2 589 | # ; 16-bit groups of zeros. No more 590 | # ; than 6 groups in addition to 591 | # ; the "::" may be present. 
592 | # 593 | # IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" 594 | # IPv4-address-literal 595 | # 596 | # IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] 597 | # "::" 598 | # [IPv6-hex *3(":" IPv6-hex) ":"] 599 | # IPv4-address-literal 600 | # ; The "::" represents at least 2 601 | # ; 16-bit groups of zeros. No more 602 | # ; than 4 groups in addition to 603 | # ; the "::" and 604 | # ; IPv4-address-literal may be 605 | # ; present. 606 | 607 | max_groups = 8 608 | index = False 609 | address_literal = parse_data["literal"] 610 | 611 | # Extract IPv4 part from the end of the 612 | # address-literal (if there is one) 613 | regex = ( 614 | r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.)" 615 | r"{3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$" 616 | ) 617 | match_ip = re.search(regex, address_literal) 618 | if match_ip: 619 | index = address_literal.rfind(match_ip.group(0)) 620 | if index != 0: 621 | # Convert IPv4 part to IPv6 format for 622 | # further testing 623 | address_literal = address_literal[0:index] + "0:0" 624 | 625 | if index == 0 and index is not False: 626 | # Nothing there except a valid IPv4 address 627 | return_status.append(RFC5321Diagnosis("ADDRESSLITERAL")) 628 | elif not address_literal.startswith(Char.IPV6TAG): 629 | return_status.append(RFC5322Diagnosis("DOMAINLITERAL")) 630 | else: 631 | ipv6 = address_literal[5:] 632 | # Revision 2.7: Daniel Marschall's new IPv6 633 | # testing strategy 634 | match_ip = ipv6.split(Char.COLON) 635 | grp_count = len(match_ip) 636 | index = ipv6.find(Char.DOUBLECOLON) 637 | 638 | if index == -1: 639 | # We need exactly the right number of 640 | # groups 641 | if grp_count != max_groups: 642 | return_status.append( 643 | RFC5322Diagnosis("IPV6_GRPCOUNT") 644 | ) 645 | else: 646 | if index != ipv6.rfind(Char.DOUBLECOLON): 647 | return_status.append( 648 | RFC5322Diagnosis("IPV6_2X2XCOLON") 649 | ) 650 | else: 651 | if index in [0, len(ipv6) - 2]: 652 | # RFC 4291 allows :: at the start 653 | # or end of an address with 
7 other 654 | # groups in addition 655 | max_groups += 1 656 | 657 | if grp_count > max_groups: 658 | return_status.append( 659 | RFC5322Diagnosis("IPV6_MAXGRPS") 660 | ) 661 | elif grp_count == max_groups: 662 | # Eliding a single "::" 663 | return_status.append( 664 | RFC5321Diagnosis("IPV6DEPRECATED") 665 | ) 666 | 667 | # Revision 2.7: Daniel Marschall's new IPv6 668 | # testing strategy 669 | if ipv6[0] == Char.COLON and ipv6[1] != Char.COLON: 670 | # Address starts with a single colon 671 | return_status.append( 672 | RFC5322Diagnosis("IPV6_COLONSTRT") 673 | ) 674 | elif ipv6[-1] == Char.COLON and ipv6[-2] != Char.COLON: 675 | # Address ends with a single colon 676 | return_status.append( 677 | RFC5322Diagnosis("IPV6_COLONEND") 678 | ) 679 | elif [ 680 | re.match(r"^[0-9A-Fa-f]{0,4}$", i) for i in match_ip 681 | ].count(None) != 0: 682 | # Check for unmatched characters 683 | return_status.append( 684 | RFC5322Diagnosis("IPV6_BADCHAR") 685 | ) 686 | else: 687 | return_status.append( 688 | RFC5321Diagnosis("ADDRESSLITERAL") 689 | ) 690 | else: 691 | return_status.append(RFC5322Diagnosis("DOMAINLITERAL")) 692 | 693 | parse_data[Context.DOMAIN] += token 694 | atom_list[Context.DOMAIN][element_count] += token 695 | element_len += 1 696 | context_prior = context 697 | context = context_stack.pop() 698 | elif token == Char.BACKSLASH: 699 | return_status.append(RFC5322Diagnosis("DOMLIT_OBSDTEXT")) 700 | context_stack.append(context) 701 | context = Context.QUOTEDPAIR 702 | # Folding White Space (FWS) 703 | elif token in [Char.CR, Char.SP, Char.HTAB]: 704 | # Skip simulates the use of ++ operator if the latter 705 | # check doesn't short-circuit 706 | if token == Char.CR: 707 | skip = True 708 | 709 | if ( 710 | i + 1 == raw_length 711 | or to_char(address[i + 1]) != Char.LF 712 | ): 713 | return_status.append(InvalidDiagnosis("CR_NO_LF")) 714 | break 715 | 716 | return_status.append(CFWSDiagnosis("FWS")) 717 | 718 | context_stack.append(context) 719 | context = 
Context.FWS 720 | token_prior = token 721 | # dtext 722 | else: 723 | # http://tools.ietf.org/html/rfc5322#section-3.4.1 724 | # dtext = %d33-90 / ; Printable US-ASCII 725 | # %d94-126 / ; characters not 726 | # obs-dtext ; including [, ], or \ 727 | # 728 | # obs-dtext = obs-NO-WS-CTL / quoted-pair 729 | # 730 | # obs-NO-WS-CTL = %d1-8 / ; US-ASCII control 731 | # %d11 / ; characters that do 732 | # %d12 / ; not include the 733 | # %d14-31 / ; carriage return, line 734 | # %d127 ; feed, and white space 735 | # ; characters 736 | o = ord(token) 737 | 738 | # CR, LF, SP & HTAB have already been parsed above 739 | if o > 127 or o == 0 or token == Char.OPENSQBRACKET: 740 | # Fatal error 741 | return_status.append(InvalidDiagnosis("EXPECTING_DTEXT")) 742 | break 743 | elif o < 33 or o == 127: 744 | return_status.append(RFC5322Diagnosis("DOMLIT_OBSDTEXT")) 745 | 746 | parse_data["literal"] += token 747 | parse_data[Context.DOMAIN] += token 748 | atom_list[Context.DOMAIN][element_count] += token 749 | element_len += 1 750 | # ------------------------------------------------------- 751 | # Quoted string 752 | # ------------------------------------------------------- 753 | elif context == Context.QUOTEDSTRING: 754 | # http://tools.ietf.org/html/rfc5322#section-3.2.4 755 | # quoted-string = [CFWS] 756 | # DQUOTE *([FWS] qcontent) [FWS] DQUOTE 757 | # [CFWS] 758 | # 759 | # qcontent = qtext / quoted-pair 760 | 761 | # Quoted pair 762 | if token == Char.BACKSLASH: 763 | context_stack.append(context) 764 | context = Context.QUOTEDPAIR 765 | # Folding White Space (FWS) 766 | # Inside a quoted string, spaces are allow as regular 767 | # characters. 
It's only FWS if we include HTAB or CRLF 768 | elif token in [Char.CR, Char.HTAB]: 769 | # Skip simulates the use of ++ operator if the latter 770 | # check doesn't short-circuit 771 | if token == Char.CR: 772 | skip = True 773 | 774 | if ( 775 | i + 1 == raw_length 776 | or to_char(address[i + 1]) != Char.LF 777 | ): 778 | return_status.append(InvalidDiagnosis("CR_NO_LF")) 779 | break 780 | 781 | # http://tools.ietf.org/html/rfc5322#section-3.2.2 782 | # Runs of FWS, comment, or CFWS that occur between 783 | # lexical tokens in a structured header field are 784 | # semantically interpreted as a single space 785 | # character. 786 | 787 | # http://tools.ietf.org/html/rfc5322#section-3.2.4 788 | # the CRLF in any FWS/CFWS that appears within the 789 | # quoted string [is] semantically "invisible" and 790 | # therefore not part of the quoted-string 791 | parse_data[Context.LOCALPART] += Char.SP 792 | atom_list[Context.LOCALPART][element_count] += Char.SP 793 | element_len += 1 794 | 795 | return_status.append(CFWSDiagnosis("FWS")) 796 | context_stack.append(context) 797 | context = Context.FWS 798 | token_prior = token 799 | # End of quoted string 800 | elif token == Char.DQUOTE: 801 | parse_data[Context.LOCALPART] += token 802 | atom_list[Context.LOCALPART][element_count] += token 803 | element_len += 1 804 | context_prior = context 805 | context = context_stack.pop() 806 | # qtext 807 | else: 808 | # http://tools.ietf.org/html/rfc5322#section-3.2.4 809 | # qtext = %d33 / ; Printable US-ASCII 810 | # %d35-91 / ; characters not 811 | # %d93-126 / ; including "\" or 812 | # obs-qtext ; the quote 813 | # ; character 814 | # 815 | # obs-qtext = obs-NO-WS-CTL 816 | # 817 | # obs-NO-WS-CTL = %d1-8 / ; US-ASCII control 818 | # %d11 / ; characters that do 819 | # %d12 / ; not include the CR, 820 | # %d14-31 / ; LF, and white space 821 | # %d127 ; characters 822 | o = ord(token) 823 | 824 | if o > 127 or o == 0 or o == 10: 825 | # Fatal error 826 | 
return_status.append(InvalidDiagnosis("EXPECTING_QTEXT")) 827 | elif o < 32 or o == 127: 828 | return_status.append(DeprecatedDiagnosis("QTEXT")) 829 | 830 | parse_data[Context.LOCALPART] += token 831 | atom_list[Context.LOCALPART][element_count] += token 832 | element_len += 1 833 | # ------------------------------------------------------- 834 | # Quoted pair 835 | # ------------------------------------------------------- 836 | elif context == Context.QUOTEDPAIR: 837 | # http://tools.ietf.org/html/rfc5322#section-3.2.1 838 | # quoted-pair = ("\" (VCHAR / WSP)) / obs-qp 839 | # 840 | # VCHAR = %d33-126 ; visible (printing) 841 | # ; characters 842 | # 843 | # WSP = SP / HTAB ; white space 844 | # 845 | # obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) 846 | # 847 | # obs-NO-WS-CTL = %d1-8 / ; US-ASCII control 848 | # %d11 / ; characters that do not 849 | # %d12 / ; include the carriage 850 | # %d14-31 / ; return, line feed, and 851 | # %d127 ; white space characters 852 | # 853 | # i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127) 854 | 855 | o = ord(token) 856 | 857 | if o > 127: 858 | # Fatal error 859 | return_status.append(InvalidDiagnosis("EXPECTING_QPAIR")) 860 | elif (o < 31 and o != 9) or o == 127: 861 | # SP & HTAB are allowed 862 | return_status.append(DeprecatedDiagnosis("QP")) 863 | 864 | # At this point we know where this qpair occurred so 865 | # we could check to see if the character actually 866 | # needed to be quoted at all. 867 | # http://tools.ietf.org/html/rfc5321#section-4.1.2 868 | # the sending system SHOULD transmit the 869 | # form that uses the minimum quoting possible. 
870 | context_prior = context 871 | context = context_stack.pop() # End of qpair 872 | token = Char.BACKSLASH + token 873 | 874 | if context == Context.COMMENT: 875 | pass 876 | elif context == Context.QUOTEDSTRING: 877 | parse_data[Context.LOCALPART] += token 878 | atom_list[Context.LOCALPART][element_count] += token 879 | # The maximum sizes specified by RFC 5321 are octet 880 | # counts, so we must include the backslash 881 | element_len += 2 882 | elif context == Context.LITERAL: 883 | parse_data[Context.DOMAIN] += token 884 | atom_list[Context.DOMAIN][element_count] += token 885 | # The maximum sizes specified by RFC 5321 are octet 886 | # counts, so we must include the backslash 887 | element_len += 2 888 | else: # pragma: no cover 889 | if diagnose: 890 | return InvalidDiagnosis("BAD_PARSE") 891 | else: 892 | return False 893 | # ------------------------------------------------------- 894 | # Comment 895 | # ------------------------------------------------------- 896 | elif context == Context.COMMENT: 897 | # http://tools.ietf.org/html/rfc5322#section-3.2.2 898 | # comment = "(" *([FWS] ccontent) [FWS] ")" 899 | # 900 | # ccontent = ctext / quoted-pair / comment 901 | 902 | # Nested comment 903 | if token == Char.OPENPARENTHESIS: 904 | # Nested comments are OK 905 | context_stack.append(context) 906 | context = Context.COMMENT 907 | # End of comment 908 | elif token == Char.CLOSEPARENTHESIS: 909 | context_prior = context 910 | context = context_stack.pop() 911 | # Quoted pair 912 | elif token == Char.BACKSLASH: 913 | context_stack.append(context) 914 | context = Context.QUOTEDPAIR 915 | # Folding White Space (FWS) 916 | elif token in [Char.CR, Char.SP, Char.HTAB]: 917 | # Skip simulates the use of ++ operator if the latter 918 | # check doesn't short-circuit 919 | if token == Char.CR: 920 | skip = True 921 | 922 | if ( 923 | i + 1 == raw_length 924 | or to_char(address[i + 1]) != Char.LF 925 | ): 926 | return_status.append(InvalidDiagnosis("CR_NO_LF")) 927 | 
break 928 | 929 | return_status.append(CFWSDiagnosis("FWS")) 930 | 931 | context_stack.append(context) 932 | context = Context.FWS 933 | token_prior = token 934 | # ctext 935 | else: 936 | # http://tools.ietf.org/html/rfc5322#section-3.2.3 937 | # ctext = %d33-39 / ; Printable US- 938 | # %d42-91 / ; ASCII characters 939 | # %d93-126 / ; not including 940 | # obs-ctext ; "(", ")", or "\" 941 | # 942 | # obs-ctext = obs-NO-WS-CTL 943 | # 944 | # obs-NO-WS-CTL = %d1-8 / ; US-ASCII control 945 | # %d11 / ; characters that 946 | # %d12 / ; do not include 947 | # %d14-31 / ; the CR, LF, and 948 | # ; white space 949 | # ; characters 950 | 951 | o = ord(token) 952 | 953 | if o > 127 or o == 0 or o == 10: 954 | # Fatal error 955 | return_status.append(InvalidDiagnosis("EXPECTING_CTEXT")) 956 | break 957 | elif o < 32 or o == 127: 958 | return_status.append(DeprecatedDiagnosis("CTEXT")) 959 | 960 | # ------------------------------------------------------- 961 | # Folding White Space (FWS) 962 | # ------------------------------------------------------- 963 | elif context == Context.FWS: 964 | # http://tools.ietf.org/html/rfc5322#section-3.2.2 965 | # FWS = ([*WSP CRLF] 1*WSP) / obs-FWS 966 | # ; Folding white space 967 | # 968 | # But note the erratum: 969 | # http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908 970 | # In the obsolete syntax, any amount of folding white 971 | # space MAY be inserted where the obs-FWS rule is 972 | # allowed. This creates the possibility of having two 973 | # consecutive "folds" in a line, and therefore the 974 | # possibility that a line which makes up a folded header 975 | # field could be composed entirely of white space. 
976 | # 977 | # obs-FWS = 1*([CRLF] WSP) 978 | 979 | if token_prior == Char.CR: 980 | if token == Char.CR: 981 | # Fatal error 982 | return_status.append(InvalidDiagnosis("FWS_CRLF_X2")) 983 | break 984 | 985 | if crlf_count != -1: 986 | crlf_count += 1 987 | if crlf_count > 1: 988 | # Multiple folds = obsolete FWS 989 | return_status.append(DeprecatedDiagnosis("FWS")) 990 | else: 991 | crlf_count = 1 992 | 993 | # Skip simulates the use of ++ operator if the latter 994 | # check doesn't short-circuit 995 | if token == Char.CR: 996 | skip = True 997 | 998 | if i + 1 == raw_length or to_char(address[i + 1]) != Char.LF: 999 | return_status.append(InvalidDiagnosis("CR_NO_LF")) 1000 | break 1001 | elif token in [Char.SP, Char.HTAB]: 1002 | pass 1003 | else: 1004 | if token_prior == Char.CR: 1005 | # Fatal error 1006 | return_status.append(InvalidDiagnosis("FWS_CRLF_END")) 1007 | break 1008 | 1009 | if crlf_count != -1: 1010 | crlf_count = -1 1011 | 1012 | context_prior = context 1013 | # End of FWS 1014 | context = context_stack.pop() 1015 | 1016 | # Look at this token again in the parent context 1017 | repeat = True 1018 | 1019 | token_prior = token 1020 | 1021 | # ------------------------------------------------------- 1022 | # A context we aren't expecting 1023 | # ------------------------------------------------------- 1024 | else: # pragma: no cover 1025 | if diagnose: 1026 | return InvalidDiagnosis("BAD_PARSE") 1027 | else: 1028 | return False 1029 | 1030 | # No point in going on if we've got a fatal error 1031 | if max(return_status) > BaseDiagnosis.CATEGORIES["RFC5322"]: 1032 | break 1033 | 1034 | # Some simple final tests 1035 | if max(return_status) < BaseDiagnosis.CATEGORIES["RFC5322"]: 1036 | if context == Context.QUOTEDSTRING: 1037 | # Fatal error 1038 | return_status.append(InvalidDiagnosis("UNCLOSEDQUOTEDSTR")) 1039 | elif context == Context.QUOTEDPAIR: 1040 | # Fatal error 1041 | return_status.append(InvalidDiagnosis("BACKSLASHEND")) 1042 | elif context 
== Context.COMMENT: 1043 | # Fatal error 1044 | return_status.append(InvalidDiagnosis("UNCLOSEDCOMMENT")) 1045 | elif context == Context.LITERAL: 1046 | # Fatal error 1047 | return_status.append(InvalidDiagnosis("UNCLOSEDDOMLIT")) 1048 | elif token == Char.CR: 1049 | # Fatal error 1050 | return_status.append(InvalidDiagnosis("FWS_CRLF_END")) 1051 | elif parse_data[Context.DOMAIN] == "": 1052 | # Fatal error 1053 | return_status.append(InvalidDiagnosis("NODOMAIN")) 1054 | elif element_len == 0: 1055 | # Fatal error 1056 | return_status.append(InvalidDiagnosis("DOT_END")) 1057 | elif hyphen_flag: 1058 | # Fatal error 1059 | return_status.append(InvalidDiagnosis("DOMAINHYPHENEND")) 1060 | # http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2 1061 | # The maximum total length of a domain name or number is 255 octets 1062 | elif len(parse_data[Context.DOMAIN]) > 255: 1063 | return_status.append(RFC5322Diagnosis("DOMAIN_TOOLONG")) 1064 | # http://tools.ietf.org/html/rfc5321#section-4.1.2 1065 | # Forward-path = Path 1066 | # 1067 | # Path = "<" [ A-d-l ":" ] Mailbox ">" 1068 | # 1069 | # http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3 1070 | # The maximum total length of a reverse-path or forward-path is 1071 | # 256 octets (including the punctuation and element separators). 1072 | # 1073 | # Thus, even without (obsolete) routing information, the Mailbox 1074 | # can only be 254 characters long. This is confirmed by this 1075 | # verified erratum to RFC 3696: 1076 | # 1077 | # http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690 1078 | # However, there is a restriction in RFC 2821 on the length of an 1079 | # address in MAIL and RCPT commands of 254 characters. Since 1080 | # addresses that do not fit in those fields are not normally 1081 | # useful, the upper limit on address lengths should normally be 1082 | # considered to be 254. 
1083 | elif ( 1084 | len( 1085 | parse_data[Context.LOCALPART] + Char.AT + parse_data[Context.DOMAIN] 1086 | ) 1087 | > 254 1088 | ): 1089 | return_status.append(RFC5322Diagnosis("TOOLONG")) 1090 | # http://tools.ietf.org/html/rfc1035#section-2.3.4 1091 | # labels 63 octets or less 1092 | elif element_len > 63: 1093 | return_status.append(RFC5322Diagnosis("LABEL_TOOLONG")) 1094 | 1095 | return_status = list(set(return_status)) 1096 | final_status = max(return_status) 1097 | 1098 | if len(return_status) != 1: 1099 | # Remove redundant ValidDiagnosis 1100 | return_status.pop(0) 1101 | 1102 | parse_data["status"] = return_status 1103 | 1104 | if final_status < threshold: 1105 | final_status = ValidDiagnosis() 1106 | 1107 | if diagnose: 1108 | return final_status 1109 | else: 1110 | return final_status < BaseDiagnosis.CATEGORIES["THRESHOLD"] 1111 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelherold/pyIsEmail/9d2cb38048b4ae08f10b53ffa5e753f10b234756/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/tests.xml: -------------------------------------------------------------------------------- 1 | 2 | 29 | 30 | 31 |

New test set

32 |

This test set is designed to replace and extend the coverage of the original set but with fewer tests.

33 |

Thanks to Michael Rushton (michael@squiloople.com) for starting this work and contributing tests 1-100

34 |
35 | 36 |
37 | ISEMAIL_ERR 38 | ISEMAIL_ERR_NODOMAIN 39 | Michael Rushton 40 | http://squiloople.com/tag/email/ 41 | 42 | 43 |
test
44 | ISEMAIL_ERR 45 | ISEMAIL_ERR_NODOMAIN 46 | Michael Rushton 47 | http://squiloople.com/tag/email/ 48 |
49 | 50 |
@
51 | ISEMAIL_ERR 52 | ISEMAIL_ERR_NOLOCALPART 53 | Michael Rushton 54 | http://squiloople.com/tag/email/ 55 |
56 | 57 |
test@
58 | ISEMAIL_ERR 59 | ISEMAIL_ERR_NODOMAIN 60 | Michael Rushton 61 | http://squiloople.com/tag/email/ 62 |
63 | 64 |
test@io
65 | io. currently has an MX-record (Feb 2011). Some DNS setups seem to find it, some don't. If you don't see the MX for io. then try setting your DNS server to 8.8.8.8 (the Google DNS server) 66 | ISEMAIL_VALID_CATEGORY 67 | ISEMAIL_VALID 68 | Michael Rushton 69 | http://squiloople.com/tag/email/ 70 |
71 | 72 |
@io
73 | io. currently has an MX-record (Feb 2011) 74 | ISEMAIL_ERR 75 | ISEMAIL_ERR_NOLOCALPART 76 | Michael Rushton 77 | http://squiloople.com/tag/email/ 78 |
79 | 80 |
@iana.org
81 | ISEMAIL_ERR 82 | ISEMAIL_ERR_NOLOCALPART 83 | Michael Rushton 84 | http://squiloople.com/tag/email/ 85 |
86 | 87 |
test@iana.org
88 | ISEMAIL_VALID_CATEGORY 89 | ISEMAIL_VALID 90 | Michael Rushton 91 | http://squiloople.com/tag/email/ 92 |
93 | 94 |
test@nominet.org.uk
95 | ISEMAIL_VALID_CATEGORY 96 | ISEMAIL_VALID 97 | Michael Rushton 98 | http://squiloople.com/tag/email/ 99 |
100 | 101 |
test@about.museum
102 | ISEMAIL_VALID_CATEGORY 103 | ISEMAIL_VALID 104 | Michael Rushton 105 | http://squiloople.com/tag/email/ 106 |
107 | 108 |
a@iana.org
109 | ISEMAIL_VALID_CATEGORY 110 | ISEMAIL_VALID 111 | Michael Rushton 112 | http://squiloople.com/tag/email/ 113 |
114 | 115 |
test.test@iana.org
116 | ISEMAIL_VALID_CATEGORY 117 | ISEMAIL_VALID 118 | Michael Rushton 119 | http://squiloople.com/tag/email/ 120 |
121 | 122 |
.test@iana.org
123 | ISEMAIL_ERR 124 | ISEMAIL_ERR_DOT_START 125 | Michael Rushton 126 | http://squiloople.com/tag/email/ 127 |
128 | 129 |
test.@iana.org
130 | ISEMAIL_ERR 131 | ISEMAIL_ERR_DOT_END 132 | Michael Rushton 133 | http://squiloople.com/tag/email/ 134 |
135 | 136 |
test..iana.org
137 | ISEMAIL_ERR 138 | ISEMAIL_ERR_CONSECUTIVEDOTS 139 | Michael Rushton 140 | http://squiloople.com/tag/email/ 141 |
142 | 143 |
test_exa-mple.com
144 | ISEMAIL_ERR 145 | ISEMAIL_ERR_NODOMAIN 146 | Michael Rushton 147 | http://squiloople.com/tag/email/ 148 |
149 | 150 |
!#$%&`*+/=?^`{|}~@iana.org
151 | ISEMAIL_VALID_CATEGORY 152 | ISEMAIL_VALID 153 | Michael Rushton 154 | http://squiloople.com/tag/email/ 155 |
156 | 157 |
test\@test@iana.org
158 | ISEMAIL_ERR 159 | ISEMAIL_ERR_EXPECTING_ATEXT 160 | Michael Rushton 161 | http://squiloople.com/tag/email/ 162 |
163 | 164 |
123@iana.org
165 | ISEMAIL_VALID_CATEGORY 166 | ISEMAIL_VALID 167 | Michael Rushton 168 | http://squiloople.com/tag/email/ 169 |
170 | 171 |
test@123.com
172 | ISEMAIL_VALID_CATEGORY 173 | ISEMAIL_VALID 174 | Michael Rushton 175 | http://squiloople.com/tag/email/ 176 |
177 | 178 |
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org
179 | ISEMAIL_VALID_CATEGORY 180 | ISEMAIL_VALID 181 | Michael Rushton 182 | http://squiloople.com/tag/email/ 183 |
184 | 185 |
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklmn@iana.org
186 | ISEMAIL_RFC5322 187 | ISEMAIL_RFC5322_LOCAL_TOOLONG 188 | Michael Rushton 189 | http://squiloople.com/tag/email/ 190 |
191 | 192 |
test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm.com
193 | ISEMAIL_RFC5322 194 | ISEMAIL_RFC5322_LABEL_TOOLONG 195 | Michael Rushton 196 | http://squiloople.com/tag/email/ 197 |
198 | 199 |
test@mason-dixon.com
200 | ISEMAIL_VALID_CATEGORY 201 | ISEMAIL_VALID 202 | Michael Rushton 203 | http://squiloople.com/tag/email/ 204 |
205 | 206 |
test@-iana.org
207 | ISEMAIL_ERR 208 | ISEMAIL_ERR_DOMAINHYPHENSTART 209 | Michael Rushton 210 | http://squiloople.com/tag/email/ 211 |
212 | 213 |
test@iana-.com
214 | ISEMAIL_ERR 215 | ISEMAIL_ERR_DOMAINHYPHENEND 216 | Michael Rushton 217 | http://squiloople.com/tag/email/ 218 |
219 | 220 |
test@g--a.com
221 | ISEMAIL_VALID_CATEGORY 222 | ISEMAIL_VALID 223 | Michael Rushton 224 | http://squiloople.com/tag/email/ 225 |
226 | 227 |
test@.iana.org
228 | ISEMAIL_ERR 229 | ISEMAIL_ERR_DOT_START 230 | Michael Rushton 231 | http://squiloople.com/tag/email/ 232 |
233 | 234 |
test@iana.org.
235 | ISEMAIL_ERR 236 | ISEMAIL_ERR_DOT_END 237 | Michael Rushton 238 | http://squiloople.com/tag/email/ 239 |
240 | 241 |
test@iana..com
242 | ISEMAIL_ERR 243 | ISEMAIL_ERR_CONSECUTIVEDOTS 244 | Michael Rushton 245 | http://squiloople.com/tag/email/ 246 |
247 | 248 |
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghij
249 | ISEMAIL_RFC5322 250 | ISEMAIL_RFC5322_TOOLONG 251 | Michael Rushton 252 | http://squiloople.com/tag/email/ 253 |
254 | 255 |
a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hij
256 | ISEMAIL_RFC5322 257 | ISEMAIL_RFC5322_TOOLONG 258 | Michael Rushton 259 | http://squiloople.com/tag/email/ 260 |
261 | 262 |
a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hijk
263 | ISEMAIL_RFC5322 264 | ISEMAIL_RFC5322_DOMAIN_TOOLONG 265 | Michael Rushton 266 | http://squiloople.com/tag/email/ 267 |
268 | 269 |
"test"@iana.org
270 | ISEMAIL_RFC5321 271 | ISEMAIL_RFC5321_QUOTEDSTRING 272 | Michael Rushton 273 | http://squiloople.com/tag/email/ 274 |
275 | 276 |
""@iana.org
277 | ISEMAIL_RFC5321 278 | ISEMAIL_RFC5321_QUOTEDSTRING 279 | Michael Rushton 280 | http://squiloople.com/tag/email/ 281 |
282 | 283 |
"""@iana.org
284 | ISEMAIL_ERR 285 | ISEMAIL_ERR_EXPECTING_ATEXT 286 | Michael Rushton 287 | http://squiloople.com/tag/email/ 288 |
289 | 290 |
"\a"@iana.org
291 | ISEMAIL_RFC5321 292 | ISEMAIL_RFC5321_QUOTEDSTRING 293 | Michael Rushton 294 | http://squiloople.com/tag/email/ 295 |
296 | 297 |
"\""@iana.org
298 | ISEMAIL_RFC5321 299 | ISEMAIL_RFC5321_QUOTEDSTRING 300 | Michael Rushton 301 | http://squiloople.com/tag/email/ 302 |
303 | 304 |
"\"@iana.org
305 | ISEMAIL_ERR 306 | ISEMAIL_ERR_UNCLOSEDQUOTEDSTR 307 | Michael Rushton 308 | http://squiloople.com/tag/email/ 309 |
310 | 311 |
"\\"@iana.org
312 | ISEMAIL_RFC5321 313 | ISEMAIL_RFC5321_QUOTEDSTRING 314 | Michael Rushton 315 | http://squiloople.com/tag/email/ 316 |
317 | 318 |
test"@iana.org
319 | ISEMAIL_ERR 320 | ISEMAIL_ERR_EXPECTING_ATEXT 321 | Michael Rushton 322 | http://squiloople.com/tag/email/ 323 |
324 | 325 |
"test@iana.org
326 | ISEMAIL_ERR 327 | ISEMAIL_ERR_UNCLOSEDQUOTEDSTR 328 | Michael Rushton 329 | http://squiloople.com/tag/email/ 330 |
331 | 332 |
"test"test@iana.org
333 | ISEMAIL_ERR 334 | ISEMAIL_ERR_ATEXT_AFTER_QS 335 | Michael Rushton 336 | http://squiloople.com/tag/email/ 337 |
338 | 339 |
test"text"@iana.org
340 | ISEMAIL_ERR 341 | ISEMAIL_ERR_EXPECTING_ATEXT 342 | Michael Rushton 343 | http://squiloople.com/tag/email/ 344 |
345 | 346 |
"test""test"@iana.org
347 | ISEMAIL_ERR 348 | ISEMAIL_ERR_EXPECTING_ATEXT 349 | Michael Rushton 350 | http://squiloople.com/tag/email/ 351 |
352 | 353 |
"test"."test"@iana.org
354 | ISEMAIL_DEPREC 355 | ISEMAIL_DEPREC_LOCALPART 356 | Michael Rushton 357 | http://squiloople.com/tag/email/ 358 |
359 | 360 |
"test\ test"@iana.org
361 | ISEMAIL_RFC5321 362 | ISEMAIL_RFC5321_QUOTEDSTRING 363 | Michael Rushton 364 | http://squiloople.com/tag/email/ 365 |
366 | 367 |
"test".test@iana.org
368 | ISEMAIL_DEPREC 369 | ISEMAIL_DEPREC_LOCALPART 370 | Michael Rushton 371 | http://squiloople.com/tag/email/ 372 |
373 | 374 |
"test␀"@iana.org
375 | ISEMAIL_ERR 376 | ISEMAIL_ERR_EXPECTING_QTEXT 377 | Michael Rushton 378 | http://squiloople.com/tag/email/ 379 |
380 | 381 |
"test\␀"@iana.org
382 | ISEMAIL_DEPREC 383 | ISEMAIL_DEPREC_QP 384 | Michael Rushton 385 | http://squiloople.com/tag/email/ 386 |
387 | 388 |
"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghj"@iana.org
389 | Quotes are still part of the length restriction 390 | ISEMAIL_RFC5322 391 | ISEMAIL_RFC5322_LOCAL_TOOLONG 392 | Dominic Sayers 393 | http://www.dominicsayers.com/isemail 394 |
395 | 396 |
"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefg\h"@iana.org
397 | Quoted pair is still part of the length restriction 398 | ISEMAIL_RFC5322 399 | ISEMAIL_RFC5322_LOCAL_TOOLONG 400 | Dominic Sayers 401 | http://www.dominicsayers.com/isemail 402 |
403 | 404 |
test@[255.255.255.255]
405 | ISEMAIL_RFC5321 406 | ISEMAIL_RFC5321_ADDRESSLITERAL 407 | Michael Rushton 408 | http://squiloople.com/tag/email/ 409 |
410 | 411 |
test@a[255.255.255.255]
412 | ISEMAIL_ERR 413 | ISEMAIL_ERR_EXPECTING_ATEXT 414 | Michael Rushton 415 | http://squiloople.com/tag/email/ 416 |
417 | 418 |
test@[255.255.255]
419 | ISEMAIL_RFC5322 420 | ISEMAIL_RFC5322_DOMAINLITERAL 421 | Michael Rushton 422 | http://squiloople.com/tag/email/ 423 |
424 | 425 |
test@[255.255.255.255.255]
426 | ISEMAIL_RFC5322 427 | ISEMAIL_RFC5322_DOMAINLITERAL 428 | Michael Rushton 429 | http://squiloople.com/tag/email/ 430 |
431 | 432 |
test@[255.255.255.256]
433 | ISEMAIL_RFC5322 434 | ISEMAIL_RFC5322_DOMAINLITERAL 435 | Michael Rushton 436 | http://squiloople.com/tag/email/ 437 |
438 | 439 |
test@[1111:2222:3333:4444:5555:6666:7777:8888]
440 | ISEMAIL_RFC5322 441 | ISEMAIL_RFC5322_DOMAINLITERAL 442 | Michael Rushton 443 | http://squiloople.com/tag/email/ 444 |
445 | 446 |
test@[IPv6:1111:2222:3333:4444:5555:6666:7777]
447 | ISEMAIL_RFC5322 448 | ISEMAIL_RFC5322_IPV6_GRPCOUNT 449 | Michael Rushton 450 | http://squiloople.com/tag/email/ 451 | 452 |
453 | 454 |
test@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888]
455 | ISEMAIL_RFC5321 456 | ISEMAIL_RFC5321_ADDRESSLITERAL 457 | Michael Rushton 458 | http://squiloople.com/tag/email/ 459 |
460 | 461 |
test@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888:9999]
462 | ISEMAIL_RFC5322 463 | ISEMAIL_RFC5322_IPV6_GRPCOUNT 464 | Michael Rushton 465 | http://squiloople.com/tag/email/ 466 |
467 | 468 |
test@[IPv6:1111:2222:3333:4444:5555:6666:7777:888G]
469 | ISEMAIL_RFC5322 470 | ISEMAIL_RFC5322_IPV6_BADCHAR 471 | Michael Rushton 472 | http://squiloople.com/tag/email/ 473 |
474 | 475 |
test@[IPv6:1111:2222:3333:4444:5555:6666::8888]
476 | ISEMAIL_RFC5321 477 | ISEMAIL_RFC5321_IPV6DEPRECATED 478 | Michael Rushton 479 | http://squiloople.com/tag/email/ 480 |
481 | 482 |
test@[IPv6:1111:2222:3333:4444:5555::8888]
483 | ISEMAIL_RFC5321 484 | ISEMAIL_RFC5321_ADDRESSLITERAL 485 | Michael Rushton 486 | http://squiloople.com/tag/email/ 487 |
488 | 489 |
test@[IPv6:1111:2222:3333:4444:5555:6666::7777:8888]
490 | ISEMAIL_RFC5322 491 | ISEMAIL_RFC5322_IPV6_MAXGRPS 492 | Michael Rushton 493 | http://squiloople.com/tag/email/ 494 |
495 | 496 |
test@[IPv6::3333:4444:5555:6666:7777:8888]
497 | ISEMAIL_RFC5322 498 | ISEMAIL_RFC5322_IPV6_COLONSTRT 499 | Michael Rushton 500 | http://squiloople.com/tag/email/ 501 |
502 | 503 |
test@[IPv6:::3333:4444:5555:6666:7777:8888]
504 | ISEMAIL_RFC5321 505 | ISEMAIL_RFC5321_ADDRESSLITERAL 506 | Michael Rushton 507 | http://squiloople.com/tag/email/ 508 |
509 | 510 |
test@[IPv6:1111::4444:5555::8888]
511 | ISEMAIL_RFC5322 512 | ISEMAIL_RFC5322_IPV6_2X2XCOLON 513 | Michael Rushton 514 | http://squiloople.com/tag/email/ 515 |
516 | 517 |
test@[IPv6:::]
518 | ISEMAIL_RFC5321 519 | ISEMAIL_RFC5321_ADDRESSLITERAL 520 | Michael Rushton 521 | http://squiloople.com/tag/email/ 522 |
523 | 524 |
test@[IPv6:1111:2222:3333:4444:5555:255.255.255.255]
525 | ISEMAIL_RFC5322 526 | ISEMAIL_RFC5322_IPV6_GRPCOUNT 527 | Michael Rushton 528 | http://squiloople.com/tag/email/ 529 |
530 | 531 |
test@[IPv6:1111:2222:3333:4444:5555:6666:255.255.255.255]
532 | ISEMAIL_RFC5321 533 | ISEMAIL_RFC5321_ADDRESSLITERAL 534 | Michael Rushton 535 | http://squiloople.com/tag/email/ 536 |
537 | 538 |
test@[IPv6:1111:2222:3333:4444:5555:6666:7777:255.255.255.255]
539 | ISEMAIL_RFC5322 540 | ISEMAIL_RFC5322_IPV6_GRPCOUNT 541 | Michael Rushton 542 | http://squiloople.com/tag/email/ 543 |
544 | 545 |
test@[IPv6:1111:2222:3333:4444::255.255.255.255]
546 | ISEMAIL_RFC5321 547 | ISEMAIL_RFC5321_ADDRESSLITERAL 548 | Michael Rushton 549 | http://squiloople.com/tag/email/ 550 |
551 | 552 |
test@[IPv6:1111:2222:3333:4444:5555:6666::255.255.255.255]
553 | ISEMAIL_RFC5322 554 | ISEMAIL_RFC5322_IPV6_MAXGRPS 555 | Michael Rushton 556 | http://squiloople.com/tag/email/ 557 |
558 | 559 |
test@[IPv6:1111:2222:3333:4444:::255.255.255.255]
560 | ISEMAIL_RFC5322 561 | ISEMAIL_RFC5322_IPV6_2X2XCOLON 562 | Michael Rushton 563 | http://squiloople.com/tag/email/ 564 |
565 | 566 |
test@[IPv6::255.255.255.255]
567 | ISEMAIL_RFC5322 568 | ISEMAIL_RFC5322_IPV6_COLONSTRT 569 | Michael Rushton 570 | http://squiloople.com/tag/email/ 571 |
572 | 573 |
test @iana.org
574 | ISEMAIL_DEPREC 575 | ISEMAIL_DEPREC_CFWS_NEAR_AT 576 | Michael Rushton 577 | http://squiloople.com/tag/email/ 578 |
579 | 580 |
test@ iana .com
581 | ISEMAIL_DEPREC 582 | ISEMAIL_DEPREC_CFWS_NEAR_AT 583 | Michael Rushton 584 | http://squiloople.com/tag/email/ 585 |
586 | 587 |
test . test@iana.org
588 | ISEMAIL_DEPREC 589 | ISEMAIL_DEPREC_FWS 590 | Michael Rushton 591 | http://squiloople.com/tag/email/ 592 |
593 | 594 |
␍␊ test@iana.org
595 | FWS 596 | ISEMAIL_CFWS 597 | ISEMAIL_CFWS_FWS 598 | Michael Rushton 599 | http://squiloople.com/tag/email/ 600 |
601 | 602 |
␍␊ ␍␊ test@iana.org
603 | FWS with one line composed entirely of WSP -- only allowed as obsolete FWS (someone might allow only non-obsolete FWS) 604 | ISEMAIL_DEPREC 605 | ISEMAIL_DEPREC_FWS 606 | Michael Rushton 607 | http://squiloople.com/tag/email/ 608 |
609 | 610 |
(comment)test@iana.org
611 | ISEMAIL_CFWS 612 | ISEMAIL_CFWS_COMMENT 613 | Michael Rushton 614 | http://squiloople.com/tag/email/ 615 |
616 | 617 |
((comment)test@iana.org
618 | ISEMAIL_ERR 619 | ISEMAIL_ERR_UNCLOSEDCOMMENT 620 | Michael Rushton 621 | http://squiloople.com/tag/email/ 622 |
623 | 624 |
(comment(comment))test@iana.org
625 | ISEMAIL_CFWS 626 | ISEMAIL_CFWS_COMMENT 627 | Michael Rushton 628 | http://squiloople.com/tag/email/ 629 |
630 | 631 |
test@(comment)iana.org
632 | ISEMAIL_DEPREC 633 | ISEMAIL_DEPREC_CFWS_NEAR_AT 634 | Michael Rushton 635 | http://squiloople.com/tag/email/ 636 |
637 | 638 |
test(comment)test@iana.org
639 | ISEMAIL_ERR 640 | ISEMAIL_ERR_ATEXT_AFTER_CFWS 641 | Michael Rushton 642 | http://squiloople.com/tag/email/ 643 |
644 | 645 |
test@(comment)[255.255.255.255]
646 | ISEMAIL_DEPREC 647 | ISEMAIL_DEPREC_CFWS_NEAR_AT 648 | Michael Rushton 649 | http://squiloople.com/tag/email/ 650 |
651 | 652 |
(comment)abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org
653 | ISEMAIL_CFWS 654 | ISEMAIL_CFWS_COMMENT 655 | Michael Rushton 656 | http://squiloople.com/tag/email/ 657 |
658 | 659 |
test@(comment)abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.com
660 | ISEMAIL_DEPREC 661 | ISEMAIL_DEPREC_CFWS_NEAR_AT 662 | Michael Rushton 663 | http://squiloople.com/tag/email/ 664 |
665 | 666 |
(comment)test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghik.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghik.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstu
667 | ISEMAIL_CFWS 668 | ISEMAIL_CFWS_COMMENT 669 | Michael Rushton 670 | http://squiloople.com/tag/email/ 671 |
672 | 673 |
test@iana.org␊
674 | ISEMAIL_ERR 675 | ISEMAIL_ERR_EXPECTING_ATEXT 676 | Michael Rushton 677 | http://squiloople.com/tag/email/ 678 |
679 | 680 |
test@xn--hxajbheg2az3al.xn--jxalpdlp
681 | 682 | A valid IDN from ICANN's IDN TLD evaluation gateway. 683 | 684 | ISEMAIL_VALID_CATEGORY 685 | ISEMAIL_VALID 686 | Michael Rushton 687 | http://squiloople.com/tag/email/ 688 |
689 | 690 |
xn--test@iana.org
691 | RFC 3490: "unless the 692 | email standards are revised to invite the use of IDNA for local 693 | parts, a domain label that holds the local part of an email address 694 | SHOULD NOT begin with the ACE prefix, and even if it does, it is to 695 | be interpreted literally as a local part that happens to begin with 696 | the ACE prefix" 697 | ISEMAIL_VALID_CATEGORY 698 | ISEMAIL_VALID 699 | Dominic Sayers 700 | http://www.dominicsayers.com/isemail 701 |
702 | 703 |
test@iana.org-
704 | ISEMAIL_ERR 705 | ISEMAIL_ERR_DOMAINHYPHENEND 706 | Dominic Sayers 707 | http://www.dominicsayers.com/isemail 708 |
709 | 710 |
"test@iana.org
711 | ISEMAIL_ERR 712 | ISEMAIL_ERR_UNCLOSEDQUOTEDSTR 713 | Dominic Sayers 714 | http://www.dominicsayers.com/isemail 715 |
716 | 717 |
(test@iana.org
718 | ISEMAIL_ERR 719 | ISEMAIL_ERR_UNCLOSEDCOMMENT 720 | Dominic Sayers 721 | http://www.dominicsayers.com/isemail 722 |
723 | 724 |
test@(iana.org
725 | ISEMAIL_ERR 726 | ISEMAIL_ERR_UNCLOSEDCOMMENT 727 | Dominic Sayers 728 | http://www.dominicsayers.com/isemail 729 |
730 | 731 |
test@[1.2.3.4
732 | ISEMAIL_ERR 733 | ISEMAIL_ERR_UNCLOSEDDOMLIT 734 | Dominic Sayers 735 | http://www.dominicsayers.com/isemail 736 |
737 | 738 |
"test\"@iana.org
739 | ISEMAIL_ERR 740 | ISEMAIL_ERR_UNCLOSEDQUOTEDSTR 741 | Dominic Sayers 742 | http://www.dominicsayers.com/isemail 743 |
744 | 745 |
(comment\)test@iana.org
746 | ISEMAIL_ERR 747 | ISEMAIL_ERR_UNCLOSEDCOMMENT 748 | Dominic Sayers 749 | http://www.dominicsayers.com/isemail 750 |
751 | 752 |
test@iana.org(comment\)
753 | ISEMAIL_ERR 754 | ISEMAIL_ERR_UNCLOSEDCOMMENT 755 | Dominic Sayers 756 | http://www.dominicsayers.com/isemail 757 |
758 | 759 |
test@iana.org(comment\
760 | ISEMAIL_ERR 761 | ISEMAIL_ERR_BACKSLASHEND 762 | Dominic Sayers 763 | http://www.dominicsayers.com/isemail 764 |
765 | 766 |
test@[RFC-5322-domain-literal]
767 | ISEMAIL_RFC5322 768 | ISEMAIL_RFC5322_DOMAINLITERAL 769 | Dominic Sayers 770 | http://www.dominicsayers.com/isemail 771 |
772 | 773 |
test@[RFC-5322]-domain-literal]
774 | ISEMAIL_ERR 775 | ISEMAIL_ERR_ATEXT_AFTER_DOMLIT 776 | Dominic Sayers 777 | http://www.dominicsayers.com/isemail 778 |
779 | 780 |
test@[RFC-5322-[domain-literal]
781 | ISEMAIL_ERR 782 | ISEMAIL_ERR_EXPECTING_DTEXT 783 | Dominic Sayers 784 | http://www.dominicsayers.com/isemail 785 |
786 | 787 |
test@[RFC-5322-\␇-domain-literal]
788 | obs-dtext and obs-qp 789 | ISEMAIL_RFC5322 790 | ISEMAIL_RFC5322_DOMLIT_OBSDTEXT 791 | Dominic Sayers 792 | http://www.dominicsayers.com/isemail 793 |
794 | 795 |
test@[RFC-5322-\␉-domain-literal]
796 | ISEMAIL_RFC5322 797 | ISEMAIL_RFC5322_DOMLIT_OBSDTEXT 798 | Dominic Sayers 799 | http://www.dominicsayers.com/isemail 800 |
801 | 802 |
test@[RFC-5322-\]-domain-literal]
803 | ISEMAIL_RFC5322 804 | ISEMAIL_RFC5322_DOMLIT_OBSDTEXT 805 | Dominic Sayers 806 | http://www.dominicsayers.com/isemail 807 |
808 | 809 |
test@[RFC-5322-domain-literal\]
810 | ISEMAIL_ERR 811 | ISEMAIL_ERR_UNCLOSEDDOMLIT 812 | Dominic Sayers 813 | http://www.dominicsayers.com/isemail 814 |
815 | 816 |
test@[RFC-5322-domain-literal\
817 | ISEMAIL_ERR 818 | ISEMAIL_ERR_BACKSLASHEND 819 | Dominic Sayers 820 | http://www.dominicsayers.com/isemail 821 |
822 | 823 |
test@[RFC 5322 domain literal]
824 | Spaces are FWS in a domain literal 825 | ISEMAIL_RFC5322 826 | ISEMAIL_RFC5322_DOMAINLITERAL 827 | Dominic Sayers 828 | http://www.dominicsayers.com/isemail 829 |
830 | 831 |
test@[RFC-5322-domain-literal] (comment)
832 | ISEMAIL_RFC5322 833 | ISEMAIL_RFC5322_DOMAINLITERAL 834 | Dominic Sayers 835 | http://www.dominicsayers.com/isemail 836 |
837 | 838 |
@iana.org
839 | ISEMAIL_ERR 840 | ISEMAIL_ERR_EXPECTING_ATEXT 841 | Dominic Sayers 842 | http://www.dominicsayers.com/isemail 843 |
844 | 845 |
test@.org
846 | ISEMAIL_ERR 847 | ISEMAIL_ERR_EXPECTING_ATEXT 848 | Dominic Sayers 849 | http://www.dominicsayers.com/isemail 850 |
851 | 852 |
""@iana.org
853 | ISEMAIL_DEPREC 854 | ISEMAIL_DEPREC_QTEXT 855 | Dominic Sayers 856 | http://www.dominicsayers.com/isemail 857 |
858 | 859 |
"\"@iana.org
860 | ISEMAIL_DEPREC 861 | ISEMAIL_DEPREC_QP 862 | Dominic Sayers 863 | http://www.dominicsayers.com/isemail 864 |
865 | 866 |
()test@iana.org
867 | ISEMAIL_DEPREC 868 | ISEMAIL_DEPREC_CTEXT 869 | Dominic Sayers 870 | http://www.dominicsayers.com/isemail 871 |
872 | 873 |
test@iana.org␍
874 | No LF after the CR 875 | ISEMAIL_ERR 876 | ISEMAIL_ERR_CR_NO_LF 877 | Dominic Sayers 878 | http://www.dominicsayers.com/isemail 879 |
880 | 881 |
␍test@iana.org
882 | No LF after the CR 883 | ISEMAIL_ERR 884 | ISEMAIL_ERR_CR_NO_LF 885 | Dominic Sayers 886 | http://www.dominicsayers.com/isemail 887 |
888 | 889 |
"␍test"@iana.org
890 | No LF after the CR 891 | ISEMAIL_ERR 892 | ISEMAIL_ERR_CR_NO_LF 893 | Dominic Sayers 894 | http://www.dominicsayers.com/isemail 895 |
896 | 897 |
(␍)test@iana.org
898 | No LF after the CR 899 | ISEMAIL_ERR 900 | ISEMAIL_ERR_CR_NO_LF 901 | Dominic Sayers 902 | http://www.dominicsayers.com/isemail 903 |
904 | 905 |
test@iana.org(␍)
906 | No LF after the CR 907 | ISEMAIL_ERR 908 | ISEMAIL_ERR_CR_NO_LF 909 | Dominic Sayers 910 | http://www.dominicsayers.com/isemail 911 |
912 | 913 |
␊test@iana.org
914 | ISEMAIL_ERR 915 | ISEMAIL_ERR_EXPECTING_ATEXT 916 | Michael Rushton 917 | http://squiloople.com/tag/email/ 918 |
919 | 920 |
"␊"@iana.org
921 | ISEMAIL_ERR 922 | ISEMAIL_ERR_EXPECTING_QTEXT 923 | Dominic Sayers 924 | http://www.dominicsayers.com/isemail 925 |
926 | 927 |
"\␊"@iana.org
928 | ISEMAIL_DEPREC 929 | ISEMAIL_DEPREC_QP 930 | Dominic Sayers 931 | http://www.dominicsayers.com/isemail 932 |
933 | 934 |
(␊)test@iana.org
935 | ISEMAIL_ERR 936 | ISEMAIL_ERR_EXPECTING_CTEXT 937 | Dominic Sayers 938 | http://www.dominicsayers.com/isemail 939 |
940 | 941 |
␇@iana.org
942 | ISEMAIL_ERR 943 | ISEMAIL_ERR_EXPECTING_ATEXT 944 | Dominic Sayers 945 | http://www.dominicsayers.com/isemail 946 |
947 | 948 |
test@␇.org
949 | ISEMAIL_ERR 950 | ISEMAIL_ERR_EXPECTING_ATEXT 951 | Dominic Sayers 952 | http://www.dominicsayers.com/isemail 953 |
954 | 955 |
"␇"@iana.org
956 | ISEMAIL_DEPREC 957 | ISEMAIL_DEPREC_QTEXT 958 | Dominic Sayers 959 | http://www.dominicsayers.com/isemail 960 |
961 | 962 |
"\␇"@iana.org
963 | ISEMAIL_DEPREC 964 | ISEMAIL_DEPREC_QP 965 | Dominic Sayers 966 | http://www.dominicsayers.com/isemail 967 |
968 | 969 |
(␇)test@iana.org
970 | ISEMAIL_DEPREC 971 | ISEMAIL_DEPREC_CTEXT 972 | Dominic Sayers 973 | http://www.dominicsayers.com/isemail 974 |
975 | 976 |
␍␊test@iana.org
977 | Not FWS because no actual white space 978 | ISEMAIL_ERR 979 | ISEMAIL_ERR_FWS_CRLF_END 980 | Dominic Sayers 981 | http://www.dominicsayers.com/isemail 982 |
983 | 984 |
␍␊ ␍␊test@iana.org
985 | Not obs-FWS because there must be white space on each "fold" 986 | ISEMAIL_ERR 987 | ISEMAIL_ERR_FWS_CRLF_END 988 | Dominic Sayers 989 | http://www.dominicsayers.com/isemail 990 |
991 | 992 |
␍␊test@iana.org
993 | Not FWS because no white space after the fold 994 | ISEMAIL_ERR 995 | ISEMAIL_ERR_FWS_CRLF_END 996 | Dominic Sayers 997 | http://www.dominicsayers.com/isemail 998 |
999 | 1000 |
␍␊ test@iana.org
1001 | FWS 1002 | ISEMAIL_CFWS 1003 | ISEMAIL_CFWS_FWS 1004 | Dominic Sayers 1005 | http://www.dominicsayers.com/isemail 1006 |
1007 | 1008 |
␍␊ ␍␊test@iana.org
1009 | Not FWS because no white space after the second fold 1010 | ISEMAIL_ERR 1011 | ISEMAIL_ERR_FWS_CRLF_END 1012 | Dominic Sayers 1013 | http://www.dominicsayers.com/isemail 1014 |
1015 | 1016 |
␍␊␍␊test@iana.org
1017 | Not FWS because no white space after either fold 1018 | ISEMAIL_ERR 1019 | ISEMAIL_ERR_FWS_CRLF_X2 1020 | Dominic Sayers 1021 | http://www.dominicsayers.com/isemail 1022 |
1023 | 1024 |
␍␊␍␊ test@iana.org
1025 | Not FWS because no white space after the first fold 1026 | ISEMAIL_ERR 1027 | ISEMAIL_ERR_FWS_CRLF_X2 1028 | Dominic Sayers 1029 | http://www.dominicsayers.com/isemail 1030 |
1031 | 1032 |
test@iana.org␍␊
1033 | FWS 1034 | ISEMAIL_CFWS 1035 | ISEMAIL_CFWS_FWS 1036 | Dominic Sayers 1037 | http://www.dominicsayers.com/isemail 1038 |
1039 | 1040 |
test@iana.org␍␊ ␍␊
1041 | FWS with one line composed entirely of WSP -- only allowed as obsolete FWS (someone might allow only non-obsolete FWS) 1042 | ISEMAIL_DEPREC 1043 | ISEMAIL_DEPREC_FWS 1044 | Dominic Sayers 1045 | http://www.dominicsayers.com/isemail 1046 |
1047 | 1048 |
test@iana.org␍␊
1049 | Not FWS because no actual white space 1050 | ISEMAIL_ERR 1051 | ISEMAIL_ERR_FWS_CRLF_END 1052 | Dominic Sayers 1053 | http://www.dominicsayers.com/isemail 1054 |
1055 | 1056 |
test@iana.org␍␊ ␍␊
1057 | Not obs-FWS because there must be white space on each "fold" 1058 | ISEMAIL_ERR 1059 | ISEMAIL_ERR_FWS_CRLF_END 1060 | Dominic Sayers 1061 | http://www.dominicsayers.com/isemail 1062 |
1063 | 1064 |
test@iana.org ␍␊
1065 | Not FWS because no white space after the fold 1066 | ISEMAIL_ERR 1067 | ISEMAIL_ERR_FWS_CRLF_END 1068 | Dominic Sayers 1069 | http://www.dominicsayers.com/isemail 1070 |
1071 | 1072 |
test@iana.org ␍␊
1073 | FWS 1074 | ISEMAIL_CFWS 1075 | ISEMAIL_CFWS_FWS 1076 | Dominic Sayers 1077 | http://www.dominicsayers.com/isemail 1078 |
1079 | 1080 |
test@iana.org ␍␊ ␍␊
1081 | Not FWS because no white space after the second fold 1082 | ISEMAIL_ERR 1083 | ISEMAIL_ERR_FWS_CRLF_END 1084 | Dominic Sayers 1085 | http://www.dominicsayers.com/isemail 1086 |
1087 | 1088 |
test@iana.org ␍␊␍␊
1089 | Not FWS because no white space after either fold 1090 | ISEMAIL_ERR 1091 | ISEMAIL_ERR_FWS_CRLF_X2 1092 | Dominic Sayers 1093 | http://www.dominicsayers.com/isemail 1094 |
1095 | 1096 |
test@iana.org ␍␊␍␊
1097 | Not FWS because no white space after the first fold 1098 | ISEMAIL_ERR 1099 | ISEMAIL_ERR_FWS_CRLF_X2 1100 | Dominic Sayers 1101 | http://www.dominicsayers.com/isemail 1102 |
1103 | 1104 |
test@iana.org
1105 | ISEMAIL_CFWS 1106 | ISEMAIL_CFWS_FWS 1107 | Dominic Sayers 1108 | http://www.dominicsayers.com/isemail 1109 |
1110 | 1111 |
test@iana.org
1112 | ISEMAIL_CFWS 1113 | ISEMAIL_CFWS_FWS 1114 | Dominic Sayers 1115 | http://www.dominicsayers.com/isemail 1116 |
1117 | 1118 |
test@[IPv6:1::2:]
1119 | ISEMAIL_RFC5322 1120 | ISEMAIL_RFC5322_IPV6_COLONEND 1121 | Dominic Sayers 1122 | http://www.dominicsayers.com/isemail 1123 |
1124 | 1125 |
"test\©"@iana.org
1126 | ISEMAIL_ERR 1127 | ISEMAIL_ERR_EXPECTING_QPAIR 1128 | Dominic Sayers 1129 | http://www.dominicsayers.com/isemail 1130 |
1131 | 1132 |
test@iana/icann.org
1133 | ISEMAIL_RFC5322 1134 | ISEMAIL_RFC5322_DOMAIN 1135 | Dominic Sayers 1136 | http://www.dominicsayers.com/isemail 1137 |
1138 | 1139 |
test.(comment)test@iana.org
1140 | ISEMAIL_DEPREC 1141 | ISEMAIL_DEPREC_COMMENT 1142 | Dominic Sayers 1143 | http://www.dominicsayers.com/isemail 1144 |
1145 | 1146 | -------------------------------------------------------------------------------- /tests/diagnosis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelherold/pyIsEmail/9d2cb38048b4ae08f10b53ffa5e753f10b234756/tests/diagnosis/__init__.py -------------------------------------------------------------------------------- /tests/diagnosis/test_base_diagnosis.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyisemail.diagnosis import BaseDiagnosis 4 | 5 | 6 | def test_diagnosis_less_than(): 7 | d1 = BaseDiagnosis("test") 8 | d1.code = 1 9 | d2 = BaseDiagnosis("test") 10 | d2.code = 2 11 | 12 | assert d1 < d2 13 | assert d1 < 3 14 | 15 | 16 | def test_diagnosis_greater_than(): 17 | d1 = BaseDiagnosis("test") 18 | d1.code = 1 19 | d2 = BaseDiagnosis("test") 20 | d2.code = 2 21 | 22 | assert d2 > d1 23 | assert 3 > d1 24 | 25 | 26 | def test_diagnosis_equal_to(): 27 | d1 = BaseDiagnosis("test") 28 | d1.code = 1 29 | d2 = BaseDiagnosis("test") 30 | d2.code = 1 31 | 32 | assert d1 == d2 33 | 34 | 35 | def test_diagnosis_hash(): 36 | d1 = BaseDiagnosis("test") 37 | d2 = BaseDiagnosis("test") 38 | 39 | assert hash(d1) == hash(d2) 40 | -------------------------------------------------------------------------------- /tests/test_email_validator.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyisemail import EmailValidator 4 | 5 | 6 | def test_abstract_is_email(): 7 | v = EmailValidator() 8 | 9 | with pytest.raises(NotImplementedError): 10 | v.is_email("test@example.com") 11 | -------------------------------------------------------------------------------- /tests/test_is_email.py: -------------------------------------------------------------------------------- 1 | import dns.resolver 2 | import pytest 3 | 4 | from pyisemail import is_email 5 | from 
pyisemail.diagnosis import ( 6 | BaseDiagnosis, 7 | DNSDiagnosis, 8 | GTLDDiagnosis, 9 | ValidDiagnosis, 10 | ) 11 | from tests.validators import create_diagnosis, get_scenarios 12 | 13 | scenarios = get_scenarios("tests.xml") 14 | threshold = BaseDiagnosis.CATEGORIES["THRESHOLD"] 15 | 16 | 17 | def side_effect(*_): 18 | raise dns.resolver.NoAnswer 19 | 20 | 21 | @pytest.mark.parametrize("test_id,address,diagnosis", scenarios) 22 | def test_without_diagnosis(test_id, address, diagnosis): 23 | result = is_email(address) 24 | expected = create_diagnosis(diagnosis) < threshold 25 | 26 | assert result == expected, "%s (%s): Got %s, but expected %s." % ( 27 | test_id, 28 | address, 29 | result, 30 | expected, 31 | ) 32 | 33 | 34 | @pytest.mark.parametrize("test_id,address,diagnosis", scenarios) 35 | def test_with_diagnosis(test_id, address, diagnosis): 36 | result = is_email(address, diagnose=True) 37 | expected = create_diagnosis(diagnosis) 38 | 39 | assert result == expected, "%s (%s): Got %s, but expected %s." 
% ( 40 | test_id, 41 | address, 42 | result, 43 | expected, 44 | ) 45 | 46 | 47 | def test_dns_without_diagnosis(monkeypatch): 48 | monkeypatch.setattr(dns.resolver, "resolve", side_effect) 49 | 50 | result = is_email("test@example.com", check_dns=True) 51 | expected = False 52 | 53 | assert result == expected 54 | 55 | 56 | def test_dns_with_diagnosis(monkeypatch): 57 | monkeypatch.setattr(dns.resolver, "resolve", side_effect) 58 | 59 | result = is_email("test@example.com", check_dns=True, diagnose=True) 60 | expected = DNSDiagnosis("NO_RECORD") 61 | 62 | assert result == expected 63 | 64 | 65 | def test_gtld_with_diagnosis(): 66 | assert is_email("a@b") == True 67 | assert is_email("a@b", allow_gtld=False) == False 68 | 69 | 70 | def test_gtld_without_diagnosis(): 71 | assert is_email("a@b", diagnose=True) == ValidDiagnosis() 72 | assert is_email("a@b", allow_gtld=False, diagnose=True) == GTLDDiagnosis("GTLD") 73 | -------------------------------------------------------------------------------- /tests/test_reference.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyisemail import Reference 4 | 5 | 6 | def test_reference_repr(): 7 | r = Reference("local-part") 8 | 9 | result = repr(r) 10 | expected = "%s (%r)" % (r.__class__, r.__dict__) 11 | 12 | assert result == expected 13 | 14 | 15 | def test_reference_str(): 16 | r = Reference("local-part") 17 | 18 | result = str(r) 19 | expected = "%s <%s>" % (r.citation, r.link) 20 | 21 | assert result == expected 22 | -------------------------------------------------------------------------------- /tests/validators/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import xml.etree.ElementTree as ET 4 | 5 | from pyisemail.diagnosis import ( 6 | CFWSDiagnosis, 7 | DeprecatedDiagnosis, 8 | DNSDiagnosis, 9 | InvalidDiagnosis, 10 | RFC5321Diagnosis, 11 | RFC5322Diagnosis, 12 | 
ValidDiagnosis, 13 | ) 14 | 15 | __all__ = ["create_diagnosis", "get_scenarios"] 16 | 17 | 18 | def create_diagnosis(tag): 19 | 20 | """Create a Diagnosis for a given tag. 21 | 22 | Keyword arguments: 23 | tag --- the tag string to create a Diagnosis for 24 | 25 | """ 26 | 27 | split_tag = tag.split("_") 28 | d_class = _get_diagnosis_class(split_tag[1]) 29 | diagnosis_type = "_".join(split_tag[2:]) 30 | if diagnosis_type == "" and d_class == ValidDiagnosis: 31 | diagnosis_type = "VALID" 32 | 33 | return d_class(diagnosis_type) 34 | 35 | 36 | def get_scenarios(filename, flaky=False): 37 | 38 | """Parse the given test file and return the scenarios list. 39 | 40 | Keyword arguments: 41 | filename --- the name of the test XML file to parse 42 | flaky --- flag to include or exclude only flaky tests 43 | 44 | """ 45 | 46 | document = ET.parse("%s/../data/%s" % (os.path.dirname(__file__), filename)) 47 | root = document.getroot() 48 | 49 | scenarios = [] 50 | 51 | for test in root.iter("test"): 52 | test_id = str(test.attrib["id"]) 53 | address = _get_node_text(test.find("address").text) 54 | diagnosis = _get_node_text(test.find("diagnosis").text) 55 | try: 56 | flaky_test = _get_node_text(test.find("flaky").text) == "True" 57 | except AttributeError: 58 | flaky_test = False 59 | 60 | if flaky_test is flaky: 61 | scenario = (test_id, address, diagnosis) 62 | scenarios.append(scenario) 63 | 64 | return scenarios 65 | 66 | 67 | def _get_node_text(text): 68 | 69 | """Cast text to a unicode string to handle unicode characters. 70 | 71 | Keyword arguments: 72 | text --- the string to cast to unicode 73 | 74 | """ 75 | 76 | if text: 77 | return str(text) 78 | else: 79 | return "" 80 | 81 | 82 | def _get_diagnosis_class(tag): 83 | 84 | """Get class of the Diagnosis to use for a given tag. 
85 | 86 | Keyword arguments: 87 | tag --- the tag string to look up 88 | 89 | """ 90 | 91 | if tag == "ERR": 92 | d_class = InvalidDiagnosis 93 | elif tag == "DNSWARN": 94 | d_class = DNSDiagnosis 95 | elif tag == "VALID": 96 | d_class = ValidDiagnosis 97 | elif tag == "RFC5321": 98 | d_class = RFC5321Diagnosis 99 | elif tag == "VALID": 100 | d_class = ValidDiagnosis 101 | elif tag == "RFC5321": 102 | d_class = RFC5321Diagnosis 103 | elif tag == "RFC5322": 104 | d_class = RFC5322Diagnosis 105 | elif tag == "CFWS": 106 | d_class = CFWSDiagnosis 107 | elif tag == "DEPREC": 108 | d_class = DeprecatedDiagnosis 109 | else: 110 | d_class = "" 111 | 112 | return d_class 113 | -------------------------------------------------------------------------------- /tests/validators/test_dns_validator.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import dns.name 4 | import dns.resolver 5 | import pytest 6 | 7 | from pyisemail.diagnosis import DNSDiagnosis, RFC5321Diagnosis, ValidDiagnosis 8 | from pyisemail.validators import DNSValidator 9 | 10 | is_valid = DNSValidator().is_valid 11 | 12 | message_text_null_mx = """id 1234 13 | opcode QUERY 14 | rcode NOERROR 15 | flags QR AA RD 16 | ;QUESTION 17 | example.com. IN MX 18 | ;ANSWER 19 | example.com. 86400 IN MX 0 . 20 | ;AUTHORITY 21 | ;ADDITIONAL 22 | """ 23 | 24 | message_text_zero_preference = """id 1234 25 | opcode QUERY 26 | rcode NOERROR 27 | flags QR AA RD 28 | ;QUESTION 29 | example.com. IN MX 30 | ;ANSWER 31 | example.com. 86400 IN MX 0 mail.example.com. 
32 | ;AUTHORITY 33 | ;ADDITIONAL 34 | """ 35 | 36 | 37 | class FakeAnswer(object): 38 | def __init__(self, expiration): 39 | self.expiration = expiration 40 | 41 | def __len__(self): 42 | return 2 43 | 44 | 45 | def null_mx_record(*_): 46 | message = dns.message.from_text(message_text_null_mx) 47 | name = dns.name.from_text("example.com.") 48 | 49 | return dns.resolver.Answer(name, dns.rdatatype.MX, dns.rdataclass.IN, message) 50 | 51 | 52 | def zero_preference_mx_record(*_): 53 | message = dns.message.from_text(message_text_zero_preference) 54 | name = dns.name.from_text("example.com.") 55 | 56 | return dns.resolver.Answer(name, dns.rdatatype.MX, dns.rdataclass.IN, message) 57 | 58 | 59 | def no_side_effect(*_): 60 | return FakeAnswer(time.time() + 1) 61 | 62 | 63 | def nx_domain_side_effect(*_): 64 | raise dns.resolver.NXDOMAIN 65 | 66 | 67 | def too_long_side_effect(*_): 68 | raise dns.name.NameTooLong 69 | 70 | 71 | def no_record_side_effect(*_): 72 | raise dns.resolver.NoAnswer 73 | 74 | 75 | def no_ns_side_effect(*_): 76 | raise dns.resolver.NoNameservers 77 | 78 | 79 | def timeout_side_effect(*_): 80 | raise dns.resolver.Timeout 81 | 82 | 83 | def test_working_mx_record_without_diagnosis(monkeypatch): 84 | monkeypatch.setattr(dns.resolver, "resolve", no_side_effect) 85 | 86 | assert is_valid("example.com") 87 | 88 | 89 | def test_working_mx_record_with_diagnosis(monkeypatch): 90 | monkeypatch.setattr(dns.resolver, "resolve", no_side_effect) 91 | 92 | assert is_valid("example.com", diagnose=True) == ValidDiagnosis() 93 | 94 | 95 | def test_non_existant_mx_record_without_diagnosis(monkeypatch): 96 | monkeypatch.setattr(dns.resolver, "resolve", nx_domain_side_effect) 97 | 98 | assert not is_valid("example.com") 99 | 100 | 101 | def test_non_existant_mx_record_with_diagnosis(monkeypatch): 102 | monkeypatch.setattr(dns.resolver, "resolve", nx_domain_side_effect) 103 | 104 | assert is_valid("example.com", diagnose=True) == DNSDiagnosis("NO_RECORD") 105 | 106 | 107 
| def test_domain_too_long_without_diagnosis(monkeypatch): 108 | monkeypatch.setattr(dns.resolver, "resolve", too_long_side_effect) 109 | 110 | assert not is_valid("example.com") 111 | 112 | 113 | def test_domain_too_long_with_diagnosis(monkeypatch): 114 | monkeypatch.setattr(dns.resolver, "resolve", too_long_side_effect) 115 | 116 | assert is_valid("example.com", diagnose=True) == DNSDiagnosis("NO_RECORD") 117 | 118 | 119 | def test_no_record_without_diagnosis(monkeypatch): 120 | monkeypatch.setattr(dns.resolver, "resolve", too_long_side_effect) 121 | 122 | assert not is_valid("example.com") 123 | 124 | 125 | def test_no_record_with_diagnosis(monkeypatch): 126 | monkeypatch.setattr(dns.resolver, "resolve", too_long_side_effect) 127 | 128 | assert is_valid("example.com", diagnose=True) == DNSDiagnosis("NO_RECORD") 129 | 130 | 131 | def test_no_mx_on_tld_without_diagnosis(monkeypatch): 132 | monkeypatch.setattr(dns.resolver, "resolve", nx_domain_side_effect) 133 | 134 | assert not is_valid("com") 135 | 136 | 137 | def test_no_mx_on_tld_with_diagnosis(monkeypatch): 138 | monkeypatch.setattr(dns.resolver, "resolve", nx_domain_side_effect) 139 | 140 | assert is_valid("com", diagnose=True) == DNSDiagnosis("NO_RECORD") 141 | 142 | 143 | def test_no_records_on_tld_without_diagnosis(monkeypatch): 144 | monkeypatch.setattr(dns.resolver, "resolve", no_record_side_effect) 145 | 146 | assert not is_valid("com") 147 | 148 | 149 | def test_no_records_on_tld_with_diagnosis(monkeypatch): 150 | monkeypatch.setattr(dns.resolver, "resolve", no_record_side_effect) 151 | 152 | assert is_valid("com", diagnose=True) == RFC5321Diagnosis("TLD") 153 | 154 | 155 | def test_no_records_on_numeric_tld_without_diagnosis(monkeypatch): 156 | monkeypatch.setattr(dns.resolver, "resolve", no_record_side_effect) 157 | 158 | assert not is_valid("iana.123") 159 | 160 | 161 | def test_no_records_on_numeric_tld_with_diagnosis(monkeypatch): 162 | monkeypatch.setattr(dns.resolver, "resolve", 
no_record_side_effect) 163 | 164 | assert is_valid("iana.123", diagnose=True) == RFC5321Diagnosis("TLDNUMERIC") 165 | 166 | 167 | def test_no_nameservers_respond_without_diagnosis(monkeypatch): 168 | monkeypatch.setattr(dns.resolver, "resolve", no_ns_side_effect) 169 | 170 | assert not is_valid("example.com") 171 | 172 | 173 | def test_no_nameservers_respond_with_diagnosis(monkeypatch): 174 | monkeypatch.setattr(dns.resolver, "resolve", no_ns_side_effect) 175 | 176 | assert is_valid("example.com", diagnose=True) == DNSDiagnosis("NO_NAMESERVERS") 177 | 178 | 179 | def test_dns_timeout_without_diagnosis(monkeypatch): 180 | monkeypatch.setattr(dns.resolver, "resolve", timeout_side_effect) 181 | 182 | assert not is_valid("example.com") 183 | 184 | 185 | def test_dns_timeout_with_diagnosis(monkeypatch): 186 | monkeypatch.setattr(dns.resolver, "resolve", timeout_side_effect) 187 | 188 | assert is_valid("example.com", diagnose=True) == DNSDiagnosis("DNS_TIMEDOUT") 189 | 190 | 191 | def test_null_mx_record_without_diagnosis(monkeypatch): 192 | monkeypatch.setattr(dns.resolver, "resolve", null_mx_record) 193 | 194 | assert not is_valid("example.com") 195 | 196 | 197 | def test_null_mx_record_with_diagnosis(monkeypatch): 198 | monkeypatch.setattr(dns.resolver, "resolve", null_mx_record) 199 | 200 | assert is_valid("example.com", diagnose=True) == DNSDiagnosis("NULL_MX_RECORD") 201 | 202 | 203 | def test_zero_preference_mx_record_without_diagnosis(monkeypatch): 204 | monkeypatch.setattr(dns.resolver, "resolve", zero_preference_mx_record) 205 | 206 | assert is_valid("example.com") 207 | 208 | 209 | def test_zero_preference_mx_record_with_diagnosis(monkeypatch): 210 | monkeypatch.setattr(dns.resolver, "resolve", zero_preference_mx_record) 211 | 212 | assert is_valid("example.com", diagnose=True) == ValidDiagnosis() 213 | -------------------------------------------------------------------------------- /tests/validators/test_parser_validator.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyisemail.diagnosis import BaseDiagnosis 4 | from pyisemail.validators import ParserValidator 5 | from tests.validators import create_diagnosis, get_scenarios 6 | 7 | scenarios = get_scenarios("tests.xml") 8 | threshold = BaseDiagnosis.CATEGORIES["THRESHOLD"] 9 | 10 | 11 | @pytest.mark.parametrize("test_id,address,diagnosis", scenarios) 12 | def test_without_diagnosis(test_id, address, diagnosis): 13 | 14 | v = ParserValidator() 15 | 16 | result = v.is_email(address) 17 | expected = create_diagnosis(diagnosis) < threshold 18 | 19 | assert result == expected, "%s (%s): Got %s, but expected %s." % ( 20 | test_id, 21 | address, 22 | result, 23 | expected, 24 | ) 25 | 26 | 27 | @pytest.mark.parametrize("test_id,address,diagnosis", scenarios) 28 | def test_with_diagnosis(test_id, address, diagnosis): 29 | 30 | v = ParserValidator() 31 | 32 | result = v.is_email(address, True) 33 | expected = create_diagnosis(diagnosis) 34 | 35 | assert result == expected, "%s (%s): Got %s, but expected %s." % ( 36 | test_id, 37 | address, 38 | result, 39 | expected, 40 | ) 41 | --------------------------------------------------------------------------------