├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .github └── workflows │ ├── nightly.yml │ └── on-push.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── Setup.lhs ├── So you want to validate an email address.md ├── Syntax.md ├── email-validate.cabal ├── src └── Text │ └── Email │ ├── Parser.hs │ ├── QuasiQuotation.hs │ └── Validate.hs ├── stack.yaml ├── stack.yaml.lock └── tests └── Main.hs /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.187.0/containers/ubuntu/.devcontainer/base.Dockerfile 2 | 3 | # [Choice] Ubuntu version: bionic, focal 4 | ARG VARIANT="focal" 5 | FROM mcr.microsoft.com/vscode/devcontainers/base:0-${VARIANT} 6 | 7 | RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 8 | && apt-get -y install --no-install-recommends \ 9 | build-essential curl libffi-dev libffi7 libgmp-dev libgmp10 libncurses-dev libncurses5 libtinfo5 \ 10 | gcc libc6-dev make xz-utils zlib1g-dev libtinfo-dev 11 | 12 | USER vscode 13 | 14 | RUN curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | \ 15 | BOOTSTRAP_HASKELL_MINIMAL=1 \ 16 | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 \ 17 | BOOTSTRAP_HASKELL_VERBOSE=1 \ 18 | BOOTSTRAP_HASKELL_ADJUST_BASHRC=1 \ 19 | GHCUP_PROFILE_FILE=/home/vscode/.bashrc \ 20 | MY_SHELL=bash \ 21 | sh 22 | 23 | USER root 24 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. 
For config options, see the README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.187.0/containers/ubuntu 3 | { 4 | "name": "Ubuntu", 5 | "build": { 6 | "dockerfile": "Dockerfile", 7 | // Update 'VARIANT' to pick an Ubuntu version: focal, bionic 8 | "args": { "VARIANT": "focal" } 9 | }, 10 | 11 | "settings": {}, 12 | 13 | "extensions": [ 14 | "haskell.haskell" 15 | ], 16 | 17 | "onCreateCommand": "ghcup install hls && ghcup install stack", 18 | "updateContentCommand": "stack build --test --only-dependencies", 19 | 20 | "remoteUser": "vscode" 21 | } 22 | -------------------------------------------------------------------------------- /.github/workflows/nightly.yml: -------------------------------------------------------------------------------- 1 | name: Run tests on Stackage nightly 2 | 3 | on: 4 | workflow_dispatch: 5 | # allow manual trigger 6 | 7 | schedule: 8 | - cron: '33 3 * * *' 9 | # run daily 10 | 11 | jobs: 12 | build: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | resolver: 17 | - 'lts' 18 | - 'nightly' 19 | 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: Cache stack dir 26 | uses: actions/cache@v4 27 | env: 28 | cache-name: cache-stack-dir 29 | with: 30 | path: ~/.stack 31 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ matrix.resolver }}-${{ hashFiles('*.cabal') }} 32 | restore-keys: | 33 | ${{ runner.os }}-build-${{ env.cache-name }}-${{ matrix.resolver }}- 34 | 35 | - name: Build & Test 36 | run: stack test --haddock --no-terminal --resolver ${{ matrix.resolver }} 37 | -------------------------------------------------------------------------------- /.github/workflows/on-push.yml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | cabal: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Cache Cabal dir 11 | uses: actions/cache@v4 12 | with: 13 | 
path: ~/.cabal 14 | key: ${{ runner.os }}-cabal-${{ hashFiles('*.cabal') }} 15 | restore-keys: ${{ runner.os }}-cabal- 16 | 17 | - run: cabal update 18 | - run: cabal test 19 | - run: cabal install doctest 20 | - run: PATH=$HOME/.cabal/bin:$PATH cabal repl --with-ghc=doctest --ghc-options="-XQuasiQuotes -XOverloadedStrings" 21 | 22 | stackage: 23 | runs-on: ubuntu-latest 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | resolver: 28 | - 'lts-12' 29 | - 'lts-13' 30 | - 'lts-14' 31 | - 'lts-15' 32 | - 'lts-16' 33 | - 'lts-17' 34 | - 'lts-18' 35 | - 'lts-19' 36 | - 'lts-20' 37 | - 'lts-21' 38 | - 'lts-22' 39 | 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: Cache stack dir 43 | uses: actions/cache@v4 44 | with: 45 | path: ~/.stack 46 | key: ${{ runner.os }}-stack-${{ matrix.resolver }}-${{ hashFiles('*.cabal') }} 47 | restore-keys: ${{ runner.os }}-stack-${{ matrix.resolver }}- 48 | 49 | - run: stack test --haddock --no-terminal --resolver ${{ matrix.resolver }} 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cabal-sandbox/ 2 | cabal.sandbox.config 3 | dist/ 4 | dist-newstyle/ 5 | .stack-work/ 6 | .vscode/ 7 | 8 | *.tix 9 | 10 | # profiling 11 | *.htm 12 | *.html 13 | *.prof 14 | 15 | # Core output 16 | *.dump-simpl 17 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [2.3.2.21] - 2024-09-22 4 | 5 | - Relax some upper bounds to major-major versions to reduce frequency of updates: 6 | - `QuickCheck` now <3 7 | - `template-haskell` now <3 8 | 9 | ## [2.3.2.20] - 2024-02-23 10 | 11 | - Bump upper bounds: 12 | - allow `template-haskell` 2.21 13 | 14 | ## [2.3.2.19] - 2023-07-27 15 | 16 | ### Changed 17 | 18 | - Bump upper bounds: 19 | - allow `hspec` 2.11 20 | - allow 
`bytestring` 0.12 21 | - allow `template-haskell` 2.20 22 | 23 | ## [2.3.2.18] - 2022-11-24 24 | 25 | ### Changed 26 | 27 | - Bump upper bound to allow `hspec` 2.10 ([#63](https://github.com/Porges/email-validate-hs/pull/63) thanks to [@felixonmars](https://github.com/felixonmars)). 28 | 29 | 30 | ## [2.3.2.17] - 2022-11-24 31 | 32 | ### Changed 33 | 34 | - Bump upper bound to allow `template-haskell` 2.19 ([#65](https://github.com/Porges/email-validate-hs/pull/65) thanks to [@ysangkok](https://github.com/ysangkok)). 35 | 36 | ## [2.3.2.16] - 2022-03-21 37 | 38 | ### Changed 39 | 40 | - Bump upper bound to allow `doctest` 0.19. 41 | - Bump upper bound to allow `hspec` 2.9. 42 | - Bump upper bound to allow `template-haskell` 2.18. 43 | 44 | ## [2.3.2.15] - 2021-07-05 45 | 46 | ### Changed 47 | 48 | - Bump upper bound to allow `bytestring` 0.11. 49 | - Bump upper bound to allow `doctest` 0.18. 50 | 51 | ## [2.3.2.14] - 2021-07-01 52 | 53 | ### Changed 54 | 55 | - Bump upper bound for `template-haskell` to allow 2.17. 56 | - Bump upper bound to allow `doctest` 0.17. 57 | - Bump upper bound to allow `hspec` 2.8. 58 | - Bump upper bound to allow `attoparsec` 0.14. 59 | 60 | ## [2.3.2.13] - 2020-05-02 61 | 62 | ### Changed 63 | 64 | - Bump upper bound for `template-haskell` to allow 2.16. 65 | 66 | ## [2.3.2.12] - 2019-09-27 67 | 68 | ### Changed 69 | 70 | - Bump upper bound for `template-haskell` to allow 2.15. 71 | 72 | ## [2.3.2.11] - 2019-03-30 73 | 74 | ### Changed 75 | 76 | - Bump upper bound for `QuickCheck` to allow 2.13. 77 | 78 | ## [2.3.2.10] - 2019-01-21 79 | 80 | ### Changed 81 | 82 | - Bump upper bound for `hspec` to 2.8. 83 | 84 | ## [2.3.2.9] - 2018-11-17 85 | 86 | ### Changed 87 | 88 | - Bump upper bound for `hspec` to 2.7. 89 | 90 | ## [2.3.2.8] - 2018-10-14 91 | 92 | ### Changed 93 | 94 | - Bump upper bound for `template-haskell` to 2.14. 95 | 96 | ## [2.3.2.7] - 2018-09-11 97 | 98 | ### Changed 99 | 100 | - Bump upper bound for `QuickCheck` to 2.13. 
101 | 102 | ## [2.3.2.6] - 2018-07-03 103 | 104 | ### Changed 105 | 106 | - Bump upper bound for `doctest` to 0.17. 107 | 108 | ## [2.3.2.5] - 2018-03-15 109 | 110 | ### Changed 111 | 112 | - Bump upper bound for `hspec` to 2.6. 113 | 114 | ## [2.3.2.4] - 2018-03-15 115 | 116 | ### Changed 117 | 118 | - Bump upper bound for `doctest` to 0.16. 119 | 120 | ## [2.3.2.3] - 2018-03-07 121 | 122 | ### Changed 123 | 124 | - Bump upper bound for `template-haskell` to 2.14. 125 | 126 | ## [2.3.2.2] - 2018-02-15 127 | 128 | ### Changed 129 | 130 | - Bump `doctest` upper bound. 131 | 132 | ## [2.3.2.1] - 2018-01-23 133 | 134 | ### Changed 135 | 136 | - Bump upper bound for `QuickCheck` to 2.12. 137 | 138 | ## [2.3.2] - 2017-11-06 139 | 140 | ### Changed 141 | 142 | - Relaxed `doctest` upper bound. 143 | 144 | ## [2.3.1] - 2017-07-29 145 | 146 | ### Changed 147 | 148 | - Relax version bounds on `doctest` and `template-haskell`. 149 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 George Pollard 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the author nor the names of his contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission.
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # email-validate 2 | Email address validation for Haskell 3 | 4 | [![Hackage](https://img.shields.io/hackage/v/email-validate.svg)](https://hackage.haskell.org/package/email-validate) 5 | 6 | [See the documentation on Hackage.](http://hackage.haskell.org/package/email-validate) 7 | -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runhaskell 2 | > import Distribution.Simple 3 | > main = defaultMain 4 | -------------------------------------------------------------------------------- /So you want to validate an email address.md: -------------------------------------------------------------------------------- 1 | So you want to validate an email address... 2 | === 3 | 4 | This is a list of RFCs that are related to parsing and validating an email address. Yes, you will need all of them in some way or another. 
(And no, this library does not do everything correctly - it's lacking all the Unicode support.) 5 | 6 | I've tried to avoid listing obsoleted RFCs, so for example we have 5322 instead of 2822 and 5321 instead of 2821. 7 | 8 | ### [RFC5322](https://tools.ietf.org/html/rfc5322): Internet Message Format 9 | 10 | This is the main RFC that defines the email address syntax. If you want to do something basic (e.g., you want to write a self-referential regular expression) you should start here. 11 | 12 | Among other fun things, you will learn that a C-style null-terminated string cannot hold an email address. 13 | 14 | Don't look at [RFC5321](https://tools.ietf.org/html/rfc5321) for the syntax! It defines [similar-but-different](https://tools.ietf.org/html/rfc5321#section-4.1.2) versions of the ABNF (see below) rules. The rule names are case-sensitive! 15 | 16 | ### [RFC6532](https://tools.ietf.org/html/rfc6532): Internationalized Email Headers 17 | 18 | This [extends the syntax](https://tools.ietf.org/html/rfc6532#section-3.2) from 5322 to handle Unicode in email addresses. 19 | 20 | Don't read RFC6531! The situation is the same as with RFC5321. 21 | 22 | ### [RFC5234](https://tools.ietf.org/html/rfc5234): Augmented BNF for Syntax Specifications: ABNF 23 | 24 | This describes the syntax that the syntax for email addresses is described by. It's fairly intuitive, but this also contains some basic rules used elsewhere (e.g. `VCHAR`) that aren't obvious, so you need to check them. 25 | 26 | ### [RFC3629](https://tools.ietf.org/html/rfc3629): UTF-8, a transformation format of ISO 10646 27 | 28 | Okay, you probably don't need this as your language likely has facilities to do UTF reëncoding for you. It does have some ABNF definitions that are referenced in 6532, though. 
29 | 30 | ### [RFC5198](https://tools.ietf.org/html/rfc5198): Unicode Format for Network Interchange 31 | 32 | This falls into a similar category as the previous RFC, but I think Javascript doesn't get support until ES6 -- and you'll be supporting IE10 forever. 33 | 34 | That said, this is a huge cop-out of an RFC. It's basically a thunk for [UAX#15](http://unicode.org/reports/tr15/), so go read that instead. 35 | 36 | UAX#15 defines a family of Unicode "normalization" algorithms -- in simple terms, what it does is tell you how to convert "é" into "é", or vice versa. 37 | 38 | Domain Names 39 | --- 40 | 41 | The email address specification in 5322 is intentionally imprecise with regards to domain names. We can't have that. 42 | 43 | ### [RFC952](https://tools.ietf.org/html/rfc952): DOD INTERNET HOST TABLE SPECIFICATION 44 | 45 | This defines the syntax of a domain name. 46 | 47 | ### [RFC1123](https://tools.ietf.org/html/rfc1123): Requirements for Internet Hosts -- Application and Support 48 | 49 | This relaxes the syntax of a domain name to allow it to start with a digit. It also shows you how to write down or read 32-bit numbers (IPv4 addresses), so you can skip 780 or 790. 50 | 51 | IP Literals 52 | --- 53 | 54 | ### [RFC4291](https://tools.ietf.org/html/rfc4291): IP Version 6 Addressing Architecture 55 | 56 | This describes how to write down or read 128-bit numbers (IPv6 addresses). 57 | 58 | ### [RFC2765](https://tools.ietf.org/html/rfc2765): Stateless IP/ICMP Translation Algorithm (SIIT) 59 | 60 | This describes how to write down or read 32-bit numbers as if they were 128-bit-numbers. 61 | 62 | Limits 63 | --- 64 | 65 | ### [RFC1034](https://tools.ietf.org/html/rfc1034): DOMAIN NAMES - CONCEPTS AND FACILITIES 66 | ### [RFC1035](https://tools.ietf.org/html/rfc1035): DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION 67 | 68 | You can tell these RFCs are old because they are shouty. 
Both of them define a few limits that are critical to getting your validation correct, but it doesn't matter which one you read. 69 | 70 | In particular, labels are limited to 63 octets and names are limited to 255 octets (this includes 1 'length' byte per label plus one empty label (that still has a length byte), so the maximum length of a domain name is 253 characters when written in `the.usual.manner`). 71 | 72 | ### [RFC3696](https://tools.ietf.org/html/rfc3696): Application Techniques for Checking and Transformation of Names 73 | 74 | This RFC tells you that an email address can be up to 320 octets long. 75 | 76 | ### [RFC5321](https://tools.ietf.org/html/rfc5321): Simple Mail Transfer Protocol 77 | 78 | This RFC tells you that if you actually wish to send an email to an email address, it had better not be longer than 254 octets (see Erratum 1690 on RFC3696). 79 | 80 | Unicode in Domain Names 81 | --- 82 | 83 | ### [RFC3492](https://tools.ietf.org/html/rfc3492): Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA) 84 | 85 | The greatest algorithm name of all time? This converts Unicode to ASCII in a way that it can be reversed. 86 | 87 | (Why do you need this? Because converting Unicode to ASCII makes the domain name longer, and an email address that is too long might be invalid - that simply won't do!) 88 | 89 | ### Aside: "I heard you like homographs" 90 | > ### [RFC3490](https://tools.ietf.org/html/rfc3490): Internationalizing Domain Names in Applications (IDNA) 91 | 92 | > Describes how to turn an ASCII domain name into a Unicode one and vice versa. 93 | 94 | > ### [RFC3454](https://tools.ietf.org/html/rfc3454): Preparation of Internationalized Strings ("stringprep") 95 | 96 | > This describes an algorithm but doesn't tell you how to use it.
Luckily, you can then read: 97 | 98 | > ### [RFC3491](https://tools.ietf.org/html/rfc3491): Nameprep: A Stringprep Profile for Internationalized Domain Names (IDN) 99 | 100 | > This tells you how to use RFC3454. 101 | 102 | Don't read any of these, as they've all been obsoleted now, mostly due to security concerns. 103 | 104 | The implementation they describe is known as "IDNA2003". Instead, you need to read "IDNA2008". Of course, [IDNA2003 and IDNA2008 are incompatible](http://unicode.org/faq/idn.html), so two browsers might disagree on where a link goes. That doesn't matter here though, we only need to validate an email address. Ignore the fact that there's a middle-ground defined by [UTS#46](http://unicode.org/reports/tr46/). 105 | 106 | The IDNA2008 specifications consist of: 107 | 108 | ### [RFC5890](https://tools.ietf.org/html/rfc5890): Internationalized Domain Names for Applications (IDNA): Definitions and Document Framework 109 | 110 | This is like a glossary for the next 3 RFCs... 111 | 112 | ### [RFC5891](https://tools.ietf.org/html/rfc5891): Internationalized Domain Names in Applications (IDNA): Protocol 113 | 114 | This replaces the `ToAscii`/`ToUnicode` algorithms of RFC3490. 115 | 116 | ### [RFC5892](https://tools.ietf.org/html/rfc5892): The Unicode Code Points and Internationalized Domain Names for Applications (IDNA) 117 | 118 | 70 pages of Unicode codepoints - hold me back! NB: **ERRATA EXIST**. Good luck with that. 119 | 120 | ### [RFC5893](https://tools.ietf.org/html/rfc5893): Right-to-Left Scripts for Internationalized Domain Names for Applications (IDNA) 121 | 122 | Because you should put the hardest part of Unicode into a security-critical syntax designed for entry by people who can't program their microwaves. 123 | 124 | ### [RFC5894](https://tools.ietf.org/html/rfc5894): Internationalized Domain Names for Applications (IDNA): Background, Explanation, and Rationale 125 | 126 | This RFC explains why it took 4 RFCs to define IDNA. 
127 | 128 | Meta 129 | --- 130 | 131 | ### [RFC2119](https://tools.ietf.org/html/rfc2119): Key words for use in RFCs to Indicate Requirement Levels 132 | 133 | It is a rule that all RFCs published after RFC2119 SHOULD reference RFC2119, so you had better understand it. 134 | 135 | ### [RFC2606](https://tools.ietf.org/html/rfc2606): Reserved Top Level DNS Names 136 | 137 | You should use these names in your test suite. 138 | 139 | 140 | You might need these, I'm not sure 141 | --- 142 | 143 | ### [RFC7564](https://tools.ietf.org/html/rfc7564): PRECIS Framework: Preparation, Enforcement, and Comparison of Internationalized Strings in Application Protocols 144 | 145 | I'm not sure if you need this yet. Anyhow, "stringprep" was a much more fun name. 146 | -------------------------------------------------------------------------------- /Syntax.md: -------------------------------------------------------------------------------- 1 | Email address syntax 2 | ==================== 3 | 4 | Since you need to consult a bunch of RFCs to figure this out, 5 | here is a compilation of what it means to be an (internationalized) email address. 6 | 7 | [RFC5322] 8 | addr-spec = local-part "@" domain 9 | 10 | An email address is a *local-part*, the `@` symbol, and then a *domain*. 11 | 12 | Local-part 13 | ---------- 14 | 15 | [RFC5322] 16 | local-part = dot-atom / quoted-string / obs-local-part 17 | 18 | Dotted atoms: 19 | 20 | [RFC5322] 21 | dot-atom = [CFWS] dot-atom-text [CFWS] 22 | 23 | 24 | CFWS = (1*([FWS] comment) [FWS]) / FWS 25 | 26 | comment = "(" *([FWS] ccontent) [FWS] ")" 27 | 28 | ccontent = ctext / quoted-pair / comment 29 | 30 | ctext = %d33-39 / ; Printable US-ASCII 31 | %d42-91 / ; characters not including 32 | %d93-126 / ; "(", ")", or "\" 33 | obs-ctext 34 | 35 | [RFC6532] 36 | =/ UTF8-non-ascii 37 | 38 | 39 | [RFC5322] 40 | dot-atom-text = 1*atext *("." 1*atext) 41 | 42 | atext = ALPHA / DIGIT / ; Printable US-ASCII 43 | "!" 
/ "#" / ; characters not including 44 | "$" / "%" / ; specials. Used for atoms. 45 | "&" / "'" / 46 | "*" / "+" / 47 | "-" / "/" / 48 | "=" / "?" / 49 | "^" / "_" / 50 | "`" / "{" / 51 | "|" / "}" / 52 | "~" 53 | [RFC6532] 54 | =/ UTF8-non-ascii 55 | 56 | [RFC5234] 57 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 58 | DIGIT = %x30-39 59 | ; 0-9 60 | 61 | [RFC6532] 62 | UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4 63 | 64 | [RFC3629] 65 | UTF8-2 = %xC2-DF UTF8-tail 66 | UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / 67 | %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) 68 | UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / 69 | %xF4 %x80-8F 2( UTF8-tail ) 70 | UTF8-tail = %x80-BF 71 | 72 | Quoted strings: 73 | 74 | [RFC5322] 75 | quoted-string = [CFWS] 76 | DQUOTE *([FWS] qcontent) [FWS] DQUOTE 77 | [CFWS] 78 | 79 | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS 80 | ; Folding white space 81 | 82 | qcontent = qtext / quoted-pair 83 | 84 | qtext = %d33 / ; Printable US-ASCII 85 | %d35-91 / ; characters not including 86 | %d93-126 / ; "\" or the quote character 87 | obs-qtext 88 | 89 | [RFC6532] 90 | =/ UTF8-non-ascii 91 | 92 | 93 | [RFC5322] 94 | quoted-pair = ("\" (VCHAR / WSP)) / obs-qp 95 | 96 | 97 | [RFC5234] 98 | DQUOTE = %x22 99 | ; " (Double Quote) 100 | 101 | WSP = SP / HTAB 102 | ; white space 103 | 104 | SP = %x20 105 | 106 | HTAB = %x09 107 | ; horizontal tab 108 | 109 | CRLF = CR LF 110 | ; Internet standard newline 111 | 112 | CR = %x0D 113 | ; carriage return 114 | 115 | LF = %x0A 116 | ; linefeed 117 | 118 | VCHAR = %x21-7E 119 | ; visible (printing) characters 120 | 121 | [RFC6532] 122 | =/ UTF8-non-ascii 123 | 124 | 125 | Domain-part 126 | ----------- 127 | 128 | A *domain* is a dot-separated list of *sub-domain*s. 
129 | 130 | [RFC5322] 131 | domain = dot-atom / domain-literal / obs-domain 132 | 133 | domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] 134 | 135 | dtext = %d33-90 / ; Printable US-ASCII 136 | %d94-126 / ; characters not including 137 | obs-dtext ; "[", "]", or "\" 138 | 139 | [RFC6532] 140 | =/ UTF8-non-ascii 141 | 142 | 143 | Obsolete syntax 144 | =============== 145 | 146 | [RFC5322] 147 | obs-local-part = word *("." word) 148 | 149 | atom = [CFWS] 1*atext [CFWS] 150 | 151 | word = atom / quoted-string 152 | 153 | obs-FWS = 1*WSP *(CRLF 1*WSP) 154 | 155 | obs-ctext = obs-NO-WS-CTL 156 | 157 | obs-NO-WS-CTL = %d1-8 / ; US-ASCII control 158 | %d11 / ; characters that do not 159 | %d12 / ; include the carriage 160 | %d14-31 / ; return, line feed, and 161 | %d127 ; white space characters 162 | 163 | obs-qtext = obs-NO-WS-CTL 164 | 165 | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) 166 | 167 | obs-dtext = obs-NO-WS-CTL / quoted-pair 168 | -------------------------------------------------------------------------------- /email-validate.cabal: -------------------------------------------------------------------------------- 1 | name: email-validate 2 | version: 2.3.2.21 3 | license: BSD3 4 | license-file: LICENSE 5 | author: George Pollard 6 | maintainer: George Pollard 7 | homepage: https://github.com/Porges/email-validate-hs 8 | category: Text 9 | synopsis: Email address validation 10 | description: Validating an email address string against RFC 5322 11 | build-type: Simple 12 | stability: experimental 13 | cabal-version: >= 1.10 14 | 15 | source-repository head 16 | type: git 17 | location: git://github.com/Porges/email-validate-hs.git 18 | 19 | source-repository this 20 | type: git 21 | location: git://github.com/Porges/email-validate-hs.git 22 | tag: v2.3.2.21 23 | 24 | library 25 | build-depends: 26 | base >= 4.4 && < 5, 27 | attoparsec >= 0.10.0 && < 0.15, 28 | bytestring >= 0.9 && < 0.13, 29 | template-haskell >= 2.10.0.0 && < 3 30 | default-language: 
Haskell2010 31 | hs-source-dirs: src 32 | ghc-options: -Wall 33 | exposed-modules: 34 | Text.Email.QuasiQuotation, 35 | Text.Email.Validate, 36 | Text.Email.Parser 37 | 38 | test-suite Main 39 | type: exitcode-stdio-1.0 40 | main-is: Main.hs 41 | ghc-options: -threaded 42 | hs-source-dirs: tests 43 | default-language: Haskell2010 44 | build-depends: 45 | email-validate, 46 | base >= 4 && < 5, 47 | hspec >= 2.2.3 && < 2.12, 48 | QuickCheck >= 2.4 && < 3, 49 | bytestring >= 0.9 && < 0.13 50 | -------------------------------------------------------------------------------- /src/Text/Email/Parser.hs: --------------------------------------------------------------------------------
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}

module Text.Email.Parser
    ( addrSpec
    , localPart
    , domainPart
    , EmailAddress
    , unsafeEmailAddress
    , toByteString
    )
where

import Control.Applicative
import Control.Monad (guard, void, when)
import Data.Attoparsec.ByteString.Char8
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BS
import Data.Data (Data, Typeable)
import GHC.Generics (Generic)
import qualified Text.Read as Read

-- | Represents an email address.
--
-- The two fields are the local part and the domain part, in that order
-- (see 'localPart' and 'domainPart'). The constructor is not exported;
-- values are built by 'addrSpec' or 'unsafeEmailAddress'.
data EmailAddress = EmailAddress ByteString ByteString
    deriving (Eq, Ord, Data, Typeable, Generic)

-- | Creates an email address without validating it.
--   You should only use this when reading data from
--   somewhere it has already been validated (e.g. a
--   database).
--
-- The first argument is the local part, the second the domain part.
unsafeEmailAddress :: ByteString -> ByteString -> EmailAddress
unsafeEmailAddress = EmailAddress

-- | Shows the address exactly as the 'ByteString' produced by
-- 'toByteString' would be shown (i.e. as a quoted string literal).
instance Show EmailAddress where
    show = show . toByteString

-- | Reads a 'ByteString' literal and accepts it only if the whole of it
-- parses as an address with 'addrSpec'; any parse error becomes a read
-- failure.
instance Read EmailAddress where
    readListPrec = Read.readListPrecDefault
    readPrec = Read.parens (do
        bs <- Read.readPrec
        case parseOnly (addrSpec <* endOfInput) bs of
            Left  _ -> Read.pfail
            Right a -> return a)

-- | Converts an email address back to a ByteString
-- (local part, then @\'\@\'@, then domain part).
toByteString :: EmailAddress -> ByteString
toByteString (EmailAddress l d) = BS.concat [l, BS.singleton '@', d]

-- | Extracts the local part of an email address.
localPart :: EmailAddress -> ByteString
localPart (EmailAddress l _) = l

-- | Extracts the domain part of an email address.
domainPart :: EmailAddress -> ByteString
domainPart (EmailAddress _ d) = d

-- | A parser for email addresses.
--
-- Parses a local part, an @\'\@\'@ and a domain, in that order, and fails
-- when the local part is longer than 64 octets or when the assembled
-- address (local part + @\'\@\'@ + domain) is longer than 254 octets.
-- It does not itself require end of input; callers that need the whole
-- input to be an address combine it with 'endOfInput'.
addrSpec :: Parser EmailAddress
addrSpec = do
    l <- local

    -- Maximum length of local-part is 64, per RFC3696
    when (BS.length l > 64) (fail "local-part of email is too long (more than 64 octets)")

    -- '<?>' names this parser so that a missing '@' is reported as
    -- "at sign" in the failure context.
    _ <- char '@' <?> "at sign"
    d <- domain

    -- Maximum length is 254, per Erratum 1690 on RFC3696
    when (BS.length l + BS.length d + 1 > 254) (fail "email address is too long (more than 254 octets)")

    return (unsafeEmailAddress l d)

-- | The local part: one or more dot-separated atoms or quoted strings.
local :: Parser ByteString
local = dottedAtoms

-- | The domain part: a dotted domain name, or else a bracketed domain
-- literal.
domain :: Parser ByteString
domain = domainName <|> domainLiteral

-- | One or more 'domainLabel's separated by dots, with an optional
-- trailing dot. The result is the labels re-joined with single dots;
-- the trailing dot, if present, is consumed but not included.
domainName :: Parser ByteString
domainName = do
    parsedDomain <- BS.intercalate (BS.singleton '.') <$>
        domainLabel `sepBy1` char '.' <* optional (char '.')

    -- Domain name must be no greater than 253 chars, per RFC1035
    guard (BS.length parsedDomain <= 253)
    return parsedDomain

-- | A single domain label, optionally surrounded by comments/folding
-- white space (which are dropped). The label starts with an ASCII
-- letter or digit and continues with letters, digits and hyphens.
domainLabel :: Parser ByteString
domainLabel = do
    -- 'match' returns the exact input consumed by the inner parser.
    content <- between1 (optional cfws) (fst <$> match (alphaNum >> skipWhile isAlphaNumHyphen))

    -- Per RFC1035:
    -- label must be no greater than 63 chars and cannot end with '-'
    -- (we already enforced that it does not start with '-')
    guard (BS.length content <= 63 && BS.last content /= '-')
    return content

-- | A single ASCII letter or digit.
alphaNum :: Parser Char
alphaNum = satisfy isAlphaNum

-- | ASCII letter, digit or hyphen.
isAlphaNumHyphen :: Char -> Bool
isAlphaNumHyphen x = isDigit x || isAlpha_ascii x || x == '-'

-- | One or more dot-separated elements, each an 'atom' or a
-- 'quotedString' optionally surrounded by comments/folding white space.
-- The surrounding CFWS is dropped and the elements are re-joined with
-- single dots.
dottedAtoms :: Parser ByteString
dottedAtoms = BS.intercalate (BS.singleton '.') <$>
        between1 (optional cfws)
            (atom <|> quotedString) `sepBy1` char '.'

-- | A non-empty run of 'isAtomText' characters.
atom :: Parser ByteString
atom = takeWhile1 isAtomText

-- | ASCII letters and digits plus the punctuation allowed in an atom.
isAtomText :: Char -> Bool
isAtomText x = isAlphaNum x || inClass "!#$%&'*+/=?^_`{|}~-" x

-- | A bracketed domain literal, optionally surrounded by
-- comments/folding white space. Folding white space inside the brackets
-- is dropped; the result is the remaining text wrapped in @[@ and @]@.
domainLiteral :: Parser ByteString
domainLiteral =
    (BS.cons '[' . flip BS.snoc ']' . BS.concat) <$>
        between (optional cfws *> char '[') (char ']' <* optional cfws)
            (many (optional fws >> takeWhile1 isDomainText) <* optional fws)

-- | Characters allowed inside a domain literal: codes 33-90 and 94-126
-- (printable ASCII without @[@, @\\@ and @]@) plus the obsolete control
-- characters of 'isObsNoWsCtl'.
isDomainText :: Char -> Bool
isDomainText x = inClass "\33-\90\94-\126" x || isObsNoWsCtl x

-- | A double-quoted string. Folding white space inside the quotes is
-- dropped; the result is the remaining content wrapped in double quotes.
quotedString :: Parser ByteString
quotedString =
    (BS.cons '"' . flip BS.snoc '"' . BS.concat) <$>
        between1 (char '"')
            (many (optional fws >> quotedContent) <* optional fws)

-- | A run of 'isQuotedText' characters, or a single 'quotedPair'.
quotedContent :: Parser ByteString
quotedContent = takeWhile1 isQuotedText <|> quotedPair

-- | Characters allowed unescaped inside a quoted string: codes 33,
-- 35-91 and 93-126 (printable ASCII without @"@ and @\\@) plus the
-- obsolete control characters of 'isObsNoWsCtl'.
isQuotedText :: Char -> Bool
isQuotedText x = inClass "\33\35-\91\93-\126" x || isObsNoWsCtl x

-- | A backslash followed by one escaped character (visible character,
-- space/tab, LF, CR, obsolete control character or NUL). Returns both
-- bytes, backslash included.
quotedPair :: Parser ByteString
quotedPair = (BS.cons '\\' . BS.singleton) <$> (char '\\' *> (vchar <|> wsp <|> lf <|> cr <|> obsNoWsCtl <|> nullChar))

-- | Skips any number (possibly zero) of comments and folding white space.
cfws :: Parser ()
cfws = skipMany (comment <|> fws)

-- | Folding white space: either a run of space/tab optionally followed
-- by CRLF and another run of space/tab, or one or more repetitions of
-- CRLF followed by a run of space/tab.
fws :: Parser ()
fws = void (wsp1 >> optional (crlf >> wsp1)) <|> (skipMany1 (crlf >> wsp1))

-- | @between l r x@ runs @l@, then @x@, then @r@, keeping only the
-- result of @x@.
between :: Applicative f => f l -> f r -> f a -> f a
between l r x = l *> x <* r

-- | Like 'between', with the same delimiter on both sides.
between1 :: Applicative f => f lr -> f a -> f a
between1 lr x = lr *> x <* lr

-- | A parenthesised comment; comments may nest (via 'commentContent').
comment :: Parser ()
comment = between (char '(') (char ')') $ skipMany (void commentContent <|> fws)

-- | One piece of comment content: a run of 'isCommentText' characters,
-- a 'quotedPair', or a nested 'comment'.
commentContent :: Parser ()
commentContent = skipWhile1 isCommentText <|> void quotedPair <|> comment

-- | Characters allowed unescaped inside a comment: codes 33-39, 42-91
-- and 93-126 (printable ASCII without @(@, @)@ and @\\@) plus the
-- obsolete control characters of 'isObsNoWsCtl'.
isCommentText :: Char -> Bool
isCommentText x = inClass "\33-\39\42-\91\93-\126" x || isObsNoWsCtl x

-- | The NUL character.
nullChar :: Parser Char
nullChar = char '\0'

-- | Skips one or more characters satisfying the predicate.
skipWhile1 :: (Char -> Bool) -> Parser()
skipWhile1 x = satisfy x >> skipWhile x

-- | Skips one or more spaces/tabs.
wsp1 :: Parser ()
wsp1 = skipWhile1 isWsp

-- | A single space or tab.
wsp :: Parser Char
wsp = satisfy isWsp

-- | Space or horizontal tab.
isWsp :: Char -> Bool
isWsp x = x == ' ' || x == '\t'

-- | ASCII letter or digit.
isAlphaNum :: Char -> Bool
isAlphaNum x = isDigit x || isAlpha_ascii x

-- | Carriage return.
cr :: Parser Char
cr = char '\r'

-- | Line feed.
lf :: Parser Char
lf = char '\n'

-- | Carriage return immediately followed by line feed.
crlf :: Parser ()
crlf = void $ cr >> lf

-- | Visible (printing) ASCII characters, codes 0x21-0x7E.
isVchar :: Char -> Bool
isVchar = inClass "\x21-\x7e"

-- | A single visible ASCII character.
vchar :: Parser Char
vchar = satisfy isVchar

-- | Obsolete non-white-space control characters: codes 1-8, 11-12,
-- 14-31 and 127.
isObsNoWsCtl :: Char -> Bool
isObsNoWsCtl = inClass "\1-\8\11-\12\14-\31\127"

-- | A single obsolete non-white-space control character.
obsNoWsCtl :: Parser Char
obsNoWsCtl = satisfy isObsNoWsCtl
-------------------------------------------------------------------------------- /src/Text/Email/QuasiQuotation.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #if __GLASGOW_HASKELL__ >= 800
{-# LANGUAGE TemplateHaskellQuotes #-}
#else
{-# LANGUAGE TemplateHaskell #-}
#endif

module Text.Email.QuasiQuotation
    ( email
    ) where

import qualified Data.ByteString.Char8 as BS8

import Language.Haskell.TH.Quote (QuasiQuoter(..))

import Text.Email.Validate (validate, localPart, domainPart, unsafeEmailAddress)

-- | Quasi-quoter that turns an email address literal into an expression.
--
-- The quoted text is checked with 'validate' when the quote is expanded;
-- an invalid address raises an error carrying the parser's message.
-- Only the expression context is supported: using the quoter as a pattern,
-- declaration or type raises an error.
--
-- Usage (the `QuasiQuotes` extension must be on):
--
-- >>> [email|someone@example.com|]
-- "someone@example.com"
email :: QuasiQuoter
email = QuasiQuoter
    { quoteExp  = parseThen toExp
    , quotePat  = error "email is not supported as a pattern"
    , quoteDec  = error "email is not supported at top-level"
    , quoteType = error "email is not supported as a type"
    }
  where
    -- Validate the quoted source text, then hand the address to @k@.
    parseThen k src =
        either
            (\err -> error ("Invalid quasi-quoted email address: " ++ err))
            k
            (validate (BS8.pack src))

    -- Splice the two halves in as Strings and rebuild the address from them
    -- with 'unsafeEmailAddress' inside the generated expression.
    toExp addr =
        [| unsafeEmailAddress (BS8.pack localStr) (BS8.pack domainStr) |]
      where
        localStr  = BS8.unpack (localPart addr)
        domainStr = BS8.unpack (domainPart addr)
--------------------------------------------------------------------------------
/src/Text/Email/Validate.hs:
--------------------------------------------------------------------------------
module Text.Email.Validate
    ( isValid
    , validate
    , emailAddress
    , canonicalizeEmail

    -- Re-exports:
    , EmailAddress
    , domainPart
    , localPart
    , toByteString
    , unsafeEmailAddress
    )
where

import Data.Attoparsec.ByteString (endOfInput, parseOnly)
import Data.ByteString (ByteString)

import Text.Email.Parser
    ( EmailAddress
    , addrSpec
    , domainPart
    , localPart
    , toByteString
    , unsafeEmailAddress)

-- | Smart constructor: 'Just' the parsed address when the input passes
-- 'validate', 'Nothing' otherwise (the error message is discarded).
emailAddress :: ByteString -> Maybe EmailAddress
emailAddress raw =
    case validate raw of
        Right addr -> Just addr
        Left _     -> Nothing

-- | Validates the input and, on success, renders the parsed address back
-- to a 'ByteString' with comments and whitespace removed.
--
-- Example (requires `OverloadedStrings` to be enabled):
--
-- >>> canonicalizeEmail "spaces. are. allowed@example.com"
-- Just "spaces.are.allowed@example.com"
canonicalizeEmail :: ByteString -> Maybe ByteString
canonicalizeEmail raw = fmap toByteString (emailAddress raw)

-- | Whether the input is an email address according to RFC5322,
-- i.e. whether 'validate' succeeds on it.
isValid :: ByteString -> Bool
isValid raw =
    case validate raw of
        Right _ -> True
        Left _  -> False

-- | Parses the whole input as an address, returning the parser's error
-- message on failure. Use this to find out *why* a string is rejected.
--
-- Examples (both require `OverloadedStrings` to be enabled):
--
-- >>> validate "example@example.com"
-- Right "example@example.com"
--
-- >>> validate "not.good"
-- Left "at sign > @: not enough input"
validate :: ByteString -> Either String EmailAddress
validate = parseOnly $ do
    addr <- addrSpec
    -- Nothing may follow the address: the parse must consume all input.
    endOfInput
    return addr
--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
flags: {}
extra-package-dbs: []
packages:
- '.'
extra-deps: []
resolver: lts-21.4
--------------------------------------------------------------------------------
/stack.yaml.lock:
--------------------------------------------------------------------------------
# This file was autogenerated by Stack.
# You should not edit this file by hand.
# For more information, please see the documentation at:
#   https://docs.haskellstack.org/en/stable/lock_files

packages: []
snapshots:
- completed:
    sha256: caa77fdbc5b9f698262b21ee78030133272ec53116ad6ddbefdc4c321f668e0c
    size: 640014
    url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/21/4.yaml
  original: lts-21.4
--------------------------------------------------------------------------------
/tests/Main.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE OverloadedStrings #-}

module Main where

import Control.Exception (evaluate)
import Control.Monad (forM_)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BS
import Data.List (isInfixOf)
import Data.Maybe (Maybe(..), isNothing, fromJust)
import Data.Monoid ((<>))

import Test.Hspec (hspec, context, describe, errorCall, it, parallel, shouldBe, shouldSatisfy)
import Test.QuickCheck (Arbitrary(..), suchThat, property)

import Text.Email.QuasiQuotation (email)
import Text.Email.Validate
    ( EmailAddress
    , canonicalizeEmail
    , domainPart
    , emailAddress
    , localPart
    , isValid
    , toByteString
    , validate
    , unsafeEmailAddress
    )

-- | Runs every spec group below under hspec, marked as parallelisable.
main :: IO ()
main = hspec . parallel $ do
    showAndRead
    canonicalization
    exampleTests
    specificFailures
    simpleAccessors
    quasiQuotationTests

-- Re-parsing the rendered form of an address must give the same address.
canonicalization =
    describe "emailAddress" $
        it "is idempotent" $
            property prop_doubleCanonicalize

-- One context per entry of 'examples', titled with the shown input and,
-- when present, the reason in parentheses.
exampleTests =
    describe "Examples" $
        forM_ examples $ \ex ->
            context (title ex) (checks ex)
  where
    title Example{example, exampleWhy}
        | null exampleWhy = show example
        | otherwise       = show example ++ " (" ++ exampleWhy ++ ")"

    checks Example{example, exampleValid, errorContains}
        | exampleValid = do
            it "should be valid" $
                isValid example `shouldBe` True

            it "passes double-canonicalization test" $
                prop_doubleCanonicalize (fromJust (emailAddress example))

        | otherwise = do
            it "should be invalid" $
                isValid example `shouldBe` False

            -- The message is only checked when the example names an
            -- expected fragment and validation actually failed.
            case (errorContains, validate example) of
                (Just needle, Left message) ->
                    it "should have correct error message" $
                        message `shouldSatisfy` isInfixOf needle
                _ -> pure ()

-- Properties of the Show and Read instances.
showAndRead =
    describe "show/read instances" $ do
        it "can roundtrip" $
            property prop_showAndReadBack

        it "shows in the same way as ByteString" $
            property prop_showLikeByteString

        it "should fail if read back without a quote" $
            property prop_showAndReadBackWithoutQuoteFails

-- Regression checks for individually reported inputs.
specificFailures = do
    describe "GitHub issue #12" $
        it "is fixed" $
            let Right parsed = validate (BS.pack "\"\"@1")
            in  parsed `shouldBe` read (show parsed)

    describe "Trailing dot" $
        it "is canonicalized" $
            canonicalizeEmail "foo@bar.com." `shouldBe` Just "foo@bar.com"

-- Each accessor is given 'undefined' for the half it must not touch.
simpleAccessors = do
    describe "localPart" $
        it "extracts local part" $
            localPart (unsafeEmailAddress "local" undefined) `shouldBe` "local"

    describe "domainPart" $
        it "extracts domain part" $
            domainPart (unsafeEmailAddress undefined "domain") `shouldBe` "domain"

-- The quasi-quoter must build the same value as the unsafe constructor.
quasiQuotationTests =
    describe "QuasiQuoter" $
        it "works as expected" $
            [email|local@domain.com|] `shouldBe` unsafeEmailAddress "local" "domain.com"

instance Arbitrary ByteString where
    arbitrary = fmap BS.pack arbitrary

-- | Generates a local part that is valid in front of @example.com@ and a
-- domain that is valid behind @example@, then parses the two joined by @\@@.
instance Arbitrary EmailAddress where
    arbitrary = do
        local  <- arbitrary `suchThat` (\l -> looksValid l (BS.pack "example.com"))
        domain <- arbitrary `suchThat` looksValid (BS.pack "example")
        let Just result = emailAddress (joinParts local domain)
        pure result

        where
            looksValid l d = isValid (joinParts l d)
            joinParts l d  = BS.concat [l, BS.singleton '@', d]

{- Properties -}

-- | Rendering an address and parsing it again yields the same address.
prop_doubleCanonicalize :: EmailAddress -> Bool
prop_doubleCanonicalize addr = emailAddress (toByteString addr) == Just addr

-- | An address shows exactly like its rendered 'ByteString'.
prop_showLikeByteString :: EmailAddress -> Bool
prop_showLikeByteString addr = show addr == show (toByteString addr)

-- | 'read' inverts 'show'.
prop_showAndReadBack :: EmailAddress -> Bool
prop_showAndReadBack addr = read (show addr) == addr

-- | Dropping either the last or the first character of the shown form
-- must make it unreadable.
prop_showAndReadBackWithoutQuoteFails :: EmailAddress -> Bool
prop_showAndReadBackWithoutQuoteFails addr =
    all (isNothing . readMaybe) [init shown, tail shown]
    where
        shown = show addr

        -- Succeeds only on a single parse that consumes the whole input.
        readMaybe :: String -> Maybe EmailAddress
        readMaybe str =
            case reads str of
                [(x, "")] -> Just x
                _         -> Nothing

{- Examples -}

-- | One test case: the input, whether it must validate, an optional
-- human-readable reason, and an optional fragment the error must contain.
data Example = Example
    { example :: ByteString
    , exampleValid :: Bool
    , exampleWhy :: String
    , errorContains :: Maybe String }

-- | Builders for a case that must pass / must fail, with no reason and
-- no expected error fragment.
valid, invalid :: ByteString -> Example
valid input   = Example input True "" Nothing
invalid input = Example input False "" Nothing

-- | Attaches the reason shown in the test title.
why :: Example -> String -> Example
why ex reason = ex { exampleWhy = reason }

-- | Attaches a fragment that the validation error message must contain.
errorShouldContain :: Example -> String -> Example
errorShouldContain ex fragment = ex { errorContains = Just fragment }


-- | The table of example inputs driving 'exampleTests'.
examples :: [Example]
examples =
    [ valid "first.last@example.com"
    , valid "first.last@example.com." `why` "Dot allowed on end of domain"
    , invalid "local@exam_ple.com" `why` "Underscore not permitted in domain"
    , valid "1234567890123456789012345678901234567890123456789012345678901234@example.com"
    , valid "\"first last\"@example.com" `why` "Contains quoted spaces"
    , valid "\"first\\\"last\"@example.com" `why` "Contains quoted escaped quote"
    , invalid "first\\@last@example.com" `why` "Escaping can only happen within a quoted string"
    , valid "\"first@last\"@example.com" `why` "Contains quoted at-sign"
    , valid "\"first\\\\last\"@example.com" `why` "Contains quoted escaped backslash"
    , valid ("1234@" <> domain249)
          `why` "Maximum length is 254, this is 254 exactly"
    , valid ("1234@" <> domain249 <> ".")
          `why` "Trailing dot doesn't increase length"
    , invalid ("12345@" <> domain249)
          `why` "Maximum length is 254, this is 255"
          `errorShouldContain` "too long"
    , valid "first.last@[12.34.56.78]" `why` "IP address"
    , valid "first.last@[IPv6:::12.34.56.78]" `why` "IPv6 address"
    , valid "first.last@[IPv6:1111:2222:3333::4444:12.34.56.78]"
    , valid "first.last@[IPv6:1111:2222:3333:4444:5555:6666:12.34.56.78]"
    , valid "first.last@[IPv6:::1111:2222:3333:4444:5555:6666]"
    , valid "first.last@[IPv6:1111:2222:3333::4444:5555:6666]"
    , valid "first.last@[IPv6:1111:2222:3333:4444:5555:6666::]"
    , valid "first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888]"
    , valid "first.last@x23456789012345678901234567890123456789012345678901234567890123.example.com"
    , valid "first.last@1xample.com"
    , valid "first.last@123.example.com"
    , invalid "first.last" `why` "no at sign" `errorShouldContain` "at sign"
    , invalid ".first.last@example.com" `why` "Local part starts with a dot"
    , invalid "first.last.@example.com" `why` "Local part ends with a dot"
    , invalid "first..last@example.com" `why` "Local part has consecutive dots"
    , invalid "\"first\"last\"@example.com" `why` "Local part contains unescaped excluded characters"
    , valid "\"first\\last\"@example.com" `why` "Any character can be escaped in a quoted string"
    , invalid "\"\"\"@example.com" `why` "Local part contains unescaped excluded characters"
    , invalid "\"\\\"@example.com" `why` "Local part cannot end with a backslash"
    , invalid "first\\\\@last@example.com" `why` "Local part contains unescaped excluded characters"
    , invalid "first.last@" `why` "No domain"
    , valid "\"Abc\\@def\"@example.com"
    , valid "\"Fred\\ Bloggs\"@example.com"
    , valid "\"Joe.\\\\Blow\"@example.com"
    , valid "\"Abc@def\"@example.com"
    , valid "\"Fred Bloggs\"@example.com"
    , valid "user+mailbox@example.com"
    , valid "customer/department=shipping@example.com"
    , valid "$A12345@example.com"
    , valid "!def!xyz%abc@example.com"
    , valid "_somename@example.com"
    , valid "dclo@us.ibm.com"
    , invalid "abc\\@def@example.com" `why` "This example from RFC3696 was corrected in an erratum"
    , invalid "abc\\\\@example.com" `why` "This example from RFC3696 was corrected in an erratum"
    , valid "peter.piper@example.com"
    , invalid "Doug\\ \\\"Ace\\\"\\ Lovell@example.com" `why` "Escaping can only happen in a quoted string"
    , valid "\"Doug \\\"Ace\\\" L.\"@example.com"
    , invalid "abc@def@example.com" `why` "Doug Lovell says this should fail"
    , invalid "abc\\\\@def@example.com" `why` "Doug Lovell says this should fail"
    , invalid "abc\\@example.com" `why` "Doug Lovell says this should fail"
    , invalid "@example.com" `why` "no local part"
    , invalid "doug@" `why` "no domain part"
    , invalid "\"qu@example.com" `why` "Doug Lovell says this should fail"
    , invalid "ote\"@example.com" `why` "Doug Lovell says this should fail"
    , invalid ".dot@example.com" `why` "Doug Lovell says this should fail"
    , invalid "dot.@example.com" `why` "Doug Lovell says this should fail"
    , invalid "two..dot@example.com" `why` "Doug Lovell says this should fail"
    , invalid "\"Doug \"Ace\" L.\"@example.com" `why` "Doug Lovell says this should fail"
    , invalid "Doug\\ \\\"Ace\\\"\\ L\\.@example.com" `why` "Doug Lovell says this should fail"
    , invalid "hello world@example.com" `why` "Doug Lovell says this should fail"
    , valid "gatsby@f.sc.ot.t.f.i.tzg.era.l.d."
    , valid "test@example.com"
    , valid "TEST@example.com"
    , valid "1234567890@example.com"
    , valid "test+test@example.com"
    , valid "test-test@example.com"
    , valid "t*est@example.com"
    , valid "+1~1+@example.com"
    , valid "{_test_}@example.com"
    , valid "\"[[ test ]]\"@example.com"
    , valid "test.test@example.com"
    , valid "\"test.test\"@example.com"
    , valid "test.\"test\"@example.com" `why` "Obsolete form, but documented in RFC2822"
    , valid "\"test@test\"@example.com"
    , valid "test@123.123.123.x123"
    , valid "test@[123.123.123.123]"
    , valid "test@example.example.com"
    , valid "test@example.example.example.com"
    , invalid "test.example.com"
    , invalid "test.@example.com"
    , invalid "test..test@example.com"
    , invalid ".test@example.com"
    , invalid "test@test@example.com"
    , invalid "test@@example.com"
    , invalid "-- test --@example.com" `why` "No spaces allowed in local part"
    , invalid "[test]@example.com" `why` "Square brackets only allowed within quotes"
    , valid "\"test\\test\"@example.com" `why` "Any character can be escaped in a quoted string"
    , invalid "\"test\"test\"@example.com" `why` "Quotes cannot be nested"
    , invalid "()[]\\;:,><@example.com" `why` "Disallowed Characters"
    , invalid "test@." `why` "Dave Child says so"
    , valid "test@example."
    , invalid "test@.org" `why` "Dave Child says so"
    , invalid "test@[123.123.123.123" `why` "Dave Child says so"
    , invalid "test@123.123.123.123]" `why` "Dave Child says so"
    , invalid "NotAnEmail" `why` "Phil Haack says so"
    , invalid "@NotAnEmail" `why` "Phil Haack says so"
    , valid "\"test\\\\blah\"@example.com"
    , valid "\"test\\blah\"@example.com" `why` "Any character can be escaped in a quoted string"
    , valid "\"test\\\rblah\"@example.com" `why` "Quoted string specifically excludes carriage returns unless escaped"
    , invalid "\"test\rblah\"@example.com" `why` "Quoted string specifically excludes carriage returns"
    , valid "\"test\\\"blah\"@example.com"
    , invalid "\"test\"blah\"@example.com" `why` "Phil Haack says so"
    , valid "customer/department@example.com"
    , valid "_Yosemite.Sam@example.com"
    , valid "~@example.com"
    , invalid ".wooly@example.com" `why` "Phil Haack says so"
    , invalid "wo..oly@example.com" `why` "Phil Haack says so"
    , invalid "pootietang.@example.com" `why` "Phil Haack says so"
    , invalid ".@example.com" `why` "Phil Haack says so"
    , valid "\"Austin@Powers\"@example.com"
    , valid "Ima.Fool@example.com"
    , valid "\"Ima.Fool\"@example.com"
    , valid "\"Ima Fool\"@example.com"
    , invalid "Ima Fool@example.com" `why` "Phil Haack says so"
    , invalid "phil.h\\@\\@ck@haacked.com" `why` "Escaping can only happen in a quoted string"
    , valid "\"first\".\"last\"@example.com"
    , valid "\"first\".middle.\"last\"@example.com"
    , invalid "\"first\\\\\"last\"@example.com" `why` "Contains an unescaped quote"
    , valid "\"first\".last@example.com" `why` "obs-local-part form as described in RFC 2822"
    , valid "first.\"last\"@example.com" `why` "obs-local-part form as described in RFC 2822"
    , valid "\"first\".\"middle\".\"last\"@example.com" `why` "obs-local-part form as described in RFC 2822"
    , valid "\"first.middle\".\"last\"@example.com" `why` "obs-local-part form as described in RFC 2822"
    , valid "\"first.middle.last\"@example.com" `why` "obs-local-part form as described in RFC 2822"
    , valid "\"first..last\"@example.com" `why` "obs-local-part form as described in RFC 2822"
    , invalid "foo@[\\1.2.3.4]" `why` "RFC 5321 specifies the syntax for address-literal and does not allow escaping"
    , valid "\"first\\\\\\\"last\"@example.com"
    , valid "first.\"mid\\dle\".\"last\"@example.com" `why` "Backslash can escape anything but must escape something"
    , valid "Test.\r\n Folding.\r\n Whitespace@example.com"
    , invalid "first\\last@example.com" `why` "Unquoted string must be an atom"
    , invalid "Abc\\@def@example.com" `why` "Was incorrectly given as a valid address in the original RFC3696"
    , invalid "Fred\\ Bloggs@example.com" `why` "Was incorrectly given as a valid address in the original RFC3696"
    , invalid "Joe.\\\\Blow@example.com" `why` "Was incorrectly given as a valid address in the original RFC3696"
    , invalid "\"test\\\r\n blah\"@example.com" `why` "Folding white space can\'t appear within a quoted pair"
    , valid "\"test\r\n blah\"@example.com" `why` "This is a valid quoted string with folding white space"
    , invalid "{^c\\@**Dog^}@cartoon.com" `why` "This is a throwaway example from Doug Lovell\'s article. Actually it\'s not a valid address."
    , valid "(foo)cal(bar)@(baz)iamcal.com(quux)" `why` "A valid address containing comments"
    , valid "cal@iamcal(woo).(yay)com" `why` "A valid address containing comments"
    , valid "cal(woo(yay)hoopla)@iamcal.com" `why` "A valid address containing comments"
    , valid "cal(foo\\@bar)@iamcal.com" `why` "A valid address containing comments"
    , valid "cal(foo\\)bar)@iamcal.com" `why` "A valid address containing comments and an escaped parenthesis"
    , invalid "cal(foo(bar)@iamcal.com" `why` "Unclosed parenthesis in comment"
    , invalid "cal(foo)bar)@iamcal.com" `why` "Too many closing parentheses"
    , invalid "cal(foo\\)@iamcal.com" `why` "Backslash at end of comment has nothing to escape"
    , valid "first().last@example.com" `why` "A valid address containing an empty comment"
    , valid "first.(\r\n middle\r\n )last@example.com" `why` "Comment with folding white space"
    , invalid "first(12345678901234567890123456789012345678901234567890)last@(1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890)example.com" `why` "Too long with comments, not too long without"
    , valid "first(Welcome to\r\n the (\"wonderful\" (!)) world\r\n of email)@example.com" `why` "Silly example from my blog post"
    , valid "pete(his account)@silly.test(his host)" `why` "Canonical example from RFC5322"
    , valid "c@(Chris\'s host.)public.example" `why` "Canonical example from RFC5322"
    , valid "jdoe@machine(comment). example" `why` "Canonical example from RFC5322"
    , valid "1234 @ local(blah) .machine .example" `why` "Canonical example from RFC5322"
    , invalid "first(middle)last@example.com" `why` "Can\'t have a comment or white space except at an element boundary"
    , valid "first(abc.def).last@example.com" `why` "Comment can contain a dot"
    , valid "first(a\"bc.def).last@example.com" `why` "Comment can contain double quote"
    , valid "first.(\")middle.last(\")@example.com" `why` "Comment can contain a quote"
    , invalid "first(abc(\"def\".ghi).mno)middle(abc(\"def\".ghi).mno).last@(abc(\"def\".ghi).mno)example(abc(\"def\".ghi).mno).(abc(\"def\".ghi).mno)com(abc(\"def\".ghi).mno)" `why` "Can\'t have comments or white space except at an element boundary"
    , valid "first(abc\\(def)@example.com" `why` "Comment can contain quoted-pair"
    , valid "first.last@x(1234567890123456789012345678901234567890123456789012345678901234567890).com" `why` "Label is longer than 63 octets, but not with comment removed"
    , valid "a(a(b(c)d(e(f))g)h(i)j)@example.com"
    , invalid "a(a(b(c)d(e(f))g)(h(i)j)@example.com" `why` "Braces are not properly matched"
    , valid "name.lastname@domain.com"
    , invalid ".@"
    , invalid "@bar.com"
    , invalid "@@bar.com"
    , valid "a@bar.com"
    , invalid "aaa.com"
    , invalid "aaa@.com"
    , invalid "aaa@.123"
    , valid "aaa@[123.123.123.123]"
    , invalid "aaa@[123.123.123.123]a" `why` "extra data outside ip"
    , valid "a@bar.com."
    , valid "a-b@bar.com"
    , valid "+@b.c" `why` "TLDs can be any length"
    , valid "+@b.com"
    , invalid "-@..com"
    , invalid "-@a..com"
    , valid "a@b.co-foo.uk"
    , valid "\"hello my name is\"@stutter.com"
    , valid "\"Test \\\"Fail\\\" Ing\"@example.com"
    , valid "valid@special.museum"
    , valid "shaitan@my-domain.thisisminekthx" `why` "Disagree with Paul Gregg here"
    , invalid "test@...........com" `why` "......"
    , valid "\"Joe\\\\Blow\"@example.com"
    , invalid "Invalid \\\n Folding \\\n Whitespace@example.com" `why` "This isn\'t FWS so Dominic Sayers says it\'s invalid"
    , valid "HM2Kinsists@(that comments are allowed)this.is.ok"
    , valid "user%uucp!path@somehost.edu"
    , valid "\"first(last)\"@example.com"
    , valid " \r\n (\r\n x \r\n ) \r\n first\r\n ( \r\n x\r\n ) \r\n .\r\n ( \r\n x) \r\n last \r\n ( x \r\n ) \r\n @example.com"
    , valid "test.\r\n \r\n obs@syntax.com" `why` "obs-fws allows multiple lines"
    , valid "test. \r\n \r\n obs@syntax.com" `why` "obs-fws allows multiple lines (test 2: space before break)"
    , invalid "test.\r\n\r\n obs@syntax.com" `why` "obs-fws must have at least one WSP per line"
    , valid "\"null \\\0\"@char.com" `why` "can have escaped null character"
    , invalid "\"null \0\"@char.com" `why` "cannot have unescaped null character"
    -- items below here are invalid according to other RFCs (or opinions)
    --, invalid "\"\"@example.com" `why` "Local part is effectively empty"
    --, invalid "foobar@192.168.0.1" `why` "ip need to be []"
    --, invalid "first.last@[.12.34.56.78]" `why` "Only char that can precede IPv4 address is \':\'"
    --, invalid "first.last@[12.34.56.789]" `why` "Can\'t be interpreted as IPv4 so IPv6 tag is missing"
    --, invalid "first.last@[::12.34.56.78]" `why` "IPv6 tag is missing"
    --, invalid "first.last@[IPv5:::12.34.56.78]" `why` "IPv6 tag is wrong"
    --, invalid "first.last@[IPv6:1111:2222:3333::4444:5555:12.34.56.78]" `why` "Too many IPv6 groups (4 max)"
    --, invalid "first.last@[IPv6:1111:2222:3333:4444:5555:12.34.56.78]" `why` "Not enough IPv6 groups"
    --, invalid "first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:12.34.56.78]" `why` "Too many IPv6 groups (6 max)"
    --, invalid "first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777]" `why` "Not enough IPv6 groups"
    --, invalid "first.last@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888:9999]" `why` "Too many IPv6 groups (8 max)"
    --, invalid "first.last@[IPv6:1111:2222::3333::4444:5555:6666]" `why` "Too many \'::\' (can be none or one)"
    --, invalid "first.last@[IPv6:1111:2222:3333::4444:5555:6666:7777]" `why` "Too many IPv6 groups (6 max)"
    --, invalid "first.last@[IPv6:1111:2222:333x::4444:5555]" `why` "x is not valid in an IPv6 address"
    --, invalid "first.last@[IPv6:1111:2222:33333::4444:5555]" `why` "33333 is not a valid group in an IPv6 address"
    --, invalid "first.last@example.123" `why` "TLD can\'t be all digits"
    --, invalid "aaa@[123.123.123.333]" `why` "not a valid IP"
    --, invalid "first.last@[IPv6:1111:2222:3333:4444:5555:6666:12.34.567.89]" `why` "IPv4 part contains an invalid octet"
    , valid "a@b"
    , valid "a@bar"
    , invalid "invalid@special.museum-" `why` "domain can't end with hyphen"
    , invalid "a@-b.com" `why` "domain can't start with hyphen"
    , invalid "a@b-.com" `why` "domain label can't end with hyphen"
    --, invalid "\"foo\"(yay)@(hoopla)[1.2.3.4]" `why` "Address literal can\'t be commented (RFC5321)"
    --, invalid "first.\"\".last@example.com" `why` "Contains a zero-length element"
    --, invalid "test@example" `why` "Dave Child says so"
    , invalid (BS.replicate 65 'x' <> "@x") `why` "local-part longer than 64 octets" `errorShouldContain` "too long"
    , invalid "x@x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456789.x23456" `why` "Domain exceeds 255 chars"
    , invalid "test@123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012.com" `why` "255 characters is maximum length for domain. This is 256."
    , invalid "123456789012345678901234567890123456789012345678901234567890@12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.1234.example.com" `why` "Entire address is longer than 254 characters (this is 257)"
    , invalid "123456789012345678901234567890123456789012345678901234567890@12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.123.example.com" `why` "Entire address is longer than 254 characters (this is 256)"
    , invalid "123456789012345678901234567890123456789012345678901234567890@12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.12.example.com" `why` "Entire address is longer than 254 characters (this is 255)"
    , valid "123456789012345678901234567890123456789012345678901234567890@12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.12345678901234567890123456789012345678901234567890123456789.1.example.com" `why` "Entire address is 254 characters"
    --, invalid "test@123.123.123.123" `why` "Top Level Domain won\'t be all-numeric (see RFC3696 Section 2). I disagree with Dave Child on this one."
    , invalid "first.last@x234567890123456789012345678901234567890123456789012345678901234.example.com" `why` "Label can\'t be longer than 63 octets"
    --, invalid "first.last@com" `why` "Mail host must be second- or lower level"
    , invalid "first.last@e.-xample.com" `why` "Label can\'t begin with a hyphen"
    , invalid "first.last@exampl-.e.com" `why` "Label can\'t end with a hyphen"
    ]
  where
    -- 25 labels of nine 'x' joined by dots: 25*9 + 24 = 249 octets.
    domain249 = BS.intercalate "." (replicate 25 (BS.replicate 9 'x'))
--------------------------------------------------------------------------------