├── .gitignore
├── CONTRIBUTING.md
├── COPYRIGHT
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
└── src
    └── lib.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | **/*.rs.bk
3 | Cargo.lock
4 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | If you send a pull request / patch, please observe the following.
 2 | 
 3 | ## Licensing
 4 | 
 5 | Since this crate is dual-licensed,
 6 | [section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
 7 | is considered to apply in the sense of Contributions being automatically
 8 | under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
 9 | That is, by the act of offering a Contribution, you place your Contribution
10 | under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
11 | file. Please do not contribute if you aren't willing or allowed to license your
12 | contributions in this manner.
13 | 
14 | You are encouraged to dedicate test code that you contribute to the Public
15 | Domain using the CC0 dedication. If you contribute test code that is not
16 | dedicated to the Public Domain, please be sure not to put it in a part of
17 | source code that the comments designate as being dedicated to the Public
18 | Domain.
19 | 
20 | ## Copyright Notices
21 | 
22 | If you require the addition of your copyright notice, it's up to you to edit in
23 | your notice as part of your Contribution. Not adding a copyright notice is
24 | taken as a waiver of copyright notice.
25 | 
26 | ## Compatibility with Stable Rust
27 | 
28 | Please ensure that your Contribution compiles with the latest stable-channel
29 | rustc.
30 | 
31 | ## rustfmt
32 | 
33 | The `rustfmt` version used for this code is `rustfmt-nightly`. Please either
34 | use that version or avoid using `rustfmt` (so as not to reformat all the code).
35 | 
36 | ## Unit tests
37 | 
38 | Please ensure that `cargo test` succeeds.
39 | 


--------------------------------------------------------------------------------
/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | charset is copyright 2013-2016 Mozilla Foundation.
 2 | 
 3 | Licensed under the Apache License, Version 2.0
 4 | <LICENSE-APACHE or
 5 | https://www.apache.org/licenses/LICENSE-2.0> or the MIT
 6 | license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
 7 | at your option. All files in the project carrying such
 8 | notice may not be copied, modified, or distributed except
 9 | according to those terms.
10 | 
11 | Test code within encoding_rs is dedicated to the Public Domain when so
12 | designated (see the individual files for PD/CC0-dedicated sections).
13 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "charset"
 3 | description = "Character encoding decoding for email"
 4 | version = "0.1.5"
 5 | authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
 6 | edition = "2018"
 7 | license = "Apache-2.0 OR MIT"
 8 | readme = "README.md"
 9 | documentation = "https://docs.rs/charset/"
10 | homepage = "https://docs.rs/charset/"
11 | repository = "https://github.com/hsivonen/charset"
12 | keywords = ["encoding", "email", "unicode", "charset", "utf-7"]
13 | categories = ["text-processing", "encoding", "email"]
14 | rust-version = "1.47.0"
15 | 
16 | [dependencies]
17 | encoding_rs = "0.8.34"
18 | base64 = { version = "0.22.1", default-features = false }
19 | serde = { version = "1.0", optional = true }
20 | 
21 | [dev-dependencies]
22 | serde_derive = "1.0"
23 | bincode = "1.3.3"
24 | serde_json = "1.0"
25 | 
26 | [badges.maintenance]
27 | status = "passively-maintained"
28 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Copyright Mozilla Foundation
 2 | 
 3 | Permission is hereby granted, free of charge, to any
 4 | person obtaining a copy of this software and associated
 5 | documentation files (the "Software"), to deal in the
 6 | Software without restriction, including without
 7 | limitation the rights to use, copy, modify, merge,
 8 | publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software
10 | is furnished to do so, subject to the following
11 | conditions:
12 | 
13 | The above copyright notice and this permission notice
14 | shall be included in all copies or substantial portions
15 | of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 | DEALINGS IN THE SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # charset
  2 | 
  3 | [![crates.io](https://img.shields.io/crates/v/charset.svg)](https://crates.io/crates/charset)
  4 | [![docs.rs](https://docs.rs/charset/badge.svg)](https://docs.rs/charset/)
  5 | [![Apache-2.0 OR MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/charset/blob/master/COPYRIGHT)
  6 | 
  7 | `charset` is a wrapper around [`encoding_rs`][1] that provides
  8 | (non-streaming) decoding for character encodings that occur in _email_ by
  9 | providing decoding for [UTF-7][2] in addition to the encodings defined by
 10 | the [Encoding Standard][3] (and provided by `encoding_rs`).
 11 | 
 12 | _Note:_ Do _not_ use this crate for consuming _Web_ content. For security
 13 | reasons, consumers of Web content are [_prohibited_][4] from supporting
 14 | UTF-7. Use `encoding_rs` directly when consuming Web content.
 15 | 
 16 | The set of encodings consisting of UTF-7 and the encodings defined in the
 17 | Encoding Standard is believed to be appropriate for consuming email,
 18 | because that's the set of encodings supported by [Thunderbird][5].
 19 | Furthermore, UTF-7 support is believed to be necessary based on the
 20 | experience of the Firefox OS email client. In fact, while the UTF-7
 21 | implementation in this crate is independent of Thunderbird's UTF-7
 22 | implementation, Thunderbird uses `encoding_rs` to decode the other
 23 | encodings. In addition to the labels defined in the Encoding Standard,
 24 | this crate recognizes additional `java.io` and `java.nio` names for
 25 | compatibility with JavaMail. For UTF-7, IANA and Netscape 4.0 labels
 26 | are recognized.
 27 | 
 28 | Known compatibility limitations (known from Thunderbird bug reports):
 29 | 
 30 |  * Some ancient Usenet postings in Chinese may not be decodable, because
 31 |    this crate does not support HZ.
 32 |  * Some emails sent in Chinese by Sun's email client for CDE on Solaris
 33 |    around the turn of the millennium may not be decodable, because this
 34 |    crate does not support ISO-2022-CN.
 35 |  * Some emails sent in Korean by IBM/Lotus Notes may not be decodable,
 36 |    because this crate does not support ISO-2022-KR.
 37 | 
 38 | This crate intentionally does not support encoding content into legacy
 39 | encodings. When sending email, _always_ use UTF-8. That is, just call
 40 | `.as_bytes()` on `&str` and label the content as `UTF-8`.
 41 | 
 42 | [1]: https://crates.io/crates/encoding_rs/
 43 | [2]: https://tools.ietf.org/html/rfc2152
 44 | [3]: https://encoding.spec.whatwg.org/
 45 | [4]: https://html.spec.whatwg.org/#character-encodings
 46 | [5]: https://thunderbird.net/
 47 | 
 48 | ## Version 1.0
 49 | 
 50 | Logically this crate should be at version 1.0, but it's not worth the hassle
 51 | to do a version number semver break when there's no actual API break. The
 52 | expectation is to do 1.0 when `encoding_rs` 1.0 comes along.
 53 | 
 54 | ## Licensing
 55 | 
 56 | Apache-2.0 OR MIT; please see the file named
 57 | [COPYRIGHT](https://github.com/hsivonen/charset/blob/master/COPYRIGHT).
 58 | 
 59 | ## API Documentation
 60 | 
 61 | Generated [API documentation](https://docs.rs/charset/) is available
 62 | online.
 63 | 
 64 | ## Security Considerations
 65 | 
 66 | Again, this crate is for _email_. Please do _NOT_ use it for _Web_
 67 | content.
 68 | 
 69 | Never try to perform any security analysis on the undecoded data in
 70 | ASCII-incompatible encodings and in UTF-7 in particular. Always decode
 71 | first and analyze after. UTF-7 allows even characters that don't have to
 72 | be represented as base64 to be represented as base64. Also, for consistency
 73 | with Thunderbird, the UTF-7 decoder in this crate allows e.g. ASCII
 74 | controls to be represented without base64 encoding even when the spec
 75 | says they should be base64-encoded.
 76 | 
 77 | This implementation is non-constant-time by design. An attacker who
 78 | can observe input length and the time it takes to decode it can make
 79 | guesses about relative proportions of characters from different ranges.
 80 | Guessing the proportion of ASCII vs. non-ASCII should be particularly
 81 | feasible.
 82 | 
 83 | ## Serde support
 84 | 
 85 | The cargo feature `serde` enables Serde support for `Charset`.
 86 | 
 87 | ## Minimum Rust Version
 88 | 
 89 | The MSRV depends on the `encoding_rs` and `base64` dependencies; not on this
 90 | crate. The current MSRV appears to be 1.47.0. This crate does not undergo
 91 | semver bumps for `base64` semver bumps.
 92 | 
 93 | ## Disclaimer
 94 | 
 95 | This is a personal project. It has a Mozilla copyright notice, because
 96 | I copied and pasted from encoding_rs. You should not try to read anything
 97 | more into Mozilla's name appearing.
 98 | 
 99 | ## Release Notes
100 | 
101 | ### 0.1.5
102 | 
103 | * Update `bincode` (dev dependency only) to 1.3.3.
104 | 
105 | ### 0.1.4
106 | 
107 | * Update `base64` to 0.22.1.
108 | * Update `encoding_rs` to 0.8.34.
109 | * This crate is now a `no_std` + `alloc` crate.
110 | * Added support for java.io and java.nio names to accommodate JavaMail:
111 |   - ISO-8859-N series in the form iso8859_N, except 10, 11, 14 and 16 (no evidence of existing in JavaMail) and 8 (unclear if visual or logical in JavaMail if even actually sent by JavaMail).
112 |   - CJK and Thai Windows code page numbers prefixed with ms (and 950 also suffixed with _hkscs).
113 |   - EUC variants (including CN, i.e. GBK) and KOI with underscore: euc_jp, euc_kr, euc_cn, koi8_r, and koi8_u.
114 |   - Windows code page numbers 874, 949, 950 prefixed with x-windows-.
115 |   - tis620 and iso2022jp without hyphens. 
116 | * Added IANA and Netscape 4.0 aliases for UTF-7.
117 | 
118 | ### 0.1.3
119 | 
120 | * Update `base64` to 0.13.0.
121 | 
122 | ### 0.1.2
123 | 
124 | * Implemented `From<&'static Encoding>` for `Charset`.
125 | * Added optional Serde support.
126 | 
127 | ### 0.1.1
128 | 
129 | * Added `decode_ascii()`.
130 | * Added `decode_latin1()`.
131 | 
132 | ### 0.1.0
133 | 
134 | Initial release.


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
   1 | // Copyright Mozilla Foundation. See the COPYRIGHT
   2 | // file at the top-level directory of this distribution.
   3 | //
   4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
   7 | // option. This file may not be copied, modified, or distributed
   8 | // except according to those terms.
   9 | 
  10 | //! `charset` is a wrapper around [`encoding_rs`][1] that provides
  11 | //! (non-streaming) decoding for character encodings that occur in _email_ by
  12 | //! providing decoding for [UTF-7][2] in addition to the encodings defined by
  13 | //! the [Encoding Standard][3] (and provided by `encoding_rs`).
  14 | //!
  15 | //! _Note:_ Do _not_ use this crate for consuming _Web_ content. For security
  16 | //! reasons, consumers of Web content are [_prohibited_][4] from supporting
  17 | //! UTF-7. Use `encoding_rs` directly when consuming Web content.
  18 | //!
  19 | //! The set of encodings consisting of UTF-7 and the encodings defined in the
  20 | //! Encoding Standard is believed to be appropriate for consuming email,
  21 | //! because that's the set of encodings supported by [Thunderbird][5].
  22 | //! Furthermore, UTF-7 support is believed to be necessary based on the
  23 | //! experience of the Firefox OS email client. In fact, while the UTF-7
  24 | //! implementation in this crate is independent of Thunderbird's UTF-7
  25 | //! implementation, Thunderbird uses `encoding_rs` to decode the other
  26 | //! encodings. In addition to the labels defined in the Encoding Standard,
  27 | //! this crate recognizes additional `java.io` and `java.nio` names for
  28 | //! compatibility with JavaMail. For UTF-7, IANA and Netscape 4.0 labels
  29 | //! are recognized.
  30 | //!
  31 | //! Known compatibility limitations (known from Thunderbird bug reports):
  32 | //!
  33 | //!  * Some ancient Usenet postings in Chinese may not be decodable, because
  34 | //!    this crate does not support HZ.
  35 | //!  * Some emails sent in Chinese by Sun's email client for CDE on Solaris
  36 | //!    around the turn of the millennium may not be decodable, because this
  37 | //!    crate does not support ISO-2022-CN.
  38 | //!  * Some emails sent in Korean by IBM/Lotus Notes may not be decodable,
  39 | //!    because this crate does not support ISO-2022-KR.
  40 | //!
  41 | //! This crate intentionally does not support encoding content into legacy
  42 | //! encodings. When sending email, _always_ use UTF-8. That is, just call
  43 | //! `.as_bytes()` on `&str` and label the content as `UTF-8`.
  44 | //!
  45 | //! [1]: https://crates.io/crates/encoding_rs/
  46 | //! [2]: https://tools.ietf.org/html/rfc2152
  47 | //! [3]: https://encoding.spec.whatwg.org/
  48 | //! [4]: https://html.spec.whatwg.org/#character-encodings
  49 | //! [5]: https://thunderbird.net/
  50 | //!
  51 | //! # Security considerations
  52 | //!
  53 | //! Again, this crate is for _email_. Please do _NOT_ use it for _Web_
  54 | //! content.
  55 | //!
  56 | //! Never try to perform any security analysis on the undecoded data in
  57 | //! ASCII-incompatible encodings and in UTF-7 in particular. Always decode
  58 | //! first and analyze after. UTF-7 allows even characters that don't have to
  59 | //! be represented as base64 to be represented as base64. Also, for consistency
  60 | //! with Thunderbird, the UTF-7 decoder in this crate allows e.g. ASCII
  61 | //! controls to be represented without base64 encoding even when the spec
  62 | //! says they should be base64-encoded.
  63 | //!
  64 | //! This implementation is non-constant-time by design. An attacker who
  65 | //! can observe input length and the time it takes to decode it can make
  66 | //! guesses about relative proportions of characters from different ranges.
  67 | //! Guessing the proportion of ASCII vs. non-ASCII should be particularly
  68 | //! feasible.
  69 | 
  70 | #![no_std]
  71 | 
  72 | #[cfg_attr(feature = "serde", macro_use)]
  73 | extern crate alloc;
  74 | extern crate base64;
  75 | extern crate encoding_rs;
  76 | 
  77 | #[cfg(feature = "serde")]
  78 | extern crate serde;
  79 | 
  80 | #[cfg(all(test, feature = "serde"))]
  81 | extern crate bincode;
  82 | #[cfg(all(test, feature = "serde"))]
  83 | #[macro_use]
  84 | extern crate serde_derive;
  85 | #[cfg(all(test, feature = "serde"))]
  86 | extern crate serde_json;
  87 | 
  88 | use base64::engine::general_purpose::STANDARD_NO_PAD;
  89 | use base64::Engine;
  90 | use encoding_rs::CoderResult;
  91 | use encoding_rs::Encoding;
  92 | use encoding_rs::GB18030;
  93 | use encoding_rs::GBK;
  94 | use encoding_rs::UTF_16BE;
  95 | 
  96 | use alloc::borrow::Cow;
  97 | use alloc::string::String;
  98 | use alloc::vec::Vec;
  99 | 
 100 | use core::cmp::Ordering;
 101 | 
 102 | #[cfg(feature = "serde")]
 103 | use serde::de::Visitor;
 104 | #[cfg(feature = "serde")]
 105 | use serde::{Deserialize, Deserializer, Serialize, Serializer};
 106 | 
 107 | /// The UTF-7 encoding as defined in [RFC 2152](https://tools.ietf.org/html/rfc2152).
 108 | pub const UTF_7: Charset = Charset {
 109 |     variant: VariantCharset::Utf7,
 110 | };
 111 | 
 112 | /// Converts bytes, each interpreted by its unsigned value as a Unicode code
 113 | /// point (i.e. U+0000 to U+00FF, inclusive), to UTF-8.
 114 | ///
 115 | /// This is useful for decoding non-conforming header names such that the
 116 | /// names stay unique and the decoding cannot fail (except for allocation
 117 | /// failure).
 118 | ///
 119 | /// Borrows if input is ASCII-only. Performs a single heap allocation
 120 | /// otherwise. This is a thin wrapper around `encoding_rs::mem::decode_latin1`.
 121 | pub fn decode_latin1<'a>(bytes: &'a [u8]) -> Cow<'a, str> {
 122 |     encoding_rs::mem::decode_latin1(bytes)
 123 | }
 124 | 
 125 | /// Converts ASCII to UTF-8 with non-ASCII bytes replaced with the
 126 | /// REPLACEMENT CHARACTER.
 127 | ///
 128 | /// This is can be used for strict MIME compliance when there is no declared
 129 | /// encoding.
 130 | ///
 131 | /// Borrows if input is ASCII-only. Performs a single heap allocation
 132 | /// otherwise.
 133 | pub fn decode_ascii<'a>(bytes: &'a [u8]) -> Cow<'a, str> {
 134 |     let up_to = Encoding::ascii_valid_up_to(bytes);
 135 |     // >= makes later things optimize better than ==
 136 |     if up_to >= bytes.len() {
 137 |         debug_assert_eq!(up_to, bytes.len());
 138 |         let s: &str = unsafe { ::core::str::from_utf8_unchecked(bytes) };
 139 |         return Cow::Borrowed(s);
 140 |     }
 141 |     let (head, tail) = bytes.split_at(up_to);
 142 |     let capacity = head.len() + tail.len() * 3;
 143 |     let mut vec = Vec::with_capacity(capacity);
 144 |     vec.extend_from_slice(head);
 145 |     for &b in tail.into_iter() {
 146 |         if b < 0x80 {
 147 |             vec.push(b);
 148 |         } else {
 149 |             vec.extend_from_slice("\u{FFFD}".as_bytes());
 150 |         }
 151 |     }
 152 |     Cow::Owned(unsafe { String::from_utf8_unchecked(vec) })
 153 | }
 154 | 
 155 | /// A character encoding suitable for decoding _email_.
 156 | ///
 157 | /// This is either an encoding as defined in the [Encoding Standard][1]
 158 | /// or UTF-7 as defined in [RFC 2152][2].
 159 | ///
 160 | /// [1]: https://encoding.spec.whatwg.org/
 161 | /// [2]: https://tools.ietf.org/html/rfc2152
 162 | ///
 163 | /// Each `Charset` has one or more _labels_ that are used to identify
 164 | /// the `Charset` in protocol text. In MIME/IANA terminology, these are
 165 | /// called _names_ and _aliases_, but for consistency with the Encoding
 166 | /// Standard and the encoding_rs crate, they are called labels in this
 167 | /// crate. What this crate calls the _name_ (again, for consistency
 168 | /// with the Encoding Standard and the encoding_rs crate) is known as
 169 | /// _preferred name_ in MIME/IANA terminology.
 170 | ///
 171 | /// Instances of `Charset` can be compared with `==`. `Charset` is
 172 | /// `Copy` and is meant to be passed by value. It also derives `Debug` and `Hash`.
 173 | ///
 174 | /// _Note:_ It is wrong to use this for decoding Web content. Use
 175 | /// `encoding_rs::Encoding` instead!
 176 | #[derive(PartialEq, Debug, Copy, Clone, Hash)]
 177 | pub struct Charset {
 178 |     variant: VariantCharset, // private; selects the concrete encoding (e.g. `VariantCharset::Utf7` for `UTF_7`)
 179 | }
 180 | 
 181 | impl Charset {
 182 |     /// Implements the
 183 |     /// [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
 184 |     /// algorithm with the label "UTF-7" added to the set of labels recognized.
 185 |     /// GBK is unified with gb18030, since they decode the same and `Charset`
 186 |     /// only supports decoding.
 187 |     ///
 188 |     /// If, after ASCII-lowercasing and removing leading and trailing
 189 |     /// whitespace, the argument matches a label defined in the Encoding
 190 |     /// Standard or "utf-7", `Some(Charset)` representing the corresponding
 191 |     /// encoding is returned. If there is no match, `None` is returned.
 192 |     ///
 193 |     /// This is the right method to use if the action upon the method returning
 194 |     /// `None` is to use a fallback encoding (e.g. `WINDOWS_1252`) instead.
 195 |     /// When the action upon the method returning `None` is not to proceed with
 196 |     /// a fallback but to refuse processing, `for_label_no_replacement()` is more
 197 |     /// appropriate.
 198 |     ///
 199 |     /// The argument is of type `&[u8]` instead of `&str` to save callers
 200 |     /// that are extracting the label from a non-UTF-8 protocol the trouble
 201 |     /// of conversion to UTF-8. (If you have a `&str`, just call `.as_bytes()`
 202 |     /// on it.)
 203 |     #[inline]
 204 |     pub fn for_label(label: &[u8]) -> Option<Charset> {
 205 |         if let Some(encoding) = Encoding::for_label(label) {
 206 |             Some(Charset::for_encoding(encoding))
 207 |         } else if let Some(variant_charset) = for_label_extended(label) {
 208 |             Some(Charset {
 209 |                 variant: variant_charset,
 210 |             })
 211 |         } else {
 212 |             None
 213 |         }
 214 |     }
 215 | 
 216 |     /// This method behaves the same as `for_label()`, except when `for_label()`
 217 |     /// would return `Some(Charset::for_encoding(encoding_rs::REPLACEMENT))`,
 218 |     /// this method returns `None` instead.
 219 |     ///
 220 |     /// This method is useful in scenarios where a fatal error is required
 221 |     /// upon invalid label, because in those cases the caller typically wishes
 222 |     /// to treat the labels that map to the replacement encoding as fatal
 223 |     /// errors, too.
 224 |     ///
 225 |     /// It is not OK to use this method when the action upon the method returning
 226 |     /// `None` is to use a fallback encoding (e.g. `WINDOWS_1252`) with `text/html`
 227 |     /// email. In such a case, the `for_label()` method should be used instead in
 228 |     /// order to avoid unsafe fallback for labels that `for_label()` maps to
 229 |     /// `Some(REPLACEMENT)`. Such fallback might be safe, though not particularly
 230 |     /// useful for `text/plain` email, though.
 231 |     #[inline]
 232 |     pub fn for_label_no_replacement(label: &[u8]) -> Option<Charset> {
 233 |         if let Some(encoding) = Encoding::for_label_no_replacement(label) {
 234 |             Some(Charset::for_encoding(encoding))
 235 |         } else if let Some(variant_charset) = for_label_extended(label) {
 236 |             Some(Charset {
 237 |                 variant: variant_charset,
 238 |             })
 239 |         } else {
 240 |             None
 241 |         }
 242 |     }
 243 | 
 244 |     /// Returns the `Charset` corresponding to an `&'static Encoding`.
 245 |     ///
 246 |     /// `GBK` is unified with `GB18030`, since those two decode the same
 247 |     /// and `Charset` only supports decoding.
 248 |     #[inline]
 249 |     pub fn for_encoding(encoding: &'static Encoding) -> Charset {
 250 |         let enc = if encoding == GBK { GB18030 } else { encoding };
 251 |         Charset {
 252 |             variant: VariantCharset::Encoding(enc),
 253 |         }
 254 |     }
 255 | 
 256 |     /// Performs non-incremental BOM sniffing.
 257 |     ///
 258 |     /// The argument must either be a buffer representing the entire input
 259 |     /// stream (non-streaming case) or a buffer representing at least the first
 260 |     /// three bytes of the input stream (streaming case).
 261 |     ///
 262 |     /// Returns `Some((Charset::for_encoding(encoding_rs::UTF_8), 3))`,
 263 |     /// `Some((Charset::for_encoding(encoding_rs::UTF_16LE), 2))` or
 264 |     /// `Some((Charset::for_encoding(encoding_rs::UTF_16BE), 2))` if the
 265 |     /// argument starts with the UTF-8, UTF-16LE or UTF-16BE BOM or `None`
 266 |     /// otherwise.
 267 |     #[inline]
 268 |     pub fn for_bom(buffer: &[u8]) -> Option<(Charset, usize)> {
 269 |         if let Some((encoding, length)) = Encoding::for_bom(buffer) {
 270 |             Some((Charset::for_encoding(encoding), length))
 271 |         } else {
 272 |             None
 273 |         }
 274 |     }
 275 | 
 276 |     /// Returns the name of this encoding.
 277 |     ///
 278 |     /// Mostly useful for debugging
 279 |     pub fn name(self) -> &'static str {
 280 |         match self.variant {
 281 |             VariantCharset::Encoding(encoding) => encoding.name(),
 282 |             VariantCharset::Utf7 => "UTF-7",
 283 |         }
 284 |     }
 285 | 
 286 |     /// Checks whether the bytes 0x00...0x7F map exclusively to the characters
 287 |     /// U+0000...U+007F and vice versa.
 288 |     #[inline]
 289 |     pub fn is_ascii_compatible(self) -> bool {
 290 |         match self.variant {
 291 |             VariantCharset::Encoding(encoding) => encoding.is_ascii_compatible(),
 292 |             VariantCharset::Utf7 => false,
 293 |         }
 294 |     }
 295 | 
 296 |     /// Decode complete input to `Cow<'a, str>` _with BOM sniffing_ and with
 297 |     /// malformed sequences replaced with the REPLACEMENT CHARACTER when the
 298 |     /// entire input is available as a single buffer (i.e. the end of the
 299 |     /// buffer marks the end of the stream).
 300 |     ///
 301 |     /// This method implements the (non-streaming version of) the
 302 |     /// [_decode_](https://encoding.spec.whatwg.org/#decode) spec concept.
 303 |     ///
 304 |     /// The second item in the returned tuple is the encoding that was actually
 305 |     /// used (which may differ from this encoding thanks to BOM sniffing).
 306 |     ///
 307 |     /// The third item in the returned tuple indicates whether there were
 308 |     /// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
 309 |     ///
 310 |     /// _Note:_ It is wrong to use this when the input buffer represents only
 311 |     /// a segment of the input instead of the whole input.
 312 |     ///
 313 |     /// # Panics
 314 |     ///
 315 |     /// If the size calculation for a heap-allocated backing buffer overflows
 316 |     /// `usize`.
 317 |     #[inline]
 318 |     pub fn decode<'a>(self, bytes: &'a [u8]) -> (Cow<'a, str>, Charset, bool) {
 319 |         let (charset, without_bom) = match Charset::for_bom(bytes) {
 320 |             Some((charset, bom_length)) => (charset, &bytes[bom_length..]),
 321 |             None => (self, bytes),
 322 |         };
 323 |         let (cow, had_errors) = charset.decode_without_bom_handling(without_bom);
 324 |         (cow, charset, had_errors)
 325 |     }
 326 | 
 327 |     /// Decode complete input to `Cow<'a, str>` _with BOM removal_ and with
 328 |     /// malformed sequences replaced with the REPLACEMENT CHARACTER when the
 329 |     /// entire input is available as a single buffer (i.e. the end of the
 330 |     /// buffer marks the end of the stream).
 331 |     ///
 332 |     /// When invoked on `UTF_8`, this method implements the (non-streaming
 333 |     /// version of) the
 334 |     /// [_UTF-8 decode_](https://encoding.spec.whatwg.org/#utf-8-decode) spec
 335 |     /// concept.
 336 |     ///
 337 |     /// The second item in the returned pair indicates whether there were
 338 |     /// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
 339 |     ///
 340 |     /// _Note:_ It is wrong to use this when the input buffer represents only
 341 |     /// a segment of the input instead of the whole input.
 342 |     ///
 343 |     /// # Panics
 344 |     ///
 345 |     /// If the size calculation for a heap-allocated backing buffer overflows
 346 |     /// `usize`.
 347 |     #[inline]
 348 |     pub fn decode_with_bom_removal<'a>(self, bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
 349 |         match self.variant {
 350 |             VariantCharset::Encoding(encoding) => encoding.decode_with_bom_removal(bytes),
 351 |             VariantCharset::Utf7 => decode_utf7(bytes),
 352 |         }
 353 |     }
 354 | 
 355 |     /// Decode complete input to `Cow<'a, str>` _without BOM handling_ and
 356 |     /// with malformed sequences replaced with the REPLACEMENT CHARACTER when
 357 |     /// the entire input is available as a single buffer (i.e. the end of the
 358 |     /// buffer marks the end of the stream).
 359 |     ///
 360 |     /// When invoked on `UTF_8`, this method implements the (non-streaming
 361 |     /// version of) the
 362 |     /// [_UTF-8 decode without BOM_](https://encoding.spec.whatwg.org/#utf-8-decode-without-bom)
 363 |     /// spec concept.
 364 |     ///
 365 |     /// The second item in the returned pair indicates whether there were
 366 |     /// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
 367 |     ///
 368 |     /// _Note:_ It is wrong to use this when the input buffer represents only
 369 |     /// a segment of the input instead of the whole input.
 370 |     ///
 371 |     /// # Panics
 372 |     ///
 373 |     /// If the size calculation for a heap-allocated backing buffer overflows
 374 |     /// `usize`.
 375 |     #[inline]
 376 |     pub fn decode_without_bom_handling<'a>(self, bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
 377 |         match self.variant {
 378 |             VariantCharset::Encoding(encoding) => encoding.decode_without_bom_handling(bytes),
 379 |             VariantCharset::Utf7 => decode_utf7(bytes),
 380 |         }
 381 |     }
 382 | }
 383 | 
 384 | impl From<&'static Encoding> for Charset {
 385 |     fn from(encoding: &'static Encoding) -> Self {
 386 |         Charset::for_encoding(encoding)
 387 |     }
 388 | }
 389 | 
/// Serializes a `Charset` as a string containing its `name()`.
///
/// Only compiled when the `serde` feature is enabled.
#[cfg(feature = "serde")]
impl Serialize for Charset {
    #[inline]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The serialized form is just the name string, e.g. "UTF-7".
        serializer.serialize_str(self.name())
    }
}
 400 | 
/// Serde visitor that turns a string into a `Charset`; handed to the
/// deserializer by the `Deserialize` implementation for `Charset`.
#[cfg(feature = "serde")]
struct CharsetVisitor;
 403 | 
 404 | #[cfg(feature = "serde")]
 405 | impl<'de> Visitor<'de> for CharsetVisitor {
 406 |     type Value = Charset;
 407 | 
 408 |     fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
 409 |         formatter.write_str("a valid charset label")
 410 |     }
 411 | 
 412 |     fn visit_str<E>(self, value: &str) -> Result<Charset, E>
 413 |     where
 414 |         E: serde::de::Error,
 415 |     {
 416 |         if let Some(charset) = Charset::for_label(value.as_bytes()) {
 417 |             Ok(charset)
 418 |         } else {
 419 |             Err(E::custom(format!("invalid charset label: {}", value)))
 420 |         }
 421 |     }
 422 | }
 423 | 
/// Deserializes a `Charset` from a string holding a charset label.
///
/// Only compiled when the `serde` feature is enabled.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Charset {
    fn deserialize<D>(deserializer: D) -> Result<Charset, D::Error>
    where
        D: Deserializer<'de>,
    {
        // Ask for a string; `CharsetVisitor` resolves it with
        // `Charset::for_label()` and reports unknown labels as errors.
        deserializer.deserialize_str(CharsetVisitor)
    }
}
 433 | 
/// Labels recognized by `for_label_extended()`, i.e. the labels this crate
/// looks up itself after encoding_rs found no match.
///
/// The array is binary-searched, so its order is significant: entries are
/// sorted first by length and then by comparing their bytes from the last
/// byte towards the first, exactly as the comparator in
/// `for_label_extended()` does. Entry `i` corresponds to entry `i` of
/// `ENCODINGS_IN_LABEL_SORT`; the two arrays must be kept in sync.
static LABELS_SORTED: [&'static str; 29] = [
    "ms950",
    "ms874",
    "ms936",
    "utf-7",
    "ms949",
    "tis620",
    "euc_cn",
    "euc_jp",
    "koi8_r",
    "euc_kr",
    "koi8_u",
    "iso8859_1",
    "iso8859_2",
    "iso8859_3",
    "iso8859_4",
    "iso8859_5",
    "iso8859_6",
    "iso8859_7",
    "iso8859_9",
    "iso2022jp",
    "iso8859_13",
    "iso8859_15",
    "ms950_hkscs",
    "x-windows-950",
    "x-windows-874",
    "x-windows-949",
    "csunicode11utf7",
    "unicode-1-1-utf-7",
    "x-unicode-2-0-utf-7",
];
 465 | 
/// The charset for each label in `LABELS_SORTED`, in the same order:
/// `for_label_extended()` indexes this array with the position at which
/// the label was found in `LABELS_SORTED`.
static ENCODINGS_IN_LABEL_SORT: [VariantCharset; 29] = [
    VariantCharset::Encoding(&encoding_rs::BIG5_INIT),
    VariantCharset::Encoding(&encoding_rs::WINDOWS_874_INIT),
    VariantCharset::Encoding(&encoding_rs::GB18030_INIT),
    VariantCharset::Utf7,
    VariantCharset::Encoding(&encoding_rs::EUC_KR_INIT),
    VariantCharset::Encoding(&encoding_rs::WINDOWS_874_INIT),
    VariantCharset::Encoding(&encoding_rs::GB18030_INIT),
    VariantCharset::Encoding(&encoding_rs::EUC_JP_INIT),
    VariantCharset::Encoding(&encoding_rs::KOI8_R_INIT),
    VariantCharset::Encoding(&encoding_rs::EUC_KR_INIT),
    VariantCharset::Encoding(&encoding_rs::KOI8_U_INIT),
    VariantCharset::Encoding(&encoding_rs::WINDOWS_1252_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_2_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_3_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_4_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_5_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_6_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_7_INIT),
    VariantCharset::Encoding(&encoding_rs::WINDOWS_1254_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_2022_JP_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_13_INIT),
    VariantCharset::Encoding(&encoding_rs::ISO_8859_15_INIT),
    VariantCharset::Encoding(&encoding_rs::BIG5_INIT),
    VariantCharset::Encoding(&encoding_rs::BIG5_INIT),
    VariantCharset::Encoding(&encoding_rs::WINDOWS_874_INIT),
    VariantCharset::Encoding(&encoding_rs::EUC_KR_INIT),
    VariantCharset::Utf7,
    VariantCharset::Utf7,
    VariantCharset::Utf7,
];
 497 | 
/// Length in bytes of the longest entry in `LABELS_SORTED`. It sizes the
/// normalization buffer in `for_label_extended()`, which rejects any longer
/// candidate outright.
const LONGEST_LABEL_LENGTH: usize = 19; // x-unicode-2-0-utf-7
 499 | 
 500 | /// Copypaste from encoding_rs to search over the labels known to this
 501 | /// crate but not encoding_rs.
 502 | #[inline(never)]
 503 | fn for_label_extended(label: &[u8]) -> Option<VariantCharset> {
 504 |     let mut trimmed = [0u8; LONGEST_LABEL_LENGTH];
 505 |     let mut trimmed_pos = 0usize;
 506 |     let mut iter = label.into_iter();
 507 |     // before
 508 |     loop {
 509 |         match iter.next() {
 510 |             None => {
 511 |                 return None;
 512 |             }
 513 |             Some(byte) => {
 514 |                 // The characters used in labels are:
 515 |                 // a-z (except q, but excluding it below seems excessive)
 516 |                 // 0-9
 517 |                 // . _ - :
 518 |                 match *byte {
 519 |                     0x09u8 | 0x0Au8 | 0x0Cu8 | 0x0Du8 | 0x20u8 => {
 520 |                         continue;
 521 |                     }
 522 |                     b'A'..=b'Z' => {
 523 |                         trimmed[trimmed_pos] = *byte + 0x20u8;
 524 |                         trimmed_pos = 1usize;
 525 |                         break;
 526 |                     }
 527 |                     b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b':' | b'.' => {
 528 |                         trimmed[trimmed_pos] = *byte;
 529 |                         trimmed_pos = 1usize;
 530 |                         break;
 531 |                     }
 532 |                     _ => {
 533 |                         return None;
 534 |                     }
 535 |                 }
 536 |             }
 537 |         }
 538 |     }
 539 |     // inside
 540 |     loop {
 541 |         match iter.next() {
 542 |             None => {
 543 |                 break;
 544 |             }
 545 |             Some(byte) => {
 546 |                 match *byte {
 547 |                     0x09u8 | 0x0Au8 | 0x0Cu8 | 0x0Du8 | 0x20u8 => {
 548 |                         break;
 549 |                     }
 550 |                     b'A'..=b'Z' => {
 551 |                         if trimmed_pos == LONGEST_LABEL_LENGTH {
 552 |                             // There's no encoding with a label this long
 553 |                             return None;
 554 |                         }
 555 |                         trimmed[trimmed_pos] = *byte + 0x20u8;
 556 |                         trimmed_pos += 1usize;
 557 |                         continue;
 558 |                     }
 559 |                     b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b':' | b'.' => {
 560 |                         if trimmed_pos == LONGEST_LABEL_LENGTH {
 561 |                             // There's no encoding with a label this long
 562 |                             return None;
 563 |                         }
 564 |                         trimmed[trimmed_pos] = *byte;
 565 |                         trimmed_pos += 1usize;
 566 |                         continue;
 567 |                     }
 568 |                     _ => {
 569 |                         return None;
 570 |                     }
 571 |                 }
 572 |             }
 573 |         }
 574 |     }
 575 |     // after
 576 |     loop {
 577 |         match iter.next() {
 578 |             None => {
 579 |                 break;
 580 |             }
 581 |             Some(byte) => {
 582 |                 match *byte {
 583 |                     0x09u8 | 0x0Au8 | 0x0Cu8 | 0x0Du8 | 0x20u8 => {
 584 |                         continue;
 585 |                     }
 586 |                     _ => {
 587 |                         // There's no label with space in the middle
 588 |                         return None;
 589 |                     }
 590 |                 }
 591 |             }
 592 |         }
 593 |     }
 594 |     let candidate = &trimmed[..trimmed_pos];
 595 |     match LABELS_SORTED.binary_search_by(|probe| {
 596 |         let bytes = probe.as_bytes();
 597 |         let c = bytes.len().cmp(&candidate.len());
 598 |         if c != Ordering::Equal {
 599 |             return c;
 600 |         }
 601 |         let probe_iter = bytes.iter().rev();
 602 |         let candidate_iter = candidate.iter().rev();
 603 |         probe_iter.cmp(candidate_iter)
 604 |     }) {
 605 |         Ok(i) => Some(ENCODINGS_IN_LABEL_SORT[i]),
 606 |         Err(_) => None,
 607 |     }
 608 | }
 609 | 
 610 | #[inline]
 611 | fn utf7_ascii_up_to(bytes: &[u8]) -> usize {
 612 |     for (i, &byte) in bytes.into_iter().enumerate() {
 613 |         if byte == b'+' || byte >= 0x80 {
 614 |             return i;
 615 |         }
 616 |     }
 617 |     bytes.len()
 618 | }
 619 | 
 620 | #[inline]
 621 | fn utf7_base64_up_to(bytes: &[u8]) -> usize {
 622 |     for (i, &byte) in bytes.into_iter().enumerate() {
 623 |         match byte {
 624 |             b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'+' | b'/' => {}
 625 |             _ => {
 626 |                 return i;
 627 |             }
 628 |         }
 629 |     }
 630 |     bytes.len()
 631 | }
 632 | 
/// Decodes one UTF-7 base64 run and appends the decoded text to `string`.
///
/// `bytes` is the content of the run (without the introducing `+`);
/// `decode_utf7` passes only bytes accepted by `utf7_base64_up_to`. The
/// input is base64-decoded in chunks of at most 80 bytes and the resulting
/// bytes are fed to a streaming UTF-16BE decoder.
///
/// Returns `true` if an error was encountered, i.e. the UTF-16BE decoder
/// reported malformed input or trailing base64 input had to be discarded
/// (in which case a single U+FFFD is appended at the end).
#[inline]
fn utf7_base64_decode(bytes: &[u8], string: &mut String) -> bool {
    // The intermediate buffer should be long enough to fit a line
    // of 80 base64 bytes and should also be a multiple of 3. This
    // way, normal email lines will be handled in one go, but
    // longer sequences won't get split between base64 groups of
    // 4 input / 3 output bytes.
    let mut decoder = UTF_16BE.new_decoder_without_bom_handling();
    let mut buf = [0u8; 60];
    let mut tail = bytes;
    let mut had_errors = false;
    // Set once trailing base64 input has been dropped; causes one U+FFFD
    // to be appended after everything else has been decoded.
    let mut trailing_error = false;
    loop {
        // `last` marks the final chunk; `cap` is the number of input bytes
        // to base64-decode in this iteration.
        let (last, mut cap) = if tail.len() <= 80 {
            (true, tail.len())
        } else {
            (false, 80)
        };
        let len;
        loop {
            match STANDARD_NO_PAD.decode_slice(&tail[..cap], &mut buf[..]) {
                Ok(l) => {
                    len = l;
                    break;
                }
                Err(_) => {
                    // A failure is tolerated only in the final chunk
                    // (presumably an incomplete group or leftover bits at
                    // the end of the run — TODO confirm against base64
                    // docs). Drop the last input byte and retry.
                    assert!(last);
                    had_errors = true;
                    trailing_error = true;
                    tail = &tail[..tail.len() - 1];
                    cap -= 1;
                }
            }
        }
        // Feed the `len` decoded bytes to the UTF-16BE decoder, growing
        // `string` whenever the decoder reports that it ran out of room.
        let mut total_read = 0;
        loop {
            let (result, read, err) = decoder.decode_to_string(&buf[total_read..len], string, last);
            total_read += read;
            had_errors |= err;
            match result {
                CoderResult::InputEmpty => {
                    if last {
                        if trailing_error {
                            string.push_str("\u{FFFD}");
                        }
                        return had_errors;
                    }
                    break;
                }
                CoderResult::OutputFull => {
                    // Reserve the decoder's worst-case estimate for the
                    // bytes not yet consumed, then retry.
                    let left = len - total_read;
                    let needed = decoder.max_utf8_buffer_length(left).unwrap();
                    string.reserve(needed);
                }
            }
        }
        // Only reached for non-final chunks, so `tail` is longer than 80.
        tail = &tail[80..];
    }
}
 692 | 
 693 | #[inline(never)]
 694 | fn decode_utf7<'a>(bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
 695 |     let up_to = utf7_ascii_up_to(bytes);
 696 |     if up_to == bytes.len() {
 697 |         let s: &str = unsafe { core::str::from_utf8_unchecked(bytes) };
 698 |         return (Cow::Borrowed(s), false);
 699 |     }
 700 |     let mut had_errors = false;
 701 |     let mut out = String::with_capacity(bytes.len());
 702 |     out.push_str(unsafe { core::str::from_utf8_unchecked(&bytes[..up_to]) });
 703 | 
 704 |     let mut tail = &bytes[up_to..];
 705 |     loop {
 706 |         // `tail[0]` is now either a plus sign or non-ASCII
 707 |         let first = tail[0];
 708 |         tail = &tail[1..];
 709 |         if first == b'+' {
 710 |             let up_to = utf7_base64_up_to(tail);
 711 |             had_errors |= utf7_base64_decode(&tail[..up_to], &mut out);
 712 |             if up_to == tail.len() {
 713 |                 if up_to == 0 {
 714 |                     // Plus sign didn't start a base64 run and also
 715 |                     // wasn't followed by a minus.
 716 |                     had_errors = true;
 717 |                     out.push_str("\u{FFFD}");
 718 |                 }
 719 |                 return (Cow::Owned(out), had_errors);
 720 |             }
 721 |             if up_to == 0 {
 722 |                 if tail[up_to] == b'-' {
 723 |                     // There was no base64 data between
 724 |                     // plus and minus, so we had the sequence
 725 |                     // meaning the plus sign itself.
 726 |                     out.push_str("+");
 727 |                     tail = &tail[up_to + 1..];
 728 |                 } else {
 729 |                     // Plus sign didn't start a base64 run and also
 730 |                     // wasn't followed by a minus.
 731 |                     had_errors = true;
 732 |                     out.push_str("\u{FFFD}");
 733 |                 }
 734 |             } else if tail[up_to] == b'-' {
 735 |                 tail = &tail[up_to + 1..];
 736 |             } else {
 737 |                 tail = &tail[up_to..];
 738 |             }
 739 |         } else {
 740 |             had_errors = true;
 741 |             out.push_str("\u{FFFD}");
 742 |         }
 743 |         let up_to = utf7_ascii_up_to(tail);
 744 |         out.push_str(unsafe { core::str::from_utf8_unchecked(&tail[..up_to]) });
 745 |         if up_to == tail.len() {
 746 |             return (Cow::Owned(out), had_errors);
 747 |         }
 748 |         tail = &tail[up_to..];
 749 |     }
 750 | }
 751 | 
/// Internal representation behind `Charset`.
#[derive(PartialEq, Debug, Copy, Clone, Hash)]
enum VariantCharset {
    /// UTF-7, which this crate decodes itself (see `decode_utf7`).
    Utf7,
    /// An encoding whose decoding is delegated to encoding_rs.
    Encoding(&'static Encoding),
}
 757 | 
/// Test-only fixture: a serde-serializable struct with a `Charset` field.
/// Compiled only for test builds with the `serde` feature enabled;
/// presumably used by the serde round-trip tests — confirm against the
/// test module.
#[cfg(all(test, feature = "serde"))]
#[derive(Serialize, Deserialize, Debug, PartialEq)]
struct Demo {
    num: u32,
    name: String,
    charset: Charset,
}
 765 | 
 766 | #[cfg(test)]
 767 | mod tests {
 768 |     use super::*;
 769 | 
 770 |     fn utf7_no_err(bytes: &[u8]) -> String {
 771 |         let (cow, had_errors) = UTF_7.decode_without_bom_handling(bytes);
 772 |         assert!(!had_errors);
 773 |         cow.into()
 774 |     }
 775 | 
 776 |     fn utf7_err(bytes: &[u8]) -> String {
 777 |         let (cow, had_errors) = UTF_7.decode_without_bom_handling(bytes);
 778 |         assert!(had_errors);
 779 |         cow.into()
 780 |     }
 781 | 
 782 |     // Any copyright to the test code below this comment is dedicated to the
 783 |     // Public Domain. https://creativecommons.org/publicdomain/zero/1.0/
 784 | 
 785 |     #[test]
 786 |     fn test_for_label() {
 787 |         assert_eq!(Charset::for_label(b"  uTf-7\t "), Some(UTF_7));
 788 |         assert_eq!(
 789 |             Charset::for_label(b"  uTf-8\t "),
 790 |             Some(Charset::for_encoding(encoding_rs::UTF_8))
 791 |         );
 792 |         assert_eq!(
 793 |             Charset::for_label(b"  iSo-8859-1\t "),
 794 |             Some(Charset::for_encoding(encoding_rs::WINDOWS_1252))
 795 |         );
 796 |         assert_eq!(
 797 |             Charset::for_label(b"  gb2312\t "),
 798 |             Some(Charset::for_encoding(encoding_rs::GB18030))
 799 |         );
 800 |         assert_eq!(
 801 |             Charset::for_label(b"  ISO-2022-KR\t "),
 802 |             Some(Charset::for_encoding(encoding_rs::REPLACEMENT))
 803 |         );
 804 | 
 805 |         assert_eq!(Charset::for_label(b"u"), None);
 806 |         assert_eq!(Charset::for_label(b"ut"), None);
 807 |         assert_eq!(Charset::for_label(b"utf"), None);
 808 |         assert_eq!(Charset::for_label(b"utf-"), None);
 809 |     }
 810 | 
 811 |     #[test]
 812 |     fn test_for_label_no_replacement() {
 813 |         assert_eq!(
 814 |             Charset::for_label_no_replacement(b"  uTf-7\t "),
 815 |             Some(UTF_7)
 816 |         );
 817 |         assert_eq!(
 818 |             Charset::for_label_no_replacement(b"  uTf-8\t "),
 819 |             Some(Charset::for_encoding(encoding_rs::UTF_8))
 820 |         );
 821 |         assert_eq!(
 822 |             Charset::for_label_no_replacement(b"  iSo-8859-1\t "),
 823 |             Some(Charset::for_encoding(encoding_rs::WINDOWS_1252))
 824 |         );
 825 |         assert_eq!(
 826 |             Charset::for_label_no_replacement(b"  Gb2312\t "),
 827 |             Some(Charset::for_encoding(encoding_rs::GB18030))
 828 |         );
 829 |         assert_eq!(Charset::for_label_no_replacement(b"  ISO-2022-KR\t "), None);
 830 | 
 831 |         assert_eq!(Charset::for_label_no_replacement(b"u"), None);
 832 |         assert_eq!(Charset::for_label_no_replacement(b"ut"), None);
 833 |         assert_eq!(Charset::for_label_no_replacement(b"utf"), None);
 834 |         assert_eq!(Charset::for_label_no_replacement(b"utf-"), None);
 835 |     }
 836 | 
 837 |     #[test]
 838 |     fn test_for_label_and_name() {
 839 |         assert_eq!(Charset::for_label(b"  uTf-7\t ").unwrap().name(), "UTF-7");
 840 |         assert_eq!(Charset::for_label(b"  uTf-8\t ").unwrap().name(), "UTF-8");
 841 |         assert_eq!(
 842 |             Charset::for_label(b"  Gb2312\t ").unwrap().name(),
 843 |             "gb18030"
 844 |         );
 845 |     }
 846 | 
 847 |     #[test]
 848 |     fn test_extended_labels() {
 849 |         let cases: [(&'static str, VariantCharset); 29] = [
 850 |             (
 851 |                 "iso8859_1",
 852 |                 VariantCharset::Encoding(&encoding_rs::WINDOWS_1252_INIT),
 853 |             ),
 854 |             (
 855 |                 "iso8859_2",
 856 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_2_INIT),
 857 |             ),
 858 |             (
 859 |                 "iso8859_3",
 860 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_3_INIT),
 861 |             ),
 862 |             (
 863 |                 "iso8859_4",
 864 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_4_INIT),
 865 |             ),
 866 |             (
 867 |                 "iso8859_5",
 868 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_5_INIT),
 869 |             ),
 870 |             (
 871 |                 "iso8859_6",
 872 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_6_INIT),
 873 |             ),
 874 |             (
 875 |                 "iso8859_7",
 876 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_7_INIT),
 877 |             ),
 878 |             (
 879 |                 "iso8859_9",
 880 |                 VariantCharset::Encoding(&encoding_rs::WINDOWS_1254_INIT),
 881 |             ),
 882 |             (
 883 |                 "iso8859_13",
 884 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_13_INIT),
 885 |             ),
 886 |             (
 887 |                 "iso8859_15",
 888 |                 VariantCharset::Encoding(&encoding_rs::ISO_8859_15_INIT),
 889 |             ),
 890 |             (
 891 |                 "ms936",
 892 |                 VariantCharset::Encoding(&encoding_rs::GB18030_INIT),
 893 |             ),
 894 |             ("ms949", VariantCharset::Encoding(&encoding_rs::EUC_KR_INIT)),
 895 |             ("ms950", VariantCharset::Encoding(&encoding_rs::BIG5_INIT)),
 896 |             (
 897 |                 "ms950_hkscs",
 898 |                 VariantCharset::Encoding(&encoding_rs::BIG5_INIT),
 899 |             ),
 900 |             (
 901 |                 "ms874",
 902 |                 VariantCharset::Encoding(&encoding_rs::WINDOWS_874_INIT),
 903 |             ),
 904 |             (
 905 |                 "euc_jp",
 906 |                 VariantCharset::Encoding(&encoding_rs::EUC_JP_INIT),
 907 |             ),
 908 |             (
 909 |                 "euc_kr",
 910 |                 VariantCharset::Encoding(&encoding_rs::EUC_KR_INIT),
 911 |             ),
 912 |             (
 913 |                 "euc_cn",
 914 |                 VariantCharset::Encoding(&encoding_rs::GB18030_INIT),
 915 |             ),
 916 |             (
 917 |                 "koi8_r",
 918 |                 VariantCharset::Encoding(&encoding_rs::KOI8_R_INIT),
 919 |             ),
 920 |             (
 921 |                 "koi8_u",
 922 |                 VariantCharset::Encoding(&encoding_rs::KOI8_U_INIT),
 923 |             ),
 924 |             (
 925 |                 "x-windows-874",
 926 |                 VariantCharset::Encoding(&encoding_rs::WINDOWS_874_INIT),
 927 |             ),
 928 |             (
 929 |                 "x-windows-949",
 930 |                 VariantCharset::Encoding(&encoding_rs::EUC_KR_INIT),
 931 |             ),
 932 |             (
 933 |                 "x-windows-950",
 934 |                 VariantCharset::Encoding(&encoding_rs::BIG5_INIT),
 935 |             ),
 936 |             (
 937 |                 "tis620",
 938 |                 VariantCharset::Encoding(&encoding_rs::WINDOWS_874_INIT),
 939 |             ),
 940 |             (
 941 |                 "iso2022jp",
 942 |                 VariantCharset::Encoding(&encoding_rs::ISO_2022_JP_INIT),
 943 |             ),
 944 |             ("x-unicode-2-0-utf-7", VariantCharset::Utf7), // Netscape 4.0 per https://jkorpela.fi/chars.html
 945 |             ("unicode-1-1-utf-7", VariantCharset::Utf7), // https://www.iana.org/assignments/character-sets/character-sets.xhtml
 946 |             ("csunicode11utf7", VariantCharset::Utf7), // https://www.iana.org/assignments/character-sets/character-sets.xhtml
 947 |             ("utf-7", VariantCharset::Utf7),
 948 |         ];
 949 |         for (label, expected) in cases.iter() {
 950 |             assert_eq!(
 951 |                 Charset::for_label(label.as_bytes()),
 952 |                 Some(Charset { variant: *expected })
 953 |             );
 954 |         }
 955 |     }
 956 | 
 957 |     #[test]
 958 |     fn test_utf7_decode() {
 959 |         assert_eq!(utf7_no_err(b""), "");
 960 |         assert_eq!(utf7_no_err(b"ab"), "ab");
 961 |         assert_eq!(utf7_no_err(b"+-"), "+");
 962 |         assert_eq!(utf7_no_err(b"a+-b"), "a+b");
 963 | 
 964 |         assert_eq!(utf7_no_err(b"+ACs-"), "+");
 965 |         assert_eq!(utf7_no_err(b"+AGEAKwBi-"), "a+b");
 966 | 
 967 |         assert_eq!(utf7_no_err(b"+JgM-"), "\u{2603}");
 968 |         assert_eq!(utf7_no_err(b"+JgM."), "\u{2603}.");
 969 |         assert_eq!(utf7_no_err(b"+JgM "), "\u{2603} ");
 970 |         assert_eq!(utf7_no_err(b"+JgM--"), "\u{2603}-");
 971 |         assert_eq!(utf7_no_err(b"+JgM"), "\u{2603}");
 972 | 
 973 |         assert_eq!(utf7_no_err(b"+JgMmAw-"), "\u{2603}\u{2603}");
 974 |         assert_eq!(utf7_no_err(b"+JgMmAw."), "\u{2603}\u{2603}.");
 975 |         assert_eq!(utf7_no_err(b"+JgMmAw "), "\u{2603}\u{2603} ");
 976 |         assert_eq!(utf7_no_err(b"+JgMmAw--"), "\u{2603}\u{2603}-");
 977 |         assert_eq!(utf7_no_err(b"+JgMmAw"), "\u{2603}\u{2603}");
 978 | 
 979 |         assert_eq!(utf7_no_err(b"+2D3cqQ-"), "\u{1F4A9}");
 980 |         assert_eq!(utf7_no_err(b"+2D3cqQ."), "\u{1F4A9}.");
 981 |         assert_eq!(utf7_no_err(b"+2D3cqQ "), "\u{1F4A9} ");
 982 |         assert_eq!(utf7_no_err(b"+2D3cqQ--"), "\u{1F4A9}-");
 983 |         assert_eq!(utf7_no_err(b"+2D3cqQ"), "\u{1F4A9}");
 984 | 
 985 |         assert_eq!(utf7_no_err(b"+JgPYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp2D3cqdg93KnYPdyp"), "\u{2603}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}\u{1F4A9}");
 986 | 
 987 |         assert_eq!(utf7_err(b"+"), "\u{FFFD}");
 988 | 
 989 |         assert_eq!(utf7_err(b"+J-"), "\u{FFFD}");
 990 |         assert_eq!(utf7_err(b"+Jg-"), "\u{FFFD}");
 991 |         assert_eq!(utf7_err(b"+J"), "\u{FFFD}");
 992 |         assert_eq!(utf7_err(b"+Jg"), "\u{FFFD}");
 993 |         assert_eq!(utf7_err(b"+."), "\u{FFFD}.");
 994 |         assert_eq!(utf7_err(b"+J."), "\u{FFFD}.");
 995 |         assert_eq!(utf7_err(b"+Jg."), "\u{FFFD}.");
 996 |         assert_eq!(utf7_err(b"+ "), "\u{FFFD} ");
 997 |         assert_eq!(utf7_err(b"+J "), "\u{FFFD} ");
 998 |         assert_eq!(utf7_err(b"+Jg "), "\u{FFFD} ");
 999 | 
1000 |         assert_eq!(utf7_err(b"+JgMmA-"), "\u{2603}\u{FFFD}\u{FFFD}");
1001 |         assert_eq!(utf7_err(b"+JgMmA"), "\u{2603}\u{FFFD}\u{FFFD}");
1002 |         assert_eq!(utf7_err(b"+JgMmA."), "\u{2603}\u{FFFD}\u{FFFD}.");
1003 |         assert_eq!(utf7_err(b"+JgMmA "), "\u{2603}\u{FFFD}\u{FFFD} ");
1004 | 
1005 |         assert_eq!(utf7_err(b"+JgMm-"), "\u{2603}\u{FFFD}");
1006 |         assert_eq!(utf7_err(b"+JgMm"), "\u{2603}\u{FFFD}");
1007 |         assert_eq!(utf7_err(b"+JgMm."), "\u{2603}\u{FFFD}.");
1008 |         assert_eq!(utf7_err(b"+JgMm "), "\u{2603}\u{FFFD} ");
1009 | 
1010 |         assert_eq!(utf7_err(b"+2D3cq-"), "\u{FFFD}\u{FFFD}");
1011 |         assert_eq!(utf7_err(b"+2D3cq"), "\u{FFFD}\u{FFFD}");
1012 |         assert_eq!(utf7_err(b"+2D3cq."), "\u{FFFD}\u{FFFD}.");
1013 |         assert_eq!(utf7_err(b"+2D3cq "), "\u{FFFD}\u{FFFD} ");
1014 | 
1015 |         assert_eq!(utf7_err(b"+2D3c-"), "\u{FFFD}");
1016 |         assert_eq!(utf7_err(b"+2D3c"), "\u{FFFD}");
1017 |         assert_eq!(utf7_err(b"+2D3c."), "\u{FFFD}.");
1018 |         assert_eq!(utf7_err(b"+2D3c "), "\u{FFFD} ");
1019 | 
1020 |         assert_eq!(utf7_err(b"+2D3-"), "\u{FFFD}");
1021 |         assert_eq!(utf7_err(b"+2D3"), "\u{FFFD}");
1022 |         assert_eq!(utf7_err(b"+2D3."), "\u{FFFD}.");
1023 |         assert_eq!(utf7_err(b"+2D3 "), "\u{FFFD} ");
1024 | 
1025 |         assert_eq!(utf7_err(b"+2D-"), "\u{FFFD}");
1026 |         assert_eq!(utf7_err(b"+2D"), "\u{FFFD}");
1027 |         assert_eq!(utf7_err(b"+2D."), "\u{FFFD}.");
1028 |         assert_eq!(utf7_err(b"+2D "), "\u{FFFD} ");
1029 | 
1030 |         assert_eq!(utf7_err(b"+2-"), "\u{FFFD}");
1031 |         assert_eq!(utf7_err(b"+2"), "\u{FFFD}");
1032 |         assert_eq!(utf7_err(b"+2."), "\u{FFFD}.");
1033 |         assert_eq!(utf7_err(b"+2 "), "\u{FFFD} ");
1034 | 
1035 |         // Lone high surrogate
1036 |         assert_eq!(utf7_err(b"+2D0-"), "\u{FFFD}");
1037 |         assert_eq!(utf7_err(b"+2D0"), "\u{FFFD}");
1038 |         assert_eq!(utf7_err(b"+2D0."), "\u{FFFD}.");
1039 |         assert_eq!(utf7_err(b"+2D0 "), "\u{FFFD} ");
1040 | 
1041 |         assert_eq!(utf7_err(b"+2D0AYQ-"), "\u{FFFD}a");
1042 |         assert_eq!(utf7_err(b"+2D0AYQ"), "\u{FFFD}a");
1043 |         assert_eq!(utf7_err(b"+2D0AYQ."), "\u{FFFD}a.");
1044 |         assert_eq!(utf7_err(b"+2D0AYQ "), "\u{FFFD}a ");
1045 | 
1046 |         assert_eq!(utf7_err(b"+2D3/QQ-"), "\u{FFFD}\u{FF41}");
1047 |         assert_eq!(utf7_err(b"+2D3/QQ"), "\u{FFFD}\u{FF41}");
1048 |         assert_eq!(utf7_err(b"+2D3/QQ."), "\u{FFFD}\u{FF41}.");
1049 |         assert_eq!(utf7_err(b"+2D3/QQ "), "\u{FFFD}\u{FF41} ");
1050 | 
1051 |         // Lone low surrogate
1052 |         assert_eq!(utf7_err(b"+AGHcqQ-"), "a\u{FFFD}");
1053 |         assert_eq!(utf7_err(b"+AGHcqQ"), "a\u{FFFD}");
1054 |         assert_eq!(utf7_err(b"+AGHcqQ."), "a\u{FFFD}.");
1055 |         assert_eq!(utf7_err(b"+AGHcqQ "), "a\u{FFFD} ");
1056 |     }
1057 | 
1058 |     #[test]
1059 |     fn test_decode_ascii() {
1060 |         assert_eq!(decode_ascii(b"aa\x80bb\xFFcc"), "aa\u{FFFD}bb\u{FFFD}cc");
1061 |     }
1062 | 
1063 |     #[test]
1064 |     fn test_from() {
1065 |         let _: Charset = encoding_rs::UTF_8.into();
1066 |     }
1067 | 
1068 |     #[cfg(feature = "serde")]
1069 |     #[test]
1070 |     fn test_serde_utf7() {
1071 |         let demo = Demo {
1072 |             num: 42,
1073 |             name: "foo".into(),
1074 |             charset: UTF_7,
1075 |         };
1076 | 
1077 |         let serialized = serde_json::to_string(&demo).unwrap();
1078 | 
1079 |         let deserialized: Demo = serde_json::from_str(&serialized).unwrap();
1080 |         assert_eq!(deserialized, demo);
1081 | 
1082 |         let bincoded = bincode::serialize(&demo).unwrap();
1083 |         let debincoded: Demo = bincode::deserialize(&bincoded[..]).unwrap();
1084 |         assert_eq!(debincoded, demo);
1085 |     }
1086 | 
1087 |     #[cfg(feature = "serde")]
1088 |     #[test]
1089 |     fn test_serde_utf8() {
1090 |         let demo = Demo {
1091 |             num: 42,
1092 |             name: "foo".into(),
1093 |             charset: encoding_rs::UTF_8.into(),
1094 |         };
1095 | 
1096 |         let serialized = serde_json::to_string(&demo).unwrap();
1097 | 
1098 |         let deserialized: Demo = serde_json::from_str(&serialized).unwrap();
1099 |         assert_eq!(deserialized, demo);
1100 | 
1101 |         let bincoded = bincode::serialize(&demo).unwrap();
1102 |         let debincoded: Demo = bincode::deserialize(&bincoded[..]).unwrap();
1103 |         assert_eq!(debincoded, demo);
1104 |     }
1105 | }
1106 | 


--------------------------------------------------------------------------------