├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── negotiate.rs ├── examples └── negotiate.rs ├── src ├── accepted_languages.rs ├── lib.rs └── negotiate │ ├── likely_subtags.rs │ └── mod.rs └── tests ├── fixtures ├── accepted_languages.json ├── locale │ ├── options-ext.json │ ├── options.json │ ├── parsing-ext.json │ ├── parsing.json │ └── serialize-options.json └── negotiate │ ├── filtering │ ├── available-as-range.json │ ├── cases.json │ ├── cross-region.json │ ├── cross-variant.json │ ├── default-locale.json │ ├── errors.json │ ├── exact-match.json │ ├── likely-subtag.json │ ├── prioritize.json │ └── requested-und.json │ ├── lookup │ └── main.json │ └── matching │ └── main.json └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | sudo: required 3 | cache: cargo 4 | dist: trusty 5 | rust: 6 | - stable 7 | - beta 8 | - nightly 9 | addons: 10 | apt: 11 | packages: 12 | - libssl-dev 13 | 14 | script: 15 | - cargo build 16 | - cargo test 17 | - cargo doc 18 | 19 | before_cache: | 20 | if [[ "$TRAVIS_RUST_VERSION" == stable ]]; then 21 | RUSTFLAGS="--cfg procmacro2_semver_exempt" cargo install cargo-tarpaulin 22 | cargo tarpaulin --ciserver travis-ci --coveralls $TRAVIS_JOB_ID 23 | fi 24 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Unreleased 4 | 5 | - … 6 | 7 | ## fluent-langneg 0.14.1 (March 16, 2024) 8 | 9 | - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers. 
10 | - Minor optimizations for speed 11 | 12 | ## fluent-langneg 0.14.0 (December 13, 2023) 13 | 14 | - Move from using `unic-langid` to `icu-locid`. 15 | - Re-export `LanguageIdentifier`. 16 | 17 | ## fluent-langneg 0.13.0 (May 6, 2020) 18 | 19 | - Update `unic-langid` to 0.9. 20 | 21 | ## fluent-langneg 0.12.1 (January 29, 2020) 22 | 23 | - Fixing `maximize` calls in negotiation. 24 | 25 | ## fluent-langneg 0.12.0 (January 28, 2020) 26 | 27 | - Update `unic-langid` to 0.8. 28 | 29 | ## fluent-langneg 0.11.1 (November 17, 2019) 30 | 31 | - Improve handling of `und` in requested to match Unicode TR35. 32 | 33 | ## fluent-langneg 0.11.0 (November 7, 2019) 34 | 35 | - Change name to `fluent-langneg` to better reflect the purpose. 36 | - Update to `unic-langid` 0.7. 37 | - Include feature "cldr" to use full CLDR likely-subtags. 38 | - Improved performance by 50% in the default case, and by further 34% when using CLDR feature. 39 | - Accept `AsRef<[u8]>` instead of `AsRef`. 40 | 41 | ## fluent-locale 0.10.0 (October 3, 2019) 42 | 43 | - Update to `unic-langid` 0.6. 44 | 45 | ## fluent-locale 0.9.0 (October 1, 2019) 46 | 47 | - Use AsRef as bounds in negotiation. 48 | - Support unic-langid with full CLDR backed likelysubtags behind "cldr" feature. 49 | 50 | ## fluent-locale 0.8.0 (September 10, 2019) 51 | 52 | - Update to `unic-langid` 0.5. 53 | 54 | ## fluent-locale 0.7.0 (July 30, 2019) 55 | 56 | - Update `unic-langid` to 0.4. 57 | - Switch benchmark to criterion. 58 | - Update helper functions to be more generic. 59 | 60 | ## fluent-locale 0.6.0 (July 24, 2019) 61 | 62 | - Switch to use `unic-langid` (but allow for `unic-locale`). 63 | - Refactor the API to handle fallible lists. 64 | 65 | ## fluent-locale 0.5.0 (June 16, 2019) 66 | 67 | - Separate out `unic-langid` and `unic-locale` into new crates. 68 | - Switch from BCP47 conformance to Unicode Locale Identifier. 69 | - Update to Rust 2018. 
70 | 71 | ## fluent-locale 0.4.1 (August 6, 2018) 72 | 73 | - Separate out requested from available to allow for different mixes of Vec and &[]. 74 | 75 | ## fluent-locale 0.4.0 (August 6, 2018) 76 | 77 | - Ergonomics improvement - `negotiate_languages` now accepts &[&str], &[String], Vec<&str> and Vec 78 | 79 | ## fluent-locale 0.3.2 (July 31, 2018) 80 | 81 | - Make Locale::matches reject matches if privateuse is not empty 82 | 83 | ## fluent-locale 0.3.1 (February 12, 2018) 84 | 85 | - Make fluent-locale compliant with rust stable (from 1.23) 86 | 87 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluent-langneg" 3 | description = """ 4 | A library for language and locale negotiation. 5 | """ 6 | version = "0.14.1" 7 | authors = [ 8 | "Zibi Braniecki " 9 | ] 10 | homepage = "http://projectfluent.org/" 11 | license = "Apache-2.0" 12 | repository = "https://github.com/projectfluent/fluent-langneg-rs" 13 | readme = "README.md" 14 | categories = ["internationalization", "localization"] 15 | edition = "2021" 16 | include = [ 17 | "src/**/*", 18 | "benches/*.rs", 19 | "Cargo.toml", 20 | "README.md" 21 | ] 22 | 23 | [badges] 24 | travis-ci = { repository = "projectfluent/fluent-langneg-rs" } 25 | coveralls = { repository = "projectfluent/fluent-langneg-rs", branch = "master", service = "github" } 26 | 27 | maintenance = { status = "actively-developed" } 28 | 29 | [dependencies] 30 | icu_locid = "1.4" 31 | icu_locid_transform = { version = "1.4", optional = true } 32 | 33 | [dev-dependencies] 34 | serde = { version = "1.0", features = ["derive"] } 35 | serde_json = "1.0" 36 | criterion = "0.5" 37 | 38 | [[bench]] 39 | name = "negotiate" 40 | harness = false 41 | 42 | [features] 43 | default = [] 44 | cldr = ["icu_locid_transform"] 45 | -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Mozilla 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fluent LangNeg 2 | 3 | **Fluent LangNeg is a library for language and locale identifier negotiation.** 4 | 5 | [![crates.io](https://img.shields.io/crates/v/fluent-langneg.svg)](https://crates.io/crates/fluent-langneg) 6 | [![Build Status](https://travis-ci.org/projectfluent/fluent-langneg-rs.svg?branch=master)](https://travis-ci.org/projectfluent/fluent-langneg-rs) 7 | [![Coverage Status](https://coveralls.io/repos/github/projectfluent/fluent-langneg-rs/badge.svg?branch=master)](https://coveralls.io/github/projectfluent/fluent-langneg-rs?branch=master) 8 | 9 | Introduction 10 | ------------ 11 | 12 | This is a Rust implementation of fluent-langneg library which is a part of Project Fluent. 13 | 14 | The library uses [icu-locid](https://github.com/unicode-org/icu4x) to retrieve and operate on Unicode Language and Locale Identifiers. 15 | The library provides algorithm for negotiating between lists of locales. 
16 | 17 | Usage 18 | ----- 19 | 20 | ```rust 21 | use fluent_langneg::negotiate_languages; 22 | use fluent_langneg::NegotiationStrategy; 23 | use fluent_langneg::convert_vec_str_to_langids_lossy; 24 | use fluent_langneg::LanguageIdentifier; 25 | 26 | // Since langid parsing from string is fallible, we'll use a helper 27 | // function which strips any langids that failed to parse. 28 | let requested = convert_vec_str_to_langids_lossy(&["de-DE", "fr-FR", "en-US"]); 29 | let available = convert_vec_str_to_langids_lossy(&["it", "fr", "de-AT", "fr-CA", "en-US"]); 30 | let default: LanguageIdentifier = "en-US".parse() 31 | .expect("Parsing langid failed."); 32 | 33 | let supported = negotiate_languages( 34 | &requested, 35 | &available, 36 | Some(&default), 37 | NegotiationStrategy::Filtering 38 | ); 39 | 40 | let expected = convert_vec_str_to_langids_lossy(&["de-AT", "fr", "fr-CA", "en-US"]); 41 | assert_eq!(supported, 42 | expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>()); 43 | ``` 44 | 45 | See [docs.rs][] for more examples. 46 | 47 | [docs.rs]: https://docs.rs/fluent-langneg/ 48 | 49 | Status 50 | ------ 51 | 52 | The implementation is complete according to fluent-langneg 53 | corpus of tests, which means that it parses, serializes and negotiates as expected. 54 | 55 | The negotiation methods can operate on lists of `LanguageIdentifier` or `Locale`. 56 | 57 | The remaining work on the path to 1.0 is to gain in-field experience of using it, 58 | add more tests and ensure that bad input is correctly handled. 59 | 60 | Compatibility 61 | ------------- 62 | 63 | The API is based on [UTS 35][] definition of [Unicode Locale Identifier][] and is aiming to 64 | parse and serialize all locale identifiers according to that definition. 65 | 66 | *Note*: Unicode Locale Identifier is similar, but different, from what [BCP47][] specifies under 67 | the name Language Tag. 
68 | For most locale management and negotiation needs, the Unicode Locale Identifier used in this crate is likely a better choice, 69 | but in some cases, like HTTP `Accept-Language` headers, you may need the complete BCP47 Language Tag implementation which 70 | this crate does not provide. 71 | 72 | Language negotiation algorithms are custom Project Fluent solutions, 73 | based on [RFC4647][]. 74 | 75 | The language negotiation strategies aim to replicate the best-effort matches with 76 | the most limited amount of data. The algorithm returns reasonable 77 | results without any database, but the results can be improved with either limited 78 | or full [CLDR likely-subtags][] database. 79 | 80 | The result is a balance chosen for Project Fluent and may differ from other 81 | implementations of language negotiation algorithms which may choose different 82 | tradeoffs. 83 | 84 | [BCP47]: https://tools.ietf.org/html/bcp47 85 | [RFC6067]: https://www.ietf.org/rfc/rfc6067.txt 86 | [UTS 35]: http://www.unicode.org/reports/tr35/#Locale_Extension_Key_and_Type_Data 87 | [RFC4647]: https://tools.ietf.org/html/rfc4647 88 | [CLDR likely-subtags]: http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html 89 | [Unicode Locale Identifier]: http://unicode.org/reports/tr35/#Identifiers 90 | 91 | Alternatives 92 | ------------ 93 | 94 | Although Fluent LangNeg aims to stay close to W3C Accepted Languages, it does not aim 95 | to implement the full behavior and some aspects of the language negotiation strategy 96 | recommended by W3C, such as weights, are not a target right now. 97 | 98 | For such purposes, [rust-language-tags][] crate seems to be a better choice. 99 | 100 | [rust-language-tags]: https://github.com/pyfisch/rust-language-tags 101 | 102 | Performance 103 | ----------- 104 | 105 | The crate is considered to be fully optimized for production. 
106 | 107 | 108 | Develop 109 | ------- 110 | 111 | cargo build 112 | cargo test 113 | cargo bench 114 | 115 | -------------------------------------------------------------------------------- /benches/negotiate.rs: -------------------------------------------------------------------------------- 1 | use criterion::criterion_group; 2 | use criterion::criterion_main; 3 | use criterion::Criterion; 4 | 5 | use fluent_langneg::convert_vec_str_to_langids_lossy; 6 | use fluent_langneg::negotiate_languages; 7 | use fluent_langneg::LanguageIdentifier; 8 | 9 | #[no_mangle] 10 | #[inline(never)] 11 | fn do_negotiate<'a>( 12 | requested: &[LanguageIdentifier], 13 | available: &'a [LanguageIdentifier], 14 | ) -> Vec<&'a LanguageIdentifier> { 15 | negotiate_languages( 16 | requested, 17 | available, 18 | None, 19 | fluent_langneg::NegotiationStrategy::Filtering, 20 | ) 21 | } 22 | 23 | fn negotiate_bench(c: &mut Criterion) { 24 | let requested = &["de", "it", "ru"]; 25 | let available = &[ 26 | "en-US", "fr", "de", "en-GB", "it", "pl", "ru", "sr-Cyrl", "sr-Latn", "zh-Hant", "zh-Hans", 27 | "ja-JP", "he-IL", "de-DE", "de-IT", 28 | ]; 29 | 30 | let requested = convert_vec_str_to_langids_lossy(requested); 31 | let available = convert_vec_str_to_langids_lossy(available); 32 | 33 | c.bench_function("negotiate", |b| { 34 | b.iter(|| do_negotiate(&requested, &available)) 35 | }); 36 | } 37 | 38 | criterion_group!(benches, negotiate_bench); 39 | criterion_main!(benches); 40 | -------------------------------------------------------------------------------- /examples/negotiate.rs: -------------------------------------------------------------------------------- 1 | use fluent_langneg::negotiate::NegotiationStrategy; 2 | use fluent_langneg::{convert_vec_str_to_langids_lossy, negotiate_languages}; 3 | 4 | fn main() { 5 | let requested = convert_vec_str_to_langids_lossy(["it", "pl", "ru"]); 6 | let available = convert_vec_str_to_langids_lossy(["fr", "en-GB", "en-US", "ru", "pl"]); 7 | let 
supported = 8 | negotiate_languages(&requested, &available, None, NegotiationStrategy::Filtering); 9 | 10 | println!("{:?}", supported); 11 | } 12 | -------------------------------------------------------------------------------- /src/accepted_languages.rs: -------------------------------------------------------------------------------- 1 | //! This function parses Accept-Language string into a list of language tags that 2 | //! can be later passed to language negotiation functions. 3 | //! 4 | //! # Example: 5 | //! 6 | //! ``` 7 | //! use fluent_langneg::negotiate_languages; 8 | //! use fluent_langneg::NegotiationStrategy; 9 | //! use fluent_langneg::parse_accepted_languages; 10 | //! use fluent_langneg::convert_vec_str_to_langids_lossy; 11 | //! use icu_locid::LanguageIdentifier; 12 | //! 13 | //! let requested = parse_accepted_languages("de-AT;0.9,de-DE;0.8,de;0.7,en-US;0.5"); 14 | //! let available = convert_vec_str_to_langids_lossy(&["fr", "pl", "de", "en-US"]); 15 | //! let default: LanguageIdentifier = "en-US".parse().expect("Failed to parse a langid."); 16 | //! 17 | //! let supported = negotiate_languages( 18 | //! &requested, 19 | //! &available, 20 | //! Some(&default), 21 | //! NegotiationStrategy::Filtering 22 | //! ); 23 | //! 24 | //! let expected = convert_vec_str_to_langids_lossy(&["de", "en-US"]); 25 | //! assert_eq!(supported, 26 | //! expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>()); 27 | //! ``` 28 | //! 29 | //! This function ignores the weights associated with the locales, since Fluent LangNeg 30 | //! language negotiation only uses the order of locales, not the weights. 31 | //! 
32 | 33 | use icu_locid::LanguageIdentifier; 34 | 35 | pub fn parse(s: &str) -> Vec { 36 | s.split(',') 37 | .map(|t| t.trim().split(';').next().unwrap()) 38 | .filter(|t| !t.is_empty()) 39 | .filter_map(|t| t.parse().ok()) 40 | .collect() 41 | } 42 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! fluent-langneg is an API for operating on locales and language tags. 2 | //! It's part of Project Fluent, a localization framework designed to unleash 3 | //! the expressive power of the natural language. 4 | //! 5 | //! The primary use of fluent-langneg is to parse/modify/serialize language tags 6 | //! and to perform language negotiation. 7 | //! 8 | //! fluent-langneg operates on a subset of [BCP47](http://tools.ietf.org/html/bcp47). 9 | //! It can parse full BCP47 language tags, and will serialize them back, 10 | //! but currently only allows for operations on primary subtags and 11 | //! unicode extension keys. 12 | //! 13 | //! In result fluent-langneg is not suited to replace full implementations of 14 | //! BCP47 like [rust-language-tags](https://github.com/pyfisch/rust-language-tags), 15 | //! but is arguably a better option for use cases involving operations on 16 | //! language tags and for language negotiation. 
17 | 18 | pub mod accepted_languages; 19 | pub mod negotiate; 20 | 21 | pub use accepted_languages::parse as parse_accepted_languages; 22 | pub use negotiate::negotiate_languages; 23 | pub use negotiate::NegotiationStrategy; 24 | 25 | pub use icu_locid::{LanguageIdentifier, ParserError as LangugeIdentifierParserError}; 26 | 27 | pub fn convert_vec_str_to_langids<'a, I, J>( 28 | input: I, 29 | ) -> Result, LangugeIdentifierParserError> 30 | where 31 | I: IntoIterator, 32 | J: AsRef<[u8]> + 'a, 33 | { 34 | input 35 | .into_iter() 36 | .map(|s| LanguageIdentifier::try_from_bytes(s.as_ref())) 37 | .collect() 38 | } 39 | 40 | pub fn convert_vec_str_to_langids_lossy<'a, I, J>(input: I) -> Vec 41 | where 42 | I: IntoIterator, 43 | J: AsRef<[u8]> + 'a, 44 | { 45 | input 46 | .into_iter() 47 | .filter_map(|t| LanguageIdentifier::try_from_bytes(t.as_ref()).ok()) 48 | .collect() 49 | } 50 | -------------------------------------------------------------------------------- /src/negotiate/likely_subtags.rs: -------------------------------------------------------------------------------- 1 | use icu_locid::{ 2 | langid, 3 | subtags::{language, region, Language, Region}, 4 | LanguageIdentifier, 5 | }; 6 | 7 | static REGION_MATCHING_KEYS: &[(Language, Region)] = &[ 8 | (language!("az"), region!("AZ")), 9 | (language!("bg"), region!("BG")), 10 | (language!("cs"), region!("CS")), 11 | (language!("de"), region!("DE")), 12 | (language!("es"), region!("ES")), 13 | (language!("fi"), region!("FI")), 14 | (language!("fr"), region!("FR")), 15 | (language!("it"), region!("IT")), 16 | (language!("lt"), region!("LT")), 17 | (language!("lv"), region!("LV")), 18 | (language!("nl"), region!("NL")), 19 | (language!("nu"), region!("NU")), 20 | (language!("pl"), region!("PL")), 21 | (language!("ro"), region!("RO")), 22 | (language!("ru"), region!("RU")), 23 | ]; 24 | 25 | #[derive(PartialEq, Eq, Debug)] 26 | pub enum TransformResult { 27 | Modified, 28 | Unmodified, 29 | } 30 | 31 | pub struct 
LocaleExpander; 32 | 33 | impl LocaleExpander { 34 | pub fn new() -> Self { 35 | Self 36 | } 37 | 38 | pub fn maximize(&self, input: &mut LanguageIdentifier) -> TransformResult { 39 | let extended = match &input { 40 | b if *b == &langid!("en") => langid!("en-Latn-US"), 41 | b if *b == &langid!("fr") => langid!("fr-Latn-FR"), 42 | b if *b == &langid!("sr") => langid!("sr-Cyrl-SR"), 43 | b if *b == &langid!("sr-RU") => langid!("sr-Latn-SR"), 44 | b if *b == &langid!("az-IR") => langid!("az-Arab-IR"), 45 | b if *b == &langid!("zh-GB") => langid!("zh-Hant-GB"), 46 | b if *b == &langid!("zh-US") => langid!("zh-Hant-US"), 47 | _ => { 48 | let lang = &input.language; 49 | 50 | if let Ok(idx) = REGION_MATCHING_KEYS.binary_search_by(|(l, _)| l.cmp(lang)) { 51 | let subtag = REGION_MATCHING_KEYS[idx].1; 52 | input.region = Some(subtag); 53 | return TransformResult::Modified; 54 | } 55 | return TransformResult::Unmodified; 56 | } 57 | }; 58 | let (language, script, region) = (extended.language, extended.script, extended.region); 59 | input.language = language; 60 | input.script = script; 61 | input.region = region; 62 | TransformResult::Modified 63 | } 64 | } 65 | 66 | #[cfg(test)] 67 | mod tests { 68 | use super::*; 69 | 70 | #[test] 71 | fn test_region_matching_sort() { 72 | for v in REGION_MATCHING_KEYS.windows(2) { 73 | let (v1, v2) = (v[0], v[1]); 74 | assert!( 75 | v1.0 < v2.0, 76 | "Language \"{}\" is placed after \"{}\"", 77 | v1.0, 78 | v2.0 79 | ); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/negotiate/mod.rs: -------------------------------------------------------------------------------- 1 | //! Language Negotiation is a process in which locales from different 2 | //! sources are filtered and sorted in an effort to produce the best 3 | //! possible selection of them. 4 | //! 5 | //! There are multiple language negotiation strategies, most popular is 6 | //! 
described in [RFC4647](https://www.ietf.org/rfc/rfc4647.txt). 7 | //! 8 | //! The algorithm is based on the RFC 4647 3.3.2 Extended Filtering algorithm, 9 | //! with several modifications. 10 | //! 11 | //! # Example: 12 | //! 13 | //! ``` 14 | //! use fluent_langneg::negotiate_languages; 15 | //! use fluent_langneg::NegotiationStrategy; 16 | //! use fluent_langneg::convert_vec_str_to_langids_lossy; 17 | //! use icu_locid::LanguageIdentifier; 18 | //! 19 | //! let requested = convert_vec_str_to_langids_lossy(&["pl", "fr", "en-US"]); 20 | //! let available = convert_vec_str_to_langids_lossy(&["it", "de", "fr", "en-GB", "en_US"]); 21 | //! let default: LanguageIdentifier = "en-US".parse().expect("Parsing langid failed."); 22 | //! 23 | //! let supported = negotiate_languages( 24 | //! &requested, 25 | //! &available, 26 | //! Some(&default), 27 | //! NegotiationStrategy::Filtering 28 | //! ); 29 | //! 30 | //! let expected = convert_vec_str_to_langids_lossy(&["fr", "en-US", "en-GB"]); 31 | //! assert_eq!(supported, 32 | //! expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>()); 33 | //! ``` 34 | //! 35 | //! # The exact algorithm is custom, and consists of a 6 level strategy: 36 | //! 37 | //! ### 1) Attempt to find an exact match for each requested locale in available locales. 38 | //! 39 | //! Example: 40 | //! 41 | //! ```text 42 | //! // [requested] * [available] = [supported] 43 | //! 44 | //! ["en-US"] * ["en-US"] = ["en-US"] 45 | //! ``` 46 | //! 47 | //! ### 2) Attempt to match a requested locale to an available locale treated as a locale range. 48 | //! 49 | //! Example: 50 | //! 51 | //! ```text 52 | //! // [requested] * [available] = [supported] 53 | //! 54 | //! ["en-US"] * ["en"] = ["en"] 55 | //! ^^ 56 | //! |-- becomes "en-*-*-*" 57 | //! ``` 58 | //! 59 | //! ### 3) Maximize the requested locale to find the best match in available locales. 60 | //! 61 | //! This part uses ICU's likelySubtags or similar database. 62 | //! 63 | //! Example: 64 | //! 65 | //! 
```text 66 | //! // [requested] * [available] = [supported] 67 | //! 68 | //! ["en"] * ["en-GB", "en-US"] = ["en-US"] 69 | //! ^^ ^^^^^ ^^^^^ 70 | //! | | | 71 | //! | |----------- become "en-*-GB-*" and "en-*-US-*" 72 | //! | 73 | //! |-- ICU likelySubtags expands it to "en-Latn-US" 74 | //! ``` 75 | //! 76 | //! ### 4) Attempt to look up a different variant of the same locale. 77 | //! 78 | //! Example: 79 | //! 80 | //! ```text 81 | //! // [requested] * [available] = [supported] 82 | //! 83 | //! ["ja-JP-win"] * ["ja-JP-mac"] = ["ja-JP-mac"] 84 | //! ^^^^^^^^^ ^^^^^^^^^ 85 | //! | |-- become "ja-*-JP-mac" 86 | //! | 87 | //! |----------- replace variant with range: "ja-JP-*" 88 | //! ``` 89 | //! 90 | //! ### 5) Look up a maximized version of the requested locale, stripped of the region code. 91 | //! 92 | //! Example: 93 | //! 94 | //! ```text 95 | //! // [requested] * [available] = [supported] 96 | //! 97 | //! ["en-CA"] * ["en-ZA", "en-US"] = ["en-US", "en-ZA"] 98 | //! ^^^^^ 99 | //! | ^^^^^ ^^^^^ 100 | //! | | | 101 | //! | |----------- become "en-*-ZA-*" and "en-*-US-*" 102 | //! | 103 | //! |----------- strip region produces "en", then lookup likelySubtag: "en-Latn-US" 104 | //! ``` 105 | //! 106 | //! 107 | //! ### 6) Attempt to look up a different region of the same locale. 108 | //! 109 | //! Example: 110 | //! 111 | //! ```text 112 | //! // [requested] * [available] = [supported] 113 | //! 114 | //! ["en-GB"] * ["en-AU"] = ["en-AU"] 115 | //! ^^^^^ ^^^^^ 116 | //! | |-- become "en-*-AU-*" 117 | //! | 118 | //! |----- replace region with range: "en-*" 119 | //! ``` 120 | //! 
use icu_locid::LanguageIdentifier;

#[cfg(not(feature = "cldr"))]
mod likely_subtags;
#[cfg(feature = "cldr")]
use icu_locid_transform::{LocaleExpander, TransformResult};
#[cfg(not(feature = "cldr"))]
use likely_subtags::{LocaleExpander, TransformResult};

/// Controls how many available locales are selected per requested locale.
#[derive(PartialEq, Debug, Clone, Copy)]
pub enum NegotiationStrategy {
    /// Collect every available locale that matches a requested locale, at
    /// every step of the algorithm.
    Filtering,
    /// Stop at the first available locale that matches a requested locale,
    /// then move on to the next requested locale.
    Matching,
    /// Stop the whole negotiation at the first match found.
    Lookup,
}

/// Compares one optional subtag (script or region) of two identifiers.
///
/// When `as_range1` / `as_range2` is set, a missing (`None`) subtag on that
/// side acts as a wildcard and matches anything. Otherwise the two subtags
/// must be equal.
fn subtag_matches<P: PartialEq>(
    subtag1: &Option<P>,
    subtag2: &Option<P>,
    as_range1: bool,
    as_range2: bool,
) -> bool {
    (as_range1 && subtag1.is_none()) || (as_range2 && subtag2.is_none()) || subtag1 == subtag2
}

/// Returns `true` when `lid1` and `lid2` match field by field.
///
/// `range1` / `range2` mark the corresponding identifier as a *range*: each
/// of its empty fields (language, script, region, variants) then matches any
/// value on the other side. With both flags `false` this is plain equality
/// of all four fields.
#[inline(always)]
fn matches(
    lid1: &LanguageIdentifier,
    lid2: &LanguageIdentifier,
    range1: bool,
    range2: bool,
) -> bool {
    ((range1 && lid1.language.is_empty())
        || (range2 && lid2.language.is_empty())
        || lid1.language == lid2.language)
        && subtag_matches(&lid1.script, &lid2.script, range1, range2)
        && subtag_matches(&lid1.region, &lid2.region, range1, range2)
        && ((range1 && lid1.variants.is_empty())
            || (range2 && lid2.variants.is_empty())
            || lid1.variants == lid2.variants)
}

/// Selects the entries of `available` that satisfy the `requested` locales.
///
/// Requested locales are processed in order, and each one runs through the
/// six matching steps below. A matched entry is removed from the candidate
/// pool, so no entry is returned twice. The result is ordered by the
/// requested locale and step that produced each match.
///
/// How many matches are kept depends on `strategy`; see
/// [`NegotiationStrategy`].
pub fn filter_matches<'a, R: 'a + AsRef<LanguageIdentifier>, A: 'a + AsRef<LanguageIdentifier>>(
    requested: &[R],
    available: &'a [A],
    strategy: NegotiationStrategy,
) -> Vec<&'a A> {
    // Created lazily: steps 1 and 2 never need the likely-subtags expander.
    let mut lc: Option<LocaleExpander> = None;

    let mut supported_locales = vec![];

    // Candidates still up for selection; matched entries are removed.
    let mut available_locales: Vec<&A> = available.iter().collect();

    // Runs one matching step for `$req` against the remaining candidates.
    // `$self_as_range` treats the *available* locale as a range and
    // `$other_as_range` treats the *requested* locale as a range.
    // The macro uses `continue` / `break` on the enclosing `for` loop, so it
    // can only be expanded inside that loop.
    macro_rules! test_strategy {
        ($req:ident, $self_as_range:expr, $other_as_range:expr) => {{
            let mut match_found = false;
            available_locales.retain(|locale| {
                // Non-filtering strategies take a single match per step, so
                // keep every remaining candidate once one has been found.
                if strategy != NegotiationStrategy::Filtering && match_found {
                    return true;
                }

                if matches(locale.as_ref(), &$req, $self_as_range, $other_as_range) {
                    match_found = true;
                    supported_locales.push(*locale);
                    return false;
                }
                true
            });

            if match_found {
                match strategy {
                    NegotiationStrategy::Filtering => {}
                    NegotiationStrategy::Matching => continue,
                    NegotiationStrategy::Lookup => break,
                }
            }
        }};
    }

    for req in requested {
        let req = req.as_ref();

        // 1) Try to find an exact match for the request (all fields equal).
        test_strategy!(req, false, false);

        // 2) Try to match against the available locales treated as ranges.
        test_strategy!(req, true, false);

        // Per Unicode TR35, 4.4 Locale Matching, we don't add likely subtags to
        // requested locales, so we'll skip it from the rest of the steps.
        if req.language.is_empty() {
            continue;
        }

        let mut req = req.to_owned();
        // 3) Try to match against a maximized version of the requested locale.
        //    Retried only when maximization actually changed the request.
        let lc = lc.get_or_insert_with(LocaleExpander::new);
        if lc.maximize(&mut req) == TransformResult::Modified {
            test_strategy!(req, true, false);
        }

        // 4) Try to match against a variant as a range
        req.variants.clear();
        test_strategy!(req, true, true);

        // 5) Try to match against the likely subtag without region
        req.region = None;
        if lc.maximize(&mut req) == TransformResult::Modified {
            test_strategy!(req, true, false);
        }

        // 6) Try to match against a region as a range.
        //    Step 5 may have filled the region back in, so clear it again.
        req.region = None;
        test_strategy!(req, true, true);
    }

    supported_locales
}

/// Negotiates `requested` against `available` and applies the `default`.
///
/// Runs [`filter_matches`] and then, if a `default` is given:
/// * with [`NegotiationStrategy::Lookup`], the default is used only when
///   nothing matched;
/// * with the other strategies, the default is appended unless it is already
///   in the result.
pub fn negotiate_languages<
    'a,
    R: 'a + AsRef<LanguageIdentifier>,
    A: 'a + AsRef<LanguageIdentifier> + PartialEq,
>(
    requested: &[R],
    available: &'a [A],
    default: Option<&'a A>,
    strategy: NegotiationStrategy,
) -> Vec<&'a A> {
    let mut supported = filter_matches(requested, available, strategy);

    if let Some(default) = default {
        if strategy == NegotiationStrategy::Lookup {
            if supported.is_empty() {
                supported.push(default);
            }
        } else if !supported.contains(&default) {
            supported.push(default);
        }
    }
    supported
}

-------------------------------------------------------------------------------- /tests/fixtures/accepted_languages.json: -------------------------------------------------------------------------------- 1 | [ 2
| { 3 | "input": "en-US;0.9,pl;0.8,fr;0.5", 4 | "output": ["en-US", "pl", "fr"] 5 | }, 6 | { 7 | "input": "en-US,pl;0.8,fr;0.5", 8 | "output": ["en-US", "pl", "fr"] 9 | }, 10 | { 11 | "input": "en-US,pl,fr", 12 | "output": ["en-US", "pl", "fr"] 13 | }, 14 | { 15 | "input": "en-US,,fr", 16 | "output": ["en-US", "fr"] 17 | }, 18 | { 19 | "input": "", 20 | "output": [] 21 | }, 22 | { 23 | "input": "pl", 24 | "output": ["pl"] 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /tests/fixtures/locale/options-ext.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": { 4 | "string": "en", 5 | "options": { 6 | "hour-cycle": "h12" 7 | } 8 | }, 9 | "output": { 10 | "language": "en", 11 | "extensions": { 12 | "unicode": { 13 | "hour-cycle": "h12" 14 | } 15 | } 16 | } 17 | }, 18 | { 19 | "input": { 20 | "string": "en", 21 | "options": { 22 | "calendar": "gregory" 23 | } 24 | }, 25 | "output": { 26 | "language": "en", 27 | "extensions": { 28 | "unicode": { 29 | "calendar": "gregory" 30 | } 31 | } 32 | } 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /tests/fixtures/locale/options.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": { 4 | "string": "en", 5 | "options": { 6 | "language": "pl" 7 | } 8 | }, 9 | "output": { 10 | "language": "pl" 11 | } 12 | }, 13 | { 14 | "input": { 15 | "string": "en", 16 | "options": { 17 | "language": "pl", 18 | "script": "Cyrl" 19 | } 20 | }, 21 | "output": { 22 | "language": "pl", 23 | "script": "Cyrl" 24 | } 25 | }, 26 | { 27 | "input": { 28 | "string": "en-US", 29 | "options": { 30 | "language": "pl", 31 | "script": "Cyrl" 32 | } 33 | }, 34 | "output": { 35 | "language": "pl", 36 | "script": "Cyrl", 37 | "region": "US" 38 | } 39 | }, 40 | { 41 | "input": { 42 | "string": "en-Latn-US", 43 | "options": { 44 | "region": "GB" 45 | } 
46 | }, 47 | "output": { 48 | "language": "en", 49 | "script": "Latn", 50 | "region": "GB" 51 | } 52 | } 53 | ] 54 | -------------------------------------------------------------------------------- /tests/fixtures/locale/parsing-ext.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": { 4 | "string": "en-u-hc-h12" 5 | }, 6 | "output": { 7 | "language": "en", 8 | "extensions": { 9 | "unicode": { 10 | "hour-cycle": "h12" 11 | } 12 | } 13 | } 14 | }, 15 | { 16 | "input": { 17 | "string": "en-US-u-hc-h23" 18 | }, 19 | "output": { 20 | "language": "en", 21 | "region": "US", 22 | "extensions": { 23 | "unicode": { 24 | "hour-cycle": "h23" 25 | } 26 | } 27 | } 28 | }, 29 | { 30 | "input": { 31 | "string": "en-US-u-hc-h23-ca-gregory" 32 | }, 33 | "output": { 34 | "language": "en", 35 | "region": "US", 36 | "extensions": { 37 | "unicode": { 38 | "hour-cycle": "h23", 39 | "calendar": "gregory" 40 | } 41 | } 42 | } 43 | } 44 | ] 45 | -------------------------------------------------------------------------------- /tests/fixtures/locale/parsing.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": { 4 | "string": "en" 5 | }, 6 | "output": { 7 | "language": "en" 8 | } 9 | }, 10 | { 11 | "input": { 12 | "string": "lij" 13 | }, 14 | "output": { 15 | "language": "lij" 16 | } 17 | }, 18 | { 19 | "input": { 20 | "string": "en-Latn" 21 | }, 22 | "output": { 23 | "language": "en", 24 | "script": "Latn" 25 | } 26 | }, 27 | { 28 | "input": { 29 | "string": "lij-Arab" 30 | }, 31 | "output": { 32 | "language": "lij", 33 | "script": "Arab" 34 | } 35 | }, 36 | { 37 | "input": { 38 | "string": "en-Latn-US" 39 | }, 40 | "output": { 41 | "language": "en", 42 | "script": "Latn", 43 | "region": "US" 44 | } 45 | }, 46 | { 47 | "input": { 48 | "string": "lij-Arab-FA" 49 | }, 50 | "output": { 51 | "language": "lij", 52 | "script": "Arab", 53 | "region": "FA" 54 | } 55 | }, 56 | { 57 | 
"input": { 58 | "string": "en-Latn-US-windows" 59 | }, 60 | "output": { 61 | "language": "en", 62 | "script": "Latn", 63 | "region": "US", 64 | "variants": ["windows"] 65 | } 66 | }, 67 | { 68 | "input": { 69 | "string": "lij-Arab-FA-linux" 70 | }, 71 | "output": { 72 | "language": "lij", 73 | "script": "Arab", 74 | "region": "FA", 75 | "variants": ["linux"] 76 | } 77 | }, 78 | { 79 | "input": { 80 | "string": "lij-Arab-FA-linux-nedis" 81 | }, 82 | "output": { 83 | "language": "lij", 84 | "script": "Arab", 85 | "region": "FA", 86 | "variants": ["linux", "nedis"] 87 | } 88 | }, 89 | { 90 | "input": { 91 | "string": "EN-latn-us" 92 | }, 93 | "output": { 94 | "language": "en", 95 | "script": "Latn", 96 | "region": "US" 97 | } 98 | }, 99 | { 100 | "input": { 101 | "string": "sl-nedis" 102 | }, 103 | "output": { 104 | "language": "sl", 105 | "variants": ["nedis"] 106 | } 107 | }, 108 | { 109 | "input": { 110 | "string": "de-CH-1996" 111 | }, 112 | "output": { 113 | "language": "de", 114 | "region": "CH", 115 | "variants": ["1996"] 116 | } 117 | }, 118 | { 119 | "input": { 120 | "string": "sr-Latn" 121 | }, 122 | "output": { 123 | "language": "sr", 124 | "script": "Latn" 125 | } 126 | }, 127 | { 128 | "input": { 129 | "string": "es-419" 130 | }, 131 | "output": { 132 | "language": "es", 133 | "region": "419" 134 | } 135 | }, 136 | { 137 | "input": { 138 | "string": "und" 139 | }, 140 | "output": { 141 | } 142 | }, 143 | { 144 | "input": { 145 | "string": "und-Latn" 146 | }, 147 | "output": { 148 | "script": "Latn" 149 | } 150 | } 151 | ] 152 | -------------------------------------------------------------------------------- /tests/fixtures/locale/serialize-options.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": { 4 | "string": "en", 5 | "options": { 6 | "hour-cycle": "h12" 7 | } 8 | }, 9 | "output": "en-u-hc-h12" 10 | }, 11 | { 12 | "input": { 13 | "string": "ar", 14 | "options": { 15 | "hour-cycle": "h12", 
16 | "calendar": "buddhist" 17 | } 18 | }, 19 | "output": "ar-u-ca-buddhist-hc-h12" 20 | } 21 | ] 22 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/available-as-range.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["en-US"], ["en"]], 4 | "output": ["en"] 5 | }, 6 | { 7 | "input": [["en-Latn-US"], ["en-US"]], 8 | "output": ["en-US"] 9 | }, 10 | { 11 | "input": [["en-US-windows"], ["en-US"]], 12 | "output": ["en-US"] 13 | }, 14 | { 15 | "input": [["fr-CA", "de-DE"], ["fr", "it", "de"]], 16 | "output": ["fr", "de"] 17 | }, 18 | { 19 | "input": [["ja-JP-windows"], ["ja"]], 20 | "output": ["ja"] 21 | }, 22 | { 23 | "input": [["en-Latn-GB", "en-Latn-IN"], ["en-IN", "en-GB"]], 24 | "output": ["en-GB", "en-IN"] 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/cases.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["fr_FR"], ["fr-FR"]], 4 | "output": ["fr-FR"] 5 | }, 6 | { 7 | "input": [["fr_fr"], ["fr-fr"]], 8 | "output": ["fr-fr"] 9 | }, 10 | { 11 | "input": [["fr_Fr"], ["fr-fR"]], 12 | "output": ["fr-fR"] 13 | }, 14 | { 15 | "input": [["fr_lAtN_fr"], ["fr-Latn-FR"]], 16 | "output": ["fr-Latn-FR"] 17 | }, 18 | { 19 | "input": [["fr_FR"], ["fr_FR"]], 20 | "output": ["fr_FR"] 21 | }, 22 | { 23 | "input": [["fr-FR"], ["fr_FR"]], 24 | "output": ["fr_FR"] 25 | }, 26 | { 27 | "input": [["fr_Cyrl_FR_macos"], ["fr_Cyrl_fr-macos"]], 28 | "output": ["fr_Cyrl_fr-macos"] 29 | }, 30 | { 31 | "input": [["fr_Cyrl_FR_mAcOs"], ["fr_Cyrl_fr-MaCoS"]], 32 | "output": ["fr_Cyrl_fr-MaCoS"] 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/cross-region.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["en"], ["en-US"]], 4 | "output": ["en-US"] 5 | }, 6 | { 7 | "input": [["en-US"], ["en-GB"]], 8 | "output": ["en-GB"] 9 | }, 10 | { 11 | "input": [["en-Latn-US"], ["en-Latn-GB"]], 12 | "output": ["en-Latn-GB"] 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/cross-variant.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["en-US-linux"], ["en-US-windows"]], 4 | "output": ["en-US-windows"] 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/default-locale.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["fr"], ["de", "it"]], 4 | "output": [] 5 | }, 6 | { 7 | "input": [["fr"], ["de", "it"], "en-US"], 8 | "output": ["en-US"] 9 | }, 10 | { 11 | "input": [["fr"], ["de", "en-US"], "en-US"], 12 | "output": ["en-US"] 13 | }, 14 | { 15 | "input": [["fr", "de-DE"], ["de-DE", "fr-CA"], "en-US"], 16 | "output": ["fr-CA", "de-DE", "en-US"] 17 | } 18 | ] 19 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/errors.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [[], []], 4 | "output": [] 5 | }, 6 | { 7 | "input": [[""], []], 8 | "output": [] 9 | }, 10 | { 11 | "input": [[], [""]], 12 | "output": [] 13 | }, 14 | { 15 | "input": [[""], [""]], 16 | "output": [] 17 | }, 18 | { 19 | "input": [["2"], ["ąółż"]], 20 | "output": [] 21 | }, 22 | { 23 | "input": [[""], ["fr-FR"]], 24 | "output": [] 25 | }, 26 | { 27 | "input": [[""], ["2-1"]], 28 | "output": [] 29 | }, 30 | { 31 | "input": [[".-"], ["-2-1"]], 32 | "output": [] 33 | } 34 | ] 35 | 
-------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/exact-match.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["en"], ["en"]], 4 | "output": ["en"] 5 | }, 6 | { 7 | "input": [["en-US"], ["en-US"]], 8 | "output": ["en-US"] 9 | }, 10 | { 11 | "input": [["en-Latn-US"], ["en-Latn-US"]], 12 | "output": ["en-Latn-US"] 13 | }, 14 | { 15 | "input": [["en-Latn-US-windows"], ["en-Latn-US-windows"]], 16 | "output": ["en-Latn-US-windows"] 17 | }, 18 | { 19 | "input": [["fr-FR"], ["de", "it", "fr-FR"]], 20 | "output": ["fr-FR"] 21 | }, 22 | { 23 | "input": [["fr", "pl", "de-DE"], ["pl", "en-US", "de-DE"]], 24 | "output": ["pl", "de-DE"] 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/likely-subtag.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["en"], ["en-GB", "de", "en-US"]], 4 | "output": ["en-US", "en-GB"] 5 | }, 6 | { 7 | "input": [["en"], ["en-Latn-GB", "de", "en-Latn-US"]], 8 | "output": ["en-Latn-US", "en-Latn-GB"] 9 | }, 10 | { 11 | "input": [["fr"], ["fr-CA", "fr-FR"]], 12 | "output": ["fr-FR", "fr-CA"] 13 | }, 14 | { 15 | "input": [["az-IR"], ["az-Latn", "az-Arab"]], 16 | "output": ["az-Arab"] 17 | }, 18 | { 19 | "input": [["sr-RU"], ["sr-Cyrl", "sr-Latn"]], 20 | "output": ["sr-Latn"] 21 | }, 22 | { 23 | "input": [["sr"], ["sr-Latn", "sr-Cyrl"]], 24 | "output": ["sr-Cyrl"] 25 | }, 26 | { 27 | "input": [["zh-GB"], ["zh-Hans", "zh-Hant"]], 28 | "output": ["zh-Hant"] 29 | }, 30 | { 31 | "input": [["sr", "ru"], ["sr-Latn", "ru"]], 32 | "output": ["ru"] 33 | }, 34 | { 35 | "input": [["sr-RU"], ["sr-Latn-RO", "sr-Cyrl"]], 36 | "output": ["sr-Latn-RO"] 37 | }, 38 | { 39 | "input": [["en-CA"], ["en-ZA", "en-GB", "en-US"]], 40 | "output": ["en-US", "en-ZA", "en-GB"] 41 | } 42 | ] 43 | 
-------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/prioritize.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["en-US"], ["en-US-windows", "en", "en-US"]], 4 | "output": ["en-US", "en", "en-US-windows"] 5 | }, 6 | { 7 | "input": [["en-Latn-US"], ["en-GB", "en-US"]], 8 | "output": ["en-US", "en-GB"] 9 | }, 10 | { 11 | "input": [["en"], ["en-Cyrl-US", "en-Latn-US"]], 12 | "output": ["en-Latn-US"] 13 | }, 14 | { 15 | "input": [["en-US-macos"], ["en-US-windows", "en-GB-macos"]], 16 | "output": ["en-US-windows", "en-GB-macos"] 17 | }, 18 | { 19 | "input": [["en-US-macos"], ["en-GB-windows"]], 20 | "output": ["en-GB-windows"] 21 | }, 22 | { 23 | "input": [["en-US"], ["en-GB", "en"]], 24 | "output": ["en", "en-GB"] 25 | }, 26 | { 27 | "input": [["fr-CA-macos", "de-DE"], ["de-DE", "fr-FR-windows"]], 28 | "output": ["fr-FR-windows", "de-DE"] 29 | } 30 | ] 31 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/filtering/requested-und.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["und"], ["de", "pl-PL", "it", "fr-Latn-CA", "ru"]], 4 | "output": [] 5 | }, 6 | { 7 | "input": [["und"], ["und", "en-US"], "en-US"], 8 | "output": ["und", "en-US"] 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/lookup/main.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["fr-FR", "en"], ["en-US", "fr-FR", "en", "fr"], "en-US"], 4 | "strategy": "lookup", 5 | "output": ["fr-FR"] 6 | }, 7 | { 8 | "input": [["fr", "en"], ["en-US", "fr-FR", "en"], "en-US"], 9 | "strategy": "lookup", 10 | "output": ["fr-FR"] 11 | }, 12 | { 13 | "input": [["en", "de"], ["en-GB", "en-US", "de"], "it"], 14 | "strategy": "lookup", 15 | 
"output": ["en-US"] 16 | }, 17 | { 18 | "input": [["und"], ["en-GB", "en-US", "de"], "it"], 19 | "strategy": "lookup", 20 | "output": ["it"] 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /tests/fixtures/negotiate/matching/main.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": [["fr", "en"], ["en-US", "fr-FR", "en", "fr"]], 4 | "strategy": "matching", 5 | "output": ["fr", "en"] 6 | }, 7 | { 8 | "input": [["und"], ["fr", "de", "it", "ru", "pl"]], 9 | "strategy": "matching", 10 | "output": [] 11 | } 12 | ] 13 | -------------------------------------------------------------------------------- /tests/lib.rs: --------------------------------------------------------------------------------
use std::error::Error;
use std::fs;
use std::fs::File;
use std::path::Path;

use fluent_langneg::convert_vec_str_to_langids_lossy;
use fluent_langneg::negotiate_languages;
use fluent_langneg::parse_accepted_languages;
use fluent_langneg::NegotiationStrategy;
use icu_locid::{langid, locale, LanguageIdentifier, Locale};

use serde::{Deserialize, Serialize};

/// The `"input"` array of a negotiation fixture: requested locales, available
/// locales and, optionally, a default locale as a third element.
#[derive(Serialize, Deserialize)]
#[serde(untagged)]
enum NegotiateTestInput {
    NoDefault(Vec<String>, Vec<String>),
    Default(Vec<String>, Vec<String>, String),
}

/// One negotiation fixture entry.
#[derive(Serialize, Deserialize)]
struct NegotiateTestSet {
    input: NegotiateTestInput,
    /// `"filtering"`, `"matching"` or `"lookup"`; filtering is used when absent.
    strategy: Option<String>,
    output: Vec<String>,
}

/// One accepted-languages fixture entry: a header string and the expected locales.
#[derive(Serialize, Deserialize)]
struct AcceptedLanguagesTestSet {
    input: String,
    output: Vec<String>,
}

/// Reads and deserializes a negotiation fixture file.
fn read_negotiate_testsets<P: AsRef<Path>>(
    path: P,
) -> Result<Vec<NegotiateTestSet>, Box<dyn Error>> {
    let file = File::open(path)?;
    let sets = serde_json::from_reader(file)?;
    Ok(sets)
}

/// Runs every test set in the fixture file at `path` through
/// `negotiate_languages` and asserts the expected output.
fn test_negotiate_fixtures(path: &str) {
    println!("Testing path: {}", path);
    let tests = read_negotiate_testsets(path).unwrap();

    for test in tests {
        // Unknown or missing strategy names fall back to filtering.
        let strategy = match test.strategy {
            Some(strategy) => match strategy.as_str() {
                "filtering" => NegotiationStrategy::Filtering,
                "matching" => NegotiationStrategy::Matching,
                "lookup" => NegotiationStrategy::Lookup,
                _ => NegotiationStrategy::Filtering,
            },
            _ => NegotiationStrategy::Filtering,
        };
        match test.input {
            NegotiateTestInput::NoDefault(requested, available) => {
                let requested = convert_vec_str_to_langids_lossy(requested);
                let available = convert_vec_str_to_langids_lossy(available);
                let output = convert_vec_str_to_langids_lossy(test.output);
                let output2: Vec<&LanguageIdentifier> = output.iter().collect();
                assert_eq!(
                    negotiate_languages(&requested, &available, None, strategy),
                    output2,
                    "Test in {} failed",
                    path
                );
            }
            NegotiateTestInput::Default(requested, available, default) => {
                let requested = convert_vec_str_to_langids_lossy(requested);
                let available = convert_vec_str_to_langids_lossy(available);
                let output = convert_vec_str_to_langids_lossy(test.output);
                let output2: Vec<&LanguageIdentifier> = output.iter().collect();
                assert_eq!(
                    negotiate_languages(
                        &requested,
                        &available,
                        // A default that fails to parse is treated as "no default".
                        default.parse().ok().as_ref(),
                        strategy
                    ),
                    output2,
                    "Test in {} failed",
                    path
                );
            }
        }
    }
}

#[test]
fn negotiate_filtering() {
    let paths = fs::read_dir("./tests/fixtures/negotiate/filtering").unwrap();

    for path in paths {
        let p = path.unwrap().path().to_str().unwrap().to_owned();
        test_negotiate_fixtures(p.as_str());
    }
}

#[test]
fn negotiate_matching() {
    let paths = fs::read_dir("./tests/fixtures/negotiate/matching").unwrap();

    for path in paths {
        let p = path.unwrap().path().to_str().unwrap().to_owned();
        test_negotiate_fixtures(p.as_str());
    }
}

#[test]
fn negotiate_lookup() {
    let paths = fs::read_dir("./tests/fixtures/negotiate/lookup").unwrap();

    for path in paths {
        let p = path.unwrap().path().to_str().unwrap().to_owned();
        test_negotiate_fixtures(p.as_str());
    }
}

#[test]
fn accepted_languages() {
    let file = File::open("./tests/fixtures/accepted_languages.json").unwrap();
    let tests: Vec<AcceptedLanguagesTestSet> = serde_json::from_reader(file).unwrap();

    for test in tests {
        let locales = parse_accepted_languages(test.input.as_str());
        let output = convert_vec_str_to_langids_lossy(test.output);
        assert_eq!(output, locales);
    }
}

/// Negotiation works both with slices of references and slices of owned
/// `LanguageIdentifier`s.
#[test]
fn langid_matching() {
    let langid_en_us = langid!("en-US");
    let langid_de_at = langid!("de-AT");
    let langid_en = langid!("en");
    let langid_de = langid!("de");
    let langid_pl = langid!("pl");

    let requested = &[&langid_en_us, &langid_de_at];
    let available = &[&langid_pl, &langid_de, &langid_en];
    assert_eq!(
        negotiate_languages(requested, available, None, NegotiationStrategy::Matching),
        &[&&langid_en, &&langid_de],
    );

    let requested = &[langid_en_us, langid_de_at];
    let available = &[langid_pl, langid_de.clone(), langid_en.clone()];
    assert_eq!(
        negotiate_languages(requested, available, None, NegotiationStrategy::Matching),
        &[&langid_en, &langid_de],
    );
}

#[test]
fn cldr_feature() {
    // In this case, the full likelySubtags algorithm knows that `mn` -> `mn-Cyrl`, but
    // the mock doesn't.
    #[cfg(feature = "cldr")]
    assert_eq!(
        negotiate_languages(
            &[langid!("mn")],
            &[langid!("mn-Latn"), langid!("mn-Cyrl")],
            None,
            NegotiationStrategy::Filtering
        ),
        &[&langid!("mn-Cyrl")]
    );

    // As a result, the mock will just return both in undefined
    // order.
    #[cfg(not(feature = "cldr"))]
    assert_eq!(
        negotiate_languages(
            &[langid!("mn")],
            &[langid!("mn-Latn"), langid!("mn-Cyrl")],
            None,
            NegotiationStrategy::Filtering
        )
        .len(),
        2
    );
}

/// Negotiation also accepts `Locale` values, by reference or owned.
#[test]
fn locale_matching() {
    let loc_en_us = locale!("en-US-u-hc-h12");
    let loc_de_at = locale!("de-AT-u-hc-h24");
    let loc_en = locale!("en-u-ca-buddhist");
    let loc_de = locale!("de");
    let loc_pl: Locale = "pl-x-private".parse().unwrap();

    assert_eq!(
        negotiate_languages(
            &[&loc_en_us, &loc_de_at],
            &[&loc_pl, &loc_de, &loc_en],
            None,
            NegotiationStrategy::Matching
        ),
        &[&&loc_en, &&loc_de],
    );

    assert_eq!(
        negotiate_languages(
            &[loc_en_us, loc_de_at],
            &[loc_pl, loc_de.clone(), loc_en.clone()],
            None,
            NegotiationStrategy::Matching
        ),
        &[&loc_en, &loc_de],
    );
}

--------------------------------------------------------------------------------