├── .gitignore ├── src ├── main │ ├── java │ │ ├── com │ │ │ └── optimaize │ │ │ │ └── langdetect │ │ │ │ ├── cybozu │ │ │ │ ├── package.html │ │ │ │ ├── util │ │ │ │ │ ├── package.html │ │ │ │ │ ├── Messages.java │ │ │ │ │ ├── TagExtractor.java │ │ │ │ │ ├── NGram.java │ │ │ │ │ ├── Util.java │ │ │ │ │ └── LangProfile.java │ │ │ │ └── GenProfile.java │ │ │ │ ├── ngram │ │ │ │ ├── package-info.java │ │ │ │ ├── NgramFilter.java │ │ │ │ ├── NgramExtractors.java │ │ │ │ ├── StandardNgramFilter.java │ │ │ │ ├── BackwardsCompatibleNgramFilter.java │ │ │ │ ├── OldNgramExtractor.java │ │ │ │ └── NgramExtractor.java │ │ │ │ ├── profiles │ │ │ │ ├── package-info.java │ │ │ │ ├── OldLangProfileConverter.java │ │ │ │ ├── util │ │ │ │ │ └── LanguageLister.java │ │ │ │ ├── LanguageProfileWriter.java │ │ │ │ ├── LanguageProfile.java │ │ │ │ ├── LanguageProfileBuilder.java │ │ │ │ ├── BuiltInLanguages.java │ │ │ │ ├── LanguageProfileImpl.java │ │ │ │ └── LanguageProfileReader.java │ │ │ │ ├── text │ │ │ │ ├── package-info.java │ │ │ │ ├── TextFilter.java │ │ │ │ ├── TextObjectFactory.java │ │ │ │ ├── CharNormalizerTextFilterImpl.java │ │ │ │ ├── UrlTextFilter.java │ │ │ │ ├── MultiTextFilter.java │ │ │ │ ├── CommonTextObjectFactories.java │ │ │ │ ├── TextObjectFactoryBuilder.java │ │ │ │ ├── RemoveMinorityScriptsTextFilter.java │ │ │ │ └── TextObject.java │ │ │ │ ├── frma │ │ │ │ ├── IOUtils.java │ │ │ │ ├── LangProfileWriter.java │ │ │ │ ├── GenProfile.java │ │ │ │ └── LangProfileReader.java │ │ │ │ ├── DetectedLanguage.java │ │ │ │ ├── LanguageDetector.java │ │ │ │ ├── NgramFrequencyData.java │ │ │ │ └── i18n │ │ │ │ └── LdLocale.java │ │ └── overview.html │ └── resources │ │ └── README.md └── test │ ├── resources │ ├── texts │ │ └── README.txt │ └── logback-test.xml │ └── java │ └── com │ └── optimaize │ └── langdetect │ ├── frma │ ├── IOUtilsTest.java │ ├── LangProfileReaderTest.java │ ├── LangProfileWriterTest.java │ └── GenProfileTest.java │ ├── text │ ├── 
TextObjectTest.java │ ├── MultiTextFilterTest.java │ └── RemoveMinorityScriptsTextFilterTest.java │ ├── ngram │ ├── StandardNgramFilterTest.java │ ├── BackwardsCompatibleNgramFilterTest.java │ ├── OldNgramExtractorTest.java │ └── NgramExtractorTest.java │ ├── cybozu │ ├── DetectedLanguageTest.java │ └── util │ │ ├── NGramTest.java │ │ ├── LangProfileTest.java │ │ └── TagExtractorTest.java │ ├── profiles │ ├── LanguageProfileWriterTest.java │ ├── LanguageProfileBuilderTest.java │ └── LanguageProfileReaderTest.java │ ├── NgramFrequencyDataTest.java │ ├── LanguageDetectorImplTest.java │ ├── TechnicalLanguageDetectorImplTest.java │ ├── i18n │ └── LdLocaleTest.java │ └── DataLanguageDetectorImplTest.java └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /language-detector.iml 3 | .idea/ -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/cybozu/package.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | Original language detection classes from https://code.google.com/p/language-detection/ 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/cybozu/util/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Provides the utility classes for language detection. 4 | Users don't use this package's classes directly. 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/test/resources/texts/README.txt: -------------------------------------------------------------------------------- 1 | I created these by copying text from the Wikipedia articles. 2 | Example: https://de.wikipedia.org/wiki/Deutschland 3 | 4 | The files are stored in UTF-8! (Save as UTF-8 in Windows Notepad) 5 | -------------------------------------------------------------------------------- /src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |See http://en.wikipedia.org/wiki/N-gram
21 | * 22 | * @author Fabian Kessler 23 | */ 24 | package com.optimaize.langdetect.ngram; 25 | -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/profiles/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Fabian Kessler 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Provides functionality for loading, storing and creating {@link com.optimaize.langdetect.profiles.LanguageProfile}s. 19 | * 20 | * @author Fabian Kessler 21 | */ 22 | package com.optimaize.langdetect.profiles; 23 | -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/ngram/NgramFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Fabian Kessler 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.optimaize.langdetect.ngram; 18 | 19 | /** 20 | * Filters out some undesired n-grams. 21 | * 22 | * Implementations must be immutable. 23 | * 24 | * @author Fabian Kessler 25 | */ 26 | public interface NgramFilter { 27 | 28 | boolean use(String ngram); 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/text/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Fabian Kessler 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Provides functionality for concatenating and cleaning text that is used as 19 | * a) learning text to produce {@link com.optimaize.langdetect.LanguageProfile}s 20 | * b) for the text for which the language is to be guessed. 
21 | * 22 | * @author Fabian Kessler 23 | */ 24 | package com.optimaize.langdetect.text; 25 | -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/text/TextFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Fabian Kessler 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.optimaize.langdetect.text; 18 | 19 | /** 20 | * Allows to filter content from a text to be ignored for the n-gram analysis. 21 | * 22 | *Implementations must be immutable and stateless.
23 | * 24 | * @author Fabian Kessler 25 | */ 26 | public interface TextFilter { 27 | 28 | String filter(CharSequence text); 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/overview.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |4 | Language-Detection is a language detection library for Java. (aliases: language identification, language guessing) 5 |
6 | 7 |16 | (c)2010 All rights reserved by Cybozu Labs, Inc. 17 |
18 | 19 |20 |36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/text/TextObjectFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Fabian Kessler 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.optimaize.langdetect.text; 18 | 19 | /** 20 | * Factory for {@link TextObject}s. 
21 | * 22 | * @author Fabian Kessler 23 | */ 24 | public class TextObjectFactory { 25 | 26 | private final TextFilter textFilter; 27 | private final int maxTextLength; 28 | 29 | /** 30 | * @param maxTextLength 0 for none 31 | */ 32 | public TextObjectFactory(TextFilter textFilter, int maxTextLength) { 33 | this.textFilter = textFilter; 34 | this.maxTextLength = maxTextLength; 35 | } 36 | 37 | public TextObject create() { 38 | return new TextObject(textFilter, maxTextLength); 39 | } 40 | 41 | public TextObject forText(CharSequence text) { 42 | return create().append(text); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/optimaize/langdetect/frma/IOUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Francois ROLAND 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.optimaize.langdetect.frma; 18 | 19 | import java.io.Closeable; 20 | import java.io.IOException; 21 | 22 | /** 23 | * Utils to manage IO streams. 24 | * @author François ROLAND 25 | */ 26 | @Deprecated 27 | public class IOUtils { 28 | /** 29 | * Private constructor to prevent instantiation. 30 | */ 31 | private IOUtils() {} 32 | 33 | /** 34 | * Closes a stream without returning any exception. 35 | * 36 | * @param stream the stream to close. 
Can be21 | Licensed under the Apache License, Version 2.0 (the "License"); 22 | you may not use this file except in compliance with the License. 23 | You may obtain a copy of the License at 24 |
25 | 28 |29 | Unless required by applicable law or agreed to in writing, software 30 | distributed under the License is distributed on an "AS IS" BASIS, 31 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 32 | See the License for the specific language governing permissions and 33 | limitations under the License. 34 |
35 |
null.
37 | * @deprecated use Java 7 try-with-resources on the {@link Closeable} instead
38 | */
39 | public static void closeQuietly(Closeable stream) { // best-effort close: an IOException from close() never reaches the caller
40 | if (stream != null) { // null is accepted and treated as "nothing to close"
41 | try {
42 | stream.close();
43 | } catch (IOException ioe) {
44 | // Deliberately swallowed: this helper exists to close a stream without propagating IOException.
45 | }
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/test/java/com/optimaize/langdetect/frma/IOUtilsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 Francois ROLAND
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.optimaize.langdetect.frma;
18 |
19 | import static org.mockito.Mockito.*;
20 |
21 | import java.io.Closeable;
22 | import java.io.IOException;
23 |
24 | import org.junit.Test;
25 |
26 | public class IOUtilsTest { // unit tests for IOUtils.closeQuietly(Closeable)
27 | // A null argument must be accepted; the test passes as long as the call does not throw.
28 | @Test
29 | public void closeQuietlyNullStream() {
30 | IOUtils.closeQuietly(null);
31 | }
32 | // A mocked stream whose close() throws IOException; the test passes only if closeQuietly does not propagate it.
33 | @Test
34 | public void closeQuietlyWhenExceptionThrown() throws IOException {
35 | Closeable stream = mock(Closeable.class);
36 | doThrow(new IOException()).when(stream).close();
37 | IOUtils.closeQuietly(stream);
38 | }
39 | // Normal path: close() must be invoked on the given stream exactly once.
40 | @Test
41 | public void closeQuietly() throws IOException {
42 | Closeable stream = mock(Closeable.class);
43 | IOUtils.closeQuietly(stream);
44 | verify(stream, times(1)).close();
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/com/optimaize/langdetect/text/CharNormalizerTextFilterImpl.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 Fabian Kessler
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.optimaize.langdetect.text;
18 |
19 | import com.optimaize.langdetect.cybozu.util.CharNormalizer;
20 |
21 | /**
22 | * Runs through the {@link CharNormalizer}.
23 | *
24 | * @author Fabian Kessler
25 | * @deprecated can't be used because it would be a big loss to not inline this code.
26 | */
27 | public class CharNormalizerTextFilterImpl implements TextFilter {
28 |
29 | @Override
30 | public String filter(CharSequence text) {
31 | StringBuilder ret = new StringBuilder();
32 | char pre = 0;
33 | for (int i=0; iNote that the order of filters may be important. They are executed in the same order as they 45 | * are passed in here.
46 | */ 47 | public TextObjectFactoryBuilder withTextFilter(TextFilter textFilter) { 48 | textFilters.add(textFilter); 49 | return this; 50 | } 51 | 52 | public TextObjectFactory build() { 53 | return new TextObjectFactory( 54 | new MultiTextFilter(textFilters), 55 | maxTextLength 56 | ); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/test/java/com/optimaize/langdetect/ngram/OldNgramExtractorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Fabian Kessler 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.optimaize.langdetect.ngram; 18 | 19 | import com.google.common.base.Stopwatch; 20 | import org.junit.Test; 21 | 22 | import java.util.*; 23 | 24 | import static org.junit.Assert.*; 25 | 26 | /** 27 | * @author Fabian Kessler 28 | */ 29 | public class OldNgramExtractorTest { 30 | 31 | @Test 32 | public void testExtractNGrams() { 33 | ListComparable: the "better" one comes before the worse. 26 | * First order by probability descending (1 to 0). 27 | * Then order by language ascending (a to z).
28 | * 29 | *This class is immutable.
30 | * 31 | * @author Nakatani Shuyo 32 | * @author Fabian Kessler 33 | */ 34 | public class DetectedLanguage implements ComparableThis detector cannot handle well: 30 | * Short input text, can work or give wrong results. 31 | * Text written in multiple languages. It likely returns the language for the most prominent text. It's not made for that. 32 | * Text written in languages for which the detector has no profile loaded. It may just return other similar languages. 33 | *
34 | * 35 | * @author Fabian Kessler 36 | */ 37 | public interface LanguageDetector { 38 | 39 | /** 40 | * Returns the best detected language if the algorithm is very confident. 41 | * 42 | *Note: you may want to use getProbabilities() instead. This here is very strict, and sometimes returns 43 | * absent even though the first choice in getProbabilities() is correct.
44 | * 45 | * @param text You probably want a {@link com.optimaize.langdetect.text.TextObject}. 46 | * @return The language if confident, absent if unknown or not confident enough. 47 | */ 48 | OptionalThere is a configurable cutoff applied for languages with very low probability.
54 | * 55 | *The way the algorithm currently works, it can be that, for example, this method returns a 0.99 for 56 | * Danish and less than 0.01 for Norwegian, and still they have almost the same chance. It would be nice if 57 | * this could be improved in future versions.
58 | * 59 | * @param text You probably want a {@link com.optimaize.langdetect.text.TextObject}. 60 | * @return Sorted from better to worse. May be empty. 61 | * It's empty if the program failed to detect any language, or if the input text did not 62 | * contain any usable text (just noise). 63 | */ 64 | ListAll file operations are done with UTF-8.
29 | * 30 | * @author François ROLAND 31 | * @author Fabian Kessler 32 | */ 33 | public class LanguageProfileWriter { 34 | 35 | /** 36 | * Writes a {@link LanguageProfile} to an OutputStream in UTF-8. 37 | * 38 | * @throws java.io.IOException 39 | */ 40 | public void write(@NotNull LanguageProfile languageProfile, @NotNull OutputStream outputStream) throws IOException { 41 | try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName("utf-8")))) { 42 | writer.write("{\"freq\":{"); 43 | boolean first = true; 44 | for (Map.EntryIt is built from a training text that should be fairly large and clean.
29 | * 30 | *It contains the n-grams from the training text in the desired gram sizes (eg 2 and 3-grams), 31 | * with possible text filters applied for cleaning. Also, rarely occurring n-grams may have been cut to 32 | * reduce the noise and index size. Use a {@link LanguageProfileBuilder}.
33 | * 34 | *The profile may be created at runtime on-the-fly, or it may be loaded from a previously generated 35 | * text file (see OldLangProfileConverter).
36 | * 37 | * @author Fabian Kessler 38 | */ 39 | public interface LanguageProfile { 40 | 41 | @NotNull 42 | LdLocale getLocale(); 43 | 44 | /** 45 | * Tells what the n in n-grams are used here. 46 | * Example: [1,2,3] 47 | * @return Sorted from smaller to larger. 48 | */ 49 | @NotNull 50 | ListFor each n-gram string it knows the locales (languages) in which it occurs, and how frequent it 30 | * occurs in those languages in relation to other n-grams of the same length in those same languages.
31 | * 32 | *Immutable by definition (can't make Arrays unmodifiable).
33 | * 34 | * @author Fabian Kessler 35 | */ 36 | public final class NgramFrequencyData { 37 | 38 | /** 39 | * Key = ngram 40 | * Value = array with probabilities per loaded language, in the same order as {@code langlist}. 41 | */ 42 | @NotNull 43 | private final MapThis class is immutable.
28 | * 29 | * @author Fabian Kessler 30 | */ 31 | public final class LanguageProfileImpl implements LanguageProfile { 32 | 33 | @NotNull 34 | private final LdLocale locale; 35 | @NotNull 36 | private final MapExample: when textPadding is a space ' ' then a text input "foo" becomes " foo ", ensuring that n-grams like " f" 56 | * are created.
57 | * 58 | *If the text already has such a character in that position (eg starts with), it is not added there.
59 | * 60 | * @param textPadding for example a space ' '. 61 | */ 62 | public NgramExtractor textPadding(char textPadding) { 63 | return new NgramExtractor(this.gramLengths, this.filter, textPadding); 64 | } 65 | 66 | private NgramExtractor(@NotNull ListExample: extractSortedGrams("Foo bar", 2) => [Fo,oo,o , b,ba,ar]
81 | * 82 | * @param text 83 | * @return The grams, empty if the input was empty or if none for that gramLength fits. 84 | */ 85 | @NotNull 86 | public ListIt represents a IETF BCP 47 tag, but does not implement all the features. Features can be added as needed.
29 | * 30 | *It is constructed through the {@link #fromString} factory method. The {@link #toString()} method 31 | * produces a parseable and persistable string.
32 | * 33 | *The class is immutable.
34 | * 35 | *The java.util.Locale cannot be used because it has issues for historical reasons, notably the
36 | * script code conversion for Hebrew, Yiddish and Indonesian, and more. If one needs a Locale,
37 | * it is simple to create one based on this object.
38 | * The ICU ULocale cannot be used because a) it has issues too (for our use case) and b) we're not
39 | * using ICU in here [yet].
This class does not perform any modifications on the input. The input is used as is, and the getters 42 | * return it in exactly the same way. No standardization, canonicalization, cleaning.
43 | * 44 | *The input is validated syntactically, but not for code existence. For example the script code must 45 | * be a valid ISO 15924 like "Latn" or "Cyrl", in correct case. But whether the code exists or not is not checked. 46 | * These code standards are not fixed, simply because regional entities like Countries can change for political 47 | * reasons, and languages are living entities. Therefore certain codes may exist at some point in time only 48 | * (be introduced late, or be deprecated or removed, or even be re-assigned another meaning). 49 | * It is not up to us to decide whether Kosovo is a country in 2015 or not. 50 | * If one needs to only work with a certain range of acceptable codes, he can validate the codes through other 51 | * classes that have knowledge about the codes. 52 | *
53 | * 54 | *Language: as for BCP 47, the ISO 639-1 code must be used if there is one. For example "fr" for French. 55 | * If not, the ISO 639-3 code should be used. It is highly discouraged to use 639-2. 56 | * Right now this class enforces a 2 or 3 char code, but this may be relaxed in the future.
57 | * 58 | *Script: Only ISO 15924, no discussion.
59 | * 60 | *Region: same as for BCP 47. That means ISO 3166-1 alpha-2 and "UN M.49". 61 | * I can imagine relaxing it in the future to also allow 3166-2 codes. 62 | * In most cases the "region" is a "country".
63 | * 64 | * @author fabian kessler 65 | */ 66 | public final class LdLocale { 67 | 68 | @NotNull 69 | private final String language; 70 | @NotNull 71 | private final Optional