├── .gitignore
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-udhr.R
    │   ├── test-scripts.R
    │   ├── test-utils.R
    │   ├── test-distances.R
    │   ├── test-franc.R
    │   ├── test-trigrams.R
    │   └── support.json
├── Makefile
├── LICENSE
├── .Rbuildignore
├── NAMESPACE
├── R
    ├── normalize.R
    ├── distances.R
    ├── ngrams.R
    ├── script.R
    ├── trigrams.R
    ├── speakers.R
    ├── expressions.R
    └── franc.R
├── NEWS.md
├── franc.Rproj
├── DESCRIPTION
├── man
    ├── speakers.Rd
    ├── franc.Rd
    └── franc_all.Rd
├── .github
    └── workflows
    │   ├── test-coverage.yaml
    │   └── check-pak.yaml
├── README.Rmd
├── README.md
└── inst
    └── speakers.json


/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | .Rproj.user
3 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(franc)
3 | 
4 | if (Sys.getenv("NOT_CRAN") == "true") test_check("franc")
5 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | all: README.md
3 | 
4 | README.md: README.Rmd
5 | 	Rscript -e "library(knitr); knit('$<', output = '$@', quiet = TRUE)"
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2004-2019
2 | COPYRIGHT HOLDER: Mango Solutions, Titus Wormer, Maciej Ceglowski, Jacob R. Rideout, Kent S. Johnson, Gábor Csárdi
3 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^Makefile$
4 | ^README.Rmd$
5 | ^README.html$
6 | ^.travis.yml$
7 | ^appveyor.yml$
8 | ^\.github$
9 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 | 
3 | export(franc)
4 | export(franc_all)
5 | export(speakers)
6 | importFrom(jsonlite,fromJSON)
7 | 


--------------------------------------------------------------------------------
/R/normalize.R:
--------------------------------------------------------------------------------
1 | 
# Rescale raw trigram distances into scores relative to the best match.
#
# `text` is the analysed string and `distances` a numeric vector of distances.
# The smallest distance maps to a score of 1; every other distance is reduced
# in proportion to how far it lies above that minimum, using
# `nchar(text) * MAX_DIFFERENCE - min(distances)` as the scale.
normalize <- function(text, distances) {
  best <- min(distances)
  span <- nchar(text) * MAX_DIFFERENCE - best
  1 - (distances - best) / span
}
7 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # development version
 3 | 
 4 | # 1.1.4
 5 | 
 6 | No user visible changes.
 7 | 
 8 | # 1.1.3
 9 | 
10 | * Script detection is now better. Previous versions ignored some characters,
11 |   because of some bad regular expressions.
12 | 
13 | # 1.1.2
14 | 
15 | No user visible changes.
16 | 
17 | # 1.1.1
18 | 
19 | First public release.
20 | 


--------------------------------------------------------------------------------
/franc.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-udhr.R:
--------------------------------------------------------------------------------
 1 | 
 2 | context("UDHR")
 3 | 
 4 | test_that("All supported languages are recognized", {
 5 | 
 6 |   support <- jsonlite::fromJSON("support.json", )$iso6393
 7 |   fixtures <- jsonlite::fromJSON("fixtures.json")
 8 | 
 9 |   for (i in seq_along(fixtures)) {
10 |     if (nchar(fixtures[[i]]) != 0) {
11 |       lang <- franc(fixtures[[i]], min_speakers = 0)
12 |       expect_equal(lang, support[i], info = i)
13 |     }
14 |   }
15 | })
16 | 


--------------------------------------------------------------------------------
/R/distances.R:
--------------------------------------------------------------------------------
 1 | 
 2 | get_distance <- function(trigrams, model) {
 3 | 
 4 |   diff <- abs(trigrams - model[names(trigrams)])
 5 |   diff[is.na(diff)] <- MAX_DIFFERENCE
 6 |   sum(diff)
 7 | }
 8 | 
 9 | get_distances <- function(trigrams, languages, whitelist = NULL,
10 |                           blacklist = NULL) {
11 | 
12 |   languages <- filter_languages(languages, whitelist, blacklist)
13 |   sort(vapply(languages, get_distance, 1, trigrams = trigrams))
14 | }
15 | 


--------------------------------------------------------------------------------
/R/ngrams.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ngrams <- function(text, n) {
 3 | 
 4 |   stopifnot(
 5 |     is.numeric(n),
 6 |     length(n) == 1,
 7 |     !is.na(n),
 8 |     n >= 1,
 9 |     is.finite(n)
10 |   )
11 |   
12 |   if (is.null(text) || length(text) == 0) return(list())
13 |   
14 |   text <- as.character(text)
15 | 
16 |   lapply(text, function(x) {
17 |     if (nchar(x) < n) return(character())
18 |     num <- nchar(x) - n + 1
19 |     substring(x, 1:num, 1:num + n - 1)
20 |   })
21 | }
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test-scripts.R:
--------------------------------------------------------------------------------
 1 | 
 2 | context("Scripts")
 3 | 
 4 | test_that("script detection works", {
 5 | 
 6 |   expect_equal(get_top_script(""), NULL)
 7 |   expect_equal(get_top_script("this is in English"), "Latin")
 8 | 
 9 |   ben <- paste0(
10 |     "\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ",
11 |     "\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ",
12 |     "\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F"
13 |   )
14 |   expect_equal(get_top_script(ben), "ben")
15 | })
16 | 


--------------------------------------------------------------------------------
/R/script.R:
--------------------------------------------------------------------------------
 1 | 
 2 | match_length <- function(pattern, text) {
 3 |   perl <- .Platform$OS.type == "windows"
 4 |   mat <- gregexpr(pattern, text, perl = perl)[[1]]
 5 |   if (mat[1] == -1) 0 else sum(attr(mat, "match.length"))
 6 | }
 7 | 
 8 | #' @include expressions.R
 9 | 
10 | get_top_script <- function(text) {
11 |   num_letters <- vapply(expressions, match_length, 1, text = text)
12 |   if (any(num_letters > 0)) {
13 |     names(which.max(num_letters))
14 |   } else {
15 |     NULL
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
 1 | 
 2 | context("Utility functions")
 3 | 
 4 | test_that("match_length works", {
 5 | 
 6 |   expect_equal(match_length("[a-z]", "abcz"), 4)
 7 |   expect_equal(match_length("[a-z]", "x"), 1)
 8 |   expect_equal(match_length("[a-z]", ""), 0)
 9 |   expect_equal(match_length("[a-z]", "123"), 0)
10 | 
11 |   ben <- paste0(
12 |     "\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ",
13 |     "\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ",
14 |     "\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F"
15 |   )
16 |   expect_equal(match_length(expressions$ben, ben), 23)
17 | })
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-distances.R:
--------------------------------------------------------------------------------
 1 | 
 2 | context("Model distances")
 3 | 
 4 | test_that("get_distance works", {
 5 | 
 6 |   tri_eng <- clean_trigrams_table("This is apparently in English")
 7 |   eng <- get_distance(tri_eng, data[["Latin"]][["eng"]])
 8 |   hun <- get_distance(tri_eng, data[["Latin"]][["hun"]])
 9 |   deu <- get_distance(tri_eng, data[["Latin"]][["deu"]])
10 | 
11 |   expect_true(eng < hun)
12 |   expect_true(eng < deu)
13 | 
14 |   expect_equal(eng, 5453)
15 |   expect_equal(hun, 7791)
16 |   expect_equal(deu, 7293)
17 | })
18 | 
19 | 
# filter_languages() must return its input unchanged without filters, keep
# only whitelisted codes, and drop blacklisted codes.
test_that("filter_languages works", {

  expect_equal(data$Latin, filter_languages(data$Latin))
  expect_equal(data$Latin[c("eng", "deu")],
               filter_languages(data$Latin, whitelist = c("eng", "deu")))
  expect_equal(data$Latin[setdiff(names(data$Latin), c("eng", "deu"))],
               filter_languages(data$Latin, blacklist = c("eng", "deu")))
})
28 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: franc
 2 | Title: Detect the Language of Text
 3 | Version: 1.1.4.9000
 4 | Author: Gabor Csardi, Titus Wormer, Maciej Ceglowski, Jacob R. Rideout,
 5 |     and Kent S. Johnson
 6 | Maintainer: Gábor Csárdi <csardi.gabor@gmail.com>
 7 | Description: With no external dependencies and
 8 |     support for 335 languages; all languages spoken by
 9 |     more than one million speakers. 'Franc' is a port
10 |     of the 'JavaScript' project of the same name,
11 |     see <https://github.com/wooorm/franc>.
12 | License: MIT + file LICENSE
13 | URL: https://github.com/gaborcsardi/franc#readme
14 | BugReports: https://github.com/gaborcsardi/franc/issues
15 | Suggests:
16 |     testthat
17 | RoxygenNote: 6.1.1
18 | Encoding: UTF-8
19 | Imports:
20 |     jsonlite
21 | Collate:
22 |     'distances.R'
23 |     'expressions.R'
24 |     'franc.R'
25 |     'ngrams.R'
26 |     'normalize.R'
27 |     'script.R'
28 |     'speakers.R'
29 |     'trigrams.R'
30 | 


--------------------------------------------------------------------------------
/man/speakers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/speakers.R
 3 | \docType{data}
 4 | \name{speakers}
 5 | \alias{speakers}
 6 | \title{Number of speakers for 370 languages}
 7 | \format{A data frame with columns:
 8 | \describe{
 9 |   \item{language}{Three letter language code.}
10 |   \item{speakers}{Number of speakers.}
11 |   \item{name}{Full name of language.}
12 |   \item{iso6391}{ISO 639-1 codes. See more at
13 |     \code{https://en.wikipedia.org/wiki/ISO_639}.}
14 |   \item{iso6392}{ISO 639-2T codes. See more at
15 |     \code{https://en.wikipedia.org/wiki/ISO_639}.}
16 | }}
17 | \usage{
18 | speakers
19 | }
20 | \description{
21 | This is a superset of all languages detected by franc. Numbers were
22 | collected by Titus Wormer. To quote him: \emph{Painstakingly crawled by
23 | hand from OHCHR, the numbers are (in some cases, very) rough estimates
24 | or out-of-date.}.
25 | }
26 | \keyword{datasets}
27 | 


--------------------------------------------------------------------------------
/R/trigrams.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ## This is mostly after
 3 | ## https://github.com/wooorm/trigram-utils/blob/master/index.js
 4 | 
 5 | trigrams <- function(text) ngrams(text, 3)
 6 | 
 7 | expression_symbols <- "[-!\"#$%&'()*+,\\./0123456789:;<=>?@]"
 8 | 
 9 | trim <- function(x) sub("\\s$", "", sub("^\\s*", "", x))
10 | 
# Normalise text before trigram extraction.
#
# Coerces `value` to character, replaces every character matched by
# `expression_symbols` with a space, collapses whitespace runs into a single
# space, trims both ends and lower-cases the result.
clean <- function(value) {
  text <- as.character(value)
  text <- gsub(expression_symbols, " ", text)
  text <- gsub("\\s+", " ", text)
  tolower(trim(text))
}
18 | 
# Trigrams of the cleaned input, one character vector per element of `value`.
#
# Each cleaned string is padded with one space on both sides before it is
# split. Zero-length input gives an empty list.
clean_trigrams <- function(value) {
  if (length(value) > 0) {
    padded <- paste0(' ', clean(value), ' ')
    trigrams(padded)
  } else {
    list()
  }
}
23 | 
# Frequency table of the cleaned trigrams of a single string.
#
# `value` must be a character vector of length one.
clean_trigrams_table <- function(value) {
  stopifnot(is.character(value), length(value) == 1)
  counts <- table(clean_trigrams(value))
  # Force the dimnames name to "", which is what table() produced before
  # https://github.com/wch/r-source/commit/09ae38a25149d02a21b19ef33c3d09ef92f72351
  # Not very important for us, but we had a test case for it.
  dn <- dimnames(counts)
  names(dn) <- ""
  dimnames(counts) <- dn
  counts
}
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-franc.R:
--------------------------------------------------------------------------------
 1 | 
 2 | context("Language detection")
 3 | 
 4 | test_that("top language is detected correctly", {
 5 | 
 6 |   expect_equal(franc("Alle menslike wesens word vry"), "afr")
 7 |   expect_equal(franc(""), "und")
 8 |   expect_equal(franc("the"), "und")
 9 |   expect_equal(franc("the", min_length = 3), "sco")
10 | })
11 | 
12 | test_that("no matching script", {
13 |   expect_equal(franc(strrep("\U0001f4e6", 30)), "und")
14 | })
15 | 
16 | test_that("language scores are calculated correctly", {
17 | 
18 |   scores <- franc_all('O Brasil caiu 26 posi\u00c7\u00f5es')
19 | 
20 |   expect_equal(
21 |     scores[1:12,],
22 |     data.frame(
23 |       stringsAsFactors = FALSE,
24 |       language = c("por", "src", "glg", "snn", "bos", "hrv", "lav", "cat",
25 |         "spa", "bam", "sco", "rmy"),
26 |       score = c(1, 0.880093676814988, 0.870257611241218, 0.863700234192037,
27 |         0.816861826697892, 0.810304449648712, 0.809836065573771,
28 |         0.80655737704918, 0.799531615925059, 0.799531615925059,
29 |         0.779859484777518, 0.753629976580796)
30 |     )
31 |   )
32 | 
33 | })
34 | 
35 | test_that("whitelist", {
36 |   txt <- paste(
37 |     "Somogy és Baranya megyét egy földút is összeköti, ahová a",
38 |     "navigációs szoftverek néha bekalauzolják a gyanútlan autóst,",
39 |     "aztán a helyiek húzzák ki őket a sárból. -- telex.hu"
40 |   )
41 |   expect_equal(franc(txt, whitelist = c("hun", "eng", "esp")), "hun")
42 | })
43 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |       - master
 6 |       - x
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |       - master
11 |       - x
12 | 
13 | name: test-coverage
14 | 
15 | jobs:
16 |   test-coverage:
17 |     runs-on: macOS-latest
18 |     env:
19 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 |     steps:
21 |       - uses: actions/checkout@v2
22 | 
23 |       - uses: r-lib/actions/setup-r@v1
24 | 
25 |       - uses: r-lib/actions/setup-pandoc@v1
26 | 
27 |       - name: Install pak and query dependencies
28 |         run: |
29 |           install.packages("pak", repos = "https://r-lib.github.io/p/pak/dev/")
30 |           saveRDS(pak::pkg_deps_tree("local::.", dependencies = TRUE), ".github/r-depends.rds")
31 |         shell: Rscript {0}
32 | 
33 |       - name: Cache R packages
34 |         uses: actions/cache@v2
35 |         with:
36 |           path: ${{ env.R_LIBS_USER }}
37 |           key: ${{ runner.os }}-${{ steps.install-r.outputs.installed-r-version }}-2-${{ hashFiles('.github/r-depends.rds') }}
38 |           restore-keys: ${{ runner.os }}-${{ steps.install-r.outputs.installed-r-version }}-2-
39 | 
40 |       - name: Install system dependencies
41 |         if: runner.os == 'Linux'
42 |         run: Rscript -e 'pak::local_system_requirements(execute = TRUE)'
43 | 
44 |       - name: Install dependencies
45 |         run: |
46 |           pak::local_install_dev_deps(upgrade = TRUE)
47 |           pak::pkg_install("covr")
48 |         shell: Rscript {0}
49 | 
50 |       - name: Test coverage
51 |         run: covr::codecov()
52 |         shell: Rscript {0}
53 | 


--------------------------------------------------------------------------------
/R/speakers.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Number of speakers for 370 languages
 3 | #'
 4 | #' This is a superset of all languages detected by franc. Numbers were
 5 | #' collected by Titus Wormer. To quote him: \emph{Painstakingly crawled by
 6 | #' hand from OHCHR, the numbers are (in some cases, very) rough estimates
 7 | #' or out-of-date.}.
 8 | #'
 9 | #' @format
10 | #' A data frame with columns:
11 | #' \describe{
12 | #'   \item{language}{Three letter language code.}
13 | #'   \item{speakers}{Number of speakers.}
14 | #'   \item{name}{Full name of language.}
15 | #'   \item{iso6391}{ISO 639-1 codes. See more at
16 | #'     \code{https://en.wikipedia.org/wiki/ISO_639}.}
17 | #'   \item{iso6392}{ISO 639-2T codes. See more at
18 | #'     \code{https://en.wikipedia.org/wiki/ISO_639}.}
19 | #' }
20 | #'
21 | #' @docType data
22 | #' @importFrom jsonlite fromJSON
23 | #' @export
24 | 
speakers <- jsonlite::fromJSON(
  # Locate speakers.json inside the installed package.
  system.file("speakers.json", package = packageName()),
  # Ask jsonlite not to simplify, so the records can be indexed as lists below.
  simplifyVector = FALSE)

# Replace NULL values at positions 2 and 3 of every record with a character NA.
# NOTE(review): presumably these are optional string fields (JSON null) that
# would otherwise break the character vapply() calls below; which fields sit
# at positions 2 and 3 is not visible here, so confirm against speakers.json.
for (i in seq_along(speakers)) {
  if (is.null(speakers[[i]][[2]])) speakers[[i]][[2]] <- NA_character_
  if (is.null(speakers[[i]][[3]])) speakers[[i]][[3]] <- NA_character_
}

# Flatten the records into a data frame with one row per language; the names
# of the parsed list become the `language` column.
speakers <- data.frame(
  stringsAsFactors = FALSE,
  row.names = NULL,
  language = names(speakers),
  speakers = as.integer(vapply(speakers, "[[", 1, "speakers")),
  name     = vapply(speakers, "[[", "", "name"),
  iso6391  = vapply(speakers, "[[", "", "iso6391"),
  iso6392  = vapply(speakers, "[[", "", "iso6392")
)

# Most widely spoken languages first.
speakers <- speakers[ order(speakers$speakers, decreasing = TRUE), ]

# Renumber the rows 1..n so the row names follow the sorted order.
row.names(speakers) <- seq_len(nrow(speakers))
47 | 


--------------------------------------------------------------------------------
/man/franc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/franc.R
 3 | \encoding{utf8}
 4 | \name{franc}
 5 | \alias{franc}
 6 | \title{Detect the language of a string}
 7 | \usage{
 8 | franc(text, min_speakers = 1e+06, whitelist = NULL, blacklist = NULL,
 9 |   min_length = 10, max_length = 2048)
10 | }
11 | \arguments{
12 | \item{text}{A string constant. Should be at least \code{min_length}
13 | characters long, this is 10 characters by default.
14 | Only the first \code{max_length} characters are used (2048 by
15 | default), to make the detection reasonably fast.}
16 | 
17 | \item{min_speakers}{Languages with at least this many speakers are
18 | checked. By default this is one million. Set it to zero to
19 | include all languages known by franc. See also \code{\link{speakers}}.}
20 | 
21 | \item{whitelist}{List of three letter language codes to check against.}
22 | 
23 | \item{blacklist}{List of three letter language codes not to check
24 | against.}
25 | 
26 | \item{min_length}{Minimum number of characters required in the text.}
27 | 
28 | \item{max_length}{Maximum number of characters used from the text.
29 | By default only the first 2048 characters are used.}
30 | }
31 | \value{
32 | A three letter ISO-639-3 language code, the detected
33 |   language of the text. \code{"und"} is returned for too short input.
34 | }
35 | \description{
36 | Detect the language of a string
37 | }
38 | \examples{
39 | ## afr
40 | franc("Alle menslike wesens word vry")
41 | 
42 | ## nno
43 | franc("Alle mennesker er født frie og")
44 | 
45 | ## Too short, und
46 | franc("the")
47 | 
48 | ## You can change what’s too short (default: 10), sco
49 | franc("the", min_length = 3)
50 | }
51 | \seealso{
52 | \code{\link{franc_all}} for scores against many languages,
53 |   \code{\link{speakers}}.
54 | }
55 | 


--------------------------------------------------------------------------------
/man/franc_all.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/franc.R
 3 | \encoding{utf8}
 4 | \name{franc_all}
 5 | \alias{franc_all}
 6 | \title{List of probable languages for a text}
 7 | \usage{
 8 | franc_all(text, min_speakers = 1e+06, whitelist = NULL,
 9 |   blacklist = NULL, min_length = 10, max_length = 2048)
10 | }
11 | \arguments{
12 | \item{text}{A string constant. Should be at least \code{min_length}
13 | characters long, this is 10 characters by default.
14 | Only the first \code{max_length} characters are used (2048 by
15 | default), to make the detection reasonably fast.}
16 | 
17 | \item{min_speakers}{Languages with at least this many speakers are
18 | checked. By default this is one million. Set it to zero to
19 | include all languages known by franc. See also \code{\link{speakers}}.}
20 | 
21 | \item{whitelist}{List of three letter language codes to check against.}
22 | 
23 | \item{blacklist}{List of three letter language codes not to check
24 | against.}
25 | 
26 | \item{min_length}{Minimum number of characters required in the text.}
27 | 
28 | \item{max_length}{Maximum number of characters used from the text.
29 | By default only the first 2048 characters are used.}
30 | }
31 | \value{
32 | A data frame with columns \code{language} and \code{score}.
33 |   The \code{language} column contains the three letter ISO-639-3
34 |   language codes. The \code{score} column contains the scores.
35 | }
36 | \description{
37 | Returns the scores for all languages that use the same script
38 | as the input text, in decreasing order of probability. The score
39 | is calculated from the distances of the trigram distributions
40 | in the input text and in the language model. The closer the languages,
41 | the higher the score. Scores are scaled, so that the closest language
42 | will have a score of 1.
43 | }
44 | \examples{
45 | head(franc_all("O Brasil caiu 26 posições"))
46 | 
47 | ## Provide a whitelist:
48 | franc_all("O Brasil caiu 26 posições",
49 |   whitelist = c("por", "src", "glg", "spa"))
50 | 
51 | ## Provide a blacklist:
52 | head(franc_all("O Brasil caiu 26 posições",
53 |   blacklist = c("src", "glg", "lav")))
54 | }
55 | \seealso{
56 | \code{\link{franc}} if you only want the top result,
57 |   \code{\link{speakers}}.
58 | }
59 | 


--------------------------------------------------------------------------------
/.github/workflows/check-pak.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | #
 4 | # NOTE: This workflow is overkill for most R packages and
 5 | # check-standard.yaml is likely a better choice.
 6 | # usethis::use_github_action("check-standard") will install it.
 7 | on:
 8 |   push:
 9 |     branches: [main, master, x]
10 |   pull_request:
11 |     branches: [main, master, x]
12 | 
13 | name: R-CMD-check
14 | 
15 | jobs:
16 |   R-CMD-check:
17 |     runs-on: ${{ matrix.config.os }}
18 | 
19 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
20 | 
21 |     strategy:
22 |       fail-fast: false
23 |       matrix:
24 |         config:
25 |           - {os: macOS-latest,   r: 'release'}
26 | 
27 |           - {os: windows-latest, r: 'release'}
28 |           # Use 3.6 to trigger usage of RTools35
29 |           - {os: windows-latest, r: '3.6'}
30 | 
31 |           # Use older ubuntu to maximise backward compatibility
32 |           - {os: ubuntu-18.04,   r: 'devel', http-user-agent: 'release'}
33 |           - {os: ubuntu-18.04,   r: 'release'}
34 |           - {os: ubuntu-18.04,   r: 'oldrel-1'}
35 |           - {os: ubuntu-18.04,   r: 'oldrel-2'}
36 |           - {os: ubuntu-18.04,   r: 'oldrel-3'}
37 |           - {os: ubuntu-18.04,   r: 'oldrel-4'}
38 | 
39 |     env:
40 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
41 |       R_KEEP_PKG_SOURCE: yes
42 | 
43 |     steps:
44 |       - uses: actions/checkout@v2
45 | 
46 |       - uses: r-lib/actions/setup-pandoc@v1
47 | 
48 |       - uses: r-lib/actions/setup-r@v1
49 |         with:
50 |           r-version: ${{ matrix.config.r }}
51 |           http-user-agent: ${{ matrix.config.http-user-agent }}
52 |           use-public-rspm: true
53 | 
54 |       - uses: r-lib/actions/setup-r-dependencies@v1
55 |         with:
56 |           extra-packages: rcmdcheck
57 | 
58 |       - uses: r-lib/actions/check-r-package@v1
59 | 
60 |       - name: Show testthat output
61 |         if: always()
62 |         run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
63 |         shell: bash
64 | 
65 |       - name: Upload check results
66 |         if: failure()
67 |         uses: actions/upload-artifact@main
68 |         with:
69 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
70 |           path: check
71 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | 
 5 | ```{r, setup, echo = FALSE, message = FALSE}
 6 | knitr::opts_chunk$set(
 7 |   comment = "#>",
 8 |   tidy = FALSE,
 9 |   error = FALSE)
10 | ```
11 | 
12 | # franc
13 | 
14 | > Detect the Language of Text
15 | 
16 | <!-- badges: start -->
17 | 
18 | [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org)
19 | [![R build status](https://github.com/gaborcsardi/franc/workflows/R-CMD-check/badge.svg)](https://github.com/gaborcsardi/franc/actions)
20 | [![](https://www.r-pkg.org/badges/version/franc)](https://www.r-pkg.org/pkg/franc)
21 | [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/franc)](https://www.r-pkg.org/pkg/franc)
22 | [![Coverage Status](https://img.shields.io/codecov/c/github/gaborcsardi/franc/master.svg)](https://codecov.io/github/gaborcsardi/franc?branch=master)
23 | 
24 | <!-- badges: end -->
25 | 
26 | Franc has no external dependencies and supports 310 languages; all
27 | languages spoken by more than one million speakers. Franc is a port
28 | of the JavaScript project of the same name, see
29 | https://github.com/wooorm/franc.
30 | 
31 | ## Installation
32 | 
33 | ```{r eval = FALSE}
34 | install.packages("franc")
35 | ```
36 | 
37 | ## Usage
38 | 
39 | ```{r}
40 | library(franc)
41 | ```
42 | 
43 | Simply supply the text, and franc detects its language:
44 | 
45 | ```{r}
46 | franc("Alle menslike wesens word vry")
47 | franc("এটি একটি ভাষা একক IBM স্ক্রিপ্ট")
48 | franc("Alle mennesker er født frie og")
49 | head(franc_all("O Brasil caiu 26 posições"))
50 | ```
51 | 
52 | `und` is the `undefined` language; it is returned if the input is
53 | too short (shorter than 10 characters by default).
54 | 
55 | ```{r}
56 | franc("the")
57 | franc("the", min_length = 3)
58 | ```
59 | 
60 | You can provide a whitelist or a blacklist:
61 | 
62 | ```{r}
63 | franc_all("O Brasil caiu 26 posições",
64 |     whitelist = c("por", "src", "glg", "spa"))
65 | head(franc_all("O Brasil caiu 26 posições",
66 |     blacklist = c("src", "glg", "lav")))
67 | ```
68 | 
69 | ## Supported languages
70 | 
71 | The R version of franc supports 310 languages. By default only the
72 | languages with more than 1 million speakers are used; this is 175
73 | languages. The `min_speakers` argument can relax this, and allows
74 | using more languages:
75 | 
76 | ```{r}
77 | head(franc_all("O Brasil caiu 26 posições"))
78 | head(franc_all("O Brasil caiu 26 posições", min_speakers = 0))
79 | ```
80 | 
81 | ## License
82 | 
83 | MIT © [Mango Solutions](https://github.com/mangothecat), Titus Wormer,
84 | Maciej Ceglowski, Jacob R. Rideout, Kent S. Johnson, Gábor Csárdi
85 | 


--------------------------------------------------------------------------------
/tests/testthat/test-trigrams.R:
--------------------------------------------------------------------------------
  1 | 
  2 | context("Trigrams")
  3 | 
  4 | 
  5 | test_that("trigrams works", {
  6 | 
  7 |   expect_equal(trigrams("abcdef")[[1]], c("abc", "bcd", "cde", "def"))
  8 |   expect_equal(trigrams("abc")[[1]], "abc")
  9 |   expect_equal(trigrams("ab")[[1]], character(0))
 10 |   expect_equal(trigrams(c("ab", "abc", "abcd")),
 11 |                list(character(0), "abc", c("abc", "bcd")))
 12 |   expect_equal(trigrams(character(0)), list())
 13 | })
 14 | 
 15 | 
 16 | test_that("clean_trigrams works", {
 17 | 
 18 |   expect_equal(clean_trigrams("abcdef")[[1]],
 19 |                c(" ab", "abc", "bcd", "cde", "def", "ef "))
 20 |   expect_equal(clean_trigrams("abc")[[1]], c(" ab", "abc", "bc "))
 21 |   expect_equal(clean_trigrams("ab")[[1]], c(" ab", "ab "))
 22 |   expect_equal(clean_trigrams("a")[[1]], c(" a "))
 23 |   expect_equal(clean_trigrams(c("abcd", "xyzz")),
 24 |                list(c(" ab", "abc", "bcd", "cd "),
 25 |                     c(" xy", "xyz", "yzz", "zz ")))
 26 |   expect_equal(clean_trigrams(character(0)), list())
 27 | })
 28 | 
 29 | 
 30 | test_that("clean_trigrams removes non-letters", {
 31 | 
 32 |   expect_equal(clean_trigrams("a2345!+b<=>?c")[[1]],
 33 |                c(" a ", "a b", " b ", "b c", " c "))
 34 |   expect_equal(clean_trigrams("a-!\"#$%&'()*+,\\./0123456789:;<=>?@")[[1]],
 35 |                c(" a "))
 36 | })
 37 | 
 38 | 
 39 | test_that("clean_trigrams is case insensitive", {
 40 | 
 41 |   expect_equal(clean_trigrams("ABCDEF"), clean_trigrams("abcdef"))
 42 |   expect_equal(clean_trigrams("ABCDEF"), clean_trigrams("abCdEf"))
 43 | })
 44 | 
 45 | 
 46 | test_that("clean_trigrams keeps UniCode letters", {
 47 | 
 48 |   ben <- paste0(
 49 |     "\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ",
 50 |     "\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ",
 51 |     "\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F"
 52 |   )
 53 |   expect_equal(
 54 |     clean_trigrams(ben)[[1]],
 55 |     c(" \u098F\u099F", "\u098F\u099F\u09BF", "\u099F\u09BF ",
 56 |       "\u09BF \u098F", " \u098F\u0995", "\u098F\u0995\u099F",
 57 |       "\u0995\u099F\u09BF", "\u099F\u09BF ", "\u09BF \u09AD",
 58 |       " \u09AD\u09BE", "\u09AD\u09BE\u09B7",
 59 |       "\u09BE\u09B7\u09BE", "\u09B7\u09BE ",
 60 |       "\u09BE \u098F", " \u098F\u0995", "\u098F\u0995\u0995",
 61 |       "\u0995\u0995 ", "\u0995 i", " ib", "ibm", "bm ",
 62 |       "m \u09B8", " \u09B8\u09CD", "\u09B8\u09CD\u0995",
 63 |       "\u09CD\u0995\u09CD", "\u0995\u09CD\u09B0",
 64 |                  "\u09CD\u09B0\u09BF", "\u09B0\u09BF\u09AA",
 65 |       "\u09BF\u09AA\u09CD", "\u09AA\u09CD\u099F",
 66 |       "\u09CD\u099F ")
 67 |   )
 68 | })
 69 | 
 70 | test_that("clean_trigrams removed excesive whitespace", {
 71 | 
 72 |   expect_equal(clean_trigrams("    a     ")[[1]], c(" a "))
 73 |   expect_equal(clean_trigrams("a   a")[[1]], c(" a ", "a a", " a "))
 74 | })
 75 | 
 76 | 
 77 | test_that("clean_trigrams_table works", {
 78 | 
 79 |   tab1 <- structure(
 80 |     c(1L, 3L, 1L, 2L, 2L),
 81 |     dim = 5L,
 82 |     dimnames = structure(
 83 |       list(c(" ab", "abc", "bc ", "bca", "cab")),
 84 |       names = ""
 85 |     ),
 86 |     class = "table"
 87 |   )
 88 | 
 89 |   expect_equal(clean_trigrams_table(c("abcabcabc")), tab1)
 90 | 
 91 |   tab2 <- structure(
 92 |     integer(0),
 93 |     dim = 0L,
 94 |     dimnames = structure(list(NULL), names = ""),
 95 |     class = "table"
 96 |   )
 97 | 
 98 |   expect_equal(clean_trigrams_table(""), tab2)
 99 | })
100 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # franc
  3 | 
  4 | > Detect the Language of Text
  5 | 
  6 | <!-- badges: start -->
  7 | 
  8 | [![Project Status: Active - The project has reached a stable, usable
  9 | state and is being actively
 10 | developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org)
 11 | [![R build
 12 | status](https://github.com/gaborcsardi/franc/workflows/R-CMD-check/badge.svg)](https://github.com/gaborcsardi/franc/actions)
 13 | [![](https://www.r-pkg.org/badges/version/franc)](https://www.r-pkg.org/pkg/franc)
 14 | [![CRAN RStudio mirror
 15 | downloads](https://cranlogs.r-pkg.org/badges/franc)](https://www.r-pkg.org/pkg/franc)
 16 | [![Coverage
 17 | Status](https://img.shields.io/codecov/c/github/gaborcsardi/franc/master.svg)](https://codecov.io/github/gaborcsardi/franc?branch=master)
 18 | 
 19 | <!-- badges: end -->
 20 | 
 21 | Franc has no external dependencies and supports 310 languages; all
 22 | languages spoken by more than one million speakers. Franc is a port of
 23 | the JavaScript project of the same name, see
 24 | <https://github.com/wooorm/franc>.
 25 | 
 26 | ## Installation
 27 | 
 28 | ``` r
 29 | install.packages("franc")
 30 | ```
 31 | 
 32 | ## Usage
 33 | 
 34 | ``` r
 35 | library(franc)
 36 | ```
 37 | 
 38 | Simply supply the text, and franc detects its language:
 39 | 
 40 | ``` r
 41 | franc("Alle menslike wesens word vry")
 42 | ```
 43 | 
 44 |     #> [1] "afr"
 45 | 
 46 | ``` r
 47 | franc("এটি একটি ভাষা একক IBM স্ক্রিপ্ট")
 48 | ```
 49 | 
 50 |     #> [1] "ben"
 51 | 
 52 | ``` r
 53 | franc("Alle mennesker er født frie og")
 54 | ```
 55 | 
 56 |     #> [1] "nno"
 57 | 
 58 | ``` r
 59 | head(franc_all("O Brasil caiu 26 posições"))
 60 | ```
 61 | 
 62 |     #>   language     score
 63 |     #> 1      por 1.0000000
 64 |     #> 2      src 0.8800937
 65 |     #> 3      glg 0.8702576
 66 |     #> 4      snn 0.8637002
 67 |     #> 5      bos 0.8168618
 68 |     #> 6      hrv 0.8103044
 69 | 
 70 | `und` is the `undefined` language; it is returned if the input is too
 71 | short (shorter than 10 characters by default).
 72 | 
 73 | ``` r
 74 | franc("the")
 75 | ```
 76 | 
 77 |     #> [1] "und"
 78 | 
 79 | ``` r
 80 | franc("the", min_length = 3)
 81 | ```
 82 | 
 83 |     #> [1] "sco"
 84 | 
 85 | You can provide a whitelist or a blacklist:
 86 | 
 87 | ``` r
 88 | franc_all("O Brasil caiu 26 posições",
 89 |     whitelist = c("por", "src", "glg", "spa"))
 90 | ```
 91 | 
 92 |     #>   language     score
 93 |     #> 1      por 1.0000000
 94 |     #> 2      src 0.8800937
 95 |     #> 3      glg 0.8702576
 96 |     #> 4      spa 0.7995316
 97 | 
 98 | ``` r
 99 | head(franc_all("O Brasil caiu 26 posições",
100 |     blacklist = c("src", "glg", "lav")))
101 | ```
102 | 
103 |     #>   language     score
104 |     #> 1      por 1.0000000
105 |     #> 2      snn 0.8637002
106 |     #> 3      bos 0.8168618
107 |     #> 4      hrv 0.8103044
108 |     #> 5      cat 0.8065574
109 |     #> 6      spa 0.7995316
110 | 
111 | ## Supported languages
112 | 
113 | The R version of franc supports 310 languages. By default only the
114 | languages with at least 1 million speakers are used; this is 175
115 | languages. The `min_speakers` argument can relax this and allows using
116 | more languages:
117 | 
118 | ``` r
119 | head(franc_all("O Brasil caiu 26 posições"))
120 | ```
121 | 
122 |     #>   language     score
123 |     #> 1      por 1.0000000
124 |     #> 2      src 0.8800937
125 |     #> 3      glg 0.8702576
126 |     #> 4      snn 0.8637002
127 |     #> 5      bos 0.8168618
128 |     #> 6      hrv 0.8103044
129 | 
130 | ``` r
131 | head(franc_all("O Brasil caiu 26 posições", min_speakers = 0))
132 | ```
133 | 
134 |     #>   language     score
135 |     #> 1      lad 1.0000000
136 |     #> 2      por 0.9442724
137 |     #> 3      pov 0.8788147
138 |     #> 4      ast 0.8677576
139 |     #> 5      roh 0.8363556
140 |     #> 6      src 0.8310482
141 | 
142 | ## License
143 | 
144 | MIT © [Mango Solutions](https://github.com/mangothecat), Titus Wormer,
145 | Maciej Ceglowski, Jacob R. Rideout, Kent S. Johnson, Gábor Csárdi
146 | 


--------------------------------------------------------------------------------
/R/expressions.R:
--------------------------------------------------------------------------------
 1 | 
 2 | expressions <- list(
 3 |   "cmn" = paste0(
 4 |     "[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DB5\u4E00-\u9FCC\uF900-\uFA6D\uFA70-\uFAD9]|",
 5 |     "[\U00020000-\U0002A3FF]|[\U0002A800-\U0002B3FF]|",
 6 |     "[\U0002A400-\U0002A6D6]|[\U0002A700-\U0002A7FF]|",
 7 |     "[\U0002B400-\U0002B734]|[\U0002B740-\U0002B7FF]|",
 8 |     "[\U0002B800-\U0002B81D]|",
 9 |     "[\U0002F800-\U0002FA1D]"
10 |   ),
11 |   "Latin" = "[A-Za-z\u00AA\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u02E0-\u02E4\u1D00-\u1D25\u1D2C-\u1D5C\u1D62-\u1D65\u1D6B-\u1D77\u1D79-\u1DBE\u1E00-\u1EFF\u2071\u207F\u2090-\u209C\u212A\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA78E\uA790-\uA7AD\uA7B0\uA7B1\uA7F7-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB5F\uAB64\uFB00-\uFB06\uFF21-\uFF3A\uFF41-\uFF5A]",
12 |   "Cyrillic" = "[\u0400-\u0484\u0487-\u052F\u1D2B\u1D78\u2DE0-\u2DFF\uA640-\uA69D\uA69F]",
13 |   "Arabic" = paste0(
14 |     "[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061E\u0620-\u063F\u0641-\u064A\u0656-\u065F\u066A-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u08A0-\u08B2\u08E4-\u08FF\uFB50-\uFBC1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFD\uFE70-\uFE74\uFE76-\uFEFC]|",
15 |     "[\U00010E60-\U00010E7E]|",
16 |     "[\U0001EE00-\U0001EE03]|[\U0001EE05-\U0001EE1F]|",
17 |     "[\U0001EE21\U0001EE22\U0001EE24\U0001EE27\U0001EE29-\U0001EE32]|",
18 |     "[\U0001EE34-\U0001EE37\U0001EE39\U0001EE3B\U0001EE42\U0001EE47\U0001EE49\U0001EE4B\U0001EE4D-\U0001EE4F]|",
19 |     "[\U0001EE51-\U0001EE52\U0001EE54\U0001EE57\U0001EE59\U0001EE5b\U0001EE5d\U0001EE5f\U0001EE61\U0001EE62\U0001EE64]|",
20 |     "[\U0001EE67-\U0001EE6a\U0001EE6c-\U0001EE72\U0001EE74-\U0001EE77\U0001EE79-\U0001EE7c]|",
21 |     "[\U0001EE7e\U0001EE80-\U0001EE89\U0001EE8b-\U0001EE9b\U0001EEa1-\U0001EEa3\U0001EEa5-\U0001EEa9]|",
22 |     "[\U0001EEab-\U0001EEbb\U0001EEf0-\U0001EEf1]"
23 |   ),
24 |   "ben" = "[\u0980-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7\u09C8\u09CB-\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09E6-\u09FB]",
25 |   "Devanagari" = "[\u0900-\u0950\u0953-\u0963\u0966-\u097F\uA8E0-\uA8FB]",
26 |   "jpn" = "[\u3041-\u3096\u309D-\u309F]|\uD82C\uDC01|\uD83C\uDE00|[\u30A1-\u30FA\u30FD-\u30FF\u31F0-\u31FF\u32D0-\u32FE\u3300-\u3357\uFF66-\uFF6F\uFF71-\uFF9D]|\uD82C\uDC00",
27 |   "kor" = "[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]",
28 |   "tel" = "[\u0C00-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C58\u0C59\u0C60-\u0C63\u0C66-\u0C6F\u0C78-\u0C7F]",
29 |   "tam" = "[\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD0\u0BD7\u0BE6-\u0BFA]",
30 |   "guj" = "[\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0-\u0AE3\u0AE6-\u0AF1]",
31 |   "mal" = "[\u0D01-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D-\u0D44\u0D46-\u0D48\u0D4A-\u0D4E\u0D57\u0D60-\u0D63\u0D66-\u0D75\u0D79-\u0D7F]",
32 |   "kan" = "[\u0C81-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CDE\u0CE0-\u0CE3\u0CE6-\u0CEF\u0CF1\u0CF2]",
33 |   "mya" = "[\u1000-\u109F\uA9E0-\uA9FE\uAA60-\uAA7F]",
34 |   "ori" = "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3C-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B5C\u0B5D\u0B5F-\u0B63\u0B66-\u0B77]",
35 |   "pan" = "[\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A59-\u0A5C\u0A5E\u0A66-\u0A75]",
36 |   "Ethiopic" = "[\u1200-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u135D-\u137C\u1380-\u1399\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E]",
37 |   "tha" = "[\u0E01-\u0E3A\u0E40-\u0E5B]",
38 |   "sin" = paste0(
39 |     "[\u0D82\u0D83\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DE6-\u0DEF\u0DF2-\u0DF4]|",
40 |     "[\U000111E1-\U000111F4]"
41 |     ),
42 |   "ell" = paste0(
43 |     "[\u0370-\u0373\u0375-\u0377\u037A-\u037D\u037F\u0384\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03E1\u03F0-\u03FF\u1D26-\u1D2A\u1D5D-\u1D61\u1D66-\u1D6A\u1DBF\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2126\uAB65]|",
44 |     "[\U00010140-\U0001018C\U000101A0]|",
45 |     "[\U0001D200-\U0001D245]"
46 |   ),
47 |   "khm" = "[\u1780-\u17DD\u17E0-\u17E9\u17F0-\u17F9\u19E0-\u19FF]",
48 |   "hye" = "[\u0531-\u0556\u0559-\u055F\u0561-\u0587\u058A\u058D-\u058F\uFB13-\uFB17]",
49 |   "sat" = "[\u1C50-\u1C7F]",
50 |   "Tibetan" = "[\u0F00-\u0F47\u0F49-\u0F6C\u0F71-\u0F97\u0F99-\u0FBC\u0FBE-\u0FCC\u0FCE-\u0FD4\u0FD9\u0FDA]",
51 |   "Hebrew" = "[\u0591-\u05C7\u05D0-\u05EA\u05F0-\u05F4\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFB4F]",
52 |   "kat" = "[\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u10FF\u2D00-\u2D25\u2D27\u2D2D]",
53 |   "lao" = "[\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD-\u0EB9\u0EBB-\u0EBD\u0EC0-\u0EC4\u0EC6\u0EC8-\u0ECD\u0ED0-\u0ED9\u0EDC-\u0EDF]",
54 |   "iii" = "[\uA000-\uA48C\uA490-\uA4C6]",
55 |   "aii" = "[\u0700-\u070D\u070F-\u074A\u074D-\u074F]",
56 |   "div" = "[\u0780-\u07B1]",
57 |   "vai" = "[\uA500-\uA62B]",
58 |   "Canadian_Aboriginal" = "[\u1400-\u167F\u18B0-\u18F5]"
59 | )
60 | 


--------------------------------------------------------------------------------
/R/franc.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ## This is mostly after
  3 | ## https://github.com/wooorm/franc/blob/master/lib/franc.js
  4 | ##
  5 | ## Note that this happens at build time
  6 | 
  7 | #' @importFrom jsonlite fromJSON
  8 | 
  9 | data <- jsonlite::fromJSON(
 10 |   system.file("data.json", package = packageName()),
 11 |   simplifyVector = FALSE)
 12 | 
 13 | for (script in names(data)) {
 14 |   for (language in names(data[[script]])) {
 15 |     model <- strsplit(data[[script]][[language]], '|', fixed = TRUE)[[1]]
 16 |     model <- structure(seq_along(model) - 1L, names = model)
 17 |     data[[script]][[language]] <- model
 18 |   }
 19 | }
 20 | 
 21 | MAX_DIFFERENCE <- 300
 22 | 
 23 | filter_languages <- function(languages, whitelist = NULL,
 24 |                              blacklist = NULL) {
 25 | 
 26 |   l3 <- names(languages)
 27 | 
 28 |   if (!is.null(whitelist)) l3 <- intersect(l3, whitelist)
 29 |   if (!is.null(blacklist)) l3 <- setdiff(l3, blacklist)
 30 | 
 31 |   languages[l3]
 32 | }
 33 | 
 34 | lang <- function(x, score = 1) {
 35 |   data.frame(
 36 |     stringsAsFactors = FALSE,
 37 |     language = unname(x),
 38 |     score = unname(score)
 39 |   )
 40 | }
 41 | 
 42 | und <- function() lang("und")
 43 | 
 44 | #' List of probable languages for a text
 45 | #'
 46 | #' Returns the scores for all languages that use the same script
 47 | #' as the input text, in decreasing order of probability. The score
 48 | #' is calculated from the distances of the trigram distributions
 49 | #' in the input text and in the language model. The closer the languages,
 50 | #' the higher the score. Scores are scaled, so that the closest language
 51 | #' will have a score of 1.
 52 | #'
 53 | #' @param text A string constant. Should be at least \code{min_length}
 54 | #'    characters long, this is 10 characters by default.
 55 | #'    Only the first \code{max_length} characters are used (2048 by
 56 | #'    default), to make the detection reasonably fast.
 57 | #' @param min_speakers Languages with at least this many speakers are
 58 | #'   checked. By default this is one million. Set it to zero to
 59 | #'   include all languages known by franc. See also \code{\link{speakers}}.
 60 | #' @param whitelist List of three letter language codes to check against.
 61 | #' @param blacklist List of three letter language codes not to check
 62 | #'   against.
 63 | #' @param min_length Minimum number of characters required in the text.
 64 | #' @param max_length Maximum number of characters used from the text.
 65 | #'   By default only the first 2048 characters are used.
 66 | #' @return A data frame with columns \code{language} and \code{score}.
 67 | #'   The \code{language} column contains the three letter ISO-639-3
 68 | #'   language codes. The \code{score} column contains the scores.
 69 | #'
 70 | #' @encoding utf8
 71 | #' @seealso \code{\link{franc}} if you only want the top result,
 72 | #'   \code{\link{speakers}}.
 73 | #' @export
 74 | #' @examples
 75 | #' head(franc_all("O Brasil caiu 26 posições"))
 76 | #'
 77 | #' ## Provide a whitelist:
 78 | #' franc_all("O Brasil caiu 26 posições",
 79 | #'   whitelist = c("por", "src", "glg", "spa"))
 80 | #'
 81 | #' ## Provide a blacklist:
 82 | #' head(franc_all("O Brasil caiu 26 posições",
 83 | #'   blacklist = c("src", "glg", "lav")))
 84 | 
 85 | franc_all <- function(text, min_speakers = 1000000, whitelist = NULL,
 86 |                       blacklist = NULL, min_length = 10,
 87 |                       max_length = 2048) {
 88 | 
 89 |   text <- as.character(text)
 90 |   stopifnot(length(text) == 1, !is.na(text))
 91 | 
 92 |   if (nchar(text) < min_length) return(und())
 93 |   text <- substr(text, 1, max_length)
 94 | 
 95 |   script <- get_top_script(text)
 96 | 
 97 |   ## Returns NULL if script is unknown
 98 |   if (is.null(script)) return(und())
 99 | 
100 |   ## Return the language if script is a single language
101 |   if (! script %in% names(data)) return(lang(script))
102 | 
103 |   ## Candidate languages
104 |   if (min_speakers != 0) {
105 |     enough_speakers <- speakers$language[speakers$speakers >= min_speakers]
106 |     if (is.null(whitelist)) {
107 |       whitelist <- enough_speakers
108 |     } else {
109 |       whitelist <- intersect(whitelist, enough_speakers)
110 |     }
111 |   }
112 | 
113 |   languages <- filter_languages(
114 |     data[[script]],
115 |     whitelist = whitelist,
116 |     blacklist = blacklist
117 |   )
118 | 
119 |   trigrams <- clean_trigrams_table(text)
120 |   dist <- get_distances(trigrams, languages)
121 | 
122 |   lang(names(dist), normalize(text, dist))
123 | }
124 | 
125 | #' Detect the language of a string
126 | #'
127 | #' @param text A string constant. Should be at least \code{min_length}
128 | #'    characters long, this is 10 characters by default.
129 | #'    Only the first \code{max_length} characters are used (2048 by
130 | #'    default), to make the detection reasonably fast.
131 | #' @param min_speakers Languages with at least this many speakers are
132 | #'   checked. By default this is one million. Set it to zero to
133 | #'   include all languages known by franc. See also \code{\link{speakers}}.
134 | #' @param whitelist List of three letter language codes to check against.
135 | #' @param blacklist List of three letter language codes not to check
136 | #'   against.
137 | #' @param min_length Minimum number of characters required in the text.
138 | #' @param max_length Maximum number of characters used from the text.
139 | #'   By default only the first 2048 characters are used.
140 | #' @return A three letter ISO-639-3 language code, the detected
141 | #'   language of the text. \code{"und"} is returned for too short input.
142 | #'
143 | #' @encoding utf8
144 | #' @seealso \code{\link{franc_all}} for scores against many languages,
145 | #'   \code{\link{speakers}}.
146 | #' @export
147 | #' @examples
148 | #' ## afr
149 | #' franc("Alle menslike wesens word vry")
150 | #'
151 | #' ## nno
152 | #' franc("Alle mennesker er født frie og")
153 | #'
154 | #' ## Too short, und
155 | #' franc("the")
156 | #'
157 | #' ## You can change what’s too short (default: 10), sco
158 | #' franc("the", min_length = 3)
159 | 
160 | franc <- function(text, min_speakers = 1000000, whitelist = NULL,
161 |                   blacklist = NULL, min_length = 10, max_length = 2048) {
162 | 
163 |   franc_all(text, min_speakers = min_speakers, whitelist = whitelist,
164 |             blacklist = blacklist, min_length = min_length,
165 |             max_length = max_length)$language[[1]]
166 | }
167 | 


--------------------------------------------------------------------------------
/inst/speakers.json:
--------------------------------------------------------------------------------
   1 | {
   2 |   "ote": {
   3 |     "speakers": 200000,
   4 |     "iso6391": null,
   5 |     "iso6392": null,
   6 |     "name": "Mezquital Otomi"
   7 |   },
   8 |   "tsz": {
   9 |     "speakers": 100000,
  10 |     "iso6391": null,
  11 |     "iso6392": null,
  12 |     "name": "Purepecha"
  13 |   },
  14 |   "ndo": {
  15 |     "speakers": 1000000,
  16 |     "iso6391": "ng",
  17 |     "iso6392": "ndo",
  18 |     "name": "Ndonga"
  19 |   },
  20 |   "epo": {
  21 |     "speakers": 2000000,
  22 |     "iso6391": "eo",
  23 |     "iso6392": "epo",
  24 |     "name": "Esperanto"
  25 |   },
  26 |   "kek": {
  27 |     "speakers": 500000,
  28 |     "iso6391": null,
  29 |     "iso6392": null,
  30 |     "name": "Kekchí"
  31 |   },
  32 |   "quc": {
  33 |     "speakers": 300000,
  34 |     "iso6391": null,
  35 |     "iso6392": null,
  36 |     "name": "K'iche'"
  37 |   },
  38 |   "hus": {
  39 |     "speakers": 150000,
  40 |     "iso6391": null,
  41 |     "iso6392": null,
  42 |     "name": "Huastec"
  43 |   },
  44 |   "snn": {
  45 |     "speakers": 1240000,
  46 |     "iso6391": null,
  47 |     "iso6392": null,
  48 |     "name": "Siona"
  49 |   },
  50 |   "jiv": {
  51 |     "speakers": 35000,
  52 |     "iso6391": null,
  53 |     "iso6392": null,
  54 |     "name": "Shuar"
  55 |   },
  56 |   "niv": {
  57 |     "speakers": 1000,
  58 |     "iso6391": null,
  59 |     "iso6392": null,
  60 |     "name": "Gilyak"
  61 |   },
  62 |   "arl": {
  63 |     "speakers": 150,
  64 |     "iso6391": null,
  65 |     "iso6392": null,
  66 |     "name": "Arabela"
  67 |   },
  68 |   "arn": {
  69 |     "speakers": 440000,
  70 |     "iso6391": null,
  71 |     "iso6392": "arn",
  72 |     "name": "Mapudungun"
  73 |   },
  74 |   "asm": {
  75 |     "speakers": 14604000,
  76 |     "iso6391": "as",
  77 |     "iso6392": "asm",
  78 |     "name": "Assamese"
  79 |   },
  80 |   "ast": {
  81 |     "speakers": 100000,
  82 |     "iso6391": null,
  83 |     "iso6392": "ast",
  84 |     "name": "Asturian"
  85 |   },
  86 |   "acu": {
  87 |     "speakers": 4500,
  88 |     "iso6391": null,
  89 |     "iso6392": null,
  90 |     "name": "Achuar-Shiwiar"
  91 |   },
  92 |   "awa": {
  93 |     "speakers": 38261000,
  94 |     "iso6391": null,
  95 |     "iso6392": "awa",
  96 |     "name": "Awadhi"
  97 |   },
  98 |   "ayr": {
  99 |     "speakers": 2200000,
 100 |     "iso6391": null,
 101 |     "iso6392": null,
 102 |     "name": "Central Aymara"
 103 |   },
 104 |   "azj": {
 105 |     "speakers": 13869000,
 106 |     "iso6391": null,
 107 |     "iso6392": null,
 108 |     "name": "North Azerbaijani"
 109 |   },
 110 |   "wwa": {
 111 |     "speakers": 40000,
 112 |     "iso6391": null,
 113 |     "iso6392": null,
 114 |     "name": "Waama"
 115 |   },
 116 |   "amh": {
 117 |     "speakers": 23000000,
 118 |     "iso6391": "am",
 119 |     "iso6392": "amh",
 120 |     "name": "Amharic"
 121 |   },
 122 |   "arb": {
 123 |     "speakers": 280000000,
 124 |     "iso6391": null,
 125 |     "iso6392": null,
 126 |     "name": "Standard Arabic"
 127 |   },
 128 |   "amc": {
 129 |     "speakers": 720,
 130 |     "iso6391": null,
 131 |     "iso6392": null,
 132 |     "name": "Amahuaca"
 133 |   },
 134 |   "alt": {
 135 |     "speakers": 68000,
 136 |     "iso6391": null,
 137 |     "iso6392": "alt",
 138 |     "name": "Southern Altai"
 139 |   },
 140 |   "als": {
 141 |     "speakers": 5000000,
 142 |     "iso6391": null,
 143 |     "iso6392": null,
 144 |     "name": "Tosk Albanian"
 145 |   },
 146 |   "abk": {
 147 |     "speakers": 105000,
 148 |     "iso6391": "ab",
 149 |     "iso6392": "abk",
 150 |     "name": "Abkhazian"
 151 |   },
 152 |   "aka": {
 153 |     "speakers": 7000000,
 154 |     "iso6391": "ak",
 155 |     "iso6392": "aka",
 156 |     "name": "Akan"
 157 |   },
 158 |   "hye": {
 159 |     "speakers": 6836000,
 160 |     "iso6391": "hy",
 161 |     "iso6392": "hye",
 162 |     "name": "Armenian"
 163 |   },
 164 |   "ajg": {
 165 |     "speakers": 200,
 166 |     "iso6391": null,
 167 |     "iso6392": null,
 168 |     "name": "Aja (Benin)"
 169 |   },
 170 |   "aii": {
 171 |     "speakers": 1000000,
 172 |     "iso6391": null,
 173 |     "iso6392": null,
 174 |     "name": "Assyrian Neo-Aramaic"
 175 |   },
 176 |   "ace": {
 177 |     "speakers": 3000000,
 178 |     "iso6391": null,
 179 |     "iso6392": "ace",
 180 |     "name": "Achinese"
 181 |   },
 182 |   "agr": {
 183 |     "speakers": 27500,
 184 |     "iso6391": null,
 185 |     "iso6392": null,
 186 |     "name": "Aguaruna"
 187 |   },
 188 |   "afr": {
 189 |     "speakers": 6365000,
 190 |     "iso6391": "af",
 191 |     "iso6392": "afr",
 192 |     "name": "Afrikaans"
 193 |   },
 194 |   "amr": {
 195 |     "speakers": 500,
 196 |     "iso6391": null,
 197 |     "iso6392": null,
 198 |     "name": "Amarakaeri"
 199 |   },
 200 |   "ame": {
 201 |     "speakers": 6000,
 202 |     "iso6391": null,
 203 |     "iso6392": null,
 204 |     "name": "Yanesha'"
 205 |   },
 206 |   "boa": {
 207 |     "speakers": 2000,
 208 |     "iso6391": null,
 209 |     "iso6392": null,
 210 |     "name": "Bora"
 211 |   },
 212 |   "ban": {
 213 |     "speakers": 3800000,
 214 |     "iso6391": null,
 215 |     "iso6392": "ban",
 216 |     "name": "Balinese"
 217 |   },
 218 |   "bba": {
 219 |     "speakers": 400000,
 220 |     "iso6391": null,
 221 |     "iso6392": null,
 222 |     "name": "Baatonum"
 223 |   },
 224 |   "bci": {
 225 |     "speakers": 2130000,
 226 |     "iso6391": null,
 227 |     "iso6392": null,
 228 |     "name": "Baoulé"
 229 |   },
 230 |   "bpy": {
 231 |     "speakers": 77500,
 232 |     "iso6391": null,
 233 |     "iso6392": null,
 234 |     "name": "Bishnupriya"
 235 |   },
 236 |   "bre": {
 237 |     "speakers": 500000,
 238 |     "iso6391": "br",
 239 |     "iso6392": "bre",
 240 |     "name": "Breton"
 241 |   },
 242 |   "buc": {
 243 |     "speakers": 39000,
 244 |     "iso6391": null,
 245 |     "iso6392": null,
 246 |     "name": "Bushi"
 247 |   },
 248 |   "bug": {
 249 |     "speakers": 3500000,
 250 |     "iso6391": null,
 251 |     "iso6392": "bug",
 252 |     "name": "Buginese"
 253 |   },
 254 |   "bul": {
 255 |     "speakers": 9000000,
 256 |     "iso6391": "bg",
 257 |     "iso6392": "bul",
 258 |     "name": "Bulgarian"
 259 |   },
 260 |   "bvi": {
 261 |     "speakers": 16000,
 262 |     "iso6391": null,
 263 |     "iso6392": null,
 264 |     "name": "Belanda Viri"
 265 |   },
 266 |   "bcl": {
 267 |     "speakers": 4000000,
 268 |     "iso6391": null,
 269 |     "iso6392": null,
 270 |     "name": "Central Bikol"
 271 |   },
 272 |   "mya": {
 273 |     "speakers": 31000000,
 274 |     "iso6391": "my",
 275 |     "iso6392": "mya",
 276 |     "name": "Burmese"
 277 |   },
 278 |   "bem": {
 279 |     "speakers": 2150000,
 280 |     "iso6391": null,
 281 |     "iso6392": "bem",
 282 |     "name": "Bemba (Zambia)"
 283 |   },
 284 |   "ben": {
 285 |     "speakers": 196000000,
 286 |     "iso6391": "bn",
 287 |     "iso6392": "ben",
 288 |     "name": "Bengali"
 289 |   },
 290 |   "bfa": {
 291 |     "speakers": 480000,
 292 |     "iso6391": null,
 293 |     "iso6392": null,
 294 |     "name": "Bari"
 295 |   },
 296 |   "bgp": {
 297 |     "speakers": 1735000,
 298 |     "iso6391": null,
 299 |     "iso6392": null,
 300 |     "name": "Eastern Balochi"
 301 |   },
 302 |   "bho": {
 303 |     "speakers": 25000000,
 304 |     "iso6391": null,
 305 |     "iso6392": "bho",
 306 |     "name": "Bhojpuri"
 307 |   },
 308 |   "bam": {
 309 |     "speakers": 3000000,
 310 |     "iso6391": "bm",
 311 |     "iso6392": "bam",
 312 |     "name": "Bambara"
 313 |   },
 314 |   "bis": {
 315 |     "speakers": 1200,
 316 |     "iso6391": "bi",
 317 |     "iso6392": "bis",
 318 |     "name": "Bislama"
 319 |   },
 320 |   "bjj": {
 321 |     "speakers": 9500000,
 322 |     "iso6391": null,
 323 |     "iso6392": null,
 324 |     "name": "Kanauji"
 325 |   },
 326 |   "eus": {
 327 |     "speakers": 588000,
 328 |     "iso6391": "eu",
 329 |     "iso6392": "eus",
 330 |     "name": "Basque"
 331 |   },
 332 |   "ces": {
 333 |     "speakers": 12000000,
 334 |     "iso6391": "cs",
 335 |     "iso6392": "ces",
 336 |     "name": "Czech"
 337 |   },
 338 |   "chj": {
 339 |     "speakers": 22000,
 340 |     "iso6391": null,
 341 |     "iso6392": null,
 342 |     "name": "Ojitlán Chinantec"
 343 |   },
 344 |   "cic": {
 345 |     "speakers": 1000,
 346 |     "iso6391": null,
 347 |     "iso6392": null,
 348 |     "name": "Chickasaw"
 349 |   },
 350 |   "cjk": {
 351 |     "speakers": 1004000,
 352 |     "iso6391": null,
 353 |     "iso6392": null,
 354 |     "name": "Chokwe"
 355 |   },
 356 |   "cjs": {
 357 |     "speakers": 10000,
 358 |     "iso6391": null,
 359 |     "iso6392": null,
 360 |     "name": "Shor"
 361 |   },
 362 |   "cab": {
 363 |     "speakers": 94500,
 364 |     "iso6391": null,
 365 |     "iso6392": null,
 366 |     "name": "Garifuna"
 367 |   },
 368 |   "cmn": {
 369 |     "speakers": 885000000,
 370 |     "iso6391": null,
 371 |     "iso6392": null,
 372 |     "name": "Mandarin Chinese"
 373 |   },
 374 |   "cak": {
 375 |     "speakers": 132200,
 376 |     "iso6391": null,
 377 |     "iso6392": null,
 378 |     "name": "Kaqchikel"
 379 |   },
 380 |   "cni": {
 381 |     "speakers": 45000,
 382 |     "iso6391": null,
 383 |     "iso6392": null,
 384 |     "name": "Asháninka"
 385 |   },
 386 |   "cof": {
 387 |     "speakers": 2300,
 388 |     "iso6391": null,
 389 |     "iso6392": null,
 390 |     "name": "Colorado"
 391 |   },
 392 |   "con": {
 393 |     "speakers": 1400,
 394 |     "iso6391": null,
 395 |     "iso6392": null,
 396 |     "name": "Cofán"
 397 |   },
 398 |   "cos": {
 399 |     "speakers": 341000,
 400 |     "iso6391": "co",
 401 |     "iso6392": "cos",
 402 |     "name": "Corsican"
 403 |   },
 404 |   "cot": {
 405 |     "speakers": 300,
 406 |     "iso6391": null,
 407 |     "iso6392": null,
 408 |     "name": "Caquinte"
 409 |   },
 410 |   "cpu": {
 411 |     "speakers": 5000,
 412 |     "iso6391": null,
 413 |     "iso6392": null,
 414 |     "name": "Pichis Ashéninka"
 415 |   },
 416 |   "crs": {
 417 |     "speakers": 72700,
 418 |     "iso6391": null,
 419 |     "iso6392": null,
 420 |     "name": "Seselwa Creole French"
 421 |   },
 422 |   "csa": {
 423 |     "speakers": 1000,
 424 |     "iso6391": null,
 425 |     "iso6392": null,
 426 |     "name": "Chiltepec Chinantec"
 427 |   },
 428 |   "csw": {
 429 |     "speakers": 60000,
 430 |     "iso6391": null,
 431 |     "iso6392": null,
 432 |     "name": "Swampy Cree"
 433 |   },
 434 |   "ceb": {
 435 |     "speakers": 15230000,
 436 |     "iso6391": null,
 437 |     "iso6392": "ceb",
 438 |     "name": "Cebuano"
 439 |   },
 440 |   "cat": {
 441 |     "speakers": 4353000,
 442 |     "iso6391": "ca",
 443 |     "iso6392": "cat",
 444 |     "name": "Catalan"
 445 |   },
 446 |   "cax": {
 447 |     "speakers": 47086,
 448 |     "iso6391": null,
 449 |     "iso6392": null,
 450 |     "name": "Chiquitano"
 451 |   },
 452 |   "cbr": {
 453 |     "speakers": 1500,
 454 |     "iso6391": null,
 455 |     "iso6392": null,
 456 |     "name": "Cashibo-Cacataibo"
 457 |   },
 458 |   "prq": {
 459 |     "speakers": 9000,
 460 |     "iso6391": null,
 461 |     "iso6392": null,
 462 |     "name": "Ashéninka Perené"
 463 |   },
 464 |   "cha": {
 465 |     "speakers": 78000,
 466 |     "iso6391": "ch",
 467 |     "iso6392": "cha",
 468 |     "name": "Chamorro"
 469 |   },
 470 |   "cbs": {
 471 |     "speakers": 2000,
 472 |     "iso6391": null,
 473 |     "iso6392": null,
 474 |     "name": "Cashinahua"
 475 |   },
 476 |   "cbt": {
 477 |     "speakers": 6000,
 478 |     "iso6391": null,
 479 |     "iso6392": null,
 480 |     "name": "Chayahuita"
 481 |   },
 482 |   "cbu": {
 483 |     "speakers": 3000,
 484 |     "iso6391": null,
 485 |     "iso6392": null,
 486 |     "name": "Candoshi-Shapra"
 487 |   },
 488 |   "ddn": {
 489 |     "speakers": 72000,
 490 |     "iso6391": null,
 491 |     "iso6392": null,
 492 |     "name": "Dendi (Benin)"
 493 |   },
 494 |   "dyu": {
 495 |     "speakers": 2700000,
 496 |     "iso6391": null,
 497 |     "iso6392": "dyu",
 498 |     "name": "Dyula"
 499 |   },
 500 |   "nld": {
 501 |     "speakers": 21000000,
 502 |     "iso6391": "nl",
 503 |     "iso6392": "nld",
 504 |     "name": "Dutch"
 505 |   },
 506 |   "dyo": {
 507 |     "speakers": 260000,
 508 |     "iso6391": null,
 509 |     "iso6392": null,
 510 |     "name": "Jola-Fonyi"
 511 |   },
 512 |   "dag": {
 513 |     "speakers": 540000,
 514 |     "iso6391": null,
 515 |     "iso6392": null,
 516 |     "name": "Dagbani"
 517 |   },
 518 |   "dan": {
 519 |     "speakers": 5292000,
 520 |     "iso6391": "da",
 521 |     "iso6392": "dan",
 522 |     "name": "Danish"
 523 |   },
 524 |   "div": {
 525 |     "speakers": 287000,
 526 |     "iso6391": "dv",
 527 |     "iso6392": "div",
 528 |     "name": "Dhivehi"
 529 |   },
 530 |   "dzo": {
 531 |     "speakers": 400000,
 532 |     "iso6391": "dz",
 533 |     "iso6392": "dzo",
 534 |     "name": "Dzongkha"
 535 |   },
 536 |   "dip": {
 537 |     "speakers": 1350000,
 538 |     "iso6391": null,
 539 |     "iso6392": null,
 540 |     "name": "Northeastern Dinka"
 541 |   },
 542 |   "dga": {
 543 |     "speakers": 501000,
 544 |     "iso6391": null,
 545 |     "iso6392": null,
 546 |     "name": "Southern Dagaare"
 547 |   },
 548 |   "gjn": {
 549 |     "speakers": 250000,
 550 |     "iso6391": null,
 551 |     "iso6392": null,
 552 |     "name": "Gonja"
 553 |   },
 554 |   "ewe": {
 555 |     "speakers": 2477600,
 556 |     "iso6391": "ee",
 557 |     "iso6392": "ewe",
 558 |     "name": "Ewe"
 559 |   },
 560 |   "kal": {
 561 |     "speakers": 47000,
 562 |     "iso6391": "kl",
 563 |     "iso6392": "kal",
 564 |     "name": "Kalaallisut"
 565 |   },
 566 |   "bin": {
 567 |     "speakers": 1000000,
 568 |     "iso6391": null,
 569 |     "iso6392": "bin",
 570 |     "name": "Bini"
 571 |   },
 572 |   "ike": {
 573 |     "speakers": 21500,
 574 |     "iso6391": null,
 575 |     "iso6392": null,
 576 |     "name": "Eastern Canadian Inuktitut"
 577 |   },
 578 |   "rgn": {
 579 |     "speakers": 20112,
 580 |     "iso6391": null,
 581 |     "iso6392": null,
 582 |     "name": "Romagnol"
 583 |   },
 584 |   "eng": {
 585 |     "speakers": 322000000,
 586 |     "iso6391": "en",
 587 |     "iso6392": "eng",
 588 |     "name": "English"
 589 |   },
 590 |   "est": {
 591 |     "speakers": 1100000,
 592 |     "iso6391": "et",
 593 |     "iso6392": "est",
 594 |     "name": "Estonian"
 595 |   },
 596 |   "eve": {
 597 |     "speakers": 7170,
 598 |     "iso6391": null,
 599 |     "iso6392": null,
 600 |     "name": "Even"
 601 |   },
 602 |   "evn": {
 603 |     "speakers": 40000,
 604 |     "iso6391": null,
 605 |     "iso6392": null,
 606 |     "name": "Evenki"
 607 |   },
 608 |   "fao": {
 609 |     "speakers": 47000,
 610 |     "iso6391": "fo",
 611 |     "iso6392": "fao",
 612 |     "name": "Faroese"
 613 |   },
 614 |   "wln": {
 615 |     "speakers": 600000,
 616 |     "iso6391": "wa",
 617 |     "iso6392": "wln",
 618 |     "name": "Walloon"
 619 |   },
 620 |   "fij": {
 621 |     "speakers": 650000,
 622 |     "iso6391": "fj",
 623 |     "iso6392": "fij",
 624 |     "name": "Fijian"
 625 |   },
 626 |   "fuc": {
 627 |     "speakers": 22000000,
 628 |     "iso6391": null,
 629 |     "iso6392": null,
 630 |     "name": "Pulaar"
 631 |   },
 632 |   "fra": {
 633 |     "speakers": 124000000,
 634 |     "iso6391": "fr",
 635 |     "iso6392": "fra",
 636 |     "name": "French"
 637 |   },
 638 |   "fur": {
 639 |     "speakers": 600000,
 640 |     "iso6391": null,
 641 |     "iso6392": "fur",
 642 |     "name": "Friulian"
 643 |   },
 644 |   "fon": {
 645 |     "speakers": 1436000,
 646 |     "iso6391": null,
 647 |     "iso6392": "fon",
 648 |     "name": "Fon"
 649 |   },
 650 |   "fin": {
 651 |     "speakers": 6000000,
 652 |     "iso6391": "fi",
 653 |     "iso6392": "fin",
 654 |     "name": "Finnish"
 655 |   },
 656 |   "pcd": {
 657 |     "speakers": 500000,
 658 |     "iso6391": null,
 659 |     "iso6392": null,
 660 |     "name": "Picard"
 661 |   },
 662 |   "hau": {
 663 |     "speakers": 22000000,
 664 |     "iso6391": "ha",
 665 |     "iso6392": "hau",
 666 |     "name": "Hausa"
 667 |   },
 668 |   "gug": {
 669 |     "speakers": 12000,
 670 |     "iso6391": null,
 671 |     "iso6392": null,
 672 |     "name": "Paraguayan Guaraní"
 673 |   },
 674 |   "guj": {
 675 |     "speakers": 44000000,
 676 |     "iso6391": "gu",
 677 |     "iso6392": "guj",
 678 |     "name": "Gujarati"
 679 |   },
 680 |   "guu": {
 681 |     "speakers": 17640,
 682 |     "iso6391": null,
 683 |     "iso6392": null,
 684 |     "name": "Yanomamö"
 685 |   },
 686 |   "gyr": {
 687 |     "speakers": 5933,
 688 |     "iso6391": null,
 689 |     "iso6392": null,
 690 |     "name": "Guarayu"
 691 |   },
 692 |   "gag": {
 693 |     "speakers": 198000,
 694 |     "iso6391": null,
 695 |     "iso6392": null,
 696 |     "name": "Gagauz"
 697 |   },
 698 |   "gbm": {
 699 |     "speakers": 2920000,
 700 |     "iso6391": null,
 701 |     "iso6392": null,
 702 |     "name": "Garhwali"
 703 |   },
 704 |   "deu": {
 705 |     "speakers": 121000000,
 706 |     "iso6391": "de",
 707 |     "iso6392": "deu",
 708 |     "name": "German"
 709 |   },
 710 |   "pov": {
 711 |     "speakers": 580000,
 712 |     "iso6391": null,
 713 |     "iso6392": null,
 714 |     "name": "Upper Guinea Crioulo"
 715 |   },
 716 |   "gaa": {
 717 |     "speakers": 1000000,
 718 |     "iso6391": null,
 719 |     "iso6392": "gaa",
 720 |     "name": "Ga"
 721 |   },
 722 |   "gkp": {
 723 |     "speakers": 808000,
 724 |     "iso6391": null,
 725 |     "iso6392": null,
 726 |     "name": "Guinea Kpelle"
 727 |   },
 728 |   "ada": {
 729 |     "speakers": 1000000,
 730 |     "iso6391": null,
 731 |     "iso6392": "ada",
 732 |     "name": "Adangme"
 733 |   },
 734 |   "gla": {
 735 |     "speakers": 63653,
 736 |     "iso6391": "gd",
 737 |     "iso6392": "gla",
 738 |     "name": "Scottish Gaelic"
 739 |   },
 740 |   "gld": {
 741 |     "speakers": 12003,
 742 |     "iso6391": null,
 743 |     "iso6392": null,
 744 |     "name": "Nanai"
 745 |   },
 746 |   "ell": {
 747 |     "speakers": 12258540,
 748 |     "iso6391": "el",
 749 |     "iso6392": "ell",
 750 |     "name": "Modern Greek (1453-)"
 751 |   },
 752 |   "gle": {
 753 |     "speakers": 260000,
 754 |     "iso6391": "ga",
 755 |     "iso6392": "gle",
 756 |     "name": "Irish"
 757 |   },
 758 |   "glg": {
 759 |     "speakers": 4000000,
 760 |     "iso6391": "gl",
 761 |     "iso6392": "glg",
 762 |     "name": "Galician"
 763 |   },
 764 |   "gno": {
 765 |     "speakers": 1950000,
 766 |     "iso6391": null,
 767 |     "iso6392": null,
 768 |     "name": "Northern Gondi"
 769 |   },
 770 |   "gax": {
 771 |     "speakers": 30000000,
 772 |     "iso6391": null,
 773 |     "iso6392": null,
 774 |     "name": "Borana-Arsi-Guji Oromo"
 775 |   },
 776 |   "kat": {
 777 |     "speakers": 4103000,
 778 |     "iso6391": "ka",
 779 |     "iso6392": "kat",
 780 |     "name": "Georgian"
 781 |   },
 782 |   "guc": {
 783 |     "speakers": 305000,
 784 |     "iso6391": null,
 785 |     "iso6392": null,
 786 |     "name": "Wayuu"
 787 |   },
 788 |   "hea": {
 789 |     "speakers": 820000,
 790 |     "iso6391": null,
 791 |     "iso6392": null,
 792 |     "name": "Northern Qiandong Miao"
 793 |   },
 794 |   "hun": {
 795 |     "speakers": 14500000,
 796 |     "iso6391": "hu",
 797 |     "iso6392": "hun",
 798 |     "name": "Hungarian"
 799 |   },
 800 |   "haw": {
 801 |     "speakers": 8000,
 802 |     "iso6391": null,
 803 |     "iso6392": "haw",
 804 |     "name": "Hawaiian"
 805 |   },
 806 |   "huu": {
 807 |     "speakers": 2900,
 808 |     "iso6391": null,
 809 |     "iso6392": null,
 810 |     "name": "Murui Huitoto"
 811 |   },
 812 |   "hat": {
 813 |     "speakers": 7382000,
 814 |     "iso6391": "ht",
 815 |     "iso6392": "hat",
 816 |     "name": "Haitian"
 817 |   },
 818 |   "heb": {
 819 |     "speakers": 4612000,
 820 |     "iso6391": "he",
 821 |     "iso6392": "heb",
 822 |     "name": "Hebrew"
 823 |   },
 824 |   "hil": {
 825 |     "speakers": 7000000,
 826 |     "iso6391": null,
 827 |     "iso6392": "hil",
 828 |     "name": "Hiligaynon"
 829 |   },
 830 |   "hin": {
 831 |     "speakers": 182000000,
 832 |     "iso6391": "hi",
 833 |     "iso6392": "hin",
 834 |     "name": "Hindi"
 835 |   },
 836 |   "hlt": {
 837 |     "speakers": 30000,
 838 |     "iso6391": null,
 839 |     "iso6392": null,
 840 |     "name": "Matu Chin"
 841 |   },
 842 |   "hms": {
 843 |     "speakers": 8200000,
 844 |     "iso6391": null,
 845 |     "iso6392": null,
 846 |     "name": "Southern Qiandong Miao"
 847 |   },
 848 |   "hna": {
 849 |     "speakers": 327000,
 850 |     "iso6391": null,
 851 |     "iso6392": null,
 852 |     "name": "Mina (Cameroon)"
 853 |   },
 854 |   "cnh": {
 855 |     "speakers": 446264,
 856 |     "iso6391": null,
 857 |     "iso6392": null,
 858 |     "name": "Haka Chin"
 859 |   },
 860 |   "hne": {
 861 |     "speakers": 17500000,
 862 |     "iso6391": null,
 863 |     "iso6392": null,
 864 |     "name": "Chhattisgarhi"
 865 |   },
 866 |   "hni": {
 867 |     "speakers": 747000,
 868 |     "iso6391": null,
 869 |     "iso6392": null,
 870 |     "name": "Hani"
 871 |   },
 872 |   "hoc": {
 873 |     "speakers": 1500000,
 874 |     "iso6391": null,
 875 |     "iso6392": null,
 876 |     "name": "Ho"
 877 |   },
 878 |   "ilo": {
 879 |     "speakers": 8000000,
 880 |     "iso6391": null,
 881 |     "iso6392": "ilo",
 882 |     "name": "Iloko"
 883 |   },
 884 |   "ibo": {
 885 |     "speakers": 17000000,
 886 |     "iso6391": "ig",
 887 |     "iso6392": "ibo",
 888 |     "name": "Igbo"
 889 |   },
 890 |   "ita": {
 891 |     "speakers": 63000000,
 892 |     "iso6391": "it",
 893 |     "iso6392": "ita",
 894 |     "name": "Italian"
 895 |   },
 896 |   "isl": {
 897 |     "speakers": 282845,
 898 |     "iso6391": "is",
 899 |     "iso6392": "isl",
 900 |     "name": "Icelandic"
 901 |   },
 902 |   "ind": {
 903 |     "speakers": 140000000,
 904 |     "iso6391": "id",
 905 |     "iso6392": "ind",
 906 |     "name": "Indonesian"
 907 |   },
 908 |   "nds": {
 909 |     "speakers": 2600000,
 910 |     "iso6391": null,
 911 |     "iso6392": "nds",
 912 |     "name": "Low German"
 913 |   },
 914 |   "ibb": {
 915 |     "speakers": 3186000,
 916 |     "iso6391": null,
 917 |     "iso6392": null,
 918 |     "name": "Ibibio"
 919 |   },
 920 |   "iii": {
 921 |     "speakers": 1600000,
 922 |     "iso6391": "ii",
 923 |     "iso6392": "iii",
 924 |     "name": "Sichuan Yi"
 925 |   },
 926 |   "jpn": {
 927 |     "speakers": 125000000,
 928 |     "iso6391": "ja",
 929 |     "iso6392": "jpn",
 930 |     "name": "Japanese"
 931 |   },
 932 |   "jav": {
 933 |     "speakers": 75500800,
 934 |     "iso6391": "jv",
 935 |     "iso6392": "jav",
 936 |     "name": "Javanese"
 937 |   },
 938 |   "kfa": {
 939 |     "speakers": 241000,
 940 |     "iso6391": null,
 941 |     "iso6392": null,
 942 |     "name": "Kodava"
 943 |   },
 944 |   "kha": {
 945 |     "speakers": 865000,
 946 |     "iso6391": null,
 947 |     "iso6392": "kha",
 948 |     "name": "Khasi"
 949 |   },
 950 |   "khk": {
 951 |     "speakers": 2330000,
 952 |     "iso6391": null,
 953 |     "iso6392": null,
 954 |     "name": "Halh Mongolian"
 955 |   },
 956 |   "khm": {
 957 |     "speakers": 7063200,
 958 |     "iso6391": "km",
 959 |     "iso6392": "khm",
 960 |     "name": "Central Khmer"
 961 |   },
 962 |   "khr": {
 963 |     "speakers": 293580,
 964 |     "iso6391": null,
 965 |     "iso6392": null,
 966 |     "name": "Kharia"
 967 |   },
 968 |   "kas": {
 969 |     "speakers": 4381000,
 970 |     "iso6391": "ks",
 971 |     "iso6392": "kas",
 972 |     "name": "Kashmiri"
 973 |   },
 974 |   "kir": {
 975 |     "speakers": 2631420,
 976 |     "iso6391": "ky",
 977 |     "iso6392": "kir",
 978 |     "name": "Kirghiz"
 979 |   },
 980 |   "kjh": {
 981 |     "speakers": 60000,
 982 |     "iso6391": null,
 983 |     "iso6392": null,
 984 |     "name": "Khakas"
 985 |   },
 986 |   "ckb": {
 987 |     "speakers": 20000000,
 988 |     "iso6391": null,
 989 |     "iso6392": null,
 990 |     "name": "Central Kurdish"
 991 |   },
 992 |   "kaz": {
 993 |     "speakers": 8000000,
 994 |     "iso6391": "kk",
 995 |     "iso6392": "kaz",
 996 |     "name": "Kazakh"
 997 |   },
 998 |   "knc": {
 999 |     "speakers": 3500000,
1000 |     "iso6391": null,
1001 |     "iso6392": null,
1002 |     "name": "Central Kanuri"
1003 |   },
1004 |   "kng": {
1005 |     "speakers": 1000000,
1006 |     "iso6391": null,
1007 |     "iso6392": null,
1008 |     "name": "Koongo"
1009 |   },
1010 |   "koi": {
1011 |     "speakers": 12500000,
1012 |     "iso6391": null,
1013 |     "iso6392": null,
1014 |     "name": "Komi-Permyak"
1015 |   },
1016 |   "koo": {
1017 |     "speakers": 361709,
1018 |     "iso6391": null,
1019 |     "iso6392": null,
1020 |     "name": "Konzo"
1021 |   },
1022 |   "kor": {
1023 |     "speakers": 75000000,
1024 |     "iso6391": "ko",
1025 |     "iso6392": "kor",
1026 |     "name": "Korean"
1027 |   },
1028 |   "kqn": {
1029 |     "speakers": 276000,
1030 |     "iso6391": null,
1031 |     "iso6392": null,
1032 |     "name": "Kaonde"
1033 |   },
1034 |   "kri": {
1035 |     "speakers": 480000,
1036 |     "iso6391": null,
1037 |     "iso6392": null,
1038 |     "name": "Krio"
1039 |   },
1040 |   "krl": {
1041 |     "speakers": 80000,
1042 |     "iso6391": null,
1043 |     "iso6392": "krl",
1044 |     "name": "Karelian"
1045 |   },
1046 |   "ksw": {
1047 |     "speakers": 2000000,
1048 |     "iso6391": null,
1049 |     "iso6392": null,
1050 |     "name": "S'gaw Karen"
1051 |   },
1052 |   "kwi": {
1053 |     "speakers": 21000,
1054 |     "iso6391": null,
1055 |     "iso6392": null,
1056 |     "name": "Awa-Cuaiquer"
1057 |   },
1058 |   "kbp": {
1059 |     "speakers": 1200000,
1060 |     "iso6391": null,
1061 |     "iso6392": null,
1062 |     "name": "Kabiyè"
1063 |   },
1064 |   "xsm": {
1065 |     "speakers": 200000,
1066 |     "iso6391": null,
1067 |     "iso6392": null,
1068 |     "name": "Kasem"
1069 |   },
1070 |   "kde": {
1071 |     "speakers": 1260000,
1072 |     "iso6391": null,
1073 |     "iso6392": null,
1074 |     "name": "Makonde"
1075 |   },
1076 |   "kea": {
1077 |     "speakers": 393943,
1078 |     "iso6391": null,
1079 |     "iso6392": null,
1080 |     "name": "Kabuverdianu"
1081 |   },
1082 |   "kan": {
1083 |     "speakers": 33663000,
1084 |     "iso6391": "kn",
1085 |     "iso6392": "kan",
1086 |     "name": "Kannada"
1087 |   },
1088 |   "kmr": {
1089 |     "speakers": 8000000,
1090 |     "iso6391": null,
1091 |     "iso6392": null,
1092 |     "name": "Northern Kurdish"
1093 |   },
1094 |   "lia": {
1095 |     "speakers": 335000,
1096 |     "iso6391": null,
1097 |     "iso6392": null,
1098 |     "name": "West-Central Limba"
1099 |   },
1100 |   "lin": {
1101 |     "speakers": 8400000,
1102 |     "iso6391": "ln",
1103 |     "iso6392": "lin",
1104 |     "name": "Lingala"
1105 |   },
1106 |   "lit": {
1107 |     "speakers": 4000000,
1108 |     "iso6391": "lt",
1109 |     "iso6392": "lit",
1110 |     "name": "Lithuanian"
1111 |   },
1112 |   "lad": {
1113 |     "speakers": 120000,
1114 |     "iso6391": null,
1115 |     "iso6392": "lad",
1116 |     "name": "Ladino"
1117 |   },
1118 |   "lav": {
1119 |     "speakers": 1550000,
1120 |     "iso6391": "lv",
1121 |     "iso6392": "lav",
1122 |     "name": "Latvian"
1123 |   },
1124 |   "lob": {
1125 |     "speakers": 442000,
1126 |     "iso6391": null,
1127 |     "iso6392": null,
1128 |     "name": "Lobi"
1129 |   },
1130 |   "lot": {
1131 |     "speakers": 135000,
1132 |     "iso6391": null,
1133 |     "iso6392": null,
1134 |     "name": "Otuho"
1135 |   },
1136 |   "loz": {
1137 |     "speakers": 71841,
1138 |     "iso6391": null,
1139 |     "iso6392": "loz",
1140 |     "name": "Lozi"
1141 |   },
1142 |   "ltz": {
1143 |     "speakers": 335518,
1144 |     "iso6391": "lb",
1145 |     "iso6392": "ltz",
1146 |     "name": "Luxembourgish"
1147 |   },
1148 |   "lua": {
1149 |     "speakers": 6300000,
1150 |     "iso6391": null,
1151 |     "iso6392": "lua",
1152 |     "name": "Luba-Lulua"
1153 |   },
1154 |   "lue": {
1155 |     "speakers": 35800,
1156 |     "iso6391": null,
1157 |     "iso6392": null,
1158 |     "name": "Luvale"
1159 |   },
1160 |   "lug": {
1161 |     "speakers": 3015980,
1162 |     "iso6391": "lg",
1163 |     "iso6392": "lug",
1164 |     "name": "Ganda"
1165 |   },
1166 |   "lus": {
1167 |     "speakers": 541750,
1168 |     "iso6391": null,
1169 |     "iso6392": "lus",
1170 |     "name": "Lushai"
1171 |   },
1172 |   "sme": {
1173 |     "speakers": 4000,
1174 |     "iso6391": "se",
1175 |     "iso6392": "sme",
1176 |     "name": "Northern Sami"
1177 |   },
1178 |   "mad": {
1179 |     "speakers": 10000000,
1180 |     "iso6391": null,
1181 |     "iso6392": "mad",
1182 |     "name": "Madurese"
1183 |   },
1184 |   "mah": {
1185 |     "speakers": 43900,
1186 |     "iso6391": "mh",
1187 |     "iso6392": "mah",
1188 |     "name": "Marshallese"
1189 |   },
1190 |   "mar": {
1191 |     "speakers": 64783000,
1192 |     "iso6391": "mr",
1193 |     "iso6392": "mar",
1194 |     "name": "Marathi"
1195 |   },
1196 |   "maz": {
1197 |     "speakers": 350000,
1198 |     "iso6391": null,
1199 |     "iso6392": null,
1200 |     "name": "Central Mazahua"
1201 |   },
1202 |   "mcd": {
1203 |     "speakers": 950,
1204 |     "iso6391": null,
1205 |     "iso6392": null,
1206 |     "name": "Sharanahua"
1207 |   },
1208 |   "mcf": {
1209 |     "speakers": 1280,
1210 |     "iso6391": null,
1211 |     "iso6392": null,
1212 |     "name": "Matsés"
1213 |   },
1214 |   "men": {
1215 |     "speakers": 1480000,
1216 |     "iso6391": null,
1217 |     "iso6392": "men",
1218 |     "name": "Mende (Sierra Leone)"
1219 |   },
1220 |   "mic": {
1221 |     "speakers": 8100,
1222 |     "iso6391": null,
1223 |     "iso6392": "mic",
1224 |     "name": "Mi'kmaq"
1225 |   },
1226 |   "min": {
1227 |     "speakers": 6500000,
1228 |     "iso6391": null,
1229 |     "iso6392": "min",
1230 |     "name": "Minangkabau"
1231 |   },
1232 |   "miq": {
1233 |     "speakers": 160000,
1234 |     "iso6391": null,
1235 |     "iso6392": null,
1236 |     "name": "Mískito"
1237 |   },
1238 |   "mkd": {
1239 |     "speakers": 2500000,
1240 |     "iso6391": "mk",
1241 |     "iso6392": "mkd",
1242 |     "name": "Macedonian"
1243 |   },
1244 |   "mlt": {
1245 |     "speakers": 330000,
1246 |     "iso6391": "mt",
1247 |     "iso6392": "mlt",
1248 |     "name": "Maltese"
1249 |   },
1250 |   "mos": {
1251 |     "speakers": 4600000,
1252 |     "iso6391": null,
1253 |     "iso6392": "mos",
1254 |     "name": "Mossi"
1255 |   },
1256 |   "mri": {
1257 |     "speakers": 70000,
1258 |     "iso6391": "mi",
1259 |     "iso6392": "mri",
1260 |     "name": "Maori"
1261 |   },
1262 |   "mve": {
1263 |     "speakers": 12104000,
1264 |     "iso6391": null,
1265 |     "iso6392": null,
1266 |     "name": "Marwari (Pakistan)"
1267 |   },
1268 |   "mxi": {
1269 |     "speakers": 0,
1270 |     "iso6391": null,
1271 |     "iso6392": null,
1272 |     "name": "Mozarabic"
1273 |   },
1274 |   "mxv": {
1275 |     "speakers": 65000,
1276 |     "iso6391": null,
1277 |     "iso6392": null,
1278 |     "name": "Metlatónoc Mixtec"
1279 |   },
1280 |   "mag": {
1281 |     "speakers": 10821000,
1282 |     "iso6391": null,
1283 |     "iso6392": "mag",
1284 |     "name": "Magahi"
1285 |   },
1286 |   "mzi": {
1287 |     "speakers": 11000,
1288 |     "iso6391": null,
1289 |     "iso6392": null,
1290 |     "name": "Ixcatlán Mazatec"
1291 |   },
1292 |   "emk": {
1293 |     "speakers": 2140300,
1294 |     "iso6391": null,
1295 |     "iso6392": null,
1296 |     "name": "Eastern Maninkakan"
1297 |   },
1298 |   "mai": {
1299 |     "speakers": 34700000,
1300 |     "iso6391": null,
1301 |     "iso6392": "mai",
1302 |     "name": "Maithili"
1303 |   },
1304 |   "kmb": {
1305 |     "speakers": 3000000,
1306 |     "iso6391": null,
1307 |     "iso6392": "kmb",
1308 |     "name": "Kimbundu"
1309 |   },
1310 |   "mam": {
1311 |     "speakers": 157000,
1312 |     "iso6391": null,
1313 |     "iso6392": null,
1314 |     "name": "Mam"
1315 |   },
1316 |   "lun": {
1317 |     "speakers": 3000000,
1318 |     "iso6391": null,
1319 |     "iso6392": "lun",
1320 |     "name": "Lunda"
1321 |   },
1322 |   "mal": {
1323 |     "speakers": 34014000,
1324 |     "iso6391": "ml",
1325 |     "iso6392": "mal",
1326 |     "name": "Malayalam"
1327 |   },
1328 |   "umb": {
1329 |     "speakers": 4000000,
1330 |     "iso6391": null,
1331 |     "iso6392": "umb",
1332 |     "name": "Umbundu"
1333 |   },
1334 |   "plt": {
1335 |     "speakers": 10156900,
1336 |     "iso6391": null,
1337 |     "iso6392": null,
1338 |     "name": "Plateau Malagasy"
1339 |   },
1340 |   "nio": {
1341 |     "speakers": 1063,
1342 |     "iso6391": null,
1343 |     "iso6392": null,
1344 |     "name": "Nganasan"
1345 |   },
1346 |   "njo": {
1347 |     "speakers": 232000,
1348 |     "iso6391": null,
1349 |     "iso6392": null,
1350 |     "name": "Ao Naga"
1351 |   },
1352 |   "nhn": {
1353 |     "speakers": 1376898,
1354 |     "iso6391": null,
1355 |     "iso6392": null,
1356 |     "name": "Central Nahuatl"
1357 |   },
1358 |   "lao": {
1359 |     "speakers": 4000000,
1360 |     "iso6391": "lo",
1361 |     "iso6392": "lao",
1362 |     "name": "Lao"
1363 |   },
1364 |   "nno": {
1365 |     "speakers": 4700000,
1366 |     "iso6391": "nn",
1367 |     "iso6392": "nno",
1368 |     "name": "Norwegian Nynorsk"
1369 |   },
1370 |   "nob": {
1371 |     "speakers": 5000000,
1372 |     "iso6391": "nb",
1373 |     "iso6392": "nob",
1374 |     "name": "Norwegian Bokmål"
1375 |   },
1376 |   "not": {
1377 |     "speakers": 4000,
1378 |     "iso6391": null,
1379 |     "iso6392": null,
1380 |     "name": "Nomatsiguenga"
1381 |   },
1382 |   "nus": {
1383 |     "speakers": 804900,
1384 |     "iso6391": null,
1385 |     "iso6392": null,
1386 |     "name": "Nuer"
1387 |   },
1388 |   "lns": {
1389 |     "speakers": 125000,
1390 |     "iso6391": null,
1391 |     "iso6392": null,
1392 |     "name": "Lamnso'"
1393 |   },
1394 |   "nya": {
1395 |     "speakers": 10000000,
1396 |     "iso6391": "ny",
1397 |     "iso6392": "nya",
1398 |     "name": "Nyanja"
1399 |   },
1400 |   "nym": {
1401 |     "speakers": 926000,
1402 |     "iso6391": null,
1403 |     "iso6392": "nym",
1404 |     "name": "Nyamwezi"
1405 |   },
1406 |   "nyn": {
1407 |     "speakers": 1643193,
1408 |     "iso6391": null,
1409 |     "iso6392": "nyn",
1410 |     "name": "Nyankole"
1411 |   },
1412 |   "nzi": {
1413 |     "speakers": 352500,
1414 |     "iso6391": null,
1415 |     "iso6392": "nzi",
1416 |     "name": "Nzima"
1417 |   },
1418 |   "nep": {
1419 |     "speakers": 16200000,
1420 |     "iso6391": "ne",
1421 |     "iso6392": "nep",
1422 |     "name": "Nepali (macrolanguage)"
1423 |   },
1424 |   "nbl": {
1425 |     "speakers": 588000,
1426 |     "iso6391": "nr",
1427 |     "iso6392": "nbl",
1428 |     "name": "South Ndebele"
1429 |   },
1430 |   "nba": {
1431 |     "speakers": 172000,
1432 |     "iso6391": null,
1433 |     "iso6392": null,
1434 |     "name": "Nyemba"
1435 |   },
1436 |   "nav": {
1437 |     "speakers": 148530,
1438 |     "iso6391": "nv",
1439 |     "iso6392": "nav",
1440 |     "name": "Navajo"
1441 |   },
1442 |   "oss": {
1443 |     "speakers": 588000,
1444 |     "iso6391": "os",
1445 |     "iso6392": "oss",
1446 |     "name": "Ossetian"
1447 |   },
1448 |   "oaa": {
1449 |     "speakers": 295,
1450 |     "iso6391": null,
1451 |     "iso6392": null,
1452 |     "name": "Orok"
1453 |   },
1454 |   "ojb": {
1455 |     "speakers": 35000,
1456 |     "iso6391": null,
1457 |     "iso6392": null,
1458 |     "name": "Northwestern Ojibwa"
1459 |   },
1460 |   "oki": {
1461 |     "speakers": 20000,
1462 |     "iso6391": null,
1463 |     "iso6392": null,
1464 |     "name": "Okiek"
1465 |   },
1466 |   "ori": {
1467 |     "speakers": 31000000,
1468 |     "iso6391": "or",
1469 |     "iso6392": "ori",
1470 |     "name": "Oriya (macrolanguage)"
1471 |   },
1472 |   "pbb": {
1473 |     "speakers": 68487,
1474 |     "iso6391": null,
1475 |     "iso6392": null,
1476 |     "name": "Páez"
1477 |   },
1478 |   "tgk": {
1479 |     "speakers": 4380000,
1480 |     "iso6391": "tg",
1481 |     "iso6392": "tgk",
1482 |     "name": "Tajik"
1483 |   },
1484 |   "tpi": {
1485 |     "speakers": 2000000,
1486 |     "iso6391": null,
1487 |     "iso6392": "tpi",
1488 |     "name": "Tok Pisin"
1489 |   },
1490 |   "pes": {
1491 |     "speakers": 7000000,
1492 |     "iso6391": null,
1493 |     "iso6392": null,
1494 |     "name": "Iranian Persian"
1495 |   },
1496 |   "pis": {
1497 |     "speakers": 350000,
1498 |     "iso6391": null,
1499 |     "iso6392": null,
1500 |     "name": "Pijin"
1501 |   },
1502 |   "pau": {
1503 |     "speakers": 15000,
1504 |     "iso6391": null,
1505 |     "iso6392": "pau",
1506 |     "name": "Palauan"
1507 |   },
1508 |   "pol": {
1509 |     "speakers": 44000000,
1510 |     "iso6391": "pl",
1511 |     "iso6392": "pol",
1512 |     "name": "Polish"
1513 |   },
1514 |   "pon": {
1515 |     "speakers": 27700,
1516 |     "iso6391": null,
1517 |     "iso6392": "pon",
1518 |     "name": "Pohnpeian"
1519 |   },
1520 |   "por": {
1521 |     "speakers": 182000000,
1522 |     "iso6391": "pt",
1523 |     "iso6392": "por",
1524 |     "name": "Portuguese"
1525 |   },
1526 |   "ppl": {
1527 |     "speakers": 20,
1528 |     "iso6391": null,
1529 |     "iso6392": null,
1530 |     "name": "Pipil"
1531 |   },
1532 |   "pwo": {
1533 |     "speakers": 1209800,
1534 |     "iso6391": null,
1535 |     "iso6392": null,
1536 |     "name": "Pwo Western Karen"
1537 |   },
1538 |   "pan": {
1539 |     "speakers": 25700000,
1540 |     "iso6391": "pa",
1541 |     "iso6392": "pan",
1542 |     "name": "Panjabi"
1543 |   },
1544 |   "pam": {
1545 |     "speakers": 2000000,
1546 |     "iso6391": null,
1547 |     "iso6392": "pam",
1548 |     "name": "Pampanga"
1549 |   },
1550 |   "pbu": {
1551 |     "speakers": 9585000,
1552 |     "iso6391": null,
1553 |     "iso6392": null,
1554 |     "name": "Northern Pashto"
1555 |   },
1556 |   "quy": {
1557 |     "speakers": 1000000,
1558 |     "iso6391": null,
1559 |     "iso6392": null,
1560 |     "name": "Ayacucho Quechua"
1561 |   },
1562 |   "qvc": {
1563 |     "speakers": 35000,
1564 |     "iso6391": null,
1565 |     "iso6392": null,
1566 |     "name": "Cajamarca Quechua"
1567 |   },
1568 |   "qva": {
1569 |     "speakers": 65000,
1570 |     "iso6391": null,
1571 |     "iso6392": null,
1572 |     "name": "Ambo-Pasco Quechua"
1573 |   },
1574 |   "qug": {
1575 |     "speakers": 10000000,
1576 |     "iso6391": null,
1577 |     "iso6392": null,
1578 |     "name": "Chimborazo Highland Quichua"
1579 |   },
1580 |   "qvh": {
1581 |     "speakers": 38000,
1582 |     "iso6391": null,
1583 |     "iso6392": null,
1584 |     "name": "Huamalíes-Dos de Mayo Huánuco Quechua"
1585 |   },
1586 |   "qvm": {
1587 |     "speakers": 55000,
1588 |     "iso6391": null,
1589 |     "iso6392": null,
1590 |     "name": "Margos-Yarowilca-Lauricocha Quechua"
1591 |   },
1592 |   "qvn": {
1593 |     "speakers": 40000,
1594 |     "iso6391": null,
1595 |     "iso6392": null,
1596 |     "name": "North Junín Quechua"
1597 |   },
1598 |   "qwh": {
1599 |     "speakers": 300000,
1600 |     "iso6391": null,
1601 |     "iso6392": null,
1602 |     "name": "Huaylas Ancash Quechua"
1603 |   },
1604 |   "qxa": {
1605 |     "speakers": 25000,
1606 |     "iso6391": null,
1607 |     "iso6392": null,
1608 |     "name": "Chiquián Ancash Quechua"
1609 |   },
1610 |   "qxn": {
1611 |     "speakers": 200000,
1612 |     "iso6391": null,
1613 |     "iso6392": null,
1614 |     "name": "Northern Conchucos Ancash Quechua"
1615 |   },
1616 |   "qxu": {
1617 |     "speakers": 16000,
1618 |     "iso6391": null,
1619 |     "iso6392": null,
1620 |     "name": "Arequipa-La Unión Quechua"
1621 |   },
1622 |   "qud": {
1623 |     "speakers": 30000,
1624 |     "iso6391": null,
1625 |     "iso6392": null,
1626 |     "name": "Calderón Highland Quichua"
1627 |   },
1628 |   "quz": {
1629 |     "speakers": 1500000,
1630 |     "iso6391": null,
1631 |     "iso6392": null,
1632 |     "name": "Cusco Quechua"
1633 |   },
1634 |   "run": {
1635 |     "speakers": 6000000,
1636 |     "iso6391": "rn",
1637 |     "iso6392": "run",
1638 |     "name": "Rundi"
1639 |   },
1640 |   "rmy": {
1641 |     "speakers": 1500000,
1642 |     "iso6391": null,
1643 |     "iso6392": null,
1644 |     "name": "Vlax Romani"
1645 |   },
1646 |   "roh": {
1647 |     "speakers": 500000,
1648 |     "iso6391": "rm",
1649 |     "iso6392": "roh",
1650 |     "name": "Romansh"
1651 |   },
1652 |   "ron": {
1653 |     "speakers": 26000000,
1654 |     "iso6391": "ro",
1655 |     "iso6392": "ron",
1656 |     "name": "Romanian"
1657 |   },
1658 |   "rmn": {
1659 |     "speakers": 1000000,
1660 |     "iso6391": null,
1661 |     "iso6392": null,
1662 |     "name": "Balkan Romani"
1663 |   },
1664 |   "rus": {
1665 |     "speakers": 288000000,
1666 |     "iso6391": "ru",
1667 |     "iso6392": "rus",
1668 |     "name": "Russian"
1669 |   },
1670 |   "raj": {
1671 |     "speakers": 12370010,
1672 |     "iso6391": null,
1673 |     "iso6392": "raj",
1674 |     "name": "Rajasthani"
1675 |   },
1676 |   "bel": {
1677 |     "speakers": 10200000,
1678 |     "iso6391": "be",
1679 |     "iso6392": "bel",
1680 |     "name": "Belarusian"
1681 |   },
1682 |   "kin": {
1683 |     "speakers": 9306800,
1684 |     "iso6391": "rw",
1685 |     "iso6392": "kin",
1686 |     "name": "Kinyarwanda"
1687 |   },
1688 |   "rar": {
1689 |     "speakers": 43000,
1690 |     "iso6391": null,
1691 |     "iso6392": "rar",
1692 |     "name": "Rarotongan"
1693 |   },
1694 |   "nso": {
1695 |     "speakers": 3851000,
1696 |     "iso6391": null,
1697 |     "iso6392": "nso",
1698 |     "name": "Pedi"
1699 |   },
1700 |   "san": {
1701 |     "speakers": 194433,
1702 |     "iso6391": "sa",
1703 |     "iso6392": "san",
1704 |     "name": "Sanskrit"
1705 |   },
1706 |   "sat": {
1707 |     "speakers": 6218900,
1708 |     "iso6391": null,
1709 |     "iso6392": "sat",
1710 |     "name": "Santali"
1711 |   },
1712 |   "sco": {
1713 |     "speakers": 1500000,
1714 |     "iso6391": null,
1715 |     "iso6392": "sco",
1716 |     "name": "Scots"
1717 |   },
1718 |   "hrv": {
1719 |     "speakers": 21000000,
1720 |     "iso6391": "hr",
1721 |     "iso6392": "hrv",
1722 |     "name": "Croatian"
1723 |   },
1724 |   "shk": {
1725 |     "speakers": 175000,
1726 |     "iso6391": null,
1727 |     "iso6392": null,
1728 |     "name": "Shilluk"
1729 |   },
1730 |   "shn": {
1731 |     "speakers": 3000000,
1732 |     "iso6391": null,
1733 |     "iso6392": "shn",
1734 |     "name": "Shan"
1735 |   },
1736 |   "shp": {
1737 |     "speakers": 15000,
1738 |     "iso6391": null,
1739 |     "iso6392": null,
1740 |     "name": "Shipibo-Conibo"
1741 |   },
1742 |   "sin": {
1743 |     "speakers": 13218000,
1744 |     "iso6391": "si",
1745 |     "iso6392": "sin",
1746 |     "name": "Sinhala"
1747 |   },
1748 |   "bos": {
1749 |     "speakers": 21000000,
1750 |     "iso6391": "bs",
1751 |     "iso6392": "bos",
1752 |     "name": "Bosnian"
1753 |   },
1754 |   "skr": {
1755 |     "speakers": 15020000,
1756 |     "iso6391": null,
1757 |     "iso6392": null,
1758 |     "name": "Seraiki"
1759 |   },
1760 |   "slk": {
1761 |     "speakers": 5606000,
1762 |     "iso6391": "sk",
1763 |     "iso6392": "slk",
1764 |     "name": "Slovak"
1765 |   },
1766 |   "slv": {
1767 |     "speakers": 2218000,
1768 |     "iso6391": "sl",
1769 |     "iso6392": "slv",
1770 |     "name": "Slovenian"
1771 |   },
1772 |   "sah": {
1773 |     "speakers": 363000,
1774 |     "iso6391": null,
1775 |     "iso6392": "sah",
1776 |     "name": "Yakut"
1777 |   },
1778 |   "smo": {
1779 |     "speakers": 362000,
1780 |     "iso6391": "sm",
1781 |     "iso6392": "smo",
1782 |     "name": "Samoan"
1783 |   },
1784 |   "sna": {
1785 |     "speakers": 7000000,
1786 |     "iso6391": "sn",
1787 |     "iso6392": "sna",
1788 |     "name": "Shona"
1789 |   },
1790 |   "snd": {
1791 |     "speakers": 19675000,
1792 |     "iso6391": "sd",
1793 |     "iso6392": "snd",
1794 |     "name": "Sindhi"
1795 |   },
1796 |   "snk": {
1797 |     "speakers": 1067000,
1798 |     "iso6391": null,
1799 |     "iso6392": "snk",
1800 |     "name": "Soninke"
1801 |   },
1802 |   "som": {
1803 |     "speakers": 8335000,
1804 |     "iso6391": "so",
1805 |     "iso6392": "som",
1806 |     "name": "Somali"
1807 |   },
1808 |   "sot": {
1809 |     "speakers": 4197000,
1810 |     "iso6391": "st",
1811 |     "iso6392": "sot",
1812 |     "name": "Southern Sotho"
1813 |   },
1814 |   "spa": {
1815 |     "speakers": 332000000,
1816 |     "iso6391": "es",
1817 |     "iso6392": "spa",
1818 |     "name": "Spanish"
1819 |   },
1820 |   "src": {
1821 |     "speakers": 1500000,
1822 |     "iso6391": null,
1823 |     "iso6392": null,
1824 |     "name": "Logudorese Sardinian"
1825 |   },
1826 |   "srp": {
1827 |     "speakers": 21000000,
1828 |     "iso6391": "sr",
1829 |     "iso6392": "srp",
1830 |     "name": "Serbian"
1831 |   },
1832 |   "srr": {
1833 |     "speakers": 868800,
1834 |     "iso6391": null,
1835 |     "iso6392": "srr",
1836 |     "name": "Serer"
1837 |   },
1838 |   "ssw": {
1839 |     "speakers": 1670000,
1840 |     "iso6391": "ss",
1841 |     "iso6392": "ssw",
1842 |     "name": "Swati"
1843 |   },
1844 |   "suk": {
1845 |     "speakers": 5000000,
1846 |     "iso6391": null,
1847 |     "iso6392": "suk",
1848 |     "name": "Sukuma"
1849 |   },
1850 |   "sun": {
1851 |     "speakers": 27000000,
1852 |     "iso6391": "su",
1853 |     "iso6392": "sun",
1854 |     "name": "Sundanese"
1855 |   },
1856 |   "sus": {
1857 |     "speakers": 923000,
1858 |     "iso6391": null,
1859 |     "iso6392": "sus",
1860 |     "name": "Susu"
1861 |   },
1862 |   "suz": {
1863 |     "speakers": 37898,
1864 |     "iso6391": null,
1865 |     "iso6392": null,
1866 |     "name": "Sunwar"
1867 |   },
1868 |   "swb": {
1869 |     "speakers": 97300,
1870 |     "iso6391": null,
1871 |     "iso6392": null,
1872 |     "name": "Maore Comorian"
1873 |   },
1874 |   "swe": {
1875 |     "speakers": 9000000,
1876 |     "iso6391": "sv",
1877 |     "iso6392": "swe",
1878 |     "name": "Swedish"
1879 |   },
1880 |   "swh": {
1881 |     "speakers": 30000000,
1882 |     "iso6391": null,
1883 |     "iso6392": null,
1884 |     "name": "Swahili (individual language)"
1885 |   },
1886 |   "sag": {
1887 |     "speakers": 4900000,
1888 |     "iso6391": "sg",
1889 |     "iso6392": "sag",
1890 |     "name": "Sango"
1891 |   },
1892 |   "ton": {
1893 |     "speakers": 123000,
1894 |     "iso6391": "to",
1895 |     "iso6392": "ton",
1896 |     "name": "Tonga (Tonga Islands)"
1897 |   },
1898 |   "taj": {
1899 |     "speakers": 1353311,
1900 |     "iso6391": null,
1901 |     "iso6392": null,
1902 |     "name": "Eastern Tamang"
1903 |   },
1904 |   "tat": {
1905 |     "speakers": 7000000,
1906 |     "iso6391": "tt",
1907 |     "iso6392": "tat",
1908 |     "name": "Tatar"
1909 |   },
1910 |   "tbz": {
1911 |     "speakers": 120000,
1912 |     "iso6391": null,
1913 |     "iso6392": null,
1914 |     "name": "Ditammari"
1915 |   },
1916 |   "tca": {
1917 |     "speakers": 25000,
1918 |     "iso6391": null,
1919 |     "iso6392": null,
1920 |     "name": "Ticuna"
1921 |   },
1922 |   "tel": {
1923 |     "speakers": 73000000,
1924 |     "iso6391": "te",
1925 |     "iso6392": "tel",
1926 |     "name": "Telugu"
1927 |   },
1928 |   "tem": {
1929 |     "speakers": 1200000,
1930 |     "iso6391": null,
1931 |     "iso6392": "tem",
1932 |     "name": "Timne"
1933 |   },
1934 |   "tet": {
1935 |     "speakers": 600000,
1936 |     "iso6391": null,
1937 |     "iso6392": "tet",
1938 |     "name": "Tetum"
1939 |   },
1940 |   "tah": {
1941 |     "speakers": 150000,
1942 |     "iso6391": "ty",
1943 |     "iso6392": "tah",
1944 |     "name": "Tahitian"
1945 |   },
1946 |   "tgl": {
1947 |     "speakers": 14850000,
1948 |     "iso6391": "tl",
1949 |     "iso6392": "tgl",
1950 |     "name": "Tagalog"
1951 |   },
1952 |   "tha": {
1953 |     "speakers": 21000000,
1954 |     "iso6391": "th",
1955 |     "iso6392": "tha",
1956 |     "name": "Thai"
1957 |   },
1958 |   "tir": {
1959 |     "speakers": 6060000,
1960 |     "iso6391": "ti",
1961 |     "iso6392": "tir",
1962 |     "name": "Tigrinya"
1963 |   },
1964 |   "tiv": {
1965 |     "speakers": 2000000,
1966 |     "iso6391": null,
1967 |     "iso6392": "tiv",
1968 |     "name": "Tiv"
1969 |   },
1970 |   "tob": {
1971 |     "speakers": 20000,
1972 |     "iso6391": null,
1973 |     "iso6392": null,
1974 |     "name": "Toba"
1975 |   },
1976 |   "toi": {
1977 |     "speakers": 1105000,
1978 |     "iso6391": null,
1979 |     "iso6392": null,
1980 |     "name": "Tonga (Zambia)"
1981 |   },
1982 |   "toj": {
1983 |     "speakers": 36000,
1984 |     "iso6391": null,
1985 |     "iso6392": null,
1986 |     "name": "Tojolabal"
1987 |   },
1988 |   "taq": {
1989 |     "speakers": 281200,
1990 |     "iso6391": null,
1991 |     "iso6392": null,
1992 |     "name": "Tamasheq"
1993 |   },
1994 |   "top": {
1995 |     "speakers": 80000,
1996 |     "iso6391": null,
1997 |     "iso6392": null,
1998 |     "name": "Papantla Totonac"
1999 |   },
2000 |   "chk": {
2001 |     "speakers": 45000,
2002 |     "iso6391": null,
2003 |     "iso6392": "chk",
2004 |     "name": "Chuukese"
2005 |   },
2006 |   "tsn": {
2007 |     "speakers": 3932000,
2008 |     "iso6391": "tn",
2009 |     "iso6392": "tsn",
2010 |     "name": "Tswana"
2011 |   },
2012 |   "tso": {
2013 |     "speakers": 1500000,
2014 |     "iso6391": "ts",
2015 |     "iso6392": "tso",
2016 |     "name": "Tsonga"
2017 |   },
2018 |   "ctd": {
2019 |     "speakers": 344100,
2020 |     "iso6391": null,
2021 |     "iso6392": null,
2022 |     "name": "Tedim Chin"
2023 |   },
2024 |   "tuk": {
2025 |     "speakers": 5397500,
2026 |     "iso6391": "tk",
2027 |     "iso6392": "tuk",
2028 |     "name": "Turkmen"
2029 |   },
2030 |   "tur": {
2031 |     "speakers": 59000000,
2032 |     "iso6391": "tr",
2033 |     "iso6392": "tur",
2034 |     "name": "Turkish"
2035 |   },
2036 |   "tyv": {
2037 |     "speakers": 200000,
2038 |     "iso6391": null,
2039 |     "iso6392": "tyv",
2040 |     "name": "Tuvinian"
2041 |   },
2042 |   "tzm": {
2043 |     "speakers": 3000000,
2044 |     "iso6391": null,
2045 |     "iso6392": null,
2046 |     "name": "Central Atlas Tamazight"
2047 |   },
2048 |   "tam": {
2049 |     "speakers": 62000000,
2050 |     "iso6391": "ta",
2051 |     "iso6392": "tam",
2052 |     "name": "Tamil"
2053 |   },
2054 |   "bod": {
2055 |     "speakers": 6150000,
2056 |     "iso6391": "bo",
2057 |     "iso6392": "bod",
2058 |     "name": "Tibetan"
2059 |   },
2060 |   "ven": {
2061 |     "speakers": 876409,
2062 |     "iso6391": "ve",
2063 |     "iso6392": "ven",
2064 |     "name": "Venda"
2065 |   },
2066 |   "ura": {
2067 |     "speakers": 3500,
2068 |     "iso6391": null,
2069 |     "iso6392": null,
2070 |     "name": "Urarina"
2071 |   },
2072 |   "urd": {
2073 |     "speakers": 54000000,
2074 |     "iso6391": "ur",
2075 |     "iso6392": "urd",
2076 |     "name": "Urdu"
2077 |   },
2078 |   "uzn": {
2079 |     "speakers": 18386000,
2080 |     "iso6391": null,
2081 |     "iso6392": null,
2082 |     "name": "Northern Uzbek"
2083 |   },
2084 |   "ukr": {
2085 |     "speakers": 41000000,
2086 |     "iso6391": "uk",
2087 |     "iso6392": "ukr",
2088 |     "name": "Ukrainian"
2089 |   },
2090 |   "uig": {
2091 |     "speakers": 7464000,
2092 |     "iso6391": "ug",
2093 |     "iso6392": "uig",
2094 |     "name": "Uighur"
2095 |   },
2096 |   "unr": {
2097 |     "speakers": 1560280,
2098 |     "iso6391": null,
2099 |     "iso6392": null,
2100 |     "name": "Mundari"
2101 |   },
2102 |   "vep": {
2103 |     "speakers": 5800,
2104 |     "iso6391": null,
2105 |     "iso6392": null,
2106 |     "name": "Veps"
2107 |   },
2108 |   "vmw": {
2109 |     "speakers": 2500000,
2110 |     "iso6391": null,
2111 |     "iso6392": null,
2112 |     "name": "Makhuwa"
2113 |   },
2114 |   "vai": {
2115 |     "speakers": 119500,
2116 |     "iso6391": null,
2117 |     "iso6392": "vai",
2118 |     "name": "Vai"
2119 |   },
2120 |   "vie": {
2121 |     "speakers": 66897000,
2122 |     "iso6391": "vi",
2123 |     "iso6392": "vie",
2124 |     "name": "Vietnamese"
2125 |   },
2126 |   "war": {
2127 |     "speakers": 3000000,
2128 |     "iso6391": null,
2129 |     "iso6392": "war",
2130 |     "name": "Waray (Philippines)"
2131 |   },
2132 |   "wol": {
2133 |     "speakers": 2700000,
2134 |     "iso6391": "wo",
2135 |     "iso6392": "wol",
2136 |     "name": "Wolof"
2137 |   },
2138 |   "hsb": {
2139 |     "speakers": 70000,
2140 |     "iso6391": null,
2141 |     "iso6392": "hsb",
2142 |     "name": "Upper Sorbian"
2143 |   },
2144 |   "cym": {
2145 |     "speakers": 580000,
2146 |     "iso6391": "cy",
2147 |     "iso6392": "cym",
2148 |     "name": "Welsh"
2149 |   },
2150 |   "xho": {
2151 |     "speakers": 6858000,
2152 |     "iso6391": "xh",
2153 |     "iso6392": "xho",
2154 |     "name": "Xhosa"
2155 |   },
2156 |   "yao": {
2157 |     "speakers": 1597000,
2158 |     "iso6391": null,
2159 |     "iso6392": "yao",
2160 |     "name": "Yao"
2161 |   },
2162 |   "yap": {
2163 |     "speakers": 6592,
2164 |     "iso6391": null,
2165 |     "iso6392": "yap",
2166 |     "name": "Yapese"
2167 |   },
2168 |   "ydd": {
2169 |     "speakers": 3000000,
2170 |     "iso6391": null,
2171 |     "iso6392": null,
2172 |     "name": "Eastern Yiddish"
2173 |   },
2174 |   "ykg": {
2175 |     "speakers": 1100,
2176 |     "iso6391": null,
2177 |     "iso6392": null,
2178 |     "name": "Northern Yukaghir"
2179 |   },
2180 |   "yor": {
2181 |     "speakers": 20000000,
2182 |     "iso6391": "yo",
2183 |     "iso6392": "yor",
2184 |     "name": "Yoruba"
2185 |   },
2186 |   "yrk": {
2187 |     "speakers": 27273,
2188 |     "iso6391": null,
2189 |     "iso6392": null,
2190 |     "name": "Nenets"
2191 |   },
2192 |   "yua": {
2193 |     "speakers": 700000,
2194 |     "iso6391": null,
2195 |     "iso6392": null,
2196 |     "name": "Yucateco"
2197 |   },
2198 |   "yad": {
2199 |     "speakers": 4000,
2200 |     "iso6391": null,
2201 |     "iso6392": null,
2202 |     "name": "Yagua"
2203 |   },
2204 |   "zam": {
2205 |     "speakers": 80000,
2206 |     "iso6391": null,
2207 |     "iso6392": null,
2208 |     "name": "Miahuatlán Zapotec"
2209 |   },
2210 |   "ztu": {
2211 |     "speakers": 2000,
2212 |     "iso6391": null,
2213 |     "iso6392": null,
2214 |     "name": "Güilá Zapotec"
2215 |   },
2216 |   "zul": {
2217 |     "speakers": 9140000,
2218 |     "iso6391": "zu",
2219 |     "iso6392": "zul",
2220 |     "name": "Zulu"
2221 |   }
2222 | }


--------------------------------------------------------------------------------
/tests/testthat/support.json:
--------------------------------------------------------------------------------
   1 | [
   2 |   {
   3 |     "speakers": 885000000,
   4 |     "name": "Mandarin Chinese",
   5 |     "iso6393": "cmn",
   6 |     "udhr": "cmn_hans",
   7 |     "script": "Han"
   8 |   },
   9 |   {
  10 |     "speakers": 332000000,
  11 |     "name": "Spanish",
  12 |     "iso6393": "spa",
  13 |     "udhr": "spa",
  14 |     "script": "Latin"
  15 |   },
  16 |   {
  17 |     "speakers": 322000000,
  18 |     "name": "English",
  19 |     "iso6393": "eng",
  20 |     "udhr": "eng",
  21 |     "script": "Latin"
  22 |   },
  23 |   {
  24 |     "speakers": 288000000,
  25 |     "name": "Russian",
  26 |     "iso6393": "rus",
  27 |     "udhr": "rus",
  28 |     "script": "Cyrillic"
  29 |   },
  30 |   {
  31 |     "speakers": 280000000,
  32 |     "name": "Standard Arabic",
  33 |     "iso6393": "arb",
  34 |     "udhr": "arb",
  35 |     "script": "Arabic"
  36 |   },
  37 |   {
  38 |     "speakers": 196000000,
  39 |     "name": "Bengali",
  40 |     "iso6393": "ben",
  41 |     "udhr": "ben",
  42 |     "script": "Bengali"
  43 |   },
  44 |   {
  45 |     "speakers": 182000000,
  46 |     "name": "Hindi",
  47 |     "iso6393": "hin",
  48 |     "udhr": "hin",
  49 |     "script": "Devanagari"
  50 |   },
  51 |   {
  52 |     "speakers": 182000000,
  53 |     "name": "Portuguese",
  54 |     "iso6393": "por",
  55 |     "udhr": "por_PT",
  56 |     "script": "Latin"
  57 |   },
  58 |   {
  59 |     "speakers": 140000000,
  60 |     "name": "Indonesian",
  61 |     "iso6393": "ind",
  62 |     "udhr": "ind",
  63 |     "script": "Latin"
  64 |   },
  65 |   {
  66 |     "speakers": 125000000,
  67 |     "name": "Japanese",
  68 |     "iso6393": "jpn",
  69 |     "udhr": "jpn",
  70 |     "script": "Hiragana, Katakana, and Han"
  71 |   },
  72 |   {
  73 |     "speakers": 124000000,
  74 |     "name": "French",
  75 |     "iso6393": "fra",
  76 |     "udhr": "fra",
  77 |     "script": "Latin"
  78 |   },
  79 |   {
  80 |     "speakers": 121000000,
  81 |     "name": "German",
  82 |     "iso6393": "deu",
  83 |     "udhr": "deu_1996",
  84 |     "script": "Latin"
  85 |   },
  86 |   {
  87 |     "speakers": 75500800,
  88 |     "name": "Javanese",
  89 |     "iso6393": "jav",
  90 |     "udhr": "jav",
  91 |     "script": "Latin"
  92 |   },
  93 |   {
  94 |     "speakers": 75000000,
  95 |     "name": "Korean",
  96 |     "iso6393": "kor",
  97 |     "udhr": "kor",
  98 |     "script": "Hangul"
  99 |   },
 100 |   {
 101 |     "speakers": 73000000,
 102 |     "name": "Telugu",
 103 |     "iso6393": "tel",
 104 |     "udhr": "tel",
 105 |     "script": "Telugu"
 106 |   },
 107 |   {
 108 |     "speakers": 66897000,
 109 |     "name": "Vietnamese",
 110 |     "iso6393": "vie",
 111 |     "udhr": "vie",
 112 |     "script": "Latin"
 113 |   },
 114 |   {
 115 |     "speakers": 64783000,
 116 |     "name": "Marathi",
 117 |     "iso6393": "mar",
 118 |     "udhr": "mar",
 119 |     "script": "Devanagari"
 120 |   },
 121 |   {
 122 |     "speakers": 63000000,
 123 |     "name": "Italian",
 124 |     "iso6393": "ita",
 125 |     "udhr": "ita",
 126 |     "script": "Latin"
 127 |   },
 128 |   {
 129 |     "speakers": 62000000,
 130 |     "name": "Tamil",
 131 |     "iso6393": "tam",
 132 |     "udhr": "tam",
 133 |     "script": "Tamil"
 134 |   },
 135 |   {
 136 |     "speakers": 59000000,
 137 |     "name": "Turkish",
 138 |     "iso6393": "tur",
 139 |     "udhr": "tur",
 140 |     "script": "Latin"
 141 |   },
 142 |   {
 143 |     "speakers": 54000000,
 144 |     "name": "Urdu",
 145 |     "iso6393": "urd",
 146 |     "udhr": "urd",
 147 |     "script": "Arabic"
 148 |   },
 149 |   {
 150 |     "speakers": 44000000,
 151 |     "name": "Gujarati",
 152 |     "iso6393": "guj",
 153 |     "udhr": "guj",
 154 |     "script": "Gujarati"
 155 |   },
 156 |   {
 157 |     "speakers": 44000000,
 158 |     "name": "Polish",
 159 |     "iso6393": "pol",
 160 |     "udhr": "pol",
 161 |     "script": "Latin"
 162 |   },
 163 |   {
 164 |     "speakers": 41000000,
 165 |     "name": "Ukrainian",
 166 |     "iso6393": "ukr",
 167 |     "udhr": "ukr",
 168 |     "script": "Cyrillic"
 169 |   },
 170 |   {
 171 |     "speakers": 34700000,
 172 |     "name": "Maithili",
 173 |     "iso6393": "mai",
 174 |     "udhr": "mai",
 175 |     "script": "Devanagari"
 176 |   },
 177 |   {
 178 |     "speakers": 34014000,
 179 |     "name": "Malayalam",
 180 |     "iso6393": "mal",
 181 |     "udhr": "mal",
 182 |     "script": "Malayalam"
 183 |   },
 184 |   {
 185 |     "speakers": 33663000,
 186 |     "name": "Kannada",
 187 |     "iso6393": "kan",
 188 |     "udhr": "kan",
 189 |     "script": "Kannada"
 190 |   },
 191 |   {
 192 |     "speakers": 31000000,
 193 |     "name": "Burmese",
 194 |     "iso6393": "mya",
 195 |     "udhr": "mya",
 196 |     "script": "Myanmar"
 197 |   },
 198 |   {
 199 |     "speakers": 31000000,
 200 |     "name": "Oriya (macrolanguage)",
 201 |     "iso6393": "ori",
 202 |     "udhr": "ori",
 203 |     "script": "Oriya"
 204 |   },
 205 |   {
 206 |     "speakers": 30000000,
 207 |     "name": "Borana-Arsi-Guji Oromo",
 208 |     "iso6393": "gax",
 209 |     "udhr": "gax",
 210 |     "script": "Latin"
 211 |   },
 212 |   {
 213 |     "speakers": 30000000,
 214 |     "name": "Swahili (individual language)",
 215 |     "iso6393": "swh",
 216 |     "udhr": "swh",
 217 |     "script": "Latin"
 218 |   },
 219 |   {
 220 |     "speakers": 27000000,
 221 |     "name": "Sundanese",
 222 |     "iso6393": "sun",
 223 |     "udhr": "sun",
 224 |     "script": "Latin"
 225 |   },
 226 |   {
 227 |     "speakers": 26000000,
 228 |     "name": "Romanian",
 229 |     "iso6393": "ron",
 230 |     "udhr": "ron_2006",
 231 |     "script": "Latin"
 232 |   },
 233 |   {
 234 |     "speakers": 25700000,
 235 |     "name": "Panjabi",
 236 |     "iso6393": "pan",
 237 |     "udhr": "pan",
 238 |     "script": "Gurmukhi"
 239 |   },
 240 |   {
 241 |     "speakers": 25000000,
 242 |     "name": "Bhojpuri",
 243 |     "iso6393": "bho",
 244 |     "udhr": "bho",
 245 |     "script": "Devanagari"
 246 |   },
 247 |   {
 248 |     "speakers": 23000000,
 249 |     "name": "Amharic",
 250 |     "iso6393": "amh",
 251 |     "udhr": "amh",
 252 |     "script": "Ethiopic"
 253 |   },
 254 |   {
 255 |     "speakers": 22000000,
 256 |     "name": "Pulaar",
 257 |     "iso6393": "fuc",
 258 |     "udhr": "fuc",
 259 |     "script": "Latin"
 260 |   },
 261 |   {
 262 |     "speakers": 22000000,
 263 |     "name": "Hausa",
 264 |     "iso6393": "hau",
 265 |     "udhr": "hau_NG",
 266 |     "script": "Latin"
 267 |   },
 268 |   {
 269 |     "speakers": 21000000,
 270 |     "name": "Bosnian",
 271 |     "iso6393": "bos",
 272 |     "udhr": "bos_latn",
 273 |     "script": "Latin"
 274 |   },
 275 |   {
 276 |     "speakers": 21000000,
 277 |     "name": "Bosnian",
 278 |     "iso6393": "bos",
 279 |     "udhr": "bos_cyrl",
 280 |     "script": "Cyrillic"
 281 |   },
 282 |   {
 283 |     "speakers": 21000000,
 284 |     "name": "Croatian",
 285 |     "iso6393": "hrv",
 286 |     "udhr": "hrv",
 287 |     "script": "Latin"
 288 |   },
 289 |   {
 290 |     "speakers": 21000000,
 291 |     "name": "Dutch",
 292 |     "iso6393": "nld",
 293 |     "udhr": "nld",
 294 |     "script": "Latin"
 295 |   },
 296 |   {
 297 |     "speakers": 21000000,
 298 |     "name": "Serbian",
 299 |     "iso6393": "srp",
 300 |     "udhr": "srp_latn",
 301 |     "script": "Latin"
 302 |   },
 303 |   {
 304 |     "speakers": 21000000,
 305 |     "name": "Serbian",
 306 |     "iso6393": "srp",
 307 |     "udhr": "srp_cyrl",
 308 |     "script": "Cyrillic"
 309 |   },
 310 |   {
 311 |     "speakers": 21000000,
 312 |     "name": "Thai",
 313 |     "iso6393": "tha",
 314 |     "udhr": "tha",
 315 |     "script": "Thai"
 316 |   },
 317 |   {
 318 |     "speakers": 20000000,
 319 |     "name": "Central Kurdish",
 320 |     "iso6393": "ckb",
 321 |     "udhr": "ckb",
 322 |     "script": "Latin"
 323 |   },
 324 |   {
 325 |     "speakers": 20000000,
 326 |     "name": "Yoruba",
 327 |     "iso6393": "yor",
 328 |     "udhr": "yor",
 329 |     "script": "Latin"
 330 |   },
 331 |   {
 332 |     "speakers": 18386000,
 333 |     "name": "Northern Uzbek",
 334 |     "iso6393": "uzn",
 335 |     "udhr": "uzn_latn",
 336 |     "script": "Latin"
 337 |   },
 338 |   {
 339 |     "speakers": 18386000,
 340 |     "name": "Northern Uzbek",
 341 |     "iso6393": "uzn",
 342 |     "udhr": "uzn_cyrl",
 343 |     "script": "Cyrillic"
 344 |   },
 345 |   {
 346 |     "speakers": 17000000,
 347 |     "name": "Igbo",
 348 |     "iso6393": "ibo",
 349 |     "udhr": "ibo",
 350 |     "script": "Latin"
 351 |   },
 352 |   {
 353 |     "speakers": 16200000,
 354 |     "name": "Nepali (macrolanguage)",
 355 |     "iso6393": "nep",
 356 |     "udhr": "nep",
 357 |     "script": "Devanagari"
 358 |   },
 359 |   {
 360 |     "speakers": 15230000,
 361 |     "name": "Cebuano",
 362 |     "iso6393": "ceb",
 363 |     "udhr": "ceb",
 364 |     "script": "Latin"
 365 |   },
 366 |   {
 367 |     "speakers": 15020000,
 368 |     "name": "Seraiki",
 369 |     "iso6393": "skr",
 370 |     "udhr": "skr",
 371 |     "script": "Arabic"
 372 |   },
 373 |   {
 374 |     "speakers": 14850000,
 375 |     "name": "Tagalog",
 376 |     "iso6393": "tgl",
 377 |     "udhr": "tgl",
 378 |     "script": "Latin"
 379 |   },
 380 |   {
 381 |     "speakers": 14500000,
 382 |     "name": "Hungarian",
 383 |     "iso6393": "hun",
 384 |     "udhr": "hun",
 385 |     "script": "Latin"
 386 |   },
 387 |   {
 388 |     "speakers": 13869000,
 389 |     "name": "North Azerbaijani",
 390 |     "iso6393": "azj",
 391 |     "udhr": "azj_cyrl",
 392 |     "script": "Cyrillic"
 393 |   },
 394 |   {
 395 |     "speakers": 13869000,
 396 |     "name": "North Azerbaijani",
 397 |     "iso6393": "azj",
 398 |     "udhr": "azj_latn",
 399 |     "script": "Latin"
 400 |   },
 401 |   {
 402 |     "speakers": 13218000,
 403 |     "name": "Sinhala",
 404 |     "iso6393": "sin",
 405 |     "udhr": "sin",
 406 |     "script": "Sinhala"
 407 |   },
 408 |   {
 409 |     "speakers": 12258540,
 410 |     "name": "Modern Greek (1453-)",
 411 |     "iso6393": "ell",
 412 |     "udhr": "ell_monotonic",
 413 |     "script": "Greek"
 414 |   },
 415 |   {
 416 |     "speakers": 12000000,
 417 |     "name": "Czech",
 418 |     "iso6393": "ces",
 419 |     "udhr": "ces",
 420 |     "script": "Latin"
 421 |   },
 422 |   {
 423 |     "speakers": 10821000,
 424 |     "name": "Magahi",
 425 |     "iso6393": "mag",
 426 |     "udhr": "mag",
 427 |     "script": "Devanagari"
 428 |   },
 429 |   {
 430 |     "speakers": 10200000,
 431 |     "name": "Belarusian",
 432 |     "iso6393": "bel",
 433 |     "udhr": "bel",
 434 |     "script": "Cyrillic"
 435 |   },
 436 |   {
 437 |     "speakers": 10156900,
 438 |     "name": "Plateau Malagasy",
 439 |     "iso6393": "plt",
 440 |     "udhr": "plt",
 441 |     "script": "Latin"
 442 |   },
 443 |   {
 444 |     "speakers": 10000000,
 445 |     "name": "Madurese",
 446 |     "iso6393": "mad",
 447 |     "udhr": "mad",
 448 |     "script": "Latin"
 449 |   },
 450 |   {
 451 |     "speakers": 10000000,
 452 |     "name": "Nyanja",
 453 |     "iso6393": "nya",
 454 |     "udhr": "nya_chinyanja",
 455 |     "script": "Latin"
 456 |   },
 457 |   {
 458 |     "speakers": 10000000,
 459 |     "name": "Chimborazo Highland Quichua",
 460 |     "iso6393": "qug",
 461 |     "udhr": "qug",
 462 |     "script": "Latin"
 463 |   },
 464 |   {
 465 |     "speakers": 9306800,
 466 |     "name": "Kinyarwanda",
 467 |     "iso6393": "kin",
 468 |     "udhr": "kin",
 469 |     "script": "Latin"
 470 |   },
 471 |   {
 472 |     "speakers": 9140000,
 473 |     "name": "Zulu",
 474 |     "iso6393": "zul",
 475 |     "udhr": "zul",
 476 |     "script": "Latin"
 477 |   },
 478 |   {
 479 |     "speakers": 9000000,
 480 |     "name": "Bulgarian",
 481 |     "iso6393": "bul",
 482 |     "udhr": "bul",
 483 |     "script": "Cyrillic"
 484 |   },
 485 |   {
 486 |     "speakers": 9000000,
 487 |     "name": "Swedish",
 488 |     "iso6393": "swe",
 489 |     "udhr": "swe",
 490 |     "script": "Latin"
 491 |   },
 492 |   {
 493 |     "speakers": 8400000,
 494 |     "name": "Lingala",
 495 |     "iso6393": "lin",
 496 |     "udhr": "lin",
 497 |     "script": "Latin"
 498 |   },
 499 |   {
 500 |     "speakers": 8335000,
 501 |     "name": "Somali",
 502 |     "iso6393": "som",
 503 |     "udhr": "som",
 504 |     "script": "Latin"
 505 |   },
 506 |   {
 507 |     "speakers": 8200000,
 508 |     "name": "Southern Qiandong Miao",
 509 |     "iso6393": "hms",
 510 |     "udhr": "hms",
 511 |     "script": "Latin"
 512 |   },
 513 |   {
 514 |     "speakers": 8000000,
 515 |     "name": "Iloko",
 516 |     "iso6393": "ilo",
 517 |     "udhr": "ilo",
 518 |     "script": "Latin"
 519 |   },
 520 |   {
 521 |     "speakers": 8000000,
 522 |     "name": "Kazakh",
 523 |     "iso6393": "kaz",
 524 |     "udhr": "kaz",
 525 |     "script": "Cyrillic"
 526 |   },
 527 |   {
 528 |     "speakers": 7464000,
 529 |     "name": "Uighur",
 530 |     "iso6393": "uig",
 531 |     "udhr": "uig_arab",
 532 |     "script": "Arabic"
 533 |   },
 534 |   {
 535 |     "speakers": 7464000,
 536 |     "name": "Uighur",
 537 |     "iso6393": "uig",
 538 |     "udhr": "uig_latn",
 539 |     "script": "Latin"
 540 |   },
 541 |   {
 542 |     "speakers": 7382000,
 543 |     "name": "Haitian",
 544 |     "iso6393": "hat",
 545 |     "udhr": "hat_popular",
 546 |     "script": "Latin"
 547 |   },
 548 |   {
 549 |     "speakers": 7063200,
 550 |     "name": "Central Khmer",
 551 |     "iso6393": "khm",
 552 |     "udhr": "khm",
 553 |     "script": "Khmer"
 554 |   },
 555 |   {
 556 |     "speakers": 7000000,
 557 |     "name": "Akan",
 558 |     "iso6393": "aka",
 559 |     "udhr": "aka_asante",
 560 |     "script": "Latin"
 561 |   },
 562 |   {
 563 |     "speakers": 7000000,
 564 |     "name": "Akan",
 565 |     "iso6393": "aka",
 566 |     "udhr": "aka_fante",
 567 |     "script": "Latin"
 568 |   },
 569 |   {
 570 |     "speakers": 7000000,
 571 |     "name": "Hiligaynon",
 572 |     "iso6393": "hil",
 573 |     "udhr": "hil",
 574 |     "script": "Latin"
 575 |   },
 576 |   {
 577 |     "speakers": 7000000,
 578 |     "name": "Iranian Persian",
 579 |     "iso6393": "pes",
 580 |     "udhr": "pes_1",
 581 |     "script": "Arabic"
 582 |   },
 583 |   {
 584 |     "speakers": 7000000,
 585 |     "name": "Shona",
 586 |     "iso6393": "sna",
 587 |     "udhr": "sna",
 588 |     "script": "Latin"
 589 |   },
 590 |   {
 591 |     "speakers": 7000000,
 592 |     "name": "Tatar",
 593 |     "iso6393": "tat",
 594 |     "udhr": "tat",
 595 |     "script": "Cyrillic"
 596 |   },
 597 |   {
 598 |     "speakers": 6858000,
 599 |     "name": "Xhosa",
 600 |     "iso6393": "xho",
 601 |     "udhr": "xho",
 602 |     "script": "Latin"
 603 |   },
 604 |   {
 605 |     "speakers": 6836000,
 606 |     "name": "Armenian",
 607 |     "iso6393": "hye",
 608 |     "udhr": "hye",
 609 |     "script": "Armenian"
 610 |   },
 611 |   {
 612 |     "speakers": 6500000,
 613 |     "name": "Minangkabau",
 614 |     "iso6393": "min",
 615 |     "udhr": "min",
 616 |     "script": "Latin"
 617 |   },
 618 |   {
 619 |     "speakers": 6365000,
 620 |     "name": "Afrikaans",
 621 |     "iso6393": "afr",
 622 |     "udhr": "afr",
 623 |     "script": "Latin"
 624 |   },
 625 |   {
 626 |     "speakers": 6300000,
 627 |     "name": "Luba-Lulua",
 628 |     "iso6393": "lua",
 629 |     "udhr": "lua",
 630 |     "script": "Latin"
 631 |   },
 632 |   {
 633 |     "speakers": 6218900,
 634 |     "name": "Santali",
 635 |     "iso6393": "sat",
 636 |     "udhr": "sat",
 637 |     "script": "Ol_Chiki"
 638 |   },
 639 |   {
 640 |     "speakers": 6150000,
 641 |     "name": "Tibetan",
 642 |     "iso6393": "bod",
 643 |     "udhr": "bod",
 644 |     "script": "Tibetan"
 645 |   },
 646 |   {
 647 |     "speakers": 6060000,
 648 |     "name": "Tigrinya",
 649 |     "iso6393": "tir",
 650 |     "udhr": "tir",
 651 |     "script": "Ethiopic"
 652 |   },
 653 |   {
 654 |     "speakers": 6000000,
 655 |     "name": "Finnish",
 656 |     "iso6393": "fin",
 657 |     "udhr": "fin",
 658 |     "script": "Latin"
 659 |   },
 660 |   {
 661 |     "speakers": 6000000,
 662 |     "name": "Rundi",
 663 |     "iso6393": "run",
 664 |     "udhr": "run",
 665 |     "script": "Latin"
 666 |   },
 667 |   {
 668 |     "speakers": 5606000,
 669 |     "name": "Slovak",
 670 |     "iso6393": "slk",
 671 |     "udhr": "slk",
 672 |     "script": "Latin"
 673 |   },
 674 |   {
 675 |     "speakers": 5397500,
 676 |     "name": "Turkmen",
 677 |     "iso6393": "tuk",
 678 |     "udhr": "tuk_cyrl",
 679 |     "script": "Cyrillic"
 680 |   },
 681 |   {
 682 |     "speakers": 5397500,
 683 |     "name": "Turkmen",
 684 |     "iso6393": "tuk",
 685 |     "udhr": "tuk_latn",
 686 |     "script": "Latin"
 687 |   },
 688 |   {
 689 |     "speakers": 5292000,
 690 |     "name": "Danish",
 691 |     "iso6393": "dan",
 692 |     "udhr": "dan",
 693 |     "script": "Latin"
 694 |   },
 695 |   {
 696 |     "speakers": 5000000,
 697 |     "name": "Tosk Albanian",
 698 |     "iso6393": "als",
 699 |     "udhr": "als",
 700 |     "script": "Latin"
 701 |   },
 702 |   {
 703 |     "speakers": 5000000,
 704 |     "name": "Norwegian Bokmål",
 705 |     "iso6393": "nob",
 706 |     "udhr": "nob",
 707 |     "script": "Latin"
 708 |   },
 709 |   {
 710 |     "speakers": 5000000,
 711 |     "name": "Sukuma",
 712 |     "iso6393": "suk",
 713 |     "udhr": "suk",
 714 |     "script": "Latin"
 715 |   },
 716 |   {
 717 |     "speakers": 4900000,
 718 |     "name": "Sango",
 719 |     "iso6393": "sag",
 720 |     "udhr": "sag",
 721 |     "script": "Latin"
 722 |   },
 723 |   {
 724 |     "speakers": 4700000,
 725 |     "name": "Norwegian Nynorsk",
 726 |     "iso6393": "nno",
 727 |     "udhr": "nno",
 728 |     "script": "Latin"
 729 |   },
 730 |   {
 731 |     "speakers": 4612000,
 732 |     "name": "Hebrew",
 733 |     "iso6393": "heb",
 734 |     "udhr": "heb",
 735 |     "script": "Hebrew"
 736 |   },
 737 |   {
 738 |     "speakers": 4600000,
 739 |     "name": "Mossi",
 740 |     "iso6393": "mos",
 741 |     "udhr": "mos",
 742 |     "script": "Latin"
 743 |   },
 744 |   {
 745 |     "speakers": 4380000,
 746 |     "name": "Tajik",
 747 |     "iso6393": "tgk",
 748 |     "udhr": "tgk",
 749 |     "script": "Cyrillic"
 750 |   },
 751 |   {
 752 |     "speakers": 4353000,
 753 |     "name": "Catalan",
 754 |     "iso6393": "cat",
 755 |     "udhr": "cat",
 756 |     "script": "Latin"
 757 |   },
 758 |   {
 759 |     "speakers": 4197000,
 760 |     "name": "Southern Sotho",
 761 |     "iso6393": "sot",
 762 |     "udhr": "sot",
 763 |     "script": "Latin"
 764 |   },
 765 |   {
 766 |     "speakers": 4103000,
 767 |     "name": "Georgian",
 768 |     "iso6393": "kat",
 769 |     "udhr": "kat",
 770 |     "script": "Georgian"
 771 |   },
 772 |   {
 773 |     "speakers": 4000000,
 774 |     "name": "Central Bikol",
 775 |     "iso6393": "bcl",
 776 |     "udhr": "bcl",
 777 |     "script": "Latin"
 778 |   },
 779 |   {
 780 |     "speakers": 4000000,
 781 |     "name": "Galician",
 782 |     "iso6393": "glg",
 783 |     "udhr": "glg",
 784 |     "script": "Latin"
 785 |   },
 786 |   {
 787 |     "speakers": 4000000,
 788 |     "name": "Lithuanian",
 789 |     "iso6393": "lit",
 790 |     "udhr": "lit",
 791 |     "script": "Latin"
 792 |   },
 793 |   {
 794 |     "speakers": 4000000,
 795 |     "name": "Lao",
 796 |     "iso6393": "lao",
 797 |     "udhr": "lao",
 798 |     "script": "Lao"
 799 |   },
 800 |   {
 801 |     "speakers": 4000000,
 802 |     "name": "Umbundu",
 803 |     "iso6393": "umb",
 804 |     "udhr": "umb",
 805 |     "script": "Latin"
 806 |   },
 807 |   {
 808 |     "speakers": 3932000,
 809 |     "name": "Tswana",
 810 |     "iso6393": "tsn",
 811 |     "udhr": "tsn",
 812 |     "script": "Latin"
 813 |   },
 814 |   {
 815 |     "speakers": 3851000,
 816 |     "name": "Pedi",
 817 |     "iso6393": "nso",
 818 |     "udhr": "nso",
 819 |     "script": "Latin"
 820 |   },
 821 |   {
 822 |     "speakers": 3800000,
 823 |     "name": "Balinese",
 824 |     "iso6393": "ban",
 825 |     "udhr": "ban",
 826 |     "script": "Latin"
 827 |   },
 828 |   {
 829 |     "speakers": 3500000,
 830 |     "name": "Buginese",
 831 |     "iso6393": "bug",
 832 |     "udhr": "bug",
 833 |     "script": "Latin"
 834 |   },
 835 |   {
 836 |     "speakers": 3500000,
 837 |     "name": "Central Kanuri",
 838 |     "iso6393": "knc",
 839 |     "udhr": "knc",
 840 |     "script": "Latin"
 841 |   },
 842 |   {
 843 |     "speakers": 3186000,
 844 |     "name": "Ibibio",
 845 |     "iso6393": "ibb",
 846 |     "udhr": "ibb",
 847 |     "script": "Latin"
 848 |   },
 849 |   {
 850 |     "speakers": 3015980,
 851 |     "name": "Ganda",
 852 |     "iso6393": "lug",
 853 |     "udhr": "lug",
 854 |     "script": "Latin"
 855 |   },
 856 |   {
 857 |     "speakers": 3000000,
 858 |     "name": "Achinese",
 859 |     "iso6393": "ace",
 860 |     "udhr": "ace",
 861 |     "script": "Latin"
 862 |   },
 863 |   {
 864 |     "speakers": 3000000,
 865 |     "name": "Bambara",
 866 |     "iso6393": "bam",
 867 |     "udhr": "bam",
 868 |     "script": "Latin"
 869 |   },
 870 |   {
 871 |     "speakers": 3000000,
 872 |     "name": "Kimbundu",
 873 |     "iso6393": "kmb",
 874 |     "udhr": "kmb",
 875 |     "script": "Latin"
 876 |   },
 877 |   {
 878 |     "speakers": 3000000,
 879 |     "name": "Lunda",
 880 |     "iso6393": "lun",
 881 |     "udhr": "lun",
 882 |     "script": "Latin"
 883 |   },
 884 |   {
 885 |     "speakers": 3000000,
 886 |     "name": "Central Atlas Tamazight",
 887 |     "iso6393": "tzm",
 888 |     "udhr": "tzm",
 889 |     "script": "Latin"
 890 |   },
 891 |   {
 892 |     "speakers": 3000000,
 893 |     "name": "Waray (Philippines)",
 894 |     "iso6393": "war",
 895 |     "udhr": "war",
 896 |     "script": "Latin"
 897 |   },
 898 |   {
 899 |     "speakers": 3000000,
 900 |     "name": "Eastern Yiddish",
 901 |     "iso6393": "ydd",
 902 |     "udhr": "ydd",
 903 |     "script": "Hebrew"
 904 |   },
 905 |   {
 906 |     "speakers": 2700000,
 907 |     "name": "Wolof",
 908 |     "iso6393": "wol",
 909 |     "udhr": "wol",
 910 |     "script": "Latin"
 911 |   },
 912 |   {
 913 |     "speakers": 2631420,
 914 |     "name": "Kirghiz",
 915 |     "iso6393": "kir",
 916 |     "udhr": "kir",
 917 |     "script": "Cyrillic"
 918 |   },
 919 |   {
 920 |     "speakers": 2600000,
 921 |     "name": "Low German",
 922 |     "iso6393": "nds",
 923 |     "udhr": "nds",
 924 |     "script": "Latin"
 925 |   },
 926 |   {
 927 |     "speakers": 2500000,
 928 |     "name": "Macedonian",
 929 |     "iso6393": "mkd",
 930 |     "udhr": "mkd",
 931 |     "script": "Cyrillic"
 932 |   },
 933 |   {
 934 |     "speakers": 2500000,
 935 |     "name": "Makhuwa",
 936 |     "iso6393": "vmw",
 937 |     "udhr": "vmw",
 938 |     "script": "Latin"
 939 |   },
 940 |   {
 941 |     "speakers": 2477600,
 942 |     "name": "Ewe",
 943 |     "iso6393": "ewe",
 944 |     "udhr": "ewe",
 945 |     "script": "Latin"
 946 |   },
 947 |   {
 948 |     "speakers": 2330000,
 949 |     "name": "Halh Mongolian",
 950 |     "iso6393": "khk",
 951 |     "udhr": "khk",
 952 |     "script": "Cyrillic"
 953 |   },
 954 |   {
 955 |     "speakers": 2218000,
 956 |     "name": "Slovenian",
 957 |     "iso6393": "slv",
 958 |     "udhr": "slv",
 959 |     "script": "Latin"
 960 |   },
 961 |   {
 962 |     "speakers": 2200000,
 963 |     "name": "Central Aymara",
 964 |     "iso6393": "ayr",
 965 |     "udhr": "ayr",
 966 |     "script": "Latin"
 967 |   },
 968 |   {
 969 |     "speakers": 2150000,
 970 |     "name": "Bemba (Zambia)",
 971 |     "iso6393": "bem",
 972 |     "udhr": "bem",
 973 |     "script": "Latin"
 974 |   },
 975 |   {
 976 |     "speakers": 2140300,
 977 |     "name": "Eastern Maninkakan",
 978 |     "iso6393": "emk",
 979 |     "udhr": "emk",
 980 |     "script": "Latin"
 981 |   },
 982 |   {
 983 |     "speakers": 2130000,
 984 |     "name": "Baoulé",
 985 |     "iso6393": "bci",
 986 |     "udhr": "bci",
 987 |     "script": "Latin"
 988 |   },
 989 |   {
 990 |     "speakers": 2000000,
 991 |     "name": "Esperanto",
 992 |     "iso6393": "epo",
 993 |     "udhr": "epo",
 994 |     "script": "Latin"
 995 |   },
 996 |   {
 997 |     "speakers": 2000000,
 998 |     "name": "Pampanga",
 999 |     "iso6393": "pam",
1000 |     "udhr": "pam",
1001 |     "script": "Latin"
1002 |   },
1003 |   {
1004 |     "speakers": 2000000,
1005 |     "name": "Tiv",
1006 |     "iso6393": "tiv",
1007 |     "udhr": "tiv",
1008 |     "script": "Latin"
1009 |   },
1010 |   {
1011 |     "speakers": 2000000,
1012 |     "name": "Tok Pisin",
1013 |     "iso6393": "tpi",
1014 |     "udhr": "tpi",
1015 |     "script": "Latin"
1016 |   },
1017 |   {
1018 |     "speakers": 1670000,
1019 |     "name": "Swati",
1020 |     "iso6393": "ssw",
1021 |     "udhr": "ssw",
1022 |     "script": "Latin"
1023 |   },
1024 |   {
1025 |     "speakers": 1643193,
1026 |     "name": "Nyankole",
1027 |     "iso6393": "nyn",
1028 |     "udhr": "nyn",
1029 |     "script": "Latin"
1030 |   },
1031 |   {
1032 |     "speakers": 1600000,
1033 |     "name": "Sichuan Yi",
1034 |     "iso6393": "iii",
1035 |     "udhr": "iii",
1036 |     "script": "Yi"
1037 |   },
1038 |   {
1039 |     "speakers": 1597000,
1040 |     "name": "Yao",
1041 |     "iso6393": "yao",
1042 |     "udhr": "yao",
1043 |     "script": "Latin"
1044 |   },
1045 |   {
1046 |     "speakers": 1550000,
1047 |     "name": "Latvian",
1048 |     "iso6393": "lav",
1049 |     "udhr": "lav",
1050 |     "script": "Latin"
1051 |   },
1052 |   {
1053 |     "speakers": 1500000,
1054 |     "name": "Cusco Quechua",
1055 |     "iso6393": "quz",
1056 |     "udhr": "quz",
1057 |     "script": "Latin"
1058 |   },
1059 |   {
1060 |     "speakers": 1500000,
1061 |     "name": "Vlax Romani",
1062 |     "iso6393": "rmy",
1063 |     "udhr": "rmy",
1064 |     "script": "Latin"
1065 |   },
1066 |   {
1067 |     "speakers": 1500000,
1068 |     "name": "Logudorese Sardinian",
1069 |     "iso6393": "src",
1070 |     "udhr": "src",
1071 |     "script": "Latin"
1072 |   },
1073 |   {
1074 |     "speakers": 1500000,
1075 |     "name": "Scots",
1076 |     "iso6393": "sco",
1077 |     "udhr": "sco",
1078 |     "script": "Latin"
1079 |   },
1080 |   {
1081 |     "speakers": 1500000,
1082 |     "name": "Tsonga",
1083 |     "iso6393": "tso",
1084 |     "udhr": "tso_MZ",
1085 |     "script": "Latin"
1086 |   },
1087 |   {
1088 |     "speakers": 1480000,
1089 |     "name": "Mende (Sierra Leone)",
1090 |     "iso6393": "men",
1091 |     "udhr": "men",
1092 |     "script": "Latin"
1093 |   },
1094 |   {
1095 |     "speakers": 1436000,
1096 |     "name": "Fon",
1097 |     "iso6393": "fon",
1098 |     "udhr": "fon",
1099 |     "script": "Latin"
1100 |   },
1101 |   {
1102 |     "speakers": 1376898,
1103 |     "name": "Central Nahuatl",
1104 |     "iso6393": "nhn",
1105 |     "udhr": "nhn",
1106 |     "script": "Latin"
1107 |   },
1108 |   {
1109 |     "speakers": 1350000,
1110 |     "name": "Northeastern Dinka",
1111 |     "iso6393": "dip",
1112 |     "udhr": "dip",
1113 |     "script": "Latin"
1114 |   },
1115 |   {
1116 |     "speakers": 1260000,
1117 |     "name": "Makonde",
1118 |     "iso6393": "kde",
1119 |     "udhr": "kde",
1120 |     "script": "Latin"
1121 |   },
1122 |   {
1123 |     "speakers": 1240000,
1124 |     "name": "Siona",
1125 |     "iso6393": "snn",
1126 |     "udhr": "snn",
1127 |     "script": "Latin"
1128 |   },
1129 |   {
1130 |     "speakers": 1200000,
1131 |     "name": "Kabiyè",
1132 |     "iso6393": "kbp",
1133 |     "udhr": "kbp",
1134 |     "script": "Latin"
1135 |   },
1136 |   {
1137 |     "speakers": 1200000,
1138 |     "name": "Timne",
1139 |     "iso6393": "tem",
1140 |     "udhr": "tem",
1141 |     "script": "Latin"
1142 |   },
1143 |   {
1144 |     "speakers": 1105000,
1145 |     "name": "Tonga (Zambia)",
1146 |     "iso6393": "toi",
1147 |     "udhr": "toi",
1148 |     "script": "Latin"
1149 |   },
1150 |   {
1151 |     "speakers": 1100000,
1152 |     "name": "Estonian",
1153 |     "iso6393": "est",
1154 |     "udhr": "est",
1155 |     "script": "Latin"
1156 |   },
1157 |   {
1158 |     "speakers": 1067000,
1159 |     "name": "Soninke",
1160 |     "iso6393": "snk",
1161 |     "udhr": "snk",
1162 |     "script": "Latin"
1163 |   },
1164 |   {
1165 |     "speakers": 1004000,
1166 |     "name": "Chokwe",
1167 |     "iso6393": "cjk",
1168 |     "udhr": "cjk",
1169 |     "script": "Latin"
1170 |   },
1171 |   {
1172 |     "speakers": 1000000,
1173 |     "name": "Assyrian Neo-Aramaic",
1174 |     "iso6393": "aii",
1175 |     "udhr": "aii",
1176 |     "script": "Syriac"
1177 |   },
1178 |   {
1179 |     "speakers": 1000000,
1180 |     "name": "Adangme",
1181 |     "iso6393": "ada",
1182 |     "udhr": "ada",
1183 |     "script": "Latin"
1184 |   },
1185 |   {
1186 |     "speakers": 1000000,
1187 |     "name": "Bini",
1188 |     "iso6393": "bin",
1189 |     "udhr": "bin",
1190 |     "script": "Latin"
1191 |   },
1192 |   {
1193 |     "speakers": 1000000,
1194 |     "name": "Ga",
1195 |     "iso6393": "gaa",
1196 |     "udhr": "gaa",
1197 |     "script": "Latin"
1198 |   },
1199 |   {
1200 |     "speakers": 1000000,
1201 |     "name": "Koongo",
1202 |     "iso6393": "kng",
1203 |     "udhr": "kng",
1204 |     "script": "Latin"
1205 |   },
1206 |   {
1207 |     "speakers": 1000000,
1208 |     "name": "Ndonga",
1209 |     "iso6393": "ndo",
1210 |     "udhr": "ndo",
1211 |     "script": "Latin"
1212 |   },
1213 |   {
1214 |     "speakers": 1000000,
1215 |     "name": "Ayacucho Quechua",
1216 |     "iso6393": "quy",
1217 |     "udhr": "quy",
1218 |     "script": "Latin"
1219 |   },
1220 |   {
1221 |     "speakers": 1000000,
1222 |     "name": "Balkan Romani",
1223 |     "iso6393": "rmn",
1224 |     "udhr": "rmn",
1225 |     "script": "Latin"
1226 |   },
1227 |   {
1228 |     "speakers": 926000,
1229 |     "name": "Nyamwezi",
1230 |     "iso6393": "nym",
1231 |     "udhr": "nym",
1232 |     "script": "Latin"
1233 |   },
1234 |   {
1235 |     "speakers": 923000,
1236 |     "name": "Susu",
1237 |     "iso6393": "sus",
1238 |     "udhr": "sus",
1239 |     "script": "Latin"
1240 |   },
1241 |   {
1242 |     "speakers": 876409,
1243 |     "name": "Venda",
1244 |     "iso6393": "ven",
1245 |     "udhr": "ven",
1246 |     "script": "Latin"
1247 |   },
1248 |   {
1249 |     "speakers": 868800,
1250 |     "name": "Serer",
1251 |     "iso6393": "srr",
1252 |     "udhr": "srr",
1253 |     "script": "Latin"
1254 |   },
1255 |   {
1256 |     "speakers": 865000,
1257 |     "name": "Khasi",
1258 |     "iso6393": "kha",
1259 |     "udhr": "kha",
1260 |     "script": "Latin"
1261 |   },
1262 |   {
1263 |     "speakers": 820000,
1264 |     "name": "Northern Qiandong Miao",
1265 |     "iso6393": "hea",
1266 |     "udhr": "hea",
1267 |     "script": "Latin"
1268 |   },
1269 |   {
1270 |     "speakers": 808000,
1271 |     "name": "Guinea Kpelle",
1272 |     "iso6393": "gkp",
1273 |     "udhr": "gkp",
1274 |     "script": "Latin"
1275 |   },
1276 |   {
1277 |     "speakers": 747000,
1278 |     "name": "Hani",
1279 |     "iso6393": "hni",
1280 |     "udhr": "hni",
1281 |     "script": "Latin"
1282 |   },
1283 |   {
1284 |     "speakers": 700000,
1285 |     "name": "Yucateco",
1286 |     "iso6393": "yua",
1287 |     "udhr": "yua",
1288 |     "script": "Latin"
1289 |   },
1290 |   {
1291 |     "speakers": 650000,
1292 |     "name": "Fijian",
1293 |     "iso6393": "fij",
1294 |     "udhr": "fij",
1295 |     "script": "Latin"
1296 |   },
1297 |   {
1298 |     "speakers": 600000,
1299 |     "name": "Friulian",
1300 |     "iso6393": "fur",
1301 |     "udhr": "fur",
1302 |     "script": "Latin"
1303 |   },
1304 |   {
1305 |     "speakers": 600000,
1306 |     "name": "Tetum",
1307 |     "iso6393": "tet",
1308 |     "udhr": "tet",
1309 |     "script": "Latin"
1310 |   },
1311 |   {
1312 |     "speakers": 600000,
1313 |     "name": "Walloon",
1314 |     "iso6393": "wln",
1315 |     "udhr": "wln",
1316 |     "script": "Latin"
1317 |   },
1318 |   {
1319 |     "speakers": 588000,
1320 |     "name": "Basque",
1321 |     "iso6393": "eus",
1322 |     "udhr": "eus",
1323 |     "script": "Latin"
1324 |   },
1325 |   {
1326 |     "speakers": 588000,
1327 |     "name": "South Ndebele",
1328 |     "iso6393": "nbl",
1329 |     "udhr": "nbl",
1330 |     "script": "Latin"
1331 |   },
1332 |   {
1333 |     "speakers": 588000,
1334 |     "name": "Ossetian",
1335 |     "iso6393": "oss",
1336 |     "udhr": "oss",
1337 |     "script": "Cyrillic"
1338 |   },
1339 |   {
1340 |     "speakers": 580000,
1341 |     "name": "Welsh",
1342 |     "iso6393": "cym",
1343 |     "udhr": "cym",
1344 |     "script": "Latin"
1345 |   },
1346 |   {
1347 |     "speakers": 580000,
1348 |     "name": "Upper Guinea Crioulo",
1349 |     "iso6393": "pov",
1350 |     "udhr": "pov",
1351 |     "script": "Latin"
1352 |   },
1353 |   {
1354 |     "speakers": 541750,
1355 |     "name": "Lushai",
1356 |     "iso6393": "lus",
1357 |     "udhr": "lus",
1358 |     "script": "Latin"
1359 |   },
1360 |   {
1361 |     "speakers": 540000,
1362 |     "name": "Dagbani",
1363 |     "iso6393": "dag",
1364 |     "udhr": "dag",
1365 |     "script": "Latin"
1366 |   },
1367 |   {
1368 |     "speakers": 501000,
1369 |     "name": "Southern Dagaare",
1370 |     "iso6393": "dga",
1371 |     "udhr": "dga",
1372 |     "script": "Latin"
1373 |   },
1374 |   {
1375 |     "speakers": 500000,
1376 |     "name": "Breton",
1377 |     "iso6393": "bre",
1378 |     "udhr": "bre",
1379 |     "script": "Latin"
1380 |   },
1381 |   {
1382 |     "speakers": 500000,
1383 |     "name": "Kekchí",
1384 |     "iso6393": "kek",
1385 |     "udhr": "kek",
1386 |     "script": "Latin"
1387 |   },
1388 |   {
1389 |     "speakers": 500000,
1390 |     "name": "Picard",
1391 |     "iso6393": "pcd",
1392 |     "udhr": "pcd",
1393 |     "script": "Latin"
1394 |   },
1395 |   {
1396 |     "speakers": 500000,
1397 |     "name": "Romansh",
1398 |     "iso6393": "roh",
1399 |     "udhr": "roh",
1400 |     "script": "Latin"
1401 |   },
1402 |   {
1403 |     "speakers": 480000,
1404 |     "name": "Bari",
1405 |     "iso6393": "bfa",
1406 |     "udhr": "bfa",
1407 |     "script": "Latin"
1408 |   },
1409 |   {
1410 |     "speakers": 480000,
1411 |     "name": "Krio",
1412 |     "iso6393": "kri",
1413 |     "udhr": "kri",
1414 |     "script": "Latin"
1415 |   },
1416 |   {
1417 |     "speakers": 446264,
1418 |     "name": "Haka Chin",
1419 |     "iso6393": "cnh",
1420 |     "udhr": "cnh",
1421 |     "script": "Latin"
1422 |   },
1423 |   {
1424 |     "speakers": 440000,
1425 |     "name": "Mapudungun",
1426 |     "iso6393": "arn",
1427 |     "udhr": "arn",
1428 |     "script": "Latin"
1429 |   },
1430 |   {
1431 |     "speakers": 400000,
1432 |     "name": "Baatonum",
1433 |     "iso6393": "bba",
1434 |     "udhr": "bba",
1435 |     "script": "Latin"
1436 |   },
1437 |   {
1438 |     "speakers": 393943,
1439 |     "name": "Kabuverdianu",
1440 |     "iso6393": "kea",
1441 |     "udhr": "kea",
1442 |     "script": "Latin"
1443 |   },
1444 |   {
1445 |     "speakers": 363000,
1446 |     "name": "Yakut",
1447 |     "iso6393": "sah",
1448 |     "udhr": "sah",
1449 |     "script": "Cyrillic"
1450 |   },
1451 |   {
1452 |     "speakers": 362000,
1453 |     "name": "Samoan",
1454 |     "iso6393": "smo",
1455 |     "udhr": "smo",
1456 |     "script": "Latin"
1457 |   },
1458 |   {
1459 |     "speakers": 361709,
1460 |     "name": "Konzo",
1461 |     "iso6393": "koo",
1462 |     "udhr": "koo",
1463 |     "script": "Latin"
1464 |   },
1465 |   {
1466 |     "speakers": 352500,
1467 |     "name": "Nzima",
1468 |     "iso6393": "nzi",
1469 |     "udhr": "nzi",
1470 |     "script": "Latin"
1471 |   },
1472 |   {
1473 |     "speakers": 350000,
1474 |     "name": "Central Mazahua",
1475 |     "iso6393": "maz",
1476 |     "udhr": "maz",
1477 |     "script": "Latin"
1478 |   },
1479 |   {
1480 |     "speakers": 350000,
1481 |     "name": "Pijin",
1482 |     "iso6393": "pis",
1483 |     "udhr": "pis",
1484 |     "script": "Latin"
1485 |   },
1486 |   {
1487 |     "speakers": 344100,
1488 |     "name": "Tedim Chin",
1489 |     "iso6393": "ctd",
1490 |     "udhr": "ctd",
1491 |     "script": "Latin"
1492 |   },
1493 |   {
1494 |     "speakers": 341000,
1495 |     "name": "Corsican",
1496 |     "iso6393": "cos",
1497 |     "udhr": "cos",
1498 |     "script": "Latin"
1499 |   },
1500 |   {
1501 |     "speakers": 335518,
1502 |     "name": "Luxembourgish",
1503 |     "iso6393": "ltz",
1504 |     "udhr": "ltz",
1505 |     "script": "Latin"
1506 |   },
1507 |   {
1508 |     "speakers": 335000,
1509 |     "name": "West-Central Limba",
1510 |     "iso6393": "lia",
1511 |     "udhr": "lia",
1512 |     "script": "Latin"
1513 |   },
1514 |   {
1515 |     "speakers": 330000,
1516 |     "name": "Maltese",
1517 |     "iso6393": "mlt",
1518 |     "udhr": "mlt",
1519 |     "script": "Latin"
1520 |   },
1521 |   {
1522 |     "speakers": 327000,
1523 |     "name": "Mina (Cameroon)",
1524 |     "iso6393": "hna",
1525 |     "udhr": "hna",
1526 |     "script": "Latin"
1527 |   },
1528 |   {
1529 |     "speakers": 305000,
1530 |     "name": "Wayuu",
1531 |     "iso6393": "guc",
1532 |     "udhr": "guc",
1533 |     "script": "Latin"
1534 |   },
1535 |   {
1536 |     "speakers": 300000,
1537 |     "name": "K'iche'",
1538 |     "iso6393": "quc",
1539 |     "udhr": "quc",
1540 |     "script": "Latin"
1541 |   },
1542 |   {
1543 |     "speakers": 300000,
1544 |     "name": "Huaylas Ancash Quechua",
1545 |     "iso6393": "qwh",
1546 |     "udhr": "qwh",
1547 |     "script": "Latin"
1548 |   },
1549 |   {
1550 |     "speakers": 287000,
1551 |     "name": "Dhivehi",
1552 |     "iso6393": "div",
1553 |     "udhr": "div",
1554 |     "script": "Thaana"
1555 |   },
1556 |   {
1557 |     "speakers": 282845,
1558 |     "name": "Icelandic",
1559 |     "iso6393": "isl",
1560 |     "udhr": "isl",
1561 |     "script": "Latin"
1562 |   },
1563 |   {
1564 |     "speakers": 276000,
1565 |     "name": "Kaonde",
1566 |     "iso6393": "kqn",
1567 |     "udhr": "kqn",
1568 |     "script": "Latin"
1569 |   },
1570 |   {
1571 |     "speakers": 260000,
1572 |     "name": "Jola-Fonyi",
1573 |     "iso6393": "dyo",
1574 |     "udhr": "dyo",
1575 |     "script": "Latin"
1576 |   },
1577 |   {
1578 |     "speakers": 260000,
1579 |     "name": "Irish",
1580 |     "iso6393": "gle",
1581 |     "udhr": "gle",
1582 |     "script": "Latin"
1583 |   },
1584 |   {
1585 |     "speakers": 250000,
1586 |     "name": "Gonja",
1587 |     "iso6393": "gjn",
1588 |     "udhr": "gjn",
1589 |     "script": "Latin"
1590 |   },
1591 |   {
1592 |     "speakers": 232000,
1593 |     "name": "Ao Naga",
1594 |     "iso6393": "njo",
1595 |     "udhr": "njo",
1596 |     "script": "Latin"
1597 |   },
1598 |   {
1599 |     "speakers": 200000,
1600 |     "name": "Mezquital Otomi",
1601 |     "iso6393": "ote",
1602 |     "udhr": "ote",
1603 |     "script": "Latin"
1604 |   },
1605 |   {
1606 |     "speakers": 200000,
1607 |     "name": "Northern Conchucos Ancash Quechua",
1608 |     "iso6393": "qxn",
1609 |     "udhr": "qxn",
1610 |     "script": "Latin"
1611 |   },
1612 |   {
1613 |     "speakers": 200000,
1614 |     "name": "Tuvinian",
1615 |     "iso6393": "tyv",
1616 |     "udhr": "tyv",
1617 |     "script": "Cyrillic"
1618 |   },
1619 |   {
1620 |     "speakers": 200000,
1621 |     "name": "Kasem",
1622 |     "iso6393": "xsm",
1623 |     "udhr": "xsm",
1624 |     "script": "Latin"
1625 |   },
1626 |   {
1627 |     "speakers": 198000,
1628 |     "name": "Gagauz",
1629 |     "iso6393": "gag",
1630 |     "udhr": "gag",
1631 |     "script": "Latin"
1632 |   },
1633 |   {
1634 |     "speakers": 194433,
1635 |     "name": "Sanskrit",
1636 |     "iso6393": "san",
1637 |     "udhr": "san",
1638 |     "script": "Devanagari"
1639 |   },
1640 |   {
1641 |     "speakers": 175000,
1642 |     "name": "Shilluk",
1643 |     "iso6393": "shk",
1644 |     "udhr": "shk",
1645 |     "script": "Latin"
1646 |   },
1647 |   {
1648 |     "speakers": 172000,
1649 |     "name": "Nyemba",
1650 |     "iso6393": "nba",
1651 |     "udhr": "nba",
1652 |     "script": "Latin"
1653 |   },
1654 |   {
1655 |     "speakers": 160000,
1656 |     "name": "Mískito",
1657 |     "iso6393": "miq",
1658 |     "udhr": "miq",
1659 |     "script": "Latin"
1660 |   },
1661 |   {
1662 |     "speakers": 157000,
1663 |     "name": "Mam",
1664 |     "iso6393": "mam",
1665 |     "udhr": "mam",
1666 |     "script": "Latin"
1667 |   },
1668 |   {
1669 |     "speakers": 150000,
1670 |     "name": "Huastec",
1671 |     "iso6393": "hus",
1672 |     "udhr": "hus",
1673 |     "script": "Latin"
1674 |   },
1675 |   {
1676 |     "speakers": 150000,
1677 |     "name": "Tahitian",
1678 |     "iso6393": "tah",
1679 |     "udhr": "tah",
1680 |     "script": "Latin"
1681 |   },
1682 |   {
1683 |     "speakers": 148530,
1684 |     "name": "Navajo",
1685 |     "iso6393": "nav",
1686 |     "udhr": "nav",
1687 |     "script": "Latin"
1688 |   },
1689 |   {
1690 |     "speakers": 135000,
1691 |     "name": "Otuho",
1692 |     "iso6393": "lot",
1693 |     "udhr": "lot",
1694 |     "script": "Latin"
1695 |   },
1696 |   {
1697 |     "speakers": 132200,
1698 |     "name": "Kaqchikel",
1699 |     "iso6393": "cak",
1700 |     "udhr": "cak",
1701 |     "script": "Latin"
1702 |   },
1703 |   {
1704 |     "speakers": 125000,
1705 |     "name": "Lamnso'",
1706 |     "iso6393": "lns",
1707 |     "udhr": "lns",
1708 |     "script": "Latin"
1709 |   },
1710 |   {
1711 |     "speakers": 123000,
1712 |     "name": "Tonga (Tonga Islands)",
1713 |     "iso6393": "ton",
1714 |     "udhr": "ton",
1715 |     "script": "Latin"
1716 |   },
1717 |   {
1718 |     "speakers": 120000,
1719 |     "name": "Ladino",
1720 |     "iso6393": "lad",
1721 |     "udhr": "lad",
1722 |     "script": "Latin"
1723 |   },
1724 |   {
1725 |     "speakers": 120000,
1726 |     "name": "Ditammari",
1727 |     "iso6393": "tbz",
1728 |     "udhr": "tbz",
1729 |     "script": "Latin"
1730 |   },
1731 |   {
1732 |     "speakers": 119500,
1733 |     "name": "Vai",
1734 |     "iso6393": "vai",
1735 |     "udhr": "vai",
1736 |     "script": "Vai"
1737 |   },
1738 |   {
1739 |     "speakers": 105000,
1740 |     "name": "Abkhazian",
1741 |     "iso6393": "abk",
1742 |     "udhr": "abk",
1743 |     "script": "Cyrillic"
1744 |   },
1745 |   {
1746 |     "speakers": 100000,
1747 |     "name": "Asturian",
1748 |     "iso6393": "ast",
1749 |     "udhr": "ast",
1750 |     "script": "Latin"
1751 |   },
1752 |   {
1753 |     "speakers": 100000,
1754 |     "name": "Purepecha",
1755 |     "iso6393": "tsz",
1756 |     "udhr": "tsz",
1757 |     "script": "Latin"
1758 |   },
1759 |   {
1760 |     "speakers": 94500,
1761 |     "name": "Garifuna",
1762 |     "iso6393": "cab",
1763 |     "udhr": "cab",
1764 |     "script": "Latin"
1765 |   },
1766 |   {
1767 |     "speakers": 80000,
1768 |     "name": "Karelian",
1769 |     "iso6393": "krl",
1770 |     "udhr": "krl",
1771 |     "script": "Latin"
1772 |   },
1773 |   {
1774 |     "speakers": 80000,
1775 |     "name": "Papantla Totonac",
1776 |     "iso6393": "top",
1777 |     "udhr": "top",
1778 |     "script": "Latin"
1779 |   },
1780 |   {
1781 |     "speakers": 80000,
1782 |     "name": "Miahuatlán Zapotec",
1783 |     "iso6393": "zam",
1784 |     "udhr": "zam",
1785 |     "script": "Latin"
1786 |   },
1787 |   {
1788 |     "speakers": 78000,
1789 |     "name": "Chamorro",
1790 |     "iso6393": "cha",
1791 |     "udhr": "cha",
1792 |     "script": "Latin"
1793 |   },
1794 |   {
1795 |     "speakers": 72700,
1796 |     "name": "Seselwa Creole French",
1797 |     "iso6393": "crs",
1798 |     "udhr": "crs",
1799 |     "script": "Latin"
1800 |   },
1801 |   {
1802 |     "speakers": 72000,
1803 |     "name": "Dendi (Benin)",
1804 |     "iso6393": "ddn",
1805 |     "udhr": "ddn",
1806 |     "script": "Latin"
1807 |   },
1808 |   {
1809 |     "speakers": 71841,
1810 |     "name": "Lozi",
1811 |     "iso6393": "loz",
1812 |     "udhr": "loz",
1813 |     "script": "Latin"
1814 |   },
1815 |   {
1816 |     "speakers": 70000,
1817 |     "name": "Upper Sorbian",
1818 |     "iso6393": "hsb",
1819 |     "udhr": "hsb",
1820 |     "script": "Latin"
1821 |   },
1822 |   {
1823 |     "speakers": 70000,
1824 |     "name": "Maori",
1825 |     "iso6393": "mri",
1826 |     "udhr": "mri",
1827 |     "script": "Latin"
1828 |   },
1829 |   {
1830 |     "speakers": 68487,
1831 |     "name": "Páez",
1832 |     "iso6393": "pbb",
1833 |     "udhr": "pbb",
1834 |     "script": "Latin"
1835 |   },
1836 |   {
1837 |     "speakers": 68000,
1838 |     "name": "Southern Altai",
1839 |     "iso6393": "alt",
1840 |     "udhr": "alt",
1841 |     "script": "Cyrillic"
1842 |   },
1843 |   {
1844 |     "speakers": 65000,
1845 |     "name": "Metlatónoc Mixtec",
1846 |     "iso6393": "mxv",
1847 |     "udhr": "mxv",
1848 |     "script": "Latin"
1849 |   },
1850 |   {
1851 |     "speakers": 65000,
1852 |     "name": "Ambo-Pasco Quechua",
1853 |     "iso6393": "qva",
1854 |     "udhr": "qva",
1855 |     "script": "Latin"
1856 |   },
1857 |   {
1858 |     "speakers": 63653,
1859 |     "name": "Scottish Gaelic",
1860 |     "iso6393": "gla",
1861 |     "udhr": "gla",
1862 |     "script": "Latin"
1863 |   },
1864 |   {
1865 |     "speakers": 60000,
1866 |     "name": "Swampy Cree",
1867 |     "iso6393": "csw",
1868 |     "udhr": "csw",
1869 |     "script": "Canadian_Aboriginal"
1870 |   },
1871 |   {
1872 |     "speakers": 60000,
1873 |     "name": "Khakas",
1874 |     "iso6393": "kjh",
1875 |     "udhr": "kjh",
1876 |     "script": "Cyrillic"
1877 |   },
1878 |   {
1879 |     "speakers": 55000,
1880 |     "name": "Margos-Yarowilca-Lauricocha Quechua",
1881 |     "iso6393": "qvm",
1882 |     "udhr": "qvm",
1883 |     "script": "Latin"
1884 |   },
1885 |   {
1886 |     "speakers": 47000,
1887 |     "name": "Faroese",
1888 |     "iso6393": "fao",
1889 |     "udhr": "fao",
1890 |     "script": "Latin"
1891 |   },
1892 |   {
1893 |     "speakers": 47000,
1894 |     "name": "Kalaallisut",
1895 |     "iso6393": "kal",
1896 |     "udhr": "kal",
1897 |     "script": "Latin"
1898 |   },
1899 |   {
1900 |     "speakers": 45000,
1901 |     "name": "Chuukese",
1902 |     "iso6393": "chk",
1903 |     "udhr": "chk",
1904 |     "script": "Latin"
1905 |   },
1906 |   {
1907 |     "speakers": 45000,
1908 |     "name": "Asháninka",
1909 |     "iso6393": "cni",
1910 |     "udhr": "cni",
1911 |     "script": "Latin"
1912 |   },
1913 |   {
1914 |     "speakers": 43900,
1915 |     "name": "Marshallese",
1916 |     "iso6393": "mah",
1917 |     "udhr": "mah",
1918 |     "script": "Latin"
1919 |   },
1920 |   {
1921 |     "speakers": 43000,
1922 |     "name": "Rarotongan",
1923 |     "iso6393": "rar",
1924 |     "udhr": "rar",
1925 |     "script": "Latin"
1926 |   },
1927 |   {
1928 |     "speakers": 40000,
1929 |     "name": "Evenki",
1930 |     "iso6393": "evn",
1931 |     "udhr": "evn",
1932 |     "script": "Cyrillic"
1933 |   },
1934 |   {
1935 |     "speakers": 40000,
1936 |     "name": "North Junín Quechua",
1937 |     "iso6393": "qvn",
1938 |     "udhr": "qvn",
1939 |     "script": "Latin"
1940 |   },
1941 |   {
1942 |     "speakers": 40000,
1943 |     "name": "Waama",
1944 |     "iso6393": "wwa",
1945 |     "udhr": "wwa",
1946 |     "script": "Latin"
1947 |   },
1948 |   {
1949 |     "speakers": 38000,
1950 |     "name": "Huamalíes-Dos de Mayo Huánuco Quechua",
1951 |     "iso6393": "qvh",
1952 |     "udhr": "qvh",
1953 |     "script": "Latin"
1954 |   },
1955 |   {
1956 |     "speakers": 36000,
1957 |     "name": "Tojolabal",
1958 |     "iso6393": "toj",
1959 |     "udhr": "toj",
1960 |     "script": "Latin"
1961 |   },
1962 |   {
1963 |     "speakers": 35800,
1964 |     "name": "Luvale",
1965 |     "iso6393": "lue",
1966 |     "udhr": "lue",
1967 |     "script": "Latin"
1968 |   },
1969 |   {
1970 |     "speakers": 35000,
1971 |     "name": "Shuar",
1972 |     "iso6393": "jiv",
1973 |     "udhr": "jiv",
1974 |     "script": "Latin"
1975 |   },
1976 |   {
1977 |     "speakers": 35000,
1978 |     "name": "Northwestern Ojibwa",
1979 |     "iso6393": "ojb",
1980 |     "udhr": "ojb",
1981 |     "script": "Canadian_Aboriginal"
1982 |   },
1983 |   {
1984 |     "speakers": 35000,
1985 |     "name": "Cajamarca Quechua",
1986 |     "iso6393": "qvc",
1987 |     "udhr": "qvc",
1988 |     "script": "Latin"
1989 |   },
1990 |   {
1991 |     "speakers": 30000,
1992 |     "name": "Matu Chin",
1993 |     "iso6393": "hlt",
1994 |     "udhr": "hlt",
1995 |     "script": "Latin"
1996 |   },
1997 |   {
1998 |     "speakers": 30000,
1999 |     "name": "Calderón Highland Quichua",
2000 |     "iso6393": "qud",
2001 |     "udhr": "qud",
2002 |     "script": "Latin"
2003 |   },
2004 |   {
2005 |     "speakers": 27700,
2006 |     "name": "Pohnpeian",
2007 |     "iso6393": "pon",
2008 |     "udhr": "pon",
2009 |     "script": "Latin"
2010 |   },
2011 |   {
2012 |     "speakers": 27500,
2013 |     "name": "Aguaruna",
2014 |     "iso6393": "agr",
2015 |     "udhr": "agr",
2016 |     "script": "Latin"
2017 |   },
2018 |   {
2019 |     "speakers": 25000,
2020 |     "name": "Chiquián Ancash Quechua",
2021 |     "iso6393": "qxa",
2022 |     "udhr": "qxa",
2023 |     "script": "Latin"
2024 |   },
2025 |   {
2026 |     "speakers": 25000,
2027 |     "name": "Ticuna",
2028 |     "iso6393": "tca",
2029 |     "udhr": "tca",
2030 |     "script": "Latin"
2031 |   },
2032 |   {
2033 |     "speakers": 22000,
2034 |     "name": "Ojitlán Chinantec",
2035 |     "iso6393": "chj",
2036 |     "udhr": "chj",
2037 |     "script": "Latin"
2038 |   },
2039 |   {
2040 |     "speakers": 21500,
2041 |     "name": "Eastern Canadian Inuktitut",
2042 |     "iso6393": "ike",
2043 |     "udhr": "ike",
2044 |     "script": "Canadian_Aboriginal"
2045 |   },
2046 |   {
2047 |     "speakers": 21000,
2048 |     "name": "Awa-Cuaiquer",
2049 |     "iso6393": "kwi",
2050 |     "udhr": "kwi",
2051 |     "script": "Latin"
2052 |   },
2053 |   {
2054 |     "speakers": 20112,
2055 |     "name": "Romagnol",
2056 |     "iso6393": "rgn",
2057 |     "udhr": "eml",
2058 |     "script": "Latin"
2059 |   },
2060 |   {
2061 |     "speakers": 20000,
2062 |     "name": "Toba",
2063 |     "iso6393": "tob",
2064 |     "udhr": "tob",
2065 |     "script": "Latin"
2066 |   },
2067 |   {
2068 |     "speakers": 17640,
2069 |     "name": "Yanomamö",
2070 |     "iso6393": "guu",
2071 |     "udhr": "guu",
2072 |     "script": "Latin"
2073 |   },
2074 |   {
2075 |     "speakers": 16000,
2076 |     "name": "Arequipa-La Unión Quechua",
2077 |     "iso6393": "qxu",
2078 |     "udhr": "qxu",
2079 |     "script": "Latin"
2080 |   },
2081 |   {
2082 |     "speakers": 15000,
2083 |     "name": "Palauan",
2084 |     "iso6393": "pau",
2085 |     "udhr": "pau",
2086 |     "script": "Latin"
2087 |   },
2088 |   {
2089 |     "speakers": 15000,
2090 |     "name": "Shipibo-Conibo",
2091 |     "iso6393": "shp",
2092 |     "udhr": "shp",
2093 |     "script": "Latin"
2094 |   },
2095 |   {
2096 |     "speakers": 12000,
2097 |     "name": "Paraguayan Guaraní",
2098 |     "iso6393": "gug",
2099 |     "udhr": "gug",
2100 |     "script": "Latin"
2101 |   },
2102 |   {
2103 |     "speakers": 11000,
2104 |     "name": "Ixcatlán Mazatec",
2105 |     "iso6393": "mzi",
2106 |     "udhr": "mzi",
2107 |     "script": "Latin"
2108 |   },
2109 |   {
2110 |     "speakers": 10000,
2111 |     "name": "Shor",
2112 |     "iso6393": "cjs",
2113 |     "udhr": "cjs",
2114 |     "script": "Cyrillic"
2115 |   },
2116 |   {
2117 |     "speakers": 8100,
2118 |     "name": "Mi'kmaq",
2119 |     "iso6393": "mic",
2120 |     "udhr": "mic",
2121 |     "script": "Latin"
2122 |   },
2123 |   {
2124 |     "speakers": 8000,
2125 |     "name": "Hawaiian",
2126 |     "iso6393": "haw",
2127 |     "udhr": "haw",
2128 |     "script": "Latin"
2129 |   },
2130 |   {
2131 |     "speakers": 7170,
2132 |     "name": "Even",
2133 |     "iso6393": "eve",
2134 |     "udhr": "eve",
2135 |     "script": "Cyrillic"
2136 |   },
2137 |   {
2138 |     "speakers": 6592,
2139 |     "name": "Yapese",
2140 |     "iso6393": "yap",
2141 |     "udhr": "yap",
2142 |     "script": "Latin"
2143 |   },
2144 |   {
2145 |     "speakers": 6000,
2146 |     "name": "Yanesha'",
2147 |     "iso6393": "ame",
2148 |     "udhr": "ame",
2149 |     "script": "Latin"
2150 |   },
2151 |   {
2152 |     "speakers": 6000,
2153 |     "name": "Chayahuita",
2154 |     "iso6393": "cbt",
2155 |     "udhr": "cbt",
2156 |     "script": "Latin"
2157 |   },
2158 |   {
2159 |     "speakers": 5933,
2160 |     "name": "Guarayu",
2161 |     "iso6393": "gyr",
2162 |     "udhr": "gyr",
2163 |     "script": "Latin"
2164 |   },
2165 |   {
2166 |     "speakers": 5800,
2167 |     "name": "Veps",
2168 |     "iso6393": "vep",
2169 |     "udhr": "vep",
2170 |     "script": "Latin"
2171 |   },
2172 |   {
2173 |     "speakers": 5000,
2174 |     "name": "Pichis Ashéninka",
2175 |     "iso6393": "cpu",
2176 |     "udhr": "cpu",
2177 |     "script": "Latin"
2178 |   },
2179 |   {
2180 |     "speakers": 4500,
2181 |     "name": "Achuar-Shiwiar",
2182 |     "iso6393": "acu",
2183 |     "udhr": "acu",
2184 |     "script": "Latin"
2185 |   },
2186 |   {
2187 |     "speakers": 4000,
2188 |     "name": "Nomatsiguenga",
2189 |     "iso6393": "not",
2190 |     "udhr": "not",
2191 |     "script": "Latin"
2192 |   },
2193 |   {
2194 |     "speakers": 4000,
2195 |     "name": "Northern Sami",
2196 |     "iso6393": "sme",
2197 |     "udhr": "sme",
2198 |     "script": "Latin"
2199 |   },
2200 |   {
2201 |     "speakers": 4000,
2202 |     "name": "Yagua",
2203 |     "iso6393": "yad",
2204 |     "udhr": "yad",
2205 |     "script": "Latin"
2206 |   },
2207 |   {
2208 |     "speakers": 3500,
2209 |     "name": "Urarina",
2210 |     "iso6393": "ura",
2211 |     "udhr": "ura",
2212 |     "script": "Latin"
2213 |   },
2214 |   {
2215 |     "speakers": 3000,
2216 |     "name": "Candoshi-Shapra",
2217 |     "iso6393": "cbu",
2218 |     "udhr": "cbu",
2219 |     "script": "Latin"
2220 |   },
2221 |   {
2222 |     "speakers": 2900,
2223 |     "name": "Murui Huitoto",
2224 |     "iso6393": "huu",
2225 |     "udhr": "huu",
2226 |     "script": "Latin"
2227 |   },
2228 |   {
2229 |     "speakers": 2300,
2230 |     "name": "Colorado",
2231 |     "iso6393": "cof",
2232 |     "udhr": "cof",
2233 |     "script": "Latin"
2234 |   },
2235 |   {
2236 |     "speakers": 2000,
2237 |     "name": "Bora",
2238 |     "iso6393": "boa",
2239 |     "udhr": "boa",
2240 |     "script": "Latin"
2241 |   },
2242 |   {
2243 |     "speakers": 2000,
2244 |     "name": "Güilá Zapotec",
2245 |     "iso6393": "ztu",
2246 |     "udhr": "ztu",
2247 |     "script": "Latin"
2248 |   },
2249 |   {
2250 |     "speakers": 1500,
2251 |     "name": "Cashibo-Cacataibo",
2252 |     "iso6393": "cbr",
2253 |     "udhr": "cbr",
2254 |     "script": "Latin"
2255 |   },
2256 |   {
2257 |     "speakers": 1280,
2258 |     "name": "Matsés",
2259 |     "iso6393": "mcf",
2260 |     "udhr": "mcf",
2261 |     "script": "Latin"
2262 |   },
2263 |   {
2264 |     "speakers": 1200,
2265 |     "name": "Bislama",
2266 |     "iso6393": "bis",
2267 |     "udhr": "bis",
2268 |     "script": "Latin"
2269 |   },
2270 |   {
2271 |     "speakers": 1100,
2272 |     "name": "Northern Yukaghir",
2273 |     "iso6393": "ykg",
2274 |     "udhr": "ykg",
2275 |     "script": "Cyrillic"
2276 |   },
2277 |   {
2278 |     "speakers": 1000,
2279 |     "name": "Chiltepec Chinantec",
2280 |     "iso6393": "csa",
2281 |     "udhr": "csa",
2282 |     "script": "Latin"
2283 |   },
2284 |   {
2285 |     "speakers": 1000,
2286 |     "name": "Chickasaw",
2287 |     "iso6393": "cic",
2288 |     "udhr": "cic",
2289 |     "script": "Latin"
2290 |   },
2291 |   {
2292 |     "speakers": 950,
2293 |     "name": "Sharanahua",
2294 |     "iso6393": "mcd",
2295 |     "udhr": "mcd",
2296 |     "script": "Latin"
2297 |   },
2298 |   {
2299 |     "speakers": 720,
2300 |     "name": "Amahuaca",
2301 |     "iso6393": "amc",
2302 |     "udhr": "amc",
2303 |     "script": "Latin"
2304 |   },
2305 |   {
2306 |     "speakers": 500,
2307 |     "name": "Amarakaeri",
2308 |     "iso6393": "amr",
2309 |     "udhr": "amr",
2310 |     "script": "Latin"
2311 |   },
2312 |   {
2313 |     "speakers": 300,
2314 |     "name": "Caquinte",
2315 |     "iso6393": "cot",
2316 |     "udhr": "cot",
2317 |     "script": "Latin"
2318 |   },
2319 |   {
2320 |     "speakers": 200,
2321 |     "name": "Aja (Benin)",
2322 |     "iso6393": "ajg",
2323 |     "udhr": "ajg",
2324 |     "script": "Latin"
2325 |   },
2326 |   {
2327 |     "speakers": 150,
2328 |     "name": "Arabela",
2329 |     "iso6393": "arl",
2330 |     "udhr": "arl",
2331 |     "script": "Latin"
2332 |   },
2333 |   {
2334 |     "speakers": 20,
2335 |     "name": "Pipil",
2336 |     "iso6393": "ppl",
2337 |     "udhr": "ppl",
2338 |     "script": "Latin"
2339 |   },
2340 |   {
2341 |     "speakers": 0,
2342 |     "name": "Mozarabic",
2343 |     "iso6393": "mxi",
2344 |     "udhr": "mxi",
2345 |     "script": "Latin"
2346 |   }
2347 | ]


--------------------------------------------------------------------------------