├── .gitignore ├── tests ├── testthat.R └── testthat │ ├── test-udhr.R │ ├── test-scripts.R │ ├── test-utils.R │ ├── test-distances.R │ ├── test-franc.R │ ├── test-trigrams.R │ └── support.json ├── Makefile ├── LICENSE ├── .Rbuildignore ├── NAMESPACE ├── R ├── normalize.R ├── distances.R ├── ngrams.R ├── script.R ├── trigrams.R ├── speakers.R ├── expressions.R └── franc.R ├── NEWS.md ├── franc.Rproj ├── DESCRIPTION ├── man ├── speakers.Rd ├── franc.Rd └── franc_all.Rd ├── .github └── workflows │ ├── test-coverage.yaml │ └── check-pak.yaml ├── README.Rmd ├── README.md └── inst └── speakers.json /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | .Rproj.user 3 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(franc) 3 | 4 | if (Sys.getenv("NOT_CRAN") == "true") test_check("franc") 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: README.md 3 | 4 | README.md: README.Rmd 5 | Rscript -e "library(knitr); knit('$<', output = '$@', quiet = TRUE)" 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2004-2019 2 | COPYRIGHT HOLDER: Mango Solutions, Titus Wormer, Maciej Ceglowski, Jacob R. Rideout, Kent S. 
Johnson, Gábor Csárdi 3 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^Makefile$ 4 | ^README.Rmd$ 5 | ^README.html$ 6 | ^.travis.yml$ 7 | ^appveyor.yml$ 8 | ^\.github$ 9 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(franc) 4 | export(franc_all) 5 | export(speakers) 6 | importFrom(jsonlite,fromJSON) 7 | -------------------------------------------------------------------------------- /R/normalize.R: -------------------------------------------------------------------------------- 1 | ## Turn distances into scores: the smallest distance scores 1, the rest are scaled by nchar(text) * MAX_DIFFERENCE - min(distances). 2 | normalize <- function(text, distances) { 3 | lowest <- min(distances) 4 | span <- nchar(text) * MAX_DIFFERENCE - lowest 5 | 1 - (distances - lowest) / span 6 | } 7 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 2 | # development version 3 | 4 | # 1.1.4 5 | 6 | No user visible changes. 7 | 8 | # 1.1.3 9 | 10 | * Script detection is now better. Previous versions ignored some characters, 11 | because of some bad regular expressions. 12 | 13 | # 1.1.2 14 | 15 | No user visible changes. 16 | 17 | # 1.1.1 18 | 19 | First public release. 
20 | -------------------------------------------------------------------------------- /franc.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /tests/testthat/test-udhr.R: -------------------------------------------------------------------------------- 1 | 2 | context("UDHR") 3 | 4 | test_that("All supported languages are recognized", { 5 | 6 | support <- jsonlite::fromJSON("support.json", )$iso6393 7 | fixtures <- jsonlite::fromJSON("fixtures.json") 8 | 9 | for (i in seq_along(fixtures)) { 10 | if (nchar(fixtures[[i]]) != 0) { 11 | lang <- franc(fixtures[[i]], min_speakers = 0) 12 | expect_equal(lang, support[i], info = i) 13 | } 14 | } 15 | }) 16 | -------------------------------------------------------------------------------- /R/distances.R: -------------------------------------------------------------------------------- 1 | 2 | get_distance <- function(trigrams, model) { 3 | 4 | diff <- abs(trigrams - model[names(trigrams)]) 5 | diff[is.na(diff)] <- MAX_DIFFERENCE 6 | sum(diff) 7 | } 8 | 9 | get_distances <- function(trigrams, languages, whitelist = NULL, 10 | blacklist = NULL) { 11 | 12 | languages <- filter_languages(languages, whitelist, blacklist) 13 | sort(vapply(languages, get_distance, 1, trigrams = trigrams)) 14 | } 15 | -------------------------------------------------------------------------------- /R/ngrams.R: -------------------------------------------------------------------------------- 1 | 2 | ngrams <- function(text, n) { 3 | 4 | stopifnot( 5 | 
is.numeric(n), 6 | length(n) == 1, 7 | !is.na(n), 8 | n >= 1, 9 | is.finite(n) 10 | ) 11 | 12 | if (is.null(text) || length(text) == 0) return(list()) 13 | 14 | text <- as.character(text) 15 | 16 | lapply(text, function(x) { 17 | if (nchar(x) < n) return(character()) 18 | num <- nchar(x) - n + 1 19 | substring(x, 1:num, 1:num + n - 1) 20 | }) 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat/test-scripts.R: -------------------------------------------------------------------------------- 1 | 2 | context("Scripts") 3 | 4 | test_that("script detection works", { 5 | 6 | expect_equal(get_top_script(""), NULL) 7 | expect_equal(get_top_script("this is in English"), "Latin") 8 | 9 | ben <- paste0( 10 | "\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ", 11 | "\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ", 12 | "\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F" 13 | ) 14 | expect_equal(get_top_script(ben), "ben") 15 | }) 16 | -------------------------------------------------------------------------------- /R/script.R: -------------------------------------------------------------------------------- 1 | 2 | match_length <- function(pattern, text) { 3 | perl <- .Platform$OS.type == "windows" 4 | mat <- gregexpr(pattern, text, perl = perl)[[1]] 5 | if (mat[1] == -1) 0 else sum(attr(mat, "match.length")) 6 | } 7 | 8 | #' @include expressions.R 9 | 10 | get_top_script <- function(text) { 11 | num_letters <- vapply(expressions, match_length, 1, text = text) 12 | if (any(num_letters > 0)) { 13 | names(which.max(num_letters)) 14 | } else { 15 | NULL 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tests/testthat/test-utils.R: -------------------------------------------------------------------------------- 1 | 2 | context("Utility functions") 3 | 4 | test_that("match_length works", { 5 | 6 | expect_equal(match_length("[a-z]", "abcz"), 4) 7 | 
expect_equal(match_length("[a-z]", "x"), 1) 8 | expect_equal(match_length("[a-z]", ""), 0) 9 | expect_equal(match_length("[a-z]", "123"), 0) 10 | 11 | ben <- paste0( 12 | "\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ", 13 | "\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ", 14 | "\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F" 15 | ) 16 | expect_equal(match_length(expressions$ben, ben), 23) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-distances.R: -------------------------------------------------------------------------------- 1 | 2 | context("Model distances") 3 | 4 | test_that("get_distance works", { 5 | 6 | tri_eng <- clean_trigrams_table("This is apparently in English") 7 | eng <- get_distance(tri_eng, data[["Latin"]][["eng"]]) 8 | hun <- get_distance(tri_eng, data[["Latin"]][["hun"]]) 9 | deu <- get_distance(tri_eng, data[["Latin"]][["deu"]]) 10 | 11 | expect_true(eng < hun) 12 | expect_true(eng < deu) 13 | 14 | expect_equal(eng, 5453) 15 | expect_equal(hun, 7791) 16 | expect_equal(deu, 7293) 17 | }) 18 | 19 | 20 | test_that("filter_langages works", { 21 | 22 | expect_equal(data$Latin, filter_languages(data$Latin)) 23 | expect_equal(data$Latin[c("eng", "deu")], 24 | filter_languages(data$Latin, whitelist = c("eng", "deu"))) 25 | expect_equal(data$Latin[setdiff(names(data$Latin), c("eng", "deu"))], 26 | filter_languages(data$Latin, blacklist = c("eng", "deu"))) 27 | }) 28 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: franc 2 | Title: Detect the Language of Text 3 | Version: 1.1.4.9000 4 | Author: Gabor Csardi, Titus Wormer, Maciej Ceglowski, Jacob R. Rideout, 5 | and Kent S. 
Johnson 6 | Maintainer: Gábor Csárdi 7 | Description: With no external dependencies and 8 | support for 335 languages; all languages spoken by 9 | more than one million speakers. 'Franc' is a port 10 | of the 'JavaScript' project of the same name, 11 | see <https://github.com/wooorm/franc>. 12 | License: MIT + file LICENSE 13 | URL: https://github.com/gaborcsardi/franc#readme 14 | BugReports: https://github.com/gaborcsardi/franc/issues 15 | Suggests: 16 | testthat 17 | RoxygenNote: 6.1.1 18 | Encoding: UTF-8 19 | Imports: 20 | jsonlite 21 | Collate: 22 | 'distances.R' 23 | 'expressions.R' 24 | 'franc.R' 25 | 'ngrams.R' 26 | 'normalize.R' 27 | 'script.R' 28 | 'speakers.R' 29 | 'trigrams.R' 30 | -------------------------------------------------------------------------------- /man/speakers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/speakers.R 3 | \docType{data} 4 | \name{speakers} 5 | \alias{speakers} 6 | \title{Number of speakers for 370 languages} 7 | \format{A data frame with columns: 8 | \describe{ 9 | \item{language}{Three letter language code.} 10 | \item{speakers}{Number of speakers.} 11 | \item{name}{Full name of language.} 12 | \item{iso6391}{ISO 639-1 codes. See more at 13 | \code{https://en.wikipedia.org/wiki/ISO_639}.} 14 | \item{iso6392}{ISO 639-2T codes. See more at 15 | \code{https://en.wikipedia.org/wiki/ISO_639}.} 16 | }} 17 | \usage{ 18 | speakers 19 | } 20 | \description{ 21 | This is a superset of all languages detected by franc. Numbers were 22 | collected by Titus Wormer. To quote him: \emph{Painstakingly crawled by 23 | hand from OHCHR, the numbers are (in some cases, very) rough estimates 24 | or out-of-date.}. 
25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /R/trigrams.R: -------------------------------------------------------------------------------- 1 | 2 | ## This is mostly after 3 | ## https://github.com/wooorm/trigram-utils/blob/master/index.js 4 | 5 | trigrams <- function(text) ngrams(text, 3) 6 | ## ASCII punctuation and digits that clean() replaces with a space. 7 | expression_symbols <- "[-!\"#$%&'()*+,\\./0123456789:;<=>?@]" 8 | ## Strip the whole run of leading and of trailing whitespace (not just one trailing character). 9 | trim <- function(x) sub("\\s+$", "", sub("^\\s+", "", x)) 10 | ## Normalize text: symbols and digits become spaces, whitespace runs collapse to one space, then trim and lowercase. 11 | clean <- function(value) { 12 | value <- as.character(value) 13 | value <- gsub(pattern = expression_symbols, replacement = " ", value) 14 | value <- gsub(pattern = "\\s+", replacement = " ", value) 15 | value <- trim(value) 16 | tolower(value) 17 | } 18 | ## Trigrams of the cleaned text, padded with one space on each side; list() for zero-length input. 19 | clean_trigrams <- function(value) { 20 | if (length(value) == 0) return(list()) 21 | trigrams(paste0(' ', clean(value), ' ')) 22 | } 23 | ## Frequency table of the cleaned trigrams of a single string. 24 | clean_trigrams_table <- function(value) { 25 | stopifnot(is.character(value), length(value) == 1) 26 | tab <- table(clean_trigrams(value)) 27 | # This is the behavior of table before 28 | # https://github.com/wch/r-source/commit/09ae38a25149d02a21b19ef33c3d09ef92f72351 29 | # Not very important for us, but we had a test case for it. 
30 | names(dimnames(tab)) <- "" 31 | tab 32 | } 33 | -------------------------------------------------------------------------------- /tests/testthat/test-franc.R: -------------------------------------------------------------------------------- 1 | 2 | context("Language detection") 3 | 4 | test_that("top language is detected correctly", { 5 | 6 | expect_equal(franc("Alle menslike wesens word vry"), "afr") 7 | expect_equal(franc(""), "und") 8 | expect_equal(franc("the"), "und") 9 | expect_equal(franc("the", min_length = 3), "sco") 10 | }) 11 | 12 | test_that("no matching script", { 13 | expect_equal(franc(strrep("\U0001f4e6", 30)), "und") 14 | }) 15 | 16 | test_that("language scores are calculated correctly", { 17 | 18 | scores <- franc_all('O Brasil caiu 26 posi\u00c7\u00f5es') 19 | 20 | expect_equal( 21 | scores[1:12,], 22 | data.frame( 23 | stringsAsFactors = FALSE, 24 | language = c("por", "src", "glg", "snn", "bos", "hrv", "lav", "cat", 25 | "spa", "bam", "sco", "rmy"), 26 | score = c(1, 0.880093676814988, 0.870257611241218, 0.863700234192037, 27 | 0.816861826697892, 0.810304449648712, 0.809836065573771, 28 | 0.80655737704918, 0.799531615925059, 0.799531615925059, 29 | 0.779859484777518, 0.753629976580796) 30 | ) 31 | ) 32 | 33 | }) 34 | 35 | test_that("whitelist", { 36 | txt <- paste( 37 | "Somogy és Baranya megyét egy földút is összeköti, ahová a", 38 | "navigációs szoftverek néha bekalauzolják a gyanútlan autóst,", 39 | "aztán a helyiek húzzák ki őket a sárból. 
-- telex.hu" 40 | ) 41 | expect_equal(franc(txt, whitelist = c("hun", "eng", "esp")), "hun") 42 | }) 43 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | - x 7 | pull_request: 8 | branches: 9 | - main 10 | - master 11 | - x 12 | 13 | name: test-coverage 14 | 15 | jobs: 16 | test-coverage: 17 | runs-on: macOS-latest 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | steps: 21 | - uses: actions/checkout@v2 22 | 23 | - uses: r-lib/actions/setup-r@v1 24 | 25 | - uses: r-lib/actions/setup-pandoc@v1 26 | 27 | - name: Install pak and query dependencies 28 | run: | 29 | install.packages("pak", repos = "https://r-lib.github.io/p/pak/dev/") 30 | saveRDS(pak::pkg_deps_tree("local::.", dependencies = TRUE), ".github/r-depends.rds") 31 | shell: Rscript {0} 32 | 33 | - name: Cache R packages 34 | uses: actions/cache@v2 35 | with: 36 | path: ${{ env.R_LIBS_USER }} 37 | key: ${{ runner.os }}-${{ steps.install-r.outputs.installed-r-version }}-2-${{ hashFiles('.github/r-depends.rds') }} 38 | restore-keys: ${{ runner.os }}-${{ steps.install-r.outputs.installed-r-version }}-2- 39 | 40 | - name: Install system dependencies 41 | if: runner.os == 'Linux' 42 | run: Rscript -e 'pak::local_system_requirements(execute = TRUE)' 43 | 44 | - name: Install dependencies 45 | run: | 46 | pak::local_install_dev_deps(upgrade = TRUE) 47 | pak::pkg_install("covr") 48 | shell: Rscript {0} 49 | 50 | - name: Test coverage 51 | run: covr::codecov() 52 | shell: Rscript {0} 53 | -------------------------------------------------------------------------------- /R/speakers.R: -------------------------------------------------------------------------------- 1 | 2 | #' Number of speakers for 370 languages 3 | #' 4 | #' This is a superset of all languages detected by franc. 
Numbers were 5 | #' collected by Titus Wormer. To quote him: \emph{Painstakingly crawled by 6 | #' hand from OHCHR, the numbers are (in some cases, very) rough estimates 7 | #' or out-of-date.}. 8 | #' 9 | #' @format 10 | #' A data frame with columns: 11 | #' \describe{ 12 | #' \item{language}{Three letter language code.} 13 | #' \item{speakers}{Number of speakers.} 14 | #' \item{name}{Full name of language.} 15 | #' \item{iso6391}{ISO 639-1 codes. See more at 16 | #' \code{https://en.wikipedia.org/wiki/ISO_639}.} 17 | #' \item{iso6392}{ISO 639-2T codes. See more at 18 | #' \code{https://en.wikipedia.org/wiki/ISO_639}.} 19 | #' } 20 | #' 21 | #' @docType data 22 | #' @importFrom jsonlite fromJSON 23 | #' @export 24 | 25 | speakers <- jsonlite::fromJSON( 26 | system.file("speakers.json", package = packageName()), 27 | simplifyVector = FALSE) 28 | 29 | for (i in seq_along(speakers)) { 30 | if (is.null(speakers[[i]][[2]])) speakers[[i]][[2]] <- NA_character_ 31 | if (is.null(speakers[[i]][[3]])) speakers[[i]][[3]] <- NA_character_ 32 | } 33 | 34 | speakers <- data.frame( 35 | stringsAsFactors = FALSE, 36 | row.names = NULL, 37 | language = names(speakers), 38 | speakers = as.integer(vapply(speakers, "[[", 1, "speakers")), 39 | name = vapply(speakers, "[[", "", "name"), 40 | iso6391 = vapply(speakers, "[[", "", "iso6391"), 41 | iso6392 = vapply(speakers, "[[", "", "iso6392") 42 | ) 43 | 44 | speakers <- speakers[ order(speakers$speakers, decreasing = TRUE), ] 45 | 46 | row.names(speakers) <- seq_len(nrow(speakers)) 47 | -------------------------------------------------------------------------------- /man/franc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/franc.R 3 | \encoding{utf8} 4 | \name{franc} 5 | \alias{franc} 6 | \title{Detect the language of a string} 7 | \usage{ 8 | franc(text, min_speakers = 1e+06, whitelist = NULL, blacklist = NULL, 9 
| min_length = 10, max_length = 2048) 10 | } 11 | \arguments{ 12 | \item{text}{A string constant. Should be at least \code{min_length} 13 | characters long, this is 10 characters by default. 14 | Only the first \code{max_length} characters are used (2048 by 15 | default), to make the detection reasonably fast.} 16 | 17 | \item{min_speakers}{Languages with at least this many speakers are 18 | checked. By default this is one million. Set it to zero to 19 | include all languages known by franc. See also \code{\link{speakers}}.} 20 | 21 | \item{whitelist}{List of three letter language codes to check against.} 22 | 23 | \item{blacklist}{List of three letter language codes not to check 24 | against.} 25 | 26 | \item{min_length}{Minimum number of characters required in the text.} 27 | 28 | \item{max_length}{Maximum number of characters used from the text. 29 | By default only the first 2048 characters are used.} 30 | } 31 | \value{ 32 | A three letter ISO-639-3 language code, the detected 33 | language of the text. \code{"und"} is returned for too short input. 34 | } 35 | \description{ 36 | Detect the language of a string 37 | } 38 | \examples{ 39 | ## afr 40 | franc("Alle menslike wesens word vry") 41 | 42 | ## nno 43 | franc("Alle mennesker er født frie og") 44 | 45 | ## Too short, und 46 | franc("the") 47 | 48 | ## You can change what’s too short (default: 10), sco 49 | franc("the", min_length = 3) 50 | } 51 | \seealso{ 52 | \code{\link{franc_all}} for scores against many languages, 53 | \code{\link{speakers}}. 
54 | } 55 | -------------------------------------------------------------------------------- /man/franc_all.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/franc.R 3 | \encoding{utf8} 4 | \name{franc_all} 5 | \alias{franc_all} 6 | \title{List of probable languages for a text} 7 | \usage{ 8 | franc_all(text, min_speakers = 1e+06, whitelist = NULL, 9 | blacklist = NULL, min_length = 10, max_length = 2048) 10 | } 11 | \arguments{ 12 | \item{text}{A string constant. Should be at least \code{min_length} 13 | characters long, this is 10 characters by default. 14 | Only the first \code{max_length} characters are used (2048 by 15 | default), to make the detection reasonably fast.} 16 | 17 | \item{min_speakers}{Languages with at least this many speakers are 18 | checked. By default this is one million. Set it to zero to 19 | include all languages known by franc. See also \code{\link{speakers}}.} 20 | 21 | \item{whitelist}{List of three letter language codes to check against.} 22 | 23 | \item{blacklist}{List of three letter language codes not to check 24 | against.} 25 | 26 | \item{min_length}{Minimum number of characters required in the text.} 27 | 28 | \item{max_length}{Maximum number of characters used from the text. 29 | By default only the first 2048 characters are used.} 30 | } 31 | \value{ 32 | A data frame with columns \code{language} and \code{score}. 33 | The \code{language} column contains the three letter ISO-639-3 34 | language codes. The \code{score} column contains the scores. 35 | } 36 | \description{ 37 | Returns the scores for all languages that use the same script 38 | as the input text, in decreasing order of probability. The score 39 | is calculated from the distances of the trigram distributions 40 | in the input text and in the language model. The closer the languages, 41 | the higher the score. 
Scores are scaled, so that the closest language 42 | will have a score of 1. 43 | } 44 | \examples{ 45 | head(franc_all("O Brasil caiu 26 posições")) 46 | 47 | ## Provide a whitelist: 48 | franc_all("O Brasil caiu 26 posições", 49 | whitelist = c("por", "src", "glg", "spa")) 50 | 51 | ## Provide a blacklist: 52 | head(franc_all("O Brasil caiu 26 posições", 53 | blacklist = c("src", "glg", "lav"))) 54 | } 55 | \seealso{ 56 | \code{\link{franc}} if you only want the top result, 57 | \code{\link{speakers}}. 58 | } 59 | -------------------------------------------------------------------------------- /.github/workflows/check-pak.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 
7 | on: 8 | push: 9 | branches: [main, master, x] 10 | pull_request: 11 | branches: [main, master, x] 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: macOS-latest, r: 'release'} 26 | 27 | - {os: windows-latest, r: 'release'} 28 | # Use 3.6 to trigger usage of RTools35 29 | - {os: windows-latest, r: '3.6'} 30 | 31 | # Use older ubuntu to maximise backward compatibility 32 | - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'} 33 | - {os: ubuntu-18.04, r: 'release'} 34 | - {os: ubuntu-18.04, r: 'oldrel-1'} 35 | - {os: ubuntu-18.04, r: 'oldrel-2'} 36 | - {os: ubuntu-18.04, r: 'oldrel-3'} 37 | - {os: ubuntu-18.04, r: 'oldrel-4'} 38 | 39 | env: 40 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 41 | R_KEEP_PKG_SOURCE: yes 42 | 43 | steps: 44 | - uses: actions/checkout@v2 45 | 46 | - uses: r-lib/actions/setup-pandoc@v1 47 | 48 | - uses: r-lib/actions/setup-r@v1 49 | with: 50 | r-version: ${{ matrix.config.r }} 51 | http-user-agent: ${{ matrix.config.http-user-agent }} 52 | use-public-rspm: true 53 | 54 | - uses: r-lib/actions/setup-r-dependencies@v1 55 | with: 56 | extra-packages: rcmdcheck 57 | 58 | - uses: r-lib/actions/check-r-package@v1 59 | 60 | - name: Show testthat output 61 | if: always() 62 | run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true 63 | shell: bash 64 | 65 | - name: Upload check results 66 | if: failure() 67 | uses: actions/upload-artifact@main 68 | with: 69 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 70 | path: check 71 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r, setup, echo = FALSE, message = FALSE} 6 | knitr::opts_chunk$set( 7 | comment = 
"#>", 8 | tidy = FALSE, 9 | error = FALSE) 10 | ``` 11 | 12 | # franc 13 | 14 | > Detect the Language of Text 15 | 16 | 17 | 18 | [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org) 19 | [![R build status](https://github.com/gaborcsardi/franc/workflows/R-CMD-check/badge.svg)](https://github.com/gaborcsardi/franc/actions) 20 | [![](https://www.r-pkg.org/badges/version/franc)](https://www.r-pkg.org/pkg/franc) 21 | [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/franc)](https://www.r-pkg.org/pkg/franc) 22 | [![Coverage Status](https://img.shields.io/codecov/c/github/gaborcsardi/franc/master.svg)](https://codecov.io/github/gaborcsardi/franc?branch=master) 23 | 24 | 25 | 26 | Franc has no external dependencies and supports 310 languages; all 27 | languages spoken by more than one million speakers. Franc is a port 28 | of the JavaScript project of the same name, see 29 | https://github.com/wooorm/franc. 30 | 31 | ## Installation 32 | 33 | ```{r eval = FALSE} 34 | install.packages("franc") 35 | ``` 36 | 37 | ## Usage 38 | 39 | ```{r} 40 | library(franc) 41 | ``` 42 | 43 | Simply supply the text, and franc detects its language: 44 | 45 | ```{r} 46 | franc("Alle menslike wesens word vry") 47 | franc("এটি একটি ভাষা একক IBM স্ক্রিপ্ট") 48 | franc("Alle mennesker er født frie og") 49 | head(franc_all("O Brasil caiu 26 posições")) 50 | ``` 51 | 52 | `und` is the `undefined` language, this is returned if the input is 53 | too short (shorter than 10 characters by default). 
54 | 55 | ```{r} 56 | franc("the") 57 | franc("the", min_length = 3) 58 | ``` 59 | 60 | You can provide a whitelist or a blacklist: 61 | 62 | ```{r} 63 | franc_all("O Brasil caiu 26 posições", 64 | whitelist = c("por", "src", "glg", "spa")) 65 | head(franc_all("O Brasil caiu 26 posições", 66 | blacklist = c("src", "glg", "lav"))) 67 | ``` 68 | 69 | ## Supported languages 70 | 71 | The R version of franc supports 310 languages. By default only the 72 | languages with more than 1 million speakers are used, this is 175 73 | languages. The `min_speakers` argument can relax this, and allows 74 | using more languages: 75 | 76 | ```{r} 77 | head(franc_all("O Brasil caiu 26 posições")) 78 | head(franc_all("O Brasil caiu 26 posições", min_speakers = 0)) 79 | ``` 80 | 81 | ## License 82 | 83 | MIT © [Mango Solutions](https://github.com/mangothecat), Titus Wormer, 84 | Maciej Ceglowski, Jacob R. Rideout, Kent S. Johnson, Gábor Csárdi 85 | -------------------------------------------------------------------------------- /tests/testthat/test-trigrams.R: -------------------------------------------------------------------------------- 1 | 2 | context("Trigrams") 3 | 4 | 5 | test_that("trigrams works", { 6 | 7 | expect_equal(trigrams("abcdef")[[1]], c("abc", "bcd", "cde", "def")) 8 | expect_equal(trigrams("abc")[[1]], "abc") 9 | expect_equal(trigrams("ab")[[1]], character(0)) 10 | expect_equal(trigrams(c("ab", "abc", "abcd")), 11 | list(character(0), "abc", c("abc", "bcd"))) 12 | expect_equal(trigrams(character(0)), list()) 13 | }) 14 | 15 | 16 | test_that("clean_trigrams works", { 17 | 18 | expect_equal(clean_trigrams("abcdef")[[1]], 19 | c(" ab", "abc", "bcd", "cde", "def", "ef ")) 20 | expect_equal(clean_trigrams("abc")[[1]], c(" ab", "abc", "bc ")) 21 | expect_equal(clean_trigrams("ab")[[1]], c(" ab", "ab ")) 22 | expect_equal(clean_trigrams("a")[[1]], c(" a ")) 23 | expect_equal(clean_trigrams(c("abcd", "xyzz")), 24 | list(c(" ab", "abc", "bcd", "cd "), 25 | c(" xy", "xyz", 
"yzz", "zz "))) 26 | expect_equal(clean_trigrams(character(0)), list()) 27 | }) 28 | 29 | 30 | test_that("clean_trigrams removes non-letters", { 31 | 32 | expect_equal(clean_trigrams("a2345!+b<=>?c")[[1]], 33 | c(" a ", "a b", " b ", "b c", " c ")) 34 | expect_equal(clean_trigrams("a-!\"#$%&'()*+,\\./0123456789:;<=>?@")[[1]], 35 | c(" a ")) 36 | }) 37 | 38 | 39 | test_that("clean_trigrams is case insensitive", { 40 | 41 | expect_equal(clean_trigrams("ABCDEF"), clean_trigrams("abcdef")) 42 | expect_equal(clean_trigrams("ABCDEF"), clean_trigrams("abCdEf")) 43 | }) 44 | 45 | 46 | test_that("clean_trigrams keeps UniCode letters", { 47 | 48 | ben <- paste0( 49 | "\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ", 50 | "\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ", 51 | "\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F" 52 | ) 53 | expect_equal( 54 | clean_trigrams(ben)[[1]], 55 | c(" \u098F\u099F", "\u098F\u099F\u09BF", "\u099F\u09BF ", 56 | "\u09BF \u098F", " \u098F\u0995", "\u098F\u0995\u099F", 57 | "\u0995\u099F\u09BF", "\u099F\u09BF ", "\u09BF \u09AD", 58 | " \u09AD\u09BE", "\u09AD\u09BE\u09B7", 59 | "\u09BE\u09B7\u09BE", "\u09B7\u09BE ", 60 | "\u09BE \u098F", " \u098F\u0995", "\u098F\u0995\u0995", 61 | "\u0995\u0995 ", "\u0995 i", " ib", "ibm", "bm ", 62 | "m \u09B8", " \u09B8\u09CD", "\u09B8\u09CD\u0995", 63 | "\u09CD\u0995\u09CD", "\u0995\u09CD\u09B0", 64 | "\u09CD\u09B0\u09BF", "\u09B0\u09BF\u09AA", 65 | "\u09BF\u09AA\u09CD", "\u09AA\u09CD\u099F", 66 | "\u09CD\u099F ") 67 | ) 68 | }) 69 | 70 | test_that("clean_trigrams removed excesive whitespace", { 71 | 72 | expect_equal(clean_trigrams(" a ")[[1]], c(" a ")) 73 | expect_equal(clean_trigrams("a a")[[1]], c(" a ", "a a", " a ")) 74 | }) 75 | 76 | 77 | test_that("clean_trigrams_table works", { 78 | 79 | tab1 <- structure( 80 | c(1L, 3L, 1L, 2L, 2L), 81 | dim = 5L, 82 | dimnames = structure( 83 | list(c(" ab", "abc", "bc ", "bca", "cab")), 84 | names = "" 85 | ), 86 | class = "table" 87 | ) 88 | 89 | 
expect_equal(clean_trigrams_table(c("abcabcabc")), tab1) 90 | 91 | tab2 <- structure( 92 | integer(0), 93 | dim = 0L, 94 | dimnames = structure(list(NULL), names = ""), 95 | class = "table" 96 | ) 97 | 98 | expect_equal(clean_trigrams_table(""), tab2) 99 | }) 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # franc 3 | 4 | > Detect the Language of Text 5 | 6 | 7 | 8 | [![Project Status: Active - The project has reached a stable, usable 9 | state and is being actively 10 | developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org) 11 | [![R build 12 | status](https://github.com/gaborcsardi/franc/workflows/R-CMD-check/badge.svg)](https://github.com/gaborcsardi/franc/actions) 13 | [![](https://www.r-pkg.org/badges/version/franc)](https://www.r-pkg.org/pkg/franc) 14 | [![CRAN RStudio mirror 15 | downloads](https://cranlogs.r-pkg.org/badges/franc)](https://www.r-pkg.org/pkg/franc) 16 | [![Coverage 17 | Status](https://img.shields.io/codecov/c/github/gaborcsardi/franc/master.svg)](https://codecov.io/github/gaborcsardi/franc?branch=master) 18 | 19 | 20 | 21 | Franc has no external dependencies and supports 310 languages; all 22 | languages spoken by more than one million speakers. Franc is a port of 23 | the JavaScript project of the same name, see 24 | <https://github.com/wooorm/franc>. 
25 | 26 | ## Installation 27 | 28 | ``` r 29 | install.packages("franc") 30 | ``` 31 | 32 | ## Usage 33 | 34 | ``` r 35 | library(franc) 36 | ``` 37 | 38 | Simply supply the text, and franc detects its language: 39 | 40 | ``` r 41 | franc("Alle menslike wesens word vry") 42 | ``` 43 | 44 | #> [1] "afr" 45 | 46 | ``` r 47 | franc("এটি একটি ভাষা একক IBM স্ক্রিপ্ট") 48 | ``` 49 | 50 | #> [1] "ben" 51 | 52 | ``` r 53 | franc("Alle mennesker er født frie og") 54 | ``` 55 | 56 | #> [1] "nno" 57 | 58 | ``` r 59 | head(franc_all("O Brasil caiu 26 posições")) 60 | ``` 61 | 62 | #> language score 63 | #> 1 por 1.0000000 64 | #> 2 src 0.8800937 65 | #> 3 glg 0.8702576 66 | #> 4 snn 0.8637002 67 | #> 5 bos 0.8168618 68 | #> 6 hrv 0.8103044 69 | 70 | `und` is the `undefined` language, this is returned if the input is too 71 | short (shorter than 10 characters by default). 72 | 73 | ``` r 74 | franc("the") 75 | ``` 76 | 77 | #> [1] "und" 78 | 79 | ``` r 80 | franc("the", min_length = 3) 81 | ``` 82 | 83 | #> [1] "sco" 84 | 85 | You can provide a whitelist or a blacklist: 86 | 87 | ``` r 88 | franc_all("O Brasil caiu 26 posições", 89 | whitelist = c("por", "src", "glg", "spa")) 90 | ``` 91 | 92 | #> language score 93 | #> 1 por 1.0000000 94 | #> 2 src 0.8800937 95 | #> 3 glg 0.8702576 96 | #> 4 spa 0.7995316 97 | 98 | ``` r 99 | head(franc_all("O Brasil caiu 26 posições", 100 | blacklist = c("src", "glg", "lav"))) 101 | ``` 102 | 103 | #> language score 104 | #> 1 por 1.0000000 105 | #> 2 snn 0.8637002 106 | #> 3 bos 0.8168618 107 | #> 4 hrv 0.8103044 108 | #> 5 cat 0.8065574 109 | #> 6 spa 0.7995316 110 | 111 | ## Supported languages 112 | 113 | The R version of franc supports 310 languages. By default only the 114 | languages with more than 1 million speakers are used, this is 175 115 | languages. 
The `min_speakers` argument can relax this, and allows using 116 | more languages: 117 | 118 | ``` r 119 | head(franc_all("O Brasil caiu 26 posições")) 120 | ``` 121 | 122 | #> language score 123 | #> 1 por 1.0000000 124 | #> 2 src 0.8800937 125 | #> 3 glg 0.8702576 126 | #> 4 snn 0.8637002 127 | #> 5 bos 0.8168618 128 | #> 6 hrv 0.8103044 129 | 130 | ``` r 131 | head(franc_all("O Brasil caiu 26 posições", min_speakers = 0)) 132 | ``` 133 | 134 | #> language score 135 | #> 1 lad 1.0000000 136 | #> 2 por 0.9442724 137 | #> 3 pov 0.8788147 138 | #> 4 ast 0.8677576 139 | #> 5 roh 0.8363556 140 | #> 6 src 0.8310482 141 | 142 | ## License 143 | 144 | MIT © [Mango Solutions](https://github.com/mangothecat), Titus Wormer, 145 | Maciej Ceglowski, Jacob R. Rideout, Kent S. Johnson, Gábor Csárdi 146 | -------------------------------------------------------------------------------- /R/expressions.R: -------------------------------------------------------------------------------- 1 | 2 | expressions <- list( 3 | "cmn" = paste0( 4 | "[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DB5\u4E00-\u9FCC\uF900-\uFA6D\uFA70-\uFAD9]|", 5 | "[\U00020000-\U0002A3FF]|[\U0002A800-\U0002B3FF]|", 6 | "[\U0002A400-\U0002A6D6]|[\U0002A700-\U0002A7FF]|", 7 | "[\U0002B400-\U0002B734]|[\U0002B740-\U0002B7FF]|", 8 | "[\U0002B800-\U0002B81D]|", 9 | "[\U0002F800-\U0002FA1D]" 10 | ), 11 | "Latin" = "[A-Za-z\u00AA\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u02E0-\u02E4\u1D00-\u1D25\u1D2C-\u1D5C\u1D62-\u1D65\u1D6B-\u1D77\u1D79-\u1DBE\u1E00-\u1EFF\u2071\u207F\u2090-\u209C\u212A\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA78E\uA790-\uA7AD\uA7B0\uA7B1\uA7F7-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB5F\uAB64\uFB00-\uFB06\uFF21-\uFF3A\uFF41-\uFF5A]", 12 | "Cyrillic" = "[\u0400-\u0484\u0487-\u052F\u1D2B\u1D78\u2DE0-\u2DFF\uA640-\uA69D\uA69F]", 13 | "Arabic" = paste0( 14 | 
"[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061E\u0620-\u063F\u0641-\u064A\u0656-\u065F\u066A-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u08A0-\u08B2\u08E4-\u08FF\uFB50-\uFBC1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFD\uFE70-\uFE74\uFE76-\uFEFC]|", 15 | "[\U00010E60-\U00010E7E]|", 16 | "[\U0001EE00-\U0001EE03]|[\U0001EE05-\U0001EE1F]|", 17 | "[\U0001EE21\U0001EE22\U0001EE24\U0001EE27\U0001EE29-\U0001EE32]|", 18 | "[\U0001EE34-\U0001EE37\U0001EE39\U0001EE3B\U0001EE42\U0001EE47\U0001EE49\U0001EE4B\U0001EE4D-\U0001EE4F]|", 19 | "[\U0001EE51-\U0001EE52\U0001EE54\U0001EE57\U0001EE59\U0001EE5b\U0001EE5d\U0001EE5f\U0001EE61\U0001EE62\U0001EE64]|", 20 | "[\U0001EE67-\U0001EE6a\U0001EE6c-\U0001EE72\U0001EE74-\U0001EE77\U0001EE79-\U0001EE7c]|", 21 | "[\U0001EE7e\U0001EE80-\U0001EE89\U0001EE8b-\U0001EE9b\U0001EEa1-\U0001EEa3\U0001EEa5-\U0001EEa9]|", 22 | "[\U0001EEab-\U0001EEbb\U0001EEf0-\U0001EEf1]" 23 | ), 24 | "ben" = "[\u0980-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7\u09C8\u09CB-\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09E6-\u09FB]", 25 | "Devanagari" = "[\u0900-\u0950\u0953-\u0963\u0966-\u097F\uA8E0-\uA8FB]", 26 | "jpn" = "[\u3041-\u3096\u309D-\u309F]|\uD82C\uDC01|\uD83C\uDE00|[\u30A1-\u30FA\u30FD-\u30FF\u31F0-\u31FF\u32D0-\u32FE\u3300-\u3357\uFF66-\uFF6F\uFF71-\uFF9D]|\uD82C\uDC00", 27 | "kor" = "[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]", 28 | "tel" = "[\u0C00-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C58\u0C59\u0C60-\u0C63\u0C66-\u0C6F\u0C78-\u0C7F]", 29 | "tam" = "[\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD0\u0BD7\u0BE6-\u0BFA]", 30 | "guj" = 
"[\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0-\u0AE3\u0AE6-\u0AF1]", 31 | "mal" = "[\u0D01-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D-\u0D44\u0D46-\u0D48\u0D4A-\u0D4E\u0D57\u0D60-\u0D63\u0D66-\u0D75\u0D79-\u0D7F]", 32 | "kan" = "[\u0C81-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CDE\u0CE0-\u0CE3\u0CE6-\u0CEF\u0CF1\u0CF2]", 33 | "mya" = "[\u1000-\u109F\uA9E0-\uA9FE\uAA60-\uAA7F]", 34 | "ori" = "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3C-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B5C\u0B5D\u0B5F-\u0B63\u0B66-\u0B77]", 35 | "pan" = "[\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A59-\u0A5C\u0A5E\u0A66-\u0A75]", 36 | "Ethiopic" = "[\u1200-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u135D-\u137C\u1380-\u1399\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E]", 37 | "tha" = "[\u0E01-\u0E3A\u0E40-\u0E5B]", 38 | "sin" = paste0( 39 | "[\u0D82\u0D83\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DE6-\u0DEF\u0DF2-\u0DF4]|", 40 | "[\U000111E1-\U000111F4]" 41 | ), 42 | "ell" = paste0( 43 | 
"[\u0370-\u0373\u0375-\u0377\u037A-\u037D\u037F\u0384\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03E1\u03F0-\u03FF\u1D26-\u1D2A\u1D5D-\u1D61\u1D66-\u1D6A\u1DBF\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2126\uAB65]|", 44 | "[\U00010140-\U0001018C\U000101A0]|", 45 | "[\U0001D200-\U0001D245]" 46 | ), 47 | "khm" = "[\u1780-\u17DD\u17E0-\u17E9\u17F0-\u17F9\u19E0-\u19FF]", 48 | "hye" = "[\u0531-\u0556\u0559-\u055F\u0561-\u0587\u058A\u058D-\u058F\uFB13-\uFB17]", 49 | "sat" = "[\u1C50-\u1C7F]", 50 | "Tibetan" = "[\u0F00-\u0F47\u0F49-\u0F6C\u0F71-\u0F97\u0F99-\u0FBC\u0FBE-\u0FCC\u0FCE-\u0FD4\u0FD9\u0FDA]", 51 | "Hebrew" = "[\u0591-\u05C7\u05D0-\u05EA\u05F0-\u05F4\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFB4F]", 52 | "kat" = "[\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u10FF\u2D00-\u2D25\u2D27\u2D2D]", 53 | "lao" = "[\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD-\u0EB9\u0EBB-\u0EBD\u0EC0-\u0EC4\u0EC6\u0EC8-\u0ECD\u0ED0-\u0ED9\u0EDC-\u0EDF]", 54 | "iii" = "[\uA000-\uA48C\uA490-\uA4C6]", 55 | "aii" = "[\u0700-\u070D\u070F-\u074A\u074D-\u074F]", 56 | "div" = "[\u0780-\u07B1]", 57 | "vai" = "[\uA500-\uA62B]", 58 | "Canadian_Aboriginal" = "[\u1400-\u167F\u18B0-\u18F5]" 59 | ) 60 | -------------------------------------------------------------------------------- /R/franc.R: -------------------------------------------------------------------------------- 1 | 2 | ## This is mostly after 3 | ## https://github.com/wooorm/franc/blob/master/lib/franc.js 4 | ## 5 | ## Note that this happens at build time 6 | 7 | #' @importFrom jsonlite fromJSON 8 | 9 | data <- jsonlite::fromJSON( 10 | system.file("data.json", package = packageName()), 11 | simplifyVector = FALSE) 12 | 13 | for (script in names(data)) { 14 | for (language in names(data[[script]])) { 15 
| model <- strsplit(data[[script]][[language]], '|', fixed = TRUE)[[1]] 16 | model <- structure(seq_along(model) - 1L, names = model) 17 | data[[script]][[language]] <- model 18 | } 19 | } 20 |

## Scaling constant for scores: normalize() multiplies it by the number
## of characters in the text when turning distances into scores.
MAX_DIFFERENCE <- 300

## Keep only the language models that pass the whitelist and the
## blacklist. `languages` is a named list, the names are the language
## codes. A NULL whitelist or blacklist means that filter is not applied.
filter_languages <- function(languages, whitelist = NULL,
                             blacklist = NULL) {

  codes <- names(languages)

  if (!is.null(whitelist)) {
    codes <- intersect(codes, whitelist)
  }
  if (!is.null(blacklist)) {
    codes <- setdiff(codes, blacklist)
  }

  languages[codes]
}

## Build the result data frame: one row per language code in `x`, with
## its score. Names are dropped from both inputs.
lang <- function(x, score = 1) {
  codes <- unname(x)
  scores <- unname(score)
  data.frame(language = codes, score = scores, stringsAsFactors = FALSE)
}

## Result for the undetermined language: "und" with a score of 1.
und <- function() {
  lang("und")
}

#' List of probable languages for a text
#'
#' Returns the scores for all languages that use the same script
#' as the input text, in decreasing order of probability. The score
#' is calculated from the distances of the trigram distributions
#' in the input text and in the language model. The closer the languages,
#' the higher the score. Scores are scaled, so that the closest language
#' will have a score of 1.
#'
#' @param text A string constant. Should be at least \code{min_length}
#'   characters long, this is 10 characters by default.
#'   Only the first \code{max_length} characters are used (2048 by
#'   default), to make the detection reasonably fast.
#' @param min_speakers Languages with at least this many speakers are
#'   checked. By default this is one million. Set it to zero to
#'   include all languages known by franc. See also \code{\link{speakers}}.
#' @param whitelist List of three letter language codes to check against.
#' @param blacklist List of three letter language codes not to check
#'   against.
#' @param min_length Minimum number of characters required in the text.
#' @param max_length Maximum number of characters used from the text.
#'   By default only the first 2048 characters are used.
#' @return A data frame with columns \code{language} and \code{score}.
#'   The \code{language} column contains the three letter ISO-639-3
#'   language codes. The \code{score} column contains the scores.
#'   A single row with language \code{"und"} and score 1 is returned if
#'   the text is shorter than \code{min_length}, if its script is
#'   unknown, or if \code{min_speakers}, \code{whitelist} and
#'   \code{blacklist} leave no candidate language for the script.
#'
#' @encoding utf8
#' @seealso \code{\link{franc}} if you only want the top result,
#'   \code{\link{speakers}}.
#' @export
#' @examples
#' head(franc_all("O Brasil caiu 26 posições"))
#'
#' ## Provide a whitelist:
#' franc_all("O Brasil caiu 26 posições",
#'   whitelist = c("por", "src", "glg", "spa"))
#'
#' ## Provide a blacklist:
#' head(franc_all("O Brasil caiu 26 posições",
#'   blacklist = c("src", "glg", "lav")))

franc_all <- function(text, min_speakers = 1000000, whitelist = NULL,
                      blacklist = NULL, min_length = 10,
                      max_length = 2048) {

  text <- as.character(text)
  stopifnot(length(text) == 1, !is.na(text))

  if (nchar(text) < min_length) return(und())
  text <- substr(text, 1, max_length)

  script <- get_top_script(text)

  ## Returns NULL if script is unknown
  if (is.null(script)) return(und())

  ## Return the language if script is a single language.
  ## Note that the whitelist and the blacklist are not consulted here.
  if (! script %in% names(data)) return(lang(script))

  ## Candidate languages
  if (min_speakers != 0) {
    enough_speakers <- speakers$language[speakers$speakers >= min_speakers]
    if (is.null(whitelist)) {
      whitelist <- enough_speakers
    } else {
      whitelist <- intersect(whitelist, enough_speakers)
    }
  }

  languages <- filter_languages(
    data[[script]],
    whitelist = whitelist,
    blacklist = blacklist
  )

  ## The filters may remove every candidate. There is nothing to measure
  ## distances against then: normalize() would take min() of an empty
  ## vector, and franc() would index the first row of an empty result.
  if (length(languages) == 0) return(und())

  trigrams <- clean_trigrams_table(text)
  dist <- get_distances(trigrams, languages)

  lang(names(dist), normalize(text, dist))
}

#' Detect the language of a string
#'
#' @param text A string constant. Should be at least \code{min_length}
#'   characters long, this is 10 characters by default.
#'   Only the first \code{max_length} characters are used (2048 by
#'   default), to make the detection reasonably fast.
#' @param min_speakers Languages with at least this many speakers are
#'   checked. By default this is one million. Set it to zero to
#'   include all languages known by franc. See also \code{\link{speakers}}.
#' @param whitelist List of three letter language codes to check against.
#' @param blacklist List of three letter language codes not to check
#'   against.
#' @param min_length Minimum number of characters required in the text.
#' @param max_length Maximum number of characters used from the text.
#'   By default only the first 2048 characters are used.
#' @return A three letter ISO-639-3 language code, the detected
#'   language of the text. \code{"und"} is returned for too short input.
#'
#' @encoding utf8
#' @seealso \code{\link{franc_all}} for scores against many languages,
#'   \code{\link{speakers}}.
#' @export
#' @examples
#' ## afr
#' franc("Alle menslike wesens word vry")
#'
#' ## nno
#' franc("Alle mennesker er født frie og")
#'
#' ## Too short, und
#' franc("the")
#'
#' ## You can change what’s too short (default: 10), sco
#' franc("the", min_length = 3)

franc <- function(text, min_speakers = 1000000, whitelist = NULL,
                  blacklist = NULL, min_length = 10, max_length = 2048) {

  ## Score the candidates with franc_all(), forwarding every option
  ## unchanged.
  scores <- franc_all(
    text,
    min_speakers = min_speakers,
    whitelist = whitelist,
    blacklist = blacklist,
    min_length = min_length,
    max_length = max_length
  )

  ## franc_all() is documented to list languages in decreasing order of
  ## probability, so the first row is the detected language.
  scores$language[[1]]
}
-------------------------------------------------------------------------------- /inst/speakers.json: -------------------------------------------------------------------------------- 1 | { 2 | "ote": { 3 | "speakers": 200000, 4 | "iso6391": null, 5 | "iso6392": null, 6 | "name": "Mezquital Otomi" 7 | }, 8 | "tsz": { 9 | "speakers": 100000, 10 | "iso6391": null, 11 | "iso6392": null, 12 | "name": "Purepecha" 13 | }, 14 | "ndo": { 15 | "speakers": 1000000, 16 | "iso6391": "ng", 17 | "iso6392": "ndo", 18 | "name": "Ndonga" 19 | }, 20 | "epo": { 21 | "speakers": 2000000, 22 | "iso6391": "eo", 23 | "iso6392": "epo", 24 | "name": "Esperanto" 25 | }, 26 | "kek": { 27 | "speakers": 500000, 28 | "iso6391": null, 29 | "iso6392": null, 30 | "name": "Kekchí" 31 | }, 32 | "quc": { 33 | "speakers": 300000, 34 | "iso6391": null, 35 | "iso6392": null, 36 | "name": "K'iche'" 37 | }, 38 | "hus": { 39 | "speakers": 150000, 40 | "iso6391": null, 41 | "iso6392": null, 42 | "name": "Huastec" 43 | }, 44 | "snn": { 45 | "speakers": 1240000, 46 | "iso6391": null, 47 | "iso6392": null, 48 | "name": "Siona" 49 | }, 50 | "jiv": { 51 | "speakers": 35000, 52 | "iso6391": null, 53 | "iso6392": null, 54 | "name": "Shuar" 55 | }, 56 | "niv": { 57 | "speakers": 1000, 58 | "iso6391": null, 59 | "iso6392": null, 60 | "name": "Gilyak" 61 | 
}, 62 | "arl": { 63 | "speakers": 150, 64 | "iso6391": null, 65 | "iso6392": null, 66 | "name": "Arabela" 67 | }, 68 | "arn": { 69 | "speakers": 440000, 70 | "iso6391": null, 71 | "iso6392": "arn", 72 | "name": "Mapudungun" 73 | }, 74 | "asm": { 75 | "speakers": 14604000, 76 | "iso6391": "as", 77 | "iso6392": "asm", 78 | "name": "Assamese" 79 | }, 80 | "ast": { 81 | "speakers": 100000, 82 | "iso6391": null, 83 | "iso6392": "ast", 84 | "name": "Asturian" 85 | }, 86 | "acu": { 87 | "speakers": 4500, 88 | "iso6391": null, 89 | "iso6392": null, 90 | "name": "Achuar-Shiwiar" 91 | }, 92 | "awa": { 93 | "speakers": 38261000, 94 | "iso6391": null, 95 | "iso6392": "awa", 96 | "name": "Awadhi" 97 | }, 98 | "ayr": { 99 | "speakers": 2200000, 100 | "iso6391": null, 101 | "iso6392": null, 102 | "name": "Central Aymara" 103 | }, 104 | "azj": { 105 | "speakers": 13869000, 106 | "iso6391": null, 107 | "iso6392": null, 108 | "name": "North Azerbaijani" 109 | }, 110 | "wwa": { 111 | "speakers": 40000, 112 | "iso6391": null, 113 | "iso6392": null, 114 | "name": "Waama" 115 | }, 116 | "amh": { 117 | "speakers": 23000000, 118 | "iso6391": "am", 119 | "iso6392": "amh", 120 | "name": "Amharic" 121 | }, 122 | "arb": { 123 | "speakers": 280000000, 124 | "iso6391": null, 125 | "iso6392": null, 126 | "name": "Standard Arabic" 127 | }, 128 | "amc": { 129 | "speakers": 720, 130 | "iso6391": null, 131 | "iso6392": null, 132 | "name": "Amahuaca" 133 | }, 134 | "alt": { 135 | "speakers": 68000, 136 | "iso6391": null, 137 | "iso6392": "alt", 138 | "name": "Southern Altai" 139 | }, 140 | "als": { 141 | "speakers": 5000000, 142 | "iso6391": null, 143 | "iso6392": null, 144 | "name": "Tosk Albanian" 145 | }, 146 | "abk": { 147 | "speakers": 105000, 148 | "iso6391": "ab", 149 | "iso6392": "abk", 150 | "name": "Abkhazian" 151 | }, 152 | "aka": { 153 | "speakers": 7000000, 154 | "iso6391": "ak", 155 | "iso6392": "aka", 156 | "name": "Akan" 157 | }, 158 | "hye": { 159 | "speakers": 6836000, 160 | 
"iso6391": "hy", 161 | "iso6392": "hye", 162 | "name": "Armenian" 163 | }, 164 | "ajg": { 165 | "speakers": 200, 166 | "iso6391": null, 167 | "iso6392": null, 168 | "name": "Aja (Benin)" 169 | }, 170 | "aii": { 171 | "speakers": 1000000, 172 | "iso6391": null, 173 | "iso6392": null, 174 | "name": "Assyrian Neo-Aramaic" 175 | }, 176 | "ace": { 177 | "speakers": 3000000, 178 | "iso6391": null, 179 | "iso6392": "ace", 180 | "name": "Achinese" 181 | }, 182 | "agr": { 183 | "speakers": 27500, 184 | "iso6391": null, 185 | "iso6392": null, 186 | "name": "Aguaruna" 187 | }, 188 | "afr": { 189 | "speakers": 6365000, 190 | "iso6391": "af", 191 | "iso6392": "afr", 192 | "name": "Afrikaans" 193 | }, 194 | "amr": { 195 | "speakers": 500, 196 | "iso6391": null, 197 | "iso6392": null, 198 | "name": "Amarakaeri" 199 | }, 200 | "ame": { 201 | "speakers": 6000, 202 | "iso6391": null, 203 | "iso6392": null, 204 | "name": "Yanesha'" 205 | }, 206 | "boa": { 207 | "speakers": 2000, 208 | "iso6391": null, 209 | "iso6392": null, 210 | "name": "Bora" 211 | }, 212 | "ban": { 213 | "speakers": 3800000, 214 | "iso6391": null, 215 | "iso6392": "ban", 216 | "name": "Balinese" 217 | }, 218 | "bba": { 219 | "speakers": 400000, 220 | "iso6391": null, 221 | "iso6392": null, 222 | "name": "Baatonum" 223 | }, 224 | "bci": { 225 | "speakers": 2130000, 226 | "iso6391": null, 227 | "iso6392": null, 228 | "name": "Baoulé" 229 | }, 230 | "bpy": { 231 | "speakers": 77500, 232 | "iso6391": null, 233 | "iso6392": null, 234 | "name": "Bishnupriya" 235 | }, 236 | "bre": { 237 | "speakers": 500000, 238 | "iso6391": "br", 239 | "iso6392": "bre", 240 | "name": "Breton" 241 | }, 242 | "buc": { 243 | "speakers": 39000, 244 | "iso6391": null, 245 | "iso6392": null, 246 | "name": "Bushi" 247 | }, 248 | "bug": { 249 | "speakers": 3500000, 250 | "iso6391": null, 251 | "iso6392": "bug", 252 | "name": "Buginese" 253 | }, 254 | "bul": { 255 | "speakers": 9000000, 256 | "iso6391": "bg", 257 | "iso6392": "bul", 258 | 
"name": "Bulgarian" 259 | }, 260 | "bvi": { 261 | "speakers": 16000, 262 | "iso6391": null, 263 | "iso6392": null, 264 | "name": "Belanda Viri" 265 | }, 266 | "bcl": { 267 | "speakers": 4000000, 268 | "iso6391": null, 269 | "iso6392": null, 270 | "name": "Central Bikol" 271 | }, 272 | "mya": { 273 | "speakers": 31000000, 274 | "iso6391": "my", 275 | "iso6392": "mya", 276 | "name": "Burmese" 277 | }, 278 | "bem": { 279 | "speakers": 2150000, 280 | "iso6391": null, 281 | "iso6392": "bem", 282 | "name": "Bemba (Zambia)" 283 | }, 284 | "ben": { 285 | "speakers": 196000000, 286 | "iso6391": "bn", 287 | "iso6392": "ben", 288 | "name": "Bengali" 289 | }, 290 | "bfa": { 291 | "speakers": 480000, 292 | "iso6391": null, 293 | "iso6392": null, 294 | "name": "Bari" 295 | }, 296 | "bgp": { 297 | "speakers": 1735000, 298 | "iso6391": null, 299 | "iso6392": null, 300 | "name": "Eastern Balochi" 301 | }, 302 | "bho": { 303 | "speakers": 25000000, 304 | "iso6391": null, 305 | "iso6392": "bho", 306 | "name": "Bhojpuri" 307 | }, 308 | "bam": { 309 | "speakers": 3000000, 310 | "iso6391": "bm", 311 | "iso6392": "bam", 312 | "name": "Bambara" 313 | }, 314 | "bis": { 315 | "speakers": 1200, 316 | "iso6391": "bi", 317 | "iso6392": "bis", 318 | "name": "Bislama" 319 | }, 320 | "bjj": { 321 | "speakers": 9500000, 322 | "iso6391": null, 323 | "iso6392": null, 324 | "name": "Kanauji" 325 | }, 326 | "eus": { 327 | "speakers": 588000, 328 | "iso6391": "eu", 329 | "iso6392": "eus", 330 | "name": "Basque" 331 | }, 332 | "ces": { 333 | "speakers": 12000000, 334 | "iso6391": "cs", 335 | "iso6392": "ces", 336 | "name": "Czech" 337 | }, 338 | "chj": { 339 | "speakers": 22000, 340 | "iso6391": null, 341 | "iso6392": null, 342 | "name": "Ojitlán Chinantec" 343 | }, 344 | "cic": { 345 | "speakers": 1000, 346 | "iso6391": null, 347 | "iso6392": null, 348 | "name": "Chickasaw" 349 | }, 350 | "cjk": { 351 | "speakers": 1004000, 352 | "iso6391": null, 353 | "iso6392": null, 354 | "name": "Chokwe" 355 | }, 
356 | "cjs": { 357 | "speakers": 10000, 358 | "iso6391": null, 359 | "iso6392": null, 360 | "name": "Shor" 361 | }, 362 | "cab": { 363 | "speakers": 94500, 364 | "iso6391": null, 365 | "iso6392": null, 366 | "name": "Garifuna" 367 | }, 368 | "cmn": { 369 | "speakers": 885000000, 370 | "iso6391": null, 371 | "iso6392": null, 372 | "name": "Mandarin Chinese" 373 | }, 374 | "cak": { 375 | "speakers": 132200, 376 | "iso6391": null, 377 | "iso6392": null, 378 | "name": "Kaqchikel" 379 | }, 380 | "cni": { 381 | "speakers": 45000, 382 | "iso6391": null, 383 | "iso6392": null, 384 | "name": "Asháninka" 385 | }, 386 | "cof": { 387 | "speakers": 2300, 388 | "iso6391": null, 389 | "iso6392": null, 390 | "name": "Colorado" 391 | }, 392 | "con": { 393 | "speakers": 1400, 394 | "iso6391": null, 395 | "iso6392": null, 396 | "name": "Cofán" 397 | }, 398 | "cos": { 399 | "speakers": 341000, 400 | "iso6391": "co", 401 | "iso6392": "cos", 402 | "name": "Corsican" 403 | }, 404 | "cot": { 405 | "speakers": 300, 406 | "iso6391": null, 407 | "iso6392": null, 408 | "name": "Caquinte" 409 | }, 410 | "cpu": { 411 | "speakers": 5000, 412 | "iso6391": null, 413 | "iso6392": null, 414 | "name": "Pichis Ashéninka" 415 | }, 416 | "crs": { 417 | "speakers": 72700, 418 | "iso6391": null, 419 | "iso6392": null, 420 | "name": "Seselwa Creole French" 421 | }, 422 | "csa": { 423 | "speakers": 1000, 424 | "iso6391": null, 425 | "iso6392": null, 426 | "name": "Chiltepec Chinantec" 427 | }, 428 | "csw": { 429 | "speakers": 60000, 430 | "iso6391": null, 431 | "iso6392": null, 432 | "name": "Swampy Cree" 433 | }, 434 | "ceb": { 435 | "speakers": 15230000, 436 | "iso6391": null, 437 | "iso6392": "ceb", 438 | "name": "Cebuano" 439 | }, 440 | "cat": { 441 | "speakers": 4353000, 442 | "iso6391": "ca", 443 | "iso6392": "cat", 444 | "name": "Catalan" 445 | }, 446 | "cax": { 447 | "speakers": 47086, 448 | "iso6391": null, 449 | "iso6392": null, 450 | "name": "Chiquitano" 451 | }, 452 | "cbr": { 453 | "speakers": 
1500, 454 | "iso6391": null, 455 | "iso6392": null, 456 | "name": "Cashibo-Cacataibo" 457 | }, 458 | "prq": { 459 | "speakers": 9000, 460 | "iso6391": null, 461 | "iso6392": null, 462 | "name": "Ashéninka Perené" 463 | }, 464 | "cha": { 465 | "speakers": 78000, 466 | "iso6391": "ch", 467 | "iso6392": "cha", 468 | "name": "Chamorro" 469 | }, 470 | "cbs": { 471 | "speakers": 2000, 472 | "iso6391": null, 473 | "iso6392": null, 474 | "name": "Cashinahua" 475 | }, 476 | "cbt": { 477 | "speakers": 6000, 478 | "iso6391": null, 479 | "iso6392": null, 480 | "name": "Chayahuita" 481 | }, 482 | "cbu": { 483 | "speakers": 3000, 484 | "iso6391": null, 485 | "iso6392": null, 486 | "name": "Candoshi-Shapra" 487 | }, 488 | "ddn": { 489 | "speakers": 72000, 490 | "iso6391": null, 491 | "iso6392": null, 492 | "name": "Dendi (Benin)" 493 | }, 494 | "dyu": { 495 | "speakers": 2700000, 496 | "iso6391": null, 497 | "iso6392": "dyu", 498 | "name": "Dyula" 499 | }, 500 | "nld": { 501 | "speakers": 21000000, 502 | "iso6391": "nl", 503 | "iso6392": "nld", 504 | "name": "Dutch" 505 | }, 506 | "dyo": { 507 | "speakers": 260000, 508 | "iso6391": null, 509 | "iso6392": null, 510 | "name": "Jola-Fonyi" 511 | }, 512 | "dag": { 513 | "speakers": 540000, 514 | "iso6391": null, 515 | "iso6392": null, 516 | "name": "Dagbani" 517 | }, 518 | "dan": { 519 | "speakers": 5292000, 520 | "iso6391": "da", 521 | "iso6392": "dan", 522 | "name": "Danish" 523 | }, 524 | "div": { 525 | "speakers": 287000, 526 | "iso6391": "dv", 527 | "iso6392": "div", 528 | "name": "Dhivehi" 529 | }, 530 | "dzo": { 531 | "speakers": 400000, 532 | "iso6391": "dz", 533 | "iso6392": "dzo", 534 | "name": "Dzongkha" 535 | }, 536 | "dip": { 537 | "speakers": 1350000, 538 | "iso6391": null, 539 | "iso6392": null, 540 | "name": "Northeastern Dinka" 541 | }, 542 | "dga": { 543 | "speakers": 501000, 544 | "iso6391": null, 545 | "iso6392": null, 546 | "name": "Southern Dagaare" 547 | }, 548 | "gjn": { 549 | "speakers": 250000, 550 | 
"iso6391": null, 551 | "iso6392": null, 552 | "name": "Gonja" 553 | }, 554 | "ewe": { 555 | "speakers": 2477600, 556 | "iso6391": "ee", 557 | "iso6392": "ewe", 558 | "name": "Ewe" 559 | }, 560 | "kal": { 561 | "speakers": 47000, 562 | "iso6391": "kl", 563 | "iso6392": "kal", 564 | "name": "Kalaallisut" 565 | }, 566 | "bin": { 567 | "speakers": 1000000, 568 | "iso6391": null, 569 | "iso6392": "bin", 570 | "name": "Bini" 571 | }, 572 | "ike": { 573 | "speakers": 21500, 574 | "iso6391": null, 575 | "iso6392": null, 576 | "name": "Eastern Canadian Inuktitut" 577 | }, 578 | "rgn": { 579 | "speakers": 20112, 580 | "iso6391": null, 581 | "iso6392": null, 582 | "name": "Romagnol" 583 | }, 584 | "eng": { 585 | "speakers": 322000000, 586 | "iso6391": "en", 587 | "iso6392": "eng", 588 | "name": "English" 589 | }, 590 | "est": { 591 | "speakers": 1100000, 592 | "iso6391": "et", 593 | "iso6392": "est", 594 | "name": "Estonian" 595 | }, 596 | "eve": { 597 | "speakers": 7170, 598 | "iso6391": null, 599 | "iso6392": null, 600 | "name": "Even" 601 | }, 602 | "evn": { 603 | "speakers": 40000, 604 | "iso6391": null, 605 | "iso6392": null, 606 | "name": "Evenki" 607 | }, 608 | "fao": { 609 | "speakers": 47000, 610 | "iso6391": "fo", 611 | "iso6392": "fao", 612 | "name": "Faroese" 613 | }, 614 | "wln": { 615 | "speakers": 600000, 616 | "iso6391": "wa", 617 | "iso6392": "wln", 618 | "name": "Walloon" 619 | }, 620 | "fij": { 621 | "speakers": 650000, 622 | "iso6391": "fj", 623 | "iso6392": "fij", 624 | "name": "Fijian" 625 | }, 626 | "fuc": { 627 | "speakers": 22000000, 628 | "iso6391": null, 629 | "iso6392": null, 630 | "name": "Pulaar" 631 | }, 632 | "fra": { 633 | "speakers": 124000000, 634 | "iso6391": "fr", 635 | "iso6392": "fra", 636 | "name": "French" 637 | }, 638 | "fur": { 639 | "speakers": 600000, 640 | "iso6391": null, 641 | "iso6392": "fur", 642 | "name": "Friulian" 643 | }, 644 | "fon": { 645 | "speakers": 1436000, 646 | "iso6391": null, 647 | "iso6392": "fon", 648 | "name": 
"Fon" 649 | }, 650 | "fin": { 651 | "speakers": 6000000, 652 | "iso6391": "fi", 653 | "iso6392": "fin", 654 | "name": "Finnish" 655 | }, 656 | "pcd": { 657 | "speakers": 500000, 658 | "iso6391": null, 659 | "iso6392": null, 660 | "name": "Picard" 661 | }, 662 | "hau": { 663 | "speakers": 22000000, 664 | "iso6391": "ha", 665 | "iso6392": "hau", 666 | "name": "Hausa" 667 | }, 668 | "gug": { 669 | "speakers": 12000, 670 | "iso6391": null, 671 | "iso6392": null, 672 | "name": "Paraguayan Guaraní" 673 | }, 674 | "guj": { 675 | "speakers": 44000000, 676 | "iso6391": "gu", 677 | "iso6392": "guj", 678 | "name": "Gujarati" 679 | }, 680 | "guu": { 681 | "speakers": 17640, 682 | "iso6391": null, 683 | "iso6392": null, 684 | "name": "Yanomamö" 685 | }, 686 | "gyr": { 687 | "speakers": 5933, 688 | "iso6391": null, 689 | "iso6392": null, 690 | "name": "Guarayu" 691 | }, 692 | "gag": { 693 | "speakers": 198000, 694 | "iso6391": null, 695 | "iso6392": null, 696 | "name": "Gagauz" 697 | }, 698 | "gbm": { 699 | "speakers": 2920000, 700 | "iso6391": null, 701 | "iso6392": null, 702 | "name": "Garhwali" 703 | }, 704 | "deu": { 705 | "speakers": 121000000, 706 | "iso6391": "de", 707 | "iso6392": "deu", 708 | "name": "German" 709 | }, 710 | "pov": { 711 | "speakers": 580000, 712 | "iso6391": null, 713 | "iso6392": null, 714 | "name": "Upper Guinea Crioulo" 715 | }, 716 | "gaa": { 717 | "speakers": 1000000, 718 | "iso6391": null, 719 | "iso6392": "gaa", 720 | "name": "Ga" 721 | }, 722 | "gkp": { 723 | "speakers": 808000, 724 | "iso6391": null, 725 | "iso6392": null, 726 | "name": "Guinea Kpelle" 727 | }, 728 | "ada": { 729 | "speakers": 1000000, 730 | "iso6391": null, 731 | "iso6392": "ada", 732 | "name": "Adangme" 733 | }, 734 | "gla": { 735 | "speakers": 63653, 736 | "iso6391": "gd", 737 | "iso6392": "gla", 738 | "name": "Scottish Gaelic" 739 | }, 740 | "gld": { 741 | "speakers": 12003, 742 | "iso6391": null, 743 | "iso6392": null, 744 | "name": "Nanai" 745 | }, 746 | "ell": { 747 | 
"speakers": 12258540, 748 | "iso6391": "el", 749 | "iso6392": "ell", 750 | "name": "Modern Greek (1453-)" 751 | }, 752 | "gle": { 753 | "speakers": 260000, 754 | "iso6391": "ga", 755 | "iso6392": "gle", 756 | "name": "Irish" 757 | }, 758 | "glg": { 759 | "speakers": 4000000, 760 | "iso6391": "gl", 761 | "iso6392": "glg", 762 | "name": "Galician" 763 | }, 764 | "gno": { 765 | "speakers": 1950000, 766 | "iso6391": null, 767 | "iso6392": null, 768 | "name": "Northern Gondi" 769 | }, 770 | "gax": { 771 | "speakers": 30000000, 772 | "iso6391": null, 773 | "iso6392": null, 774 | "name": "Borana-Arsi-Guji Oromo" 775 | }, 776 | "kat": { 777 | "speakers": 4103000, 778 | "iso6391": "ka", 779 | "iso6392": "kat", 780 | "name": "Georgian" 781 | }, 782 | "guc": { 783 | "speakers": 305000, 784 | "iso6391": null, 785 | "iso6392": null, 786 | "name": "Wayuu" 787 | }, 788 | "hea": { 789 | "speakers": 820000, 790 | "iso6391": null, 791 | "iso6392": null, 792 | "name": "Northern Qiandong Miao" 793 | }, 794 | "hun": { 795 | "speakers": 14500000, 796 | "iso6391": "hu", 797 | "iso6392": "hun", 798 | "name": "Hungarian" 799 | }, 800 | "haw": { 801 | "speakers": 8000, 802 | "iso6391": null, 803 | "iso6392": "haw", 804 | "name": "Hawaiian" 805 | }, 806 | "huu": { 807 | "speakers": 2900, 808 | "iso6391": null, 809 | "iso6392": null, 810 | "name": "Murui Huitoto" 811 | }, 812 | "hat": { 813 | "speakers": 7382000, 814 | "iso6391": "ht", 815 | "iso6392": "hat", 816 | "name": "Haitian" 817 | }, 818 | "heb": { 819 | "speakers": 4612000, 820 | "iso6391": "he", 821 | "iso6392": "heb", 822 | "name": "Hebrew" 823 | }, 824 | "hil": { 825 | "speakers": 7000000, 826 | "iso6391": null, 827 | "iso6392": "hil", 828 | "name": "Hiligaynon" 829 | }, 830 | "hin": { 831 | "speakers": 182000000, 832 | "iso6391": "hi", 833 | "iso6392": "hin", 834 | "name": "Hindi" 835 | }, 836 | "hlt": { 837 | "speakers": 30000, 838 | "iso6391": null, 839 | "iso6392": null, 840 | "name": "Matu Chin" 841 | }, 842 | "hms": { 843 | 
"speakers": 8200000, 844 | "iso6391": null, 845 | "iso6392": null, 846 | "name": "Southern Qiandong Miao" 847 | }, 848 | "hna": { 849 | "speakers": 327000, 850 | "iso6391": null, 851 | "iso6392": null, 852 | "name": "Mina (Cameroon)" 853 | }, 854 | "cnh": { 855 | "speakers": 446264, 856 | "iso6391": null, 857 | "iso6392": null, 858 | "name": "Haka Chin" 859 | }, 860 | "hne": { 861 | "speakers": 17500000, 862 | "iso6391": null, 863 | "iso6392": null, 864 | "name": "Chhattisgarhi" 865 | }, 866 | "hni": { 867 | "speakers": 747000, 868 | "iso6391": null, 869 | "iso6392": null, 870 | "name": "Hani" 871 | }, 872 | "hoc": { 873 | "speakers": 1500000, 874 | "iso6391": null, 875 | "iso6392": null, 876 | "name": "Ho" 877 | }, 878 | "ilo": { 879 | "speakers": 8000000, 880 | "iso6391": null, 881 | "iso6392": "ilo", 882 | "name": "Iloko" 883 | }, 884 | "ibo": { 885 | "speakers": 17000000, 886 | "iso6391": "ig", 887 | "iso6392": "ibo", 888 | "name": "Igbo" 889 | }, 890 | "ita": { 891 | "speakers": 63000000, 892 | "iso6391": "it", 893 | "iso6392": "ita", 894 | "name": "Italian" 895 | }, 896 | "isl": { 897 | "speakers": 282845, 898 | "iso6391": "is", 899 | "iso6392": "isl", 900 | "name": "Icelandic" 901 | }, 902 | "ind": { 903 | "speakers": 140000000, 904 | "iso6391": "id", 905 | "iso6392": "ind", 906 | "name": "Indonesian" 907 | }, 908 | "nds": { 909 | "speakers": 2600000, 910 | "iso6391": null, 911 | "iso6392": "nds", 912 | "name": "Low German" 913 | }, 914 | "ibb": { 915 | "speakers": 3186000, 916 | "iso6391": null, 917 | "iso6392": null, 918 | "name": "Ibibio" 919 | }, 920 | "iii": { 921 | "speakers": 1600000, 922 | "iso6391": "ii", 923 | "iso6392": "iii", 924 | "name": "Sichuan Yi" 925 | }, 926 | "jpn": { 927 | "speakers": 125000000, 928 | "iso6391": "ja", 929 | "iso6392": "jpn", 930 | "name": "Japanese" 931 | }, 932 | "jav": { 933 | "speakers": 75500800, 934 | "iso6391": "jv", 935 | "iso6392": "jav", 936 | "name": "Javanese" 937 | }, 938 | "kfa": { 939 | "speakers": 241000, 
940 | "iso6391": null, 941 | "iso6392": null, 942 | "name": "Kodava" 943 | }, 944 | "kha": { 945 | "speakers": 865000, 946 | "iso6391": null, 947 | "iso6392": "kha", 948 | "name": "Khasi" 949 | }, 950 | "khk": { 951 | "speakers": 2330000, 952 | "iso6391": null, 953 | "iso6392": null, 954 | "name": "Halh Mongolian" 955 | }, 956 | "khm": { 957 | "speakers": 7063200, 958 | "iso6391": "km", 959 | "iso6392": "khm", 960 | "name": "Central Khmer" 961 | }, 962 | "khr": { 963 | "speakers": 293580, 964 | "iso6391": null, 965 | "iso6392": null, 966 | "name": "Kharia" 967 | }, 968 | "kas": { 969 | "speakers": 4381000, 970 | "iso6391": "ks", 971 | "iso6392": "kas", 972 | "name": "Kashmiri" 973 | }, 974 | "kir": { 975 | "speakers": 2631420, 976 | "iso6391": "ky", 977 | "iso6392": "kir", 978 | "name": "Kirghiz" 979 | }, 980 | "kjh": { 981 | "speakers": 60000, 982 | "iso6391": null, 983 | "iso6392": null, 984 | "name": "Khakas" 985 | }, 986 | "ckb": { 987 | "speakers": 20000000, 988 | "iso6391": null, 989 | "iso6392": null, 990 | "name": "Central Kurdish" 991 | }, 992 | "kaz": { 993 | "speakers": 8000000, 994 | "iso6391": "kk", 995 | "iso6392": "kaz", 996 | "name": "Kazakh" 997 | }, 998 | "knc": { 999 | "speakers": 3500000, 1000 | "iso6391": null, 1001 | "iso6392": null, 1002 | "name": "Central Kanuri" 1003 | }, 1004 | "kng": { 1005 | "speakers": 1000000, 1006 | "iso6391": null, 1007 | "iso6392": null, 1008 | "name": "Koongo" 1009 | }, 1010 | "koi": { 1011 | "speakers": 12500000, 1012 | "iso6391": null, 1013 | "iso6392": null, 1014 | "name": "Komi-Permyak" 1015 | }, 1016 | "koo": { 1017 | "speakers": 361709, 1018 | "iso6391": null, 1019 | "iso6392": null, 1020 | "name": "Konzo" 1021 | }, 1022 | "kor": { 1023 | "speakers": 75000000, 1024 | "iso6391": "ko", 1025 | "iso6392": "kor", 1026 | "name": "Korean" 1027 | }, 1028 | "kqn": { 1029 | "speakers": 276000, 1030 | "iso6391": null, 1031 | "iso6392": null, 1032 | "name": "Kaonde" 1033 | }, 1034 | "kri": { 1035 | "speakers": 480000, 
1036 | "iso6391": null, 1037 | "iso6392": null, 1038 | "name": "Krio" 1039 | }, 1040 | "krl": { 1041 | "speakers": 80000, 1042 | "iso6391": null, 1043 | "iso6392": "krl", 1044 | "name": "Karelian" 1045 | }, 1046 | "ksw": { 1047 | "speakers": 2000000, 1048 | "iso6391": null, 1049 | "iso6392": null, 1050 | "name": "S'gaw Karen" 1051 | }, 1052 | "kwi": { 1053 | "speakers": 21000, 1054 | "iso6391": null, 1055 | "iso6392": null, 1056 | "name": "Awa-Cuaiquer" 1057 | }, 1058 | "kbp": { 1059 | "speakers": 1200000, 1060 | "iso6391": null, 1061 | "iso6392": null, 1062 | "name": "Kabiyè" 1063 | }, 1064 | "xsm": { 1065 | "speakers": 200000, 1066 | "iso6391": null, 1067 | "iso6392": null, 1068 | "name": "Kasem" 1069 | }, 1070 | "kde": { 1071 | "speakers": 1260000, 1072 | "iso6391": null, 1073 | "iso6392": null, 1074 | "name": "Makonde" 1075 | }, 1076 | "kea": { 1077 | "speakers": 393943, 1078 | "iso6391": null, 1079 | "iso6392": null, 1080 | "name": "Kabuverdianu" 1081 | }, 1082 | "kan": { 1083 | "speakers": 33663000, 1084 | "iso6391": "kn", 1085 | "iso6392": "kan", 1086 | "name": "Kannada" 1087 | }, 1088 | "kmr": { 1089 | "speakers": 8000000, 1090 | "iso6391": null, 1091 | "iso6392": null, 1092 | "name": "Northern Kurdish" 1093 | }, 1094 | "lia": { 1095 | "speakers": 335000, 1096 | "iso6391": null, 1097 | "iso6392": null, 1098 | "name": "West-Central Limba" 1099 | }, 1100 | "lin": { 1101 | "speakers": 8400000, 1102 | "iso6391": "ln", 1103 | "iso6392": "lin", 1104 | "name": "Lingala" 1105 | }, 1106 | "lit": { 1107 | "speakers": 4000000, 1108 | "iso6391": "lt", 1109 | "iso6392": "lit", 1110 | "name": "Lithuanian" 1111 | }, 1112 | "lad": { 1113 | "speakers": 120000, 1114 | "iso6391": null, 1115 | "iso6392": "lad", 1116 | "name": "Ladino" 1117 | }, 1118 | "lav": { 1119 | "speakers": 1550000, 1120 | "iso6391": "lv", 1121 | "iso6392": "lav", 1122 | "name": "Latvian" 1123 | }, 1124 | "lob": { 1125 | "speakers": 442000, 1126 | "iso6391": null, 1127 | "iso6392": null, 1128 | "name": 
"Lobi" 1129 | }, 1130 | "lot": { 1131 | "speakers": 135000, 1132 | "iso6391": null, 1133 | "iso6392": null, 1134 | "name": "Otuho" 1135 | }, 1136 | "loz": { 1137 | "speakers": 71841, 1138 | "iso6391": null, 1139 | "iso6392": "loz", 1140 | "name": "Lozi" 1141 | }, 1142 | "ltz": { 1143 | "speakers": 335518, 1144 | "iso6391": "lb", 1145 | "iso6392": "ltz", 1146 | "name": "Luxembourgish" 1147 | }, 1148 | "lua": { 1149 | "speakers": 6300000, 1150 | "iso6391": null, 1151 | "iso6392": "lua", 1152 | "name": "Luba-Lulua" 1153 | }, 1154 | "lue": { 1155 | "speakers": 35800, 1156 | "iso6391": null, 1157 | "iso6392": null, 1158 | "name": "Luvale" 1159 | }, 1160 | "lug": { 1161 | "speakers": 3015980, 1162 | "iso6391": "lg", 1163 | "iso6392": "lug", 1164 | "name": "Ganda" 1165 | }, 1166 | "lus": { 1167 | "speakers": 541750, 1168 | "iso6391": null, 1169 | "iso6392": "lus", 1170 | "name": "Lushai" 1171 | }, 1172 | "sme": { 1173 | "speakers": 4000, 1174 | "iso6391": "se", 1175 | "iso6392": "sme", 1176 | "name": "Northern Sami" 1177 | }, 1178 | "mad": { 1179 | "speakers": 10000000, 1180 | "iso6391": null, 1181 | "iso6392": "mad", 1182 | "name": "Madurese" 1183 | }, 1184 | "mah": { 1185 | "speakers": 43900, 1186 | "iso6391": "mh", 1187 | "iso6392": "mah", 1188 | "name": "Marshallese" 1189 | }, 1190 | "mar": { 1191 | "speakers": 64783000, 1192 | "iso6391": "mr", 1193 | "iso6392": "mar", 1194 | "name": "Marathi" 1195 | }, 1196 | "maz": { 1197 | "speakers": 350000, 1198 | "iso6391": null, 1199 | "iso6392": null, 1200 | "name": "Central Mazahua" 1201 | }, 1202 | "mcd": { 1203 | "speakers": 950, 1204 | "iso6391": null, 1205 | "iso6392": null, 1206 | "name": "Sharanahua" 1207 | }, 1208 | "mcf": { 1209 | "speakers": 1280, 1210 | "iso6391": null, 1211 | "iso6392": null, 1212 | "name": "Matsés" 1213 | }, 1214 | "men": { 1215 | "speakers": 1480000, 1216 | "iso6391": null, 1217 | "iso6392": "men", 1218 | "name": "Mende (Sierra Leone)" 1219 | }, 1220 | "mic": { 1221 | "speakers": 8100, 1222 | 
"iso6391": null, 1223 | "iso6392": "mic", 1224 | "name": "Mi'kmaq" 1225 | }, 1226 | "min": { 1227 | "speakers": 6500000, 1228 | "iso6391": null, 1229 | "iso6392": "min", 1230 | "name": "Minangkabau" 1231 | }, 1232 | "miq": { 1233 | "speakers": 160000, 1234 | "iso6391": null, 1235 | "iso6392": null, 1236 | "name": "Mískito" 1237 | }, 1238 | "mkd": { 1239 | "speakers": 2500000, 1240 | "iso6391": "mk", 1241 | "iso6392": "mkd", 1242 | "name": "Macedonian" 1243 | }, 1244 | "mlt": { 1245 | "speakers": 330000, 1246 | "iso6391": "mt", 1247 | "iso6392": "mlt", 1248 | "name": "Maltese" 1249 | }, 1250 | "mos": { 1251 | "speakers": 4600000, 1252 | "iso6391": null, 1253 | "iso6392": "mos", 1254 | "name": "Mossi" 1255 | }, 1256 | "mri": { 1257 | "speakers": 70000, 1258 | "iso6391": "mi", 1259 | "iso6392": "mri", 1260 | "name": "Maori" 1261 | }, 1262 | "mve": { 1263 | "speakers": 12104000, 1264 | "iso6391": null, 1265 | "iso6392": null, 1266 | "name": "Marwari (Pakistan)" 1267 | }, 1268 | "mxi": { 1269 | "speakers": 0, 1270 | "iso6391": null, 1271 | "iso6392": null, 1272 | "name": "Mozarabic" 1273 | }, 1274 | "mxv": { 1275 | "speakers": 65000, 1276 | "iso6391": null, 1277 | "iso6392": null, 1278 | "name": "Metlatónoc Mixtec" 1279 | }, 1280 | "mag": { 1281 | "speakers": 10821000, 1282 | "iso6391": null, 1283 | "iso6392": "mag", 1284 | "name": "Magahi" 1285 | }, 1286 | "mzi": { 1287 | "speakers": 11000, 1288 | "iso6391": null, 1289 | "iso6392": null, 1290 | "name": "Ixcatlán Mazatec" 1291 | }, 1292 | "emk": { 1293 | "speakers": 2140300, 1294 | "iso6391": null, 1295 | "iso6392": null, 1296 | "name": "Eastern Maninkakan" 1297 | }, 1298 | "mai": { 1299 | "speakers": 34700000, 1300 | "iso6391": null, 1301 | "iso6392": "mai", 1302 | "name": "Maithili" 1303 | }, 1304 | "kmb": { 1305 | "speakers": 3000000, 1306 | "iso6391": null, 1307 | "iso6392": "kmb", 1308 | "name": "Kimbundu" 1309 | }, 1310 | "mam": { 1311 | "speakers": 157000, 1312 | "iso6391": null, 1313 | "iso6392": null, 1314 | 
"name": "Mam" 1315 | }, 1316 | "lun": { 1317 | "speakers": 3000000, 1318 | "iso6391": null, 1319 | "iso6392": "lun", 1320 | "name": "Lunda" 1321 | }, 1322 | "mal": { 1323 | "speakers": 34014000, 1324 | "iso6391": "ml", 1325 | "iso6392": "mal", 1326 | "name": "Malayalam" 1327 | }, 1328 | "umb": { 1329 | "speakers": 4000000, 1330 | "iso6391": null, 1331 | "iso6392": "umb", 1332 | "name": "Umbundu" 1333 | }, 1334 | "plt": { 1335 | "speakers": 10156900, 1336 | "iso6391": null, 1337 | "iso6392": null, 1338 | "name": "Plateau Malagasy" 1339 | }, 1340 | "nio": { 1341 | "speakers": 1063, 1342 | "iso6391": null, 1343 | "iso6392": null, 1344 | "name": "Nganasan" 1345 | }, 1346 | "njo": { 1347 | "speakers": 232000, 1348 | "iso6391": null, 1349 | "iso6392": null, 1350 | "name": "Ao Naga" 1351 | }, 1352 | "nhn": { 1353 | "speakers": 1376898, 1354 | "iso6391": null, 1355 | "iso6392": null, 1356 | "name": "Central Nahuatl" 1357 | }, 1358 | "lao": { 1359 | "speakers": 4000000, 1360 | "iso6391": "lo", 1361 | "iso6392": "lao", 1362 | "name": "Lao" 1363 | }, 1364 | "nno": { 1365 | "speakers": 4700000, 1366 | "iso6391": "nn", 1367 | "iso6392": "nno", 1368 | "name": "Norwegian Nynorsk" 1369 | }, 1370 | "nob": { 1371 | "speakers": 5000000, 1372 | "iso6391": "nb", 1373 | "iso6392": "nob", 1374 | "name": "Norwegian Bokmål" 1375 | }, 1376 | "not": { 1377 | "speakers": 4000, 1378 | "iso6391": null, 1379 | "iso6392": null, 1380 | "name": "Nomatsiguenga" 1381 | }, 1382 | "nus": { 1383 | "speakers": 804900, 1384 | "iso6391": null, 1385 | "iso6392": null, 1386 | "name": "Nuer" 1387 | }, 1388 | "lns": { 1389 | "speakers": 125000, 1390 | "iso6391": null, 1391 | "iso6392": null, 1392 | "name": "Lamnso'" 1393 | }, 1394 | "nya": { 1395 | "speakers": 10000000, 1396 | "iso6391": "ny", 1397 | "iso6392": "nya", 1398 | "name": "Nyanja" 1399 | }, 1400 | "nym": { 1401 | "speakers": 926000, 1402 | "iso6391": null, 1403 | "iso6392": "nym", 1404 | "name": "Nyamwezi" 1405 | }, 1406 | "nyn": { 1407 | 
"speakers": 1643193, 1408 | "iso6391": null, 1409 | "iso6392": "nyn", 1410 | "name": "Nyankole" 1411 | }, 1412 | "nzi": { 1413 | "speakers": 352500, 1414 | "iso6391": null, 1415 | "iso6392": "nzi", 1416 | "name": "Nzima" 1417 | }, 1418 | "nep": { 1419 | "speakers": 16200000, 1420 | "iso6391": "ne", 1421 | "iso6392": "nep", 1422 | "name": "Nepali (macrolanguage)" 1423 | }, 1424 | "nbl": { 1425 | "speakers": 588000, 1426 | "iso6391": "nr", 1427 | "iso6392": "nbl", 1428 | "name": "South Ndebele" 1429 | }, 1430 | "nba": { 1431 | "speakers": 172000, 1432 | "iso6391": null, 1433 | "iso6392": null, 1434 | "name": "Nyemba" 1435 | }, 1436 | "nav": { 1437 | "speakers": 148530, 1438 | "iso6391": "nv", 1439 | "iso6392": "nav", 1440 | "name": "Navajo" 1441 | }, 1442 | "oss": { 1443 | "speakers": 588000, 1444 | "iso6391": "os", 1445 | "iso6392": "oss", 1446 | "name": "Ossetian" 1447 | }, 1448 | "oaa": { 1449 | "speakers": 295, 1450 | "iso6391": null, 1451 | "iso6392": null, 1452 | "name": "Orok" 1453 | }, 1454 | "ojb": { 1455 | "speakers": 35000, 1456 | "iso6391": null, 1457 | "iso6392": null, 1458 | "name": "Northwestern Ojibwa" 1459 | }, 1460 | "oki": { 1461 | "speakers": 20000, 1462 | "iso6391": null, 1463 | "iso6392": null, 1464 | "name": "Okiek" 1465 | }, 1466 | "ori": { 1467 | "speakers": 31000000, 1468 | "iso6391": "or", 1469 | "iso6392": "ori", 1470 | "name": "Oriya (macrolanguage)" 1471 | }, 1472 | "pbb": { 1473 | "speakers": 68487, 1474 | "iso6391": null, 1475 | "iso6392": null, 1476 | "name": "Páez" 1477 | }, 1478 | "tgk": { 1479 | "speakers": 4380000, 1480 | "iso6391": "tg", 1481 | "iso6392": "tgk", 1482 | "name": "Tajik" 1483 | }, 1484 | "tpi": { 1485 | "speakers": 2000000, 1486 | "iso6391": null, 1487 | "iso6392": "tpi", 1488 | "name": "Tok Pisin" 1489 | }, 1490 | "pes": { 1491 | "speakers": 7000000, 1492 | "iso6391": null, 1493 | "iso6392": null, 1494 | "name": "Iranian Persian" 1495 | }, 1496 | "pis": { 1497 | "speakers": 350000, 1498 | "iso6391": null, 1499 | 
"iso6392": null, 1500 | "name": "Pijin" 1501 | }, 1502 | "pau": { 1503 | "speakers": 15000, 1504 | "iso6391": null, 1505 | "iso6392": "pau", 1506 | "name": "Palauan" 1507 | }, 1508 | "pol": { 1509 | "speakers": 44000000, 1510 | "iso6391": "pl", 1511 | "iso6392": "pol", 1512 | "name": "Polish" 1513 | }, 1514 | "pon": { 1515 | "speakers": 27700, 1516 | "iso6391": null, 1517 | "iso6392": "pon", 1518 | "name": "Pohnpeian" 1519 | }, 1520 | "por": { 1521 | "speakers": 182000000, 1522 | "iso6391": "pt", 1523 | "iso6392": "por", 1524 | "name": "Portuguese" 1525 | }, 1526 | "ppl": { 1527 | "speakers": 20, 1528 | "iso6391": null, 1529 | "iso6392": null, 1530 | "name": "Pipil" 1531 | }, 1532 | "pwo": { 1533 | "speakers": 1209800, 1534 | "iso6391": null, 1535 | "iso6392": null, 1536 | "name": "Pwo Western Karen" 1537 | }, 1538 | "pan": { 1539 | "speakers": 25700000, 1540 | "iso6391": "pa", 1541 | "iso6392": "pan", 1542 | "name": "Panjabi" 1543 | }, 1544 | "pam": { 1545 | "speakers": 2000000, 1546 | "iso6391": null, 1547 | "iso6392": "pam", 1548 | "name": "Pampanga" 1549 | }, 1550 | "pbu": { 1551 | "speakers": 9585000, 1552 | "iso6391": null, 1553 | "iso6392": null, 1554 | "name": "Northern Pashto" 1555 | }, 1556 | "quy": { 1557 | "speakers": 1000000, 1558 | "iso6391": null, 1559 | "iso6392": null, 1560 | "name": "Ayacucho Quechua" 1561 | }, 1562 | "qvc": { 1563 | "speakers": 35000, 1564 | "iso6391": null, 1565 | "iso6392": null, 1566 | "name": "Cajamarca Quechua" 1567 | }, 1568 | "qva": { 1569 | "speakers": 65000, 1570 | "iso6391": null, 1571 | "iso6392": null, 1572 | "name": "Ambo-Pasco Quechua" 1573 | }, 1574 | "qug": { 1575 | "speakers": 10000000, 1576 | "iso6391": null, 1577 | "iso6392": null, 1578 | "name": "Chimborazo Highland Quichua" 1579 | }, 1580 | "qvh": { 1581 | "speakers": 38000, 1582 | "iso6391": null, 1583 | "iso6392": null, 1584 | "name": "Huamalíes-Dos de Mayo Huánuco Quechua" 1585 | }, 1586 | "qvm": { 1587 | "speakers": 55000, 1588 | "iso6391": null, 1589 | 
"iso6392": null, 1590 | "name": "Margos-Yarowilca-Lauricocha Quechua" 1591 | }, 1592 | "qvn": { 1593 | "speakers": 40000, 1594 | "iso6391": null, 1595 | "iso6392": null, 1596 | "name": "North Junín Quechua" 1597 | }, 1598 | "qwh": { 1599 | "speakers": 300000, 1600 | "iso6391": null, 1601 | "iso6392": null, 1602 | "name": "Huaylas Ancash Quechua" 1603 | }, 1604 | "qxa": { 1605 | "speakers": 25000, 1606 | "iso6391": null, 1607 | "iso6392": null, 1608 | "name": "Chiquián Ancash Quechua" 1609 | }, 1610 | "qxn": { 1611 | "speakers": 200000, 1612 | "iso6391": null, 1613 | "iso6392": null, 1614 | "name": "Northern Conchucos Ancash Quechua" 1615 | }, 1616 | "qxu": { 1617 | "speakers": 16000, 1618 | "iso6391": null, 1619 | "iso6392": null, 1620 | "name": "Arequipa-La Unión Quechua" 1621 | }, 1622 | "qud": { 1623 | "speakers": 30000, 1624 | "iso6391": null, 1625 | "iso6392": null, 1626 | "name": "Calderón Highland Quichua" 1627 | }, 1628 | "quz": { 1629 | "speakers": 1500000, 1630 | "iso6391": null, 1631 | "iso6392": null, 1632 | "name": "Cusco Quechua" 1633 | }, 1634 | "run": { 1635 | "speakers": 6000000, 1636 | "iso6391": "rn", 1637 | "iso6392": "run", 1638 | "name": "Rundi" 1639 | }, 1640 | "rmy": { 1641 | "speakers": 1500000, 1642 | "iso6391": null, 1643 | "iso6392": null, 1644 | "name": "Vlax Romani" 1645 | }, 1646 | "roh": { 1647 | "speakers": 500000, 1648 | "iso6391": "rm", 1649 | "iso6392": "roh", 1650 | "name": "Romansh" 1651 | }, 1652 | "ron": { 1653 | "speakers": 26000000, 1654 | "iso6391": "ro", 1655 | "iso6392": "ron", 1656 | "name": "Romanian" 1657 | }, 1658 | "rmn": { 1659 | "speakers": 1000000, 1660 | "iso6391": null, 1661 | "iso6392": null, 1662 | "name": "Balkan Romani" 1663 | }, 1664 | "rus": { 1665 | "speakers": 288000000, 1666 | "iso6391": "ru", 1667 | "iso6392": "rus", 1668 | "name": "Russian" 1669 | }, 1670 | "raj": { 1671 | "speakers": 12370010, 1672 | "iso6391": null, 1673 | "iso6392": "raj", 1674 | "name": "Rajasthani" 1675 | }, 1676 | "bel": { 1677 
| "speakers": 10200000, 1678 | "iso6391": "be", 1679 | "iso6392": "bel", 1680 | "name": "Belarusian" 1681 | }, 1682 | "kin": { 1683 | "speakers": 9306800, 1684 | "iso6391": "rw", 1685 | "iso6392": "kin", 1686 | "name": "Kinyarwanda" 1687 | }, 1688 | "rar": { 1689 | "speakers": 43000, 1690 | "iso6391": null, 1691 | "iso6392": "rar", 1692 | "name": "Rarotongan" 1693 | }, 1694 | "nso": { 1695 | "speakers": 3851000, 1696 | "iso6391": null, 1697 | "iso6392": "nso", 1698 | "name": "Pedi" 1699 | }, 1700 | "san": { 1701 | "speakers": 194433, 1702 | "iso6391": "sa", 1703 | "iso6392": "san", 1704 | "name": "Sanskrit" 1705 | }, 1706 | "sat": { 1707 | "speakers": 6218900, 1708 | "iso6391": null, 1709 | "iso6392": "sat", 1710 | "name": "Santali" 1711 | }, 1712 | "sco": { 1713 | "speakers": 1500000, 1714 | "iso6391": null, 1715 | "iso6392": "sco", 1716 | "name": "Scots" 1717 | }, 1718 | "hrv": { 1719 | "speakers": 21000000, 1720 | "iso6391": "hr", 1721 | "iso6392": "hrv", 1722 | "name": "Croatian" 1723 | }, 1724 | "shk": { 1725 | "speakers": 175000, 1726 | "iso6391": null, 1727 | "iso6392": null, 1728 | "name": "Shilluk" 1729 | }, 1730 | "shn": { 1731 | "speakers": 3000000, 1732 | "iso6391": null, 1733 | "iso6392": "shn", 1734 | "name": "Shan" 1735 | }, 1736 | "shp": { 1737 | "speakers": 15000, 1738 | "iso6391": null, 1739 | "iso6392": null, 1740 | "name": "Shipibo-Conibo" 1741 | }, 1742 | "sin": { 1743 | "speakers": 13218000, 1744 | "iso6391": "si", 1745 | "iso6392": "sin", 1746 | "name": "Sinhala" 1747 | }, 1748 | "bos": { 1749 | "speakers": 21000000, 1750 | "iso6391": "bs", 1751 | "iso6392": "bos", 1752 | "name": "Bosnian" 1753 | }, 1754 | "skr": { 1755 | "speakers": 15020000, 1756 | "iso6391": null, 1757 | "iso6392": null, 1758 | "name": "Seraiki" 1759 | }, 1760 | "slk": { 1761 | "speakers": 5606000, 1762 | "iso6391": "sk", 1763 | "iso6392": "slk", 1764 | "name": "Slovak" 1765 | }, 1766 | "slv": { 1767 | "speakers": 2218000, 1768 | "iso6391": "sl", 1769 | "iso6392": "slv", 
1770 | "name": "Slovenian" 1771 | }, 1772 | "sah": { 1773 | "speakers": 363000, 1774 | "iso6391": null, 1775 | "iso6392": "sah", 1776 | "name": "Yakut" 1777 | }, 1778 | "smo": { 1779 | "speakers": 362000, 1780 | "iso6391": "sm", 1781 | "iso6392": "smo", 1782 | "name": "Samoan" 1783 | }, 1784 | "sna": { 1785 | "speakers": 7000000, 1786 | "iso6391": "sn", 1787 | "iso6392": "sna", 1788 | "name": "Shona" 1789 | }, 1790 | "snd": { 1791 | "speakers": 19675000, 1792 | "iso6391": "sd", 1793 | "iso6392": "snd", 1794 | "name": "Sindhi" 1795 | }, 1796 | "snk": { 1797 | "speakers": 1067000, 1798 | "iso6391": null, 1799 | "iso6392": "snk", 1800 | "name": "Soninke" 1801 | }, 1802 | "som": { 1803 | "speakers": 8335000, 1804 | "iso6391": "so", 1805 | "iso6392": "som", 1806 | "name": "Somali" 1807 | }, 1808 | "sot": { 1809 | "speakers": 4197000, 1810 | "iso6391": "st", 1811 | "iso6392": "sot", 1812 | "name": "Southern Sotho" 1813 | }, 1814 | "spa": { 1815 | "speakers": 332000000, 1816 | "iso6391": "es", 1817 | "iso6392": "spa", 1818 | "name": "Spanish" 1819 | }, 1820 | "src": { 1821 | "speakers": 1500000, 1822 | "iso6391": null, 1823 | "iso6392": null, 1824 | "name": "Logudorese Sardinian" 1825 | }, 1826 | "srp": { 1827 | "speakers": 21000000, 1828 | "iso6391": "sr", 1829 | "iso6392": "srp", 1830 | "name": "Serbian" 1831 | }, 1832 | "srr": { 1833 | "speakers": 868800, 1834 | "iso6391": null, 1835 | "iso6392": "srr", 1836 | "name": "Serer" 1837 | }, 1838 | "ssw": { 1839 | "speakers": 1670000, 1840 | "iso6391": "ss", 1841 | "iso6392": "ssw", 1842 | "name": "Swati" 1843 | }, 1844 | "suk": { 1845 | "speakers": 5000000, 1846 | "iso6391": null, 1847 | "iso6392": "suk", 1848 | "name": "Sukuma" 1849 | }, 1850 | "sun": { 1851 | "speakers": 27000000, 1852 | "iso6391": "su", 1853 | "iso6392": "sun", 1854 | "name": "Sundanese" 1855 | }, 1856 | "sus": { 1857 | "speakers": 923000, 1858 | "iso6391": null, 1859 | "iso6392": "sus", 1860 | "name": "Susu" 1861 | }, 1862 | "suz": { 1863 | "speakers": 
37898, 1864 | "iso6391": null, 1865 | "iso6392": null, 1866 | "name": "Sunwar" 1867 | }, 1868 | "swb": { 1869 | "speakers": 97300, 1870 | "iso6391": null, 1871 | "iso6392": null, 1872 | "name": "Maore Comorian" 1873 | }, 1874 | "swe": { 1875 | "speakers": 9000000, 1876 | "iso6391": "sv", 1877 | "iso6392": "swe", 1878 | "name": "Swedish" 1879 | }, 1880 | "swh": { 1881 | "speakers": 30000000, 1882 | "iso6391": null, 1883 | "iso6392": null, 1884 | "name": "Swahili (individual language)" 1885 | }, 1886 | "sag": { 1887 | "speakers": 4900000, 1888 | "iso6391": "sg", 1889 | "iso6392": "sag", 1890 | "name": "Sango" 1891 | }, 1892 | "ton": { 1893 | "speakers": 123000, 1894 | "iso6391": "to", 1895 | "iso6392": "ton", 1896 | "name": "Tonga (Tonga Islands)" 1897 | }, 1898 | "taj": { 1899 | "speakers": 1353311, 1900 | "iso6391": null, 1901 | "iso6392": null, 1902 | "name": "Eastern Tamang" 1903 | }, 1904 | "tat": { 1905 | "speakers": 7000000, 1906 | "iso6391": "tt", 1907 | "iso6392": "tat", 1908 | "name": "Tatar" 1909 | }, 1910 | "tbz": { 1911 | "speakers": 120000, 1912 | "iso6391": null, 1913 | "iso6392": null, 1914 | "name": "Ditammari" 1915 | }, 1916 | "tca": { 1917 | "speakers": 25000, 1918 | "iso6391": null, 1919 | "iso6392": null, 1920 | "name": "Ticuna" 1921 | }, 1922 | "tel": { 1923 | "speakers": 73000000, 1924 | "iso6391": "te", 1925 | "iso6392": "tel", 1926 | "name": "Telugu" 1927 | }, 1928 | "tem": { 1929 | "speakers": 1200000, 1930 | "iso6391": null, 1931 | "iso6392": "tem", 1932 | "name": "Timne" 1933 | }, 1934 | "tet": { 1935 | "speakers": 600000, 1936 | "iso6391": null, 1937 | "iso6392": "tet", 1938 | "name": "Tetum" 1939 | }, 1940 | "tah": { 1941 | "speakers": 150000, 1942 | "iso6391": "ty", 1943 | "iso6392": "tah", 1944 | "name": "Tahitian" 1945 | }, 1946 | "tgl": { 1947 | "speakers": 14850000, 1948 | "iso6391": "tl", 1949 | "iso6392": "tgl", 1950 | "name": "Tagalog" 1951 | }, 1952 | "tha": { 1953 | "speakers": 21000000, 1954 | "iso6391": "th", 1955 | 
"iso6392": "tha", 1956 | "name": "Thai" 1957 | }, 1958 | "tir": { 1959 | "speakers": 6060000, 1960 | "iso6391": "ti", 1961 | "iso6392": "tir", 1962 | "name": "Tigrinya" 1963 | }, 1964 | "tiv": { 1965 | "speakers": 2000000, 1966 | "iso6391": null, 1967 | "iso6392": "tiv", 1968 | "name": "Tiv" 1969 | }, 1970 | "tob": { 1971 | "speakers": 20000, 1972 | "iso6391": null, 1973 | "iso6392": null, 1974 | "name": "Toba" 1975 | }, 1976 | "toi": { 1977 | "speakers": 1105000, 1978 | "iso6391": null, 1979 | "iso6392": null, 1980 | "name": "Tonga (Zambia)" 1981 | }, 1982 | "toj": { 1983 | "speakers": 36000, 1984 | "iso6391": null, 1985 | "iso6392": null, 1986 | "name": "Tojolabal" 1987 | }, 1988 | "taq": { 1989 | "speakers": 281200, 1990 | "iso6391": null, 1991 | "iso6392": null, 1992 | "name": "Tamasheq" 1993 | }, 1994 | "top": { 1995 | "speakers": 80000, 1996 | "iso6391": null, 1997 | "iso6392": null, 1998 | "name": "Papantla Totonac" 1999 | }, 2000 | "chk": { 2001 | "speakers": 45000, 2002 | "iso6391": null, 2003 | "iso6392": "chk", 2004 | "name": "Chuukese" 2005 | }, 2006 | "tsn": { 2007 | "speakers": 3932000, 2008 | "iso6391": "tn", 2009 | "iso6392": "tsn", 2010 | "name": "Tswana" 2011 | }, 2012 | "tso": { 2013 | "speakers": 1500000, 2014 | "iso6391": "ts", 2015 | "iso6392": "tso", 2016 | "name": "Tsonga" 2017 | }, 2018 | "ctd": { 2019 | "speakers": 344100, 2020 | "iso6391": null, 2021 | "iso6392": null, 2022 | "name": "Tedim Chin" 2023 | }, 2024 | "tuk": { 2025 | "speakers": 5397500, 2026 | "iso6391": "tk", 2027 | "iso6392": "tuk", 2028 | "name": "Turkmen" 2029 | }, 2030 | "tur": { 2031 | "speakers": 59000000, 2032 | "iso6391": "tr", 2033 | "iso6392": "tur", 2034 | "name": "Turkish" 2035 | }, 2036 | "tyv": { 2037 | "speakers": 200000, 2038 | "iso6391": null, 2039 | "iso6392": "tyv", 2040 | "name": "Tuvinian" 2041 | }, 2042 | "tzm": { 2043 | "speakers": 3000000, 2044 | "iso6391": null, 2045 | "iso6392": null, 2046 | "name": "Central Atlas Tamazight" 2047 | }, 2048 | "tam": 
{ 2049 | "speakers": 62000000, 2050 | "iso6391": "ta", 2051 | "iso6392": "tam", 2052 | "name": "Tamil" 2053 | }, 2054 | "bod": { 2055 | "speakers": 6150000, 2056 | "iso6391": "bo", 2057 | "iso6392": "bod", 2058 | "name": "Tibetan" 2059 | }, 2060 | "ven": { 2061 | "speakers": 876409, 2062 | "iso6391": "ve", 2063 | "iso6392": "ven", 2064 | "name": "Venda" 2065 | }, 2066 | "ura": { 2067 | "speakers": 3500, 2068 | "iso6391": null, 2069 | "iso6392": null, 2070 | "name": "Urarina" 2071 | }, 2072 | "urd": { 2073 | "speakers": 54000000, 2074 | "iso6391": "ur", 2075 | "iso6392": "urd", 2076 | "name": "Urdu" 2077 | }, 2078 | "uzn": { 2079 | "speakers": 18386000, 2080 | "iso6391": null, 2081 | "iso6392": null, 2082 | "name": "Northern Uzbek" 2083 | }, 2084 | "ukr": { 2085 | "speakers": 41000000, 2086 | "iso6391": "uk", 2087 | "iso6392": "ukr", 2088 | "name": "Ukrainian" 2089 | }, 2090 | "uig": { 2091 | "speakers": 7464000, 2092 | "iso6391": "ug", 2093 | "iso6392": "uig", 2094 | "name": "Uighur" 2095 | }, 2096 | "unr": { 2097 | "speakers": 1560280, 2098 | "iso6391": null, 2099 | "iso6392": null, 2100 | "name": "Mundari" 2101 | }, 2102 | "vep": { 2103 | "speakers": 5800, 2104 | "iso6391": null, 2105 | "iso6392": null, 2106 | "name": "Veps" 2107 | }, 2108 | "vmw": { 2109 | "speakers": 2500000, 2110 | "iso6391": null, 2111 | "iso6392": null, 2112 | "name": "Makhuwa" 2113 | }, 2114 | "vai": { 2115 | "speakers": 119500, 2116 | "iso6391": null, 2117 | "iso6392": "vai", 2118 | "name": "Vai" 2119 | }, 2120 | "vie": { 2121 | "speakers": 66897000, 2122 | "iso6391": "vi", 2123 | "iso6392": "vie", 2124 | "name": "Vietnamese" 2125 | }, 2126 | "war": { 2127 | "speakers": 3000000, 2128 | "iso6391": null, 2129 | "iso6392": "war", 2130 | "name": "Waray (Philippines)" 2131 | }, 2132 | "wol": { 2133 | "speakers": 2700000, 2134 | "iso6391": "wo", 2135 | "iso6392": "wol", 2136 | "name": "Wolof" 2137 | }, 2138 | "hsb": { 2139 | "speakers": 70000, 2140 | "iso6391": null, 2141 | "iso6392": "hsb", 
2142 | "name": "Upper Sorbian" 2143 | }, 2144 | "cym": { 2145 | "speakers": 580000, 2146 | "iso6391": "cy", 2147 | "iso6392": "cym", 2148 | "name": "Welsh" 2149 | }, 2150 | "xho": { 2151 | "speakers": 6858000, 2152 | "iso6391": "xh", 2153 | "iso6392": "xho", 2154 | "name": "Xhosa" 2155 | }, 2156 | "yao": { 2157 | "speakers": 1597000, 2158 | "iso6391": null, 2159 | "iso6392": "yao", 2160 | "name": "Yao" 2161 | }, 2162 | "yap": { 2163 | "speakers": 6592, 2164 | "iso6391": null, 2165 | "iso6392": "yap", 2166 | "name": "Yapese" 2167 | }, 2168 | "ydd": { 2169 | "speakers": 3000000, 2170 | "iso6391": null, 2171 | "iso6392": null, 2172 | "name": "Eastern Yiddish" 2173 | }, 2174 | "ykg": { 2175 | "speakers": 1100, 2176 | "iso6391": null, 2177 | "iso6392": null, 2178 | "name": "Northern Yukaghir" 2179 | }, 2180 | "yor": { 2181 | "speakers": 20000000, 2182 | "iso6391": "yo", 2183 | "iso6392": "yor", 2184 | "name": "Yoruba" 2185 | }, 2186 | "yrk": { 2187 | "speakers": 27273, 2188 | "iso6391": null, 2189 | "iso6392": null, 2190 | "name": "Nenets" 2191 | }, 2192 | "yua": { 2193 | "speakers": 700000, 2194 | "iso6391": null, 2195 | "iso6392": null, 2196 | "name": "Yucateco" 2197 | }, 2198 | "yad": { 2199 | "speakers": 4000, 2200 | "iso6391": null, 2201 | "iso6392": null, 2202 | "name": "Yagua" 2203 | }, 2204 | "zam": { 2205 | "speakers": 80000, 2206 | "iso6391": null, 2207 | "iso6392": null, 2208 | "name": "Miahuatlán Zapotec" 2209 | }, 2210 | "ztu": { 2211 | "speakers": 2000, 2212 | "iso6391": null, 2213 | "iso6392": null, 2214 | "name": "Güilá Zapotec" 2215 | }, 2216 | "zul": { 2217 | "speakers": 9140000, 2218 | "iso6391": "zu", 2219 | "iso6392": "zul", 2220 | "name": "Zulu" 2221 | } 2222 | } -------------------------------------------------------------------------------- /tests/testthat/support.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "speakers": 885000000, 4 | "name": "Mandarin Chinese", 5 | "iso6393": "cmn", 6 | 
"udhr": "cmn_hans", 7 | "script": "Han" 8 | }, 9 | { 10 | "speakers": 332000000, 11 | "name": "Spanish", 12 | "iso6393": "spa", 13 | "udhr": "spa", 14 | "script": "Latin" 15 | }, 16 | { 17 | "speakers": 322000000, 18 | "name": "English", 19 | "iso6393": "eng", 20 | "udhr": "eng", 21 | "script": "Latin" 22 | }, 23 | { 24 | "speakers": 288000000, 25 | "name": "Russian", 26 | "iso6393": "rus", 27 | "udhr": "rus", 28 | "script": "Cyrillic" 29 | }, 30 | { 31 | "speakers": 280000000, 32 | "name": "Standard Arabic", 33 | "iso6393": "arb", 34 | "udhr": "arb", 35 | "script": "Arabic" 36 | }, 37 | { 38 | "speakers": 196000000, 39 | "name": "Bengali", 40 | "iso6393": "ben", 41 | "udhr": "ben", 42 | "script": "Bengali" 43 | }, 44 | { 45 | "speakers": 182000000, 46 | "name": "Hindi", 47 | "iso6393": "hin", 48 | "udhr": "hin", 49 | "script": "Devanagari" 50 | }, 51 | { 52 | "speakers": 182000000, 53 | "name": "Portuguese", 54 | "iso6393": "por", 55 | "udhr": "por_PT", 56 | "script": "Latin" 57 | }, 58 | { 59 | "speakers": 140000000, 60 | "name": "Indonesian", 61 | "iso6393": "ind", 62 | "udhr": "ind", 63 | "script": "Latin" 64 | }, 65 | { 66 | "speakers": 125000000, 67 | "name": "Japanese", 68 | "iso6393": "jpn", 69 | "udhr": "jpn", 70 | "script": "Hiragana, Katakana, and Han" 71 | }, 72 | { 73 | "speakers": 124000000, 74 | "name": "French", 75 | "iso6393": "fra", 76 | "udhr": "fra", 77 | "script": "Latin" 78 | }, 79 | { 80 | "speakers": 121000000, 81 | "name": "German", 82 | "iso6393": "deu", 83 | "udhr": "deu_1996", 84 | "script": "Latin" 85 | }, 86 | { 87 | "speakers": 75500800, 88 | "name": "Javanese", 89 | "iso6393": "jav", 90 | "udhr": "jav", 91 | "script": "Latin" 92 | }, 93 | { 94 | "speakers": 75000000, 95 | "name": "Korean", 96 | "iso6393": "kor", 97 | "udhr": "kor", 98 | "script": "Hangul" 99 | }, 100 | { 101 | "speakers": 73000000, 102 | "name": "Telugu", 103 | "iso6393": "tel", 104 | "udhr": "tel", 105 | "script": "Telugu" 106 | }, 107 | { 108 | "speakers": 
66897000, 109 | "name": "Vietnamese", 110 | "iso6393": "vie", 111 | "udhr": "vie", 112 | "script": "Latin" 113 | }, 114 | { 115 | "speakers": 64783000, 116 | "name": "Marathi", 117 | "iso6393": "mar", 118 | "udhr": "mar", 119 | "script": "Devanagari" 120 | }, 121 | { 122 | "speakers": 63000000, 123 | "name": "Italian", 124 | "iso6393": "ita", 125 | "udhr": "ita", 126 | "script": "Latin" 127 | }, 128 | { 129 | "speakers": 62000000, 130 | "name": "Tamil", 131 | "iso6393": "tam", 132 | "udhr": "tam", 133 | "script": "Tamil" 134 | }, 135 | { 136 | "speakers": 59000000, 137 | "name": "Turkish", 138 | "iso6393": "tur", 139 | "udhr": "tur", 140 | "script": "Latin" 141 | }, 142 | { 143 | "speakers": 54000000, 144 | "name": "Urdu", 145 | "iso6393": "urd", 146 | "udhr": "urd", 147 | "script": "Arabic" 148 | }, 149 | { 150 | "speakers": 44000000, 151 | "name": "Gujarati", 152 | "iso6393": "guj", 153 | "udhr": "guj", 154 | "script": "Gujarati" 155 | }, 156 | { 157 | "speakers": 44000000, 158 | "name": "Polish", 159 | "iso6393": "pol", 160 | "udhr": "pol", 161 | "script": "Latin" 162 | }, 163 | { 164 | "speakers": 41000000, 165 | "name": "Ukrainian", 166 | "iso6393": "ukr", 167 | "udhr": "ukr", 168 | "script": "Cyrillic" 169 | }, 170 | { 171 | "speakers": 34700000, 172 | "name": "Maithili", 173 | "iso6393": "mai", 174 | "udhr": "mai", 175 | "script": "Devanagari" 176 | }, 177 | { 178 | "speakers": 34014000, 179 | "name": "Malayalam", 180 | "iso6393": "mal", 181 | "udhr": "mal", 182 | "script": "Malayalam" 183 | }, 184 | { 185 | "speakers": 33663000, 186 | "name": "Kannada", 187 | "iso6393": "kan", 188 | "udhr": "kan", 189 | "script": "Kannada" 190 | }, 191 | { 192 | "speakers": 31000000, 193 | "name": "Burmese", 194 | "iso6393": "mya", 195 | "udhr": "mya", 196 | "script": "Myanmar" 197 | }, 198 | { 199 | "speakers": 31000000, 200 | "name": "Oriya (macrolanguage)", 201 | "iso6393": "ori", 202 | "udhr": "ori", 203 | "script": "Oriya" 204 | }, 205 | { 206 | "speakers": 30000000, 
207 | "name": "Borana-Arsi-Guji Oromo", 208 | "iso6393": "gax", 209 | "udhr": "gax", 210 | "script": "Latin" 211 | }, 212 | { 213 | "speakers": 30000000, 214 | "name": "Swahili (individual language)", 215 | "iso6393": "swh", 216 | "udhr": "swh", 217 | "script": "Latin" 218 | }, 219 | { 220 | "speakers": 27000000, 221 | "name": "Sundanese", 222 | "iso6393": "sun", 223 | "udhr": "sun", 224 | "script": "Latin" 225 | }, 226 | { 227 | "speakers": 26000000, 228 | "name": "Romanian", 229 | "iso6393": "ron", 230 | "udhr": "ron_2006", 231 | "script": "Latin" 232 | }, 233 | { 234 | "speakers": 25700000, 235 | "name": "Panjabi", 236 | "iso6393": "pan", 237 | "udhr": "pan", 238 | "script": "Gurmukhi" 239 | }, 240 | { 241 | "speakers": 25000000, 242 | "name": "Bhojpuri", 243 | "iso6393": "bho", 244 | "udhr": "bho", 245 | "script": "Devanagari" 246 | }, 247 | { 248 | "speakers": 23000000, 249 | "name": "Amharic", 250 | "iso6393": "amh", 251 | "udhr": "amh", 252 | "script": "Ethiopic" 253 | }, 254 | { 255 | "speakers": 22000000, 256 | "name": "Pulaar", 257 | "iso6393": "fuc", 258 | "udhr": "fuc", 259 | "script": "Latin" 260 | }, 261 | { 262 | "speakers": 22000000, 263 | "name": "Hausa", 264 | "iso6393": "hau", 265 | "udhr": "hau_NG", 266 | "script": "Latin" 267 | }, 268 | { 269 | "speakers": 21000000, 270 | "name": "Bosnian", 271 | "iso6393": "bos", 272 | "udhr": "bos_latn", 273 | "script": "Latin" 274 | }, 275 | { 276 | "speakers": 21000000, 277 | "name": "Bosnian", 278 | "iso6393": "bos", 279 | "udhr": "bos_cyrl", 280 | "script": "Cyrillic" 281 | }, 282 | { 283 | "speakers": 21000000, 284 | "name": "Croatian", 285 | "iso6393": "hrv", 286 | "udhr": "hrv", 287 | "script": "Latin" 288 | }, 289 | { 290 | "speakers": 21000000, 291 | "name": "Dutch", 292 | "iso6393": "nld", 293 | "udhr": "nld", 294 | "script": "Latin" 295 | }, 296 | { 297 | "speakers": 21000000, 298 | "name": "Serbian", 299 | "iso6393": "srp", 300 | "udhr": "srp_latn", 301 | "script": "Latin" 302 | }, 303 | { 304 | 
"speakers": 21000000, 305 | "name": "Serbian", 306 | "iso6393": "srp", 307 | "udhr": "srp_cyrl", 308 | "script": "Cyrillic" 309 | }, 310 | { 311 | "speakers": 21000000, 312 | "name": "Thai", 313 | "iso6393": "tha", 314 | "udhr": "tha", 315 | "script": "Thai" 316 | }, 317 | { 318 | "speakers": 20000000, 319 | "name": "Central Kurdish", 320 | "iso6393": "ckb", 321 | "udhr": "ckb", 322 | "script": "Latin" 323 | }, 324 | { 325 | "speakers": 20000000, 326 | "name": "Yoruba", 327 | "iso6393": "yor", 328 | "udhr": "yor", 329 | "script": "Latin" 330 | }, 331 | { 332 | "speakers": 18386000, 333 | "name": "Northern Uzbek", 334 | "iso6393": "uzn", 335 | "udhr": "uzn_latn", 336 | "script": "Latin" 337 | }, 338 | { 339 | "speakers": 18386000, 340 | "name": "Northern Uzbek", 341 | "iso6393": "uzn", 342 | "udhr": "uzn_cyrl", 343 | "script": "Cyrillic" 344 | }, 345 | { 346 | "speakers": 17000000, 347 | "name": "Igbo", 348 | "iso6393": "ibo", 349 | "udhr": "ibo", 350 | "script": "Latin" 351 | }, 352 | { 353 | "speakers": 16200000, 354 | "name": "Nepali (macrolanguage)", 355 | "iso6393": "nep", 356 | "udhr": "nep", 357 | "script": "Devanagari" 358 | }, 359 | { 360 | "speakers": 15230000, 361 | "name": "Cebuano", 362 | "iso6393": "ceb", 363 | "udhr": "ceb", 364 | "script": "Latin" 365 | }, 366 | { 367 | "speakers": 15020000, 368 | "name": "Seraiki", 369 | "iso6393": "skr", 370 | "udhr": "skr", 371 | "script": "Arabic" 372 | }, 373 | { 374 | "speakers": 14850000, 375 | "name": "Tagalog", 376 | "iso6393": "tgl", 377 | "udhr": "tgl", 378 | "script": "Latin" 379 | }, 380 | { 381 | "speakers": 14500000, 382 | "name": "Hungarian", 383 | "iso6393": "hun", 384 | "udhr": "hun", 385 | "script": "Latin" 386 | }, 387 | { 388 | "speakers": 13869000, 389 | "name": "North Azerbaijani", 390 | "iso6393": "azj", 391 | "udhr": "azj_cyrl", 392 | "script": "Cyrillic" 393 | }, 394 | { 395 | "speakers": 13869000, 396 | "name": "North Azerbaijani", 397 | "iso6393": "azj", 398 | "udhr": "azj_latn", 399 | 
"script": "Latin" 400 | }, 401 | { 402 | "speakers": 13218000, 403 | "name": "Sinhala", 404 | "iso6393": "sin", 405 | "udhr": "sin", 406 | "script": "Sinhala" 407 | }, 408 | { 409 | "speakers": 12258540, 410 | "name": "Modern Greek (1453-)", 411 | "iso6393": "ell", 412 | "udhr": "ell_monotonic", 413 | "script": "Greek" 414 | }, 415 | { 416 | "speakers": 12000000, 417 | "name": "Czech", 418 | "iso6393": "ces", 419 | "udhr": "ces", 420 | "script": "Latin" 421 | }, 422 | { 423 | "speakers": 10821000, 424 | "name": "Magahi", 425 | "iso6393": "mag", 426 | "udhr": "mag", 427 | "script": "Devanagari" 428 | }, 429 | { 430 | "speakers": 10200000, 431 | "name": "Belarusian", 432 | "iso6393": "bel", 433 | "udhr": "bel", 434 | "script": "Cyrillic" 435 | }, 436 | { 437 | "speakers": 10156900, 438 | "name": "Plateau Malagasy", 439 | "iso6393": "plt", 440 | "udhr": "plt", 441 | "script": "Latin" 442 | }, 443 | { 444 | "speakers": 10000000, 445 | "name": "Madurese", 446 | "iso6393": "mad", 447 | "udhr": "mad", 448 | "script": "Latin" 449 | }, 450 | { 451 | "speakers": 10000000, 452 | "name": "Nyanja", 453 | "iso6393": "nya", 454 | "udhr": "nya_chinyanja", 455 | "script": "Latin" 456 | }, 457 | { 458 | "speakers": 10000000, 459 | "name": "Chimborazo Highland Quichua", 460 | "iso6393": "qug", 461 | "udhr": "qug", 462 | "script": "Latin" 463 | }, 464 | { 465 | "speakers": 9306800, 466 | "name": "Kinyarwanda", 467 | "iso6393": "kin", 468 | "udhr": "kin", 469 | "script": "Latin" 470 | }, 471 | { 472 | "speakers": 9140000, 473 | "name": "Zulu", 474 | "iso6393": "zul", 475 | "udhr": "zul", 476 | "script": "Latin" 477 | }, 478 | { 479 | "speakers": 9000000, 480 | "name": "Bulgarian", 481 | "iso6393": "bul", 482 | "udhr": "bul", 483 | "script": "Cyrillic" 484 | }, 485 | { 486 | "speakers": 9000000, 487 | "name": "Swedish", 488 | "iso6393": "swe", 489 | "udhr": "swe", 490 | "script": "Latin" 491 | }, 492 | { 493 | "speakers": 8400000, 494 | "name": "Lingala", 495 | "iso6393": "lin", 496 | 
"udhr": "lin", 497 | "script": "Latin" 498 | }, 499 | { 500 | "speakers": 8335000, 501 | "name": "Somali", 502 | "iso6393": "som", 503 | "udhr": "som", 504 | "script": "Latin" 505 | }, 506 | { 507 | "speakers": 8200000, 508 | "name": "Southern Qiandong Miao", 509 | "iso6393": "hms", 510 | "udhr": "hms", 511 | "script": "Latin" 512 | }, 513 | { 514 | "speakers": 8000000, 515 | "name": "Iloko", 516 | "iso6393": "ilo", 517 | "udhr": "ilo", 518 | "script": "Latin" 519 | }, 520 | { 521 | "speakers": 8000000, 522 | "name": "Kazakh", 523 | "iso6393": "kaz", 524 | "udhr": "kaz", 525 | "script": "Cyrillic" 526 | }, 527 | { 528 | "speakers": 7464000, 529 | "name": "Uighur", 530 | "iso6393": "uig", 531 | "udhr": "uig_arab", 532 | "script": "Arabic" 533 | }, 534 | { 535 | "speakers": 7464000, 536 | "name": "Uighur", 537 | "iso6393": "uig", 538 | "udhr": "uig_latn", 539 | "script": "Latin" 540 | }, 541 | { 542 | "speakers": 7382000, 543 | "name": "Haitian", 544 | "iso6393": "hat", 545 | "udhr": "hat_popular", 546 | "script": "Latin" 547 | }, 548 | { 549 | "speakers": 7063200, 550 | "name": "Central Khmer", 551 | "iso6393": "khm", 552 | "udhr": "khm", 553 | "script": "Khmer" 554 | }, 555 | { 556 | "speakers": 7000000, 557 | "name": "Akan", 558 | "iso6393": "aka", 559 | "udhr": "aka_asante", 560 | "script": "Latin" 561 | }, 562 | { 563 | "speakers": 7000000, 564 | "name": "Akan", 565 | "iso6393": "aka", 566 | "udhr": "aka_fante", 567 | "script": "Latin" 568 | }, 569 | { 570 | "speakers": 7000000, 571 | "name": "Hiligaynon", 572 | "iso6393": "hil", 573 | "udhr": "hil", 574 | "script": "Latin" 575 | }, 576 | { 577 | "speakers": 7000000, 578 | "name": "Iranian Persian", 579 | "iso6393": "pes", 580 | "udhr": "pes_1", 581 | "script": "Arabic" 582 | }, 583 | { 584 | "speakers": 7000000, 585 | "name": "Shona", 586 | "iso6393": "sna", 587 | "udhr": "sna", 588 | "script": "Latin" 589 | }, 590 | { 591 | "speakers": 7000000, 592 | "name": "Tatar", 593 | "iso6393": "tat", 594 | "udhr": 
"tat", 595 | "script": "Cyrillic" 596 | }, 597 | { 598 | "speakers": 6858000, 599 | "name": "Xhosa", 600 | "iso6393": "xho", 601 | "udhr": "xho", 602 | "script": "Latin" 603 | }, 604 | { 605 | "speakers": 6836000, 606 | "name": "Armenian", 607 | "iso6393": "hye", 608 | "udhr": "hye", 609 | "script": "Armenian" 610 | }, 611 | { 612 | "speakers": 6500000, 613 | "name": "Minangkabau", 614 | "iso6393": "min", 615 | "udhr": "min", 616 | "script": "Latin" 617 | }, 618 | { 619 | "speakers": 6365000, 620 | "name": "Afrikaans", 621 | "iso6393": "afr", 622 | "udhr": "afr", 623 | "script": "Latin" 624 | }, 625 | { 626 | "speakers": 6300000, 627 | "name": "Luba-Lulua", 628 | "iso6393": "lua", 629 | "udhr": "lua", 630 | "script": "Latin" 631 | }, 632 | { 633 | "speakers": 6218900, 634 | "name": "Santali", 635 | "iso6393": "sat", 636 | "udhr": "sat", 637 | "script": "Ol_Chiki" 638 | }, 639 | { 640 | "speakers": 6150000, 641 | "name": "Tibetan", 642 | "iso6393": "bod", 643 | "udhr": "bod", 644 | "script": "Tibetan" 645 | }, 646 | { 647 | "speakers": 6060000, 648 | "name": "Tigrinya", 649 | "iso6393": "tir", 650 | "udhr": "tir", 651 | "script": "Ethiopic" 652 | }, 653 | { 654 | "speakers": 6000000, 655 | "name": "Finnish", 656 | "iso6393": "fin", 657 | "udhr": "fin", 658 | "script": "Latin" 659 | }, 660 | { 661 | "speakers": 6000000, 662 | "name": "Rundi", 663 | "iso6393": "run", 664 | "udhr": "run", 665 | "script": "Latin" 666 | }, 667 | { 668 | "speakers": 5606000, 669 | "name": "Slovak", 670 | "iso6393": "slk", 671 | "udhr": "slk", 672 | "script": "Latin" 673 | }, 674 | { 675 | "speakers": 5397500, 676 | "name": "Turkmen", 677 | "iso6393": "tuk", 678 | "udhr": "tuk_cyrl", 679 | "script": "Cyrillic" 680 | }, 681 | { 682 | "speakers": 5397500, 683 | "name": "Turkmen", 684 | "iso6393": "tuk", 685 | "udhr": "tuk_latn", 686 | "script": "Latin" 687 | }, 688 | { 689 | "speakers": 5292000, 690 | "name": "Danish", 691 | "iso6393": "dan", 692 | "udhr": "dan", 693 | "script": "Latin" 694 
| }, 695 | { 696 | "speakers": 5000000, 697 | "name": "Tosk Albanian", 698 | "iso6393": "als", 699 | "udhr": "als", 700 | "script": "Latin" 701 | }, 702 | { 703 | "speakers": 5000000, 704 | "name": "Norwegian Bokmål", 705 | "iso6393": "nob", 706 | "udhr": "nob", 707 | "script": "Latin" 708 | }, 709 | { 710 | "speakers": 5000000, 711 | "name": "Sukuma", 712 | "iso6393": "suk", 713 | "udhr": "suk", 714 | "script": "Latin" 715 | }, 716 | { 717 | "speakers": 4900000, 718 | "name": "Sango", 719 | "iso6393": "sag", 720 | "udhr": "sag", 721 | "script": "Latin" 722 | }, 723 | { 724 | "speakers": 4700000, 725 | "name": "Norwegian Nynorsk", 726 | "iso6393": "nno", 727 | "udhr": "nno", 728 | "script": "Latin" 729 | }, 730 | { 731 | "speakers": 4612000, 732 | "name": "Hebrew", 733 | "iso6393": "heb", 734 | "udhr": "heb", 735 | "script": "Hebrew" 736 | }, 737 | { 738 | "speakers": 4600000, 739 | "name": "Mossi", 740 | "iso6393": "mos", 741 | "udhr": "mos", 742 | "script": "Latin" 743 | }, 744 | { 745 | "speakers": 4380000, 746 | "name": "Tajik", 747 | "iso6393": "tgk", 748 | "udhr": "tgk", 749 | "script": "Cyrillic" 750 | }, 751 | { 752 | "speakers": 4353000, 753 | "name": "Catalan", 754 | "iso6393": "cat", 755 | "udhr": "cat", 756 | "script": "Latin" 757 | }, 758 | { 759 | "speakers": 4197000, 760 | "name": "Southern Sotho", 761 | "iso6393": "sot", 762 | "udhr": "sot", 763 | "script": "Latin" 764 | }, 765 | { 766 | "speakers": 4103000, 767 | "name": "Georgian", 768 | "iso6393": "kat", 769 | "udhr": "kat", 770 | "script": "Georgian" 771 | }, 772 | { 773 | "speakers": 4000000, 774 | "name": "Central Bikol", 775 | "iso6393": "bcl", 776 | "udhr": "bcl", 777 | "script": "Latin" 778 | }, 779 | { 780 | "speakers": 4000000, 781 | "name": "Galician", 782 | "iso6393": "glg", 783 | "udhr": "glg", 784 | "script": "Latin" 785 | }, 786 | { 787 | "speakers": 4000000, 788 | "name": "Lithuanian", 789 | "iso6393": "lit", 790 | "udhr": "lit", 791 | "script": "Latin" 792 | }, 793 | { 794 | 
"speakers": 4000000, 795 | "name": "Lao", 796 | "iso6393": "lao", 797 | "udhr": "lao", 798 | "script": "Lao" 799 | }, 800 | { 801 | "speakers": 4000000, 802 | "name": "Umbundu", 803 | "iso6393": "umb", 804 | "udhr": "umb", 805 | "script": "Latin" 806 | }, 807 | { 808 | "speakers": 3932000, 809 | "name": "Tswana", 810 | "iso6393": "tsn", 811 | "udhr": "tsn", 812 | "script": "Latin" 813 | }, 814 | { 815 | "speakers": 3851000, 816 | "name": "Pedi", 817 | "iso6393": "nso", 818 | "udhr": "nso", 819 | "script": "Latin" 820 | }, 821 | { 822 | "speakers": 3800000, 823 | "name": "Balinese", 824 | "iso6393": "ban", 825 | "udhr": "ban", 826 | "script": "Latin" 827 | }, 828 | { 829 | "speakers": 3500000, 830 | "name": "Buginese", 831 | "iso6393": "bug", 832 | "udhr": "bug", 833 | "script": "Latin" 834 | }, 835 | { 836 | "speakers": 3500000, 837 | "name": "Central Kanuri", 838 | "iso6393": "knc", 839 | "udhr": "knc", 840 | "script": "Latin" 841 | }, 842 | { 843 | "speakers": 3186000, 844 | "name": "Ibibio", 845 | "iso6393": "ibb", 846 | "udhr": "ibb", 847 | "script": "Latin" 848 | }, 849 | { 850 | "speakers": 3015980, 851 | "name": "Ganda", 852 | "iso6393": "lug", 853 | "udhr": "lug", 854 | "script": "Latin" 855 | }, 856 | { 857 | "speakers": 3000000, 858 | "name": "Achinese", 859 | "iso6393": "ace", 860 | "udhr": "ace", 861 | "script": "Latin" 862 | }, 863 | { 864 | "speakers": 3000000, 865 | "name": "Bambara", 866 | "iso6393": "bam", 867 | "udhr": "bam", 868 | "script": "Latin" 869 | }, 870 | { 871 | "speakers": 3000000, 872 | "name": "Kimbundu", 873 | "iso6393": "kmb", 874 | "udhr": "kmb", 875 | "script": "Latin" 876 | }, 877 | { 878 | "speakers": 3000000, 879 | "name": "Lunda", 880 | "iso6393": "lun", 881 | "udhr": "lun", 882 | "script": "Latin" 883 | }, 884 | { 885 | "speakers": 3000000, 886 | "name": "Central Atlas Tamazight", 887 | "iso6393": "tzm", 888 | "udhr": "tzm", 889 | "script": "Latin" 890 | }, 891 | { 892 | "speakers": 3000000, 893 | "name": "Waray 
(Philippines)", 894 | "iso6393": "war", 895 | "udhr": "war", 896 | "script": "Latin" 897 | }, 898 | { 899 | "speakers": 3000000, 900 | "name": "Eastern Yiddish", 901 | "iso6393": "ydd", 902 | "udhr": "ydd", 903 | "script": "Hebrew" 904 | }, 905 | { 906 | "speakers": 2700000, 907 | "name": "Wolof", 908 | "iso6393": "wol", 909 | "udhr": "wol", 910 | "script": "Latin" 911 | }, 912 | { 913 | "speakers": 2631420, 914 | "name": "Kirghiz", 915 | "iso6393": "kir", 916 | "udhr": "kir", 917 | "script": "Cyrillic" 918 | }, 919 | { 920 | "speakers": 2600000, 921 | "name": "Low German", 922 | "iso6393": "nds", 923 | "udhr": "nds", 924 | "script": "Latin" 925 | }, 926 | { 927 | "speakers": 2500000, 928 | "name": "Macedonian", 929 | "iso6393": "mkd", 930 | "udhr": "mkd", 931 | "script": "Cyrillic" 932 | }, 933 | { 934 | "speakers": 2500000, 935 | "name": "Makhuwa", 936 | "iso6393": "vmw", 937 | "udhr": "vmw", 938 | "script": "Latin" 939 | }, 940 | { 941 | "speakers": 2477600, 942 | "name": "Ewe", 943 | "iso6393": "ewe", 944 | "udhr": "ewe", 945 | "script": "Latin" 946 | }, 947 | { 948 | "speakers": 2330000, 949 | "name": "Halh Mongolian", 950 | "iso6393": "khk", 951 | "udhr": "khk", 952 | "script": "Cyrillic" 953 | }, 954 | { 955 | "speakers": 2218000, 956 | "name": "Slovenian", 957 | "iso6393": "slv", 958 | "udhr": "slv", 959 | "script": "Latin" 960 | }, 961 | { 962 | "speakers": 2200000, 963 | "name": "Central Aymara", 964 | "iso6393": "ayr", 965 | "udhr": "ayr", 966 | "script": "Latin" 967 | }, 968 | { 969 | "speakers": 2150000, 970 | "name": "Bemba (Zambia)", 971 | "iso6393": "bem", 972 | "udhr": "bem", 973 | "script": "Latin" 974 | }, 975 | { 976 | "speakers": 2140300, 977 | "name": "Eastern Maninkakan", 978 | "iso6393": "emk", 979 | "udhr": "emk", 980 | "script": "Latin" 981 | }, 982 | { 983 | "speakers": 2130000, 984 | "name": "Baoulé", 985 | "iso6393": "bci", 986 | "udhr": "bci", 987 | "script": "Latin" 988 | }, 989 | { 990 | "speakers": 2000000, 991 | "name": 
"Esperanto", 992 | "iso6393": "epo", 993 | "udhr": "epo", 994 | "script": "Latin" 995 | }, 996 | { 997 | "speakers": 2000000, 998 | "name": "Pampanga", 999 | "iso6393": "pam", 1000 | "udhr": "pam", 1001 | "script": "Latin" 1002 | }, 1003 | { 1004 | "speakers": 2000000, 1005 | "name": "Tiv", 1006 | "iso6393": "tiv", 1007 | "udhr": "tiv", 1008 | "script": "Latin" 1009 | }, 1010 | { 1011 | "speakers": 2000000, 1012 | "name": "Tok Pisin", 1013 | "iso6393": "tpi", 1014 | "udhr": "tpi", 1015 | "script": "Latin" 1016 | }, 1017 | { 1018 | "speakers": 1670000, 1019 | "name": "Swati", 1020 | "iso6393": "ssw", 1021 | "udhr": "ssw", 1022 | "script": "Latin" 1023 | }, 1024 | { 1025 | "speakers": 1643193, 1026 | "name": "Nyankole", 1027 | "iso6393": "nyn", 1028 | "udhr": "nyn", 1029 | "script": "Latin" 1030 | }, 1031 | { 1032 | "speakers": 1600000, 1033 | "name": "Sichuan Yi", 1034 | "iso6393": "iii", 1035 | "udhr": "iii", 1036 | "script": "Yi" 1037 | }, 1038 | { 1039 | "speakers": 1597000, 1040 | "name": "Yao", 1041 | "iso6393": "yao", 1042 | "udhr": "yao", 1043 | "script": "Latin" 1044 | }, 1045 | { 1046 | "speakers": 1550000, 1047 | "name": "Latvian", 1048 | "iso6393": "lav", 1049 | "udhr": "lav", 1050 | "script": "Latin" 1051 | }, 1052 | { 1053 | "speakers": 1500000, 1054 | "name": "Cusco Quechua", 1055 | "iso6393": "quz", 1056 | "udhr": "quz", 1057 | "script": "Latin" 1058 | }, 1059 | { 1060 | "speakers": 1500000, 1061 | "name": "Vlax Romani", 1062 | "iso6393": "rmy", 1063 | "udhr": "rmy", 1064 | "script": "Latin" 1065 | }, 1066 | { 1067 | "speakers": 1500000, 1068 | "name": "Logudorese Sardinian", 1069 | "iso6393": "src", 1070 | "udhr": "src", 1071 | "script": "Latin" 1072 | }, 1073 | { 1074 | "speakers": 1500000, 1075 | "name": "Scots", 1076 | "iso6393": "sco", 1077 | "udhr": "sco", 1078 | "script": "Latin" 1079 | }, 1080 | { 1081 | "speakers": 1500000, 1082 | "name": "Tsonga", 1083 | "iso6393": "tso", 1084 | "udhr": "tso_MZ", 1085 | "script": "Latin" 1086 | }, 1087 | { 
1088 | "speakers": 1480000, 1089 | "name": "Mende (Sierra Leone)", 1090 | "iso6393": "men", 1091 | "udhr": "men", 1092 | "script": "Latin" 1093 | }, 1094 | { 1095 | "speakers": 1436000, 1096 | "name": "Fon", 1097 | "iso6393": "fon", 1098 | "udhr": "fon", 1099 | "script": "Latin" 1100 | }, 1101 | { 1102 | "speakers": 1376898, 1103 | "name": "Central Nahuatl", 1104 | "iso6393": "nhn", 1105 | "udhr": "nhn", 1106 | "script": "Latin" 1107 | }, 1108 | { 1109 | "speakers": 1350000, 1110 | "name": "Northeastern Dinka", 1111 | "iso6393": "dip", 1112 | "udhr": "dip", 1113 | "script": "Latin" 1114 | }, 1115 | { 1116 | "speakers": 1260000, 1117 | "name": "Makonde", 1118 | "iso6393": "kde", 1119 | "udhr": "kde", 1120 | "script": "Latin" 1121 | }, 1122 | { 1123 | "speakers": 1240000, 1124 | "name": "Siona", 1125 | "iso6393": "snn", 1126 | "udhr": "snn", 1127 | "script": "Latin" 1128 | }, 1129 | { 1130 | "speakers": 1200000, 1131 | "name": "Kabiyè", 1132 | "iso6393": "kbp", 1133 | "udhr": "kbp", 1134 | "script": "Latin" 1135 | }, 1136 | { 1137 | "speakers": 1200000, 1138 | "name": "Timne", 1139 | "iso6393": "tem", 1140 | "udhr": "tem", 1141 | "script": "Latin" 1142 | }, 1143 | { 1144 | "speakers": 1105000, 1145 | "name": "Tonga (Zambia)", 1146 | "iso6393": "toi", 1147 | "udhr": "toi", 1148 | "script": "Latin" 1149 | }, 1150 | { 1151 | "speakers": 1100000, 1152 | "name": "Estonian", 1153 | "iso6393": "est", 1154 | "udhr": "est", 1155 | "script": "Latin" 1156 | }, 1157 | { 1158 | "speakers": 1067000, 1159 | "name": "Soninke", 1160 | "iso6393": "snk", 1161 | "udhr": "snk", 1162 | "script": "Latin" 1163 | }, 1164 | { 1165 | "speakers": 1004000, 1166 | "name": "Chokwe", 1167 | "iso6393": "cjk", 1168 | "udhr": "cjk", 1169 | "script": "Latin" 1170 | }, 1171 | { 1172 | "speakers": 1000000, 1173 | "name": "Assyrian Neo-Aramaic", 1174 | "iso6393": "aii", 1175 | "udhr": "aii", 1176 | "script": "Syriac" 1177 | }, 1178 | { 1179 | "speakers": 1000000, 1180 | "name": "Adangme", 1181 | 
"iso6393": "ada", 1182 | "udhr": "ada", 1183 | "script": "Latin" 1184 | }, 1185 | { 1186 | "speakers": 1000000, 1187 | "name": "Bini", 1188 | "iso6393": "bin", 1189 | "udhr": "bin", 1190 | "script": "Latin" 1191 | }, 1192 | { 1193 | "speakers": 1000000, 1194 | "name": "Ga", 1195 | "iso6393": "gaa", 1196 | "udhr": "gaa", 1197 | "script": "Latin" 1198 | }, 1199 | { 1200 | "speakers": 1000000, 1201 | "name": "Koongo", 1202 | "iso6393": "kng", 1203 | "udhr": "kng", 1204 | "script": "Latin" 1205 | }, 1206 | { 1207 | "speakers": 1000000, 1208 | "name": "Ndonga", 1209 | "iso6393": "ndo", 1210 | "udhr": "ndo", 1211 | "script": "Latin" 1212 | }, 1213 | { 1214 | "speakers": 1000000, 1215 | "name": "Ayacucho Quechua", 1216 | "iso6393": "quy", 1217 | "udhr": "quy", 1218 | "script": "Latin" 1219 | }, 1220 | { 1221 | "speakers": 1000000, 1222 | "name": "Balkan Romani", 1223 | "iso6393": "rmn", 1224 | "udhr": "rmn", 1225 | "script": "Latin" 1226 | }, 1227 | { 1228 | "speakers": 926000, 1229 | "name": "Nyamwezi", 1230 | "iso6393": "nym", 1231 | "udhr": "nym", 1232 | "script": "Latin" 1233 | }, 1234 | { 1235 | "speakers": 923000, 1236 | "name": "Susu", 1237 | "iso6393": "sus", 1238 | "udhr": "sus", 1239 | "script": "Latin" 1240 | }, 1241 | { 1242 | "speakers": 876409, 1243 | "name": "Venda", 1244 | "iso6393": "ven", 1245 | "udhr": "ven", 1246 | "script": "Latin" 1247 | }, 1248 | { 1249 | "speakers": 868800, 1250 | "name": "Serer", 1251 | "iso6393": "srr", 1252 | "udhr": "srr", 1253 | "script": "Latin" 1254 | }, 1255 | { 1256 | "speakers": 865000, 1257 | "name": "Khasi", 1258 | "iso6393": "kha", 1259 | "udhr": "kha", 1260 | "script": "Latin" 1261 | }, 1262 | { 1263 | "speakers": 820000, 1264 | "name": "Northern Qiandong Miao", 1265 | "iso6393": "hea", 1266 | "udhr": "hea", 1267 | "script": "Latin" 1268 | }, 1269 | { 1270 | "speakers": 808000, 1271 | "name": "Guinea Kpelle", 1272 | "iso6393": "gkp", 1273 | "udhr": "gkp", 1274 | "script": "Latin" 1275 | }, 1276 | { 1277 | "speakers": 
747000, 1278 | "name": "Hani", 1279 | "iso6393": "hni", 1280 | "udhr": "hni", 1281 | "script": "Latin" 1282 | }, 1283 | { 1284 | "speakers": 700000, 1285 | "name": "Yucateco", 1286 | "iso6393": "yua", 1287 | "udhr": "yua", 1288 | "script": "Latin" 1289 | }, 1290 | { 1291 | "speakers": 650000, 1292 | "name": "Fijian", 1293 | "iso6393": "fij", 1294 | "udhr": "fij", 1295 | "script": "Latin" 1296 | }, 1297 | { 1298 | "speakers": 600000, 1299 | "name": "Friulian", 1300 | "iso6393": "fur", 1301 | "udhr": "fur", 1302 | "script": "Latin" 1303 | }, 1304 | { 1305 | "speakers": 600000, 1306 | "name": "Tetum", 1307 | "iso6393": "tet", 1308 | "udhr": "tet", 1309 | "script": "Latin" 1310 | }, 1311 | { 1312 | "speakers": 600000, 1313 | "name": "Walloon", 1314 | "iso6393": "wln", 1315 | "udhr": "wln", 1316 | "script": "Latin" 1317 | }, 1318 | { 1319 | "speakers": 588000, 1320 | "name": "Basque", 1321 | "iso6393": "eus", 1322 | "udhr": "eus", 1323 | "script": "Latin" 1324 | }, 1325 | { 1326 | "speakers": 588000, 1327 | "name": "South Ndebele", 1328 | "iso6393": "nbl", 1329 | "udhr": "nbl", 1330 | "script": "Latin" 1331 | }, 1332 | { 1333 | "speakers": 588000, 1334 | "name": "Ossetian", 1335 | "iso6393": "oss", 1336 | "udhr": "oss", 1337 | "script": "Cyrillic" 1338 | }, 1339 | { 1340 | "speakers": 580000, 1341 | "name": "Welsh", 1342 | "iso6393": "cym", 1343 | "udhr": "cym", 1344 | "script": "Latin" 1345 | }, 1346 | { 1347 | "speakers": 580000, 1348 | "name": "Upper Guinea Crioulo", 1349 | "iso6393": "pov", 1350 | "udhr": "pov", 1351 | "script": "Latin" 1352 | }, 1353 | { 1354 | "speakers": 541750, 1355 | "name": "Lushai", 1356 | "iso6393": "lus", 1357 | "udhr": "lus", 1358 | "script": "Latin" 1359 | }, 1360 | { 1361 | "speakers": 540000, 1362 | "name": "Dagbani", 1363 | "iso6393": "dag", 1364 | "udhr": "dag", 1365 | "script": "Latin" 1366 | }, 1367 | { 1368 | "speakers": 501000, 1369 | "name": "Southern Dagaare", 1370 | "iso6393": "dga", 1371 | "udhr": "dga", 1372 | "script": 
"Latin" 1373 | }, 1374 | { 1375 | "speakers": 500000, 1376 | "name": "Breton", 1377 | "iso6393": "bre", 1378 | "udhr": "bre", 1379 | "script": "Latin" 1380 | }, 1381 | { 1382 | "speakers": 500000, 1383 | "name": "Kekchí", 1384 | "iso6393": "kek", 1385 | "udhr": "kek", 1386 | "script": "Latin" 1387 | }, 1388 | { 1389 | "speakers": 500000, 1390 | "name": "Picard", 1391 | "iso6393": "pcd", 1392 | "udhr": "pcd", 1393 | "script": "Latin" 1394 | }, 1395 | { 1396 | "speakers": 500000, 1397 | "name": "Romansh", 1398 | "iso6393": "roh", 1399 | "udhr": "roh", 1400 | "script": "Latin" 1401 | }, 1402 | { 1403 | "speakers": 480000, 1404 | "name": "Bari", 1405 | "iso6393": "bfa", 1406 | "udhr": "bfa", 1407 | "script": "Latin" 1408 | }, 1409 | { 1410 | "speakers": 480000, 1411 | "name": "Krio", 1412 | "iso6393": "kri", 1413 | "udhr": "kri", 1414 | "script": "Latin" 1415 | }, 1416 | { 1417 | "speakers": 446264, 1418 | "name": "Haka Chin", 1419 | "iso6393": "cnh", 1420 | "udhr": "cnh", 1421 | "script": "Latin" 1422 | }, 1423 | { 1424 | "speakers": 440000, 1425 | "name": "Mapudungun", 1426 | "iso6393": "arn", 1427 | "udhr": "arn", 1428 | "script": "Latin" 1429 | }, 1430 | { 1431 | "speakers": 400000, 1432 | "name": "Baatonum", 1433 | "iso6393": "bba", 1434 | "udhr": "bba", 1435 | "script": "Latin" 1436 | }, 1437 | { 1438 | "speakers": 393943, 1439 | "name": "Kabuverdianu", 1440 | "iso6393": "kea", 1441 | "udhr": "kea", 1442 | "script": "Latin" 1443 | }, 1444 | { 1445 | "speakers": 363000, 1446 | "name": "Yakut", 1447 | "iso6393": "sah", 1448 | "udhr": "sah", 1449 | "script": "Cyrillic" 1450 | }, 1451 | { 1452 | "speakers": 362000, 1453 | "name": "Samoan", 1454 | "iso6393": "smo", 1455 | "udhr": "smo", 1456 | "script": "Latin" 1457 | }, 1458 | { 1459 | "speakers": 361709, 1460 | "name": "Konzo", 1461 | "iso6393": "koo", 1462 | "udhr": "koo", 1463 | "script": "Latin" 1464 | }, 1465 | { 1466 | "speakers": 352500, 1467 | "name": "Nzima", 1468 | "iso6393": "nzi", 1469 | "udhr": "nzi", 
1470 | "script": "Latin" 1471 | }, 1472 | { 1473 | "speakers": 350000, 1474 | "name": "Central Mazahua", 1475 | "iso6393": "maz", 1476 | "udhr": "maz", 1477 | "script": "Latin" 1478 | }, 1479 | { 1480 | "speakers": 350000, 1481 | "name": "Pijin", 1482 | "iso6393": "pis", 1483 | "udhr": "pis", 1484 | "script": "Latin" 1485 | }, 1486 | { 1487 | "speakers": 344100, 1488 | "name": "Tedim Chin", 1489 | "iso6393": "ctd", 1490 | "udhr": "ctd", 1491 | "script": "Latin" 1492 | }, 1493 | { 1494 | "speakers": 341000, 1495 | "name": "Corsican", 1496 | "iso6393": "cos", 1497 | "udhr": "cos", 1498 | "script": "Latin" 1499 | }, 1500 | { 1501 | "speakers": 335518, 1502 | "name": "Luxembourgish", 1503 | "iso6393": "ltz", 1504 | "udhr": "ltz", 1505 | "script": "Latin" 1506 | }, 1507 | { 1508 | "speakers": 335000, 1509 | "name": "West-Central Limba", 1510 | "iso6393": "lia", 1511 | "udhr": "lia", 1512 | "script": "Latin" 1513 | }, 1514 | { 1515 | "speakers": 330000, 1516 | "name": "Maltese", 1517 | "iso6393": "mlt", 1518 | "udhr": "mlt", 1519 | "script": "Latin" 1520 | }, 1521 | { 1522 | "speakers": 327000, 1523 | "name": "Mina (Cameroon)", 1524 | "iso6393": "hna", 1525 | "udhr": "hna", 1526 | "script": "Latin" 1527 | }, 1528 | { 1529 | "speakers": 305000, 1530 | "name": "Wayuu", 1531 | "iso6393": "guc", 1532 | "udhr": "guc", 1533 | "script": "Latin" 1534 | }, 1535 | { 1536 | "speakers": 300000, 1537 | "name": "K'iche'", 1538 | "iso6393": "quc", 1539 | "udhr": "quc", 1540 | "script": "Latin" 1541 | }, 1542 | { 1543 | "speakers": 300000, 1544 | "name": "Huaylas Ancash Quechua", 1545 | "iso6393": "qwh", 1546 | "udhr": "qwh", 1547 | "script": "Latin" 1548 | }, 1549 | { 1550 | "speakers": 287000, 1551 | "name": "Dhivehi", 1552 | "iso6393": "div", 1553 | "udhr": "div", 1554 | "script": "Thaana" 1555 | }, 1556 | { 1557 | "speakers": 282845, 1558 | "name": "Icelandic", 1559 | "iso6393": "isl", 1560 | "udhr": "isl", 1561 | "script": "Latin" 1562 | }, 1563 | { 1564 | "speakers": 276000, 1565 
| "name": "Kaonde", 1566 | "iso6393": "kqn", 1567 | "udhr": "kqn", 1568 | "script": "Latin" 1569 | }, 1570 | { 1571 | "speakers": 260000, 1572 | "name": "Jola-Fonyi", 1573 | "iso6393": "dyo", 1574 | "udhr": "dyo", 1575 | "script": "Latin" 1576 | }, 1577 | { 1578 | "speakers": 260000, 1579 | "name": "Irish", 1580 | "iso6393": "gle", 1581 | "udhr": "gle", 1582 | "script": "Latin" 1583 | }, 1584 | { 1585 | "speakers": 250000, 1586 | "name": "Gonja", 1587 | "iso6393": "gjn", 1588 | "udhr": "gjn", 1589 | "script": "Latin" 1590 | }, 1591 | { 1592 | "speakers": 232000, 1593 | "name": "Ao Naga", 1594 | "iso6393": "njo", 1595 | "udhr": "njo", 1596 | "script": "Latin" 1597 | }, 1598 | { 1599 | "speakers": 200000, 1600 | "name": "Mezquital Otomi", 1601 | "iso6393": "ote", 1602 | "udhr": "ote", 1603 | "script": "Latin" 1604 | }, 1605 | { 1606 | "speakers": 200000, 1607 | "name": "Northern Conchucos Ancash Quechua", 1608 | "iso6393": "qxn", 1609 | "udhr": "qxn", 1610 | "script": "Latin" 1611 | }, 1612 | { 1613 | "speakers": 200000, 1614 | "name": "Tuvinian", 1615 | "iso6393": "tyv", 1616 | "udhr": "tyv", 1617 | "script": "Cyrillic" 1618 | }, 1619 | { 1620 | "speakers": 200000, 1621 | "name": "Kasem", 1622 | "iso6393": "xsm", 1623 | "udhr": "xsm", 1624 | "script": "Latin" 1625 | }, 1626 | { 1627 | "speakers": 198000, 1628 | "name": "Gagauz", 1629 | "iso6393": "gag", 1630 | "udhr": "gag", 1631 | "script": "Latin" 1632 | }, 1633 | { 1634 | "speakers": 194433, 1635 | "name": "Sanskrit", 1636 | "iso6393": "san", 1637 | "udhr": "san", 1638 | "script": "Devanagari" 1639 | }, 1640 | { 1641 | "speakers": 175000, 1642 | "name": "Shilluk", 1643 | "iso6393": "shk", 1644 | "udhr": "shk", 1645 | "script": "Latin" 1646 | }, 1647 | { 1648 | "speakers": 172000, 1649 | "name": "Nyemba", 1650 | "iso6393": "nba", 1651 | "udhr": "nba", 1652 | "script": "Latin" 1653 | }, 1654 | { 1655 | "speakers": 160000, 1656 | "name": "Mískito", 1657 | "iso6393": "miq", 1658 | "udhr": "miq", 1659 | "script": 
"Latin" 1660 | }, 1661 | { 1662 | "speakers": 157000, 1663 | "name": "Mam", 1664 | "iso6393": "mam", 1665 | "udhr": "mam", 1666 | "script": "Latin" 1667 | }, 1668 | { 1669 | "speakers": 150000, 1670 | "name": "Huastec", 1671 | "iso6393": "hus", 1672 | "udhr": "hus", 1673 | "script": "Latin" 1674 | }, 1675 | { 1676 | "speakers": 150000, 1677 | "name": "Tahitian", 1678 | "iso6393": "tah", 1679 | "udhr": "tah", 1680 | "script": "Latin" 1681 | }, 1682 | { 1683 | "speakers": 148530, 1684 | "name": "Navajo", 1685 | "iso6393": "nav", 1686 | "udhr": "nav", 1687 | "script": "Latin" 1688 | }, 1689 | { 1690 | "speakers": 135000, 1691 | "name": "Otuho", 1692 | "iso6393": "lot", 1693 | "udhr": "lot", 1694 | "script": "Latin" 1695 | }, 1696 | { 1697 | "speakers": 132200, 1698 | "name": "Kaqchikel", 1699 | "iso6393": "cak", 1700 | "udhr": "cak", 1701 | "script": "Latin" 1702 | }, 1703 | { 1704 | "speakers": 125000, 1705 | "name": "Lamnso'", 1706 | "iso6393": "lns", 1707 | "udhr": "lns", 1708 | "script": "Latin" 1709 | }, 1710 | { 1711 | "speakers": 123000, 1712 | "name": "Tonga (Tonga Islands)", 1713 | "iso6393": "ton", 1714 | "udhr": "ton", 1715 | "script": "Latin" 1716 | }, 1717 | { 1718 | "speakers": 120000, 1719 | "name": "Ladino", 1720 | "iso6393": "lad", 1721 | "udhr": "lad", 1722 | "script": "Latin" 1723 | }, 1724 | { 1725 | "speakers": 120000, 1726 | "name": "Ditammari", 1727 | "iso6393": "tbz", 1728 | "udhr": "tbz", 1729 | "script": "Latin" 1730 | }, 1731 | { 1732 | "speakers": 119500, 1733 | "name": "Vai", 1734 | "iso6393": "vai", 1735 | "udhr": "vai", 1736 | "script": "Vai" 1737 | }, 1738 | { 1739 | "speakers": 105000, 1740 | "name": "Abkhazian", 1741 | "iso6393": "abk", 1742 | "udhr": "abk", 1743 | "script": "Cyrillic" 1744 | }, 1745 | { 1746 | "speakers": 100000, 1747 | "name": "Asturian", 1748 | "iso6393": "ast", 1749 | "udhr": "ast", 1750 | "script": "Latin" 1751 | }, 1752 | { 1753 | "speakers": 100000, 1754 | "name": "Purepecha", 1755 | "iso6393": "tsz", 1756 | 
"udhr": "tsz", 1757 | "script": "Latin" 1758 | }, 1759 | { 1760 | "speakers": 94500, 1761 | "name": "Garifuna", 1762 | "iso6393": "cab", 1763 | "udhr": "cab", 1764 | "script": "Latin" 1765 | }, 1766 | { 1767 | "speakers": 80000, 1768 | "name": "Karelian", 1769 | "iso6393": "krl", 1770 | "udhr": "krl", 1771 | "script": "Latin" 1772 | }, 1773 | { 1774 | "speakers": 80000, 1775 | "name": "Papantla Totonac", 1776 | "iso6393": "top", 1777 | "udhr": "top", 1778 | "script": "Latin" 1779 | }, 1780 | { 1781 | "speakers": 80000, 1782 | "name": "Miahuatlán Zapotec", 1783 | "iso6393": "zam", 1784 | "udhr": "zam", 1785 | "script": "Latin" 1786 | }, 1787 | { 1788 | "speakers": 78000, 1789 | "name": "Chamorro", 1790 | "iso6393": "cha", 1791 | "udhr": "cha", 1792 | "script": "Latin" 1793 | }, 1794 | { 1795 | "speakers": 72700, 1796 | "name": "Seselwa Creole French", 1797 | "iso6393": "crs", 1798 | "udhr": "crs", 1799 | "script": "Latin" 1800 | }, 1801 | { 1802 | "speakers": 72000, 1803 | "name": "Dendi (Benin)", 1804 | "iso6393": "ddn", 1805 | "udhr": "ddn", 1806 | "script": "Latin" 1807 | }, 1808 | { 1809 | "speakers": 71841, 1810 | "name": "Lozi", 1811 | "iso6393": "loz", 1812 | "udhr": "loz", 1813 | "script": "Latin" 1814 | }, 1815 | { 1816 | "speakers": 70000, 1817 | "name": "Upper Sorbian", 1818 | "iso6393": "hsb", 1819 | "udhr": "hsb", 1820 | "script": "Latin" 1821 | }, 1822 | { 1823 | "speakers": 70000, 1824 | "name": "Maori", 1825 | "iso6393": "mri", 1826 | "udhr": "mri", 1827 | "script": "Latin" 1828 | }, 1829 | { 1830 | "speakers": 68487, 1831 | "name": "Páez", 1832 | "iso6393": "pbb", 1833 | "udhr": "pbb", 1834 | "script": "Latin" 1835 | }, 1836 | { 1837 | "speakers": 68000, 1838 | "name": "Southern Altai", 1839 | "iso6393": "alt", 1840 | "udhr": "alt", 1841 | "script": "Cyrillic" 1842 | }, 1843 | { 1844 | "speakers": 65000, 1845 | "name": "Metlatónoc Mixtec", 1846 | "iso6393": "mxv", 1847 | "udhr": "mxv", 1848 | "script": "Latin" 1849 | }, 1850 | { 1851 | "speakers": 
65000, 1852 | "name": "Ambo-Pasco Quechua", 1853 | "iso6393": "qva", 1854 | "udhr": "qva", 1855 | "script": "Latin" 1856 | }, 1857 | { 1858 | "speakers": 63653, 1859 | "name": "Scottish Gaelic", 1860 | "iso6393": "gla", 1861 | "udhr": "gla", 1862 | "script": "Latin" 1863 | }, 1864 | { 1865 | "speakers": 60000, 1866 | "name": "Swampy Cree", 1867 | "iso6393": "csw", 1868 | "udhr": "csw", 1869 | "script": "Canadian_Aboriginal" 1870 | }, 1871 | { 1872 | "speakers": 60000, 1873 | "name": "Khakas", 1874 | "iso6393": "kjh", 1875 | "udhr": "kjh", 1876 | "script": "Cyrillic" 1877 | }, 1878 | { 1879 | "speakers": 55000, 1880 | "name": "Margos-Yarowilca-Lauricocha Quechua", 1881 | "iso6393": "qvm", 1882 | "udhr": "qvm", 1883 | "script": "Latin" 1884 | }, 1885 | { 1886 | "speakers": 47000, 1887 | "name": "Faroese", 1888 | "iso6393": "fao", 1889 | "udhr": "fao", 1890 | "script": "Latin" 1891 | }, 1892 | { 1893 | "speakers": 47000, 1894 | "name": "Kalaallisut", 1895 | "iso6393": "kal", 1896 | "udhr": "kal", 1897 | "script": "Latin" 1898 | }, 1899 | { 1900 | "speakers": 45000, 1901 | "name": "Chuukese", 1902 | "iso6393": "chk", 1903 | "udhr": "chk", 1904 | "script": "Latin" 1905 | }, 1906 | { 1907 | "speakers": 45000, 1908 | "name": "Asháninka", 1909 | "iso6393": "cni", 1910 | "udhr": "cni", 1911 | "script": "Latin" 1912 | }, 1913 | { 1914 | "speakers": 43900, 1915 | "name": "Marshallese", 1916 | "iso6393": "mah", 1917 | "udhr": "mah", 1918 | "script": "Latin" 1919 | }, 1920 | { 1921 | "speakers": 43000, 1922 | "name": "Rarotongan", 1923 | "iso6393": "rar", 1924 | "udhr": "rar", 1925 | "script": "Latin" 1926 | }, 1927 | { 1928 | "speakers": 40000, 1929 | "name": "Evenki", 1930 | "iso6393": "evn", 1931 | "udhr": "evn", 1932 | "script": "Cyrillic" 1933 | }, 1934 | { 1935 | "speakers": 40000, 1936 | "name": "North Junín Quechua", 1937 | "iso6393": "qvn", 1938 | "udhr": "qvn", 1939 | "script": "Latin" 1940 | }, 1941 | { 1942 | "speakers": 40000, 1943 | "name": "Waama", 1944 | 
"iso6393": "wwa", 1945 | "udhr": "wwa", 1946 | "script": "Latin" 1947 | }, 1948 | { 1949 | "speakers": 38000, 1950 | "name": "Huamalíes-Dos de Mayo Huánuco Quechua", 1951 | "iso6393": "qvh", 1952 | "udhr": "qvh", 1953 | "script": "Latin" 1954 | }, 1955 | { 1956 | "speakers": 36000, 1957 | "name": "Tojolabal", 1958 | "iso6393": "toj", 1959 | "udhr": "toj", 1960 | "script": "Latin" 1961 | }, 1962 | { 1963 | "speakers": 35800, 1964 | "name": "Luvale", 1965 | "iso6393": "lue", 1966 | "udhr": "lue", 1967 | "script": "Latin" 1968 | }, 1969 | { 1970 | "speakers": 35000, 1971 | "name": "Shuar", 1972 | "iso6393": "jiv", 1973 | "udhr": "jiv", 1974 | "script": "Latin" 1975 | }, 1976 | { 1977 | "speakers": 35000, 1978 | "name": "Northwestern Ojibwa", 1979 | "iso6393": "ojb", 1980 | "udhr": "ojb", 1981 | "script": "Canadian_Aboriginal" 1982 | }, 1983 | { 1984 | "speakers": 35000, 1985 | "name": "Cajamarca Quechua", 1986 | "iso6393": "qvc", 1987 | "udhr": "qvc", 1988 | "script": "Latin" 1989 | }, 1990 | { 1991 | "speakers": 30000, 1992 | "name": "Matu Chin", 1993 | "iso6393": "hlt", 1994 | "udhr": "hlt", 1995 | "script": "Latin" 1996 | }, 1997 | { 1998 | "speakers": 30000, 1999 | "name": "Calderón Highland Quichua", 2000 | "iso6393": "qud", 2001 | "udhr": "qud", 2002 | "script": "Latin" 2003 | }, 2004 | { 2005 | "speakers": 27700, 2006 | "name": "Pohnpeian", 2007 | "iso6393": "pon", 2008 | "udhr": "pon", 2009 | "script": "Latin" 2010 | }, 2011 | { 2012 | "speakers": 27500, 2013 | "name": "Aguaruna", 2014 | "iso6393": "agr", 2015 | "udhr": "agr", 2016 | "script": "Latin" 2017 | }, 2018 | { 2019 | "speakers": 25000, 2020 | "name": "Chiquián Ancash Quechua", 2021 | "iso6393": "qxa", 2022 | "udhr": "qxa", 2023 | "script": "Latin" 2024 | }, 2025 | { 2026 | "speakers": 25000, 2027 | "name": "Ticuna", 2028 | "iso6393": "tca", 2029 | "udhr": "tca", 2030 | "script": "Latin" 2031 | }, 2032 | { 2033 | "speakers": 22000, 2034 | "name": "Ojitlán Chinantec", 2035 | "iso6393": "chj", 2036 | 
"udhr": "chj", 2037 | "script": "Latin" 2038 | }, 2039 | { 2040 | "speakers": 21500, 2041 | "name": "Eastern Canadian Inuktitut", 2042 | "iso6393": "ike", 2043 | "udhr": "ike", 2044 | "script": "Canadian_Aboriginal" 2045 | }, 2046 | { 2047 | "speakers": 21000, 2048 | "name": "Awa-Cuaiquer", 2049 | "iso6393": "kwi", 2050 | "udhr": "kwi", 2051 | "script": "Latin" 2052 | }, 2053 | { 2054 | "speakers": 20112, 2055 | "name": "Romagnol", 2056 | "iso6393": "rgn", 2057 | "udhr": "eml", 2058 | "script": "Latin" 2059 | }, 2060 | { 2061 | "speakers": 20000, 2062 | "name": "Toba", 2063 | "iso6393": "tob", 2064 | "udhr": "tob", 2065 | "script": "Latin" 2066 | }, 2067 | { 2068 | "speakers": 17640, 2069 | "name": "Yanomamö", 2070 | "iso6393": "guu", 2071 | "udhr": "guu", 2072 | "script": "Latin" 2073 | }, 2074 | { 2075 | "speakers": 16000, 2076 | "name": "Arequipa-La Unión Quechua", 2077 | "iso6393": "qxu", 2078 | "udhr": "qxu", 2079 | "script": "Latin" 2080 | }, 2081 | { 2082 | "speakers": 15000, 2083 | "name": "Palauan", 2084 | "iso6393": "pau", 2085 | "udhr": "pau", 2086 | "script": "Latin" 2087 | }, 2088 | { 2089 | "speakers": 15000, 2090 | "name": "Shipibo-Conibo", 2091 | "iso6393": "shp", 2092 | "udhr": "shp", 2093 | "script": "Latin" 2094 | }, 2095 | { 2096 | "speakers": 12000, 2097 | "name": "Paraguayan Guaraní", 2098 | "iso6393": "gug", 2099 | "udhr": "gug", 2100 | "script": "Latin" 2101 | }, 2102 | { 2103 | "speakers": 11000, 2104 | "name": "Ixcatlán Mazatec", 2105 | "iso6393": "mzi", 2106 | "udhr": "mzi", 2107 | "script": "Latin" 2108 | }, 2109 | { 2110 | "speakers": 10000, 2111 | "name": "Shor", 2112 | "iso6393": "cjs", 2113 | "udhr": "cjs", 2114 | "script": "Cyrillic" 2115 | }, 2116 | { 2117 | "speakers": 8100, 2118 | "name": "Mi'kmaq", 2119 | "iso6393": "mic", 2120 | "udhr": "mic", 2121 | "script": "Latin" 2122 | }, 2123 | { 2124 | "speakers": 8000, 2125 | "name": "Hawaiian", 2126 | "iso6393": "haw", 2127 | "udhr": "haw", 2128 | "script": "Latin" 2129 | }, 2130 | { 
2131 | "speakers": 7170, 2132 | "name": "Even", 2133 | "iso6393": "eve", 2134 | "udhr": "eve", 2135 | "script": "Cyrillic" 2136 | }, 2137 | { 2138 | "speakers": 6592, 2139 | "name": "Yapese", 2140 | "iso6393": "yap", 2141 | "udhr": "yap", 2142 | "script": "Latin" 2143 | }, 2144 | { 2145 | "speakers": 6000, 2146 | "name": "Yanesha'", 2147 | "iso6393": "ame", 2148 | "udhr": "ame", 2149 | "script": "Latin" 2150 | }, 2151 | { 2152 | "speakers": 6000, 2153 | "name": "Chayahuita", 2154 | "iso6393": "cbt", 2155 | "udhr": "cbt", 2156 | "script": "Latin" 2157 | }, 2158 | { 2159 | "speakers": 5933, 2160 | "name": "Guarayu", 2161 | "iso6393": "gyr", 2162 | "udhr": "gyr", 2163 | "script": "Latin" 2164 | }, 2165 | { 2166 | "speakers": 5800, 2167 | "name": "Veps", 2168 | "iso6393": "vep", 2169 | "udhr": "vep", 2170 | "script": "Latin" 2171 | }, 2172 | { 2173 | "speakers": 5000, 2174 | "name": "Pichis Ashéninka", 2175 | "iso6393": "cpu", 2176 | "udhr": "cpu", 2177 | "script": "Latin" 2178 | }, 2179 | { 2180 | "speakers": 4500, 2181 | "name": "Achuar-Shiwiar", 2182 | "iso6393": "acu", 2183 | "udhr": "acu", 2184 | "script": "Latin" 2185 | }, 2186 | { 2187 | "speakers": 4000, 2188 | "name": "Nomatsiguenga", 2189 | "iso6393": "not", 2190 | "udhr": "not", 2191 | "script": "Latin" 2192 | }, 2193 | { 2194 | "speakers": 4000, 2195 | "name": "Northern Sami", 2196 | "iso6393": "sme", 2197 | "udhr": "sme", 2198 | "script": "Latin" 2199 | }, 2200 | { 2201 | "speakers": 4000, 2202 | "name": "Yagua", 2203 | "iso6393": "yad", 2204 | "udhr": "yad", 2205 | "script": "Latin" 2206 | }, 2207 | { 2208 | "speakers": 3500, 2209 | "name": "Urarina", 2210 | "iso6393": "ura", 2211 | "udhr": "ura", 2212 | "script": "Latin" 2213 | }, 2214 | { 2215 | "speakers": 3000, 2216 | "name": "Candoshi-Shapra", 2217 | "iso6393": "cbu", 2218 | "udhr": "cbu", 2219 | "script": "Latin" 2220 | }, 2221 | { 2222 | "speakers": 2900, 2223 | "name": "Murui Huitoto", 2224 | "iso6393": "huu", 2225 | "udhr": "huu", 2226 | 
"script": "Latin" 2227 | }, 2228 | { 2229 | "speakers": 2300, 2230 | "name": "Colorado", 2231 | "iso6393": "cof", 2232 | "udhr": "cof", 2233 | "script": "Latin" 2234 | }, 2235 | { 2236 | "speakers": 2000, 2237 | "name": "Bora", 2238 | "iso6393": "boa", 2239 | "udhr": "boa", 2240 | "script": "Latin" 2241 | }, 2242 | { 2243 | "speakers": 2000, 2244 | "name": "Güilá Zapotec", 2245 | "iso6393": "ztu", 2246 | "udhr": "ztu", 2247 | "script": "Latin" 2248 | }, 2249 | { 2250 | "speakers": 1500, 2251 | "name": "Cashibo-Cacataibo", 2252 | "iso6393": "cbr", 2253 | "udhr": "cbr", 2254 | "script": "Latin" 2255 | }, 2256 | { 2257 | "speakers": 1280, 2258 | "name": "Matsés", 2259 | "iso6393": "mcf", 2260 | "udhr": "mcf", 2261 | "script": "Latin" 2262 | }, 2263 | { 2264 | "speakers": 1200, 2265 | "name": "Bislama", 2266 | "iso6393": "bis", 2267 | "udhr": "bis", 2268 | "script": "Latin" 2269 | }, 2270 | { 2271 | "speakers": 1100, 2272 | "name": "Northern Yukaghir", 2273 | "iso6393": "ykg", 2274 | "udhr": "ykg", 2275 | "script": "Cyrillic" 2276 | }, 2277 | { 2278 | "speakers": 1000, 2279 | "name": "Chiltepec Chinantec", 2280 | "iso6393": "csa", 2281 | "udhr": "csa", 2282 | "script": "Latin" 2283 | }, 2284 | { 2285 | "speakers": 1000, 2286 | "name": "Chickasaw", 2287 | "iso6393": "cic", 2288 | "udhr": "cic", 2289 | "script": "Latin" 2290 | }, 2291 | { 2292 | "speakers": 950, 2293 | "name": "Sharanahua", 2294 | "iso6393": "mcd", 2295 | "udhr": "mcd", 2296 | "script": "Latin" 2297 | }, 2298 | { 2299 | "speakers": 720, 2300 | "name": "Amahuaca", 2301 | "iso6393": "amc", 2302 | "udhr": "amc", 2303 | "script": "Latin" 2304 | }, 2305 | { 2306 | "speakers": 500, 2307 | "name": "Amarakaeri", 2308 | "iso6393": "amr", 2309 | "udhr": "amr", 2310 | "script": "Latin" 2311 | }, 2312 | { 2313 | "speakers": 300, 2314 | "name": "Caquinte", 2315 | "iso6393": "cot", 2316 | "udhr": "cot", 2317 | "script": "Latin" 2318 | }, 2319 | { 2320 | "speakers": 200, 2321 | "name": "Aja (Benin)", 2322 | "iso6393": 
"ajg", 2323 | "udhr": "ajg", 2324 | "script": "Latin" 2325 | }, 2326 | { 2327 | "speakers": 150, 2328 | "name": "Arabela", 2329 | "iso6393": "arl", 2330 | "udhr": "arl", 2331 | "script": "Latin" 2332 | }, 2333 | { 2334 | "speakers": 20, 2335 | "name": "Pipil", 2336 | "iso6393": "ppl", 2337 | "udhr": "ppl", 2338 | "script": "Latin" 2339 | }, 2340 | { 2341 | "speakers": 0, 2342 | "name": "Mozarabic", 2343 | "iso6393": "mxi", 2344 | "udhr": "mxi", 2345 | "script": "Latin" 2346 | } 2347 | ] --------------------------------------------------------------------------------