├── .Rbuildignore
├── .github
├── ISSUE_TEMPLATE
│ ├── config.yml
│ └── issue_template.md
└── workflows
│ ├── issue.yml
│ ├── stale-actions.yml
│ └── tic.yml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
├── RcppExports.R
├── fasttext_2R_interface.R
└── zzz.R
├── README.md
├── codecov.yml
├── inst
├── COPYRIGHTS
├── LICENSE
└── PATENTS
├── src
├── Makevars
├── Makevars.win
├── RcppExports.cpp
├── args.cc
├── args.h
├── dictionary.cc
├── dictionary.h
├── fasttext.cc
├── fasttext.h
├── init.c
├── main.cc
├── matrix.cc
├── matrix.h
├── model.cc
├── model.h
├── real.h
├── utils.cc
├── utils.h
├── vector.cc
└── vector.h
├── tests
├── testthat.R
└── testthat
│ ├── test-wrapper_fasttext.R
│ └── test_data
│ ├── VECS.vec
│ ├── doc.txt
│ ├── model.bin
│ ├── model.vec
│ └── queries.txt
└── tic.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^\.ccache$
2 | ^\.github$
3 | ^tic\.R$
4 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | # For more info see: https://docs.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser
2 |
3 | blank_issues_enabled: true
4 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/issue_template.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report or feature request
3 | about: Describe a bug you've encountered or make a case for a new feature
4 | ---
5 |
6 | Please briefly describe your problem and what output you expect. If you have a question, you also have the option of asking it on a Q&A site such as Stack Overflow (but I'm flexible if it's not too complicated)
7 |
8 | Please include a minimal reproducible example
9 |
10 | Please give a brief description of the problem
11 |
12 | Please add your Operating System (e.g., Windows10, Macintosh, Linux) and the R version that you use (e.g., 3.6.2)
13 |
14 | If my package uses Python (via 'reticulate') then please also add the Python version (e.g., Python 3.8) and the 'reticulate' version (e.g., 1.18.0)
15 |
--------------------------------------------------------------------------------
/.github/workflows/issue.yml:
--------------------------------------------------------------------------------
1 | # For more info see: https://github.com/Renato66/auto-label
2 | # for the 'secrets.GITHUB_TOKEN' see: https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret
3 |
4 | name: Labeling new issue
5 | on:
6 | issues:
7 | types: ['opened']
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: Renato66/auto-label@v2
13 | with:
14 | repo-token: ${{ secrets.GITHUB_TOKEN }}
15 | ignore-comments: true
16 | labels-synonyms: '{"bug":["error","need fix","not working"],"enhancement":["upgrade"],"question":["help"]}'
17 | labels-not-allowed: '["good first issue"]'
18 | default-labels: '["help wanted"]'
19 |
--------------------------------------------------------------------------------
/.github/workflows/stale-actions.yml:
--------------------------------------------------------------------------------
1 | # for the 'secrets.GITHUB_TOKEN' see: https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret
2 |
3 | name: "Mark or close stale issues and PRs"
4 |
5 | on:
6 | schedule:
7 | - cron: "00 * * * *"
8 |
9 | jobs:
10 | stale:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/stale@v3
14 | with:
15 | repo-token: ${{ secrets.GITHUB_TOKEN }}
16 | days-before-stale: 12
17 | days-before-close: 7
18 | stale-issue-message: "This is Robo-lampros because the Human-lampros is lazy. This issue has been automatically marked as stale because it has not had recent activity. It will be closed after 7 days if no further activity occurs. Feel free to re-open a closed issue and the Human-lampros will respond."
19 | stale-pr-message: "This is Robo-lampros because the Human-lampros is lazy. This PR has been automatically marked as stale because it has not had recent activity. It will be closed after 7 days if no further activity occurs."
20 | close-issue-message: "This issue was automatically closed because of being stale. Feel free to re-open a closed issue and the Human-lampros will respond."
21 | close-pr-message: "This PR was automatically closed because of being stale."
22 | stale-pr-label: "stale"
23 | stale-issue-label: "stale"
24 | exempt-issue-labels: "bug,enhancement,pinned,security,pending,work_in_progress"
25 | exempt-pr-labels: "bug,enhancement,pinned,security,pending,work_in_progress"
26 |
--------------------------------------------------------------------------------
/.github/workflows/tic.yml:
--------------------------------------------------------------------------------
1 | ## tic GitHub Actions template: linux-macos-windows-deploy
2 | ## revision date: 2020-12-11
3 | on:
4 | workflow_dispatch:
5 | push:
6 | pull_request:
7 | # for now, CRON jobs only run on the default branch of the repo (i.e. usually on master)
8 | schedule:
9 | # * is a special character in YAML so you have to quote this string
10 | - cron: "0 4 * * *"
11 |
12 | name: tic
13 |
14 | jobs:
15 | all:
16 | runs-on: ${{ matrix.config.os }}
17 |
18 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
19 |
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | config:
24 | # use a different tic template type if you do not want to build on all listed platforms
25 | - { os: windows-latest, r: "release" }
26 | - { os: macOS-latest, r: "release", pkgdown: "true", latex: "true" }
27 | - { os: ubuntu-latest, r: "devel" }
28 | - { os: ubuntu-latest, r: "release" }
29 |
30 | env:
31 | # otherwise remotes::fun() errors cause the build to fail. Example: Unavailability of binaries
32 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
33 | CRAN: ${{ matrix.config.cran }}
34 | # make sure to run `tic::use_ghactions_deploy()` to set up deployment
35 | TIC_DEPLOY_KEY: ${{ secrets.TIC_DEPLOY_KEY }}
36 | # prevent rgl issues because no X11 display is available
37 | RGL_USE_NULL: true
38 | # if you use bookdown or blogdown, replace "PKGDOWN" by the respective
39 | # capitalized term. This also might need to be done in tic.R
40 | BUILD_PKGDOWN: ${{ matrix.config.pkgdown }}
41 | # macOS >= 10.15.4 linking
42 | SDKROOT: /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk
43 | # use GITHUB_TOKEN from GitHub to workaround rate limits in {remotes}
44 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
45 |
46 | steps:
47 | - uses: actions/checkout@v2.3.4
48 |
49 | - uses: r-lib/actions/setup-r@master
50 | with:
51 | r-version: ${{ matrix.config.r }}
52 | Ncpus: 4
53 |
54 | # LaTeX. Installation time:
55 | # Linux: ~ 1 min
56 | # macOS: ~ 1 min 30s
57 | # Windows: never finishes
58 | - uses: r-lib/actions/setup-tinytex@master
59 | if: matrix.config.latex == 'true'
60 |
61 | - uses: r-lib/actions/setup-pandoc@master
62 |
63 | # set date/week for use in cache creation
64 | # https://github.community/t5/GitHub-Actions/How-to-set-and-access-a-Workflow-variable/m-p/42970
65 | # - cache R packages daily; the step output is written to $GITHUB_OUTPUT because the `::set-output` workflow command is deprecated
66 | - name: "[Cache] Prepare daily timestamp for cache"
67 | if: runner.os != 'Windows'
68 | id: date
69 | run: echo "date=$(date '+%d-%m')" >> "$GITHUB_OUTPUT"
70 |
71 | - name: "[Cache] Cache R packages"
72 | if: runner.os != 'Windows'
73 | uses: pat-s/always-upload-cache@v2.1.3
74 | with:
75 | path: ${{ env.R_LIBS_USER }}
76 | key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}
77 | restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}
78 |
79 | # for some strange Windows reason this step and the next one need to be decoupled
80 | - name: "[Stage] Prepare"
81 | run: |
82 | Rscript -e "if (!requireNamespace('remotes')) install.packages('remotes', type = 'source')"
83 | Rscript -e "if (getRversion() < '3.2' && !requireNamespace('curl')) install.packages('curl', type = 'source')"
84 |
85 | - name: "[Stage] [Linux] Install curl and libgit2"
86 | if: runner.os == 'Linux'
87 | run: sudo apt install libcurl4-openssl-dev libgit2-dev
88 |
89 | - name: "[Stage] [macOS] Install libgit2"
90 | if: runner.os == 'macOS'
91 | run: brew install libgit2
92 |
93 | - name: "[Stage] [macOS] Install system libs for pkgdown"
94 | if: runner.os == 'macOS' && matrix.config.pkgdown != ''
95 | run: brew install harfbuzz fribidi
96 |
97 | - name: "[Stage] [Linux] Install system libs for pkgdown"
98 | if: runner.os == 'Linux' && matrix.config.pkgdown != ''
99 | run: sudo apt install libharfbuzz-dev libfribidi-dev
100 |
101 | - name: "[Stage] Install"
102 | if: matrix.config.os != 'macOS-latest' || matrix.config.r != 'devel'
103 | run: Rscript -e "remotes::install_github('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()"
104 |
105 | # macOS devel needs its own stage because we need to work with an option to suppress the usage of binaries
106 | - name: "[Stage] Prepare & Install (macOS-devel)"
107 | if: matrix.config.os == 'macOS-latest' && matrix.config.r == 'devel'
108 | run: |
109 | echo -e 'options(Ncpus = 4, pkgType = "source", repos = structure(c(CRAN = "https://cloud.r-project.org/")))' > $HOME/.Rprofile
110 | Rscript -e "remotes::install_github('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()"
111 |
112 | - name: "[Stage] Script"
113 | run: Rscript -e 'tic::script()'
114 |
115 | - name: "[Stage] After Success"
116 | if: matrix.config.os == 'macOS-latest' && matrix.config.r == 'release'
117 | run: Rscript -e "tic::after_success()"
118 |
119 | - name: "[Stage] Upload R CMD check artifacts"
120 | if: failure()
121 | uses: actions/upload-artifact@v2.2.1
122 | with:
123 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results
124 | path: check
125 | - name: "[Stage] Before Deploy"
126 | run: |
127 | Rscript -e "tic::before_deploy()"
128 |
129 | - name: "[Stage] Deploy"
130 | run: Rscript -e "tic::deploy()"
131 |
132 | - name: "[Stage] After Deploy"
133 | run: Rscript -e "tic::after_deploy()"
134 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | src/*.o
6 | src/*.so
7 | src/*.dll
8 | docs/
9 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: fastTextR
2 | Type: Package
3 | Title: Efficient learning of word representations
4 | Version: 1.0.2
5 | Date: 2019-04-11
6 | Author: Lampros Mouselimis
7 | Maintainer: Lampros Mouselimis
8 | BugReports: https://github.com/mlampros/fastTextR/issues
9 | URL: https://github.com/mlampros/fastTextR
10 | Description: The fastTextR package is a wrapper (only) for the skipgram and cbow functions of the fastText (https://github.com/facebookresearch/fastText) library. fastText is a library for efficient learning of word representations and sentence classification. Since it uses C++11 features, it requires a compiler with good C++11 support. These include : (gcc-4.6.3 or newer) or (clang-3.3 or newer). More information about the fastText library can be found in https://github.com/facebookresearch/fastText.
11 | License: GPL-3 + (COPYRIGHTS, LICENSE, PATENTS)
12 | Copyright: inst/COPYRIGHTS
13 | LazyData: TRUE
14 | Depends:
15 | R(>= 3.2.3)
16 | Imports:
17 | Rcpp (>= 0.12.5)
18 | LinkingTo: Rcpp, RcppArmadillo (>= 0.7.2)
19 | RoxygenNote: 6.1.0
20 | Suggests: testthat, covr
21 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | useDynLib(fastTextR, .registration = TRUE)
2 | importFrom(Rcpp, evalCpp)
3 |
4 | export(predict_unknown_words)
5 | export(skipgram_cbow)
6 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 |
2 | ## fastTextR 1.0.2
3 |
4 | - I updated the README.md file with installation instructions
5 |
6 |
7 | ## fastTextR 1.0.1
8 |
9 | - I added the [probot](https://github.com/probot/stale)
10 |
11 |
12 | ## fastTextR 1.0.0
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 | # Forwards its three arguments to the registered native routine `_fastTextR_convert_args_to_pointers` through .Call() and returns the result invisibly
4 | convert_args_to_pointers <- function(string_commands, INPUT, OUTPUT) {
5 | invisible(.Call(`_fastTextR_convert_args_to_pointers`, string_commands, INPUT, OUTPUT))
6 | }
7 |
8 |
--------------------------------------------------------------------------------
/R/fasttext_2R_interface.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | #' wrapper for the skipgram and cbow methods of the fasttext text classifier
4 | #'
5 | #'
6 | #' @param input_path a character string specifying the path to the train text file (the file must exist)
7 | #' @param output_path a character string specifying the path to the output-file, without a file extension (the directory of the output-file must exist)
8 | #' @param method a string specifying the method. One of \emph{skipgram}, \emph{cbow}
9 | #' @param lr a float number specifying the learning rate [default is 0.1]
10 | #' @param lrUpdateRate a number specifying the rate of updates for the learning rate [default is 100]
11 | #' @param dim a number specifying the size of the word vectors [default is 100]
12 | #' @param ws a number specifying the size of the context window [default is 5]
13 | #' @param epoch a value specifying the number of epochs [default is 5]
14 | #' @param minCount a number specifying the minimal number of word occurrences [default is 1]
15 | #' @param neg a value specifying the number of negatives sampled [default is 5]
16 | #' @param wordNgrams a number specifying the max length of word ngram [default is 1]
17 | #' @param loss a character string specifying the loss function. One of \emph{ns (negative sampling)}, \emph{hs (hierarchical softmax)}, \emph{softmax} [default is 'ns']
18 | #' @param bucket a value specifying the number of buckets [default is 2000000]
19 | #' @param minn a number (greater than or equal to 0) specifying the min length of char ngram [default is 0]
20 | #' @param maxn a number (greater than or equal to 0) specifying the max length of char ngram [default is 0]
21 | #' @param thread a value specifying the number of threads [default is 6]
22 | #' @param t a float number specifying the sampling threshold [default is 0.0001]
23 | #' @param verbose a number (between 0 and 2) specifying the verbosity level [default is 2]
24 | #' @return an object of class \emph{fasttextR}, i.e. a list with the elements \emph{file_location} (a character string specifying the location of the saved data) and \emph{num_vectors} (the value of the \emph{dim} parameter)
25 | #' @export
26 | #' @details
27 | #' the function will save a model.bin and the word vectors to a pre-specified path (\emph{output_path})
28 | #' @references
29 | #' https://github.com/facebookresearch/fastText
30 | #'
31 | #' https://arxiv.org/abs/1607.04606
32 | #'
33 | #' https://arxiv.org/abs/1607.01759
34 | #' @examples
35 | #'
36 | #' # library(fastTextR)
37 | #'
38 | #' # res = skipgram_cbow(input_path = "/data_fasttext/out_test_file.txt",
39 | #'
40 | #' # output_path = "/data_fasttext/model", method = "skipgram")
41 |
42 |
43 | skipgram_cbow = function(input_path = NULL, output_path = NULL, method = "skipgram", lr = 0.1, lrUpdateRate = 100, dim = 100,
44 |
45 |                          ws = 5, epoch = 5, minCount = 1, neg = 5, wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0,
46 |
47 |                          maxn = 0, thread = 6, t = 0.0001, verbose = 2) {
48 |
49 |   # TRUE if 'path' can not be resolved to an existing file or directory
50 |   path_is_missing = function(path) inherits(tryCatch(normalizePath(path, mustWork = TRUE), error = function(e) e), "error")
51 |
52 |   if (!is.character(input_path) || path_is_missing(input_path)) stop("the input_path parameter should be a valid character string path")
53 |   if (is.null(output_path)) stop("the output_path parameter should be a non-NULL valid character string path")
54 |
55 |   # the type of output_path is verified before any string handling, so that a non-character value gives the message below and not an internal error
56 |   invalid_out_msg = "the output_path parameter should be a valid character string path"
57 |   if (!is.character(output_path) || length(output_path) != 1 || is.na(output_path)) stop(invalid_out_msg)
58 |   # a trailing path separator means that no file name was given
59 |   trailing_sep = if (.Platform$OS.type == 'windows') "[\\\\/]$" else "/$"
60 |   if (grepl(trailing_sep, output_path)) stop(invalid_out_msg)
61 |   # dirname() and basename() work on both OS types and also cover a bare file name (current directory) and a file directly under the root directory
62 |   if (path_is_missing(dirname(output_path))) stop(invalid_out_msg)
63 |   type_bin = strsplit(basename(output_path), '[.]')[[1]]
64 |   if (length(type_bin) > 1) stop("the output_path parameter should be a valid path with no file extensions. Example path: ../model")
65 |   if (length(type_bin) == 0 || !nzchar(type_bin)) stop(invalid_out_msg)
66 |
67 |   if (!method %in% c('skipgram', 'cbow')) stop("the method parameter should be either 'skipgram' or 'cbow'")
68 |   params = c(lr, t, lrUpdateRate, dim, ws, epoch, minCount, neg, wordNgrams, bucket)
69 |   params_nams = c('lr', 't', 'lrUpdateRate', 'dim', 'ws', 'epoch', 'minCount', 'neg', 'wordNgrams', 'bucket')
70 |   non_positive = params <= 0
71 |   if (any(non_positive)) stop(paste0("the ", params_nams[which(non_positive)[1]], " parameter should be a number greater than 0.0"))
72 |   # minn and maxn are allowed to be 0 (their default value), only negative values are rejected
73 |   if (minn < 0) stop("the minn parameter should be greater than or equal to 0")
74 |   if (maxn < 0) stop("the maxn parameter should be greater than or equal to 0")
75 |   if (verbose < 0 || verbose > 2) stop("the verbose parameter should be a number between 0 and 2")
76 |   if (thread < 1) stop("the thread parameter should be greater than 0")
77 |   if (!loss %in% c('ns', 'hs', 'softmax')) stop("the loss parameter should be one of 'ns', 'hs', 'softmax'")
78 |
79 |   if (verbose > 0) { start = Sys.time() }
80 |
81 |   # NOTE(review): as.character() renders some numbers in scientific notation (e.g. 2000000 becomes "2e+06" and 0.0001 becomes "1e-04");
82 |   # the native argument parser is not visible here, confirm that it reads such values as intended
83 |   default_args = c("fasttext", method, "-input", as.character(input_path), "-output", as.character(output_path), "-lr", as.character(lr),
84 |
85 |                    "-lrUpdateRate", as.character(lrUpdateRate), "-dim", as.character(dim), "-ws", as.character(ws), "-epoch", as.character(epoch),
86 |
87 |                    "-minCount", as.character(minCount), "-neg", as.character(neg), "-wordNgrams",
88 |
89 |                    as.character(wordNgrams), "-loss", loss, "-bucket", as.character(bucket), "-minn", as.character(minn), "-maxn",
90 |
91 |                    as.character(maxn), "-thread", as.character(thread), "-t", as.character(t), "-verbose", as.character(verbose))
92 |
93 |   convert_args_to_pointers(default_args, "", "")
94 |
95 |   if (verbose > 0) {
96 |
97 |     # the elapsed time gets its own name so that the sampling threshold 't' is not overwritten
98 |     elapsed = Sys.time() - start
99 |
100 |     cat('\n'); cat('time to complete :', elapsed, attributes(elapsed)$units, '\n'); cat('\n');
101 |   }
102 |
103 |   return(structure(list(file_location = paste0("the output is saved in: ", output_path), num_vectors = dim),
104 |
105 |                    class = 'fasttextR'))
106 | }
107 |
108 |
109 |
110 | #' word vectors for unknown words based on a skipgram or cbow model
111 | #'
112 | #'
113 | #' @param skipgram_cbow_model_output a character string specifying the path to the model file (.bin) saved by the \emph{skipgram_cbow} function
114 | #' @param unknown_words_path a character string specifying the path to the file that contains the unknown words
115 | #' @param output_path a character string specifying the path to the file where the output will be saved
116 | #' @param verbose either TRUE or FALSE. If TRUE the time to complete will be printed out
117 | #' @return a character string specifying the location of the saved data
118 | #' @export
119 | #' @details
120 | #' the numeric vectors of the unknown words are saved to a pre-specified path (\emph{output_path}). In case of non-matches it returns zero-valued vectors
121 | #' @references
122 | #' https://github.com/facebookresearch/fastText
123 | #'
124 | #' https://arxiv.org/abs/1607.04606
125 | #'
126 | #' https://arxiv.org/abs/1607.01759
127 | #' @examples
128 | #'
129 | #' # library(fastTextR)
130 | #'
131 | #' # res = predict_unknown_words(skipgram_cbow_model_output = "/data_fasttext/model.bin",
132 | #'
133 | #' # unknown_words_path = "/data_fasttext/queries.txt",
134 | #'
135 | #' # output_path = "/data_fasttext/NEW_VEC")
136 |
137 |
138 | predict_unknown_words = function(skipgram_cbow_model_output = NULL, unknown_words_path = NULL, output_path = NULL, verbose = FALSE) {
139 |
140 |   # TRUE when normalizePath() can not resolve 'path' to an existing location
141 |   unresolved = function(path) {
142 |     outcome = tryCatch(normalizePath(path, mustWork = TRUE), error = function(e) e)
143 |     inherits(outcome, "error")
144 |   }
145 |
146 |   model_missing = unresolved(skipgram_cbow_model_output)
147 |   if (model_missing || !is.character(skipgram_cbow_model_output)) stop("the skipgram_cbow_model_output parameter should be a valid character string path")
148 |   words_missing = unresolved(unknown_words_path)
149 |   if (words_missing || !is.character(unknown_words_path)) stop("the unknown_words_path parameter should be a valid character string path")
150 |
151 |   # a NULL and a non-character output_path are reported with separate messages
152 |   if (is.null(output_path)) stop("the output_path parameter should be a non-NULL character string path")
153 |   if (!is.character(output_path)) stop("the output_path parameter should be a valid character string path")
154 |   if (!is.logical(verbose)) stop("the verbose parameter should be either TRUE or FALSE")
155 |
156 |   if (verbose) begin = Sys.time()
157 |
158 |   fasttext_cmd = c("fasttext", "predict_skipgram_cbow", skipgram_cbow_model_output)
159 |   convert_args_to_pointers(fasttext_cmd, unknown_words_path, output_path)
160 |
161 |   if (verbose) {
162 |     elapsed = Sys.time() - begin
163 |     cat('\n'); cat('time to complete :', elapsed, attributes(elapsed)$units, '\n'); cat('\n');
164 |   }
165 |   paste0("the output is saved in: ", output_path)
166 | }
167 |
168 |
169 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | # temporary startup message which points the users to the new version of 'fastText' [ SEE : http://r-pkgs.had.co.nz/r.html#r-differences ]
2 |
3 | .onAttach <- function(libname, pkgname) {
4 |   archive_notice <- "UPDATE 11-04-2019: There is an updated version of the fastText R package which includes all the features of the ported fasttext library. This R package is archived. Please visit 'https://github.com/mlampros/fastText' to install the most updated version!"
5 |   packageStartupMessage(archive_notice)
6 | }
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
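2 | [![tic](https://github.com/mlampros/fastTextR/workflows/tic/badge.svg?branch=master)](https://github.com/mlampros/fastTextR/actions)
3 | [![codecov.io](https://codecov.io/github/mlampros/fastTextR/coverage.svg?branch=master)](https://codecov.io/github/mlampros/fastTextR?branch=master)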
4 |
5 |
6 |
7 |
8 | ## fastTextR
9 |
10 |
11 | **UPDATE 11-04-2019**: There is an [updated version of the fastText R package](https://github.com/mlampros/fastText) which includes all the features of the ported [fasttext library](https://github.com/facebookresearch/fastText). Therefore this repository **is archived**.
12 |
13 |
14 |
15 | The fastTextR package is an R wrapper (only) for the *skipgram* and *cbow* functions of the [*fastText*](https://github.com/facebookresearch/fastText) library. *fastText* is a library for efficient learning of word representations and sentence classification. Since it uses C++11 features, it requires a compiler with good C++11 support. These include : (gcc-4.6.3 or newer) or (clang-3.3 or newer). More information about the fastText library can be found in [https://github.com/facebookresearch/fastText](https://github.com/facebookresearch/fastText). COPYRIGHTS, LICENSE and PATENTS files can be found in the *inst* folder of the R package.
16 |
17 | A detailed example can be found in my [blog-post](http://mlampros.github.io/2017/01/05/textTinyR_package/) about text processing, in section 'word vectors'.
18 |
19 |
20 |
21 | To install the package from Github you can **either** use the *install_github* function of the devtools package,
22 |
23 |
24 | ```R
25 |
26 | devtools::install_github('mlampros/fastTextR')
27 |
28 |
29 | ```
30 |
31 |
32 | **or** directly download the fastTextR-zip file using the **Clone or download** button in the [repository page](https://github.com/mlampros/fastTextR), extract it locally (rename it to *fastTextR* if necessary and check that files such as DESCRIPTION, NAMESPACE etc. are present when you open the fastTextR folder) and then run,
33 |
34 |
35 |
36 | ```R
37 |
38 | #-------------
39 | # on a Unix OS
40 | #-------------
41 |
42 | setwd('/your_folder/fastTextR/')
43 | Rcpp::compileAttributes(verbose = TRUE)
44 | setwd('/your_folder/')
45 | system("R CMD build fastTextR")
46 | system("R CMD INSTALL fastTextR_1.0.2.tar.gz")
47 |
48 |
49 | #------------------
50 | # on the Windows OS
51 | #------------------
52 |
53 | setwd('C:/your_folder/fastTextR/')
54 | Rcpp::compileAttributes(verbose = TRUE)
55 | setwd('C:/your_folder/')
56 | system("R CMD build fastTextR")
57 | system("R CMD INSTALL fastTextR_1.0.2.tar.gz")
58 |
59 | ```
60 |
61 |
62 | Use the following link to report bugs/issues (for the R wrapper),
63 |
64 |
65 | [https://github.com/mlampros/fastTextR/issues](https://github.com/mlampros/fastTextR/issues)
66 |
67 |
68 |
69 |
70 | #### **Example usage**
71 |
72 |
73 |
74 |
75 | ```R
76 |
77 |
78 | # example input data ---> 'dat.txt'
79 |
80 |
81 |
82 | library(fastTextR)
83 |
84 |
85 |
86 | #--------------------------
87 | # skipgram or cbow methods
88 | #--------------------------
89 |
90 |
91 | res = skipgram_cbow(input_path = "/data_fasttext/dat.txt",
92 |
93 | output_path = "/data_fasttext/model",
94 |
95 | method = "skipgram", lr = 0.1,
96 |
97 | lrUpdateRate = 100, dim = 100,
98 |
99 | ws = 5, epoch = 5, minCount = 1,
100 |
101 | neg = 5, wordNgrams = 1, loss = "ns",
102 |
103 | bucket = 2000000, minn = 0,
104 |
105 | maxn = 0, thread = 6, t = 0.0001,
106 |
107 | verbose = 2)
108 |
109 |
110 |
111 | #-------------------------------------------------------------
112 | # prediction of unknown words for the skipgram and cbow models
113 | #-------------------------------------------------------------
114 |
115 |
116 | res = predict_unknown_words(skipgram_cbow_model_output = "/data_fasttext/model.bin",
117 |
118 | unknown_words_path = "/data_fasttext/queries.txt",
119 |
120 | output_path = "/data_fasttext/NEW_VEC",
121 |
122 | verbose = TRUE)
123 |
124 | ```
125 |
126 |
127 |
128 | More information about the parameters of each function can be found in the package documentation.
129 |
130 |
131 |
132 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
--------------------------------------------------------------------------------
/inst/COPYRIGHTS:
--------------------------------------------------------------------------------
1 |
2 |
3 | ==============================================================================================
4 | The fastTextR package is a wrapper for the skipgram and cbow functions of the fastText library
5 | ==============================================================================================
6 |
7 |
8 | Copyright (c) 2016-present, Facebook, Inc.
9 | All rights reserved.
10 |
11 | This source code is licensed under the BSD-style license found in the
12 | LICENSE file in the root directory of this source tree. An additional grant
13 | of patent rights can be found in the PATENTS file in the same directory.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/inst/LICENSE:
--------------------------------------------------------------------------------
1 | BSD License
2 |
3 | For fastText software
4 |
5 | Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without modification,
8 | are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | * Neither the name Facebook nor the names of its contributors may be used to
18 | endorse or promote products derived from this software without specific
19 | prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/inst/PATENTS:
--------------------------------------------------------------------------------
1 | Additional Grant of Patent Rights Version 2
2 |
3 | "Software" means the fastText software distributed by Facebook, Inc.
4 |
5 | Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software
6 | ("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable
7 | (subject to the termination provision below) license under any Necessary
8 | Claims, to make, have made, use, sell, offer to sell, import, and otherwise
9 | transfer the Software. For avoidance of doubt, no license is granted under
10 | Facebook’s rights in any patent claims that are infringed by (i) modifications
11 | to the Software made by you or any third party or (ii) the Software in
12 | combination with any software or other technology.
13 |
14 | The license granted hereunder will terminate, automatically and without notice,
15 | if you (or any of your subsidiaries, corporate affiliates or agents) initiate
16 | directly or indirectly, or take a direct financial interest in, any Patent
17 | Assertion: (i) against Facebook or any of its subsidiaries or corporate
18 | affiliates, (ii) against any party if such Patent Assertion arises in whole or
19 | in part from any software, technology, product or service of Facebook or any of
20 | its subsidiaries or corporate affiliates, or (iii) against any party relating
21 | to the Software. Notwithstanding the foregoing, if Facebook or any of its
22 | subsidiaries or corporate affiliates files a lawsuit alleging patent
23 | infringement against you in the first instance, and you respond by filing a
24 | patent infringement counterclaim in that lawsuit against that party that is
25 | unrelated to the Software, the license granted hereunder will not terminate
26 | under section (i) of this paragraph due to such counterclaim.
27 |
28 | A "Necessary Claim" is a claim of a patent owned by Facebook that is
29 | necessarily infringed by the Software standing alone.
30 |
31 | A "Patent Assertion" is any lawsuit or other action alleging direct, indirect,
32 | or contributory infringement or inducement to infringe any patent, including a
33 | cross-claim or counterclaim.
34 |
--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS)
3 | CXX_STD = CXX11
4 | PKG_CPPFLAGS = -I../inst/include/
5 |
--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) -mthreads
3 | CXX_STD = CXX11
4 | PKG_CPPFLAGS = -I../inst/include/
5 |
--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | #include
5 | #include
6 |
7 | using namespace Rcpp;
8 |
9 | // convert_args_to_pointers
10 | void convert_args_to_pointers(std::vector string_commands, std::string INPUT, std::string OUTPUT);
11 | RcppExport SEXP _fastTextR_convert_args_to_pointers(SEXP string_commandsSEXP, SEXP INPUTSEXP, SEXP OUTPUTSEXP) {
12 | BEGIN_RCPP
13 | Rcpp::RNGScope rcpp_rngScope_gen;
14 | Rcpp::traits::input_parameter< std::vector >::type string_commands(string_commandsSEXP);
15 | Rcpp::traits::input_parameter< std::string >::type INPUT(INPUTSEXP);
16 | Rcpp::traits::input_parameter< std::string >::type OUTPUT(OUTPUTSEXP);
17 | convert_args_to_pointers(string_commands, INPUT, OUTPUT);
18 | return R_NilValue;
19 | END_RCPP
20 | }
21 |
--------------------------------------------------------------------------------
/src/args.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 | /**
7 | * Copyright (c) 2016-present, Facebook, Inc.
8 | * All rights reserved.
9 | *
10 | * This source code is licensed under the BSD-style license found in the
11 | * LICENSE file in the root directory of this source tree. An additional grant
12 | * of patent rights can be found in the PATENTS file in the same directory.
13 | */
14 |
15 | #include "args.h"
16 |
17 | #include
18 | #include
19 |
20 | #include
21 |
22 | namespace fasttext {
23 |
24 | // Default constructor: seeds every option with its built-in default value.
25 | Args::Args() {
26 |   // model and objective
27 |   model = model_name::sg;
28 |   loss = loss_name::ns;
29 |   // optimisation schedule
30 |   lr = 0.05;
31 |   lrUpdateRate = 100;
32 |   epoch = 5;
33 |   thread = 12;
34 |   // vector size, context window and negative samples
35 |   dim = 100;
36 |   ws = 5;
37 |   neg = 5;
38 |   // vocabulary, n-gram and sampling settings
39 |   minCount = 5;
40 |   wordNgrams = 1;
41 |   bucket = 2000000;
42 |   minn = 3;
43 |   maxn = 6;
44 |   t = 1e-4;
45 |   // text handling and reporting
46 |   label = "__label__";
47 |   verbose = 2;
48 |   pretrainedVectors = "";
49 | }
44 |
45 | // Parses the command line into this Args object.
46 | //
47 | // argv[1] is the command word: "supervised" and "cbow" switch the model (and,
48 | // for supervised, several defaults); any other word keeps the constructor
49 | // defaults. The remaining entries must come as "-option value" pairs.
50 | // Usage errors and -h print the help text and end the process with
51 | // EXIT_FAILURE. A missing command word or an option without a value is
52 | // reported as a usage error rather than read from beyond the end of argv.
53 | void Args::parseArgs(int argc, char** argv) {
54 |   if (argc < 2) {
55 |     std::cout << "Missing command! Usage:" << std::endl;
56 |     printHelp();
57 |     exit(EXIT_FAILURE);
58 |   }
59 |   std::string command(argv[1]);
60 |   if (command == "supervised") {
61 |     model = model_name::sup;
62 |     loss = loss_name::softmax;
63 |     minCount = 1;
64 |     minn = 0;
65 |     maxn = 0;
66 |     lr = 0.1;
67 |   } else if (command == "cbow") {
68 |     model = model_name::cbow;
69 |   }
70 |   int ai = 2;
71 |   while (ai < argc) {
72 |     const char* opt = argv[ai];
73 |     if (opt[0] != '-') {
74 |       std::cout << "Provided argument without a dash! Usage:" << std::endl;
75 |       printHelp();
76 |       exit(EXIT_FAILURE);
77 |     }
78 |     if (strcmp(opt, "-h") == 0) {
79 |       std::cout << "Here is the help! Usage:" << std::endl;
80 |       printHelp();
81 |       exit(EXIT_FAILURE);
82 |     }
83 |     // Every remaining option consumes the following argv entry as its value.
84 |     if (ai + 1 >= argc) {
85 |       std::cout << "Missing value for argument: " << opt << std::endl;
86 |       printHelp();
87 |       exit(EXIT_FAILURE);
88 |     }
89 |     const char* val = argv[ai + 1];
90 |     if (strcmp(opt, "-input") == 0) {
91 |       input = std::string(val);
92 |     } else if (strcmp(opt, "-test") == 0) {
93 |       test = std::string(val);
94 |     } else if (strcmp(opt, "-output") == 0) {
95 |       output = std::string(val);
96 |     } else if (strcmp(opt, "-lr") == 0) {
97 |       lr = atof(val);
98 |     } else if (strcmp(opt, "-lrUpdateRate") == 0) {
99 |       lrUpdateRate = atoi(val);
100 |     } else if (strcmp(opt, "-dim") == 0) {
101 |       dim = atoi(val);
102 |     } else if (strcmp(opt, "-ws") == 0) {
103 |       ws = atoi(val);
104 |     } else if (strcmp(opt, "-epoch") == 0) {
105 |       epoch = atoi(val);
106 |     } else if (strcmp(opt, "-minCount") == 0) {
107 |       minCount = atoi(val);
108 |     } else if (strcmp(opt, "-neg") == 0) {
109 |       neg = atoi(val);
110 |     } else if (strcmp(opt, "-wordNgrams") == 0) {
111 |       wordNgrams = atoi(val);
112 |     } else if (strcmp(opt, "-loss") == 0) {
113 |       if (strcmp(val, "hs") == 0) {
114 |         loss = loss_name::hs;
115 |       } else if (strcmp(val, "ns") == 0) {
116 |         loss = loss_name::ns;
117 |       } else if (strcmp(val, "softmax") == 0) {
118 |         loss = loss_name::softmax;
119 |       } else {
120 |         std::cout << "Unknown loss: " << val << std::endl;
121 |         printHelp();
122 |         exit(EXIT_FAILURE);
123 |       }
124 |     } else if (strcmp(opt, "-bucket") == 0) {
125 |       bucket = atoi(val);
126 |     } else if (strcmp(opt, "-minn") == 0) {
127 |       minn = atoi(val);
128 |     } else if (strcmp(opt, "-maxn") == 0) {
129 |       maxn = atoi(val);
130 |     } else if (strcmp(opt, "-thread") == 0) {
131 |       thread = atoi(val);
132 |     } else if (strcmp(opt, "-t") == 0) {
133 |       t = atof(val);
134 |     } else if (strcmp(opt, "-label") == 0) {
135 |       label = std::string(val);
136 |     } else if (strcmp(opt, "-verbose") == 0) {
137 |       verbose = atoi(val);
138 |     } else if (strcmp(opt, "-pretrainedVectors") == 0) {
139 |       pretrainedVectors = std::string(val);
140 |     } else {
141 |       std::cout << "Unknown argument: " << opt << std::endl;
142 |       printHelp();
143 |       exit(EXIT_FAILURE);
144 |     }
145 |     ai += 2;
146 |   }
147 |   if (input.empty() || output.empty()) {
148 |     std::cout << "Empty input or output path." << std::endl;
149 |     printHelp();
150 |     exit(EXIT_FAILURE);
151 |   }
152 |   // With neither word n-grams nor character n-grams enabled, no bucket is needed.
153 |   if (wordNgrams <= 1 && maxn == 0) {
154 |     bucket = 0;
155 |   }
156 | }
134 |
135 | // Prints the option summary to stdout. Each optional argument is followed by
136 | // the value currently held by this object in square brackets, including the
137 | // loss function and the pretrained-vectors path.
138 | void Args::printHelp() {
139 |   std::string lname = "ns";
140 |   if (loss == loss_name::hs) lname = "hs";
141 |   if (loss == loss_name::softmax) lname = "softmax";
142 |   std::cout
143 |     << "\n"
144 |     << "The following arguments are mandatory:\n"
145 |     << " -input training file path\n"
146 |     << " -output output file path\n\n"
147 |     << "The following arguments are optional:\n"
148 |     << " -lr learning rate [" << lr << "]\n"
149 |     << " -lrUpdateRate change the rate of updates for the learning rate [" << lrUpdateRate << "]\n"
150 |     << " -dim size of word vectors [" << dim << "]\n"
151 |     << " -ws size of the context window [" << ws << "]\n"
152 |     << " -epoch number of epochs [" << epoch << "]\n"
153 |     << " -minCount minimal number of word occurrences [" << minCount << "]\n"
154 |     << " -neg number of negatives sampled [" << neg << "]\n"
155 |     << " -wordNgrams max length of word ngram [" << wordNgrams << "]\n"
156 |     << " -loss loss function {ns, hs, softmax} [" << lname << "]\n"
157 |     << " -bucket number of buckets [" << bucket << "]\n"
158 |     << " -minn min length of char ngram [" << minn << "]\n"
159 |     << " -maxn max length of char ngram [" << maxn << "]\n"
160 |     << " -thread number of threads [" << thread << "]\n"
161 |     << " -t sampling threshold [" << t << "]\n"
162 |     << " -label labels prefix [" << label << "]\n"
163 |     << " -verbose verbosity level [" << verbose << "]\n"
164 |     << " -pretrainedVectors pretrained word vectors for supervised learning [" << pretrainedVectors << "]"
165 |     << std::endl;
166 | }
164 |
165 | // Writes the persisted options to `out` as raw bytes, in the exact order
166 | // Args::load() reads them back. lr, thread, label, verbose, the file paths
167 | // and pretrainedVectors are not written.
168 | void Args::save(std::ostream& out) {
169 |   auto put = [&out](const void* field, std::streamsize bytes) {
170 |     out.write(static_cast<const char*>(field), bytes);
171 |   };
172 |   put(&dim, sizeof(int));
173 |   put(&ws, sizeof(int));
174 |   put(&epoch, sizeof(int));
175 |   put(&minCount, sizeof(int));
176 |   put(&neg, sizeof(int));
177 |   put(&wordNgrams, sizeof(int));
178 |   put(&loss, sizeof(loss_name));
179 |   put(&model, sizeof(model_name));
180 |   put(&bucket, sizeof(int));
181 |   put(&minn, sizeof(int));
182 |   put(&maxn, sizeof(int));
183 |   put(&lrUpdateRate, sizeof(int));
184 |   put(&t, sizeof(double));
185 | }
180 |
181 | // Reads the persisted options from `in` as raw bytes, in the exact order
182 | // Args::save() wrote them. Options that save() does not write keep whatever
183 | // value this object already holds.
184 | void Args::load(std::istream& in) {
185 |   auto get = [&in](void* field, std::streamsize bytes) {
186 |     in.read(static_cast<char*>(field), bytes);
187 |   };
188 |   get(&dim, sizeof(int));
189 |   get(&ws, sizeof(int));
190 |   get(&epoch, sizeof(int));
191 |   get(&minCount, sizeof(int));
192 |   get(&neg, sizeof(int));
193 |   get(&wordNgrams, sizeof(int));
194 |   get(&loss, sizeof(loss_name));
195 |   get(&model, sizeof(model_name));
196 |   get(&bucket, sizeof(int));
197 |   get(&minn, sizeof(int));
198 |   get(&maxn, sizeof(int));
199 |   get(&lrUpdateRate, sizeof(int));
200 |   get(&t, sizeof(double));
201 | }
196 |
197 | }
198 |
--------------------------------------------------------------------------------
/src/args.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_ARGS_H
11 | #define FASTTEXT_ARGS_H
12 |
13 | #include
14 | #include
15 | #include
16 |
17 | namespace fasttext {
18 |
19 | enum class model_name : int {cbow=1, sg, sup}; // training mode; stored as an int in saved models (see Args::save)
20 | enum class loss_name : int {hs=1, ns, softmax}; // loss function; stored as an int in saved models (see Args::save)
21 |
22 | class Args { // option set shared by the dictionary, model and trainer
23 | public:
24 | Args(); // fills every option with its default
25 | std::string input; // -input: training file path
26 | std::string test; // -test: path given on the command line
27 | std::string output; // -output: output file path
28 | double lr; // -lr: learning rate
29 | int lrUpdateRate; // -lrUpdateRate: rate of updates for the learning rate
30 | int dim; // -dim: size of word vectors
31 | int ws; // -ws: size of the context window
32 | int epoch; // -epoch: number of epochs
33 | int minCount; // -minCount: minimal number of word occurrences
34 | int neg; // -neg: number of negatives sampled
35 | int wordNgrams; // -wordNgrams: max length of word ngram
36 | loss_name loss; // -loss: loss function
37 | model_name model; // set from the command word in parseArgs()
38 | int bucket; // -bucket: number of buckets
39 | int minn; // -minn: min length of char ngram
40 | int maxn; // -maxn: max length of char ngram
41 | int thread; // -thread: number of threads
42 | double t; // -t: sampling threshold
43 | std::string label; // -label: labels prefix
44 | int verbose; // -verbose: verbosity level
45 | std::string pretrainedVectors; // -pretrainedVectors: pretrained word vectors for supervised learning
46 |
47 | void parseArgs(int, char**); // fills the options from argc/argv; exits the process on usage errors
48 | void printHelp(); // prints the option summary to stdout
49 | void save(std::ostream&); // writes the persisted subset of options as raw bytes
50 | void load(std::istream&); // reads back what save() wrote
51 | };
52 |
53 | }
54 |
55 | #endif
56 |
--------------------------------------------------------------------------------
/src/dictionary.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 | /**
7 | * Copyright (c) 2016-present, Facebook, Inc.
8 | * All rights reserved.
9 | *
10 | * This source code is licensed under the BSD-style license found in the
11 | * LICENSE file in the root directory of this source tree. An additional grant
12 | * of patent rights can be found in the PATENTS file in the same directory.
13 | */
14 |
15 | #include "dictionary.h"
16 |
17 | #include
18 |
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 |
25 | namespace fasttext {
26 |
27 | const std::string Dictionary::EOS = ""; // token readWord() yields for a newline; NOTE(review): empty here - confirm the literal was not lost when this listing was produced
28 | const std::string Dictionary::BOW = "<"; // prepended to a word before its character n-grams are computed
29 | const std::string Dictionary::EOW = ">"; // appended to a word before its character n-grams are computed
30 |
31 | // Creates an empty dictionary bound to the shared option set: all counters
32 | // start at zero and every one of the MAX_VOCAB_SIZE hash slots is free (-1).
33 | Dictionary::Dictionary(std::shared_ptr args) {
34 |   args_ = args;
35 |   size_ = nwords_ = nlabels_ = 0;
36 |   ntokens_ = 0;
37 |   word2int_.assign(size_t(MAX_VOCAB_SIZE), -1);
38 | }
42 |
43 | // Returns the hash-table slot for `w`: either the slot that already maps to
44 | // `w` or the first free slot met while probing linearly from hash(w).
45 | int32_t Dictionary::find(const std::string& w) const {
46 |   int32_t slot = hash(w) % MAX_VOCAB_SIZE;
47 |   for (;;) {
48 |     const int32_t id = word2int_[slot];
49 |     if (id == -1 || words_[id].word == w) {
50 |       return slot;
51 |     }
52 |     slot = (slot + 1) % MAX_VOCAB_SIZE;
53 |   }
54 | }
50 |
51 | // Counts one occurrence of token `w`: bumps the count of an existing entry or
52 | // appends a new entry, typed as a label when `w` starts with the label prefix.
53 | void Dictionary::add(const std::string& w) {
54 |   const int32_t slot = find(w);
55 |   ntokens_++;
56 |   if (word2int_[slot] != -1) {
57 |     words_[word2int_[slot]].count++;
58 |     return;
59 |   }
60 |   const bool startsWithLabel = (w.find(args_->label) == 0);
61 |   entry fresh;
62 |   fresh.word = w;
63 |   fresh.count = 1;
64 |   fresh.type = startsWithLabel ? entry_type::label : entry_type::word;
65 |   words_.push_back(fresh);
66 |   word2int_[slot] = size_++;
67 | }
65 |
66 | int32_t Dictionary::nwords() const { // number of word entries, as last set by threshold() or load()
67 | return nwords_;
68 | }
69 |
70 | int32_t Dictionary::nlabels() const { // number of label entries, as last set by threshold() or load()
71 | return nlabels_;
72 | }
73 |
74 | int64_t Dictionary::ntokens() const { // running total incremented by every add() call (or restored by load())
75 | return ntokens_;
76 | }
77 |
78 | const std::vector& Dictionary::getNgrams(int32_t i) const { // returns the stored subword list of word id i
79 | assert(i >= 0);
80 | assert(i < nwords_); // only word ids are accepted here
81 | return words_[i].subwords;
82 | }
83 |
84 | // Returns the subword ids for `word`: the stored list when the word is in the
85 | // dictionary, otherwise ids computed on the fly from BOW + word + EOW.
86 | const std::vector Dictionary::getNgrams(const std::string& word) const {
87 |   const int32_t id = getId(word);
88 |   if (id < 0) {
89 |     std::vector computed;
90 |     computeNgrams(BOW + word + EOW, computed);
91 |     return computed;
92 |   }
93 |   return getNgrams(id);
94 | }
93 |
94 | // Tells whether word `id` should be dropped for the random draw `rand`.
95 | // Nothing is ever dropped for the supervised model; otherwise the word is
96 | // dropped when the draw exceeds its entry in the discard table.
97 | bool Dictionary::discard(int32_t id, real rand) const {
98 |   assert(id >= 0);
99 |   assert(id < nwords_);
100 |   const bool supervised = (args_->model == model_name::sup);
101 |   return !supervised && rand > pdiscard_[id];
102 | }
100 |
101 | int32_t Dictionary::getId(const std::string& w) const { // returns the entry index of w, or -1 when w is not in the dictionary
102 | int32_t h = find(w);
103 | return word2int_[h]; // a free slot holds -1
104 | }
105 |
106 | entry_type Dictionary::getType(int32_t id) const { // returns whether entry id is a word or a label
107 | assert(id >= 0);
108 | assert(id < size_);
109 | return words_[id].type;
110 | }
111 |
112 | std::string Dictionary::getWord(int32_t id) const { // returns a copy of the text of entry id
113 | assert(id >= 0);
114 | assert(id < size_);
115 | return words_[id].word;
116 | }
117 |
118 | // 32-bit FNV-1a style hash of the bytes of `str`: start from 2166136261, and
119 | // for each byte xor it in and multiply by 16777619.
120 | uint32_t Dictionary::hash(const std::string& str) const {
121 |   uint32_t digest = 2166136261;
122 |   for (char ch : str) {
123 |     digest ^= uint32_t(ch);
124 |     digest *= 16777619;
125 |   }
126 |   return digest;
127 | }
126 |
127 | void Dictionary::computeNgrams(const std::string& word, // appends to ngrams the hashed ids of the character n-grams of word
128 | std::vector& ngrams) const {
129 | for (size_t i = 0; i < word.size(); i++) {
130 | std::string ngram;
131 | if ((word[i] & 0xC0) == 0x80) continue; // byte of the form 10xxxxxx (UTF-8 continuation byte): never start an n-gram here
132 | for (size_t j = i, n = 1; j < word.size() && n <= args_->maxn; n++) { // j walks bytes, n counts characters taken so far
133 | ngram.push_back(word[j++]);
134 | while (j < word.size() && (word[j] & 0xC0) == 0x80) { // pull in the remaining bytes of the same character
135 | ngram.push_back(word[j++]);
136 | }
137 | if (n >= args_->minn && !(n == 1 && (i == 0 || j == word.size()))) { // skip a single character sitting at the very start or end of word
138 | int32_t h = hash(ngram) % args_->bucket;
139 | ngrams.push_back(nwords_ + h); // n-gram ids are placed after the nwords_ word ids
140 | }
141 | }
142 | }
143 | }
144 |
145 | // Fills the subword list of every entry: first the entry's own index, then
146 | // the character n-gram ids of BOW + word + EOW.
147 | void Dictionary::initNgrams() {
148 |   for (size_t idx = 0; idx < size_; idx++) {
149 |     entry& item = words_[idx];
150 |     const std::string wrapped = BOW + item.word + EOW;
151 |     item.subwords.push_back(idx);
152 |     computeNgrams(wrapped, item.subwords);
153 |   }
154 | }
152 |
153 | // Reads the next whitespace-delimited token from `in` into `word`.
154 | // A newline met before any other character yields the EOS token; a newline
155 | // that ends a word is pushed back so that the following call reports EOS.
156 | // Returns false only when the stream ends without producing a token.
157 | bool Dictionary::readWord(std::istream& in, std::string& word) const
158 | {
159 |   // sbumpc() returns an int: keeping it as int lets EOF be told apart from a
160 |   // 0xFF data byte and lets the loop end on platforms where char is unsigned.
161 |   int c;
162 |   std::streambuf& sb = *in.rdbuf();
163 |   word.clear();
164 |   while ((c = sb.sbumpc()) != EOF) {
165 |     if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\f' || c == '\0') {
166 |       if (word.empty()) {
167 |         if (c == '\n') {
168 |           word += EOS;
169 |           return true;
170 |         }
171 |         continue;
172 |       } else {
173 |         if (c == '\n')
174 |           sb.sungetc();
175 |         return true;
176 |       }
177 |     }
178 |     word.push_back(char(c));
179 |   }
180 |   // trigger eofbit
181 |   in.get();
182 |   return !word.empty();
183 | }
178 |
179 | void Dictionary::readFromFile(std::istream& in) { // builds the dictionary from every token of in, then prunes it and derives the discard table and subwords
180 | std::string word;
181 | int64_t minThreshold = 1;
182 | while (readWord(in, word)) {
183 | add(word);
184 | if (ntokens_ % 1000000 == 0 && args_->verbose > 1) {
185 | std::cout << "\rRead " << ntokens_ / 1000000 << "M words" << std::flush;
186 | }
187 | if (size_ > 0.75 * MAX_VOCAB_SIZE) { // table more than 75% full: prune, raising the count threshold each time
188 | threshold(minThreshold++);
189 | }
190 | }
191 | threshold(args_->minCount); // final pruning with the user-selected minimum count
192 | initTableDiscard();
193 | initNgrams();
194 | if (args_->verbose > 0) {
195 | std::cout << "\rRead " << ntokens_ / 1000000 << "M words" << std::endl;
196 | std::cout << "Number of words: " << nwords_ << std::endl;
197 | std::cout << "Number of labels: " << nlabels_ << std::endl;
198 | }
199 | if (size_ == 0) { // nothing survived pruning: report and end the process
200 | std::cerr << "Empty vocabulary. Try a smaller -minCount value." << std::endl;
201 | exit(EXIT_FAILURE);
202 | }
203 | }
204 |
205 | void Dictionary::threshold(int64_t t) { // drops words seen fewer than t times, reorders the entries and rebuilds the hash table and counters
206 | sort(words_.begin(), words_.end(), [](const entry& e1, const entry& e2) { // words (type 0) before labels (type 1), then by descending count
207 | if (e1.type != e2.type) return e1.type < e2.type;
208 | return e1.count > e2.count;
209 | });
210 | words_.erase(remove_if(words_.begin(), words_.end(), [&](const entry& e) { // only word entries are pruned; labels are always kept
211 | return e.type == entry_type::word && e.count < t;
212 | }), words_.end());
213 | words_.shrink_to_fit();
214 | size_ = 0;
215 | nwords_ = 0;
216 | nlabels_ = 0;
217 | for (int32_t i = 0; i < MAX_VOCAB_SIZE; i++) { // mark every slot free before re-inserting the survivors
218 | word2int_[i] = -1;
219 | }
220 | for (auto it = words_.begin(); it != words_.end(); ++it) { // entry indices now follow the sorted order
221 | int32_t h = find(it->word);
222 | word2int_[h] = size_++;
223 | if (it->type == entry_type::word) nwords_++;
224 | if (it->type == entry_type::label) nlabels_++;
225 | }
226 | }
227 |
228 | // Rebuilds the discard table: for each entry with relative frequency f the
229 | // stored value is sqrt(t / f) + t / f, where t is the sampling threshold.
230 | void Dictionary::initTableDiscard() {
231 |   pdiscard_.resize(size_);
232 |   for (size_t idx = 0; idx < size_; idx++) {
233 |     const real freq = real(words_[idx].count) / real(ntokens_);
234 |     const auto ratio = args_->t / freq;
235 |     pdiscard_[idx] = sqrt(ratio) + ratio;
236 |   }
237 | }
235 |
236 | // Collects the occurrence counts of every entry of the requested type, in
237 | // dictionary order.
238 | std::vector Dictionary::getCounts(entry_type type) const {
239 |   std::vector counts;
240 |   for (auto it = words_.begin(); it != words_.end(); ++it) {
241 |     if (it->type != type) continue;
242 |     counts.push_back(it->count);
243 |   }
244 |   return counts;
245 | }
243 |
244 | void Dictionary::addNgrams(std::vector& line, int32_t n) const { // appends hashed ids for runs of 2..n consecutive ids of line
245 | int32_t line_size = line.size(); // fixed up front, so the ids appended below are not themselves combined
246 | for (int32_t i = 0; i < line_size; i++) {
247 | uint64_t h = line[i]; // running hash, seeded with the id at position i
248 | for (int32_t j = i + 1; j < line_size && j < i + n; j++) {
249 | h = h * 116049371 + line[j];
250 | line.push_back(nwords_ + (h % args_->bucket)); // mapped into the bucket range that follows the word ids
251 | }
252 | }
253 | }
254 |
255 | int32_t Dictionary::getLine(std::istream& in, // reads one line of tokens from in into words/labels; returns how many tokens were found in the dictionary
256 | std::vector& words,
257 | std::vector& labels,
258 | std::minstd_rand& rng) const {
259 | std::uniform_real_distribution<> uniform(0, 1);
260 | std::string token;
261 | int32_t ntokens = 0;
262 | words.clear();
263 | labels.clear();
264 | if (in.eof()) { // end of stream reached earlier: rewind and start over from the beginning
265 | in.clear();
266 | in.seekg(std::streampos(0));
267 | }
268 | while (readWord(in, token)) {
269 | if (token == EOS) break; // end of the current line
270 | int32_t wid = getId(token);
271 | if (wid < 0) continue; // token not in the dictionary: ignored and not counted
272 | entry_type type = getType(wid);
273 | ntokens++;
274 | if (type == entry_type::word && !discard(wid, uniform(rng))) { // words go through the random discard test
275 | words.push_back(wid);
276 | }
277 | if (type == entry_type::label) {
278 | labels.push_back(wid - nwords_); // label ids are stored relative to the first label entry
279 | }
280 | if (words.size() > MAX_LINE_SIZE && args_->model != model_name::sup) break; // cap the line length except for the supervised model
281 | }
282 | return ntokens;
283 | }
284 |
285 | std::string Dictionary::getLabel(int32_t lid) const { // returns the text of label number lid
286 | assert(lid >= 0);
287 | assert(lid < nlabels_);
288 | return words_[lid + nwords_].word; // labels are looked up right after the nwords_ word entries
289 | }
290 |
291 | // Serialises the dictionary to `out` in the layout load() reads back: the
292 | // size_, nwords_, nlabels_ and ntokens_ counters, then for each entry its
293 | // NUL-terminated word, its count and its type. Subword lists are not written.
294 | void Dictionary::save(std::ostream& out) const {
295 |   out.write((char*) &size_, sizeof(int32_t));
296 |   out.write((char*) &nwords_, sizeof(int32_t));
297 |   out.write((char*) &nlabels_, sizeof(int32_t));
298 |   out.write((char*) &ntokens_, sizeof(int64_t));
299 |   for (int32_t i = 0; i < size_; i++) {
300 |     // Bind by reference: copying an entry would duplicate its word and its
301 |     // whole subword list on every iteration for no benefit.
302 |     const entry& e = words_[i];
303 |     out.write(e.word.data(), e.word.size() * sizeof(char));
304 |     out.put(0);
305 |     out.write((char*) &(e.count), sizeof(int64_t));
306 |     out.write((char*) &(e.type), sizeof(entry_type));
307 |   }
308 | }
304 |
305 | // Restores the dictionary from the layout written by save(): the four
306 | // counters, then size_ entries (NUL-terminated word, count, type). The hash
307 | // table, discard table and subword lists are rebuilt afterwards.
308 | // If the stream ends or fails in the middle of an entry, an error is reported
309 | // and the process exits instead of reading forever.
310 | void Dictionary::load(std::istream& in) {
311 |   words_.clear();
312 |   for (int32_t i = 0; i < MAX_VOCAB_SIZE; i++) {
313 |     word2int_[i] = -1;
314 |   }
315 |   in.read((char*) &size_, sizeof(int32_t));
316 |   in.read((char*) &nwords_, sizeof(int32_t));
317 |   in.read((char*) &nlabels_, sizeof(int32_t));
318 |   in.read((char*) &ntokens_, sizeof(int64_t));
319 |   for (int32_t i = 0; i < size_; i++) {
320 |     entry e;
321 |     // get() returns an int; keep it as int so that EOF is recognised and the
322 |     // loop cannot spin forever on a truncated file.
323 |     int c;
324 |     while ((c = in.get()) != 0 && c != EOF) {
325 |       e.word.push_back(char(c));
326 |     }
327 |     in.read((char*) &e.count, sizeof(int64_t));
328 |     in.read((char*) &e.type, sizeof(entry_type));
329 |     if (!in) {
330 |       std::cerr << "Model file is truncated or corrupted!" << std::endl;
331 |       exit(EXIT_FAILURE);
332 |     }
333 |     words_.push_back(e);
334 |     word2int_[find(e.word)] = i;
335 |   }
336 |   initTableDiscard();
337 |   initNgrams();
338 | }
328 |
329 | }
330 |
--------------------------------------------------------------------------------
/src/dictionary.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_DICTIONARY_H
11 | #define FASTTEXT_DICTIONARY_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 |
20 | #include "args.h"
21 | #include "real.h"
22 |
23 | namespace fasttext {
24 |
25 | typedef int32_t id_type;
26 | enum class entry_type : int8_t {word=0, label=1}; // word compares lower than label, which Dictionary::threshold() uses when sorting
27 |
28 | struct entry { // one dictionary item
29 | std::string word; // token text
30 | int64_t count; // number of times the token was added
31 | entry_type type; // word or label
32 | std::vector subwords; // ids filled by Dictionary::initNgrams(): own index followed by character n-gram ids
33 | };
34 |
35 | class Dictionary { // vocabulary of words and labels, stored in a vector and indexed through an open-addressing hash table
36 | private:
37 | static const int32_t MAX_VOCAB_SIZE = 30000000; // number of slots in word2int_
38 | static const int32_t MAX_LINE_SIZE = 1024; // getLine() stops once more words than this were collected (non-supervised models)
39 |
40 | int32_t find(const std::string&) const; // slot of a token in word2int_ (existing or first free)
41 | void initTableDiscard(); // fills pdiscard_ from counts and the sampling threshold
42 | void initNgrams(); // fills the subwords list of every entry
43 |
44 | std::shared_ptr args_; // shared option set
45 | std::vector word2int_; // hash slot -> index into words_, or -1 when free
46 | std::vector words_; // all entries
47 | std::vector pdiscard_; // per-entry value compared against a random draw in discard()
48 | int32_t size_; // number of entries
49 | int32_t nwords_; // number of entries of type word
50 | int32_t nlabels_; // number of entries of type label
51 | int64_t ntokens_; // number of tokens passed to add()
52 |
53 | public:
54 | static const std::string EOS; // token returned by readWord() for a newline
55 | static const std::string BOW; // marker prepended to a word before n-gram extraction
56 | static const std::string EOW; // marker appended to a word before n-gram extraction
57 |
58 | explicit Dictionary(std::shared_ptr);
59 | int32_t nwords() const;
60 | int32_t nlabels() const;
61 | int64_t ntokens() const;
62 | int32_t getId(const std::string&) const; // entry index, or -1 when unknown
63 | entry_type getType(int32_t) const;
64 | bool discard(int32_t, real) const; // always false for the supervised model
65 | std::string getWord(int32_t) const;
66 | const std::vector& getNgrams(int32_t) const; // stored subwords of a word id
67 | const std::vector getNgrams(const std::string&) const; // stored subwords, or computed on the fly for an unknown word
68 | void computeNgrams(const std::string&, std::vector&) const; // appends hashed character n-gram ids
69 | uint32_t hash(const std::string& str) const;
70 | void add(const std::string&); // counts one occurrence of a token
71 | bool readWord(std::istream&, std::string&) const; // reads the next whitespace-delimited token
72 | void readFromFile(std::istream&); // builds the whole dictionary from a stream
73 | std::string getLabel(int32_t) const;
74 | void save(std::ostream&) const;
75 | void load(std::istream&);
76 | std::vector getCounts(entry_type) const; // counts of all entries of one type
77 | void addNgrams(std::vector&, int32_t) const; // appends hashed word n-gram ids to a line
78 | int32_t getLine(std::istream&, std::vector&, // reads one line into word ids and label ids
79 | std::vector&, std::minstd_rand&) const;
80 | void threshold(int64_t); // prunes rare words and rebuilds the index
81 | };
82 |
83 | }
84 |
85 | #endif
86 |
--------------------------------------------------------------------------------
/src/fasttext.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 | /**
7 | * Copyright (c) 2016-present, Facebook, Inc.
8 | * All rights reserved.
9 | *
10 | * This source code is licensed under the BSD-style license found in the
11 | * LICENSE file in the root directory of this source tree. An additional grant
12 | * of patent rights can be found in the PATENTS file in the same directory.
13 | */
14 |
15 | #include "fasttext.h"
16 |
17 | #include
18 | #include
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 |
28 | namespace fasttext {
29 |
30 | // Stores in `vec` the average of the input-matrix rows selected by the
31 | // subword ids of `word`; `vec` is left at zero when there are no ids.
32 | void FastText::getVector(Vector& vec, const std::string& word) {
33 |   const std::vector& ngrams = dict_->getNgrams(word);
34 |   vec.zero();
35 |   for (auto id : ngrams) {
36 |     vec.addRow(*input_, id);
37 |   }
38 |   if (!ngrams.empty()) {
39 |     vec.mul(1.0 / ngrams.size());
40 |   }
41 | }
40 |
41 | void FastText::saveVectors() { // writes one "word vector" line per dictionary word to the file named output + ".vec"
42 | std::ofstream ofs(args_->output + ".vec");
43 | if (!ofs.is_open()) { // cannot create the file: report and end the process
44 | std::cout << "Error opening file for saving vectors." << std::endl;
45 | exit(EXIT_FAILURE);
46 | }
47 | // ofs << dict_->nwords() << " " << args_->dim << std::endl; // exclude dimensions from output .txt file
48 | Vector vec(args_->dim);
49 | for (int32_t i = 0; i < dict_->nwords(); i++) {
50 | std::string word = dict_->getWord(i);
51 | getVector(vec, word);
52 | ofs << word << " " << vec << std::endl;
53 | }
54 | ofs.close();
55 | }
56 |
57 | void FastText::saveModel() { // writes the binary model to the file named output + ".bin"
58 | std::ofstream ofs(args_->output + ".bin", std::ofstream::binary);
59 | if (!ofs.is_open()) { // cannot create the file: report and end the process
60 | std::cerr << "Model file cannot be opened for saving!" << std::endl;
61 | exit(EXIT_FAILURE);
62 | }
63 | args_->save(ofs); // section order must match the read order in loadModel(std::istream&)
64 | dict_->save(ofs);
65 | input_->save(ofs);
66 | output_->save(ofs);
67 | ofs.close();
68 | }
69 |
70 | void FastText::loadModel(const std::string& filename) { // opens filename in binary mode and loads the model from it
71 | std::ifstream ifs(filename, std::ifstream::binary);
72 | if (!ifs.is_open()) { // cannot open the file: report and end the process
73 | std::cerr << "Model file cannot be opened for loading!" << std::endl;
74 | exit(EXIT_FAILURE);
75 | }
76 | loadModel(ifs);
77 | ifs.close();
78 | }
79 |
80 | void FastText::loadModel(std::istream& in) { // replaces args_, dict_, input_, output_ and model_ with objects read from in
81 | args_ = std::make_shared();
82 | dict_ = std::make_shared(args_);
83 | input_ = std::make_shared();
84 | output_ = std::make_shared();
85 | args_->load(in); // read order mirrors the write order in saveModel()
86 | dict_->load(in);
87 | input_->load(in);
88 | output_->load(in);
89 | model_ = std::make_shared(input_, output_, args_, 0);
90 | if (args_->model == model_name::sup) { // supervised models take label counts as target counts, the others word counts
91 | model_->setTargetCounts(dict_->getCounts(entry_type::label));
92 | } else {
93 | model_->setTargetCounts(dict_->getCounts(entry_type::word));
94 | }
95 | }
96 |
97 | // Prints a one-line training status to stdout, starting with a carriage
98 | // return: progress in percent, words per second per thread, current learning
99 | // rate, the given loss and the estimated remaining time.
100 | //   progress  fraction of the work done so far
101 | //   loss      loss value to display
102 | void FastText::printInfo(real progress, real loss) {
103 |   real t = real(clock() - start) / CLOCKS_PER_SEC;
104 |   real wst = real(tokenCount) / t;
105 |   real lr = args_->lr * (1.0 - progress);
106 |   // No estimate exists before any progress was made: dividing by a zero
107 |   // progress gives infinity, and converting that to int is undefined.
108 |   int eta = 0;
109 |   if (progress > 0) {
110 |     eta = int(t / progress * (1 - progress) / args_->thread);
111 |   }
112 |   int etah = eta / 3600;
113 |   int etam = (eta - etah * 3600) / 60;
114 |   std::cout << std::fixed;
115 |   std::cout << "\rProgress: " << std::setprecision(1) << 100 * progress << "%";
116 |   std::cout << " words/sec/thread: " << std::setprecision(0) << wst;
117 |   std::cout << " lr: " << std::setprecision(6) << lr;
118 |   std::cout << " loss: " << std::setprecision(6) << loss;
119 |   std::cout << " eta: " << etah << "h" << etam << "m ";
120 |   std::cout << std::flush;
121 | }
112 |
113 | // One supervised update: picks one of the line's labels uniformly at random
114 | // (using the model's generator) and updates the model towards it. Does
115 | // nothing when the line has no labels or no input ids.
116 | void FastText::supervised(Model& model, real lr,
117 |                           const std::vector& line,
118 |                           const std::vector& labels) {
119 |   if (labels.empty() || line.empty()) return;
120 |   std::uniform_int_distribution<> pick(0, labels.size() - 1);
121 |   const int32_t chosen = pick(model.rng);
122 |   model.update(line, labels[chosen], lr);
123 | }
121 |
122 | void FastText::cbow(Model& model, real lr, // one cbow pass over line: each position is the target, the subwords of its neighbours are the input
123 | const std::vector& line) {
124 | std::vector bow;
125 | std::uniform_int_distribution<> uniform(1, args_->ws);
126 | for (int32_t w = 0; w < line.size(); w++) {
127 | int32_t boundary = uniform(model.rng); // window radius drawn anew for every position, between 1 and ws
128 | bow.clear();
129 | for (int32_t c = -boundary; c <= boundary; c++) {
130 | if (c != 0 && w + c >= 0 && w + c < line.size()) { // skip the target itself and positions outside the line
131 | const std::vector& ngrams = dict_->getNgrams(line[w + c]);
132 | bow.insert(bow.end(), ngrams.cbegin(), ngrams.cend());
133 | }
134 | }
135 | model.update(bow, line[w], lr);
136 | }
137 | }
138 |
139 | void FastText::skipgram(Model& model, real lr, // one skip-gram pass over line: the subwords of each position are the input, each neighbour is a target
140 | const std::vector& line) {
141 | std::uniform_int_distribution<> uniform(1, args_->ws);
142 | for (int32_t w = 0; w < line.size(); w++) {
143 | int32_t boundary = uniform(model.rng); // window radius drawn anew for every position, between 1 and ws
144 | const std::vector& ngrams = dict_->getNgrams(line[w]);
145 | for (int32_t c = -boundary; c <= boundary; c++) {
146 | if (c != 0 && w + c >= 0 && w + c < line.size()) { // skip the centre itself and positions outside the line
147 | model.update(ngrams, line[w + c], lr);
148 | }
149 | }
150 | }
151 | }
152 |
153 | void FastText::test(std::istream& in, int32_t k) { // evaluates top-k predictions on the labelled lines of in and prints P@k, R@k and the example count
154 | int32_t nexamples = 0, nlabels = 0;
155 | double precision = 0.0; // running count of predicted labels that are among the true labels
156 | std::vector line, labels;
157 |
158 | while (in.peek() != EOF) {
159 | dict_->getLine(in, line, labels, model_->rng);
160 | dict_->addNgrams(line, args_->wordNgrams);
161 | if (labels.size() > 0 && line.size() > 0) { // lines without labels or without input ids are not counted
162 | std::vector> modelPredictions;
163 | model_->predict(line, k, modelPredictions);
164 | for (auto it = modelPredictions.cbegin(); it != modelPredictions.cend(); it++) {
165 | if (std::find(labels.begin(), labels.end(), it->second) != labels.end()) {
166 | precision += 1.0;
167 | }
168 | }
169 | nexamples++;
170 | nlabels += labels.size();
171 | }
172 | }
173 | std::cout << std::setprecision(3);
174 | std::cout << "P@" << k << ": " << precision / (k * nexamples) << std::endl; // hits divided by the number of predictions requested
175 | std::cout << "R@" << k << ": " << precision / nlabels << std::endl; // hits divided by the number of true labels
176 | std::cout << "Number of examples: " << nexamples << std::endl;
177 | }
178 |
179 | // Reads one line from `in` and stores up to k (score, label text) pairs for
180 | // it in `predictions`. `predictions` is always emptied first, so a line that
181 | // yields no input ids leaves it empty instead of keeping the previous line's
182 | // results.
183 | void FastText::predict(std::istream& in, int32_t k,
184 |                        std::vector>& predictions) const {
185 |   std::vector words, labels;
186 |   predictions.clear();
187 |   dict_->getLine(in, words, labels, model_->rng);
188 |   dict_->addNgrams(words, args_->wordNgrams);
189 |   if (words.empty()) return;
190 |   Vector hidden(args_->dim);
191 |   Vector output(dict_->nlabels());
192 |   std::vector> modelPredictions;
193 |   model_->predict(words, k, modelPredictions, hidden, output);
194 |   for (auto it = modelPredictions.cbegin(); it != modelPredictions.cend(); it++) {
195 |     // translate the label id into its text
196 |     predictions.push_back(std::make_pair(it->first, dict_->getLabel(it->second)));
197 |   }
198 | }
194 |
195 | void FastText::predict(std::istream& in, int32_t k, bool print_prob) { // prints the predicted labels of every line of in to stdout, one output line per input line
196 | std::vector> predictions;
197 | while (in.peek() != EOF) {
198 | predict(in, k, predictions);
199 | if (predictions.empty()) {
200 | std::cout << "n/a" << std::endl;
201 | }
202 | for (auto it = predictions.cbegin(); it != predictions.cend(); it++) {
203 | if (it != predictions.cbegin()) { // single space between consecutive predictions
204 | std::cout << ' ';
205 | }
206 | std::cout << it->second;
207 | if (print_prob) {
208 | std::cout << ' ' << exp(it->first); // the stored score is printed through exp()
209 | }
210 | }
211 | std::cout << std::endl;
212 | }
213 | }
214 |
215 | // void FastText::wordVectors() {
216 | // std::string word;
217 | // Vector vec(args_->dim);
218 | // while (std::cin >> word) {
219 | // getVector(vec, word);
220 | // std::cout << word << " " << vec << std::endl; // write to file
221 | // }
222 | // }
223 |
224 |
225 | // Writes the vector of every line of the file INPUT (each line is taken as
226 | // one word) to the file OUTPUT + ".vec", one "word vector" line per entry.
227 | // Ends the process with EXIT_FAILURE when either file cannot be opened; the
228 | // input is checked first so that no empty output file is left behind.
229 | void FastText::wordVectors(std::string& INPUT, std::string& OUTPUT) {
230 |   std::ifstream inf(INPUT);
231 |   if (!inf.is_open()) {
232 |     std::cout << "Error opening input file for reading words." << std::endl;
233 |     exit(EXIT_FAILURE);
234 |   }
235 |   std::ofstream ofs(OUTPUT + ".vec");
236 |   if (!ofs.is_open()) {
237 |     std::cout << "Error opening file for saving vectors." << std::endl;
238 |     exit(EXIT_FAILURE);
239 |   }
240 |   std::string word;
241 |   Vector vec(args_->dim);
242 |   while (std::getline(inf, word)) {
243 |     getVector(vec, word);
244 |     ofs << word << " " << vec << std::endl;
245 |   }
246 |   ofs.close();
247 | }
242 |
243 |
244 | void FastText::textVectors() { // for every line read from std::cin, prints the average of the input-matrix rows of its ids
245 | std::vector line, labels;
246 | Vector vec(args_->dim);
247 | while (std::cin.peek() != EOF) {
248 | dict_->getLine(std::cin, line, labels, model_->rng);
249 | dict_->addNgrams(line, args_->wordNgrams);
250 | vec.zero();
251 | for (auto it = line.cbegin(); it != line.cend(); ++it) {
252 | vec.addRow(*input_, *it);
253 | }
254 | if (!line.empty()) { // an empty line keeps the zero vector
255 | vec.mul(1.0 / line.size());
256 | }
257 | std::cout << vec << std::endl; // write to file
258 | }
259 | }
260 |
261 | void FastText::printVectors() { // prints text vectors for a supervised model; does nothing for the other models (that branch is commented out below)
262 | if (args_->model == model_name::sup) { // if model 'supervised' return textvectors else return wordvectors
263 | textVectors();
264 | }
265 | // } else {
266 | // wordVectors();
267 | // }
268 | }
269 |
270 | void FastText::trainThread(int32_t threadId) { // training loop run for one thread id over the input file
271 | std::ifstream ifs(args_->input);
272 | utils::seek(ifs, threadId * utils::size(ifs) / args_->thread); // start offset proportional to the thread id
273 |
274 | Model model(input_, output_, args_, threadId); // per-call Model built on the input_/output_ matrices held by this object
275 | if (args_->model == model_name::sup) { // supervised models take label counts as target counts, the others word counts
276 | model.setTargetCounts(dict_->getCounts(entry_type::label));
277 | } else {
278 | model.setTargetCounts(dict_->getCounts(entry_type::word));
279 | }
280 |
281 | const int64_t ntokens = dict_->ntokens();
282 | int64_t localTokenCount = 0;
283 | std::vector line, labels;
284 | while (tokenCount < args_->epoch * ntokens) { // run until the member tokenCount reaches epoch times the corpus size
285 | real progress = real(tokenCount) / (args_->epoch * ntokens);
286 | real lr = args_->lr * (1.0 - progress); // learning rate decreases linearly with progress
287 | localTokenCount += dict_->getLine(ifs, line, labels, model.rng);
288 | if (args_->model == model_name::sup) {
289 | dict_->addNgrams(line, args_->wordNgrams);
290 | supervised(model, lr, line, labels);
291 | } else if (args_->model == model_name::cbow) {
292 | cbow(model, lr, line);
293 | } else if (args_->model == model_name::sg) {
294 | skipgram(model, lr, line);
295 | }
296 | if (localTokenCount > args_->lrUpdateRate) { // fold the local count into tokenCount only every lrUpdateRate tokens
297 | tokenCount += localTokenCount;
298 | localTokenCount = 0;
299 | if (threadId == 0 && args_->verbose > 1) { // only thread 0 reports progress
300 | printInfo(progress, model.getLoss());
301 | }
302 | }
303 | }
304 | if (threadId == 0 && args_->verbose > 0) {
305 | printInfo(1.0, model.getLoss());
306 | std::cout << std::endl;
307 | }
308 | ifs.close();
309 | }
310 |
311 | // Loads pretrained vectors from the text file `filename`, whose header holds
312 | // "<count> <dim>" followed by one "<word> <dim values>" record per word.
313 | // Every word is added to the dictionary, input_ is re-created with a uniform
314 | // random initialisation, and the rows of the words that are still known after
315 | // dict_->threshold(1) are overwritten with the pretrained values.
316 | // Terminates the process with EXIT_FAILURE when the file cannot be opened,
317 | // its header is unusable, or its dimension differs from args_->dim.
318 | // NOTE(review): the template arguments below were missing in the listing this
319 | // block was taken from and are reconstructed from usage - confirm.
320 | void FastText::loadVectors(std::string filename) {
321 |   std::ifstream in(filename);
322 |   std::vector<std::string> words;
323 |   std::shared_ptr<Matrix> mat; // temp. matrix for pretrained vectors
324 |   int64_t n = 0, dim = 0;
325 |   if (!in.is_open()) {
326 |     std::cerr << "Pretrained vectors file cannot be opened!" << std::endl;
327 |     exit(EXIT_FAILURE);
328 |   }
329 |   // The header was previously read unchecked; a failed read or a negative
330 |   // count then served as allocation size and (unsigned) loop bound.
331 |   if (!(in >> n >> dim) || n < 0) {
332 |     std::cerr << "Pretrained vectors file has an invalid header!" << std::endl;
333 |     exit(EXIT_FAILURE);
334 |   }
335 |   if (dim != args_->dim) {
336 |     std::cerr << "Dimension of pretrained vectors does not match -dim option"
337 |               << std::endl;
338 |     exit(EXIT_FAILURE);
339 |   }
340 |   mat = std::make_shared<Matrix>(n, dim);
341 |   // Signed indices: n and dim are int64_t, so no signed/unsigned comparison.
342 |   for (int64_t i = 0; i < n; i++) {
343 |     std::string word;
344 |     in >> word;
345 |     words.push_back(word);
346 |     dict_->add(word);
347 |     for (int64_t j = 0; j < dim; j++) {
348 |       in >> mat->data_[i * dim + j];
349 |     }
350 |   }
351 |   in.close();
352 |
353 |   dict_->threshold(1);
354 |   input_ = std::make_shared<Matrix>(dict_->nwords()+args_->bucket, args_->dim);
355 |   input_->uniform(1.0 / args_->dim);
356 |
357 |   for (int64_t i = 0; i < n; i++) {
358 |     int32_t idx = dict_->getId(words[i]);
359 |     // Skip words that are not (or no longer) part of the word vocabulary.
360 |     if (idx < 0 || idx >= dict_->nwords()) continue;
361 |     for (int64_t j = 0; j < dim; j++) {
362 |       input_->data_[idx * dim + j] = mat->data_[i * dim + j];
363 |     }
364 |   }
365 | }
350 |
351 | void FastText::train(std::shared_ptr args) {  // full training run: read the dictionary, create the matrices, train on args->thread threads, then save; NOTE(review): template arguments are missing throughout this listing
352 | args_ = args;
353 | dict_ = std::make_shared(args_);
354 | if (args_->input == "-") {
355 | // manage expectations
356 | std::cerr << "Cannot use stdin for training!" << std::endl;
357 | exit(EXIT_FAILURE);
358 | }
359 | std::ifstream ifs(args_->input);
360 | if (!ifs.is_open()) {
361 | std::cerr << "Input file cannot be opened!" << std::endl;
362 | exit(EXIT_FAILURE);
363 | }
364 | dict_->readFromFile(ifs);
365 | ifs.close();
366 |
367 | if (args_->pretrainedVectors.size() != 0) {
368 | loadVectors(args_->pretrainedVectors);  // loadVectors() also creates input_
369 | } else {
370 | input_ = std::make_shared(dict_->nwords()+args_->bucket, args_->dim);  // one row per word plus args_->bucket additional rows
371 | input_->uniform(1.0 / args_->dim);
372 | }
373 |
374 | if (args_->model == model_name::sup) {
375 | output_ = std::make_shared(dict_->nlabels(), args_->dim);  // supervised: one output row per label
376 | } else {
377 | output_ = std::make_shared(dict_->nwords(), args_->dim);  // otherwise: one output row per word
378 | }
379 | output_->zero();
380 |
381 | start = clock();
382 | tokenCount = 0;
383 | std::vector threads;
384 | for (int32_t i = 0; i < args_->thread; i++) {
385 | threads.push_back(std::thread([=]() { trainThread(i); }));  // i is captured by value and becomes the thread id
386 | }
387 | for (auto it = threads.begin(); it != threads.end(); ++it) {
388 | it->join();  // wait for every worker before the final model is built
389 | }
390 | model_ = std::make_shared(input_, output_, args_, 0);
391 |
392 | saveModel();
393 | if (args_->model != model_name::sup) {
394 | saveVectors();  // vectors are only saved for non-supervised models
395 | }
396 | }
397 |
398 | }
399 |
--------------------------------------------------------------------------------
/src/fasttext.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_FASTTEXT_H
11 | #define FASTTEXT_FASTTEXT_H
12 |
13 | #include
14 |
15 | #include
16 | #include
17 |
18 | #include
19 | #include "matrix.h"
20 | #include "vector.h"
21 | #include "dictionary.h"
22 | #include "model.h"
23 | #include "utils.h"
24 | #include "real.h"
25 | #include "args.h"
26 |
27 | namespace fasttext {
28 |
29 | class FastText {  // holds the arguments, dictionary, weight matrices and model, and implements the train / test / predict / vector-export commands
30 | private:
31 | std::shared_ptr args_;  // NOTE(review): the template arguments of the members below are missing in this listing; presumably Args, Dictionary, Matrix, Matrix, Model - confirm against the original header
32 | std::shared_ptr dict_;
33 | std::shared_ptr input_;  // input weight matrix
34 | std::shared_ptr output_;  // output weight matrix
35 | std::shared_ptr model_;
36 | std::atomic tokenCount;  // tokens processed so far; every training thread adds to it
37 | clock_t start;  // set from clock() when training starts
38 |
39 | public:
40 | void getVector(Vector&, const std::string&);
41 | void saveVectors();
42 | void saveModel();
43 | void loadModel(const std::string&);
44 | void loadModel(std::istream&);
45 | void printInfo(real, real);
46 |
47 | void supervised(Model&, real, const std::vector&,
48 | const std::vector&);
49 | void cbow(Model&, real, const std::vector&);
50 | void skipgram(Model&, real, const std::vector&);
51 | void test(std::istream&, int32_t);
52 | void predict(std::istream&, int32_t, bool);
53 | void predict(std::istream&, int32_t, std::vector>&) const;
54 | void wordVectors(std::string&, std::string&);  // (input file, output path prefix): writes the vectors to "<prefix>.vec"
55 | void textVectors();  // reads lines from std::cin and prints one averaged vector per line
56 | void printVectors();  // calls textVectors() for supervised models only
57 | void trainThread(int32_t);  // body of one training thread; the argument is the thread id
58 | void train(std::shared_ptr);
59 |
60 | void loadVectors(std::string);  // loads pretrained vectors from the named text file into input_
61 | };
62 |
63 | }
64 |
65 | #endif
66 |
--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include // for NULL
4 | #include
5 |
6 | /* FIXME:
7 | Check these declarations against the C/Fortran source code.
8 | */
9 |
10 | /* .Call calls */
11 | extern SEXP _fastTextR_convert_args_to_pointers(SEXP, SEXP, SEXP);  /* the only routine exposed through .Call; takes three SEXP arguments */
12 |
13 | static const R_CallMethodDef CallEntries[] = {  /* .Call registration table: {name, function pointer, number of arguments} */
14 | {"_fastTextR_convert_args_to_pointers", (DL_FUNC) &_fastTextR_convert_args_to_pointers, 3},
15 | {NULL, NULL, 0}  /* terminating entry */
16 | };
17 |
18 | /* Library initialiser: registers CallEntries as the .Call routines of the
19 |    DLL and turns off dynamic symbol lookup for it. */
20 | void R_init_fastTextR(DllInfo *dll) {
21 |   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
22 |   R_useDynamicSymbols(dll, FALSE);
23 | }
23 |
--------------------------------------------------------------------------------
/src/main.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 |
7 | /**
8 | * Copyright (c) 2016-present, Facebook, Inc.
9 | * All rights reserved.
10 | *
11 | * This source code is licensed under the BSD-style license found in the
12 | * LICENSE file in the root directory of this source tree. An additional grant
13 | * of patent rights can be found in the PATENTS file in the same directory.
14 | */
15 |
16 | #include
17 | #include
18 | #include "fasttext.h"
19 | #include "args.h"
20 |
21 | using namespace fasttext;
22 |
23 | void printUsage() {  // prints the list of supported commands to std::cout
24 | std::cout
25 | << "usage: fasttext \n\n"  // NOTE(review): the argument placeholders after "fasttext" and the column spacing below look lost in this listing - compare with the original file before editing these strings
26 | << "The commands supported by fasttext are:\n\n"
27 | << " supervised train a supervised classifier\n"
28 | << " test evaluate a supervised classifier\n"
29 | << " predict predict most likely labels\n"
30 | << " predict-prob predict most likely labels with probabilities\n"
31 | << " skipgram train a skipgram model\n"
32 | << " cbow train a cbow model\n"
33 | << " print-vectors print vectors given a trained model\n"
34 | << std::endl;
35 | }
36 |
37 | void printTestUsage() {  // prints the argument description of the "test" command to std::cout
38 | std::cout
39 | << "usage: fasttext test []\n\n"  // NOTE(review): the argument names (model, test data, k) look lost in this listing - compare with the original file
40 | << " model filename\n"
41 | << " test data filename (if -, read from stdin)\n"
42 | << " (optional; 1 by default) predict top k labels\n"
43 | << std::endl;
44 | }
45 |
46 | void printPredictUsage() {  // prints the argument description of the "predict" / "predict-prob" commands to std::cout
47 | std::cout
48 | << "usage: fasttext predict[-prob] []\n\n"  // NOTE(review): the argument names (model, test data, k) look lost in this listing - compare with the original file
49 | << " model filename\n"
50 | << " test data filename (if -, read from stdin)\n"
51 | << " (optional; 1 by default) predict top k labels\n"
52 | << std::endl;
53 | }
54 |
55 | void printPrintVectorsUsage() {  // prints the argument description of the "print-vectors" command to std::cout
56 | std::cout
57 | << "usage: fasttext print-vectors \n\n"  // NOTE(review): the model placeholder looks lost in this listing - compare with the original file
58 | << " model filename\n"
59 | << std::endl;
60 | }
61 |
62 | // Implements the "test" command. argv[2] is the model file, argv[3] the test
63 | // data file ("-" reads std::cin) and the optional argv[4] the number k of top
64 | // labels (1 when omitted). The function never returns: it ends the process
65 | // with status 0 after the evaluation, or with EXIT_FAILURE when the arguments
66 | // are unusable or the test file cannot be opened.
67 | void test(int argc, char** argv) {
68 |   int32_t k = 1;
69 |   if (argc == 5) {
70 |     k = atoi(argv[4]);
71 |   }
72 |   // atoi() returns 0 for non-numeric text; k < 1 is rejected here because
73 |   // Model::predict asserts k > 0.
74 |   if ((argc != 4 && argc != 5) || k < 1) {
75 |     printTestUsage();
76 |     exit(EXIT_FAILURE);
77 |   }
78 |   FastText fasttext;
79 |   fasttext.loadModel(std::string(argv[2]));
80 |   std::string infile(argv[3]);
81 |   if (infile == "-") {
82 |     fasttext.test(std::cin, k);
83 |   } else {
84 |     std::ifstream ifs(infile);
85 |     if (!ifs.is_open()) {
86 |       std::cerr << "Test file cannot be opened!" << std::endl;
87 |       exit(EXIT_FAILURE);
88 |     }
89 |     fasttext.test(ifs, k);
90 |     ifs.close();
91 |   }
92 |   exit(0);
93 | }
88 |
89 | // Implements the "predict" and "predict-prob" commands. argv[2] is the model
90 | // file, argv[3] the input file ("-" reads std::cin) and the optional argv[4]
91 | // the number k of top labels (1 when omitted); probabilities are printed when
92 | // argv[1] is "predict-prob". The function never returns: it ends the process
93 | // with status 0 after predicting, or with EXIT_FAILURE when the arguments are
94 | // unusable or the input file cannot be opened.
95 | void predict(int argc, char** argv) {
96 |   int32_t k = 1;
97 |   if (argc == 5) {
98 |     k = atoi(argv[4]);
99 |   }
100 |   // atoi() returns 0 for non-numeric text; k < 1 is rejected here because
101 |   // Model::predict asserts k > 0.
102 |   if ((argc != 4 && argc != 5) || k < 1) {
103 |     printPredictUsage();
104 |     exit(EXIT_FAILURE);
105 |   }
106 |   bool print_prob = std::string(argv[1]) == "predict-prob";
107 |   FastText fasttext;
108 |   fasttext.loadModel(std::string(argv[2]));
109 |
110 |   std::string infile(argv[3]);
111 |   if (infile == "-") {
112 |     fasttext.predict(std::cin, k, print_prob);
113 |   } else {
114 |     std::ifstream ifs(infile);
115 |     if (!ifs.is_open()) {
116 |       std::cerr << "Input file cannot be opened!" << std::endl;
117 |       exit(EXIT_FAILURE);
118 |     }
119 |     fasttext.predict(ifs, k, print_prob);
120 |     ifs.close();
121 |   }
122 |
123 |   exit(0);
124 | }
118 |
119 | // "print-vectors" command: loads the model named by argv[2] and prints its
120 | // vectors, then ends the process (status 0, or EXIT_FAILURE unless argc is 3).
121 | void printVectors(int argc, char** argv) {
122 |   const bool argumentCountOk = (argc == 3);
123 |   if (!argumentCountOk) {
124 |     printPrintVectorsUsage();
125 |     exit(EXIT_FAILURE);
126 |   }
127 |   const std::string modelPath(argv[2]);
128 |   FastText model;
129 |   model.loadModel(modelPath);
130 |   model.printVectors();
131 |   exit(0);
132 | }
129 |
130 | void train(int argc, char** argv) {  // parses the command line into an arguments object and runs FastText::train() with it
131 | std::shared_ptr a = std::make_shared();  // NOTE(review): the template arguments are missing in this listing; presumably the arguments class declared in args.h - confirm
132 | a->parseArgs(argc, argv);
133 | FastText fasttext;
134 | fasttext.train(a);
135 | }
136 |
137 |
138 | // Loads the model stored at argv[2] and hands INPUT / OUTPUT on to
139 | // FastText::wordVectors(). The caller must make sure that argv[2] exists.
140 | void SAVE_dict_vectors(char** argv, std::string INPUT, std::string OUTPUT) {
141 |   const std::string modelFile(argv[2]);
142 |   FastText ft;
143 |   ft.loadModel(modelFile);
144 |   ft.wordVectors(INPUT, OUTPUT);
145 | }
146 |
147 |
148 |
149 | // wrapper for the train(), test(), printVectors(), predict() functions
150 | // conversion of string-arguments to pointers in c++ : http://stackoverflow.com/questions/26032039/convert-vectorstring-into-char-c
151 | // for supervised, modify the std::cin in predict(), test()
152 | //
153 |
154 | // Entry point exported to R: dispatches one fastText command line.
155 | // 'string_commands' holds the argv-style arguments, including the program
156 | // name at index 0 and the command at index 1. INPUT and OUTPUT are only used
157 | // by the "predict_skipgram_cbow" command. An unknown command or too few
158 | // arguments prints the usage text and ends the process with EXIT_FAILURE.
159 | // NOTE(review): the element type of 'string_commands' was missing in the
160 | // listing this block was taken from; std::string is reconstructed from usage.
161 | // [[Rcpp::export]]
162 | void convert_args_to_pointers(std::vector<std::string> string_commands, std::string INPUT, std::string OUTPUT) {
163 |
164 |   const int num_argc = static_cast<int>(string_commands.size());
165 |
166 |   utils::initTables();
167 |
168 |   if (num_argc < 2) {
169 |     printUsage();
170 |     exit(EXIT_FAILURE);
171 |   }
172 |
173 |   // argv-style view onto the by-value strings. The storage is owned by the
174 |   // containers, so there is no manual new[] / delete[] to get wrong.
175 |   std::vector<char*> cstrings;
176 |   cstrings.reserve(string_commands.size());
177 |   for (std::string& arg : string_commands) {
178 |     cstrings.push_back(&arg[0]);
179 |   }
180 |   char** argv = cstrings.data();
181 |
182 |   const std::string command = string_commands[1];
183 |
184 |   if (command == "skipgram" || command == "cbow" || command == "supervised") {
185 |     train(num_argc, argv);
186 |   } else if (command == "test") {
187 |     test(num_argc, argv);
188 |   } else if (command == "print-vectors") {
189 |     printVectors(num_argc, argv);
190 |   } else if (command == "predict" || command == "predict-prob") {
191 |     predict(num_argc, argv);
192 |   } else if (command == "predict_skipgram_cbow") {
193 |     // SAVE_dict_vectors() reads argv[2] (the model file) without knowing the
194 |     // argument count, so the bound has to be checked here.
195 |     if (num_argc < 3) {
196 |       printUsage();
197 |       exit(EXIT_FAILURE);
198 |     }
199 |     SAVE_dict_vectors(argv, INPUT, OUTPUT);
200 |   } else {
201 |     printUsage();
202 |     exit(EXIT_FAILURE);
203 |   }
204 |
205 |   utils::freeTables();
206 | }
217 |
218 |
--------------------------------------------------------------------------------
/src/matrix.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 | /**
7 | * Copyright (c) 2016-present, Facebook, Inc.
8 | * All rights reserved.
9 | *
10 | * This source code is licensed under the BSD-style license found in the
11 | * LICENSE file in the root directory of this source tree. An additional grant
12 | * of patent rights can be found in the PATENTS file in the same directory.
13 | */
14 |
15 | #include "matrix.h"
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include "utils.h"
22 | #include "vector.h"
23 |
24 | namespace fasttext {
25 |
26 | // Empty 0 x 0 matrix that owns no storage.
27 | Matrix::Matrix() : data_(nullptr), m_(0), n_(0) {}
28 |
29 | // m x n matrix; the elements are left uninitialised.
30 | Matrix::Matrix(int64_t m, int64_t n)
31 |     : data_(new real[m * n]), m_(m), n_(n) {}
32 |
33 | // Deep copy: allocates a fresh buffer and copies every element of `other`.
34 | Matrix::Matrix(const Matrix& other)
35 |     : data_(new real[other.m_ * other.n_]), m_(other.m_), n_(other.n_) {
36 |   const int64_t count = m_ * n_;
37 |   for (int64_t k = 0; k < count; ++k) {
38 |     data_[k] = other.data_[k];
39 |   }
40 | }
41 |
42 | // Copy-and-swap assignment: the temporary copy ends up holding the previous
43 | // buffer and frees it when it goes out of scope.
44 | Matrix& Matrix::operator=(const Matrix& other) {
45 |   Matrix copy(other);
46 |   std::swap(data_, copy.data_);
47 |   m_ = copy.m_;
48 |   n_ = copy.n_;
49 |   return *this;
50 | }
51 |
52 | // Releases the element buffer.
53 | Matrix::~Matrix() {
54 |   delete[] data_;
55 | }
56 |
57 | // Sets every element to zero.
58 | void Matrix::zero() {
59 |   const int64_t count = m_ * n_;
60 |   for (int64_t k = 0; k < count; ++k) {
61 |     data_[k] = 0.0;
62 |   }
63 | }
64 |
65 | // Fills the matrix with values drawn from a uniform distribution over
66 | // (-a, a), using a generator that is always seeded with 1.
67 | void Matrix::uniform(real a) {
68 |   std::minstd_rand generator(1);
69 |   std::uniform_real_distribution<> draw(-a, a);
70 |   const int64_t count = m_ * n_;
71 |   for (int64_t k = 0; k < count; ++k) {
72 |     data_[k] = draw(generator);
73 |   }
74 | }
75 |
76 | // Adds a * vec to row i.
77 | void Matrix::addRow(const Vector& vec, int64_t i, real a) {
78 |   assert(i >= 0);
79 |   assert(i < m_);
80 |   assert(vec.m_ == n_);
81 |   real* row = data_ + i * n_;
82 |   for (int64_t col = 0; col < n_; ++col) {
83 |     row[col] += a * vec.data_[col];
84 |   }
85 | }
86 |
87 | // Returns the dot product of row i with vec.
88 | real Matrix::dotRow(const Vector& vec, int64_t i) {
89 |   assert(i >= 0);
90 |   assert(i < m_);
91 |   assert(vec.m_ == n_);
92 |   const real* row = data_ + i * n_;
93 |   real dot = 0.0;
94 |   for (int64_t col = 0; col < n_; ++col) {
95 |     dot += row[col] * vec.data_[col];
96 |   }
97 |   return dot;
98 | }
99 |
100 | // Writes the row count, the column count and then the raw element buffer.
101 | void Matrix::save(std::ostream& out) {
102 |   out.write(reinterpret_cast<const char*>(&m_), sizeof(int64_t));
103 |   out.write(reinterpret_cast<const char*>(&n_), sizeof(int64_t));
104 |   out.write(reinterpret_cast<const char*>(data_), m_ * n_ * sizeof(real));
105 | }
106 |
107 | // Reads the layout written by save(); the previous buffer is released and a
108 | // new one of the stored size is allocated before the elements are read.
109 | void Matrix::load(std::istream& in) {
110 |   in.read(reinterpret_cast<char*>(&m_), sizeof(int64_t));
111 |   in.read(reinterpret_cast<char*>(&n_), sizeof(int64_t));
112 |   delete[] data_;
113 |   data_ = new real[m_ * n_];
114 |   in.read(reinterpret_cast<char*>(data_), m_ * n_ * sizeof(real));
115 | }
116 |
117 | }
108 |
--------------------------------------------------------------------------------
/src/matrix.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_MATRIX_H
11 | #define FASTTEXT_MATRIX_H
12 |
13 | #include
14 | #include
15 | #include
16 |
17 | #include "real.h"
18 |
19 | namespace fasttext {
20 |
21 | class Vector;
22 |
23 | // Dense m_ x n_ matrix of `real` values stored row by row in one buffer that
24 | // the object allocates and frees itself.
25 | class Matrix {
26 |
27 |   public:
28 |     real* data_;   // element (i, j) lives at data_[i * n_ + j]
29 |     int64_t m_;    // number of rows
30 |     int64_t n_;    // number of columns
31 |
32 |     Matrix();
33 |     Matrix(int64_t m, int64_t n);
34 |     Matrix(const Matrix& other);
35 |     Matrix& operator=(const Matrix& other);
36 |     ~Matrix();
37 |
38 |     void zero();
39 |     void uniform(real a);
40 |     real dotRow(const Vector& vec, int64_t i);
41 |     void addRow(const Vector& vec, int64_t i, real a);
42 |
43 |     void save(std::ostream& out);
44 |     void load(std::istream& in);
45 | };
44 |
45 | }
46 |
47 | #endif
48 |
--------------------------------------------------------------------------------
/src/model.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 | /**
7 | * Copyright (c) 2016-present, Facebook, Inc.
8 | * All rights reserved.
9 | *
10 | * This source code is licensed under the BSD-style license found in the
11 | * LICENSE file in the root directory of this source tree. An additional grant
12 | * of patent rights can be found in the PATENTS file in the same directory.
13 | */
14 |
15 | #include "model.h"
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include "utils.h"
22 |
23 | namespace fasttext {
24 |
25 | Model::Model(std::shared_ptr wi,  // wi: input matrix; NOTE(review): the shared_ptr template arguments are missing in this listing - confirm against the original file
26 | std::shared_ptr wo,  // wo: output matrix
27 | std::shared_ptr args,
28 | int32_t seed)  // seed of this model's random generator (rng)
29 | : hidden_(args->dim), output_(wo->m_), grad_(args->dim), rng(seed)
30 | {
31 | wi_ = wi;
32 | wo_ = wo;
33 | args_ = args;
34 | isz_ = wi->m_;  // row count of the input matrix
35 | osz_ = wo->m_;  // row count of the output matrix
36 | hsz_ = args->dim;
37 | negpos = 0;
38 | loss_ = 0.0;
39 | nexamples_ = 1;  // starts at 1, so getLoss() never divides by zero
40 | }
41 |
42 | // One logistic step on output row `target`: scores the row against hidden_,
43 | // adds the scaled row to grad_ and the scaled hidden_ to the row, and returns
44 | // the negative log of the score (label) or of one minus the score (no label).
45 | real Model::binaryLogistic(int32_t target, bool label, real lr) {
46 |   const real prediction = utils::sigmoid(wo_->dotRow(hidden_, target));
47 |   const real step = lr * (real(label) - prediction);
48 |   grad_.addRow(*wo_, target, step);
49 |   wo_->addRow(hidden_, target, step);
50 |   return label ? -utils::log(prediction) : -utils::log(1.0 - prediction);
51 | }
53 |
54 | // Clears grad_, then runs one positive logistic step on `target` followed by
55 | // args_->neg negative steps on rows from getNegative(); returns the summed loss.
56 | real Model::negativeSampling(int32_t target, real lr) {
57 |   grad_.zero();
58 |   real loss = 0.0;
59 |   for (int32_t n = 0; n <= args_->neg; n++) {
60 |     const bool positive = (n == 0);
61 |     const int32_t row = positive ? target : getNegative(target);
62 |     loss += binaryLogistic(row, positive, lr);
63 |   }
64 |   return loss;
65 | }
66 |
67 | real Model::hierarchicalSoftmax(int32_t target, real lr) {  // sums the binary-logistic loss over every node on target's path to the tree root
68 | real loss = 0.0;
69 | grad_.zero();
70 | const std::vector& binaryCode = codes[target];  // NOTE(review): the element types are missing in this listing (template arguments appear stripped)
71 | const std::vector& pathToRoot = paths[target];
72 | for (int32_t i = 0; i < pathToRoot.size(); i++) {
73 | loss += binaryLogistic(pathToRoot[i], binaryCode[i], lr);  // path entry = output row of the inner node, code entry = its label
74 | }
75 | return loss;
76 | }
77 |
78 | // Stores softmax(wo_ * hidden) in `output`. The largest score is subtracted
79 | // before exponentiating.
80 | void Model::computeOutputSoftmax(Vector& hidden, Vector& output) const {
81 |   output.mul(*wo_, hidden);
82 |   real max = output[0];
83 |   for (int32_t row = 0; row < osz_; ++row) {
84 |     max = std::max(output[row], max);
85 |   }
86 |   real z = 0.0;
87 |   for (int32_t row = 0; row < osz_; ++row) {
88 |     output[row] = exp(output[row] - max);
89 |     z += output[row];
90 |   }
91 |   for (int32_t row = 0; row < osz_; ++row) {
92 |     output[row] /= z;
93 |   }
94 | }
92 |
93 | // Convenience overload working on the model's own hidden_ / output_ vectors.
94 | void Model::computeOutputSoftmax() {
95 |   this->computeOutputSoftmax(hidden_, output_);
96 | }
96 |
97 | // Full softmax step: clears grad_, computes the output distribution, updates
98 | // every output row towards the one-hot `target`, and returns the negative log
99 | // of the value computed for the target row.
100 | real Model::softmax(int32_t target, real lr) {
101 |   grad_.zero();
102 |   computeOutputSoftmax();
103 |   for (int32_t row = 0; row < osz_; ++row) {
104 |     real expected = 0.0;
105 |     if (row == target) {
106 |       expected = 1.0;
107 |     }
108 |     const real step = lr * (expected - output_[row]);
109 |     grad_.addRow(*wo_, row, step);
110 |     wo_->addRow(hidden_, row, step);
111 |   }
112 |   return -utils::log(output_[target]);
113 | }
108 |
109 | // Sets `hidden` to the mean of the input-matrix rows named by `input`.
110 | // An empty `input` leaves `hidden` at zero: the unguarded version multiplied
111 | // the zero vector by 1.0 / 0 and so filled it with NaN.
112 | // NOTE(review): the element type of `input` was missing in the listing this
113 | // block was taken from; int32_t is reconstructed from usage - confirm.
114 | void Model::computeHidden(const std::vector<int32_t>& input, Vector& hidden) const {
115 |   assert(hidden.size() == hsz_);
116 |   hidden.zero();
117 |   if (input.empty()) {
118 |     return;
119 |   }
120 |   for (auto it = input.cbegin(); it != input.cend(); ++it) {
121 |     hidden.addRow(*wi_, *it);
122 |   }
123 |   hidden.mul(1.0 / input.size());
124 | }
117 |
118 | bool Model::comparePairs(const std::pair &l,  // NOTE(review): the pair's template arguments are missing in this listing
119 | const std::pair &r) {
120 | return l.first > r.first;  // "greater than" ordering: with it heap.front() is the entry with the smallest .first
121 | }
122 |
123 | void Model::predict(const std::vector& input, int32_t k,  // collects up to k (score, id) pairs for `input` in `heap`; NOTE(review): template arguments are missing in this listing
124 | std::vector>& heap,
125 | Vector& hidden, Vector& output) const {
126 | assert(k > 0);
127 | heap.reserve(k + 1);  // one spare slot: an entry is pushed before the surplus one is popped
128 | computeHidden(input, hidden);
129 | if (args_->loss == loss_name::hs) {
130 | dfs(k, 2 * osz_ - 2, 0.0, heap, hidden);  // start at node 2 * osz_ - 2, the last node of the tree built by buildTree()
131 | } else {
132 | findKBest(k, heap, hidden, output);
133 | }
134 | std::sort_heap(heap.begin(), heap.end(), comparePairs);  // leaves the entries ordered by descending score
135 | }
136 |
137 | void Model::predict(const std::vector& input, int32_t k,  // overload that uses the model's own hidden_ / output_ vectors as scratch space
138 | std::vector>& heap) {
139 | predict(input, k, heap, hidden_, output_);
140 | }
141 |
142 | void Model::findKBest(int32_t k, std::vector>& heap,  // keeps the k largest log-softmax values in `heap`; NOTE(review): template arguments are missing in this listing
143 | Vector& hidden, Vector& output) const {
144 | computeOutputSoftmax(hidden, output);
145 | for (int32_t i = 0; i < osz_; i++) {
146 | if (heap.size() == k && utils::log(output[i]) < heap.front().first) {  // heap is full and this value is below the smallest kept one
147 | continue;
148 | }
149 | heap.push_back(std::make_pair(utils::log(output[i]), i));
150 | std::push_heap(heap.begin(), heap.end(), comparePairs);
151 | if (heap.size() > k) {  // over capacity: drop the entry with the smallest value
152 | std::pop_heap(heap.begin(), heap.end(), comparePairs);
153 | heap.pop_back();
154 | }
155 | }
156 | }
157 |
158 | void Model::dfs(int32_t k, int32_t node, real score,  // depth-first walk over the tree from `node`, keeping the k best-scoring leaves in `heap`
159 | std::vector>& heap,
160 | Vector& hidden) const {
161 | if (heap.size() == k && score < heap.front().first) {  // prune: heap is full and this score is below the smallest kept one
162 | return;
163 | }
164 |
165 | if (tree[node].left == -1 && tree[node].right == -1) {  // leaf: no children
166 | heap.push_back(std::make_pair(score, node));
167 | std::push_heap(heap.begin(), heap.end(), comparePairs);
168 | if (heap.size() > k) {
169 | std::pop_heap(heap.begin(), heap.end(), comparePairs);
170 | heap.pop_back();
171 | }
172 | return;
173 | }
174 |
175 | real f = utils::sigmoid(wo_->dotRow(hidden, node - osz_));  // inner node `node` uses output row node - osz_
176 | dfs(k, tree[node].left, score + utils::log(1.0 - f), heap, hidden);
177 | dfs(k, tree[node].right, score + utils::log(f), heap, hidden);
178 | }
179 |
180 | void Model::update(const std::vector& input, int32_t target, real lr) {  // one training step for (input, target); NOTE(review): the element type of `input` is missing in this listing
181 | assert(target >= 0);
182 | assert(target < osz_);
183 | if (input.size() == 0) return;  // nothing to learn from an empty input
184 | computeHidden(input, hidden_);
185 | if (args_->loss == loss_name::ns) {
186 | loss_ += negativeSampling(target, lr);
187 | } else if (args_->loss == loss_name::hs) {
188 | loss_ += hierarchicalSoftmax(target, lr);
189 | } else {
190 | loss_ += softmax(target, lr);
191 | }
192 | nexamples_ += 1;
193 |
194 | if (args_->model == model_name::sup) {
195 | grad_.mul(1.0 / input.size());  // supervised models scale the gradient by 1 / input size
196 | }
197 | for (auto it = input.cbegin(); it != input.cend(); ++it) {
198 | wi_->addRow(grad_, *it, 1.0);  // add the gradient to every input row that took part
199 | }
200 | }
201 |
202 | void Model::setTargetCounts(const std::vector& counts) {  // prepares the structure the configured loss needs from the per-target counts; NOTE(review): the element type is missing in this listing
203 | assert(counts.size() == osz_);
204 | if (args_->loss == loss_name::ns) {
205 | initTableNegatives(counts);  // negative sampling: build the sampling table
206 | }
207 | if (args_->loss == loss_name::hs) {
208 | buildTree(counts);  // hierarchical softmax: build the tree, paths and codes
209 | }
210 | }
211 |
212 | void Model::initTableNegatives(const std::vector& counts) {  // fills `negatives` with target ids, each repeated in proportion to the square root of its count, then shuffles the table
213 | real z = 0.0;
214 | for (size_t i = 0; i < counts.size(); i++) {
215 | z += pow(counts[i], 0.5);  // normaliser: sum of the square roots of all counts
216 | }
217 | for (size_t i = 0; i < counts.size(); i++) {
218 | real c = pow(counts[i], 0.5);
219 | for (size_t j = 0; j < c * NEGATIVE_TABLE_SIZE / z; j++) {  // id i receives its share of roughly NEGATIVE_TABLE_SIZE slots
220 | negatives.push_back(i);
221 | }
222 | }
223 | std::shuffle(negatives.begin(), negatives.end(), rng);
224 | }
225 |
226 | // Returns the next entry of the `negatives` table that differs from `target`,
227 | // advancing the cyclic read position negpos past every entry it looks at.
228 | int32_t Model::getNegative(int32_t target) {
229 |   for (;;) {
230 |     const int32_t candidate = negatives[negpos];
231 |     negpos = (negpos + 1) % negatives.size();
232 |     if (candidate != target) {
233 |       return candidate;
234 |     }
235 |   }
236 | }
234 |
235 | void Model::buildTree(const std::vector& counts) {  // builds a binary tree over the osz_ targets by repeatedly joining the two smallest-count nodes, then records each target's path and code; NOTE(review): template arguments are missing in this listing
236 | tree.resize(2 * osz_ - 1);  // osz_ leaves followed by osz_ - 1 inner nodes
237 | for (int32_t i = 0; i < 2 * osz_ - 1; i++) {
238 | tree[i].parent = -1;
239 | tree[i].left = -1;
240 | tree[i].right = -1;
241 | tree[i].count = 1e15;  // large placeholder so nodes that are not built yet never compare as the smaller one
242 | tree[i].binary = false;
243 | }
244 | for (int32_t i = 0; i < osz_; i++) {
245 | tree[i].count = counts[i];
246 | }
247 | int32_t leaf = osz_ - 1;  // next unused leaf, walking from the last index down; NOTE(review): this only yields the smallest count first if counts are sorted in descending order - confirm with the caller
248 | int32_t node = osz_;  // next unused inner node, walking upwards
249 | for (int32_t i = osz_; i < 2 * osz_ - 1; i++) {
250 | int32_t mini[2];  // the two nodes that become the children of inner node i
251 | for (int32_t j = 0; j < 2; j++) {
252 | if (leaf >= 0 && tree[leaf].count < tree[node].count) {
253 | mini[j] = leaf--;
254 | } else {
255 | mini[j] = node++;
256 | }
257 | }
258 | tree[i].left = mini[0];
259 | tree[i].right = mini[1];
260 | tree[i].count = tree[mini[0]].count + tree[mini[1]].count;
261 | tree[mini[0]].parent = i;
262 | tree[mini[1]].parent = i;
263 | tree[mini[1]].binary = true;  // right children carry the bit `true`
264 | }
265 | for (int32_t i = 0; i < osz_; i++) {
266 | std::vector path;
267 | std::vector code;
268 | int32_t j = i;
269 | while (tree[j].parent != -1) {  // climb from leaf i until the root (the node without parent)
270 | path.push_back(tree[j].parent - osz_);  // stored as output-row index of the inner node
271 | code.push_back(tree[j].binary);
272 | j = tree[j].parent;
273 | }
274 | paths.push_back(path);
275 | codes.push_back(code);
276 | }
277 | }
278 |
279 | // Average loss: the accumulated loss divided by the example counter.
280 | real Model::getLoss() const {
281 |   const real meanLoss = loss_ / nexamples_;
282 |   return meanLoss;
283 | }
282 |
283 | }
284 |
--------------------------------------------------------------------------------
/src/model.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_MODEL_H
11 | #define FASTTEXT_MODEL_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 |
18 | #include "args.h"
19 | #include "matrix.h"
20 | #include "vector.h"
21 | #include "real.h"
22 |
23 | namespace fasttext {
24 |
25 | struct Node {  // one node of the tree that Model::buildTree() creates
26 | int32_t parent;  // index of the parent node, -1 while there is none
27 | int32_t left;  // index of the left child, -1 for a leaf
28 | int32_t right;  // index of the right child, -1 for a leaf
29 | int64_t count;  // count of the node; an inner node holds the sum of its children
30 | bool binary;  // true when the node is the right child of its parent
31 | };
32 |
33 | class Model {  // training and prediction state over an input matrix (wi_) and an output matrix (wo_); NOTE(review): template arguments are missing throughout this listing
34 | private:
35 | std::shared_ptr wi_;  // input matrix
36 | std::shared_ptr wo_;  // output matrix
37 | std::shared_ptr args_;
38 | Vector hidden_;  // args->dim elements
39 | Vector output_;  // one element per output row
40 | Vector grad_;  // args->dim elements
41 | int32_t hsz_;  // args->dim
42 | int32_t isz_;  // row count of wi_
43 | int32_t osz_;  // row count of wo_
44 | real loss_;  // loss accumulated by update()
45 | int64_t nexamples_;  // starts at 1 and grows by one per update()
46 |
47 | static bool comparePairs(const std::pair&,
48 | const std::pair&);
49 |
50 | std::vector negatives;  // sampling table filled by initTableNegatives()
51 | size_t negpos;  // cyclic read position in `negatives`
52 |
53 | int32_t getNegative(int32_t target);
54 |
55 | std::vector< std::vector > paths;  // per target: output rows of the inner nodes from its leaf up to the root
56 | std::vector< std::vector > codes;  // per target: the branch bits that belong to `paths`
57 | std::vector tree;  // filled by buildTree()
58 |
59 | static const int32_t NEGATIVE_TABLE_SIZE = 10000000;
60 |
61 | public:
62 | Model(std::shared_ptr, std::shared_ptr,
63 | std::shared_ptr, int32_t);  // (input matrix, output matrix, arguments, rng seed)
64 |
65 | real binaryLogistic(int32_t, bool, real);
66 | real negativeSampling(int32_t, real);
67 | real hierarchicalSoftmax(int32_t, real);
68 | real softmax(int32_t, real);
69 |
70 | void predict(const std::vector&, int32_t,
71 | std::vector>&,
72 | Vector&, Vector&) const;
73 | void predict(const std::vector&, int32_t,
74 | std::vector>&);
75 | void dfs(int32_t, int32_t, real,
76 | std::vector>&,
77 | Vector&) const;
78 | void findKBest(int32_t, std::vector>&,
79 | Vector&, Vector&) const;
80 | void update(const std::vector&, int32_t, real);
81 | void computeHidden(const std::vector&, Vector&) const;
82 | void computeOutputSoftmax(Vector&, Vector&) const;
83 | void computeOutputSoftmax();
84 |
85 | void setTargetCounts(const std::vector&);
86 | void initTableNegatives(const std::vector&);
87 | void buildTree(const std::vector&);
88 | real getLoss() const;
89 |
90 | std::minstd_rand rng;  // public: FastText passes it to Dictionary::getLine()
91 | };
92 |
93 | }
94 |
95 | #endif
96 |
--------------------------------------------------------------------------------
/src/real.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_REAL_H
11 | #define FASTTEXT_REAL_H
12 |
13 | namespace fasttext {
14 |
15 | // Floating-point type used for all weights, vectors and losses.
16 | using real = float;
17 |
18 | }
18 |
19 | #endif
20 |
--------------------------------------------------------------------------------
/src/utils.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 | /**
7 | * Copyright (c) 2016-present, Facebook, Inc.
8 | * All rights reserved.
9 | *
10 | * This source code is licensed under the BSD-style license found in the
11 | * LICENSE file in the root directory of this source tree. An additional grant
12 | * of patent rights can be found in the PATENTS file in the same directory.
13 | */
14 |
15 | #include "utils.h"
16 |
17 | #include
18 | #include
19 |
20 | namespace fasttext {
21 |
22 | namespace utils {
23 |   // Lookup tables; allocated by initTables() and released by freeTables().
24 |   real* t_sigmoid = nullptr;
25 |   real* t_log = nullptr;
26 |
27 |   // Table-based logarithm; returns 0 for every x above 1.
28 |   real log(real x) {
29 |     if (x > 1.0) {
30 |       return 0.0;
31 |     }
32 |     const int slot = int(x * LOG_TABLE_SIZE);
33 |     return t_log[slot];
34 |   }
35 |
36 |   // Table-based sigmoid; saturates to 0 below -MAX_SIGMOID and to 1 above
37 |   // +MAX_SIGMOID.
38 |   real sigmoid(real x) {
39 |     if (x < -MAX_SIGMOID) {
40 |       return 0.0;
41 |     }
42 |     if (x > MAX_SIGMOID) {
43 |       return 1.0;
44 |     }
45 |     const int slot = int((x + MAX_SIGMOID) * SIGMOID_TABLE_SIZE / MAX_SIGMOID / 2);
46 |     return t_sigmoid[slot];
47 |   }
48 |
49 |   // Builds both lookup tables (each builder is a no-op if its table exists).
50 |   void initTables() {
51 |     initSigmoid();
52 |     initLog();
53 |   }
54 |
55 |   // Fills t_sigmoid with SIGMOID_TABLE_SIZE + 1 samples over
56 |   // [-MAX_SIGMOID, MAX_SIGMOID].
57 |   void initSigmoid() {
58 |     if (t_sigmoid != nullptr) return;
59 |     t_sigmoid = new real[SIGMOID_TABLE_SIZE + 1];
60 |     for (int slot = 0; slot <= SIGMOID_TABLE_SIZE; slot++) {
61 |       real x = real(slot * 2 * MAX_SIGMOID) / SIGMOID_TABLE_SIZE - MAX_SIGMOID;
62 |       t_sigmoid[slot] = 1.0 / (1.0 + std::exp(-x));
63 |     }
64 |   }
65 |
66 |   // Fills t_log with LOG_TABLE_SIZE + 1 samples over [0, 1]; the 1e-5 offset
67 |   // keeps the first sample away from log(0).
68 |   void initLog() {
69 |     if (t_log != nullptr) return;
70 |     t_log = new real[LOG_TABLE_SIZE + 1];
71 |     for (int slot = 0; slot <= LOG_TABLE_SIZE; slot++) {
72 |       real x = (real(slot) + 1e-5) / LOG_TABLE_SIZE;
73 |       t_log[slot] = std::log(x);
74 |     }
75 |   }
76 |
77 |   // Releases both tables and resets the pointers so that initTables() can
78 |   // build them again.
79 |   void freeTables() {
80 |     delete[] t_sigmoid;
81 |     t_sigmoid = nullptr;
82 |     delete[] t_log;
83 |     t_log = nullptr;
84 |   }
85 |
86 |   // Returns the stream position at the end of the file; the read position is
87 |   // left there.
88 |   int64_t size(std::ifstream& ifs) {
89 |     ifs.seekg(std::streamoff(0), std::ios::end);
90 |     const std::streampos endPosition = ifs.tellg();
91 |     return endPosition;
92 |   }
93 |
94 |   // Clears the stream's error state and moves the read position to `pos`.
95 |   void seek(std::ifstream& ifs, int64_t pos) {
96 |     ifs.clear();
97 |     ifs.seekg(std::streampos(pos));
98 |   }
99 | }
100 |
101 | }
87 |
--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_UTILS_H
11 | #define FASTTEXT_UTILS_H
12 |
13 | #include
14 |
15 | #include "real.h"
16 |
17 | #define SIGMOID_TABLE_SIZE 512  // number of intervals of the sigmoid lookup table (the table holds one entry more)
18 | #define MAX_SIGMOID 8  // utils::sigmoid() returns 0 below -MAX_SIGMOID and 1 above +MAX_SIGMOID
19 | #define LOG_TABLE_SIZE 512  // number of intervals of the log lookup table (the table holds one entry more)
20 |
21 | namespace fasttext {
22 |
23 | namespace utils {
24 |
25 |   // Table-based approximations; they read the tables built by initTables().
26 |   real log(real x);
27 |   real sigmoid(real x);
28 |
29 |   // Allocation and release of the lookup tables.
30 |   void initTables();
31 |   void initSigmoid();
32 |   void initLog();
33 |   void freeTables();
34 |
35 |   // File helpers: end position of a stream, and repositioning of a stream.
36 |   int64_t size(std::ifstream& ifs);
37 |   void seek(std::ifstream& ifs, int64_t pos);
38 | }
39 |
40 | }
38 |
39 | #endif
40 |
--------------------------------------------------------------------------------
/src/vector.cc:
--------------------------------------------------------------------------------
1 | # include
2 | // [[Rcpp::depends("RcppArmadillo")]]
3 | // [[Rcpp::plugins(openmp)]]
4 | // [[Rcpp::plugins(cpp11)]]
5 |
6 | /**
7 | * Copyright (c) 2016-present, Facebook, Inc.
8 | * All rights reserved.
9 | *
10 | * This source code is licensed under the BSD-style license found in the
11 | * LICENSE file in the root directory of this source tree. An additional grant
12 | * of patent rights can be found in the PATENTS file in the same directory.
13 | */
14 |
15 | #include "vector.h"
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include "matrix.h"
22 | #include "utils.h"
23 |
24 | namespace fasttext {
25 |
26 | // Allocates an array of m reals. No element values are written here, so
27 | // callers that need a cleared vector must call zero() afterwards.
28 | Vector::Vector(int64_t m) {
29 |   m_ = m;
30 |   data_ = new real[m];
31 | }
32 |
33 | // Releases the array allocated by the constructor.
34 | Vector::~Vector() {
35 |   delete[] data_;
36 | }
37 |
38 | // Returns the number of elements.
39 | int64_t Vector::size() const {
40 |   return m_;
41 | }
42 |
43 | // Sets every element to zero.
44 | void Vector::zero() {
45 |   for (int64_t i = 0; i < m_; i++) {
46 |     data_[i] = 0.0;
47 |   }
48 | }
49 |
50 | // Multiplies every element by the scalar a, in place.
51 | void Vector::mul(real a) {
52 |   for (int64_t i = 0; i < m_; i++) {
53 |     data_[i] *= a;
54 |   }
55 | }
56 |
57 | // Adds row i of A to this vector. A.data_ is indexed row-major: element
58 | // (i, j) lives at i * A.n_ + j. Requires 0 <= i < A.m_ and size() == A.n_.
59 | void Vector::addRow(const Matrix& A, int64_t i) {
60 |   assert(i >= 0);
61 |   assert(i < A.m_);
62 |   assert(m_ == A.n_);
63 |   for (int64_t j = 0; j < A.n_; j++) {
64 |     data_[j] += A.data_[i * A.n_ + j];
65 |   }
66 | }
67 |
68 | // Adds a * (row i of A) to this vector. Requires 0 <= i < A.m_ and size() == A.n_.
69 | void Vector::addRow(const Matrix& A, int64_t i, real a) {
70 |   assert(i >= 0);
71 |   assert(i < A.m_);
72 |   assert(m_ == A.n_);
73 |   for (int64_t j = 0; j < A.n_; j++) {
74 |     data_[j] += a * A.data_[i * A.n_ + j];
75 |   }
76 | }
77 |
78 | // Overwrites this vector with the matrix-vector product A * vec.
79 | // Requires A.m_ == size() and A.n_ == vec.size().
80 | void Vector::mul(const Matrix& A, const Vector& vec) {
81 |   assert(A.m_ == m_);
82 |   assert(A.n_ == vec.m_);
83 |   for (int64_t i = 0; i < m_; i++) {
84 |     data_[i] = 0.0;
85 |     for (int64_t j = 0; j < A.n_; j++) {
86 |       data_[i] += A.data_[i * A.n_ + j] * vec.data_[j];
87 |     }
88 |   }
89 | }
90 |
91 | // Returns the index of the largest element. On ties the lowest index wins,
92 | // because only a strictly greater value replaces the current maximum.
93 | // The vector must be non-empty: data_[0] is read unconditionally.
94 | int64_t Vector::argmax() {
95 |   assert(m_ > 0);  // same debug-time precondition style as addRow()/mul()
96 |   real max = data_[0];
97 |   int64_t argmax = 0;
98 |   for (int64_t i = 1; i < m_; i++) {
99 |     if (data_[i] > max) {
100 |       max = data_[i];
101 |       argmax = i;
102 |     }
103 |   }
104 |   return argmax;
105 | }
106 |
107 | // Element access; the index is not range-checked.
108 | real& Vector::operator[](int64_t i) {
109 |   return data_[i];
110 | }
111 |
112 | // Read-only element access; the index is not range-checked.
113 | const real& Vector::operator[](int64_t i) const {
114 |   return data_[i];
115 | }
116 |
117 | // Writes every element followed by a single space (so the output ends with
118 | // a space). The precision of 5 set here remains in effect on os afterwards.
119 | std::ostream& operator<<(std::ostream& os, const Vector& v)
120 | {
121 |   os << std::setprecision(5);
122 |   for (int64_t j = 0; j < v.m_; j++) {
123 |     os << v.data_[j] << ' ';
124 |   }
125 |   return os;
126 | }
127 |
128 | }
129 |
--------------------------------------------------------------------------------
/src/vector.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016-present, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree. An additional grant
7 | * of patent rights can be found in the PATENTS file in the same directory.
8 | */
9 |
10 | #ifndef FASTTEXT_VECTOR_H
11 | #define FASTTEXT_VECTOR_H
12 |
13 | #include <cstdint>
14 | #include <ostream>
15 |
16 | #include "real.h"
17 |
18 | namespace fasttext {
19 |
20 | class Matrix;  // forward declaration; Matrix is only used by reference below
21 |
22 | class Vector {
23 | // NOTE(review): data_ is a raw owning pointer (deleted in ~Vector, see vector.cc) and no copy constructor/assignment is declared, so a copied Vector would delete the same array twice — confirm no caller copies.
24 | public:
25 | int64_t m_;  // number of elements
26 | real* data_;  // element storage, allocated with new[] in the constructor
27 |
28 | explicit Vector(int64_t);  // allocates storage for the given number of elements
29 | ~Vector();
30 |
31 | real& operator[](int64_t);  // element access without range checking
32 | const real& operator[](int64_t) const;
33 |
34 | int64_t size() const;  // returns m_
35 | void zero();  // sets every element to 0
36 | void mul(real);  // scales every element in place
37 | void addRow(const Matrix&, int64_t);  // this += row i of the matrix
38 | void addRow(const Matrix&, int64_t, real);  // this += a * (row i of the matrix)
39 | void mul(const Matrix&, const Vector&);  // this = matrix * vector
40 | int64_t argmax();  // index of the largest element (first one on ties)
41 | };
42 |
43 | std::ostream& operator<<(std::ostream&, const Vector&);  // prints the elements separated by spaces
44 |
45 | }
46 |
47 | #endif
48 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(fastTextR)
3 |
4 | test_check("fastTextR")
5 |
--------------------------------------------------------------------------------
/tests/testthat/test-wrapper_fasttext.R:
--------------------------------------------------------------------------------
1 |
2 | # Choose the directory separator used below to assemble the test file paths.
3 | if (.Platform$OS.type == "windows") {
4 |
5 |   paste_delim <- "\\"
6 |
7 | } else if (.Platform$OS.type == "unix") {
8 |
9 |   paste_delim <- "/"
10 | }
11 |
12 |
13 | context('fasttext functions')
14 |
15 |
16 | #--------------------------
17 | # 'skipgram_cbow' function
18 | #--------------------------
19 |
20 |
21 | testthat::test_that("it returns an error if the input_path parameter is invalid", {
22 |   # The corpus path points into 'test_data1', which is not one of the repository's test fixture folders.
23 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data1", "doc.txt", sep = paste_delim)))
24 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
25 |
26 |   testthat::expect_error(
27 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
28 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
29 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
30 |                   t = 0.0001, verbose = 2)
31 |   )
32 | })
33 |
34 |
35 |
36 | testthat::test_that("it returns an error if the output_path parameter is invalid", {
37 |   # The output path points into 'test_data1', which is not one of the repository's test fixture folders.
38 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
39 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data1", "model", sep = paste_delim)))
40 |
41 |   testthat::expect_error(
42 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
43 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
44 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
45 |                   t = 0.0001, verbose = 2)
46 |   )
47 | })
48 |
49 |
50 |
51 | testthat::test_that("it returns an error if the method parameter is not one of c('skipgram', 'cbow')", {
52 |   # Argument under test: method = "unknown".
53 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
54 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
55 |
56 |   testthat::expect_error(
57 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "unknown",
58 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
59 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
60 |                   t = 0.0001, verbose = 2)
61 |   )
62 | })
63 |
64 |
65 | testthat::test_that("it returns an error if the learning rate parameter is less than or equal to 0.0", {
66 |   # Argument under test: lr = 0.0.
67 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
68 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
69 |
70 |   testthat::expect_error(
71 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
72 |                   lr = 0.0, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
73 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
74 |                   t = 0.0001, verbose = 2)
75 |   )
76 | })
77 |
78 |
79 |
80 | testthat::test_that("it returns an error if the lrUpdateRate parameter is less than or equal to 0.0", {
81 |   # Argument under test: lrUpdateRate = 0.
82 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
83 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
84 |
85 |   testthat::expect_error(
86 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
87 |                   lr = 0.1, lrUpdateRate = 0, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
88 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
89 |                   t = 0.0001, verbose = 2)
90 |   )
91 | })
92 |
93 |
94 | testthat::test_that("it returns an error if the dim parameter is less than or equal to 0.0", {
95 |   # Argument under test: dim = 0.
96 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
97 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
98 |
99 |   testthat::expect_error(
100 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
101 |                   lr = 0.1, lrUpdateRate = 100, dim = 0, ws = 5, epoch = 5, minCount = 1, neg = 5,
102 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
103 |                   t = 0.0001, verbose = 2)
104 |   )
105 | })
106 |
107 |
108 | testthat::test_that("it returns an error if the ws parameter is less than or equal to 0.0", {
109 |   # Argument under test: ws = 0.
110 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
111 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
112 |
113 |   testthat::expect_error(
114 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
115 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 0, epoch = 5, minCount = 1, neg = 5,
116 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
117 |                   t = 0.0001, verbose = 2)
118 |   )
119 | })
120 |
121 |
122 |
123 | testthat::test_that("it returns an error if the epoch parameter is less than or equal to 0.0", {
124 |   # Argument under test: epoch = 0.
125 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
126 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
127 |
128 |   testthat::expect_error(
129 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
130 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 0, minCount = 1, neg = 5,
131 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
132 |                   t = 0.0001, verbose = 2)
133 |   )
134 | })
135 |
136 |
137 |
138 | testthat::test_that("it returns an error if the minCount parameter is less than or equal to 0.0", {
139 |   # Argument under test: minCount = 0.
140 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
141 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
142 |
143 |   testthat::expect_error(
144 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
145 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 0, neg = 5,
146 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
147 |                   t = 0.0001, verbose = 2)
148 |   )
149 | })
150 |
151 |
152 |
153 | testthat::test_that("it returns an error if the neg parameter is less than or equal to 0.0", {
154 |   # Argument under test: neg = 0.
155 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
156 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
157 |
158 |   testthat::expect_error(
159 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
160 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 0,
161 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
162 |                   t = 0.0001, verbose = 2)
163 |   )
164 | })
165 |
166 |
167 |
168 | testthat::test_that("it returns an error if the wordNgrams parameter is less than or equal to 0.0", {
169 |   # Argument under test: wordNgrams = 0.
170 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
171 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
172 |
173 |   testthat::expect_error(
174 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
175 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
176 |                   wordNgrams = 0, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
177 |                   t = 0.0001, verbose = 2)
178 |   )
179 | })
180 |
181 |
182 |
183 | testthat::test_that("it returns an error if the loss parameter is not one of c('ns', 'hs', 'softmax')", {
184 |   # Argument under test: loss = "unknown".
185 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
186 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
187 |
188 |   testthat::expect_error(
189 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
190 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
191 |                   wordNgrams = 1, loss = "unknown", bucket = 2000000, minn = 0, maxn = 0, thread = 6,
192 |                   t = 0.0001, verbose = 2)
193 |   )
194 | })
195 |
196 |
197 | testthat::test_that("it returns an error if the bucket parameter is less than or equal to 0.0", {
198 |   # Argument under test: bucket = 0.
199 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
200 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
201 |
202 |   testthat::expect_error(
203 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
204 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
205 |                   wordNgrams = 1, loss = "ns", bucket = 0, minn = 0, maxn = 0, thread = 6,
206 |                   t = 0.0001, verbose = 2)
207 |   )
208 | })
209 |
210 |
211 |
212 |
213 | testthat::test_that("it returns an error if the minn parameter is less than 0", {
214 |   # Argument under test: minn = -1.
215 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
216 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
217 |
218 |   testthat::expect_error(
219 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
220 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
221 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = -1, maxn = 0, thread = 6,
222 |                   t = 0.0001, verbose = 2)
223 |   )
224 | })
225 |
226 |
227 |
228 | testthat::test_that("it returns an error if the maxn parameter is less than 0", {
229 |   # Argument under test: maxn = -1.
230 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
231 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
232 |
233 |   testthat::expect_error(
234 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
235 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
236 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = -1, thread = 6,
237 |                   t = 0.0001, verbose = 2)
238 |   )
239 | })
240 |
241 |
242 |
243 | testthat::test_that("it returns an error if the thread parameter is less than 1", {
244 |   # Argument under test: thread = 0.
245 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
246 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
247 |
248 |   testthat::expect_error(
249 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
250 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
251 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 0,
252 |                   t = 0.0001, verbose = 2)
253 |   )
254 | })
255 |
256 |
257 |
258 | testthat::test_that("it returns an error if the t parameter is less than or equal to 0.0", {
259 |   # Argument under test: t = 0.0.
260 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
261 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
262 |
263 |   testthat::expect_error(
264 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
265 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
266 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 1,
267 |                   t = 0.0, verbose = 2)
268 |   )
269 | })
270 |
271 |
272 |
273 | testthat::test_that("it returns an error if the verbose parameter is less than 0", {
274 |   # Argument under test: verbose = -1.
275 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
276 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
277 |
278 |   testthat::expect_error(
279 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
280 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
281 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 1,
282 |                   t = 0.0001, verbose = -1)
283 |   )
284 | })
285 |
286 |
287 |
288 | testthat::test_that("it returns an error if the verbose parameter is greater than 2", {
289 |   # Argument under test: verbose = 3.
290 |   corpus_file <- paste0(getwd(), path.expand(paste("", "test_data", "doc.txt", sep = paste_delim)))
291 |   model_stem <- paste0(getwd(), path.expand(paste("", "test_data", "model", sep = paste_delim)))
292 |
293 |   testthat::expect_error(
294 |     skipgram_cbow(input_path = corpus_file, output_path = model_stem, method = "skipgram",
295 |                   lr = 0.1, lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5,
296 |                   wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, maxn = 0, thread = 1,
297 |                   t = 0.0001, verbose = 3)
298 |   )
299 | })
300 |
301 |
302 |
303 | testthat::test_that("it saves the output to a file if all parameters are valid", {
304 |
305 | path_in = paste0(getwd(), path.expand(paste(c("", "test_data", "doc.txt"), collapse = paste_delim)))
306 |
307 | path_out = paste0(getwd(), path.expand(paste(c("", "test_data", "model"), collapse = paste_delim)))
308 |
309 | # Run the training call inside expect_silent() so that any output, message or warning raised by skipgram_cbow()
310 | # itself is what gets checked; handing expect_silent() an already-evaluated result would always pass.
311 | testthat::expect_silent( skipgram_cbow(input_path = path_in, output_path = path_out, method = "skipgram", lr = 0.1, lrUpdateRate = 100, dim = 10,
312 |
313 | ws = 5, epoch = 5, minCount = 1, neg = 5, wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0,
314 |
315 | maxn = 0, thread = 1, t = 0.0001, verbose = 0) )
316 | })
317 |
318 |
319 |
320 |
321 | #---------------------------------
322 | # 'predict_unknown_words' function
323 | #---------------------------------
324 |
325 |
326 | testthat::test_that("it returns an error if the skipgram_cbow_model_output parameter is not a valid character string path", {
327 |   # The model path is passed as NULL.
328 |   testthat::expect_error(predict_unknown_words(skipgram_cbow_model_output = NULL))
329 | })
330 |
331 |
332 | testthat::test_that("it returns an error if the unknown_words_path parameter is NULL", {
333 |   # The model path is a regular string; unknown_words_path = NULL is the argument under test.
334 |   model_file <- paste0(getwd(), path.expand(paste("", "test_data", "model.bin", sep = paste_delim)))
335 |
336 |   testthat::expect_error(predict_unknown_words(skipgram_cbow_model_output = model_file, unknown_words_path = NULL))
337 | })
338 |
339 |
340 | testthat::test_that("it returns an error if the output_path parameter is NULL", {
341 |   # Model and query paths are regular strings; output_path = NULL is the argument under test.
342 |   model_file <- paste0(getwd(), path.expand(paste("", "test_data", "model.bin", sep = paste_delim)))
343 |   queries_file <- paste0(getwd(), path.expand(paste("", "test_data", "queries.txt", sep = paste_delim)))
344 |
345 |   testthat::expect_error(
346 |     predict_unknown_words(skipgram_cbow_model_output = model_file, unknown_words_path = queries_file, output_path = NULL))
347 | })
348 |
349 |
350 | testthat::test_that("it returns an error if the output_path parameter is not a valid character string path", {
351 |   # output_path is given as an empty list instead of a character string.
352 |   model_file <- paste0(getwd(), path.expand(paste("", "test_data", "model.bin", sep = paste_delim)))
353 |   queries_file <- paste0(getwd(), path.expand(paste("", "test_data", "queries.txt", sep = paste_delim)))
354 |
355 |   testthat::expect_error(
356 |     predict_unknown_words(skipgram_cbow_model_output = model_file, unknown_words_path = queries_file, output_path = list()))
357 | })
358 |
359 |
360 | testthat::test_that("it returns an error if the verbose parameter is not a boolean", {
361 |   # verbose is passed as the character string 'FALSE' rather than a logical value.
362 |   model_file <- paste0(getwd(), path.expand(paste("", "test_data", "model.bin", sep = paste_delim)))
363 |   queries_file <- paste0(getwd(), path.expand(paste("", "test_data", "queries.txt", sep = paste_delim)))
364 |   vectors_out <- paste0(getwd(), path.expand(paste("", "test_data", "VECS.txt", sep = paste_delim)))
365 |
366 |   testthat::expect_error(
367 |     predict_unknown_words(skipgram_cbow_model_output = model_file, unknown_words_path = queries_file, output_path = vectors_out, verbose = 'FALSE')
368 |   )
369 | })
370 |
371 |
372 |
373 | testthat::test_that("it returns word vectors to the specified folder", {
374 |   # All three paths are plain strings and verbose is FALSE; the call is expected to run silently.
375 |   model_file <- paste0(getwd(), path.expand(paste("", "test_data", "model.bin", sep = paste_delim)))
376 |   queries_file <- paste0(getwd(), path.expand(paste("", "test_data", "queries.txt", sep = paste_delim)))
377 |   vectors_out <- paste0(getwd(), path.expand(paste("", "test_data", "VECS", sep = paste_delim)))
378 |
379 |   testthat::expect_silent(
380 |     predict_unknown_words(skipgram_cbow_model_output = model_file, unknown_words_path = queries_file, output_path = vectors_out, verbose = FALSE)
381 |   )
382 | })
383 |
384 |
--------------------------------------------------------------------------------
/tests/testthat/test_data/model.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlampros/fastTextR/2cd6fef56d400128779a2079b13b92a094baefeb/tests/testthat/test_data/model.bin
--------------------------------------------------------------------------------
/tests/testthat/test_data/queries.txt:
--------------------------------------------------------------------------------
1 | squishing
2 | squirt
3 | undated
4 | undatable
5 | circumvents
6 | beat
7 | circumvents
8 | ebb
9 | dispossess
10 | deprive
11 | provincialism
12 | narrow-mindedness
13 | provincialism
14 | partiality
15 | instrumentality
16 | department
17 | instrumentality
18 | utility
19 | involvement
20 | action
21 | involvement
22 | implication
23 | ecclesiastic
24 | clergyman
25 | brigadier
26 | general
27 | carbonic
28 | chemical
29 | carbonic
30 | paper
31 | aspirate
32 | pronounce
33 | aspirate
34 | remove
35 | monotype
36 | machine
37 | incommensurate
38 | incommensurable
39 | campfires
40 | fire
41 | cognizance
42 | knowing
43 | urbanize
44 | change
45 | imperfection
46 | state
47 | assessment
48 | charge
49 | assessment
50 | assay
51 | incubate
52 | breed
53 | incubate
54 | develop
55 | principality
56 | domain
57 | vicarious
58 | abnormal
59 | vicarious
60 | secondary
61 | ungraceful
62 | awkward
63 | unsighted
64 | color-blind
65 | socialise
66 | educate
67 | socialise
68 | swing
69 | diagonals
70 | line
71 | diagonals
72 | heterosexual
73 | naturalise
74 | adapt
75 | insubordinate
76 | rebellious
77 | insubordinate
78 | defiant
79 | subdividing
80 | subdivide
81 | subdividing
82 | separate
83 | antifeminism
84 | sexism
85 | circumcising
86 | cut
87 | circumcising
88 | remove
89 | excommunicate
90 | oust
91 | accomplished
92 | over
93 | attackers
94 | wrongdoer
95 | contravened
96 | disagree
97 | contravened
98 | transgress
99 | tenderize
100 | change
101 | blithering
102 | chatter
103 | resurfacing
104 | coat
105 | resurfacing
106 | surface
107 | friendships
108 | brotherhood
109 | soulfully
110 | emotional
111 | elector
112 | voter
113 | intentionality
114 | intended
115 | vulgarism
116 | profanity
117 | vulgarism
118 | inelegance
119 | preliterate
120 | illiterate
121 | preliterate
122 | noncivilized
123 | retrying
124 | hear
125 | wanderers
126 | program
127 | wanderers
128 | nomad
129 | marginalize
130 | interact
131 | hyperlink
132 | link
133 | inducted
134 | admit
135 | inducted
136 | install
137 | entrapping
138 | capture
139 | entrapping
140 | deceive
141 | alleviated
142 | comfort
143 | alleviated
144 | help
145 | radiators
146 | beginning
147 | radiators
148 | system
149 | postmodernism
150 | genre
151 | excavations
152 | site
153 | excavations
154 | removal
155 | comfortable
156 | cozy
157 | unfeathered
158 | unfledged
159 | unfeathered
160 | plucked
161 | assigned
162 | regiment
163 | assigned
164 | allow
165 | listeners
166 | eavesdropper
167 | unheralded
168 | unexpected
169 | inabilities
170 | insufficiency
171 | inabilities
172 | incomprehension
173 | monocultures
174 | culture
175 | tricolour
176 | flag
177 | omnipotence
178 | state
179 | mingles
180 | change
181 | calcify
182 | harden
183 | calcify
184 | change
185 | disinheritance
186 | discontinuance
187 | interwove
188 | braid
189 | cession
190 | relinquishment
191 | dwarfish
192 | small
193 | assessments
194 | charge
195 | assessments
196 | classification
197 | prescriptions
198 | medicine
199 | prescriptions
200 | direction
201 | antipsychotic
202 | lithium
203 | circumferential
204 | peripheral
205 | roosters
206 | cockerel
207 | nonpublic
208 | private
209 | yodeling
210 | sing
211 | yodeling
212 | singing
213 | autoerotic
214 | sexy
215 | unisons
216 | concurrence
217 | unisons
218 | agreement
219 | disassembled
220 | destroy
221 | preteens
222 | juvenile
223 | unaccessible
224 | pathless
225 | monogram
226 | symbol
227 | opalescence
228 | brightness
229 | estrogenic
230 | hormone
231 | misleading
232 | beat
233 | consubstantial
234 | considerable
235 | coeducation
236 | education
237 | canonical
238 | standard
239 | galvanic
240 | exciting
241 | nominated
242 | nominate
243 | nominated
244 | choose
245 | exceedance
246 | probability
247 | confide
248 | consign
249 | confide
250 | unwrap
251 | established
252 | initiate
253 | condescend
254 | act
255 | tricycle
256 | pedicab
257 | discharged
258 | spread
259 | postmodernist
260 | artist
261 | ruralist
262 | rustic
263 | ruralist
264 | advocate
265 | conjurors
266 | enchantress
267 | detestable
268 | offensive
269 | detestable
270 | hateful
271 | comparing
272 | compare
273 | comparing
274 | analogize
275 | marketers
276 | selling
277 | hypercoaster
278 | roller
279 | pittance
280 | payment
281 | soulless
282 | insensitive
283 | hypermarket
284 | supermarket
285 | confluent
286 | branch
287 | confluent
288 | convergent
289 | anterooms
290 | building
291 | reasoning
292 | deduce
293 | reasoning
294 | re-argue
295 | summonings
296 | page
297 | summonings
298 | demand
299 | preordained
300 | predetermine
301 | antechamber
302 | room
303 | concavity
304 | shape
305 | concavity
306 | recess
307 | unzipping
308 | unfasten
309 | spoonful
310 | containerful
311 | partible
312 | divisible
313 | hypersensitive
314 | susceptible
315 | impurity
316 | adulteration
317 | impurity
318 | waste
319 | inscribe
320 | engrave
321 | virginals
322 | harpsichord
323 | hypertexts
324 | database
325 | inheritances
326 | acquisition
327 | infectious
328 | septic
329 | infectious
330 | contagious
331 | pinpointed
332 | locate
333 | capitalised
334 | profit
335 | capitalised
336 | supply
337 | retrace
338 | return
339 | deadness
340 | quality
341 | deadness
342 | inelasticity
343 | conformism
344 | legalism
345 | tripods
346 | tripod
347 | lastingly
348 | wear
349 | lastingly
350 | populate
351 | unexpected
352 | unannounced
353 | interlink
354 | intercommunicate
355 | interlink
356 | connect
357 | brained
358 | kill
359 | brained
360 | hit
361 | unicycles
362 | wheel
363 | unicycles
364 | bicycle
365 | preservers
366 | cook
367 | preservers
368 | worker
369 | autografts
370 | graft
371 | retarding
372 | decelerate
373 | retarding
374 | stay
375 | subfamily
376 | group
377 | encrust
378 | coat
379 | encrust
380 | decorate
381 | wingless
382 | flightless
383 | intraspecific
384 | interspecies
385 | tunneled
386 | penetrate
387 | tunneled
388 | dig
389 | suppressor
390 | gene
391 | suppressor
392 | restrainer
393 | expound
394 | elaborate
395 | expound
396 | detail
397 | brisker
398 | energetic
399 | brisker
400 | invigorating
401 | wealthy
402 | rich
403 | eventful
404 | important
405 | eventful
406 | lively
407 | edgeless
408 | dull
409 | clownish
410 | humorous
411 | inquisitor
412 | thousand
413 | inquisitor
414 | inquirer
415 | extravert
416 | extroversive
417 | finality
418 | conclusive
419 | gibberish
420 | dutch
421 | championship
422 | status
423 | championship
424 | contest
425 | indelicate
426 | indecent
427 | indelicate
428 | tasteless
429 | sheikhdoms
430 | domain
431 | flighted
432 | fly
433 | flighted
434 | shoot
435 | backwardness
436 | idiocy
437 | discontinuous
438 | disjunct
439 | cheapen
440 | devalue
441 | flatulence
442 | physical
443 | venomous
444 | toxic
445 | venomous
446 | malicious
447 | enshrouded
448 | envelop
449 | impotently
450 | ineffective
451 | deviationism
452 | desertion
453 | capitation
454 | tax
455 | denominate
456 | label
457 | illiberal
458 | narrow-minded
459 | periodical
460 | nightly
461 | periodical
462 | publication
463 | distillate
464 | liquid
465 | returning
466 | bounce
467 | dictatorship
468 | state
469 | disfavoring
470 | prejudice
471 | postglacial
472 | cold
473 | ennobled
474 | honor
475 | anticyclones
476 | high
477 | cylindrical
478 | rounded
479 | nonpolitical
480 | apolitical
481 | circumference
482 | size
483 | repositions
484 | move
485 | repositions
486 | reduce
487 | librarianship
488 | position
489 | conductive
490 | semiconducting
491 | bounced
492 | skip
493 | bounced
494 | bounce
495 | entreaty
496 | request
497 | convertible
498 | car
499 | convertible
500 | security
501 | constitutive
502 | essential
503 | kindergarteners
504 | child
505 | angrier
506 | huffy
507 | angrier
508 | stormy
509 | defiles
510 | mar
511 | defiles
512 | spot
513 | hankering
514 | desire
515 | hankering
516 | longing
517 | circumnavigate
518 | circle
519 | criticality
520 | juncture
521 | criticality
522 | urgency
523 | mistrustful
524 | distrustful
525 | presuppose
526 | imply
527 | presuppose
528 | premise
529 | mayoralty
530 | position
531 | companionships
532 | friendship
533 | primates
534 | priest
535 | omnipotent
536 | powerful
537 | postboxes
538 | maildrop
539 | loveable
540 | desirable
541 | antedating
542 | chronologize
543 | benefited
544 | help
545 | benefited
546 | get
547 | contrastive
548 | different
549 | contrastive
550 | antonymous
551 | interned
552 | work
553 | interned
554 | confine
555 | clamorous
556 | noisy
557 | baseness
558 | unworthiness
559 | serenaded
560 | perform
561 | snookered
562 | play
563 | snookered
564 | flim-flam
565 | immeasurable
566 | incalculable
567 | immeasurable
568 | illimitable
569 | encroachments
570 | inroad
571 | encroachments
572 | entrance
573 | deregulating
574 | liberation
575 | deregulating
576 | exempt
577 | acceptable
578 | satisfactory
579 | acceptable
580 | standard
581 | sentenced
582 | declare
583 | shrieks
584 | shout
585 | shrieks
586 | cry
587 | nonviable
588 | dead
589 | papered
590 | cover
591 | territorials
592 | soldier
593 | territorials
594 | guard
595 | publicise
596 | tell
597 | reenact
598 | re-create
599 | reenact
600 | ordain
601 | interstellar
602 | major
603 | scattered
604 | separate
605 | transmigrating
606 | immigrate
607 | transmigrating
608 | born
609 | associations
610 | southern
611 | associations
612 | sociable
613 | admiralty
614 | department
615 | admiralty
616 | position
617 | autobiographer
618 | biographer
619 | planners
620 | schemer
621 | planners
622 | notebook
623 | supplement
624 | constitute
625 | supplement
626 | leverage
627 | combusts
628 | blow
629 | combusts
630 | ablaze
631 | brightness
632 | intelligence
633 | brightness
634 | radiance
635 | producing
636 | together
637 | reserve
638 | assign
639 | reserve
640 | withhold
641 | unflagging
642 | constant
643 | unflagging
644 | energetic
645 | preschooler
646 | child
647 | baggers
648 | machine
649 | baggers
650 | workman
651 | willingness
652 | wholeheartedness
653 | unacceptable
654 | unsatisfactory
655 | unacceptable
656 | unwelcome
657 | directionless
658 | purposeless
659 | replications
660 | reproduction
661 | replications
662 | procedure
663 | retrials
664 | trial
665 | venders
666 | selling
667 | fantasist
668 | creator
669 | interlinks
670 | intercommunicate
671 | interlinks
672 | connect
673 | adversely
674 | unfavorable
675 | repulses
676 | disgust
677 | repulses
678 | fight
679 | humanness
680 | quality
681 | autofocus
682 | optical
683 | conversely
684 | interview
685 | conversely
686 | proposition
687 | ceaseless
688 | continuous
689 | hybridise
690 | breed
691 | antitumor
692 | brain
693 | parallelism
694 | similarity
695 | sightedness
696 | sight
697 | battleships
698 | dreadnought
699 | subarctic
700 | polar
701 | subarctic
702 | overshoe
703 | sufferance
704 | self
705 | uncomprehending
706 | undiscerning
707 | regretful
708 | penitent
709 | monoplanes
710 | airplane
711 | steepen
712 | change
713 | transfuse
714 | breathe
715 | transfuse
716 | pour
717 | hyperextension
718 | extension
719 | amazings
720 | surprise
721 | amazings
722 | stump
723 | perished
724 | change
725 | hilarity
726 | gaiety
727 | appearance
728 | apparition
729 | transmissible
730 | infectious
731 | transmissible
732 | inheritable
733 | wheaten
734 | source
735 | magnetize
736 | charm
737 | magnetize
738 | change
739 | militarize
740 | change
741 | circumspect
742 | prudent
743 | translocate
744 | transfer
745 | macroevolution
746 | evolution
747 | circumvented
748 | attack
749 | circumvented
750 | surpass
751 | adventism
752 | christianity
753 | breather
754 | submarine
755 | breather
756 | respite
757 | disabused
758 | inform
759 | contravene
760 | deny
761 | contravene
762 | transgress
763 | transducers
764 | device
765 | icelandic
766 | scandinavian
767 | uncertainty
768 | speculativeness
769 | disengages
770 | unclog
771 | painkillers
772 | hydrochloride
773 | associational
774 | legion
775 | associational
776 | affiliation
777 | luxuriance
778 | abundance
779 | vacations
780 | spend
781 | chooses
782 | compare
783 | chooses
784 | decide
785 | enunciated
786 | state
787 | cosponsoring
788 | sponsor
789 | impeded
790 | obstruct
791 | impeded
792 | dam
793 | irremovable
794 | tenured
795 | strangers
796 | person
797 | utilitarianism
798 | doctrine
799 | puffery
800 | flattery
801 | noncitizens
802 | traveler
803 | monsignori
804 | priest
805 | refered
806 | apply
807 | refered
808 | remember
809 | macrocosmic
810 | large
811 | functionality
812 | practicality
813 | spoonfuls
814 | containerful
815 | instructorship
816 | position
817 | approved
818 | authorize
819 | approved
820 | rubberstamp
821 | recorders
822 | box
823 | recorders
824 | official
825 | headship
826 | position
827 | credentials
828 | document
829 | credentials
830 | certificate
831 | enunciating
832 | state
833 | enunciating
834 | round
835 | caramelize
836 | convert
837 | cosigns
838 | validate
839 | cosigns
840 | endorse
841 | deformity
842 | appearance
843 | responsible
844 | causative
845 | undisputable
846 | undeniable
847 | reassess
848 | measure
849 | colonise
850 | settle
851 | subserve
852 | help
853 | religionist
854 | person
855 | sanctioned
856 | empower
857 | sanctioned
858 | back
859 | suggestible
860 | susceptible
861 | warmness
862 | protectiveness
863 | warmness
864 | hotness
865 | relates
866 | focus
867 | relates
868 | remember
869 | cardinality
870 | number
871 | rotational
872 | transformation
873 | rotational
874 | circumvolution
875 | totalism
876 | political
877 | irrationality
878 | insanity
879 | absorbance
880 | density
881 | intracerebral
882 | emotional
883 | disjoined
884 | separate
885 | intramuscular
886 | powerful
887 | endangerment
888 | hazard
889 | decomposition
890 | fragmentation
891 | decomposition
892 | algebra
893 | autobiographies
894 | memoir
895 | characterless
896 | ordinary
897 | dissenters
898 | conscientious
899 | subspecies
900 | group
901 | irreproducible
902 | unrepeatable
903 | cosigned
904 | validate
905 | cosigned
906 | endorse
907 | embellishment
908 | expansion
909 | encyclopaedic
910 | comprehensive
911 | indispensable
912 | critical
913 | indispensable
914 | necessary
915 | fractures
916 | destroy
917 | fractures
918 | pervert
919 | entraps
920 | gin
921 | entraps
922 | deceive
923 | anamorphosis
924 | evolution
925 | anamorphosis
926 | copy
927 | dispersive
928 | distributive
929 | smoothen
930 | rub
931 | interpreter
932 | person
933 | interpreter
934 | symbolist
935 | meadows
936 | grassland
937 | obtainment
938 | acquiring
939 | nonprofessional
940 | lay
941 | attendances
942 | frequency
943 | attendances
944 | appearance
945 | protraction
946 | continuance
947 | transshipped
948 | transfer
949 | entrapped
950 | capture
951 | entrapped
952 | deceive
953 | exclaiming
954 | call
955 | exclaiming
956 | declare
957 | passable
958 | satisfactory
959 | passable
960 | negotiable
961 | undetectable
962 | invisible
963 | undetectable
964 | imperceptible
965 | endurable
966 | tolerable
967 | supposed
968 | speculate
969 | supposed
970 | suspect
971 | transact
972 | bank
973 | survivalist
974 | person
975 | increasing
976 | grow
977 | increasing
978 | up
979 | fabricate
980 | make
981 | fabricate
982 | think
983 | partnership
984 | partner
985 | partnership
986 | relationship
987 | microorganism
988 | organism
989 | impossibilities
990 | unattainableness
991 | repress
992 | suppress
993 | dimensional
994 | multidimensional
995 | performance
996 | universe
997 | performance
998 | musical
999 | feudalism
1000 | organization
1001 | behaviorist
1002 | psychologist
1003 | interjection
1004 | break
1005 | interjection
1006 | exclamation
1007 | consequences
1008 | position
1009 | consequences
1010 | result
1011 | preschoolers
1012 | child
1013 | unmentionables
1014 | garment
1015 | subeditor
1016 | editor
1017 | standardize
1018 | regulate
1019 | standardize
1020 | measure
1021 | winners
1022 | walloper
1023 | persuasions
1024 | electioneering
1025 | persuasions
1026 | belief
1027 | conformations
1028 | balance
1029 | conformations
1030 | curvature
1031 | seriousness
1032 | badness
1033 | seriousness
1034 | gravity
1035 | metabolism
1036 | organic
1037 | reprints
1038 | reproduce
1039 | reprints
1040 | publication
1041 | replication
1042 | procedure
1043 | replication
1044 | copying
1045 | highjacking
1046 | robbery
1047 | highjacking
1048 | seize
1049 | repurchases
1050 | buy
1051 | sympathized
1052 | feel
1053 | unsuitable
1054 | unfit
1055 | unsuitable
1056 | irrelevant
1057 | victorious
1058 | successful
1059 | victorious
1060 | undefeated
1061 | leagued
1062 | unite
1063 | ravenous
1064 | gluttonous
1065 | ravenous
1066 | hungry
1067 | inversions
1068 | abnormality
1069 | inversions
1070 | phenomenon
1071 | flavourful
1072 | tasty
1073 | spaciousness
1074 | largeness
1075 | evidently
1076 | obvious
1077 | evidently
1078 | noticeable
1079 | reinsured
1080 | insure
1081 | crudeness
1082 | wild
1083 | crudeness
1084 | impoliteness
1085 | initialise
1086 | divide
1087 | initialise
1088 | determine
1089 | requirement
1090 | duty
1091 | requirement
1092 | thing
1093 | contortionists
1094 | acrobat
1095 | dysentery
1096 | diarrhea
1097 | occlusion
1098 | thrombosis
1099 | reenactor
1100 | actor
1101 | ulcerate
1102 | affect
1103 | ulcerate
1104 | change
1105 | exemplify
1106 | embody
1107 | exemplify
1108 | elaborate
1109 | attractor
1110 | entertainer
1111 | macroeconomists
1112 | economist
1113 | exploitive
1114 | consumptive
1115 | lectureship
1116 | position
1117 | automate
1118 | change
1119 | incorruptible
1120 | incorrupt
1121 | exacerbated
1122 | anger
1123 | exacerbated
1124 | inflame
1125 | continuously
1126 | unbroken
1127 | crusaders
1128 | warrior
1129 | crusaders
1130 | insurgent
1131 | formations
1132 | flight
1133 | formations
1134 | filing
1135 | bestowals
1136 | giving
1137 | bestowals
1138 | gift
1139 | undeviating
1140 | reliable
1141 | undeviating
1142 | direct
1143 | impassively
1144 | voice
1145 | paradoxical
1146 | inexplicable
1147 | deceitful
1148 | dishonest
1149 | commissions
1150 | equip
1151 | commissions
1152 | order
1153 | leisured
1154 | idle
1155 | unsalable
1156 | unmarketable
1157 | hypersensitivity
1158 | sensitivity
1159 | inquisitiveness
1160 | nosiness
1161 | monograms
1162 | symbol
1163 | admitting
1164 | confess
1165 | deflowering
1166 | deface
1167 | innovativeness
1168 | originality
1169 | impulsion
1170 | force
1171 | impulsion
1172 | drive
1173 | unisexual
1174 | sexual
1175 | anarchist
1176 | radical
1177 | circumcision
1178 | banquet
1179 | socialites
1180 | person
1181 | rearrangements
1182 | reordering
1183 | unquenchable
1184 | insatiate
1185 | interrelated
1186 | interrelate
1187 | interrelated
1188 | associate
1189 | synthetical
1190 | logical
1191 | entombment
1192 | funeral
1193 | kidnapped
1194 | shanghai
1195 | uproarious
1196 | humorous
1197 | uproarious
1198 | noisy
1199 | discipleship
1200 | position
1201 | vaporise
1202 | evaporate
1203 | vaporise
1204 | change
1205 | memorialize
1206 | remind
1207 | memorialize
1208 | address
1209 | personify
1210 | embody
1211 | personify
1212 | typify
1213 | inbreeding
1214 | coupling
1215 | lenience
1216 | softness
1217 | lenience
1218 | mercifulness
1219 | preposed
1220 | put
1221 | prophetical
1222 | predictive
1223 | standoffish
1224 | unapproachable
1225 | procurator
1226 | agent
1227 | excitations
1228 | arousal
1229 | excitations
1230 | fever
1231 | thoughtless
1232 | inconsiderate
1233 | untruth
1234 | statement
1235 | malfeasance
1236 | wrongdoing
1237 | supporters
1238 | trader
1239 | supporters
1240 | strength
1241 | punctuate
1242 | quote
1243 | punctuate
1244 | point
1245 | translocation
1246 | organic
1247 | translocation
1248 | procedure
1249 | deforming
1250 | change
1251 | deforming
1252 | morph
1253 | attributions
1254 | attributable
1255 | shouter
1256 | crier
1257 | excrete
1258 | make
1259 | concerti
1260 | concerto
1261 | reformism
1262 | doctrine
1263 | moisten
1264 | baste
1265 | moisten
1266 | sprinkle
1267 | inflammation
1268 | pitch
1269 | intermingles
1270 | commingle
1271 | gathering
1272 | sponge
1273 | gathering
1274 | hive
1275 | concerning
1276 | involve
1277 | deviously
1278 | indirect
1279 | deviously
1280 | untrustworthy
1281 | admittance
1282 | right
1283 | performing
1284 | improvise
1285 | performing
1286 | church
1287 | pretenders
1288 | ringer
1289 | toppled
1290 | push
1291 | toppled
1292 | over
1293 | nonconscious
1294 | unconscious
1295 | nonconscious
1296 | inanimate
1297 | meaningless
1298 | empty
1299 | immoveable
1300 | immobile
1301 | unblock
1302 | play
1303 | unblock
1304 | unstuff
1305 | rhythmicity
1306 | lilt
1307 | significances
1308 | meaning
1309 | sheepish
1310 | docile
1311 | sheepish
1312 | ashamed
1313 | immensely
1314 | large
1315 | eruptive
1316 | active
1317 | eruptive
1318 | aqueous
1319 | transvestitism
1320 | practice
1321 | royalist
1322 | monarchist
1323 | libelous
1324 | harmful
1325 | commodes
1326 | drawers
1327 | commodes
1328 | fixture
1329 | conscripting
1330 | enlist
1331 | depopulate
1332 | shrink
1333 | directional
1334 | leading
1335 | disbelieving
1336 | doubt
1337 | disbelieving
1338 | incredulous
1339 | hypervelocity
1340 | speed
1341 | interdisciplinary
1342 | nonindulgent
1343 | nonverbally
1344 | numerical
1345 | pressurise
1346 | change
1347 | measurements
1348 | viscometry
1349 | nonfunctional
1350 | run-down
1351 | severer
1352 | intense
1353 | brainless
1354 | unintelligent
1355 | marinate
1356 | steep
1357 | freighter
1358 | cargo
1359 | terrorize
1360 | coerce
1361 | terrorize
1362 | frighten
1363 | prayerful
1364 | pious
1365 | bestowal
1366 | giving
1367 | bestowal
1368 | gift
1369 | diagonal
1370 | line
1371 | diagonal
1372 | heterosexual
1373 | ingroup
1374 | bohemia
1375 | uncomfortable
1376 | comfortless
1377 | uncomfortable
1378 | disquieting
1379 | hyperlinks
1380 | link
1381 | therapeutical
1382 | acoustic
1383 | therapeutical
1384 | healthful
1385 | depreciate
1386 | deflate
1387 | intelligence
1388 | shrewdness
1389 | intelligence
1390 | agency
1391 | cynically
1392 | distrustful
1393 | autopilot
1394 | unconsciousness
1395 | enjoining
1396 | forbid
1397 | enjoining
1398 | command
1399 | reelections
1400 | election
1401 | tidings
1402 | float
1403 | tidings
1404 | ebb
1405 | transmigrated
1406 | immigrate
1407 | infeasible
1408 | impossible
1409 | rhymers
1410 | writer
1411 | germanic
1412 | scandinavian
1413 | anticancer
1414 | person
1415 | fording
1416 | traverse
1417 | fording
1418 | deep
1419 | unmolested
1420 | untroubled
1421 | covariant
1422 | variable
1423 | postposition
1424 | place
1425 | splashy
1426 | covered
1427 | splashy
1428 | ostentatious
1429 | sprouting
1430 | germinate
1431 | sprouting
1432 | grow
1433 | entwined
1434 | stitch
1435 | entwined
1436 | wreathe
1437 | hypertext
1438 | text
1439 | expressible
1440 | representable
1441 | unicyclist
1442 | pedaler
1443 | spatiality
1444 | property
1445 | acoustical
1446 | remedy
1447 | strains
1448 | trouble
1449 | strains
1450 | rack
1451 | resistor
1452 | splitter
1453 | pastorship
1454 | position
1455 | brightly
1456 | colorful
1457 | lubricate
1458 | change
1459 | lubricate
1460 | fill
1461 | hilariously
1462 | humorous
1463 | intercession
1464 | prayer
1465 | evangelicalism
1466 | revivalism
1467 | unmarried
1468 | unwed
1469 | globalise
1470 | widen
1471 | cofactor
1472 | compound
1473 | energized
1474 | enliven
1475 | energized
1476 | change
1477 | registry
1478 | register
1479 | unrealizable
1480 | impossible
1481 | dissociations
1482 | compartmentalization
1483 | dissociations
1484 | separation
1485 | griping
1486 | bite
1487 | griping
1488 | complain
1489 | quieten
1490 | hush
1491 | quieten
1492 | compose
1493 | transfigure
1494 | change
1495 | scarceness
1496 | rarity
1497 | corroding
1498 | decay
1499 | corroding
1500 | corrode
1501 | freakishly
1502 | panic
1503 | copilot
1504 | pilot
1505 | analyzed
1506 | synthesize
1507 | analyzed
1508 | survey
1509 | confinements
1510 | pregnancy
1511 | confinements
1512 | restraint
1513 | prideful
1514 | elated
1515 | prideful
1516 | proud
1517 | commode
1518 | drawers
1519 | commode
1520 | seat
1521 | fluidity
1522 | thinness
1523 | fluidity
1524 | changeableness
1525 | internationalize
1526 | control
1527 | internationalize
1528 | change
1529 | academicism
1530 | traditionalism
1531 | boisterously
1532 | spirited
1533 | boisterously
1534 | disorderly
1535 | effected
1536 | carry
1537 | effected
1538 | draw
1539 | subhead
1540 | heading
1541 | whizzed
1542 | sound
1543 | independences
1544 | independent
1545 | independences
1546 | victory
1547 | conductance
1548 | electrical
1549 | uncontrolled
1550 | rampant
1551 | selectively
1552 | exclusive
1553 | selectively
1554 | discriminating
1555 | fulfillments
1556 | satisfaction
1557 | fulfillments
1558 | self-fulfillment
1559 | premeditation
1560 | planning
1561 | stewardship
1562 | position
1563 | residing
1564 | populate
1565 | residing
1566 | stay
1567 | coefficient
1568 | self
1569 | algebraist
1570 | mathematician
1571 | drownings
1572 | extinguish
1573 | drownings
1574 | cover
1575 | encamping
1576 | populate
1577 | prostatic
1578 | criticism
1579 | prostatic
1580 | radio
1581 | violating
1582 | fly
1583 | violating
1584 | observe
1585 | remitting
1586 | transfer
1587 | orientate
1588 | reorientate
1589 | postmark
1590 | marker
1591 | postmark
1592 | stamp
1593 | establishment
1594 | organization
1595 | establishment
1596 | beginning
1597 | recitalist
1598 | soloist
1599 | institutionalize
1600 | hospitalize
1601 | verbalize
1602 | enthuse
1603 | verbalize
1604 | talk
1605 | presenters
1606 | communicator
1607 | presenters
1608 | advocate
1609 | repressing
1610 | suppress
1611 | repressing
1612 | oppress
1613 | premisses
1614 | presuppose
1615 | premisses
1616 | premise
1617 | outfoxed
1618 | surpass
1619 | gardens
1620 | sink
1621 | gardens
1622 | tend
1623 | phosphate
1624 | drink
1625 | phosphate
1626 | sodium
1627 | airship
1628 | trade
1629 | submariners
1630 | bluejacket
1631 | infectiously
1632 | canker
1633 | subsurface
1634 | submarine
1635 | extendible
1636 | long
1637 | refresher
1638 | beverage
1639 | refresher
1640 | legal
1641 | seasonable
1642 | opportune
1643 | moderatorship
1644 | position
1645 | modesty
1646 | demureness
1647 | prejudging
1648 | evaluate
1649 | roadless
1650 | inaccessible
1651 | objectifying
1652 | change
1653 | expounded
1654 | clarify
1655 | expounded
1656 | premise
1657 | nonperformance
1658 | negligence
1659 | acoustics
1660 | remedy
1661 | acoustics
1662 | physics
1663 | yellowish
1664 | chromatic
1665 | reckoner
1666 | statistician
1667 | reckoner
1668 | handbook
1669 | conscientious
1670 | careful
1671 | amounted
1672 | make
1673 | amounted
1674 | work
1675 | vegetational
1676 | growth
1677 | vegetational
1678 | forest
1679 | unfavourable
1680 | adverse
1681 | unfavourable
1682 | discriminatory
1683 | vocalism
1684 | voice
1685 | vocalism
1686 | system
1687 | continence
1688 | self-discipline
1689 | immoderate
1690 | excessive
1691 | internships
1692 | position
1693 | translunar
1694 | heavenly
1695 | ideality
1696 | quality
1697 | importance
1698 | momentousness
1699 | importance
1700 | primacy
1701 | jarringly
1702 | move
1703 | jarringly
1704 | conflict
1705 | affectional
1706 | emotional
1707 | rediscovery
1708 | discovery
1709 | microfossils
1710 | fossil
1711 | unknowing
1712 | ignorance
1713 | unknowing
1714 | uninformed
1715 | commandership
1716 | position
1717 | autoimmunity
1718 | autoimmune
1719 | undefended
1720 | vulnerable
1721 | collected
1722 | take
1723 | collected
1724 | corral
1725 | secluding
1726 | isolate
1727 | ceramicist
1728 | craftsman
1729 | teaspoonful
1730 | containerful
1731 | migrational
1732 | emigration
1733 | migrational
1734 | people
1735 | newness
1736 | brand-newness
1737 | circumscribes
1738 | content
1739 | rudderless
1740 | purposeless
1741 | internationaler
1742 | foreign
1743 | contrive
1744 | plot
1745 | contrive
1746 | make
1747 | unarguable
1748 | incontestable
1749 | replaces
1750 | preempt
1751 | unconcern
1752 | heartlessness
1753 | unconcern
1754 | carefreeness
1755 | reformations
1756 | religious
1757 | procreated
1758 | make
1759 | inducement
1760 | motivation
1761 | inducement
1762 | causing
1763 | sanctify
1764 | lustrate
1765 | sanctify
1766 | declare
1767 | effectiveness
1768 | potent
1769 | restrainer
1770 | chemical
1771 | restrainer
1772 | nazi
1773 | imprecise
1774 | inaccurate
1775 | heraldist
1776 | applaud
1777 | heraldist
1778 | tell
1779 | sweetish
1780 | sweet
1781 | bootless
1782 | unproductive
1783 | follower
1784 | tail
1785 | follower
1786 | cultist
1787 | traversals
1788 | travel
1789 | traversals
1790 | skiing
1791 | requests
1792 | invite
1793 | posthole
1794 | hole
1795 | unilluminated
1796 | dark
1797 | consigning
1798 | abandon
1799 | consigning
1800 | entrust
1801 | purchasable
1802 | available
1803 | purchasable
1804 | corrupt
1805 | abandonment
1806 | absence
1807 | pestilence
1808 | plague
1809 | pestilence
1810 | disease
1811 | weirdly
1812 | deity
1813 | weirdly
1814 | supernatural
1815 | antagonist
1816 | person
1817 | antagonist
1818 | muscle
1819 | puritanism
1820 | sternness
1821 | profitless
1822 | unrewarding
1823 | customise
1824 | produce
1825 | customise
1826 | change
1827 | insurrectional
1828 | conflict
1829 | algebras
1830 | vector
1831 | monotony
1832 | constancy
1833 | monotony
1834 | unvariedness
1835 | subletting
1836 | lease
1837 | princedom
1838 | domain
1839 | princedom
1840 | rank
1841 | uninhibited
1842 | unreserved
1843 | sublieutenant
1844 | lieutenant
1845 | absorbing
1846 | assimilate
1847 | absorbing
1848 | learn
1849 | conflagration
1850 | fire
1851 | condescended
1852 | act
1853 | decompositions
1854 | decay
1855 | decompositions
1856 | algebra
1857 | obstructive
1858 | preventive
1859 | intelligences
1860 | brain
1861 | intelligences
1862 | military
1863 | indirectness
1864 | characteristic
1865 | imperils
1866 | exist
1867 | skillfulness
1868 | command
1869 | unmentionable
1870 | impermissible
1871 | shortish
1872 | short
1873 | deserters
1874 | quitter
1875 | engineering
1876 | design
1877 | engineering
1878 | plan
1879 | provisionally
1880 | conditional
1881 | subordination
1882 | relation
1883 | subordination
1884 | dependence
1885 | cofounder
1886 | founder
1887 | membership
1888 | body
1889 | membership
1890 | relationship
1891 | embroideries
1892 | needlepoint
1893 | embroideries
1894 | expansion
1895 | americanize
1896 | change
1897 | protectorship
1898 | position
1899 | unilateralist
1900 | advocate
1901 | nonstandard
1902 | measure
1903 | convector
1904 | heater
1905 | evacuated
1906 | move
1907 | evacuated
1908 | empty
1909 | subroutines
1910 | software
1911 | brittany
1912 | france
1913 | accomplishments
1914 | attainment
1915 | accomplishments
1916 | horsemanship
1917 | outperforming
1918 | outshout
1919 | microcircuit
1920 | chip
1921 | voraciously
1922 | gluttonous
1923 | voraciously
1924 | acquisitive
1925 | intramolecular
1926 | molar
1927 | hospitalize
1928 | commit
1929 | distinguishing
1930 | discriminate
1931 | distinguishing
1932 | sex
1933 | reproductive
1934 | fruitful
1935 | goldplated
1936 | plate
1937 | favourable
1938 | complimentary
1939 | procreation
1940 | generation
1941 | postponements
1942 | adjournment
1943 | postponements
1944 | extension
1945 | detectable
1946 | perceptible
1947 | detectable
1948 | noticeable
1949 | contraception
1950 | control
1951 | lushness
1952 | abundance
1953 | incensing
1954 | anger
1955 | incensing
1956 | odorize
1957 | outlawed
1958 | illegal
1959 | embroiderers
1960 | embroideress
1961 | blitzed
1962 | attack
1963 | wilderness
1964 | disfavor
1965 | wilderness
1966 | bush
1967 | decapitated
1968 | guillotine
1969 | decapitated
1970 | headless
1971 | microflora
1972 | microorganism
1973 | acceptance
1974 | blessing
1975 | acceptance
1976 | recognition
1977 | unfortunate
1978 | prisoner
1979 | unfortunate
1980 | black
1981 | refuted
1982 | oppose
1983 | refuted
1984 | disprove
1985 | greenly
1986 | discolor
1987 | greenly
1988 | emerald
1989 | importances
1990 | standing
1991 | importances
1992 | deal
1993 | autoimmune
1994 | carrier
1995 | autoimmune
1996 | exempt
1997 | circumnavigations
1998 | travel
1999 | interrelationship
2000 | psychodynamics
2001 | monoatomic
2002 | small
2003 | monoatomic
2004 | thermonuclear
2005 | undefinable
2006 | undefined
2007 | catalogued
2008 | compose
2009 | catalogued
2010 | classify
2011 | heterosexism
2012 | discrimination
2013 | inflicted
2014 | intrude
2015 | preaching
2016 | evangelize
2017 | preaching
2018 | sermonize
2019 | improver
2020 | benefactor
2021 | improver
2022 | attachment
2023 | prudery
2024 | modesty
2025 | combusted
2026 | burn
2027 | swooshing
2028 | sound
2029 | intersected
2030 | meet
2031 | unwaveringly
2032 | hover
2033 | interlingua
2034 | language
2035 | tricolours
2036 | flag
2037 | fictitiously
2038 | unreal
2039 | fictitiously
2040 | counterfeit
2041 | languishing
2042 | weaken
2043 | scampering
2044 | run
2045 | sulfuric
2046 | process
2047 | sulfuric
2048 | sulfide
2049 | trilateral
2050 | reciprocal
2051 | trilateral
2052 | isosceles
2053 | delimitations
2054 | property
2055 | management
2056 | administration
2057 | management
2058 | finance
2059 | microfiche
2060 | microfilm
2061 | medicate
2062 | impregnate
2063 | medicate
2064 | treat
2065 | subgroup
2066 | group
2067 | subgroup
2068 | bench
2069 | normalise
2070 | normalize
2071 | irreligious
2072 | nonobservant
2073 | slanderous
2074 | harmful
2075 | microbiologist
2076 | virologist
2077 | circumvent
2078 | beat
2079 | circumvent
2080 | attack
2081 | revolutionise
2082 | indoctrinate
2083 | revolutionise
2084 | change
2085 | discrete
2086 | separate
2087 | protrusion
2088 | mogul
2089 | protrusion
2090 | shape
2091 | bewitchment
2092 | sorcery
2093 | locality
2094 | scenery
2095 | scornful
2096 | disrespectful
2097 | reburial
2098 | burying
2099 | untracked
2100 | inaccessible
2101 | mutinied
2102 | rebel
2103 | unforeseen
2104 | unexpected
2105 | helical
2106 | coiled
2107 | carbonate
2108 | process
2109 | carbonate
2110 | change
2111 | disturbances
2112 | magnetic
2113 | disturbances
2114 | agitation
2115 | mccarthyism
2116 | witch-hunt
2117 | titillated
2118 | please
2119 | titillated
2120 | itch
2121 | fetishism
2122 | belief
2123 | indifferently
2124 | uninterested
2125 | indifferently
2126 | unconcerned
2127 | rascality
2128 | naughtiness
2129 | parallelize
2130 | put
2131 | fractionate
2132 | separate
2133 | chairmanship
2134 | position
2135 | starkness
2136 | limit
2137 | bellowing
2138 | shout
2139 | destroyers
2140 | annihilator
2141 | destroyers
2142 | warship
2143 | rededicated
2144 | give
2145 | reassuringly
2146 | affirm
2147 | inconvertible
2148 | incommutable
2149 | dissatisfying
2150 | disgruntle
2151 | seeders
2152 | person
2153 | seeders
2154 | mechanical
2155 | prospector
2156 | sourdough
2157 | leadership
2158 | helm
2159 | leadership
2160 | high
2161 | assassinated
2162 | kill
2163 | assassinated
2164 | defame
2165 | considerable
2166 | significant
2167 | guardedly
2168 | shepherd
2169 | guardedly
2170 | patrol
2171 | accessible
2172 | approachable
2173 | accessible
2174 | comprehensible
2175 | interconnectedness
2176 | connection
2177 | autograft
2178 | graft
2179 | antagonize
2180 | annoy
2181 | antagonize
2182 | act
2183 | nerveless
2184 | composed
2185 | distrustful
2186 | cynical
2187 | democratize
2188 | change
2189 | diffidence
2190 | unassertiveness
2191 | heartlessly
2192 | spiritless
2193 | sensualist
2194 | epicure
2195 | concordance
2196 | agreement
2197 | concordance
2198 | order
2199 | promiscuous
2200 | unchaste
2201 | promiscuous
2202 | indiscriminate
2203 | excitedly
2204 | affect
2205 | excitedly
2206 | arouse
2207 | careerism
2208 | practice
2209 | retraced
2210 | return
2211 | internationality
2212 | scope
2213 | advisory
2214 | announcement
2215 | advisory
2216 | informative
2217 | enunciates
2218 | state
2219 | battened
2220 | strengthen
2221 | assistance
2222 | facilitation
2223 | blunders
2224 | transgress
2225 | combust
2226 | blow
2227 | combust
2228 | burn
2229 | excitation
2230 | arousal
2231 | excitation
2232 | exciting
2233 | blackmailed
2234 | extort
2235 | bunking
2236 | cheat
2237 | bunking
2238 | bed
2239 | labourer
2240 | hire
2241 | rectorate
2242 | position
2243 | designed
2244 | intend
2245 | designed
2246 | mental
2247 | behavioural
2248 | action
2249 | behavioural
2250 | propriety
2251 | defrauding
2252 | short-change
2253 | procurators
2254 | bureaucrat
2255 | procurators
2256 | agent
2257 | assistances
2258 | resource
2259 | assistances
2260 | recourse
2261 | unsubdivided
2262 | smooth
2263 | implantations
2264 | placement
2265 | implantations
2266 | procedure
2267 | advancement
2268 | seafaring
2269 | advancement
2270 | encouragement
2271 | translocating
2272 | transfer
2273 | codefendants
2274 | corespondent
2275 | monarchic
2276 | undemocratic
2277 | traitorous
2278 | disloyal
2279 | highlanders
2280 | soldier
2281 | highlanders
2282 | scot
2283 | syntactic
2284 | plan
2285 | reproducible
2286 | duplicable
2287 | monopolist
2288 | person
2289 | comportment
2290 | manner
2291 | roofers
2292 | thatcher
2293 | improving
2294 | relieve
2295 | improving
2296 | reform
2297 | adjustor
2298 | investigator
2299 | dooming
2300 | convict
2301 | preadolescent
2302 | young
2303 | depictive
2304 | representational
2305 | stoical
2306 | unemotional
2307 | dynastic
2308 | ruler
2309 | hinduism
2310 | religion
2311 | pathfinder
2312 | usher
2313 | romanic
2314 | italian
2315 | overlying
2316 | lie
2317 | overlying
2318 | kill
2319 | refinery
2320 | plant
2321 | industrialise
2322 | change
2323 | expressionless
2324 | uncommunicative
2325 | censorship
2326 | military
2327 | censorship
2328 | deletion
2329 | tricolor
2330 | flag
2331 | tricolor
2332 | colored
2333 | interlaces
2334 | hold
2335 | interlaces
2336 | splice
2337 | personifying
2338 | embody
2339 | personifying
2340 | exemplify
2341 | immobilization
2342 | restraint
2343 | immobilization
2344 | preservation
2345 | subsequences
2346 | result
2347 | circumcisions
2348 | rite
2349 | circumcisions
2350 | day
2351 | bibliographies
2352 | list
2353 | unnecessary
2354 | inessential
2355 | rejoinders
2356 | reply
2357 | rejoinders
2358 | pleading
2359 | lavishness
2360 | expensiveness
2361 | acronymic
2362 | form
2363 | incoordination
2364 | unskillfulness
2365 | provisionary
2366 | conditional
2367 | regardless
2368 | heedless
2369 | promotive
2370 | encouraging
2371 | indicted
2372 | charge
2373 | asphaltic
2374 | paving
2375 | asphaltic
2376 | pave
2377 | cowered
2378 | crouch
2379 | cowered
2380 | bend
2381 | mimicked
2382 | imitate
2383 | protestantism
2384 | fundamentalism
2385 | performances
2386 | play
2387 | regained
2388 | locate
2389 | regained
2390 | get
2391 | monoculture
2392 | culture
2393 | emulsifying
2394 | change
2395 | knightly
2396 | past
2397 | knightly
2398 | courteous
2399 | monogenesis
2400 | reproduction
2401 | interlace
2402 | hold
2403 | communistic
2404 | socialist
2405 | communistic
2406 | politician
2407 | extraterrestrials
2408 | animal
2409 | bronchus
2410 | tube
2411 | contraries
2412 | opposition
2413 | amethysts
2414 | crystalline
2415 | travelers
2416 | foreigner
2417 | gathered
2418 | muster
2419 | gathered
2420 | convene
2421 | trusteeship
2422 | position
2423 | trusteeship
2424 | district
2425 | suspiciousness
2426 | distrust
2427 | feminised
2428 | change
2429 | mushroomed
2430 | grow
2431 | mushroomed
2432 | pick
2433 | vindictively
2434 | unforgiving
2435 | vindictively
2436 | malicious
2437 | castled
2438 | move
2439 | castled
2440 | fancy
2441 | glittery
2442 | bright
2443 | lightship
2444 | ship
2445 | eroticism
2446 | arousal
2447 | eroticism
2448 | desire
2449 | caesarism
2450 | autocracy
2451 | sessions
2452 | quarter
2453 | sessions
2454 | sitting
2455 | fashionable
2456 | up-to-date
2457 | teasingly
2458 | torment
2459 | teasingly
2460 | kid
2461 | microwaving
2462 | cook
2463 | transverse
2464 | crosswise
2465 | managership
2466 | position
2467 | methodically
2468 | acting
2469 | methodically
2470 | know-how
2471 | excitements
2472 | fever
2473 | excitements
2474 | intoxication
2475 | christianise
2476 | convert
2477 | monarchical
2478 | undemocratic
2479 | monarchical
2480 | noble
2481 | cooperators
2482 | spouse
2483 | circumscribed
2484 | restrict
2485 | circumscribed
2486 | trace
2487 | topically
2488 | current
2489 | topically
2490 | local
2491 | evangelistic
2492 | enthusiastic
2493 | emotionalism
2494 | emotional
2495 | differences
2496 | differentia
2497 | differences
2498 | variation
2499 | hallucinating
2500 | ill
2501 | hallucinating
2502 | perceive
2503 | liverpools
2504 | england
2505 | deciphering
2506 | read
2507 | stroked
2508 | touch
2509 | motherless
2510 | unparented
2511 | impermanent
2512 | improvised
2513 | contrabands
2514 | merchandise
2515 | astronautical
2516 | spacewalker
2517 | scrutiny
2518 | look
2519 | discolor
2520 | bleach
2521 | receiverships
2522 | proceeding
2523 | rematches
2524 | repeat
2525 | scandalize
2526 | disgust
2527 | condensing
2528 | encapsulate
2529 | scholarship
2530 | letters
2531 | scholarship
2532 | prize
2533 | transmitter
2534 | communicator
2535 | transmitter
2536 | carrier
2537 | autobuses
2538 | school
2539 | dematerialised
2540 | vanish
2541 | predators
2542 | attacker
2543 | predators
2544 | carnivore
2545 | enlarger
2546 | equipment
2547 | repositioned
2548 | down
2549 | repositioned
2550 | reduce
2551 | equivalence
2552 | tie
2553 | localise
2554 | lie
2555 | localise
2556 | situate
2557 | enfolded
2558 | cocoon
2559 | approachable
2560 | comprehensible
2561 | animality
2562 | nature
2563 | interweaved
2564 | braid
2565 | engorge
2566 | eat
2567 | protesters
2568 | picket
2569 | protesters
2570 | nonconformist
2571 | rooters
2572 | enthusiast
2573 | unobjectionable
2574 | dirty
2575 | unobjectionable
2576 | inoffensive
2577 | physically
2578 | material
2579 | physically
2580 | bodily
2581 | unceremonious
2582 | informal
2583 | unceremonious
2584 | discourteous
2585 | postcodes
2586 | code
2587 | autosuggestion
2588 | self-improvement
2589 | circumventing
2590 | beat
2591 | circumventing
2592 | ebb
2593 | homoerotic
2594 | homosexual
2595 | undesirable
2596 | unwelcome
2597 | adaptive
2598 | accommodative
2599 | foresters
2600 | farmer
2601 | winking
2602 | flicker
2603 | trichloride
2604 | chloride
2605 | preconception
2606 | opinion
2607 | preconception
2608 | homophobia
2609 | fringes
2610 | decorate
2611 | fringes
2612 | surround
2613 | shepherded
2614 | guard
2615 | shepherded
2616 | tend
2617 | kingship
2618 | rank
2619 | excretion
2620 | matter
2621 | excretion
2622 | defecation
2623 | inventively
2624 | creative
2625 | inheritor
2626 | heiress
2627 | conspicuousness
2628 | boldness
2629 | preconceptions
2630 | opinion
2631 | preconceptions
2632 | experimenter
2633 | uproariously
2634 | combustion
2635 | uproariously
2636 | noise
2637 | glistens
2638 | spangle
2639 | glistens
2640 | brightness
2641 | sexless
2642 | asexual
2643 | sexless
2644 | unsexy
2645 | spellers
2646 | writer
2647 | spellers
2648 | primer
2649 | orchestrations
2650 | musical
2651 | orchestrations
2652 | arrangement
2653 | embroiderer
2654 | needleworker
2655 | arousal
2656 | desire
2657 | arousal
2658 | inflammation
2659 | extending
2660 | increase
2661 | extending
2662 | range
2663 | enforcements
2664 | imposition
2665 | connectedness
2666 | bridge
2667 | blacken
2668 | discolor
2669 | blacken
2670 | singe
2671 | galvanize
2672 | coat
2673 | galvanize
2674 | shock
2675 | weaponize
2676 | change
2677 | sorrowful
2678 | grievous
2679 | postdated
2680 | follow
2681 | antipsychotics
2682 | clozapine
2683 | convocation
2684 | gathering
2685 | convocation
2686 | assembly
2687 | intensions
2688 | meaning
2689 | employed
2690 | ship
2691 | employed
2692 | give
2693 | susceptible
2694 | impressionable
2695 | susceptible
2696 | allergic
2697 | alarmism
2698 | warning
2699 | canonize
2700 | declare
2701 | canonize
2702 | laud
2703 | imbedding
2704 | nest
2705 | foreigner
2706 | transalpine
2707 | foreigner
2708 | gringo
2709 | anaesthetics
2710 | drug
2711 | disassociates
2712 | separate
2713 | utterance
2714 | communication
2715 | mitigated
2716 | relieve
2717 | mitigated
2718 | apologize
2719 | unconsolidated
2720 | loose
2721 | ukrainians
2722 | slavic
2723 | hypocrisy
2724 | pretense
2725 | refurbishment
2726 | improvement
2727 | intertwining
2728 | raw
2729 | objectify
2730 | change
2731 | crispness
2732 | freshness
2733 | magically
2734 | supernatural
2735 | philanthropy
2736 | aid
2737 | washers
2738 | worker
2739 | washers
2740 | seal
2741 | frowning
2742 | displeased
2743 | frowning
2744 | scowl
2745 | debarred
2746 | prevent
2747 | nonnative
2748 | foreign
2749 | defeatist
2750 | pessimist
2751 | nakedness
2752 | gloom
2753 | nakedness
2754 | undress
2755 | inexpert
2756 | unprofessional
2757 | designs
2758 | plot
2759 | designs
2760 | intend
2761 | implicational
2762 | meaning
2763 | accordance
2764 | giving
2765 | accordance
2766 | agreement
2767 | skateboarders
2768 | skater
2769 | amusements
2770 | delight
2771 | divided
2772 | paragraph
2773 | divided
2774 | calculate
2775 | dissociable
2776 | divisible
2777 | reduced
2778 | abbreviate
2779 | reduced
2780 | spill
2781 | unintelligible
2782 | incomprehensible
2783 | unintelligible
2784 | slurred
2785 | syllable
2786 | word
2787 | governance
2788 | sociable
2789 | governance
2790 | government
2791 | rainless
2792 | dry
2793 | kazakhstani
2794 | asian
2795 | microseconds
2796 | nanosecond
2797 | disgorge
2798 | seed
2799 | discernment
2800 | knowing
2801 | discernment
2802 | discrimination
2803 | employable
2804 | worker
2805 | univocal
2806 | absolute
2807 | disturbing
2808 | affect
2809 | disturbing
2810 | toss
2811 | unicycling
2812 | bicycle
2813 | mildness
2814 | balminess
2815 | mildness
2816 | manner
2817 | civilise
2818 | sophisticate
2819 | civilise
2820 | change
2821 | valorous
2822 | brave
2823 | exporters
2824 | businessperson
2825 | enrollment
2826 | body
2827 | enrollment
2828 | entrance
2829 | preheated
2830 | heat
2831 | scholarships
2832 | aid
2833 | scholarships
2834 | education
2835 | stressor
2836 | agent
2837 | correspondence
2838 | write
2839 | correspondence
2840 | conformity
2841 | interlinking
2842 | connect
2843 | princedoms
2844 | domain
2845 | princedoms
2846 | rank
2847 | extrapolations
2848 | calculation
2849 | extrapolations
2850 | inference
2851 | extraterritorial
2852 | territorial
2853 | whimsically
2854 | arbitrary
2855 | nobelist
2856 | laureate
2857 | resigning
2858 | top
2859 | resigning
2860 | office
2861 | wrathful
2862 | angry
2863 | cowboys
2864 | performer
2865 | cowboys
2866 | ranch
2867 | conjoins
2868 | intermarry
2869 | conjoins
2870 | cross-link
2871 | predominance
2872 | obviousness
2873 | predominance
2874 | dominance
2875 | brandish
2876 | expose
2877 | brandish
2878 | hold
2879 | regionalisms
2880 | policy
2881 | regionalisms
2882 | address
2883 | extrasensory
2884 | clairvoyant
2885 | microcomputers
2886 | computer
2887 | subtropical
2888 | figurative
2889 | subtropical
2890 | equatorial
2891 | unbiased
2892 | impartial
2893 | unbiased
2894 | nonpartisan
2895 | adhesion
2896 | scar
2897 | resistive
2898 | defiant
2899 | homogeneous
2900 | homogenized
2901 | dependence
2902 | addiction
2903 | dependence
2904 | helplessness
2905 | disinvestment
2906 | withdrawal
2907 | containership
2908 | ship
2909 | naivety
2910 | artlessness
2911 | transmuted
2912 | become
2913 | baptistic
2914 | protestant
2915 | purveying
2916 | supply
2917 | secularist
2918 | advocate
2919 | demerit
2920 | mark
2921 | remarriage
2922 | marriage
2923 | interpreted
2924 | reinterpret
2925 | interpreted
2926 | deconstruct
2927 | clericalism
2928 | policy
2929 | irresolution
2930 | doubt
2931 | irresolution
2932 | volatility
2933 | transmutes
2934 | become
2935 | transmutes
2936 | work
2937 | footballers
2938 | player
2939 | excommunicated
2940 | oust
2941 | containers
2942 | cargo
2943 | commutation
2944 | travel
2945 | transponder
2946 | device
2947 | cooperator
2948 | spouse
2949 | buggered
2950 | copulate
2951 | remarkable
2952 | extraordinary
2953 | remarkable
2954 | significant
2955 | suppleness
2956 | gracefulness
2957 | suppleness
2958 | bendability
2959 | purgatory
2960 | situation
2961 | prehistorical
2962 | past
2963 | technology
2964 | aeronautical
2965 | technology
2966 | science
2967 | transfusing
2968 | pour
2969 | transfusing
2970 | lend
2971 | prolapse
2972 | descend
2973 | circularize
2974 | canvass
2975 | circularize
2976 | poll
2977 | greenness
2978 | profusion
2979 | greenness
2980 | ripeness
2981 | formalisms
2982 | philosophic
2983 | formalisms
2984 | imitation
2985 | interpenetrate
2986 | spiritize
2987 | worsens
2988 | inflame
2989 | worsens
2990 | tumble
2991 | pathfinders
2992 | hunt
2993 | demanded
2994 | clamor
2995 | demanded
2996 | cost
2997 | unequivocal
2998 | unambiguous
2999 | unequivocal
3000 | explicit
3001 | intending
3002 | aim
3003 | intending
3004 | plan
3005 | foreclosed
3006 | obstruct
3007 | disturbance
3008 | storm
3009 | disturbance
3010 | agitation
3011 | eldership
3012 | position
3013 | homophony
3014 | pronunciation
3015 | homophony
3016 | music
3017 | contrarily
3018 | brown
3019 | partnerships
3020 | relationship
3021 | partnerships
3022 | copartnership
3023 | removes
3024 | empty
3025 | removes
3026 | out
3027 | skidding
3028 | skid
3029 | sportive
3030 | playful
3031 | nurturance
3032 | care
3033 | microvolts
3034 | potential
3035 | asteroidal
3036 | angular
3037 | asteroidal
3038 | child
3039 | exterminator
3040 | killer
3041 | talkativeness
3042 | communicativeness
3043 | difference
3044 | distinction
3045 | difference
3046 | inflection
3047 | gumption
3048 | fortitude
3049 | competes
3050 | race
3051 | scheduled
3052 | calendar
3053 | scheduled
3054 | program
3055 | undisclosed
3056 | covert
3057 | abstractionist
3058 | nonrepresentational
3059 | abstractionist
3060 | painter
3061 | merchantable
3062 | salable
3063 | exacted
3064 | necessitate
3065 | developments
3066 | advancement
3067 | syphons
3068 | draw
3069 | syphons
3070 | tube
3071 | organismal
3072 | system
3073 | sanctifying
3074 | spiritualize
3075 | cuteness
3076 | beauty
3077 | subserving
3078 | help
3079 | cofactors
3080 | compound
3081 | combusting
3082 | ablaze
3083 | combusting
3084 | change
3085 | shoulders
3086 | thrust
3087 | shoulders
3088 | raise
3089 | consonant
3090 | letter
3091 | auditive
3092 | analyze
3093 | auditive
3094 | learn
3095 | clients
3096 | case
3097 | clients
3098 | guest
3099 | interesting
3100 | fascinate
3101 | interesting
3102 | refer
3103 | exhibited
3104 | flaunt
3105 | exhibited
3106 | possess
3107 | remakes
3108 | recast
3109 | remakes
3110 | creation
3111 | florescence
3112 | growth
3113 | autopilots
3114 | guidance
3115 | autopilots
3116 | unconsciousness
3117 | separationist
3118 | separatist
3119 | falsifier
3120 | deceiver
3121 | manacles
3122 | shackle
3123 | microcircuits
3124 | chip
3125 | purposeless
3126 | worthless
3127 | cofounders
3128 | founder
3129 | specialism
3130 | career
3131 | specialism
3132 | concentration
3133 | apocalyptical
3134 | prophetic
3135 | copilots
3136 | pilot
3137 | reprehensible
3138 | wrong
3139 | abashed
3140 | upset
3141 | unshaped
3142 | unformed
3143 | boastful
3144 | proud
3145 | commingled
3146 | blend
3147 | trioxide
3148 | oxide
3149 | despoil
3150 | destroy
3151 | bachelors
3152 | live
3153 | bachelors
3154 | man
3155 | macroeconomist
3156 | economist
3157 | placidity
3158 | calmness
3159 | placidity
3160 | composure
3161 | affordable
3162 | cheap
3163 | wallpapered
3164 | cover
3165 | friendship
3166 | brotherhood
3167 | practicable
3168 | practical
3169 | practicable
3170 | possible
3171 | householders
3172 | warrior
3173 | blurting
3174 | talk
3175 | confirmable
3176 | empirical
3177 | positioners
3178 | actuator
3179 | inadvertence
3180 | omission
3181 | reassessments
3182 | appraisal
3183 | reclaim
3184 | save
3185 | reclaim
3186 | get
3187 | broadcasters
3188 | disk
3189 | broadcasters
3190 | mechanical
3191 | inclosure
3192 | document
3193 | inclosure
3194 | insertion
3195 | regularize
3196 | decide
3197 | regularize
3198 | arrange
3199 | interlayers
3200 | layer
3201 | disembodied
3202 | rid
3203 | interviewing
3204 | converse
3205 | pledged
3206 | donate
3207 | pledged
3208 | guarantee
3209 | insidiously
3210 | dangerous
3211 | insidiously
3212 | seductive
3213 | spiritualist
3214 | psychic
3215 | microphallus
3216 | penis
3217 | interceptor
3218 | fighter
3219 | surroundings
3220 | touch
3221 | surroundings
3222 | cover
3223 | reviewers
3224 | critic
3225 | reviewers
3226 | writer
3227 | gladness
3228 | happiness
3229 | followed
3230 | tailgate
3231 | disestablishing
3232 | deprive
3233 | dissolved
3234 | state
3235 | dissolved
3236 | integrity
3237 | grassroots
3238 | common
3239 | grassroots
3240 | basic
3241 | slaughterers
3242 | skilled
3243 | corruptive
3244 | evil
3245 | autographic
3246 | picture
3247 | autographic
3248 | written
3249 | predetermine
3250 | prejudice
3251 | predetermine
3252 | determine
3253 | autoregulation
3254 | organic
3255 | incurved
3256 | curved
3257 | knifing
3258 | injure
3259 | immigrating
3260 | inch
3261 | immigrating
3262 | migrate
3263 | noticeable
3264 | broad
3265 | noticeable
3266 | perceptible
3267 | synchronic
3268 | synchronized
3269 | explorers
3270 | person
3271 | circumstances
3272 | possession
3273 | circumstances
3274 | providence
3275 | expounding
3276 | premise
3277 | inharmonious
3278 | incongruous
3279 | disavowed
3280 | deny
3281 | regularise
3282 | even
3283 | regularise
3284 | decide
3285 | possessor
3286 | holder
3287 | consultive
3288 | informative
3289 | distressful
3290 | heavy
3291 | advised
3292 | inform
3293 | advised
3294 | hash
3295 | deposes
3296 | oust
3297 | deposes
3298 | declare
3299 | wordless
3300 | inarticulate
3301 | demoralise
3302 | bastardize
3303 | muscularity
3304 | strength
3305 | muscularity
3306 | condition
3307 | unspecialised
3308 | generalized
3309 | appearances
3310 | manifestation
3311 | disarranged
3312 | randomize
3313 | sniffers
3314 | person
3315 | irritatingly
3316 | worsen
3317 | irritatingly
3318 | fret
3319 | exaction
3320 | demand
3321 | sailings
3322 | travel
3323 | sailings
3324 | swan
3325 | objector
3326 | dissenter
3327 | earmuffs
3328 | covering
3329 | synoptic
3330 | same
3331 | infolding
3332 | organic
3333 | smallish
3334 | small
3335 | digitise
3336 | change
3337 | receptions
3338 | tea
3339 | receptions
3340 | greeting
3341 | corpulence
3342 | fleshiness
3343 | disfigure
3344 | scar
3345 | refurbishments
3346 | improvement
3347 | censorships
3348 | deletion
3349 | censorships
3350 | censoring
3351 | depressor
3352 | nerve
3353 | depressor
3354 | muscle
3355 | grocery
3356 | greengrocery
3357 | fruiterer
3358 | seller
3359 | unionise
3360 | enroll
3361 | unionise
3362 | join
3363 | malevolence
3364 | vindictiveness
3365 | malevolence
3366 | evil
3367 | unprecedented
3368 | new
3369 | reclassifications
3370 | categorization
3371 | embracement
3372 | cuddle
3373 | autoloading
3374 | automatic
3375 | abductor
3376 | muscle
3377 | cliffhanger
3378 | episode
3379 | cliffhanger
3380 | contest
3381 | solemnity
3382 | seriousness
3383 | delimited
3384 | determine
3385 | moralist
3386 | stickler
3387 | insecurities
3388 | insecureness
3389 | insecurities
3390 | anxiety
3391 | repeating
3392 | replicate
3393 | churchs
3394 | perform
3395 | discovery
3396 | rediscovery
3397 | discovery
3398 | disclosure
3399 | submerging
3400 | cover
3401 | submerging
3402 | sink
3403 | literalness
3404 | concreteness
3405 | acknowledgement
3406 | admission
3407 | acknowledgement
3408 | acceptance
3409 | rompers
3410 | garment
3411 | rompers
3412 | person
3413 | enfolding
3414 | cocoon
3415 | enfolding
3416 | change
3417 | antifeminist
3418 | chauvinist
3419 | omniscience
3420 | wisdom
3421 | astonish
3422 | dazzle
3423 | circumpolar
3424 | polar
3425 | ascendence
3426 | predominance
3427 | aerialist
3428 | ropewalker
3429 | precociously
3430 | early
3431 | precociously
3432 | intelligent
3433 | suspenseful
3434 | tense
3435 | banished
3436 | expel
3437 | relocation
3438 | transportation
3439 | relocation
3440 | change
3441 | indexical
3442 | cross-index
3443 | indexical
3444 | supply
3445 | absconding
3446 | flee
3447 | encoded
3448 | code
3449 | shanked
3450 | hit
3451 | hypermarkets
3452 | supermarket
3453 | prejudge
3454 | evaluate
3455 | genuinely
3456 | sincere
3457 | genuinely
3458 | attested
3459 | discoverys
3460 | disclosure
3461 | discoverys
3462 | self-discovery
3463 | palestinians
3464 | arab
3465 | parasitical
3466 | dependent
3467 | interconnect
3468 | intercommunicate
3469 | interconnect
3470 | connect
3471 | nondescripts
3472 | person
3473 | amorphous
3474 | unformed
3475 | amorphous
3476 | inorganic
3477 | uncreative
3478 | sterile
3479 | foreigners
3480 | gringo
3481 | unaffected
3482 | unimpressed
3483 | unaffected
3484 | insensitive
3485 | gravitated
3486 | move
3487 | gravitated
3488 | tend
3489 | respectable
3490 | reputable
3491 | respectable
3492 | worthy
3493 | reproduce
3494 | photocopy
3495 | reproduce
3496 | propagate
3497 | schnauzer
3498 | giant
3499 | stimuli
3500 | stimulation
3501 | astronomical
3502 | large
3503 | microbalance
3504 | balance
3505 | subjoined
3506 | append
3507 | unquestioned
3508 | uncontroversial
3509 | loveless
3510 | unloving
3511 | loveless
3512 | unloved
3513 | postmarks
3514 | marker
3515 | postmarks
3516 | stamp
3517 | presenting
3518 | bring
3519 | presenting
3520 | argue
3521 | fiddled
3522 | embezzle
3523 | fiddled
3524 | slack
3525 | transfused
3526 | breathe
3527 | transfused
3528 | pour
3529 | interchanging
3530 | shift
3531 | interchanging
3532 | trade
3533 | antisubmarine
3534 | defensive
3535 | displeases
3536 | repel
3537 | reproachful
3538 | unfavorable
3539 | independently
3540 | worker
3541 | independently
3542 | individualist
3543 | extrajudicial
3544 | illegal
3545 | exterminated
3546 | destroy
3547 | exterminated
3548 | kill
3549 | intercede
3550 | negociate
3551 | postdates
3552 | follow
3553 | postdates
3554 | chronologize
3555 | comport
3556 | about
3557 | comport
3558 | misbehave
3559 | stockers
3560 | animal
3561 | ceremonious
3562 | formal
3563 | authorship
3564 | initiation
3565 | slacken
3566 | decrease
3567 | slacken
3568 | weaken
3569 | freshen
3570 | regenerate
3571 | freshen
3572 | wash
3573 | observed
3574 | comment
3575 | observed
3576 | discover
3577 | hydrolysed
3578 | change
3579 | enjoins
3580 | instruct
3581 | replacements
3582 | stand-in
3583 | replacements
3584 | supplanting
3585 | bengali
3586 | ethnic
3587 | transsexual
3588 | person
3589 | automates
3590 | change
3591 | amateurish
3592 | unprofessional
3593 | sponsorship
3594 | support
3595 | ejector
3596 | person
3597 | ejector
3598 | mechanism
3599 | rehashing
3600 | recycle
3601 | preassembled
3602 | produce
3603 | fascinate
3604 | matter
3605 | fascinate
3606 | interest
3607 | riskless
3608 | safe
3609 | incombustible
3610 | fireproof
3611 | rareness
3612 | scarcity
3613 | lengthy
3614 | long
3615 | lordship
3616 | authority
3617 | lordship
3618 | title
3619 | grinder
3620 | sandwich
3621 | grinder
3622 | wisdom
3623 | algebraic
3624 | quadratics
3625 | congeniality
3626 | friendliness
3627 | congeniality
3628 | compatibility
3629 | portioned
3630 | distribute
3631 | piquancy
3632 | spiciness
3633 | piquancy
3634 | quality
3635 | commenting
3636 | note
3637 | commenting
3638 | explain
3639 | reprocessing
3640 | reclaim
3641 | roosted
3642 | settle
3643 | roosted
3644 | sit
3645 | immobilizing
3646 | beat
3647 | immobilizing
3648 | withhold
3649 | promised
3650 | declare
3651 | employments
3652 | state
3653 | employments
3654 | populace
3655 | transposable
3656 | exchangeable
3657 | protractors
3658 | drafting
3659 | religiousness
3660 | piety
3661 | religiousness
3662 | conscientiousness
3663 | concerts
3664 | settle
3665 | concerts
3666 | plan
3667 | postholes
3668 | hole
3669 | liveable
3670 | habitable
3671 | besieging
3672 | attack
3673 | besieging
3674 | distress
3675 | irregardless
3676 | look
3677 | irregardless
3678 | prize
3679 | attendance
3680 | frequency
3681 | attendance
3682 | presence
3683 | computer
3684 | expert
3685 | computer
3686 | server
3687 | subtend
3688 | shepherd
3689 | subtend
3690 | suffer
3691 | irrelevance
3692 | inapplicability
3693 | desiccating
3694 | dry
3695 | desiccating
3696 | preserve
3697 | transforming
3698 | transubstantiate
3699 | transforming
3700 | change
3701 | prisoners
3702 | internee
3703 | cosponsors
3704 | sponsor
3705 | unconvincing
3706 | implausible
3707 | unconvincing
3708 | unpersuasive
3709 | analogous
3710 | similar
3711 | preheating
3712 | heat
3713 | irrigate
3714 | hush
3715 | irrigate
3716 | treat
3717 | immortalize
3718 | remind
3719 | immortalize
3720 | change
3721 | unploughed
3722 | untilled
3723 | syntaxes
3724 | system
3725 | syntaxes
3726 | structure
3727 | enforcing
3728 | execute
3729 | enforcing
3730 | compel
3731 | devilish
3732 | playful
3733 | devilish
3734 | evil
3735 | ganging
3736 | group
3737 | dissimulate
3738 | disguise
3739 | enhancement
3740 | improvement
3741 | reconstructs
3742 | construct
3743 | entrench
3744 | fasten
3745 | entrench
3746 | trespass
3747 | spherical
3748 | round
3749 | impolitic
3750 | inexpedient
3751 | londoners
3752 | person
3753 | predetermination
3754 | decision
3755 | destabilization
3756 | change
3757 | reasonable
3758 | moderate
3759 | reasonable
3760 | rational
3761 | pottery
3762 | lusterware
3763 | pottery
3764 | trade
3765 | unenthusiastic
3766 | halfhearted
3767 | virility
3768 | masculinity
3769 | virility
3770 | maleness
3771 | discordance
3772 | dissonance
3773 | discordance
3774 | strife
3775 | plundered
3776 | destroy
3777 | plundered
3778 | steal
3779 | transvestite
3780 | person
3781 | transvestite
3782 | homosexual
3783 | retraction
3784 | motion
3785 | retraction
3786 | withdrawal
3787 | enslaves
3788 | subjugate
3789 | carburettors
3790 | mechanical
3791 | explorer
3792 | diver
3793 | perfectible
3794 | perfect
3795 | stimulates
3796 | prompt
3797 | stimulates
3798 | quicken
3799 | concurrencies
3800 | agreement
3801 | concurrencies
3802 | cooperation
3803 | emulsify
3804 | change
3805 | internationalisms
3806 | scope
3807 | internationalisms
3808 | doctrine
3809 | cylindric
3810 | rounded
3811 | sexually
3812 | sexy
3813 | afghani
3814 | iranian
3815 | animalism
3816 | doctrine
3817 | animalism
3818 | disposition
3819 | latinist
3820 | classicist
3821 | punjabi
3822 | sanskrit
3823 | punjabi
3824 | indian
3825 | exterminate
3826 | destroy
3827 | exterminate
3828 | kill
3829 | consign
3830 | check
3831 | respectively
3832 | individual
3833 | receiving
3834 | fence
3835 | unicycle
3836 | wheel
3837 | unicycle
3838 | bicycle
3839 | incised
3840 | cut
3841 | incised
3842 | compound
3843 | perfective
3844 | future
3845 | perfective
3846 | aspect
3847 | apprenticeship
3848 | position
3849 | reporters
3850 | reporter
3851 | houseful
3852 | containerful
3853 | irreverence
3854 | evil
3855 | unostentatious
3856 | tasteful
3857 | disadvantaged
3858 | underprivileged
3859 | combatted
3860 | wrestle
3861 | snooper
3862 | eavesdropper
3863 | researchers
3864 | fieldworker
3865 | resides
3866 | populate
3867 | individualize
3868 | distinguish
3869 | individualize
3870 | change
3871 | coinsurance
3872 | insurance
3873 | micrometer
3874 | nanometer
3875 | micrometer
3876 | caliper
3877 | postcode
3878 | address
3879 | encrusted
3880 | coat
3881 | encrusted
3882 | decorate
3883 | undissolved
3884 | unmelted
3885 | fastness
3886 | fast
3887 | remainder
3888 | sell
3889 | remainder
3890 | part
3891 | marginality
3892 | position
3893 | unmanned
3894 | faze
3895 | refuels
3896 | fuel
3897 | sidewinder
3898 | rattlesnake
3899 | sidewinder
3900 | missile
3901 | federalize
3902 | unite
3903 | casteless
3904 | unwanted
3905 | animalize
3906 | change
3907 | reproves
3908 | knock
3909 | characters
3910 | being
3911 | characters
3912 | scratch
3913 | conjecture
3914 | hypothesis
3915 | symmetrical
3916 | balanced
3917 | allurement
3918 | temptation
3919 | allurement
3920 | invitation
3921 | binging
3922 | eat
3923 | extractor
3924 | forceps
3925 | photographer
3926 | paparazzo
3927 | perspectives
3928 | eye
3929 | perspectives
3930 | point
3931 | disinflation
3932 | economic
3933 | interplanetary
3934 | international
3935 | interplanetary
3936 | unsettled
3937 | trilogies
3938 | trio
3939 | poisoning
3940 | corrupt
3941 | poisoning
3942 | poison
3943 | bobbers
3944 | float
3945 | portrayer
3946 | painter
3947 | invariable
3948 | hard-and-fast
3949 | invariable
3950 | parameter
3951 | constrict
3952 | astringe
3953 | constrict
3954 | choke
3955 | extraversion
3956 | sociability
3957 | frivolously
3958 | superficial
3959 | remounted
3960 | mount
3961 | remounted
3962 | hop
3963 | reviles
3964 | abuse
3965 | discounters
3966 | mercantile
3967 | confinement
3968 | restraint
3969 | consciousness
3970 | knowing
3971 | consciousness
3972 | self
3973 | believing
3974 | feel
3975 | believing
3976 | believe
3977 | regimental
3978 | control
3979 | regimental
3980 | form
3981 | subdivided
3982 | subdivide
3983 | subdivided
3984 | divide
3985 | sustainable
3986 | continue
3987 | standardise
3988 | gauge
3989 | standardise
3990 | measure
3991 | defrayed
3992 | pay
3993 | devilishly
3994 | cook
3995 | devilishly
3996 | antagonize
3997 | disapproving
3998 | discountenance
3999 | subspaces
4000 | mathematical
4001 | connoting
4002 | imply
4003 | connoting
4004 | express
4005 | inheritance
4006 | transfer
4007 | inheritance
4008 | acquisition
4009 | archery
4010 | sport
4011 | sufficed
4012 | serve
4013 | belligerence
4014 | hostility
4015 | procreating
4016 | brood
4017 | gelatinous
4018 | thick
4019 | villainous
4020 | wicked
4021 | harmony
4022 | congruity
4023 | harmony
4024 | music
4025 | inoffensive
4026 | innocuous
4027 | insurrectionist
4028 | young
4029 | inquisitive
4030 | curious
4031 | inquisitive
4032 | inquiring
4033 | extraterrestrial
4034 | hypothetical
4035 | triclinic
4036 | monoclinic
4037 | murderer
4038 | killer
4039 | concurrency
4040 | agreement
4041 | concurrency
4042 | cooperation
4043 | historically
4044 | real
4045 | nontoxic
4046 | antitoxic
4047 | nontoxic
4048 | edible
4049 | strengthened
4050 | sandbag
4051 | strengthened
4052 | brace
4053 | incontrovertible
4054 | undeniable
4055 | incontrovertible
4056 | incontestable
4057 | rumbled
4058 | sound
4059 | intragroup
4060 | intramural
4061 | exceptionally
4062 | extraordinary
4063 | irredeemable
4064 | wicked
4065 | irredeemable
4066 | inconvertible
4067 | snickering
4068 | laugh
4069 |
--------------------------------------------------------------------------------
/tic.R:
--------------------------------------------------------------------------------
1 | # Installs dependencies, runs R CMD check, and runs covr::codecov().
2 | do_package_checks(error_on = "error") # only ERROR-level check failures are raised as errors
3 |
4 | if (ci_on_ghactions() && ci_has_env("BUILD_PKGDOWN")) {
5 | # Creates the pkgdown site and pushes it to the gh-pages branch;
6 | # runs only on GitHub Actions when the "BUILD_PKGDOWN" env var is set.
7 | do_pkgdown()
8 | }
9 |
--------------------------------------------------------------------------------