├── .Rbuildignore ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ └── issue_template.md └── workflows │ ├── issue.yml │ ├── stale-actions.yml │ └── tic.yml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── fasttext_2R_interface.R └── zzz.R ├── README.md ├── codecov.yml ├── inst ├── COPYRIGHTS ├── LICENSE └── PATENTS ├── src ├── Makevars ├── Makevars.win ├── RcppExports.cpp ├── args.cc ├── args.h ├── dictionary.cc ├── dictionary.h ├── fasttext.cc ├── fasttext.h ├── init.c ├── main.cc ├── matrix.cc ├── matrix.h ├── model.cc ├── model.h ├── real.h ├── utils.cc ├── utils.h ├── vector.cc └── vector.h ├── tests ├── testthat.R └── testthat │ ├── test-wrapper_fasttext.R │ └── test_data │ ├── VECS.vec │ ├── doc.txt │ ├── model.bin │ ├── model.vec │ └── queries.txt └── tic.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.ccache$ 2 | ^\.github$ 3 | ^tic\.R$ 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # For more info see: https://docs.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser 2 | 3 | blank_issues_enabled: true 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report or feature request 3 | about: Describe a bug you've encountered or make a case for a new feature 4 | --- 5 | 6 | Please briefly describe your problem and what output you expect. 
If you have a question, you also have the option of asking on Stack Overflow at https://stackoverflow.com/ (but I'm flexible if it's not too complicated) 7 | 8 | Please include a minimal reproducible example 9 | 10 | Please give a brief description of the problem 11 | 12 | Please add your Operating System (e.g., Windows10, Macintosh, Linux) and the R version that you use (e.g., 3.6.2) 13 | 14 | If my package uses Python (via 'reticulate') then please add also the Python version (e.g., Python 3.8) and the 'reticulate' version (e.g., 1.18.0) 15 | -------------------------------------------------------------------------------- /.github/workflows/issue.yml: -------------------------------------------------------------------------------- 1 | # For more info see: https://github.com/Renato66/auto-label 2 | # for the 'secrets.GITHUB_TOKEN' see: https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret 3 | 4 | name: Labeling new issue 5 | on: 6 | issues: 7 | types: ['opened'] 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: Renato66/auto-label@v2 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | ignore-comments: true 16 | labels-synonyms: '{"bug":["error","need fix","not working"],"enhancement":["upgrade"],"question":["help"]}' 17 | labels-not-allowed: '["good first issue"]' 18 | default-labels: '["help wanted"]' 19 | -------------------------------------------------------------------------------- /.github/workflows/stale-actions.yml: -------------------------------------------------------------------------------- 1 | # for the 'secrets.GITHUB_TOKEN' see: https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret 2 | 3 | name: "Mark or close stale issues and PRs" 4 | 5 | on: 6 | schedule: 7 | - cron: "00 * * * *" 8 | 9 | jobs: 10 | stale: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/stale@v3 14 | with: 15 | repo-token: ${{ secrets.GITHUB_TOKEN }} 16 | days-before-stale: 12 17 | 
days-before-close: 7 18 | stale-issue-message: "This is Robo-lampros because the Human-lampros is lazy. This issue has been automatically marked as stale because it has not had recent activity. It will be closed after 7 days if no further activity occurs. Feel free to re-open a closed issue and the Human-lampros will respond." 19 | stale-pr-message: "This is Robo-lampros because the Human-lampros is lazy. This PR has been automatically marked as stale because it has not had recent activity. It will be closed after 7 days if no further activity occurs." 20 | close-issue-message: "This issue was automatically closed because of being stale. Feel free to re-open a closed issue and the Human-lampros will respond." 21 | close-pr-message: "This PR was automatically closed because of being stale." 22 | stale-pr-label: "stale" 23 | stale-issue-label: "stale" 24 | exempt-issue-labels: "bug,enhancement,pinned,security,pending,work_in_progress" 25 | exempt-pr-labels: "bug,enhancement,pinned,security,pending,work_in_progress" 26 | -------------------------------------------------------------------------------- /.github/workflows/tic.yml: -------------------------------------------------------------------------------- 1 | ## tic GitHub Actions template: linux-macos-windows-deploy 2 | ## revision date: 2020-12-11 3 | on: 4 | workflow_dispatch: 5 | push: 6 | pull_request: 7 | # for now, CRON jobs only run on the default branch of the repo (i.e. 
usually on master) 8 | schedule: 9 | # * is a special character in YAML so you have to quote this string 10 | - cron: "0 4 * * *" 11 | 12 | name: tic 13 | 14 | jobs: 15 | all: 16 | runs-on: ${{ matrix.config.os }} 17 | 18 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | config: 24 | # use a different tic template type if you do not want to build on all listed platforms 25 | - { os: windows-latest, r: "release" } 26 | - { os: macOS-latest, r: "release", pkgdown: "true", latex: "true" } 27 | - { os: ubuntu-latest, r: "devel" } 28 | - { os: ubuntu-latest, r: "release" } 29 | 30 | env: 31 | # otherwise remotes::fun() errors cause the build to fail. Example: Unavailability of binaries 32 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 33 | CRAN: ${{ matrix.config.cran }} 34 | # make sure to run `tic::use_ghactions_deploy()` to set up deployment 35 | TIC_DEPLOY_KEY: ${{ secrets.TIC_DEPLOY_KEY }} 36 | # prevent rgl issues because no X11 display is available 37 | RGL_USE_NULL: true 38 | # if you use bookdown or blogdown, replace "PKGDOWN" by the respective 39 | # capitalized term. This also might need to be done in tic.R 40 | BUILD_PKGDOWN: ${{ matrix.config.pkgdown }} 41 | # macOS >= 10.15.4 linking 42 | SDKROOT: /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk 43 | # use GITHUB_TOKEN from GitHub to workaround rate limits in {remotes} 44 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 45 | 46 | steps: 47 | - uses: actions/checkout@v2.3.4 48 | 49 | - uses: r-lib/actions/setup-r@master 50 | with: 51 | r-version: ${{ matrix.config.r }} 52 | Ncpus: 4 53 | 54 | # LaTeX. 
Installation time: 55 | # Linux: ~ 1 min 56 | # macOS: ~ 1 min 30s 57 | # Windows: never finishes 58 | - uses: r-lib/actions/setup-tinytex@master 59 | if: matrix.config.latex == 'true' 60 | 61 | - uses: r-lib/actions/setup-pandoc@master 62 | 63 | # set date/week for use in cache creation 64 | # https://github.community/t5/GitHub-Actions/How-to-set-and-access-a-Workflow-variable/m-p/42970 65 | # - cache R packages daily 66 | - name: "[Cache] Prepare daily timestamp for cache" 67 | if: runner.os != 'Windows' 68 | id: date 69 | run: echo "::set-output name=date::$(date '+%d-%m')" 70 | 71 | - name: "[Cache] Cache R packages" 72 | if: runner.os != 'Windows' 73 | uses: pat-s/always-upload-cache@v2.1.3 74 | with: 75 | path: ${{ env.R_LIBS_USER }} 76 | key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}} 77 | restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}} 78 | 79 | # for some strange Windows reason this step and the next one need to be decoupled 80 | - name: "[Stage] Prepare" 81 | run: | 82 | Rscript -e "if (!requireNamespace('remotes')) install.packages('remotes', type = 'source')" 83 | Rscript -e "if (getRversion() < '3.2' && !requireNamespace('curl')) install.packages('curl', type = 'source')" 84 | 85 | - name: "[Stage] [Linux] Install curl and libgit2" 86 | if: runner.os == 'Linux' 87 | run: sudo apt install libcurl4-openssl-dev libgit2-dev 88 | 89 | - name: "[Stage] [macOS] Install libgit2" 90 | if: runner.os == 'macOS' 91 | run: brew install libgit2 92 | 93 | - name: "[Stage] [macOS] Install system libs for pkgdown" 94 | if: runner.os == 'macOS' && matrix.config.pkgdown != '' 95 | run: brew install harfbuzz fribidi 96 | 97 | - name: "[Stage] [Linux] Install system libs for pkgdown" 98 | if: runner.os == 'Linux' && matrix.config.pkgdown != '' 99 | run: sudo apt install libharfbuzz-dev libfribidi-dev 100 | 101 | - name: "[Stage] Install" 102 | if: matrix.config.os != 'macOS-latest' || matrix.config.r != 
'devel' 103 | run: Rscript -e "remotes::install_github('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()" 104 | 105 | # macOS devel needs its own stage because we need to work with an option to suppress the usage of binaries 106 | - name: "[Stage] Prepare & Install (macOS-devel)" 107 | if: matrix.config.os == 'macOS-latest' && matrix.config.r == 'devel' 108 | run: | 109 | echo -e 'options(Ncpus = 4, pkgType = "source", repos = structure(c(CRAN = "https://cloud.r-project.org/")))' > $HOME/.Rprofile 110 | Rscript -e "remotes::install_github('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()" 111 | 112 | - name: "[Stage] Script" 113 | run: Rscript -e 'tic::script()' 114 | 115 | - name: "[Stage] After Success" 116 | if: matrix.config.os == 'macOS-latest' && matrix.config.r == 'release' 117 | run: Rscript -e "tic::after_success()" 118 | 119 | - name: "[Stage] Upload R CMD check artifacts" 120 | if: failure() 121 | uses: actions/upload-artifact@v2.2.1 122 | with: 123 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 124 | path: check 125 | - name: "[Stage] Before Deploy" 126 | run: | 127 | Rscript -e "tic::before_deploy()" 128 | 129 | - name: "[Stage] Deploy" 130 | run: Rscript -e "tic::deploy()" 131 | 132 | - name: "[Stage] After Deploy" 133 | run: Rscript -e "tic::after_deploy()" 134 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | docs/ 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: fastTextR 2 | Type: Package 3 | Title: Efficient learning of word 
representations 4 | Version: 1.0.2 5 | Date: 2019-04-11 6 | Author: Lampros Mouselimis 7 | Maintainer: Lampros Mouselimis 8 | BugReports: https://github.com/mlampros/fastTextR/issues 9 | URL: https://github.com/mlampros/fastTextR 10 | Description: The fastTextR package is a wrapper (only) for the skipgram and cbow functions of the fastText (https://github.com/facebookresearch/fastText) library. fastText is a library for efficient learning of word representations and sentence classification. Since it uses C++11 features, it requires a compiler with good C++11 support. These include : (gcc-4.6.3 or newer) or (clang-3.3 or newer). More information about the fastText library can be found in https://github.com/facebookresearch/fastText. 11 | License: GPL-3 + (COPYRIGHTS, LICENSE, PATENTS) 12 | Copyright: inst/COPYRIGHTS 13 | LazyData: TRUE 14 | Depends: 15 | R(>= 3.2.3) 16 | Imports: 17 | Rcpp (>= 0.12.5) 18 | LinkingTo: Rcpp, RcppArmadillo (>= 0.7.2) 19 | RoxygenNote: 6.1.0 20 | Suggests: testthat, covr 21 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(fastTextR, .registration = TRUE) 2 | importFrom(Rcpp, evalCpp) 3 | 4 | export(predict_unknown_words) 5 | export(skipgram_cbow) 6 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 2 | ## fastTextR 1.0.2 3 | 4 | - I update the README.md file with installation instructions 5 | 6 | 7 | ## fastTextR 1.0.1 8 | 9 | - I added the [probot](https://github.com/probot/stale) 10 | 11 | 12 | ## fastTextR 1.0.0 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do 
not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | convert_args_to_pointers <- function(string_commands, INPUT, OUTPUT) { 5 | invisible(.Call(`_fastTextR_convert_args_to_pointers`, string_commands, INPUT, OUTPUT)) 6 | } 7 | 8 | -------------------------------------------------------------------------------- /R/fasttext_2R_interface.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' wrapper for the skipgram and cbow methods of the fasttext text classifier 4 | #' 5 | #' 6 | #' @param input_path a character string specifying the path to the train text file 7 | #' @param output_path a character string specifying the path to the output file (without a file extension, e.g. ../model) 8 | #' @param method a string specifying the method. One of \emph{skipgram}, \emph{cbow} 9 | #' @param lr a float number specifying the learning rate [default is 0.1] 10 | #' @param lrUpdateRate a number specifying the rate of updates for the learning rate [default is 100] 11 | #' @param dim a number specifying the size of the word vectors [default is 100] 12 | #' @param ws a number specifying the size of the context window [default is 5] 13 | #' @param epoch a value specifying the number of epochs [default is 5] 14 | #' @param minCount a number specifying the minimal number of word occurrences [default is 1] 15 | #' @param neg a value specifying the number of negatives sampled [default is 5] 16 | #' @param wordNgrams a number specifying the max length of word ngram [default is 1] 17 | #' @param loss a character string specifying the loss function. 
One of \emph{ns (negative sampling)}, \emph{hs (hierarchical softmax)}, \emph{softmax} [default is 'ns'] 18 | #' @param bucket a value specifying the number of buckets [default is 2000000] 19 | #' @param minn a number specifying the min length of char ngram [default is 0] 20 | #' @param maxn a number specifying the max length of char ngram [default is 0] 21 | #' @param thread a value specifying the number of threads [default is 6] 22 | #' @param t a float number specifying the sampling threshold [default is 0.0001] 23 | #' @param verbose a number (between 0 and 2) specifying the verbosity level [default is 2] 24 | #' @return an object of class 'fasttextR', i.e. a list with the elements \emph{file_location} (a character string naming the output path) and \emph{num_vectors} (the value of the \emph{dim} parameter) 25 | #' @export 26 | #' @details 27 | #' the function will save a model.bin and the word vectors to a pre-specified path (\emph{output_path}) 28 | #' @references 29 | #' https://github.com/facebookresearch/fastText 30 | #' 31 | #' https://arxiv.org/abs/1607.04606 32 | #' 33 | #' https://arxiv.org/abs/1607.01759 34 | #' @examples 35 | #' 36 | #' # library(fastTextR) 37 | #' 38 | #' # res = skipgram_cbow(input_path = "/data_fasttext/out_test_file.txt", 39 | #' 40 | #' # output_path = "/data_fasttext/model", method = "skipgram") 41 | 42 | 43 | skipgram_cbow = function(input_path = NULL, output_path = NULL, method = "skipgram", lr = 0.1, lrUpdateRate = 100, dim = 100, 44 | 45 | ws = 5, epoch = 5, minCount = 1, neg = 5, wordNgrams = 1, loss = "ns", bucket = 2000000, minn = 0, 46 | 47 | maxn = 0, thread = 6, t = 0.0001, verbose = 2) { 48 | 49 | try_err_files = inherits(tryCatch(normalizePath(input_path, mustWork = T), error = function(e) e), "error") 50 | if (!is.character(input_path) || try_err_files) stop("the input_path parameter should be a valid character string path") 51 | if (is.null(output_path)) stop("the output_path parameter should be a non-NULL valid character string path") 52 | if (.Platform$OS.type == 'unix') { 53 | first = strsplit(output_path, 
"/")[[1]] 54 | second = first[-length(first)] 55 | res = paste(second, collapse = "/") 56 | try_err_files_out = inherits(tryCatch(normalizePath(res, mustWork = T), error = function(e) e), "error") 57 | } 58 | if (.Platform$OS.type == 'windows') { 59 | first = strsplit(output_path, "\\\\|/")[[1]] 60 | second = first[-length(first)] 61 | res = paste(second, collapse = "\\") 62 | try_err_files_out = inherits(tryCatch(normalizePath(res, mustWork = T), error = function(e) e), "error") 63 | } 64 | if (!is.character(output_path) || try_err_files_out) stop("the output_path parameter should be a valid character string path") 65 | type_bin = strsplit(first[length(first)], '[.]')[[1]] 66 | if (length(type_bin) > 1) stop("the output_path parameter should be a valid path with no file extensions. Example path: ../model") 67 | if (!method %in% c('skipgram', 'cbow')) stop("the method parameter should be either 'skipgram' or 'cbow'") 68 | params = c(lr, t, lrUpdateRate, dim, ws, epoch, minCount, neg, wordNgrams, bucket) 69 | params_nams = c('lr', 't', 'lrUpdateRate', 'dim', 'ws', 'epoch', 'minCount', 'neg', 'wordNgrams', 'bucket') 70 | flag_params = sum(params <= 0) 71 | if (flag_params > 0) { 72 | idx = which(params <= 0)[1] 73 | stop(paste0("the ", paste0(params_nams[idx], " parameter should be a number greater than 0.0")))} 74 | if (minn < 0) stop("the minn parameter should be greater than or equal to 0") 75 | if (maxn < 0) stop("the maxn parameter should be greater than or equal to 0") 76 | if (verbose < 0 || verbose > 2) stop("the verbose parameter should be a number between 0 and 2") 77 | if (thread < 1) stop("the thread parameter should be greater than 0") 78 | if (!loss %in% c('ns', 'hs', 'softmax')) stop("the loss parameter should be one of 'ns', 'hs', 'softmax'") 79 | 80 | if (verbose > 0) { start = Sys.time() } 81 | 82 | default_args = c("fasttext", method, "-input", as.character(input_path), "-output", as.character(output_path), "-lr", as.character(lr), 83 | 84 | "-lrUpdateRate", 
as.character(lrUpdateRate), "-dim", as.character(dim), "-ws", as.character(ws), "-epoch", as.character(epoch), 85 | 86 | "-minCount", as.character(minCount), "-neg", as.character(neg), "-wordNgrams", 87 | 88 | as.character(wordNgrams), "-loss", loss, "-bucket", as.character(bucket), "-minn", as.character(minn), "-maxn", 89 | 90 | as.character(maxn), "-thread", as.character(thread), "-t", as.character(t), "-verbose", as.character(verbose)) 91 | 92 | convert_args_to_pointers(default_args, "", "") 93 | 94 | if (verbose > 0) { 95 | 96 | end = Sys.time() 97 | 98 | t = end - start 99 | 100 | cat('\n'); cat('time to complete :', t, attributes(t)$units, '\n'); cat('\n'); 101 | } 102 | 103 | return(structure(list(file_location = paste0("the output is saved in: ", output_path), num_vectors = dim), 104 | 105 | class = 'fasttextR')) 106 | } 107 | 108 | 109 | 110 | #' predict unknown words for the skipgram and cbow models 111 | #' 112 | #' 113 | #' @param skipgram_cbow_model_output a character string specifying the path to the existing model .bin file saved by the \emph{skipgram_cbow} function 114 | #' @param unknown_words_path a character string specifying the path to the unknown words file 115 | #' @param output_path a character string specifying the path to the output file 116 | #' @param verbose either TRUE or FALSE. If TRUE information will be printed out 117 | #' @return a character string specifying the location of the saved data 118 | #' @export 119 | #' @details 120 | #' the function will save to a pre-specified path (\emph{output_path}) the numeric vectors for the unknown words. 
In case of non-matches it returns zero-valued vectors 121 | #' @references 122 | #' https://github.com/facebookresearch/fastText 123 | #' 124 | #' https://arxiv.org/abs/1607.04606 125 | #' 126 | #' https://arxiv.org/abs/1607.01759 127 | #' @examples 128 | #' 129 | #' # library(fastTextR) 130 | #' 131 | #' # res = predict_unknown_words(skipgram_cbow_model_output = "/data_fasttext/model.bin", 132 | #' 133 | #' # unknown_words_path = "/data_fasttext/queries.txt", 134 | #' 135 | #' # output_path = "/data_fasttext/NEW_VEC") 136 | 137 | 138 | predict_unknown_words = function(skipgram_cbow_model_output = NULL, unknown_words_path = NULL, output_path = NULL, verbose = FALSE) { 139 | 140 | try_err_files = inherits(tryCatch(normalizePath(skipgram_cbow_model_output, mustWork = T), error = function(e) e), "error") 141 | if (!is.character(skipgram_cbow_model_output) || try_err_files) stop("the skipgram_cbow_model_output parameter should be a valid character string path") 142 | try_err_unknown = inherits(tryCatch(normalizePath(unknown_words_path, mustWork = T), error = function(e) e), "error") 143 | if (!is.character(unknown_words_path) || try_err_unknown) stop("the unknown_words_path parameter should be a valid character string path") 144 | if (is.null(output_path)) stop("the output_path parameter should be a non-NULL character string path") 145 | if (!is.null(output_path)) { 146 | if (!is.character(output_path)) { 147 | stop("the output_path parameter should be a valid character string path")}} 148 | if (!is.logical(verbose)) stop("the verbose parameter should be either TRUE or FALSE") 149 | 150 | if (verbose) { start = Sys.time() } 151 | 152 | default_args = c("fasttext", "predict_skipgram_cbow", skipgram_cbow_model_output) 153 | 154 | convert_args_to_pointers(default_args, unknown_words_path, output_path) 155 | 156 | if (verbose) { 157 | 158 | end = Sys.time() 159 | 160 | t = end - start 161 | 162 | cat('\n'); cat('time to complete :', t, attributes(t)$units, '\n'); 
cat('\n'); 163 | } 164 | 165 | return(paste0("the output is saved in: ", output_path)) 166 | } 167 | 168 | 169 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # temporary startup message to inform users about the new version of 'fastText' [ SEE : http://r-pkgs.had.co.nz/r.html#r-differences ] 2 | 3 | .onAttach <- function(libname, pkgname) { 4 | 5 | packageStartupMessage("UPDATE 11-04-2019: There is an updated version of the fastText R package which includes all the features of the ported fasttext library. This R package is archived. Please visit 'https://github.com/mlampros/fastText' to install the most updated version!") 6 | } 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![tic](https://github.com/mlampros/fastTextR/workflows/tic/badge.svg?branch=master)](https://github.com/mlampros/fastTextR/actions) 3 | [![codecov.io](https://codecov.io/github/mlampros/fastTextR/coverage.svg?branch=master)](https://codecov.io/github/mlampros/fastTextR?branch=master) 4 | Buy Me A Coffee 5 | 6 | 7 | 8 | ## fastTextR 9 |
10 | 11 | **UPDATE 11-04-2019**: There is an [updated version of the fastText R package](https://github.com/mlampros/fastText) which includes all the features of the ported [fasttext library](https://github.com/facebookresearch/fastText). Therefore this repository **is archived**. 12 | 13 |
14 | 15 | The fastTextR package is an R wrapper (only) for the *skipgram* and *cbow* functions of the [*fastText*](https://github.com/facebookresearch/fastText) library. *fastText* is a library for efficient learning of word representations and sentence classification. Since it uses C++11 features, it requires a compiler with good C++11 support. These include : (gcc-4.6.3 or newer) or (clang-3.3 or newer). More information about the fastText library can be found in [https://github.com/facebookresearch/fastText](https://github.com/facebookresearch/fastText). COPYRIGHTS, LICENSE and PATENTS files can be found in the *inst* folder of the R package. 16 | 17 | A detailed example can be found in my [blog-post](http://mlampros.github.io/2017/01/05/textTinyR_package/) about text processing, in section 'word vectors'. 18 | 19 |
20 | 21 | To install the package from Github you can **either** use the *install_github* function of the devtools package, 22 |

23 | 24 | ```R 25 | 26 | devtools::install_github('mlampros/fastTextR') 27 | 28 | 29 | ``` 30 |
31 | 32 | **or** directly download the fastTextR-zip file using the **Clone or download** button in the [repository page](https://github.com/mlampros/fastTextR), extract it locally (rename it to *fastTextR* if necessary and check that files such as DESCRIPTION, NAMESPACE etc. are present when you open the fastTextR folder) and then run, 33 | 34 |
35 | 36 | ```R 37 | 38 | #------------- 39 | # on a Unix OS 40 | #------------- 41 | 42 | setwd('/your_folder/fastTextR/') 43 | Rcpp::compileAttributes(verbose = TRUE) 44 | setwd('/your_folder/') 45 | system("R CMD build fastTextR") 46 | system("R CMD INSTALL fastTextR_1.0.2.tar.gz") 47 | 48 | 49 | #------------------ 50 | # on the Windows OS 51 | #------------------ 52 | 53 | setwd('C:/your_folder/fastTextR/') 54 | Rcpp::compileAttributes(verbose = TRUE) 55 | setwd('C:/your_folder/') 56 | system("R CMD build fastTextR") 57 | system("R CMD INSTALL fastTextR_1.0.2.tar.gz") 58 | 59 | ``` 60 |
61 | 62 | Use the following link to report bugs/issues (for the R wrapper), 63 |

64 | 65 | [https://github.com/mlampros/fastTextR/issues](https://github.com/mlampros/fastTextR/issues) 66 | 67 | 68 |
69 | 70 | #### **Example usage** 71 | 72 | 73 |
74 | 75 | ```R 76 | 77 | 78 | # example input data ---> 'dat.txt' 79 | 80 | 81 | 82 | library(fastTextR) 83 | 84 | 85 | 86 | #-------------------------- 87 | # skipgram or cbow methods 88 | #-------------------------- 89 | 90 | 91 | res = skipgram_cbow(input_path = "/data_fasttext/dat.txt", 92 | 93 | output_path = "/data_fasttext/model", 94 | 95 | method = "skipgram", lr = 0.1, 96 | 97 | lrUpdateRate = 100, dim = 100, 98 | 99 | ws = 5, epoch = 5, minCount = 1, 100 | 101 | neg = 5, wordNgrams = 1, loss = "ns", 102 | 103 | bucket = 2000000, minn = 0, 104 | 105 | maxn = 0, thread = 6, t = 0.0001, 106 | 107 | verbose = 2) 108 | 109 | 110 | 111 | #------------------------------------------------------------- 112 | # prediction of unknown words for the skipgram and cbow models 113 | #------------------------------------------------------------- 114 | 115 | 116 | res = predict_unknown_words(skipgram_cbow_model_output = "/data_fasttext/model.bin", 117 | 118 | unknown_words_path = "/data_fasttext/queries.txt", 119 | 120 | output_path = "/data_fasttext/NEW_VEC", 121 | 122 | verbose = TRUE) 123 | 124 | ``` 125 | 126 |
127 | 128 | More information about the parameters of each function can be found in the package documentation. 129 | 130 | 131 |
132 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | -------------------------------------------------------------------------------- /inst/COPYRIGHTS: -------------------------------------------------------------------------------- 1 | 2 | 3 | ============================================================================================== 4 | The fastTextR package is a wrapper for the skipgram and cbow functions of the fastText library 5 | ============================================================================================== 6 | 7 | 8 | Copyright (c) 2016-present, Facebook, Inc. 9 | All rights reserved. 10 | 11 | This source code is licensed under the BSD-style license found in the 12 | LICENSE file in the root directory of this source tree. An additional grant 13 | of patent rights can be found in the PATENTS file in the same directory. 14 | 15 | 16 | -------------------------------------------------------------------------------- /inst/LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For fastText software 4 | 5 | Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 
16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /inst/PATENTS: -------------------------------------------------------------------------------- 1 | Additional Grant of Patent Rights Version 2 2 | 3 | "Software" means the fastText software distributed by Facebook, Inc. 4 | 5 | Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software 6 | ("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable 7 | (subject to the termination provision below) license under any Necessary 8 | Claims, to make, have made, use, sell, offer to sell, import, and otherwise 9 | transfer the Software. For avoidance of doubt, no license is granted under 10 | Facebook’s rights in any patent claims that are infringed by (i) modifications 11 | to the Software made by you or any third party or (ii) the Software in 12 | combination with any software or other technology. 
13 | 14 | The license granted hereunder will terminate, automatically and without notice, 15 | if you (or any of your subsidiaries, corporate affiliates or agents) initiate 16 | directly or indirectly, or take a direct financial interest in, any Patent 17 | Assertion: (i) against Facebook or any of its subsidiaries or corporate 18 | affiliates, (ii) against any party if such Patent Assertion arises in whole or 19 | in part from any software, technology, product or service of Facebook or any of 20 | its subsidiaries or corporate affiliates, or (iii) against any party relating 21 | to the Software. Notwithstanding the foregoing, if Facebook or any of its 22 | subsidiaries or corporate affiliates files a lawsuit alleging patent 23 | infringement against you in the first instance, and you respond by filing a 24 | patent infringement counterclaim in that lawsuit against that party that is 25 | unrelated to the Software, the license granted hereunder will not terminate 26 | under section (i) of this paragraph due to such counterclaim. 27 | 28 | A "Necessary Claim" is a claim of a patent owned by Facebook that is 29 | necessarily infringed by the Software standing alone. 30 | 31 | A "Patent Assertion" is any lawsuit or other action alleging direct, indirect, 32 | or contributory infringement or inducement to infringe any patent, including a 33 | cross-claim or counterclaim. 
34 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) 3 | CXX_STD = CXX11 4 | PKG_CPPFLAGS = -I../inst/include/ 5 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) -mthreads 3 | CXX_STD = CXX11 4 | PKG_CPPFLAGS = -I../inst/include/ 5 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | // convert_args_to_pointers 10 | void convert_args_to_pointers(std::vector string_commands, std::string INPUT, std::string OUTPUT); 11 | RcppExport SEXP _fastTextR_convert_args_to_pointers(SEXP string_commandsSEXP, SEXP INPUTSEXP, SEXP OUTPUTSEXP) { 12 | BEGIN_RCPP 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< std::vector >::type string_commands(string_commandsSEXP); 15 | Rcpp::traits::input_parameter< std::string >::type INPUT(INPUTSEXP); 16 | Rcpp::traits::input_parameter< std::string >::type OUTPUT(OUTPUTSEXP); 17 | convert_args_to_pointers(string_commands, INPUT, OUTPUT); 18 | return R_NilValue; 19 | END_RCPP 20 | } 21 | -------------------------------------------------------------------------------- /src/args.cc: -------------------------------------------------------------------------------- 1 | # include 2 | // 
[[Rcpp::depends("RcppArmadillo")]] 3 | // [[Rcpp::plugins(openmp)]] 4 | // [[Rcpp::plugins(cpp11)]] 5 | 6 | /** 7 | * Copyright (c) 2016-present, Facebook, Inc. 8 | * All rights reserved. 9 | * 10 | * This source code is licensed under the BSD-style license found in the 11 | * LICENSE file in the root directory of this source tree. An additional grant 12 | * of patent rights can be found in the PATENTS file in the same directory. 13 | */ 14 | 15 | #include "args.h" 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | namespace fasttext { 23 | 24 | Args::Args() { 25 | lr = 0.05; 26 | dim = 100; 27 | ws = 5; 28 | epoch = 5; 29 | minCount = 5; 30 | neg = 5; 31 | wordNgrams = 1; 32 | loss = loss_name::ns; 33 | model = model_name::sg; 34 | bucket = 2000000; 35 | minn = 3; 36 | maxn = 6; 37 | thread = 12; 38 | lrUpdateRate = 100; 39 | t = 1e-4; 40 | label = "__label__"; 41 | verbose = 2; 42 | pretrainedVectors = ""; 43 | } 44 | 45 | void Args::parseArgs(int argc, char** argv) { 46 | std::string command(argv[1]); 47 | if (command == "supervised") { 48 | model = model_name::sup; 49 | loss = loss_name::softmax; 50 | minCount = 1; 51 | minn = 0; 52 | maxn = 0; 53 | lr = 0.1; 54 | } else if (command == "cbow") { 55 | model = model_name::cbow; 56 | } 57 | int ai = 2; 58 | while (ai < argc) { 59 | if (argv[ai][0] != '-') { 60 | std::cout << "Provided argument without a dash! Usage:" << std::endl; 61 | printHelp(); 62 | exit(EXIT_FAILURE); 63 | } 64 | if (strcmp(argv[ai], "-h") == 0) { 65 | std::cout << "Here is the help! 
Usage:" << std::endl; 66 | printHelp(); 67 | exit(EXIT_FAILURE); 68 | } else if (strcmp(argv[ai], "-input") == 0) { 69 | input = std::string(argv[ai + 1]); 70 | } else if (strcmp(argv[ai], "-test") == 0) { 71 | test = std::string(argv[ai + 1]); 72 | } else if (strcmp(argv[ai], "-output") == 0) { 73 | output = std::string(argv[ai + 1]); 74 | } else if (strcmp(argv[ai], "-lr") == 0) { 75 | lr = atof(argv[ai + 1]); 76 | } else if (strcmp(argv[ai], "-lrUpdateRate") == 0) { 77 | lrUpdateRate = atoi(argv[ai + 1]); 78 | } else if (strcmp(argv[ai], "-dim") == 0) { 79 | dim = atoi(argv[ai + 1]); 80 | } else if (strcmp(argv[ai], "-ws") == 0) { 81 | ws = atoi(argv[ai + 1]); 82 | } else if (strcmp(argv[ai], "-epoch") == 0) { 83 | epoch = atoi(argv[ai + 1]); 84 | } else if (strcmp(argv[ai], "-minCount") == 0) { 85 | minCount = atoi(argv[ai + 1]); 86 | } else if (strcmp(argv[ai], "-neg") == 0) { 87 | neg = atoi(argv[ai + 1]); 88 | } else if (strcmp(argv[ai], "-wordNgrams") == 0) { 89 | wordNgrams = atoi(argv[ai + 1]); 90 | } else if (strcmp(argv[ai], "-loss") == 0) { 91 | if (strcmp(argv[ai + 1], "hs") == 0) { 92 | loss = loss_name::hs; 93 | } else if (strcmp(argv[ai + 1], "ns") == 0) { 94 | loss = loss_name::ns; 95 | } else if (strcmp(argv[ai + 1], "softmax") == 0) { 96 | loss = loss_name::softmax; 97 | } else { 98 | std::cout << "Unknown loss: " << argv[ai + 1] << std::endl; 99 | printHelp(); 100 | exit(EXIT_FAILURE); 101 | } 102 | } else if (strcmp(argv[ai], "-bucket") == 0) { 103 | bucket = atoi(argv[ai + 1]); 104 | } else if (strcmp(argv[ai], "-minn") == 0) { 105 | minn = atoi(argv[ai + 1]); 106 | } else if (strcmp(argv[ai], "-maxn") == 0) { 107 | maxn = atoi(argv[ai + 1]); 108 | } else if (strcmp(argv[ai], "-thread") == 0) { 109 | thread = atoi(argv[ai + 1]); 110 | } else if (strcmp(argv[ai], "-t") == 0) { 111 | t = atof(argv[ai + 1]); 112 | } else if (strcmp(argv[ai], "-label") == 0) { 113 | label = std::string(argv[ai + 1]); 114 | } else if (strcmp(argv[ai], "-verbose") 
== 0) { 115 | verbose = atoi(argv[ai + 1]); 116 | } else if (strcmp(argv[ai], "-pretrainedVectors") == 0) { 117 | pretrainedVectors = std::string(argv[ai + 1]); 118 | } else { 119 | std::cout << "Unknown argument: " << argv[ai] << std::endl; 120 | printHelp(); 121 | exit(EXIT_FAILURE); 122 | } 123 | ai += 2; 124 | } 125 | if (input.empty() || output.empty()) { 126 | std::cout << "Empty input or output path." << std::endl; 127 | printHelp(); 128 | exit(EXIT_FAILURE); 129 | } 130 | if (wordNgrams <= 1 && maxn == 0) { 131 | bucket = 0; 132 | } 133 | } 134 | 135 | void Args::printHelp() { 136 | std::string lname = "ns"; 137 | if (loss == loss_name::hs) lname = "hs"; 138 | if (loss == loss_name::softmax) lname = "softmax"; 139 | std::cout 140 | << "\n" 141 | << "The following arguments are mandatory:\n" 142 | << " -input training file path\n" 143 | << " -output output file path\n\n" 144 | << "The following arguments are optional:\n" 145 | << " -lr learning rate [" << lr << "]\n" 146 | << " -lrUpdateRate change the rate of updates for the learning rate [" << lrUpdateRate << "]\n" 147 | << " -dim size of word vectors [" << dim << "]\n" 148 | << " -ws size of the context window [" << ws << "]\n" 149 | << " -epoch number of epochs [" << epoch << "]\n" 150 | << " -minCount minimal number of word occurences [" << minCount << "]\n" 151 | << " -neg number of negatives sampled [" << neg << "]\n" 152 | << " -wordNgrams max length of word ngram [" << wordNgrams << "]\n" 153 | << " -loss loss function {ns, hs, softmax} [ns]\n" 154 | << " -bucket number of buckets [" << bucket << "]\n" 155 | << " -minn min length of char ngram [" << minn << "]\n" 156 | << " -maxn max length of char ngram [" << maxn << "]\n" 157 | << " -thread number of threads [" << thread << "]\n" 158 | << " -t sampling threshold [" << t << "]\n" 159 | << " -label labels prefix [" << label << "]\n" 160 | << " -verbose verbosity level [" << verbose << "]\n" 161 | << " -pretrainedVectors pretrained word vectors for 
supervised learning []" 162 | << std::endl; 163 | } 164 | 165 | void Args::save(std::ostream& out) { 166 | out.write((char*) &(dim), sizeof(int)); 167 | out.write((char*) &(ws), sizeof(int)); 168 | out.write((char*) &(epoch), sizeof(int)); 169 | out.write((char*) &(minCount), sizeof(int)); 170 | out.write((char*) &(neg), sizeof(int)); 171 | out.write((char*) &(wordNgrams), sizeof(int)); 172 | out.write((char*) &(loss), sizeof(loss_name)); 173 | out.write((char*) &(model), sizeof(model_name)); 174 | out.write((char*) &(bucket), sizeof(int)); 175 | out.write((char*) &(minn), sizeof(int)); 176 | out.write((char*) &(maxn), sizeof(int)); 177 | out.write((char*) &(lrUpdateRate), sizeof(int)); 178 | out.write((char*) &(t), sizeof(double)); 179 | } 180 | 181 | void Args::load(std::istream& in) { 182 | in.read((char*) &(dim), sizeof(int)); 183 | in.read((char*) &(ws), sizeof(int)); 184 | in.read((char*) &(epoch), sizeof(int)); 185 | in.read((char*) &(minCount), sizeof(int)); 186 | in.read((char*) &(neg), sizeof(int)); 187 | in.read((char*) &(wordNgrams), sizeof(int)); 188 | in.read((char*) &(loss), sizeof(loss_name)); 189 | in.read((char*) &(model), sizeof(model_name)); 190 | in.read((char*) &(bucket), sizeof(int)); 191 | in.read((char*) &(minn), sizeof(int)); 192 | in.read((char*) &(maxn), sizeof(int)); 193 | in.read((char*) &(lrUpdateRate), sizeof(int)); 194 | in.read((char*) &(t), sizeof(double)); 195 | } 196 | 197 | } 198 | -------------------------------------------------------------------------------- /src/args.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 
namespace fasttext {

// Training mode. Args::parseArgs() selects `sup` for the "supervised" command
// and `cbow` for the "cbow" command; the constructor default is `sg`.
enum class model_name : int {cbow=1, sg, sup};
// Loss function, selectable on the command line with -loss {hs, ns, softmax}.
enum class loss_name : int {hs=1, ns, softmax};

// Plain container for all command-line options / model hyper-parameters.
// Defaults quoted below are the ones assigned in Args::Args(); the
// "supervised" command overrides some of them in Args::parseArgs().
class Args {
  public:
    Args();
    std::string input;              // -input: training file path (mandatory)
    std::string test;               // -test: value stored verbatim by parseArgs()
    std::string output;             // -output: output file path (mandatory)
    double lr;                      // -lr: learning rate (default 0.05, 0.1 for supervised)
    int lrUpdateRate;               // -lrUpdateRate: rate of updates for the learning rate (default 100)
    int dim;                        // -dim: size of word vectors (default 100)
    int ws;                         // -ws: size of the context window (default 5)
    int epoch;                      // -epoch: number of epochs (default 5)
    int minCount;                   // -minCount: minimal number of word occurrences (default 5, 1 for supervised)
    int neg;                        // -neg: number of negatives sampled (default 5)
    int wordNgrams;                 // -wordNgrams: max length of word n-gram (default 1)
    loss_name loss;                 // -loss: default ns, softmax for supervised
    model_name model;               // chosen by the command token, default sg
    int bucket;                     // -bucket: number of buckets (default 2000000; parseArgs() forces 0
                                    // when wordNgrams <= 1 and maxn == 0)
    int minn;                       // -minn: min length of char n-gram (default 3, 0 for supervised)
    int maxn;                       // -maxn: max length of char n-gram (default 6, 0 for supervised)
    int thread;                     // -thread: number of threads (default 12)
    double t;                       // -t: sampling threshold (default 1e-4)
    std::string label;              // -label: labels prefix (default "__label__")
    int verbose;                    // -verbose: verbosity level (default 2)
    std::string pretrainedVectors;  // -pretrainedVectors: pretrained word vectors for supervised learning

    // Parse argc/argv into the members above; exits the process on bad input.
    void parseArgs(int, char**);
    // Print the usage text to std::cout.
    void printHelp();
    // Binary (de)serialisation of the hyper-parameters stored with a model.
    void save(std::ostream&);
    void load(std::istream&);
};

}
13 | */ 14 | 15 | #include "dictionary.h" 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | namespace fasttext { 26 | 27 | const std::string Dictionary::EOS = ""; 28 | const std::string Dictionary::BOW = "<"; 29 | const std::string Dictionary::EOW = ">"; 30 | 31 | Dictionary::Dictionary(std::shared_ptr args) { 32 | args_ = args; 33 | size_ = 0; 34 | nwords_ = 0; 35 | nlabels_ = 0; 36 | ntokens_ = 0; 37 | word2int_.resize(MAX_VOCAB_SIZE); 38 | for (int32_t i = 0; i < MAX_VOCAB_SIZE; i++) { 39 | word2int_[i] = -1; 40 | } 41 | } 42 | 43 | int32_t Dictionary::find(const std::string& w) const { 44 | int32_t h = hash(w) % MAX_VOCAB_SIZE; 45 | while (word2int_[h] != -1 && words_[word2int_[h]].word != w) { 46 | h = (h + 1) % MAX_VOCAB_SIZE; 47 | } 48 | return h; 49 | } 50 | 51 | void Dictionary::add(const std::string& w) { 52 | int32_t h = find(w); 53 | ntokens_++; 54 | if (word2int_[h] == -1) { 55 | entry e; 56 | e.word = w; 57 | e.count = 1; 58 | e.type = (w.find(args_->label) == 0) ? 
entry_type::label : entry_type::word; 59 | words_.push_back(e); 60 | word2int_[h] = size_++; 61 | } else { 62 | words_[word2int_[h]].count++; 63 | } 64 | } 65 | 66 | int32_t Dictionary::nwords() const { 67 | return nwords_; 68 | } 69 | 70 | int32_t Dictionary::nlabels() const { 71 | return nlabels_; 72 | } 73 | 74 | int64_t Dictionary::ntokens() const { 75 | return ntokens_; 76 | } 77 | 78 | const std::vector& Dictionary::getNgrams(int32_t i) const { 79 | assert(i >= 0); 80 | assert(i < nwords_); 81 | return words_[i].subwords; 82 | } 83 | 84 | const std::vector Dictionary::getNgrams(const std::string& word) const { 85 | int32_t i = getId(word); 86 | if (i >= 0) { 87 | return getNgrams(i); 88 | } 89 | std::vector ngrams; 90 | computeNgrams(BOW + word + EOW, ngrams); 91 | return ngrams; 92 | } 93 | 94 | bool Dictionary::discard(int32_t id, real rand) const { 95 | assert(id >= 0); 96 | assert(id < nwords_); 97 | if (args_->model == model_name::sup) return false; 98 | return rand > pdiscard_[id]; 99 | } 100 | 101 | int32_t Dictionary::getId(const std::string& w) const { 102 | int32_t h = find(w); 103 | return word2int_[h]; 104 | } 105 | 106 | entry_type Dictionary::getType(int32_t id) const { 107 | assert(id >= 0); 108 | assert(id < size_); 109 | return words_[id].type; 110 | } 111 | 112 | std::string Dictionary::getWord(int32_t id) const { 113 | assert(id >= 0); 114 | assert(id < size_); 115 | return words_[id].word; 116 | } 117 | 118 | uint32_t Dictionary::hash(const std::string& str) const { 119 | uint32_t h = 2166136261; 120 | for (size_t i = 0; i < str.size(); i++) { 121 | h = h ^ uint32_t(str[i]); 122 | h = h * 16777619; 123 | } 124 | return h; 125 | } 126 | 127 | void Dictionary::computeNgrams(const std::string& word, 128 | std::vector& ngrams) const { 129 | for (size_t i = 0; i < word.size(); i++) { 130 | std::string ngram; 131 | if ((word[i] & 0xC0) == 0x80) continue; 132 | for (size_t j = i, n = 1; j < word.size() && n <= args_->maxn; n++) { 133 | 
ngram.push_back(word[j++]); 134 | while (j < word.size() && (word[j] & 0xC0) == 0x80) { 135 | ngram.push_back(word[j++]); 136 | } 137 | if (n >= args_->minn && !(n == 1 && (i == 0 || j == word.size()))) { 138 | int32_t h = hash(ngram) % args_->bucket; 139 | ngrams.push_back(nwords_ + h); 140 | } 141 | } 142 | } 143 | } 144 | 145 | void Dictionary::initNgrams() { 146 | for (size_t i = 0; i < size_; i++) { 147 | std::string word = BOW + words_[i].word + EOW; 148 | words_[i].subwords.push_back(i); 149 | computeNgrams(word, words_[i].subwords); 150 | } 151 | } 152 | 153 | bool Dictionary::readWord(std::istream& in, std::string& word) const 154 | { 155 | char c; 156 | std::streambuf& sb = *in.rdbuf(); 157 | word.clear(); 158 | while ((c = sb.sbumpc()) != EOF) { 159 | if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\f' || c == '\0') { 160 | if (word.empty()) { 161 | if (c == '\n') { 162 | word += EOS; 163 | return true; 164 | } 165 | continue; 166 | } else { 167 | if (c == '\n') 168 | sb.sungetc(); 169 | return true; 170 | } 171 | } 172 | word.push_back(c); 173 | } 174 | // trigger eofbit 175 | in.get(); 176 | return !word.empty(); 177 | } 178 | 179 | void Dictionary::readFromFile(std::istream& in) { 180 | std::string word; 181 | int64_t minThreshold = 1; 182 | while (readWord(in, word)) { 183 | add(word); 184 | if (ntokens_ % 1000000 == 0 && args_->verbose > 1) { 185 | std::cout << "\rRead " << ntokens_ / 1000000 << "M words" << std::flush; 186 | } 187 | if (size_ > 0.75 * MAX_VOCAB_SIZE) { 188 | threshold(minThreshold++); 189 | } 190 | } 191 | threshold(args_->minCount); 192 | initTableDiscard(); 193 | initNgrams(); 194 | if (args_->verbose > 0) { 195 | std::cout << "\rRead " << ntokens_ / 1000000 << "M words" << std::endl; 196 | std::cout << "Number of words: " << nwords_ << std::endl; 197 | std::cout << "Number of labels: " << nlabels_ << std::endl; 198 | } 199 | if (size_ == 0) { 200 | std::cerr << "Empty vocabulary. 
Try a smaller -minCount value." << std::endl; 201 | exit(EXIT_FAILURE); 202 | } 203 | } 204 | 205 | void Dictionary::threshold(int64_t t) { 206 | sort(words_.begin(), words_.end(), [](const entry& e1, const entry& e2) { 207 | if (e1.type != e2.type) return e1.type < e2.type; 208 | return e1.count > e2.count; 209 | }); 210 | words_.erase(remove_if(words_.begin(), words_.end(), [&](const entry& e) { 211 | return e.type == entry_type::word && e.count < t; 212 | }), words_.end()); 213 | words_.shrink_to_fit(); 214 | size_ = 0; 215 | nwords_ = 0; 216 | nlabels_ = 0; 217 | for (int32_t i = 0; i < MAX_VOCAB_SIZE; i++) { 218 | word2int_[i] = -1; 219 | } 220 | for (auto it = words_.begin(); it != words_.end(); ++it) { 221 | int32_t h = find(it->word); 222 | word2int_[h] = size_++; 223 | if (it->type == entry_type::word) nwords_++; 224 | if (it->type == entry_type::label) nlabels_++; 225 | } 226 | } 227 | 228 | void Dictionary::initTableDiscard() { 229 | pdiscard_.resize(size_); 230 | for (size_t i = 0; i < size_; i++) { 231 | real f = real(words_[i].count) / real(ntokens_); 232 | pdiscard_[i] = sqrt(args_->t / f) + args_->t / f; 233 | } 234 | } 235 | 236 | std::vector Dictionary::getCounts(entry_type type) const { 237 | std::vector counts; 238 | for (auto& w : words_) { 239 | if (w.type == type) counts.push_back(w.count); 240 | } 241 | return counts; 242 | } 243 | 244 | void Dictionary::addNgrams(std::vector& line, int32_t n) const { 245 | int32_t line_size = line.size(); 246 | for (int32_t i = 0; i < line_size; i++) { 247 | uint64_t h = line[i]; 248 | for (int32_t j = i + 1; j < line_size && j < i + n; j++) { 249 | h = h * 116049371 + line[j]; 250 | line.push_back(nwords_ + (h % args_->bucket)); 251 | } 252 | } 253 | } 254 | 255 | int32_t Dictionary::getLine(std::istream& in, 256 | std::vector& words, 257 | std::vector& labels, 258 | std::minstd_rand& rng) const { 259 | std::uniform_real_distribution<> uniform(0, 1); 260 | std::string token; 261 | int32_t ntokens = 0; 262 
| words.clear(); 263 | labels.clear(); 264 | if (in.eof()) { 265 | in.clear(); 266 | in.seekg(std::streampos(0)); 267 | } 268 | while (readWord(in, token)) { 269 | if (token == EOS) break; 270 | int32_t wid = getId(token); 271 | if (wid < 0) continue; 272 | entry_type type = getType(wid); 273 | ntokens++; 274 | if (type == entry_type::word && !discard(wid, uniform(rng))) { 275 | words.push_back(wid); 276 | } 277 | if (type == entry_type::label) { 278 | labels.push_back(wid - nwords_); 279 | } 280 | if (words.size() > MAX_LINE_SIZE && args_->model != model_name::sup) break; 281 | } 282 | return ntokens; 283 | } 284 | 285 | std::string Dictionary::getLabel(int32_t lid) const { 286 | assert(lid >= 0); 287 | assert(lid < nlabels_); 288 | return words_[lid + nwords_].word; 289 | } 290 | 291 | void Dictionary::save(std::ostream& out) const { 292 | out.write((char*) &size_, sizeof(int32_t)); 293 | out.write((char*) &nwords_, sizeof(int32_t)); 294 | out.write((char*) &nlabels_, sizeof(int32_t)); 295 | out.write((char*) &ntokens_, sizeof(int64_t)); 296 | for (int32_t i = 0; i < size_; i++) { 297 | entry e = words_[i]; 298 | out.write(e.word.data(), e.word.size() * sizeof(char)); 299 | out.put(0); 300 | out.write((char*) &(e.count), sizeof(int64_t)); 301 | out.write((char*) &(e.type), sizeof(entry_type)); 302 | } 303 | } 304 | 305 | void Dictionary::load(std::istream& in) { 306 | words_.clear(); 307 | for (int32_t i = 0; i < MAX_VOCAB_SIZE; i++) { 308 | word2int_[i] = -1; 309 | } 310 | in.read((char*) &size_, sizeof(int32_t)); 311 | in.read((char*) &nwords_, sizeof(int32_t)); 312 | in.read((char*) &nlabels_, sizeof(int32_t)); 313 | in.read((char*) &ntokens_, sizeof(int64_t)); 314 | for (int32_t i = 0; i < size_; i++) { 315 | char c; 316 | entry e; 317 | while ((c = in.get()) != 0) { 318 | e.word.push_back(c); 319 | } 320 | in.read((char*) &e.count, sizeof(int64_t)); 321 | in.read((char*) &e.type, sizeof(entry_type)); 322 | words_.push_back(e); 323 | word2int_[find(e.word)] = 
i; 324 | } 325 | initTableDiscard(); 326 | initNgrams(); 327 | } 328 | 329 | } 330 | -------------------------------------------------------------------------------- /src/dictionary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | */ 9 | 10 | #ifndef FASTTEXT_DICTIONARY_H 11 | #define FASTTEXT_DICTIONARY_H 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "args.h" 21 | #include "real.h" 22 | 23 | namespace fasttext { 24 | 25 | typedef int32_t id_type; 26 | enum class entry_type : int8_t {word=0, label=1}; 27 | 28 | struct entry { 29 | std::string word; 30 | int64_t count; 31 | entry_type type; 32 | std::vector subwords; 33 | }; 34 | 35 | class Dictionary { 36 | private: 37 | static const int32_t MAX_VOCAB_SIZE = 30000000; 38 | static const int32_t MAX_LINE_SIZE = 1024; 39 | 40 | int32_t find(const std::string&) const; 41 | void initTableDiscard(); 42 | void initNgrams(); 43 | 44 | std::shared_ptr args_; 45 | std::vector word2int_; 46 | std::vector words_; 47 | std::vector pdiscard_; 48 | int32_t size_; 49 | int32_t nwords_; 50 | int32_t nlabels_; 51 | int64_t ntokens_; 52 | 53 | public: 54 | static const std::string EOS; 55 | static const std::string BOW; 56 | static const std::string EOW; 57 | 58 | explicit Dictionary(std::shared_ptr); 59 | int32_t nwords() const; 60 | int32_t nlabels() const; 61 | int64_t ntokens() const; 62 | int32_t getId(const std::string&) const; 63 | entry_type getType(int32_t) const; 64 | bool discard(int32_t, real) const; 65 | std::string getWord(int32_t) const; 66 | const std::vector& getNgrams(int32_t) const; 67 | const 
std::vector getNgrams(const std::string&) const; 68 | void computeNgrams(const std::string&, std::vector&) const; 69 | uint32_t hash(const std::string& str) const; 70 | void add(const std::string&); 71 | bool readWord(std::istream&, std::string&) const; 72 | void readFromFile(std::istream&); 73 | std::string getLabel(int32_t) const; 74 | void save(std::ostream&) const; 75 | void load(std::istream&); 76 | std::vector getCounts(entry_type) const; 77 | void addNgrams(std::vector&, int32_t) const; 78 | int32_t getLine(std::istream&, std::vector&, 79 | std::vector&, std::minstd_rand&) const; 80 | void threshold(int64_t); 81 | }; 82 | 83 | } 84 | 85 | #endif 86 | -------------------------------------------------------------------------------- /src/fasttext.cc: -------------------------------------------------------------------------------- 1 | # include 2 | // [[Rcpp::depends("RcppArmadillo")]] 3 | // [[Rcpp::plugins(openmp)]] 4 | // [[Rcpp::plugins(cpp11)]] 5 | 6 | /** 7 | * Copyright (c) 2016-present, Facebook, Inc. 8 | * All rights reserved. 9 | * 10 | * This source code is licensed under the BSD-style license found in the 11 | * LICENSE file in the root directory of this source tree. An additional grant 12 | * of patent rights can be found in the PATENTS file in the same directory. 
13 | */ 14 | 15 | #include "fasttext.h" 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace fasttext { 29 | 30 | void FastText::getVector(Vector& vec, const std::string& word) { 31 | const std::vector& ngrams = dict_->getNgrams(word); 32 | vec.zero(); 33 | for (auto it = ngrams.begin(); it != ngrams.end(); ++it) { 34 | vec.addRow(*input_, *it); 35 | } 36 | if (ngrams.size() > 0) { 37 | vec.mul(1.0 / ngrams.size()); 38 | } 39 | } 40 | 41 | void FastText::saveVectors() { 42 | std::ofstream ofs(args_->output + ".vec"); 43 | if (!ofs.is_open()) { 44 | std::cout << "Error opening file for saving vectors." << std::endl; 45 | exit(EXIT_FAILURE); 46 | } 47 | // ofs << dict_->nwords() << " " << args_->dim << std::endl; // exclude dimensions from output .txt file 48 | Vector vec(args_->dim); 49 | for (int32_t i = 0; i < dict_->nwords(); i++) { 50 | std::string word = dict_->getWord(i); 51 | getVector(vec, word); 52 | ofs << word << " " << vec << std::endl; 53 | } 54 | ofs.close(); 55 | } 56 | 57 | void FastText::saveModel() { 58 | std::ofstream ofs(args_->output + ".bin", std::ofstream::binary); 59 | if (!ofs.is_open()) { 60 | std::cerr << "Model file cannot be opened for saving!" << std::endl; 61 | exit(EXIT_FAILURE); 62 | } 63 | args_->save(ofs); 64 | dict_->save(ofs); 65 | input_->save(ofs); 66 | output_->save(ofs); 67 | ofs.close(); 68 | } 69 | 70 | void FastText::loadModel(const std::string& filename) { 71 | std::ifstream ifs(filename, std::ifstream::binary); 72 | if (!ifs.is_open()) { 73 | std::cerr << "Model file cannot be opened for loading!" 
<< std::endl; 74 | exit(EXIT_FAILURE); 75 | } 76 | loadModel(ifs); 77 | ifs.close(); 78 | } 79 | 80 | void FastText::loadModel(std::istream& in) { 81 | args_ = std::make_shared(); 82 | dict_ = std::make_shared(args_); 83 | input_ = std::make_shared(); 84 | output_ = std::make_shared(); 85 | args_->load(in); 86 | dict_->load(in); 87 | input_->load(in); 88 | output_->load(in); 89 | model_ = std::make_shared(input_, output_, args_, 0); 90 | if (args_->model == model_name::sup) { 91 | model_->setTargetCounts(dict_->getCounts(entry_type::label)); 92 | } else { 93 | model_->setTargetCounts(dict_->getCounts(entry_type::word)); 94 | } 95 | } 96 | 97 | void FastText::printInfo(real progress, real loss) { 98 | real t = real(clock() - start) / CLOCKS_PER_SEC; 99 | real wst = real(tokenCount) / t; 100 | real lr = args_->lr * (1.0 - progress); 101 | int eta = int(t / progress * (1 - progress) / args_->thread); 102 | int etah = eta / 3600; 103 | int etam = (eta - etah * 3600) / 60; 104 | std::cout << std::fixed; 105 | std::cout << "\rProgress: " << std::setprecision(1) << 100 * progress << "%"; 106 | std::cout << " words/sec/thread: " << std::setprecision(0) << wst; 107 | std::cout << " lr: " << std::setprecision(6) << lr; 108 | std::cout << " loss: " << std::setprecision(6) << loss; 109 | std::cout << " eta: " << etah << "h" << etam << "m "; 110 | std::cout << std::flush; 111 | } 112 | 113 | void FastText::supervised(Model& model, real lr, 114 | const std::vector& line, 115 | const std::vector& labels) { 116 | if (labels.size() == 0 || line.size() == 0) return; 117 | std::uniform_int_distribution<> uniform(0, labels.size() - 1); 118 | int32_t i = uniform(model.rng); 119 | model.update(line, labels[i], lr); 120 | } 121 | 122 | void FastText::cbow(Model& model, real lr, 123 | const std::vector& line) { 124 | std::vector bow; 125 | std::uniform_int_distribution<> uniform(1, args_->ws); 126 | for (int32_t w = 0; w < line.size(); w++) { 127 | int32_t boundary = uniform(model.rng); 
128 | bow.clear(); 129 | for (int32_t c = -boundary; c <= boundary; c++) { 130 | if (c != 0 && w + c >= 0 && w + c < line.size()) { 131 | const std::vector& ngrams = dict_->getNgrams(line[w + c]); 132 | bow.insert(bow.end(), ngrams.cbegin(), ngrams.cend()); 133 | } 134 | } 135 | model.update(bow, line[w], lr); 136 | } 137 | } 138 | 139 | void FastText::skipgram(Model& model, real lr, 140 | const std::vector& line) { 141 | std::uniform_int_distribution<> uniform(1, args_->ws); 142 | for (int32_t w = 0; w < line.size(); w++) { 143 | int32_t boundary = uniform(model.rng); 144 | const std::vector& ngrams = dict_->getNgrams(line[w]); 145 | for (int32_t c = -boundary; c <= boundary; c++) { 146 | if (c != 0 && w + c >= 0 && w + c < line.size()) { 147 | model.update(ngrams, line[w + c], lr); 148 | } 149 | } 150 | } 151 | } 152 | 153 | void FastText::test(std::istream& in, int32_t k) { 154 | int32_t nexamples = 0, nlabels = 0; 155 | double precision = 0.0; 156 | std::vector line, labels; 157 | 158 | while (in.peek() != EOF) { 159 | dict_->getLine(in, line, labels, model_->rng); 160 | dict_->addNgrams(line, args_->wordNgrams); 161 | if (labels.size() > 0 && line.size() > 0) { 162 | std::vector> modelPredictions; 163 | model_->predict(line, k, modelPredictions); 164 | for (auto it = modelPredictions.cbegin(); it != modelPredictions.cend(); it++) { 165 | if (std::find(labels.begin(), labels.end(), it->second) != labels.end()) { 166 | precision += 1.0; 167 | } 168 | } 169 | nexamples++; 170 | nlabels += labels.size(); 171 | } 172 | } 173 | std::cout << std::setprecision(3); 174 | std::cout << "P@" << k << ": " << precision / (k * nexamples) << std::endl; 175 | std::cout << "R@" << k << ": " << precision / nlabels << std::endl; 176 | std::cout << "Number of examples: " << nexamples << std::endl; 177 | } 178 | 179 | void FastText::predict(std::istream& in, int32_t k, 180 | std::vector>& predictions) const { 181 | std::vector words, labels; 182 | dict_->getLine(in, words, labels, 
model_->rng); 183 | dict_->addNgrams(words, args_->wordNgrams); 184 | if (words.empty()) return; 185 | Vector hidden(args_->dim); 186 | Vector output(dict_->nlabels()); 187 | std::vector> modelPredictions; 188 | model_->predict(words, k, modelPredictions, hidden, output); 189 | predictions.clear(); 190 | for (auto it = modelPredictions.cbegin(); it != modelPredictions.cend(); it++) { 191 | predictions.push_back(std::make_pair(it->first, dict_->getLabel(it->second))); 192 | } 193 | } 194 | 195 | void FastText::predict(std::istream& in, int32_t k, bool print_prob) { 196 | std::vector> predictions; 197 | while (in.peek() != EOF) { 198 | predict(in, k, predictions); 199 | if (predictions.empty()) { 200 | std::cout << "n/a" << std::endl; 201 | } 202 | for (auto it = predictions.cbegin(); it != predictions.cend(); it++) { 203 | if (it != predictions.cbegin()) { 204 | std::cout << ' '; 205 | } 206 | std::cout << it->second; 207 | if (print_prob) { 208 | std::cout << ' ' << exp(it->first); 209 | } 210 | } 211 | std::cout << std::endl; 212 | } 213 | } 214 | 215 | // void FastText::wordVectors() { 216 | // std::string word; 217 | // Vector vec(args_->dim); 218 | // while (std::cin >> word) { 219 | // getVector(vec, word); 220 | // std::cout << word << " " << vec << std::endl; // write to file 221 | // } 222 | // } 223 | 224 | 225 | void FastText::wordVectors(std::string& INPUT, std::string& OUTPUT) { 226 | 227 | std::ifstream inf(INPUT); 228 | 229 | std::ofstream ofs(OUTPUT + ".vec"); 230 | if (!ofs.is_open()) { 231 | std::cout << "Error opening file for saving vectors." 
<< std::endl; 232 | exit(EXIT_FAILURE); 233 | } 234 | std::string word; 235 | Vector vec(args_->dim); 236 | while (std::getline(inf, word)) { 237 | getVector(vec, word); 238 | ofs << word << " " << vec << std::endl; 239 | } 240 | ofs.close(); 241 | } 242 | 243 | 244 | void FastText::textVectors() { 245 | std::vector line, labels; 246 | Vector vec(args_->dim); 247 | while (std::cin.peek() != EOF) { 248 | dict_->getLine(std::cin, line, labels, model_->rng); 249 | dict_->addNgrams(line, args_->wordNgrams); 250 | vec.zero(); 251 | for (auto it = line.cbegin(); it != line.cend(); ++it) { 252 | vec.addRow(*input_, *it); 253 | } 254 | if (!line.empty()) { 255 | vec.mul(1.0 / line.size()); 256 | } 257 | std::cout << vec << std::endl; // write to file 258 | } 259 | } 260 | 261 | void FastText::printVectors() { 262 | if (args_->model == model_name::sup) { // if model 'supervised' return textvectors else return wordvectors 263 | textVectors(); 264 | } 265 | // } else { 266 | // wordVectors(); 267 | // } 268 | } 269 | 270 | void FastText::trainThread(int32_t threadId) { 271 | std::ifstream ifs(args_->input); 272 | utils::seek(ifs, threadId * utils::size(ifs) / args_->thread); 273 | 274 | Model model(input_, output_, args_, threadId); 275 | if (args_->model == model_name::sup) { 276 | model.setTargetCounts(dict_->getCounts(entry_type::label)); 277 | } else { 278 | model.setTargetCounts(dict_->getCounts(entry_type::word)); 279 | } 280 | 281 | const int64_t ntokens = dict_->ntokens(); 282 | int64_t localTokenCount = 0; 283 | std::vector line, labels; 284 | while (tokenCount < args_->epoch * ntokens) { 285 | real progress = real(tokenCount) / (args_->epoch * ntokens); 286 | real lr = args_->lr * (1.0 - progress); 287 | localTokenCount += dict_->getLine(ifs, line, labels, model.rng); 288 | if (args_->model == model_name::sup) { 289 | dict_->addNgrams(line, args_->wordNgrams); 290 | supervised(model, lr, line, labels); 291 | } else if (args_->model == model_name::cbow) { 292 | 
cbow(model, lr, line); 293 | } else if (args_->model == model_name::sg) { 294 | skipgram(model, lr, line); 295 | } 296 | if (localTokenCount > args_->lrUpdateRate) { 297 | tokenCount += localTokenCount; 298 | localTokenCount = 0; 299 | if (threadId == 0 && args_->verbose > 1) { 300 | printInfo(progress, model.getLoss()); 301 | } 302 | } 303 | } 304 | if (threadId == 0 && args_->verbose > 0) { 305 | printInfo(1.0, model.getLoss()); 306 | std::cout << std::endl; 307 | } 308 | ifs.close(); 309 | } 310 | 311 | void FastText::loadVectors(std::string filename) { 312 | std::ifstream in(filename); 313 | std::vector words; 314 | std::shared_ptr mat; // temp. matrix for pretrained vectors 315 | int64_t n, dim; 316 | if (!in.is_open()) { 317 | std::cerr << "Pretrained vectors file cannot be opened!" << std::endl; 318 | exit(EXIT_FAILURE); 319 | } 320 | in >> n >> dim; 321 | if (dim != args_->dim) { 322 | std::cerr << "Dimension of pretrained vectors does not match -dim option" 323 | << std::endl; 324 | exit(EXIT_FAILURE); 325 | } 326 | mat = std::make_shared(n, dim); 327 | for (size_t i = 0; i < n; i++) { 328 | std::string word; 329 | in >> word; 330 | words.push_back(word); 331 | dict_->add(word); 332 | for (size_t j = 0; j < dim; j++) { 333 | in >> mat->data_[i * dim + j]; 334 | } 335 | } 336 | in.close(); 337 | 338 | dict_->threshold(1); 339 | input_ = std::make_shared(dict_->nwords()+args_->bucket, args_->dim); 340 | input_->uniform(1.0 / args_->dim); 341 | 342 | for (size_t i = 0; i < n; i++) { 343 | int32_t idx = dict_->getId(words[i]); 344 | if (idx < 0 || idx >= dict_->nwords()) continue; 345 | for (size_t j = 0; j < dim; j++) { 346 | input_->data_[idx * dim + j] = mat->data_[i * dim + j]; 347 | } 348 | } 349 | } 350 | 351 | void FastText::train(std::shared_ptr args) { 352 | args_ = args; 353 | dict_ = std::make_shared(args_); 354 | if (args_->input == "-") { 355 | // manage expectations 356 | std::cerr << "Cannot use stdin for training!" 
<< std::endl; 357 | exit(EXIT_FAILURE); 358 | } 359 | std::ifstream ifs(args_->input); 360 | if (!ifs.is_open()) { 361 | std::cerr << "Input file cannot be opened!" << std::endl; 362 | exit(EXIT_FAILURE); 363 | } 364 | dict_->readFromFile(ifs); 365 | ifs.close(); 366 | 367 | if (args_->pretrainedVectors.size() != 0) { 368 | loadVectors(args_->pretrainedVectors); 369 | } else { 370 | input_ = std::make_shared(dict_->nwords()+args_->bucket, args_->dim); 371 | input_->uniform(1.0 / args_->dim); 372 | } 373 | 374 | if (args_->model == model_name::sup) { 375 | output_ = std::make_shared(dict_->nlabels(), args_->dim); 376 | } else { 377 | output_ = std::make_shared(dict_->nwords(), args_->dim); 378 | } 379 | output_->zero(); 380 | 381 | start = clock(); 382 | tokenCount = 0; 383 | std::vector threads; 384 | for (int32_t i = 0; i < args_->thread; i++) { 385 | threads.push_back(std::thread([=]() { trainThread(i); })); 386 | } 387 | for (auto it = threads.begin(); it != threads.end(); ++it) { 388 | it->join(); 389 | } 390 | model_ = std::make_shared(input_, output_, args_, 0); 391 | 392 | saveModel(); 393 | if (args_->model != model_name::sup) { 394 | saveVectors(); 395 | } 396 | } 397 | 398 | } 399 | -------------------------------------------------------------------------------- /src/fasttext.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 
8 | */ 9 | 10 | #ifndef FASTTEXT_FASTTEXT_H 11 | #define FASTTEXT_FASTTEXT_H 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | #include "matrix.h" 20 | #include "vector.h" 21 | #include "dictionary.h" 22 | #include "model.h" 23 | #include "utils.h" 24 | #include "real.h" 25 | #include "args.h" 26 | 27 | namespace fasttext { 28 | 29 | class FastText { 30 | private: 31 | std::shared_ptr args_; 32 | std::shared_ptr dict_; 33 | std::shared_ptr input_; 34 | std::shared_ptr output_; 35 | std::shared_ptr model_; 36 | std::atomic tokenCount; 37 | clock_t start; 38 | 39 | public: 40 | void getVector(Vector&, const std::string&); 41 | void saveVectors(); 42 | void saveModel(); 43 | void loadModel(const std::string&); 44 | void loadModel(std::istream&); 45 | void printInfo(real, real); 46 | 47 | void supervised(Model&, real, const std::vector&, 48 | const std::vector&); 49 | void cbow(Model&, real, const std::vector&); 50 | void skipgram(Model&, real, const std::vector&); 51 | void test(std::istream&, int32_t); 52 | void predict(std::istream&, int32_t, bool); 53 | void predict(std::istream&, int32_t, std::vector>&) const; 54 | void wordVectors(std::string&, std::string&); 55 | void textVectors(); 56 | void printVectors(); 57 | void trainThread(int32_t); 58 | void train(std::shared_ptr); 59 | 60 | void loadVectors(std::string); 61 | }; 62 | 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | 6 | /* FIXME: 7 | Check these declarations against the C/Fortran source code. 
8 | */ 9 | 10 | /* .Call calls */ 11 | extern SEXP _fastTextR_convert_args_to_pointers(SEXP, SEXP, SEXP); 12 | 13 | static const R_CallMethodDef CallEntries[] = { 14 | {"_fastTextR_convert_args_to_pointers", (DL_FUNC) &_fastTextR_convert_args_to_pointers, 3}, 15 | {NULL, NULL, 0} 16 | }; 17 | 18 | void R_init_fastTextR(DllInfo *dll) 19 | { 20 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 21 | R_useDynamicSymbols(dll, FALSE); 22 | } 23 | -------------------------------------------------------------------------------- /src/main.cc: -------------------------------------------------------------------------------- 1 | # include 2 | // [[Rcpp::depends("RcppArmadillo")]] 3 | // [[Rcpp::plugins(openmp)]] 4 | // [[Rcpp::plugins(cpp11)]] 5 | 6 | 7 | /** 8 | * Copyright (c) 2016-present, Facebook, Inc. 9 | * All rights reserved. 10 | * 11 | * This source code is licensed under the BSD-style license found in the 12 | * LICENSE file in the root directory of this source tree. An additional grant 13 | * of patent rights can be found in the PATENTS file in the same directory. 
14 | */ 15 | 16 | #include 17 | #include 18 | #include "fasttext.h" 19 | #include "args.h" 20 | 21 | using namespace fasttext; 22 | 23 | void printUsage() { 24 | std::cout 25 | << "usage: fasttext \n\n" 26 | << "The commands supported by fasttext are:\n\n" 27 | << " supervised train a supervised classifier\n" 28 | << " test evaluate a supervised classifier\n" 29 | << " predict predict most likely labels\n" 30 | << " predict-prob predict most likely labels with probabilities\n" 31 | << " skipgram train a skipgram model\n" 32 | << " cbow train a cbow model\n" 33 | << " print-vectors print vectors given a trained model\n" 34 | << std::endl; 35 | } 36 | 37 | void printTestUsage() { 38 | std::cout 39 | << "usage: fasttext test []\n\n" 40 | << " model filename\n" 41 | << " test data filename (if -, read from stdin)\n" 42 | << " (optional; 1 by default) predict top k labels\n" 43 | << std::endl; 44 | } 45 | 46 | void printPredictUsage() { 47 | std::cout 48 | << "usage: fasttext predict[-prob] []\n\n" 49 | << " model filename\n" 50 | << " test data filename (if -, read from stdin)\n" 51 | << " (optional; 1 by default) predict top k labels\n" 52 | << std::endl; 53 | } 54 | 55 | void printPrintVectorsUsage() { 56 | std::cout 57 | << "usage: fasttext print-vectors \n\n" 58 | << " model filename\n" 59 | << std::endl; 60 | } 61 | 62 | void test(int argc, char** argv) { 63 | int32_t k; 64 | if (argc == 4) { 65 | k = 1; 66 | } else if (argc == 5) { 67 | k = atoi(argv[4]); 68 | } else { 69 | printTestUsage(); 70 | exit(EXIT_FAILURE); 71 | } 72 | FastText fasttext; 73 | fasttext.loadModel(std::string(argv[2])); 74 | std::string infile(argv[3]); 75 | if (infile == "-") { 76 | fasttext.test(std::cin, k); 77 | } else { 78 | std::ifstream ifs(infile); 79 | if (!ifs.is_open()) { 80 | std::cerr << "Test file cannot be opened!" 
<< std::endl; 81 | exit(EXIT_FAILURE); 82 | } 83 | fasttext.test(ifs, k); 84 | ifs.close(); 85 | } 86 | exit(0); 87 | } 88 | 89 | void predict(int argc, char** argv) { 90 | int32_t k; 91 | if (argc == 4) { 92 | k = 1; 93 | } else if (argc == 5) { 94 | k = atoi(argv[4]); 95 | } else { 96 | printPredictUsage(); 97 | exit(EXIT_FAILURE); 98 | } 99 | bool print_prob = std::string(argv[1]) == "predict-prob"; 100 | FastText fasttext; 101 | fasttext.loadModel(std::string(argv[2])); 102 | 103 | std::string infile(argv[3]); 104 | if (infile == "-") { 105 | fasttext.predict(std::cin, k, print_prob); 106 | } else { 107 | std::ifstream ifs(infile); 108 | if (!ifs.is_open()) { 109 | std::cerr << "Input file cannot be opened!" << std::endl; 110 | exit(EXIT_FAILURE); 111 | } 112 | fasttext.predict(ifs, k, print_prob); 113 | ifs.close(); 114 | } 115 | 116 | exit(0); 117 | } 118 | 119 | void printVectors(int argc, char** argv) { 120 | if (argc != 3) { 121 | printPrintVectorsUsage(); 122 | exit(EXIT_FAILURE); 123 | } 124 | FastText fasttext; 125 | fasttext.loadModel(std::string(argv[2])); 126 | fasttext.printVectors(); 127 | exit(0); 128 | } 129 | 130 | void train(int argc, char** argv) { 131 | std::shared_ptr a = std::make_shared(); 132 | a->parseArgs(argc, argv); 133 | FastText fasttext; 134 | fasttext.train(a); 135 | } 136 | 137 | 138 | void SAVE_dict_vectors(char** argv, std::string INPUT, std::string OUTPUT) { 139 | 140 | FastText fasttext; 141 | 142 | fasttext.loadModel(std::string(argv[2])); 143 | 144 | fasttext.wordVectors(INPUT, OUTPUT); 145 | } 146 | 147 | 148 | 149 | // wrapper for the train(), test(), printVectors(), predict() functions 150 | // conversion of string-arguments to pointers in c++ : http://stackoverflow.com/questions/26032039/convert-vectorstring-into-char-c 151 | // for supervised, modify the std::cin in predict(), test() 152 | // 153 | 154 | // [[Rcpp::export]] 155 | void convert_args_to_pointers(std::vector string_commands, std::string INPUT, std::string 
OUTPUT) { // 'string_commands' includes also the fasttext argument in index 0; 156 | 157 | int num_argc = string_commands.size(); 158 | 159 | utils::initTables(); 160 | 161 | if (num_argc < 2) { 162 | 163 | printUsage(); 164 | 165 | exit(EXIT_FAILURE); 166 | } 167 | 168 | char** cstrings = new char*[string_commands.size()]; 169 | 170 | for(size_t i = 0; i < string_commands.size(); ++i) { 171 | 172 | cstrings[i] = new char[string_commands[i].size() + 1]; 173 | 174 | std::strcpy(cstrings[i], string_commands[i].c_str()); 175 | } 176 | 177 | std::string command = string_commands[1]; 178 | 179 | if (command == "skipgram" || command == "cbow" || command == "supervised") { 180 | 181 | train(num_argc, cstrings);} 182 | 183 | else if (command == "test") { 184 | 185 | test(num_argc, cstrings);} 186 | 187 | else if (command == "print-vectors") { 188 | 189 | printVectors(num_argc, cstrings);} 190 | 191 | else if (command == "predict" || command == "predict-prob" ) { 192 | 193 | predict(num_argc, cstrings);} 194 | 195 | else if (command == "predict_skipgram_cbow") { 196 | 197 | SAVE_dict_vectors(cstrings, INPUT, OUTPUT);} 198 | 199 | else { 200 | 201 | printUsage(); 202 | 203 | exit(EXIT_FAILURE); 204 | } 205 | 206 | utils::freeTables(); 207 | 208 | // clean up memory 209 | 210 | for(size_t i = 0; i < num_argc; ++i) { 211 | 212 | delete[] cstrings[i]; 213 | } 214 | 215 | delete[] cstrings; 216 | } 217 | 218 | -------------------------------------------------------------------------------- /src/matrix.cc: -------------------------------------------------------------------------------- 1 | # include 2 | // [[Rcpp::depends("RcppArmadillo")]] 3 | // [[Rcpp::plugins(openmp)]] 4 | // [[Rcpp::plugins(cpp11)]] 5 | 6 | /** 7 | * Copyright (c) 2016-present, Facebook, Inc. 8 | * All rights reserved. 9 | * 10 | * This source code is licensed under the BSD-style license found in the 11 | * LICENSE file in the root directory of this source tree. 
An additional grant 12 | * of patent rights can be found in the PATENTS file in the same directory. 13 | */ 14 | 15 | #include "matrix.h" 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "utils.h" 22 | #include "vector.h" 23 | 24 | namespace fasttext { 25 | 26 | Matrix::Matrix() { 27 | m_ = 0; 28 | n_ = 0; 29 | data_ = nullptr; 30 | } 31 | 32 | Matrix::Matrix(int64_t m, int64_t n) { 33 | m_ = m; 34 | n_ = n; 35 | data_ = new real[m * n]; 36 | } 37 | 38 | Matrix::Matrix(const Matrix& other) { 39 | m_ = other.m_; 40 | n_ = other.n_; 41 | data_ = new real[m_ * n_]; 42 | for (int64_t i = 0; i < (m_ * n_); i++) { 43 | data_[i] = other.data_[i]; 44 | } 45 | } 46 | 47 | Matrix& Matrix::operator=(const Matrix& other) { 48 | Matrix temp(other); 49 | m_ = temp.m_; 50 | n_ = temp.n_; 51 | std::swap(data_, temp.data_); 52 | return *this; 53 | } 54 | 55 | Matrix::~Matrix() { 56 | delete[] data_; 57 | } 58 | 59 | void Matrix::zero() { 60 | for (int64_t i = 0; i < (m_ * n_); i++) { 61 | data_[i] = 0.0; 62 | } 63 | } 64 | 65 | void Matrix::uniform(real a) { 66 | std::minstd_rand rng(1); 67 | std::uniform_real_distribution<> uniform(-a, a); 68 | for (int64_t i = 0; i < (m_ * n_); i++) { 69 | data_[i] = uniform(rng); 70 | } 71 | } 72 | 73 | void Matrix::addRow(const Vector& vec, int64_t i, real a) { 74 | assert(i >= 0); 75 | assert(i < m_); 76 | assert(vec.m_ == n_); 77 | for (int64_t j = 0; j < n_; j++) { 78 | data_[i * n_ + j] += a * vec.data_[j]; 79 | } 80 | } 81 | 82 | real Matrix::dotRow(const Vector& vec, int64_t i) { 83 | assert(i >= 0); 84 | assert(i < m_); 85 | assert(vec.m_ == n_); 86 | real d = 0.0; 87 | for (int64_t j = 0; j < n_; j++) { 88 | d += data_[i * n_ + j] * vec.data_[j]; 89 | } 90 | return d; 91 | } 92 | 93 | void Matrix::save(std::ostream& out) { 94 | out.write((char*) &m_, sizeof(int64_t)); 95 | out.write((char*) &n_, sizeof(int64_t)); 96 | out.write((char*) data_, m_ * n_ * sizeof(real)); 97 | } 98 | 99 | void Matrix::load(std::istream& in) { 100 | 
in.read((char*) &m_, sizeof(int64_t)); 101 | in.read((char*) &n_, sizeof(int64_t)); 102 | delete[] data_; 103 | data_ = new real[m_ * n_]; 104 | in.read((char*) data_, m_ * n_ * sizeof(real)); 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /src/matrix.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | */ 9 | 10 | #ifndef FASTTEXT_MATRIX_H 11 | #define FASTTEXT_MATRIX_H 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include "real.h" 18 | 19 | namespace fasttext { 20 | 21 | class Vector; 22 | 23 | class Matrix { 24 | 25 | public: 26 | real* data_; 27 | int64_t m_; 28 | int64_t n_; 29 | 30 | Matrix(); 31 | Matrix(int64_t, int64_t); 32 | Matrix(const Matrix&); 33 | Matrix& operator=(const Matrix&); 34 | ~Matrix(); 35 | 36 | void zero(); 37 | void uniform(real); 38 | real dotRow(const Vector&, int64_t); 39 | void addRow(const Vector&, int64_t, real); 40 | 41 | void save(std::ostream&); 42 | void load(std::istream&); 43 | }; 44 | 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /src/model.cc: -------------------------------------------------------------------------------- 1 | # include 2 | // [[Rcpp::depends("RcppArmadillo")]] 3 | // [[Rcpp::plugins(openmp)]] 4 | // [[Rcpp::plugins(cpp11)]] 5 | 6 | /** 7 | * Copyright (c) 2016-present, Facebook, Inc. 8 | * All rights reserved. 9 | * 10 | * This source code is licensed under the BSD-style license found in the 11 | * LICENSE file in the root directory of this source tree. 
An additional grant 12 | * of patent rights can be found in the PATENTS file in the same directory. 13 | */ 14 | 15 | #include "model.h" 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "utils.h" 22 | 23 | namespace fasttext { 24 | 25 | Model::Model(std::shared_ptr wi, 26 | std::shared_ptr wo, 27 | std::shared_ptr args, 28 | int32_t seed) 29 | : hidden_(args->dim), output_(wo->m_), grad_(args->dim), rng(seed) 30 | { 31 | wi_ = wi; 32 | wo_ = wo; 33 | args_ = args; 34 | isz_ = wi->m_; 35 | osz_ = wo->m_; 36 | hsz_ = args->dim; 37 | negpos = 0; 38 | loss_ = 0.0; 39 | nexamples_ = 1; 40 | } 41 | 42 | real Model::binaryLogistic(int32_t target, bool label, real lr) { 43 | real score = utils::sigmoid(wo_->dotRow(hidden_, target)); 44 | real alpha = lr * (real(label) - score); 45 | grad_.addRow(*wo_, target, alpha); 46 | wo_->addRow(hidden_, target, alpha); 47 | if (label) { 48 | return -utils::log(score); 49 | } else { 50 | return -utils::log(1.0 - score); 51 | } 52 | } 53 | 54 | real Model::negativeSampling(int32_t target, real lr) { 55 | real loss = 0.0; 56 | grad_.zero(); 57 | for (int32_t n = 0; n <= args_->neg; n++) { 58 | if (n == 0) { 59 | loss += binaryLogistic(target, true, lr); 60 | } else { 61 | loss += binaryLogistic(getNegative(target), false, lr); 62 | } 63 | } 64 | return loss; 65 | } 66 | 67 | real Model::hierarchicalSoftmax(int32_t target, real lr) { 68 | real loss = 0.0; 69 | grad_.zero(); 70 | const std::vector& binaryCode = codes[target]; 71 | const std::vector& pathToRoot = paths[target]; 72 | for (int32_t i = 0; i < pathToRoot.size(); i++) { 73 | loss += binaryLogistic(pathToRoot[i], binaryCode[i], lr); 74 | } 75 | return loss; 76 | } 77 | 78 | void Model::computeOutputSoftmax(Vector& hidden, Vector& output) const { 79 | output.mul(*wo_, hidden); 80 | real max = output[0], z = 0.0; 81 | for (int32_t i = 0; i < osz_; i++) { 82 | max = std::max(output[i], max); 83 | } 84 | for (int32_t i = 0; i < osz_; i++) { 85 | output[i] = exp(output[i] - 
max); 86 | z += output[i]; 87 | } 88 | for (int32_t i = 0; i < osz_; i++) { 89 | output[i] /= z; 90 | } 91 | } 92 | 93 | void Model::computeOutputSoftmax() { 94 | computeOutputSoftmax(hidden_, output_); 95 | } 96 | 97 | real Model::softmax(int32_t target, real lr) { 98 | grad_.zero(); 99 | computeOutputSoftmax(); 100 | for (int32_t i = 0; i < osz_; i++) { 101 | real label = (i == target) ? 1.0 : 0.0; 102 | real alpha = lr * (label - output_[i]); 103 | grad_.addRow(*wo_, i, alpha); 104 | wo_->addRow(hidden_, i, alpha); 105 | } 106 | return -utils::log(output_[target]); 107 | } 108 | 109 | void Model::computeHidden(const std::vector& input, Vector& hidden) const { 110 | assert(hidden.size() == hsz_); 111 | hidden.zero(); 112 | for (auto it = input.cbegin(); it != input.cend(); ++it) { 113 | hidden.addRow(*wi_, *it); 114 | } 115 | hidden.mul(1.0 / input.size()); 116 | } 117 | 118 | bool Model::comparePairs(const std::pair &l, 119 | const std::pair &r) { 120 | return l.first > r.first; 121 | } 122 | 123 | void Model::predict(const std::vector& input, int32_t k, 124 | std::vector>& heap, 125 | Vector& hidden, Vector& output) const { 126 | assert(k > 0); 127 | heap.reserve(k + 1); 128 | computeHidden(input, hidden); 129 | if (args_->loss == loss_name::hs) { 130 | dfs(k, 2 * osz_ - 2, 0.0, heap, hidden); 131 | } else { 132 | findKBest(k, heap, hidden, output); 133 | } 134 | std::sort_heap(heap.begin(), heap.end(), comparePairs); 135 | } 136 | 137 | void Model::predict(const std::vector& input, int32_t k, 138 | std::vector>& heap) { 139 | predict(input, k, heap, hidden_, output_); 140 | } 141 | 142 | void Model::findKBest(int32_t k, std::vector>& heap, 143 | Vector& hidden, Vector& output) const { 144 | computeOutputSoftmax(hidden, output); 145 | for (int32_t i = 0; i < osz_; i++) { 146 | if (heap.size() == k && utils::log(output[i]) < heap.front().first) { 147 | continue; 148 | } 149 | heap.push_back(std::make_pair(utils::log(output[i]), i)); 150 | 
std::push_heap(heap.begin(), heap.end(), comparePairs); 151 | if (heap.size() > k) { 152 | std::pop_heap(heap.begin(), heap.end(), comparePairs); 153 | heap.pop_back(); 154 | } 155 | } 156 | } 157 | 158 | void Model::dfs(int32_t k, int32_t node, real score, 159 | std::vector>& heap, 160 | Vector& hidden) const { 161 | if (heap.size() == k && score < heap.front().first) { 162 | return; 163 | } 164 | 165 | if (tree[node].left == -1 && tree[node].right == -1) { 166 | heap.push_back(std::make_pair(score, node)); 167 | std::push_heap(heap.begin(), heap.end(), comparePairs); 168 | if (heap.size() > k) { 169 | std::pop_heap(heap.begin(), heap.end(), comparePairs); 170 | heap.pop_back(); 171 | } 172 | return; 173 | } 174 | 175 | real f = utils::sigmoid(wo_->dotRow(hidden, node - osz_)); 176 | dfs(k, tree[node].left, score + utils::log(1.0 - f), heap, hidden); 177 | dfs(k, tree[node].right, score + utils::log(f), heap, hidden); 178 | } 179 | 180 | void Model::update(const std::vector& input, int32_t target, real lr) { 181 | assert(target >= 0); 182 | assert(target < osz_); 183 | if (input.size() == 0) return; 184 | computeHidden(input, hidden_); 185 | if (args_->loss == loss_name::ns) { 186 | loss_ += negativeSampling(target, lr); 187 | } else if (args_->loss == loss_name::hs) { 188 | loss_ += hierarchicalSoftmax(target, lr); 189 | } else { 190 | loss_ += softmax(target, lr); 191 | } 192 | nexamples_ += 1; 193 | 194 | if (args_->model == model_name::sup) { 195 | grad_.mul(1.0 / input.size()); 196 | } 197 | for (auto it = input.cbegin(); it != input.cend(); ++it) { 198 | wi_->addRow(grad_, *it, 1.0); 199 | } 200 | } 201 | 202 | void Model::setTargetCounts(const std::vector& counts) { 203 | assert(counts.size() == osz_); 204 | if (args_->loss == loss_name::ns) { 205 | initTableNegatives(counts); 206 | } 207 | if (args_->loss == loss_name::hs) { 208 | buildTree(counts); 209 | } 210 | } 211 | 212 | void Model::initTableNegatives(const std::vector& counts) { 213 | real z = 0.0; 
214 | for (size_t i = 0; i < counts.size(); i++) { 215 | z += pow(counts[i], 0.5); 216 | } 217 | for (size_t i = 0; i < counts.size(); i++) { 218 | real c = pow(counts[i], 0.5); 219 | for (size_t j = 0; j < c * NEGATIVE_TABLE_SIZE / z; j++) { 220 | negatives.push_back(i); 221 | } 222 | } 223 | std::shuffle(negatives.begin(), negatives.end(), rng); 224 | } 225 | 226 | int32_t Model::getNegative(int32_t target) { 227 | int32_t negative; 228 | do { 229 | negative = negatives[negpos]; 230 | negpos = (negpos + 1) % negatives.size(); 231 | } while (target == negative); 232 | return negative; 233 | } 234 | 235 | void Model::buildTree(const std::vector& counts) { 236 | tree.resize(2 * osz_ - 1); 237 | for (int32_t i = 0; i < 2 * osz_ - 1; i++) { 238 | tree[i].parent = -1; 239 | tree[i].left = -1; 240 | tree[i].right = -1; 241 | tree[i].count = 1e15; 242 | tree[i].binary = false; 243 | } 244 | for (int32_t i = 0; i < osz_; i++) { 245 | tree[i].count = counts[i]; 246 | } 247 | int32_t leaf = osz_ - 1; 248 | int32_t node = osz_; 249 | for (int32_t i = osz_; i < 2 * osz_ - 1; i++) { 250 | int32_t mini[2]; 251 | for (int32_t j = 0; j < 2; j++) { 252 | if (leaf >= 0 && tree[leaf].count < tree[node].count) { 253 | mini[j] = leaf--; 254 | } else { 255 | mini[j] = node++; 256 | } 257 | } 258 | tree[i].left = mini[0]; 259 | tree[i].right = mini[1]; 260 | tree[i].count = tree[mini[0]].count + tree[mini[1]].count; 261 | tree[mini[0]].parent = i; 262 | tree[mini[1]].parent = i; 263 | tree[mini[1]].binary = true; 264 | } 265 | for (int32_t i = 0; i < osz_; i++) { 266 | std::vector path; 267 | std::vector code; 268 | int32_t j = i; 269 | while (tree[j].parent != -1) { 270 | path.push_back(tree[j].parent - osz_); 271 | code.push_back(tree[j].binary); 272 | j = tree[j].parent; 273 | } 274 | paths.push_back(path); 275 | codes.push_back(code); 276 | } 277 | } 278 | 279 | real Model::getLoss() const { 280 | return loss_ / nexamples_; 281 | } 282 | 283 | } 284 | 
-------------------------------------------------------------------------------- /src/model.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | */ 9 | 10 | #ifndef FASTTEXT_MODEL_H 11 | #define FASTTEXT_MODEL_H 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "args.h" 19 | #include "matrix.h" 20 | #include "vector.h" 21 | #include "real.h" 22 | 23 | namespace fasttext { 24 | 25 | struct Node { 26 | int32_t parent; 27 | int32_t left; 28 | int32_t right; 29 | int64_t count; 30 | bool binary; 31 | }; 32 | 33 | class Model { 34 | private: 35 | std::shared_ptr wi_; 36 | std::shared_ptr wo_; 37 | std::shared_ptr args_; 38 | Vector hidden_; 39 | Vector output_; 40 | Vector grad_; 41 | int32_t hsz_; 42 | int32_t isz_; 43 | int32_t osz_; 44 | real loss_; 45 | int64_t nexamples_; 46 | 47 | static bool comparePairs(const std::pair&, 48 | const std::pair&); 49 | 50 | std::vector negatives; 51 | size_t negpos; 52 | 53 | int32_t getNegative(int32_t target); 54 | 55 | std::vector< std::vector > paths; 56 | std::vector< std::vector > codes; 57 | std::vector tree; 58 | 59 | static const int32_t NEGATIVE_TABLE_SIZE = 10000000; 60 | 61 | public: 62 | Model(std::shared_ptr, std::shared_ptr, 63 | std::shared_ptr, int32_t); 64 | 65 | real binaryLogistic(int32_t, bool, real); 66 | real negativeSampling(int32_t, real); 67 | real hierarchicalSoftmax(int32_t, real); 68 | real softmax(int32_t, real); 69 | 70 | void predict(const std::vector&, int32_t, 71 | std::vector>&, 72 | Vector&, Vector&) const; 73 | void predict(const std::vector&, int32_t, 74 | std::vector>&); 75 | void dfs(int32_t, int32_t, real, 
76 | std::vector>&, 77 | Vector&) const; 78 | void findKBest(int32_t, std::vector>&, 79 | Vector&, Vector&) const; 80 | void update(const std::vector&, int32_t, real); 81 | void computeHidden(const std::vector&, Vector&) const; 82 | void computeOutputSoftmax(Vector&, Vector&) const; 83 | void computeOutputSoftmax(); 84 | 85 | void setTargetCounts(const std::vector&); 86 | void initTableNegatives(const std::vector&); 87 | void buildTree(const std::vector&); 88 | real getLoss() const; 89 | 90 | std::minstd_rand rng; 91 | }; 92 | 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /src/real.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | */ 9 | 10 | #ifndef FASTTEXT_REAL_H 11 | #define FASTTEXT_REAL_H 12 | 13 | namespace fasttext { 14 | 15 | typedef float real; 16 | 17 | } 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/utils.cc: -------------------------------------------------------------------------------- 1 | # include 2 | // [[Rcpp::depends("RcppArmadillo")]] 3 | // [[Rcpp::plugins(openmp)]] 4 | // [[Rcpp::plugins(cpp11)]] 5 | 6 | /** 7 | * Copyright (c) 2016-present, Facebook, Inc. 8 | * All rights reserved. 9 | * 10 | * This source code is licensed under the BSD-style license found in the 11 | * LICENSE file in the root directory of this source tree. An additional grant 12 | * of patent rights can be found in the PATENTS file in the same directory. 
13 | */ 14 | 15 | #include "utils.h" 16 | 17 | #include 18 | #include 19 | 20 | namespace fasttext { 21 | 22 | namespace utils { 23 | real* t_sigmoid = nullptr; 24 | real* t_log = nullptr; 25 | 26 | real log(real x) { 27 | if (x > 1.0) { 28 | return 0.0; 29 | } 30 | int i = int(x * LOG_TABLE_SIZE); 31 | return t_log[i]; 32 | } 33 | 34 | real sigmoid(real x) { 35 | if (x < -MAX_SIGMOID) { 36 | return 0.0; 37 | } else if (x > MAX_SIGMOID) { 38 | return 1.0; 39 | } else { 40 | int i = int((x + MAX_SIGMOID) * SIGMOID_TABLE_SIZE / MAX_SIGMOID / 2); 41 | return t_sigmoid[i]; 42 | } 43 | } 44 | 45 | void initTables() { 46 | initSigmoid(); 47 | initLog(); 48 | } 49 | 50 | void initSigmoid() { 51 | if (t_sigmoid != nullptr) return; 52 | t_sigmoid = new real[SIGMOID_TABLE_SIZE + 1]; 53 | for (int i = 0; i < SIGMOID_TABLE_SIZE + 1; i++) { 54 | real x = real(i * 2 * MAX_SIGMOID) / SIGMOID_TABLE_SIZE - MAX_SIGMOID; 55 | t_sigmoid[i] = 1.0 / (1.0 + std::exp(-x)); 56 | } 57 | } 58 | 59 | void initLog() { 60 | if (t_log != nullptr) return; 61 | t_log = new real[LOG_TABLE_SIZE + 1]; 62 | for (int i = 0; i < LOG_TABLE_SIZE + 1; i++) { 63 | real x = (real(i) + 1e-5) / LOG_TABLE_SIZE; 64 | t_log[i] = std::log(x); 65 | } 66 | } 67 | 68 | void freeTables() { 69 | delete[] t_sigmoid; 70 | delete[] t_log; 71 | t_sigmoid = nullptr; 72 | t_log = nullptr; 73 | } 74 | 75 | int64_t size(std::ifstream& ifs) { 76 | ifs.seekg(std::streamoff(0), std::ios::end); 77 | return ifs.tellg(); 78 | } 79 | 80 | void seek(std::ifstream& ifs, int64_t pos) { 81 | ifs.clear(); 82 | ifs.seekg(std::streampos(pos)); 83 | } 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | */ 9 | 10 | #ifndef FASTTEXT_UTILS_H 11 | #define FASTTEXT_UTILS_H 12 | 13 | #include 14 | 15 | #include "real.h" 16 | 17 | #define SIGMOID_TABLE_SIZE 512 18 | #define MAX_SIGMOID 8 19 | #define LOG_TABLE_SIZE 512 20 | 21 | namespace fasttext { 22 | 23 | namespace utils { 24 | 25 | real log(real); 26 | real sigmoid(real); 27 | 28 | void initTables(); 29 | void initSigmoid(); 30 | void initLog(); 31 | void freeTables(); 32 | 33 | int64_t size(std::ifstream&); 34 | void seek(std::ifstream&, int64_t); 35 | } 36 | 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/vector.cc: -------------------------------------------------------------------------------- 1 | # include 2 | // [[Rcpp::depends("RcppArmadillo")]] 3 | // [[Rcpp::plugins(openmp)]] 4 | // [[Rcpp::plugins(cpp11)]] 5 | 6 | /** 7 | * Copyright (c) 2016-present, Facebook, Inc. 8 | * All rights reserved. 9 | * 10 | * This source code is licensed under the BSD-style license found in the 11 | * LICENSE file in the root directory of this source tree. An additional grant 12 | * of patent rights can be found in the PATENTS file in the same directory. 
13 | */ 14 | 15 | #include "vector.h" 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "matrix.h" 22 | #include "utils.h" 23 | 24 | namespace fasttext { 25 | 26 | Vector::Vector(int64_t m) { 27 | m_ = m; 28 | data_ = new real[m]; 29 | } 30 | 31 | Vector::~Vector() { 32 | delete[] data_; 33 | } 34 | 35 | int64_t Vector::size() const { 36 | return m_; 37 | } 38 | 39 | void Vector::zero() { 40 | for (int64_t i = 0; i < m_; i++) { 41 | data_[i] = 0.0; 42 | } 43 | } 44 | 45 | void Vector::mul(real a) { 46 | for (int64_t i = 0; i < m_; i++) { 47 | data_[i] *= a; 48 | } 49 | } 50 | 51 | void Vector::addRow(const Matrix& A, int64_t i) { 52 | assert(i >= 0); 53 | assert(i < A.m_); 54 | assert(m_ == A.n_); 55 | for (int64_t j = 0; j < A.n_; j++) { 56 | data_[j] += A.data_[i * A.n_ + j]; 57 | } 58 | } 59 | 60 | void Vector::addRow(const Matrix& A, int64_t i, real a) { 61 | assert(i >= 0); 62 | assert(i < A.m_); 63 | assert(m_ == A.n_); 64 | for (int64_t j = 0; j < A.n_; j++) { 65 | data_[j] += a * A.data_[i * A.n_ + j]; 66 | } 67 | } 68 | 69 | void Vector::mul(const Matrix& A, const Vector& vec) { 70 | assert(A.m_ == m_); 71 | assert(A.n_ == vec.m_); 72 | for (int64_t i = 0; i < m_; i++) { 73 | data_[i] = 0.0; 74 | for (int64_t j = 0; j < A.n_; j++) { 75 | data_[i] += A.data_[i * A.n_ + j] * vec.data_[j]; 76 | } 77 | } 78 | } 79 | 80 | int64_t Vector::argmax() { 81 | real max = data_[0]; 82 | int64_t argmax = 0; 83 | for (int64_t i = 1; i < m_; i++) { 84 | if (data_[i] > max) { 85 | max = data_[i]; 86 | argmax = i; 87 | } 88 | } 89 | return argmax; 90 | } 91 | 92 | real& Vector::operator[](int64_t i) { 93 | return data_[i]; 94 | } 95 | 96 | const real& Vector::operator[](int64_t i) const { 97 | return data_[i]; 98 | } 99 | 100 | std::ostream& operator<<(std::ostream& os, const Vector& v) 101 | { 102 | os << std::setprecision(5); 103 | for (int64_t j = 0; j < v.m_; j++) { 104 | os << v.data_[j] << ' '; 105 | } 106 | return os; 107 | } 108 | 109 | } 110 | 
-------------------------------------------------------------------------------- /src/vector.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. An additional grant 7 | * of patent rights can be found in the PATENTS file in the same directory. 8 | */ 9 | 10 | #ifndef FASTTEXT_VECTOR_H 11 | #define FASTTEXT_VECTOR_H 12 | 13 | #include 14 | #include 15 | 16 | #include "real.h" 17 | 18 | namespace fasttext { 19 | 20 | class Matrix; 21 | 22 | class Vector { 23 | 24 | public: 25 | int64_t m_; 26 | real* data_; 27 | 28 | explicit Vector(int64_t); 29 | ~Vector(); 30 | 31 | real& operator[](int64_t); 32 | const real& operator[](int64_t) const; 33 | 34 | int64_t size() const; 35 | void zero(); 36 | void mul(real); 37 | void addRow(const Matrix&, int64_t); 38 | void addRow(const Matrix&, int64_t, real); 39 | void mul(const Matrix&, const Vector&); 40 | int64_t argmax(); 41 | }; 42 | 43 | std::ostream& operator<<(std::ostream&, const Vector&); 44 | 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(fastTextR) 3 | 4 | test_check("fastTextR") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-wrapper_fasttext.R: -------------------------------------------------------------------------------- 1 | 2 | if (.Platform$OS.type == "windows") { 3 | 4 | paste_delim = "\\" 5 | } 6 | 7 | if (.Platform$OS.type == "unix") { 8 | 9 | paste_delim = "/" 10 | } 11 | 12 | 13 | context('fasttext functions') 14 | 15 | 16 | #-------------------------- 17 | # 'skipgram_cbow' function 18 | 
#--------------------------


# Builds the absolute path of a file inside a sub-folder of the working directory
# (by default the 'test_data' folder), joined with the 'paste_delim' separator that
# is defined at the top of this file.

data_path = function(file_name, folder = "test_data") {

  paste0(getwd(), path.expand(paste(c("", folder, file_name), collapse = paste_delim)))
}


# Calls 'skipgram_cbow' with a valid baseline configuration. Every argument that is
# passed through '...' replaces the baseline value of the same name, so that each
# test only has to state the parameter(s) it varies.

run_skipgram_cbow = function(...) {

  baseline = list(input_path = data_path("doc.txt"), output_path = data_path("model"), method = "skipgram", lr = 0.1,

                  lrUpdateRate = 100, dim = 10, ws = 5, epoch = 5, minCount = 1, neg = 5, wordNgrams = 1, loss = "ns",

                  bucket = 2000000, minn = 0, maxn = 0, thread = 6, t = 0.0001, verbose = 2)

  do.call(skipgram_cbow, utils::modifyList(baseline, list(...)))
}


testthat::test_that("it returns an error if the input_path parameter is invalid", {

  testthat::expect_error( run_skipgram_cbow(input_path = data_path("doc.txt", folder = "test_data1")) )
})



testthat::test_that("it returns an error if the output_path parameter is invalid", {

  testthat::expect_error( run_skipgram_cbow(output_path = data_path("model", folder = "test_data1")) )
})



testthat::test_that("it returns an error if the method parameter is not one of c('skipgram', 'cbow')", {

  testthat::expect_error( run_skipgram_cbow(method = "unknown") )
})


testthat::test_that("it returns an error if the learning rate parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(lr = 0.0) )
})



testthat::test_that("it returns an error if the lrUpdateRate parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(lrUpdateRate = 0) )
})


testthat::test_that("it returns an error if the dim parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(dim = 0) )
})


testthat::test_that("it returns an error if the ws parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(ws = 0) )
})



testthat::test_that("it returns an error if the epoch parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(epoch = 0) )
})



testthat::test_that("it returns an error if the minCount parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(minCount = 0) )
})



testthat::test_that("it returns an error if the neg parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(neg = 0) )
})



testthat::test_that("it returns an error if the wordNgrams parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(wordNgrams = 0) )
})



testthat::test_that("it returns an error if the loss parameter is not one of c('ns', 'hs', 'softmax')", {

  testthat::expect_error( run_skipgram_cbow(loss = "unknown") )
})


testthat::test_that("it returns an error if the bucket parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(bucket = 0) )
})




testthat::test_that("it returns an error if the minn parameter is less than 0", {

  testthat::expect_error( run_skipgram_cbow(minn = -1) )
})



testthat::test_that("it returns an error if the maxn parameter is less than 0", {

  testthat::expect_error( run_skipgram_cbow(maxn = -1) )
})



testthat::test_that("it returns an error if the thread parameter is less than 1", {

  testthat::expect_error( run_skipgram_cbow(thread = 0) )
})



testthat::test_that("it returns an error if the t parameter is less than or equal to 0.0", {

  testthat::expect_error( run_skipgram_cbow(thread = 1, t = 0.0) )
})



testthat::test_that("it returns an error if the verbose parameter is less than 0", {

  testthat::expect_error( run_skipgram_cbow(thread = 1, verbose = -1) )
})



testthat::test_that("it returns an error if the verbose parameter is greater than 2", {

  testthat::expect_error( run_skipgram_cbow(thread = 1, verbose = 3) )
})



testthat::test_that("it saves the output to a file if all parameters are valid", {

  res = run_skipgram_cbow(thread = 1, verbose = 0)

  # NOTE(review): 'res' is already evaluated at this point, so expect_silent() only checks that
  # referencing the returned value is silent; it does not observe the training call itself.

  testthat::expect_silent(res)
})




#---------------------------------
# 'predict_unknown_words' function
#---------------------------------


testthat::test_that("it returns an error if the skipgram_cbow_model_output parameter is not a valid character string path", {

  testthat::expect_error( predict_unknown_words(skipgram_cbow_model_output = NULL) )
})


testthat::test_that("it returns an error if the unknown_words_path parameter is not a valid character string path", {

  path_in = data_path("model.bin")

  testthat::expect_error( predict_unknown_words(skipgram_cbow_model_output = path_in, unknown_words_path = NULL) )
})


testthat::test_that("it returns an error if the output_path parameter is NULL", {

  path_in = data_path("model.bin")

  path_unkn = data_path("queries.txt")

  testthat::expect_error( predict_unknown_words(skipgram_cbow_model_output = path_in, unknown_words_path = path_unkn, output_path = NULL) )
})


testthat::test_that("it returns an error if the output_path parameter is not a valid character string path", {

  path_in = data_path("model.bin")

  path_unkn = data_path("queries.txt")

  testthat::expect_error( predict_unknown_words(skipgram_cbow_model_output = path_in, unknown_words_path = path_unkn, output_path = list()) )
})


testthat::test_that("it returns an error if the verbose parameter is not a boolean", {

  path_in = data_path("model.bin")

  path_unkn = data_path("queries.txt")

  path_res_vecs = data_path("VECS.txt")

  testthat::expect_error( predict_unknown_words(skipgram_cbow_model_output = path_in, unknown_words_path = path_unkn, output_path = path_res_vecs, verbose = 'FALSE') )
})



testthat::test_that("it returns word vectors to the specified folder", {

  path_in = data_path("model.bin")

  path_unkn = data_path("queries.txt")

  path_res_vecs = data_path("VECS")

  testthat::expect_silent( predict_unknown_words(skipgram_cbow_model_output = path_in, unknown_words_path = path_unkn, output_path = path_res_vecs, verbose = FALSE) )
})

--------------------------------------------------------------------------------
/tests/testthat/test_data/model.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlampros/fastTextR/2cd6fef56d400128779a2079b13b92a094baefeb/tests/testthat/test_data/model.bin
--------------------------------------------------------------------------------
/tests/testthat/test_data/queries.txt:
--------------------------------------------------------------------------------
1 | squishing
2 | squirt
3 | undated
4 | undatable
5 | circumvents
6 | beat
7 | circumvents
8 | ebb
9 | dispossess
10 | deprive
11 | provincialism
12 | narrow-mindedness
13 | provincialism
14 | partiality
15 | instrumentality
16 | department
17 | instrumentality
18 | utility
19 | involvement
20 | action
21 | involvement
22 | implication
23 | ecclesiastic
24 | clergyman
25 | brigadier
26 | general
27 | carbonic
28 | chemical
29 | carbonic
30 | paper
31 | aspirate
32 | pronounce
33 | aspirate
34 | remove
35 | monotype
36 | machine
37 | incommensurate
38 | incommensurable
39 | campfires
40 | fire
41 | cognizance
42 | knowing
43 | urbanize
44 | change
45 | imperfection
46 | state
47 | assessment
48 | charge
49 | assessment
50 | assay
51 | incubate
52 | breed
53 | incubate 54 | develop 55 | principality 56 | domain 57 | vicarious 58 | abnormal 59 | vicarious 60 | secondary 61 | ungraceful 62 | awkward 63 | unsighted 64 | color-blind 65 | socialise 66 | educate 67 | socialise 68 | swing 69 | diagonals 70 | line 71 | diagonals 72 | heterosexual 73 | naturalise 74 | adapt 75 | insubordinate 76 | rebellious 77 | insubordinate 78 | defiant 79 | subdividing 80 | subdivide 81 | subdividing 82 | separate 83 | antifeminism 84 | sexism 85 | circumcising 86 | cut 87 | circumcising 88 | remove 89 | excommunicate 90 | oust 91 | accomplished 92 | over 93 | attackers 94 | wrongdoer 95 | contravened 96 | disagree 97 | contravened 98 | transgress 99 | tenderize 100 | change 101 | blithering 102 | chatter 103 | resurfacing 104 | coat 105 | resurfacing 106 | surface 107 | friendships 108 | brotherhood 109 | soulfully 110 | emotional 111 | elector 112 | voter 113 | intentionality 114 | intended 115 | vulgarism 116 | profanity 117 | vulgarism 118 | inelegance 119 | preliterate 120 | illiterate 121 | preliterate 122 | noncivilized 123 | retrying 124 | hear 125 | wanderers 126 | program 127 | wanderers 128 | nomad 129 | marginalize 130 | interact 131 | hyperlink 132 | link 133 | inducted 134 | admit 135 | inducted 136 | install 137 | entrapping 138 | capture 139 | entrapping 140 | deceive 141 | alleviated 142 | comfort 143 | alleviated 144 | help 145 | radiators 146 | beginning 147 | radiators 148 | system 149 | postmodernism 150 | genre 151 | excavations 152 | site 153 | excavations 154 | removal 155 | comfortable 156 | cozy 157 | unfeathered 158 | unfledged 159 | unfeathered 160 | plucked 161 | assigned 162 | regiment 163 | assigned 164 | allow 165 | listeners 166 | eavesdropper 167 | unheralded 168 | unexpected 169 | inabilities 170 | insufficiency 171 | inabilities 172 | incomprehension 173 | monocultures 174 | culture 175 | tricolour 176 | flag 177 | omnipotence 178 | state 179 | mingles 180 | change 181 | calcify 182 | harden 183 | 
calcify 184 | change 185 | disinheritance 186 | discontinuance 187 | interwove 188 | braid 189 | cession 190 | relinquishment 191 | dwarfish 192 | small 193 | assessments 194 | charge 195 | assessments 196 | classification 197 | prescriptions 198 | medicine 199 | prescriptions 200 | direction 201 | antipsychotic 202 | lithium 203 | circumferential 204 | peripheral 205 | roosters 206 | cockerel 207 | nonpublic 208 | private 209 | yodeling 210 | sing 211 | yodeling 212 | singing 213 | autoerotic 214 | sexy 215 | unisons 216 | concurrence 217 | unisons 218 | agreement 219 | disassembled 220 | destroy 221 | preteens 222 | juvenile 223 | unaccessible 224 | pathless 225 | monogram 226 | symbol 227 | opalescence 228 | brightness 229 | estrogenic 230 | hormone 231 | misleading 232 | beat 233 | consubstantial 234 | considerable 235 | coeducation 236 | education 237 | canonical 238 | standard 239 | galvanic 240 | exciting 241 | nominated 242 | nominate 243 | nominated 244 | choose 245 | exceedance 246 | probability 247 | confide 248 | consign 249 | confide 250 | unwrap 251 | established 252 | initiate 253 | condescend 254 | act 255 | tricycle 256 | pedicab 257 | discharged 258 | spread 259 | postmodernist 260 | artist 261 | ruralist 262 | rustic 263 | ruralist 264 | advocate 265 | conjurors 266 | enchantress 267 | detestable 268 | offensive 269 | detestable 270 | hateful 271 | comparing 272 | compare 273 | comparing 274 | analogize 275 | marketers 276 | selling 277 | hypercoaster 278 | roller 279 | pittance 280 | payment 281 | soulless 282 | insensitive 283 | hypermarket 284 | supermarket 285 | confluent 286 | branch 287 | confluent 288 | convergent 289 | anterooms 290 | building 291 | reasoning 292 | deduce 293 | reasoning 294 | re-argue 295 | summonings 296 | page 297 | summonings 298 | demand 299 | preordained 300 | predetermine 301 | antechamber 302 | room 303 | concavity 304 | shape 305 | concavity 306 | recess 307 | unzipping 308 | unfasten 309 | spoonful 310 | 
containerful 311 | partible 312 | divisible 313 | hypersensitive 314 | susceptible 315 | impurity 316 | adulteration 317 | impurity 318 | waste 319 | inscribe 320 | engrave 321 | virginals 322 | harpsichord 323 | hypertexts 324 | database 325 | inheritances 326 | acquisition 327 | infectious 328 | septic 329 | infectious 330 | contagious 331 | pinpointed 332 | locate 333 | capitalised 334 | profit 335 | capitalised 336 | supply 337 | retrace 338 | return 339 | deadness 340 | quality 341 | deadness 342 | inelasticity 343 | conformism 344 | legalism 345 | tripods 346 | tripod 347 | lastingly 348 | wear 349 | lastingly 350 | populate 351 | unexpected 352 | unannounced 353 | interlink 354 | intercommunicate 355 | interlink 356 | connect 357 | brained 358 | kill 359 | brained 360 | hit 361 | unicycles 362 | wheel 363 | unicycles 364 | bicycle 365 | preservers 366 | cook 367 | preservers 368 | worker 369 | autografts 370 | graft 371 | retarding 372 | decelerate 373 | retarding 374 | stay 375 | subfamily 376 | group 377 | encrust 378 | coat 379 | encrust 380 | decorate 381 | wingless 382 | flightless 383 | intraspecific 384 | interspecies 385 | tunneled 386 | penetrate 387 | tunneled 388 | dig 389 | suppressor 390 | gene 391 | suppressor 392 | restrainer 393 | expound 394 | elaborate 395 | expound 396 | detail 397 | brisker 398 | energetic 399 | brisker 400 | invigorating 401 | wealthy 402 | rich 403 | eventful 404 | important 405 | eventful 406 | lively 407 | edgeless 408 | dull 409 | clownish 410 | humorous 411 | inquisitor 412 | thousand 413 | inquisitor 414 | inquirer 415 | extravert 416 | extroversive 417 | finality 418 | conclusive 419 | gibberish 420 | dutch 421 | championship 422 | status 423 | championship 424 | contest 425 | indelicate 426 | indecent 427 | indelicate 428 | tasteless 429 | sheikhdoms 430 | domain 431 | flighted 432 | fly 433 | flighted 434 | shoot 435 | backwardness 436 | idiocy 437 | discontinuous 438 | disjunct 439 | cheapen 440 | devalue 441 | 
flatulence 442 | physical 443 | venomous 444 | toxic 445 | venomous 446 | malicious 447 | enshrouded 448 | envelop 449 | impotently 450 | ineffective 451 | deviationism 452 | desertion 453 | capitation 454 | tax 455 | denominate 456 | label 457 | illiberal 458 | narrow-minded 459 | periodical 460 | nightly 461 | periodical 462 | publication 463 | distillate 464 | liquid 465 | returning 466 | bounce 467 | dictatorship 468 | state 469 | disfavoring 470 | prejudice 471 | postglacial 472 | cold 473 | ennobled 474 | honor 475 | anticyclones 476 | high 477 | cylindrical 478 | rounded 479 | nonpolitical 480 | apolitical 481 | circumference 482 | size 483 | repositions 484 | move 485 | repositions 486 | reduce 487 | librarianship 488 | position 489 | conductive 490 | semiconducting 491 | bounced 492 | skip 493 | bounced 494 | bounce 495 | entreaty 496 | request 497 | convertible 498 | car 499 | convertible 500 | security 501 | constitutive 502 | essential 503 | kindergarteners 504 | child 505 | angrier 506 | huffy 507 | angrier 508 | stormy 509 | defiles 510 | mar 511 | defiles 512 | spot 513 | hankering 514 | desire 515 | hankering 516 | longing 517 | circumnavigate 518 | circle 519 | criticality 520 | juncture 521 | criticality 522 | urgency 523 | mistrustful 524 | distrustful 525 | presuppose 526 | imply 527 | presuppose 528 | premise 529 | mayoralty 530 | position 531 | companionships 532 | friendship 533 | primates 534 | priest 535 | omnipotent 536 | powerful 537 | postboxes 538 | maildrop 539 | loveable 540 | desirable 541 | antedating 542 | chronologize 543 | benefited 544 | help 545 | benefited 546 | get 547 | contrastive 548 | different 549 | contrastive 550 | antonymous 551 | interned 552 | work 553 | interned 554 | confine 555 | clamorous 556 | noisy 557 | baseness 558 | unworthiness 559 | serenaded 560 | perform 561 | snookered 562 | play 563 | snookered 564 | flim-flam 565 | immeasurable 566 | incalculable 567 | immeasurable 568 | illimitable 569 | 
encroachments 570 | inroad 571 | encroachments 572 | entrance 573 | deregulating 574 | liberation 575 | deregulating 576 | exempt 577 | acceptable 578 | satisfactory 579 | acceptable 580 | standard 581 | sentenced 582 | declare 583 | shrieks 584 | shout 585 | shrieks 586 | cry 587 | nonviable 588 | dead 589 | papered 590 | cover 591 | territorials 592 | soldier 593 | territorials 594 | guard 595 | publicise 596 | tell 597 | reenact 598 | re-create 599 | reenact 600 | ordain 601 | interstellar 602 | major 603 | scattered 604 | separate 605 | transmigrating 606 | immigrate 607 | transmigrating 608 | born 609 | associations 610 | southern 611 | associations 612 | sociable 613 | admiralty 614 | department 615 | admiralty 616 | position 617 | autobiographer 618 | biographer 619 | planners 620 | schemer 621 | planners 622 | notebook 623 | supplement 624 | constitute 625 | supplement 626 | leverage 627 | combusts 628 | blow 629 | combusts 630 | ablaze 631 | brightness 632 | intelligence 633 | brightness 634 | radiance 635 | producing 636 | together 637 | reserve 638 | assign 639 | reserve 640 | withhold 641 | unflagging 642 | constant 643 | unflagging 644 | energetic 645 | preschooler 646 | child 647 | baggers 648 | machine 649 | baggers 650 | workman 651 | willingness 652 | wholeheartedness 653 | unacceptable 654 | unsatisfactory 655 | unacceptable 656 | unwelcome 657 | directionless 658 | purposeless 659 | replications 660 | reproduction 661 | replications 662 | procedure 663 | retrials 664 | trial 665 | venders 666 | selling 667 | fantasist 668 | creator 669 | interlinks 670 | intercommunicate 671 | interlinks 672 | connect 673 | adversely 674 | unfavorable 675 | repulses 676 | disgust 677 | repulses 678 | fight 679 | humanness 680 | quality 681 | autofocus 682 | optical 683 | conversely 684 | interview 685 | conversely 686 | proposition 687 | ceaseless 688 | continuous 689 | hybridise 690 | breed 691 | antitumor 692 | brain 693 | parallelism 694 | similarity 695 | 
sightedness 696 | sight 697 | battleships 698 | dreadnought 699 | subarctic 700 | polar 701 | subarctic 702 | overshoe 703 | sufferance 704 | self 705 | uncomprehending 706 | undiscerning 707 | regretful 708 | penitent 709 | monoplanes 710 | airplane 711 | steepen 712 | change 713 | transfuse 714 | breathe 715 | transfuse 716 | pour 717 | hyperextension 718 | extension 719 | amazings 720 | surprise 721 | amazings 722 | stump 723 | perished 724 | change 725 | hilarity 726 | gaiety 727 | appearance 728 | apparition 729 | transmissible 730 | infectious 731 | transmissible 732 | inheritable 733 | wheaten 734 | source 735 | magnetize 736 | charm 737 | magnetize 738 | change 739 | militarize 740 | change 741 | circumspect 742 | prudent 743 | translocate 744 | transfer 745 | macroevolution 746 | evolution 747 | circumvented 748 | attack 749 | circumvented 750 | surpass 751 | adventism 752 | christianity 753 | breather 754 | submarine 755 | breather 756 | respite 757 | disabused 758 | inform 759 | contravene 760 | deny 761 | contravene 762 | transgress 763 | transducers 764 | device 765 | icelandic 766 | scandinavian 767 | uncertainty 768 | speculativeness 769 | disengages 770 | unclog 771 | painkillers 772 | hydrochloride 773 | associational 774 | legion 775 | associational 776 | affiliation 777 | luxuriance 778 | abundance 779 | vacations 780 | spend 781 | chooses 782 | compare 783 | chooses 784 | decide 785 | enunciated 786 | state 787 | cosponsoring 788 | sponsor 789 | impeded 790 | obstruct 791 | impeded 792 | dam 793 | irremovable 794 | tenured 795 | strangers 796 | person 797 | utilitarianism 798 | doctrine 799 | puffery 800 | flattery 801 | noncitizens 802 | traveler 803 | monsignori 804 | priest 805 | refered 806 | apply 807 | refered 808 | remember 809 | macrocosmic 810 | large 811 | functionality 812 | practicality 813 | spoonfuls 814 | containerful 815 | instructorship 816 | position 817 | approved 818 | authorize 819 | approved 820 | rubberstamp 821 | 
recorders 822 | box 823 | recorders 824 | official 825 | headship 826 | position 827 | credentials 828 | document 829 | credentials 830 | certificate 831 | enunciating 832 | state 833 | enunciating 834 | round 835 | caramelize 836 | convert 837 | cosigns 838 | validate 839 | cosigns 840 | endorse 841 | deformity 842 | appearance 843 | responsible 844 | causative 845 | undisputable 846 | undeniable 847 | reassess 848 | measure 849 | colonise 850 | settle 851 | subserve 852 | help 853 | religionist 854 | person 855 | sanctioned 856 | empower 857 | sanctioned 858 | back 859 | suggestible 860 | susceptible 861 | warmness 862 | protectiveness 863 | warmness 864 | hotness 865 | relates 866 | focus 867 | relates 868 | remember 869 | cardinality 870 | number 871 | rotational 872 | transformation 873 | rotational 874 | circumvolution 875 | totalism 876 | political 877 | irrationality 878 | insanity 879 | absorbance 880 | density 881 | intracerebral 882 | emotional 883 | disjoined 884 | separate 885 | intramuscular 886 | powerful 887 | endangerment 888 | hazard 889 | decomposition 890 | fragmentation 891 | decomposition 892 | algebra 893 | autobiographies 894 | memoir 895 | characterless 896 | ordinary 897 | dissenters 898 | conscientious 899 | subspecies 900 | group 901 | irreproducible 902 | unrepeatable 903 | cosigned 904 | validate 905 | cosigned 906 | endorse 907 | embellishment 908 | expansion 909 | encyclopaedic 910 | comprehensive 911 | indispensable 912 | critical 913 | indispensable 914 | necessary 915 | fractures 916 | destroy 917 | fractures 918 | pervert 919 | entraps 920 | gin 921 | entraps 922 | deceive 923 | anamorphosis 924 | evolution 925 | anamorphosis 926 | copy 927 | dispersive 928 | distributive 929 | smoothen 930 | rub 931 | interpreter 932 | person 933 | interpreter 934 | symbolist 935 | meadows 936 | grassland 937 | obtainment 938 | acquiring 939 | nonprofessional 940 | lay 941 | attendances 942 | frequency 943 | attendances 944 | appearance 945 | 
protraction 946 | continuance 947 | transshipped 948 | transfer 949 | entrapped 950 | capture 951 | entrapped 952 | deceive 953 | exclaiming 954 | call 955 | exclaiming 956 | declare 957 | passable 958 | satisfactory 959 | passable 960 | negotiable 961 | undetectable 962 | invisible 963 | undetectable 964 | imperceptible 965 | endurable 966 | tolerable 967 | supposed 968 | speculate 969 | supposed 970 | suspect 971 | transact 972 | bank 973 | survivalist 974 | person 975 | increasing 976 | grow 977 | increasing 978 | up 979 | fabricate 980 | make 981 | fabricate 982 | think 983 | partnership 984 | partner 985 | partnership 986 | relationship 987 | microorganism 988 | organism 989 | impossibilities 990 | unattainableness 991 | repress 992 | suppress 993 | dimensional 994 | multidimensional 995 | performance 996 | universe 997 | performance 998 | musical 999 | feudalism 1000 | organization 1001 | behaviorist 1002 | psychologist 1003 | interjection 1004 | break 1005 | interjection 1006 | exclamation 1007 | consequences 1008 | position 1009 | consequences 1010 | result 1011 | preschoolers 1012 | child 1013 | unmentionables 1014 | garment 1015 | subeditor 1016 | editor 1017 | standardize 1018 | regulate 1019 | standardize 1020 | measure 1021 | winners 1022 | walloper 1023 | persuasions 1024 | electioneering 1025 | persuasions 1026 | belief 1027 | conformations 1028 | balance 1029 | conformations 1030 | curvature 1031 | seriousness 1032 | badness 1033 | seriousness 1034 | gravity 1035 | metabolism 1036 | organic 1037 | reprints 1038 | reproduce 1039 | reprints 1040 | publication 1041 | replication 1042 | procedure 1043 | replication 1044 | copying 1045 | highjacking 1046 | robbery 1047 | highjacking 1048 | seize 1049 | repurchases 1050 | buy 1051 | sympathized 1052 | feel 1053 | unsuitable 1054 | unfit 1055 | unsuitable 1056 | irrelevant 1057 | victorious 1058 | successful 1059 | victorious 1060 | undefeated 1061 | leagued 1062 | unite 1063 | ravenous 1064 | gluttonous 
1065 | ravenous 1066 | hungry 1067 | inversions 1068 | abnormality 1069 | inversions 1070 | phenomenon 1071 | flavourful 1072 | tasty 1073 | spaciousness 1074 | largeness 1075 | evidently 1076 | obvious 1077 | evidently 1078 | noticeable 1079 | reinsured 1080 | insure 1081 | crudeness 1082 | wild 1083 | crudeness 1084 | impoliteness 1085 | initialise 1086 | divide 1087 | initialise 1088 | determine 1089 | requirement 1090 | duty 1091 | requirement 1092 | thing 1093 | contortionists 1094 | acrobat 1095 | dysentery 1096 | diarrhea 1097 | occlusion 1098 | thrombosis 1099 | reenactor 1100 | actor 1101 | ulcerate 1102 | affect 1103 | ulcerate 1104 | change 1105 | exemplify 1106 | embody 1107 | exemplify 1108 | elaborate 1109 | attractor 1110 | entertainer 1111 | macroeconomists 1112 | economist 1113 | exploitive 1114 | consumptive 1115 | lectureship 1116 | position 1117 | automate 1118 | change 1119 | incorruptible 1120 | incorrupt 1121 | exacerbated 1122 | anger 1123 | exacerbated 1124 | inflame 1125 | continuously 1126 | unbroken 1127 | crusaders 1128 | warrior 1129 | crusaders 1130 | insurgent 1131 | formations 1132 | flight 1133 | formations 1134 | filing 1135 | bestowals 1136 | giving 1137 | bestowals 1138 | gift 1139 | undeviating 1140 | reliable 1141 | undeviating 1142 | direct 1143 | impassively 1144 | voice 1145 | paradoxical 1146 | inexplicable 1147 | deceitful 1148 | dishonest 1149 | commissions 1150 | equip 1151 | commissions 1152 | order 1153 | leisured 1154 | idle 1155 | unsalable 1156 | unmarketable 1157 | hypersensitivity 1158 | sensitivity 1159 | inquisitiveness 1160 | nosiness 1161 | monograms 1162 | symbol 1163 | admitting 1164 | confess 1165 | deflowering 1166 | deface 1167 | innovativeness 1168 | originality 1169 | impulsion 1170 | force 1171 | impulsion 1172 | drive 1173 | unisexual 1174 | sexual 1175 | anarchist 1176 | radical 1177 | circumcision 1178 | banquet 1179 | socialites 1180 | person 1181 | rearrangements 1182 | reordering 1183 | 
unquenchable 1184 | insatiate 1185 | interrelated 1186 | interrelate 1187 | interrelated 1188 | associate 1189 | synthetical 1190 | logical 1191 | entombment 1192 | funeral 1193 | kidnapped 1194 | shanghai 1195 | uproarious 1196 | humorous 1197 | uproarious 1198 | noisy 1199 | discipleship 1200 | position 1201 | vaporise 1202 | evaporate 1203 | vaporise 1204 | change 1205 | memorialize 1206 | remind 1207 | memorialize 1208 | address 1209 | personify 1210 | embody 1211 | personify 1212 | typify 1213 | inbreeding 1214 | coupling 1215 | lenience 1216 | softness 1217 | lenience 1218 | mercifulness 1219 | preposed 1220 | put 1221 | prophetical 1222 | predictive 1223 | standoffish 1224 | unapproachable 1225 | procurator 1226 | agent 1227 | excitations 1228 | arousal 1229 | excitations 1230 | fever 1231 | thoughtless 1232 | inconsiderate 1233 | untruth 1234 | statement 1235 | malfeasance 1236 | wrongdoing 1237 | supporters 1238 | trader 1239 | supporters 1240 | strength 1241 | punctuate 1242 | quote 1243 | punctuate 1244 | point 1245 | translocation 1246 | organic 1247 | translocation 1248 | procedure 1249 | deforming 1250 | change 1251 | deforming 1252 | morph 1253 | attributions 1254 | attributable 1255 | shouter 1256 | crier 1257 | excrete 1258 | make 1259 | concerti 1260 | concerto 1261 | reformism 1262 | doctrine 1263 | moisten 1264 | baste 1265 | moisten 1266 | sprinkle 1267 | inflammation 1268 | pitch 1269 | intermingles 1270 | commingle 1271 | gathering 1272 | sponge 1273 | gathering 1274 | hive 1275 | concerning 1276 | involve 1277 | deviously 1278 | indirect 1279 | deviously 1280 | untrustworthy 1281 | admittance 1282 | right 1283 | performing 1284 | improvise 1285 | performing 1286 | church 1287 | pretenders 1288 | ringer 1289 | toppled 1290 | push 1291 | toppled 1292 | over 1293 | nonconscious 1294 | unconscious 1295 | nonconscious 1296 | inanimate 1297 | meaningless 1298 | empty 1299 | immoveable 1300 | immobile 1301 | unblock 1302 | play 1303 | unblock 1304 
| unstuff 1305 | rhythmicity 1306 | lilt 1307 | significances 1308 | meaning 1309 | sheepish 1310 | docile 1311 | sheepish 1312 | ashamed 1313 | immensely 1314 | large 1315 | eruptive 1316 | active 1317 | eruptive 1318 | aqueous 1319 | transvestitism 1320 | practice 1321 | royalist 1322 | monarchist 1323 | libelous 1324 | harmful 1325 | commodes 1326 | drawers 1327 | commodes 1328 | fixture 1329 | conscripting 1330 | enlist 1331 | depopulate 1332 | shrink 1333 | directional 1334 | leading 1335 | disbelieving 1336 | doubt 1337 | disbelieving 1338 | incredulous 1339 | hypervelocity 1340 | speed 1341 | interdisciplinary 1342 | nonindulgent 1343 | nonverbally 1344 | numerical 1345 | pressurise 1346 | change 1347 | measurements 1348 | viscometry 1349 | nonfunctional 1350 | run-down 1351 | severer 1352 | intense 1353 | brainless 1354 | unintelligent 1355 | marinate 1356 | steep 1357 | freighter 1358 | cargo 1359 | terrorize 1360 | coerce 1361 | terrorize 1362 | frighten 1363 | prayerful 1364 | pious 1365 | bestowal 1366 | giving 1367 | bestowal 1368 | gift 1369 | diagonal 1370 | line 1371 | diagonal 1372 | heterosexual 1373 | ingroup 1374 | bohemia 1375 | uncomfortable 1376 | comfortless 1377 | uncomfortable 1378 | disquieting 1379 | hyperlinks 1380 | link 1381 | therapeutical 1382 | acoustic 1383 | therapeutical 1384 | healthful 1385 | depreciate 1386 | deflate 1387 | intelligence 1388 | shrewdness 1389 | intelligence 1390 | agency 1391 | cynically 1392 | distrustful 1393 | autopilot 1394 | unconsciousness 1395 | enjoining 1396 | forbid 1397 | enjoining 1398 | command 1399 | reelections 1400 | election 1401 | tidings 1402 | float 1403 | tidings 1404 | ebb 1405 | transmigrated 1406 | immigrate 1407 | infeasible 1408 | impossible 1409 | rhymers 1410 | writer 1411 | germanic 1412 | scandinavian 1413 | anticancer 1414 | person 1415 | fording 1416 | traverse 1417 | fording 1418 | deep 1419 | unmolested 1420 | untroubled 1421 | covariant 1422 | variable 1423 | postposition 
1424 | place 1425 | splashy 1426 | covered 1427 | splashy 1428 | ostentatious 1429 | sprouting 1430 | germinate 1431 | sprouting 1432 | grow 1433 | entwined 1434 | stitch 1435 | entwined 1436 | wreathe 1437 | hypertext 1438 | text 1439 | expressible 1440 | representable 1441 | unicyclist 1442 | pedaler 1443 | spatiality 1444 | property 1445 | acoustical 1446 | remedy 1447 | strains 1448 | trouble 1449 | strains 1450 | rack 1451 | resistor 1452 | splitter 1453 | pastorship 1454 | position 1455 | brightly 1456 | colorful 1457 | lubricate 1458 | change 1459 | lubricate 1460 | fill 1461 | hilariously 1462 | humorous 1463 | intercession 1464 | prayer 1465 | evangelicalism 1466 | revivalism 1467 | unmarried 1468 | unwed 1469 | globalise 1470 | widen 1471 | cofactor 1472 | compound 1473 | energized 1474 | enliven 1475 | energized 1476 | change 1477 | registry 1478 | register 1479 | unrealizable 1480 | impossible 1481 | dissociations 1482 | compartmentalization 1483 | dissociations 1484 | separation 1485 | griping 1486 | bite 1487 | griping 1488 | complain 1489 | quieten 1490 | hush 1491 | quieten 1492 | compose 1493 | transfigure 1494 | change 1495 | scarceness 1496 | rarity 1497 | corroding 1498 | decay 1499 | corroding 1500 | corrode 1501 | freakishly 1502 | panic 1503 | copilot 1504 | pilot 1505 | analyzed 1506 | synthesize 1507 | analyzed 1508 | survey 1509 | confinements 1510 | pregnancy 1511 | confinements 1512 | restraint 1513 | prideful 1514 | elated 1515 | prideful 1516 | proud 1517 | commode 1518 | drawers 1519 | commode 1520 | seat 1521 | fluidity 1522 | thinness 1523 | fluidity 1524 | changeableness 1525 | internationalize 1526 | control 1527 | internationalize 1528 | change 1529 | academicism 1530 | traditionalism 1531 | boisterously 1532 | spirited 1533 | boisterously 1534 | disorderly 1535 | effected 1536 | carry 1537 | effected 1538 | draw 1539 | subhead 1540 | heading 1541 | whizzed 1542 | sound 1543 | independences 1544 | independent 1545 | independences 
1546 | victory 1547 | conductance 1548 | electrical 1549 | uncontrolled 1550 | rampant 1551 | selectively 1552 | exclusive 1553 | selectively 1554 | discriminating 1555 | fulfillments 1556 | satisfaction 1557 | fulfillments 1558 | self-fulfillment 1559 | premeditation 1560 | planning 1561 | stewardship 1562 | position 1563 | residing 1564 | populate 1565 | residing 1566 | stay 1567 | coefficient 1568 | self 1569 | algebraist 1570 | mathematician 1571 | drownings 1572 | extinguish 1573 | drownings 1574 | cover 1575 | encamping 1576 | populate 1577 | prostatic 1578 | criticism 1579 | prostatic 1580 | radio 1581 | violating 1582 | fly 1583 | violating 1584 | observe 1585 | remitting 1586 | transfer 1587 | orientate 1588 | reorientate 1589 | postmark 1590 | marker 1591 | postmark 1592 | stamp 1593 | establishment 1594 | organization 1595 | establishment 1596 | beginning 1597 | recitalist 1598 | soloist 1599 | institutionalize 1600 | hospitalize 1601 | verbalize 1602 | enthuse 1603 | verbalize 1604 | talk 1605 | presenters 1606 | communicator 1607 | presenters 1608 | advocate 1609 | repressing 1610 | suppress 1611 | repressing 1612 | oppress 1613 | premisses 1614 | presuppose 1615 | premisses 1616 | premise 1617 | outfoxed 1618 | surpass 1619 | gardens 1620 | sink 1621 | gardens 1622 | tend 1623 | phosphate 1624 | drink 1625 | phosphate 1626 | sodium 1627 | airship 1628 | trade 1629 | submariners 1630 | bluejacket 1631 | infectiously 1632 | canker 1633 | subsurface 1634 | submarine 1635 | extendible 1636 | long 1637 | refresher 1638 | beverage 1639 | refresher 1640 | legal 1641 | seasonable 1642 | opportune 1643 | moderatorship 1644 | position 1645 | modesty 1646 | demureness 1647 | prejudging 1648 | evaluate 1649 | roadless 1650 | inaccessible 1651 | objectifying 1652 | change 1653 | expounded 1654 | clarify 1655 | expounded 1656 | premise 1657 | nonperformance 1658 | negligence 1659 | acoustics 1660 | remedy 1661 | acoustics 1662 | physics 1663 | yellowish 1664 | 
chromatic 1665 | reckoner 1666 | statistician 1667 | reckoner 1668 | handbook 1669 | conscientious 1670 | careful 1671 | amounted 1672 | make 1673 | amounted 1674 | work 1675 | vegetational 1676 | growth 1677 | vegetational 1678 | forest 1679 | unfavourable 1680 | adverse 1681 | unfavourable 1682 | discriminatory 1683 | vocalism 1684 | voice 1685 | vocalism 1686 | system 1687 | continence 1688 | self-discipline 1689 | immoderate 1690 | excessive 1691 | internships 1692 | position 1693 | translunar 1694 | heavenly 1695 | ideality 1696 | quality 1697 | importance 1698 | momentousness 1699 | importance 1700 | primacy 1701 | jarringly 1702 | move 1703 | jarringly 1704 | conflict 1705 | affectional 1706 | emotional 1707 | rediscovery 1708 | discovery 1709 | microfossils 1710 | fossil 1711 | unknowing 1712 | ignorance 1713 | unknowing 1714 | uninformed 1715 | commandership 1716 | position 1717 | autoimmunity 1718 | autoimmune 1719 | undefended 1720 | vulnerable 1721 | collected 1722 | take 1723 | collected 1724 | corral 1725 | secluding 1726 | isolate 1727 | ceramicist 1728 | craftsman 1729 | teaspoonful 1730 | containerful 1731 | migrational 1732 | emigration 1733 | migrational 1734 | people 1735 | newness 1736 | brand-newness 1737 | circumscribes 1738 | content 1739 | rudderless 1740 | purposeless 1741 | internationaler 1742 | foreign 1743 | contrive 1744 | plot 1745 | contrive 1746 | make 1747 | unarguable 1748 | incontestable 1749 | replaces 1750 | preempt 1751 | unconcern 1752 | heartlessness 1753 | unconcern 1754 | carefreeness 1755 | reformations 1756 | religious 1757 | procreated 1758 | make 1759 | inducement 1760 | motivation 1761 | inducement 1762 | causing 1763 | sanctify 1764 | lustrate 1765 | sanctify 1766 | declare 1767 | effectiveness 1768 | potent 1769 | restrainer 1770 | chemical 1771 | restrainer 1772 | nazi 1773 | imprecise 1774 | inaccurate 1775 | heraldist 1776 | applaud 1777 | heraldist 1778 | tell 1779 | sweetish 1780 | sweet 1781 | bootless 1782 | 
unproductive 1783 | follower 1784 | tail 1785 | follower 1786 | cultist 1787 | traversals 1788 | travel 1789 | traversals 1790 | skiing 1791 | requests 1792 | invite 1793 | posthole 1794 | hole 1795 | unilluminated 1796 | dark 1797 | consigning 1798 | abandon 1799 | consigning 1800 | entrust 1801 | purchasable 1802 | available 1803 | purchasable 1804 | corrupt 1805 | abandonment 1806 | absence 1807 | pestilence 1808 | plague 1809 | pestilence 1810 | disease 1811 | weirdly 1812 | deity 1813 | weirdly 1814 | supernatural 1815 | antagonist 1816 | person 1817 | antagonist 1818 | muscle 1819 | puritanism 1820 | sternness 1821 | profitless 1822 | unrewarding 1823 | customise 1824 | produce 1825 | customise 1826 | change 1827 | insurrectional 1828 | conflict 1829 | algebras 1830 | vector 1831 | monotony 1832 | constancy 1833 | monotony 1834 | unvariedness 1835 | subletting 1836 | lease 1837 | princedom 1838 | domain 1839 | princedom 1840 | rank 1841 | uninhibited 1842 | unreserved 1843 | sublieutenant 1844 | lieutenant 1845 | absorbing 1846 | assimilate 1847 | absorbing 1848 | learn 1849 | conflagration 1850 | fire 1851 | condescended 1852 | act 1853 | decompositions 1854 | decay 1855 | decompositions 1856 | algebra 1857 | obstructive 1858 | preventive 1859 | intelligences 1860 | brain 1861 | intelligences 1862 | military 1863 | indirectness 1864 | characteristic 1865 | imperils 1866 | exist 1867 | skillfulness 1868 | command 1869 | unmentionable 1870 | impermissible 1871 | shortish 1872 | short 1873 | deserters 1874 | quitter 1875 | engineering 1876 | design 1877 | engineering 1878 | plan 1879 | provisionally 1880 | conditional 1881 | subordination 1882 | relation 1883 | subordination 1884 | dependence 1885 | cofounder 1886 | founder 1887 | membership 1888 | body 1889 | membership 1890 | relationship 1891 | embroideries 1892 | needlepoint 1893 | embroideries 1894 | expansion 1895 | americanize 1896 | change 1897 | protectorship 1898 | position 1899 | unilateralist 1900 | 
advocate 1901 | nonstandard 1902 | measure 1903 | convector 1904 | heater 1905 | evacuated 1906 | move 1907 | evacuated 1908 | empty 1909 | subroutines 1910 | software 1911 | brittany 1912 | france 1913 | accomplishments 1914 | attainment 1915 | accomplishments 1916 | horsemanship 1917 | outperforming 1918 | outshout 1919 | microcircuit 1920 | chip 1921 | voraciously 1922 | gluttonous 1923 | voraciously 1924 | acquisitive 1925 | intramolecular 1926 | molar 1927 | hospitalize 1928 | commit 1929 | distinguishing 1930 | discriminate 1931 | distinguishing 1932 | sex 1933 | reproductive 1934 | fruitful 1935 | goldplated 1936 | plate 1937 | favourable 1938 | complimentary 1939 | procreation 1940 | generation 1941 | postponements 1942 | adjournment 1943 | postponements 1944 | extension 1945 | detectable 1946 | perceptible 1947 | detectable 1948 | noticeable 1949 | contraception 1950 | control 1951 | lushness 1952 | abundance 1953 | incensing 1954 | anger 1955 | incensing 1956 | odorize 1957 | outlawed 1958 | illegal 1959 | embroiderers 1960 | embroideress 1961 | blitzed 1962 | attack 1963 | wilderness 1964 | disfavor 1965 | wilderness 1966 | bush 1967 | decapitated 1968 | guillotine 1969 | decapitated 1970 | headless 1971 | microflora 1972 | microorganism 1973 | acceptance 1974 | blessing 1975 | acceptance 1976 | recognition 1977 | unfortunate 1978 | prisoner 1979 | unfortunate 1980 | black 1981 | refuted 1982 | oppose 1983 | refuted 1984 | disprove 1985 | greenly 1986 | discolor 1987 | greenly 1988 | emerald 1989 | importances 1990 | standing 1991 | importances 1992 | deal 1993 | autoimmune 1994 | carrier 1995 | autoimmune 1996 | exempt 1997 | circumnavigations 1998 | travel 1999 | interrelationship 2000 | psychodynamics 2001 | monoatomic 2002 | small 2003 | monoatomic 2004 | thermonuclear 2005 | undefinable 2006 | undefined 2007 | catalogued 2008 | compose 2009 | catalogued 2010 | classify 2011 | heterosexism 2012 | discrimination 2013 | inflicted 2014 | intrude 2015 | 
preaching 2016 | evangelize 2017 | preaching 2018 | sermonize 2019 | improver 2020 | benefactor 2021 | improver 2022 | attachment 2023 | prudery 2024 | modesty 2025 | combusted 2026 | burn 2027 | swooshing 2028 | sound 2029 | intersected 2030 | meet 2031 | unwaveringly 2032 | hover 2033 | interlingua 2034 | language 2035 | tricolours 2036 | flag 2037 | fictitiously 2038 | unreal 2039 | fictitiously 2040 | counterfeit 2041 | languishing 2042 | weaken 2043 | scampering 2044 | run 2045 | sulfuric 2046 | process 2047 | sulfuric 2048 | sulfide 2049 | trilateral 2050 | reciprocal 2051 | trilateral 2052 | isosceles 2053 | delimitations 2054 | property 2055 | management 2056 | administration 2057 | management 2058 | finance 2059 | microfiche 2060 | microfilm 2061 | medicate 2062 | impregnate 2063 | medicate 2064 | treat 2065 | subgroup 2066 | group 2067 | subgroup 2068 | bench 2069 | normalise 2070 | normalize 2071 | irreligious 2072 | nonobservant 2073 | slanderous 2074 | harmful 2075 | microbiologist 2076 | virologist 2077 | circumvent 2078 | beat 2079 | circumvent 2080 | attack 2081 | revolutionise 2082 | indoctrinate 2083 | revolutionise 2084 | change 2085 | discrete 2086 | separate 2087 | protrusion 2088 | mogul 2089 | protrusion 2090 | shape 2091 | bewitchment 2092 | sorcery 2093 | locality 2094 | scenery 2095 | scornful 2096 | disrespectful 2097 | reburial 2098 | burying 2099 | untracked 2100 | inaccessible 2101 | mutinied 2102 | rebel 2103 | unforeseen 2104 | unexpected 2105 | helical 2106 | coiled 2107 | carbonate 2108 | process 2109 | carbonate 2110 | change 2111 | disturbances 2112 | magnetic 2113 | disturbances 2114 | agitation 2115 | mccarthyism 2116 | witch-hunt 2117 | titillated 2118 | please 2119 | titillated 2120 | itch 2121 | fetishism 2122 | belief 2123 | indifferently 2124 | uninterested 2125 | indifferently 2126 | unconcerned 2127 | rascality 2128 | naughtiness 2129 | parallelize 2130 | put 2131 | fractionate 2132 | separate 2133 | chairmanship 2134 | 
position 2135 | starkness 2136 | limit 2137 | bellowing 2138 | shout 2139 | destroyers 2140 | annihilator 2141 | destroyers 2142 | warship 2143 | rededicated 2144 | give 2145 | reassuringly 2146 | affirm 2147 | inconvertible 2148 | incommutable 2149 | dissatisfying 2150 | disgruntle 2151 | seeders 2152 | person 2153 | seeders 2154 | mechanical 2155 | prospector 2156 | sourdough 2157 | leadership 2158 | helm 2159 | leadership 2160 | high 2161 | assassinated 2162 | kill 2163 | assassinated 2164 | defame 2165 | considerable 2166 | significant 2167 | guardedly 2168 | shepherd 2169 | guardedly 2170 | patrol 2171 | accessible 2172 | approachable 2173 | accessible 2174 | comprehensible 2175 | interconnectedness 2176 | connection 2177 | autograft 2178 | graft 2179 | antagonize 2180 | annoy 2181 | antagonize 2182 | act 2183 | nerveless 2184 | composed 2185 | distrustful 2186 | cynical 2187 | democratize 2188 | change 2189 | diffidence 2190 | unassertiveness 2191 | heartlessly 2192 | spiritless 2193 | sensualist 2194 | epicure 2195 | concordance 2196 | agreement 2197 | concordance 2198 | order 2199 | promiscuous 2200 | unchaste 2201 | promiscuous 2202 | indiscriminate 2203 | excitedly 2204 | affect 2205 | excitedly 2206 | arouse 2207 | careerism 2208 | practice 2209 | retraced 2210 | return 2211 | internationality 2212 | scope 2213 | advisory 2214 | announcement 2215 | advisory 2216 | informative 2217 | enunciates 2218 | state 2219 | battened 2220 | strengthen 2221 | assistance 2222 | facilitation 2223 | blunders 2224 | transgress 2225 | combust 2226 | blow 2227 | combust 2228 | burn 2229 | excitation 2230 | arousal 2231 | excitation 2232 | exciting 2233 | blackmailed 2234 | extort 2235 | bunking 2236 | cheat 2237 | bunking 2238 | bed 2239 | labourer 2240 | hire 2241 | rectorate 2242 | position 2243 | designed 2244 | intend 2245 | designed 2246 | mental 2247 | behavioural 2248 | action 2249 | behavioural 2250 | propriety 2251 | defrauding 2252 | short-change 2253 | 
procurators 2254 | bureaucrat 2255 | procurators 2256 | agent 2257 | assistances 2258 | resource 2259 | assistances 2260 | recourse 2261 | unsubdivided 2262 | smooth 2263 | implantations 2264 | placement 2265 | implantations 2266 | procedure 2267 | advancement 2268 | seafaring 2269 | advancement 2270 | encouragement 2271 | translocating 2272 | transfer 2273 | codefendants 2274 | corespondent 2275 | monarchic 2276 | undemocratic 2277 | traitorous 2278 | disloyal 2279 | highlanders 2280 | soldier 2281 | highlanders 2282 | scot 2283 | syntactic 2284 | plan 2285 | reproducible 2286 | duplicable 2287 | monopolist 2288 | person 2289 | comportment 2290 | manner 2291 | roofers 2292 | thatcher 2293 | improving 2294 | relieve 2295 | improving 2296 | reform 2297 | adjustor 2298 | investigator 2299 | dooming 2300 | convict 2301 | preadolescent 2302 | young 2303 | depictive 2304 | representational 2305 | stoical 2306 | unemotional 2307 | dynastic 2308 | ruler 2309 | hinduism 2310 | religion 2311 | pathfinder 2312 | usher 2313 | romanic 2314 | italian 2315 | overlying 2316 | lie 2317 | overlying 2318 | kill 2319 | refinery 2320 | plant 2321 | industrialise 2322 | change 2323 | expressionless 2324 | uncommunicative 2325 | censorship 2326 | military 2327 | censorship 2328 | deletion 2329 | tricolor 2330 | flag 2331 | tricolor 2332 | colored 2333 | interlaces 2334 | hold 2335 | interlaces 2336 | splice 2337 | personifying 2338 | embody 2339 | personifying 2340 | exemplify 2341 | immobilization 2342 | restraint 2343 | immobilization 2344 | preservation 2345 | subsequences 2346 | result 2347 | circumcisions 2348 | rite 2349 | circumcisions 2350 | day 2351 | bibliographies 2352 | list 2353 | unnecessary 2354 | inessential 2355 | rejoinders 2356 | reply 2357 | rejoinders 2358 | pleading 2359 | lavishness 2360 | expensiveness 2361 | acronymic 2362 | form 2363 | incoordination 2364 | unskillfulness 2365 | provisionary 2366 | conditional 2367 | regardless 2368 | heedless 2369 | promotive 
2370 | encouraging 2371 | indicted 2372 | charge 2373 | asphaltic 2374 | paving 2375 | asphaltic 2376 | pave 2377 | cowered 2378 | crouch 2379 | cowered 2380 | bend 2381 | mimicked 2382 | imitate 2383 | protestantism 2384 | fundamentalism 2385 | performances 2386 | play 2387 | regained 2388 | locate 2389 | regained 2390 | get 2391 | monoculture 2392 | culture 2393 | emulsifying 2394 | change 2395 | knightly 2396 | past 2397 | knightly 2398 | courteous 2399 | monogenesis 2400 | reproduction 2401 | interlace 2402 | hold 2403 | communistic 2404 | socialist 2405 | communistic 2406 | politician 2407 | extraterrestrials 2408 | animal 2409 | bronchus 2410 | tube 2411 | contraries 2412 | opposition 2413 | amethysts 2414 | crystalline 2415 | travelers 2416 | foreigner 2417 | gathered 2418 | muster 2419 | gathered 2420 | convene 2421 | trusteeship 2422 | position 2423 | trusteeship 2424 | district 2425 | suspiciousness 2426 | distrust 2427 | feminised 2428 | change 2429 | mushroomed 2430 | grow 2431 | mushroomed 2432 | pick 2433 | vindictively 2434 | unforgiving 2435 | vindictively 2436 | malicious 2437 | castled 2438 | move 2439 | castled 2440 | fancy 2441 | glittery 2442 | bright 2443 | lightship 2444 | ship 2445 | eroticism 2446 | arousal 2447 | eroticism 2448 | desire 2449 | caesarism 2450 | autocracy 2451 | sessions 2452 | quarter 2453 | sessions 2454 | sitting 2455 | fashionable 2456 | up-to-date 2457 | teasingly 2458 | torment 2459 | teasingly 2460 | kid 2461 | microwaving 2462 | cook 2463 | transverse 2464 | crosswise 2465 | managership 2466 | position 2467 | methodically 2468 | acting 2469 | methodically 2470 | know-how 2471 | excitements 2472 | fever 2473 | excitements 2474 | intoxication 2475 | christianise 2476 | convert 2477 | monarchical 2478 | undemocratic 2479 | monarchical 2480 | noble 2481 | cooperators 2482 | spouse 2483 | circumscribed 2484 | restrict 2485 | circumscribed 2486 | trace 2487 | topically 2488 | current 2489 | topically 2490 | local 2491 | 
evangelistic 2492 | enthusiastic 2493 | emotionalism 2494 | emotional 2495 | differences 2496 | differentia 2497 | differences 2498 | variation 2499 | hallucinating 2500 | ill 2501 | hallucinating 2502 | perceive 2503 | liverpools 2504 | england 2505 | deciphering 2506 | read 2507 | stroked 2508 | touch 2509 | motherless 2510 | unparented 2511 | impermanent 2512 | improvised 2513 | contrabands 2514 | merchandise 2515 | astronautical 2516 | spacewalker 2517 | scrutiny 2518 | look 2519 | discolor 2520 | bleach 2521 | receiverships 2522 | proceeding 2523 | rematches 2524 | repeat 2525 | scandalize 2526 | disgust 2527 | condensing 2528 | encapsulate 2529 | scholarship 2530 | letters 2531 | scholarship 2532 | prize 2533 | transmitter 2534 | communicator 2535 | transmitter 2536 | carrier 2537 | autobuses 2538 | school 2539 | dematerialised 2540 | vanish 2541 | predators 2542 | attacker 2543 | predators 2544 | carnivore 2545 | enlarger 2546 | equipment 2547 | repositioned 2548 | down 2549 | repositioned 2550 | reduce 2551 | equivalence 2552 | tie 2553 | localise 2554 | lie 2555 | localise 2556 | situate 2557 | enfolded 2558 | cocoon 2559 | approachable 2560 | comprehensible 2561 | animality 2562 | nature 2563 | interweaved 2564 | braid 2565 | engorge 2566 | eat 2567 | protesters 2568 | picket 2569 | protesters 2570 | nonconformist 2571 | rooters 2572 | enthusiast 2573 | unobjectionable 2574 | dirty 2575 | unobjectionable 2576 | inoffensive 2577 | physically 2578 | material 2579 | physically 2580 | bodily 2581 | unceremonious 2582 | informal 2583 | unceremonious 2584 | discourteous 2585 | postcodes 2586 | code 2587 | autosuggestion 2588 | self-improvement 2589 | circumventing 2590 | beat 2591 | circumventing 2592 | ebb 2593 | homoerotic 2594 | homosexual 2595 | undesirable 2596 | unwelcome 2597 | adaptive 2598 | accommodative 2599 | foresters 2600 | farmer 2601 | winking 2602 | flicker 2603 | trichloride 2604 | chloride 2605 | preconception 2606 | opinion 2607 | 
preconception 2608 | homophobia 2609 | fringes 2610 | decorate 2611 | fringes 2612 | surround 2613 | shepherded 2614 | guard 2615 | shepherded 2616 | tend 2617 | kingship 2618 | rank 2619 | excretion 2620 | matter 2621 | excretion 2622 | defecation 2623 | inventively 2624 | creative 2625 | inheritor 2626 | heiress 2627 | conspicuousness 2628 | boldness 2629 | preconceptions 2630 | opinion 2631 | preconceptions 2632 | experimenter 2633 | uproariously 2634 | combustion 2635 | uproariously 2636 | noise 2637 | glistens 2638 | spangle 2639 | glistens 2640 | brightness 2641 | sexless 2642 | asexual 2643 | sexless 2644 | unsexy 2645 | spellers 2646 | writer 2647 | spellers 2648 | primer 2649 | orchestrations 2650 | musical 2651 | orchestrations 2652 | arrangement 2653 | embroiderer 2654 | needleworker 2655 | arousal 2656 | desire 2657 | arousal 2658 | inflammation 2659 | extending 2660 | increase 2661 | extending 2662 | range 2663 | enforcements 2664 | imposition 2665 | connectedness 2666 | bridge 2667 | blacken 2668 | discolor 2669 | blacken 2670 | singe 2671 | galvanize 2672 | coat 2673 | galvanize 2674 | shock 2675 | weaponize 2676 | change 2677 | sorrowful 2678 | grievous 2679 | postdated 2680 | follow 2681 | antipsychotics 2682 | clozapine 2683 | convocation 2684 | gathering 2685 | convocation 2686 | assembly 2687 | intensions 2688 | meaning 2689 | employed 2690 | ship 2691 | employed 2692 | give 2693 | susceptible 2694 | impressionable 2695 | susceptible 2696 | allergic 2697 | alarmism 2698 | warning 2699 | canonize 2700 | declare 2701 | canonize 2702 | laud 2703 | imbedding 2704 | nest 2705 | foreigner 2706 | transalpine 2707 | foreigner 2708 | gringo 2709 | anaesthetics 2710 | drug 2711 | disassociates 2712 | separate 2713 | utterance 2714 | communication 2715 | mitigated 2716 | relieve 2717 | mitigated 2718 | apologize 2719 | unconsolidated 2720 | loose 2721 | ukrainians 2722 | slavic 2723 | hypocrisy 2724 | pretense 2725 | refurbishment 2726 | improvement 2727 | 
intertwining 2728 | raw 2729 | objectify 2730 | change 2731 | crispness 2732 | freshness 2733 | magically 2734 | supernatural 2735 | philanthropy 2736 | aid 2737 | washers 2738 | worker 2739 | washers 2740 | seal 2741 | frowning 2742 | displeased 2743 | frowning 2744 | scowl 2745 | debarred 2746 | prevent 2747 | nonnative 2748 | foreign 2749 | defeatist 2750 | pessimist 2751 | nakedness 2752 | gloom 2753 | nakedness 2754 | undress 2755 | inexpert 2756 | unprofessional 2757 | designs 2758 | plot 2759 | designs 2760 | intend 2761 | implicational 2762 | meaning 2763 | accordance 2764 | giving 2765 | accordance 2766 | agreement 2767 | skateboarders 2768 | skater 2769 | amusements 2770 | delight 2771 | divided 2772 | paragraph 2773 | divided 2774 | calculate 2775 | dissociable 2776 | divisible 2777 | reduced 2778 | abbreviate 2779 | reduced 2780 | spill 2781 | unintelligible 2782 | incomprehensible 2783 | unintelligible 2784 | slurred 2785 | syllable 2786 | word 2787 | governance 2788 | sociable 2789 | governance 2790 | government 2791 | rainless 2792 | dry 2793 | kazakhstani 2794 | asian 2795 | microseconds 2796 | nanosecond 2797 | disgorge 2798 | seed 2799 | discernment 2800 | knowing 2801 | discernment 2802 | discrimination 2803 | employable 2804 | worker 2805 | univocal 2806 | absolute 2807 | disturbing 2808 | affect 2809 | disturbing 2810 | toss 2811 | unicycling 2812 | bicycle 2813 | mildness 2814 | balminess 2815 | mildness 2816 | manner 2817 | civilise 2818 | sophisticate 2819 | civilise 2820 | change 2821 | valorous 2822 | brave 2823 | exporters 2824 | businessperson 2825 | enrollment 2826 | body 2827 | enrollment 2828 | entrance 2829 | preheated 2830 | heat 2831 | scholarships 2832 | aid 2833 | scholarships 2834 | education 2835 | stressor 2836 | agent 2837 | correspondence 2838 | write 2839 | correspondence 2840 | conformity 2841 | interlinking 2842 | connect 2843 | princedoms 2844 | domain 2845 | princedoms 2846 | rank 2847 | extrapolations 2848 | 
calculation 2849 | extrapolations 2850 | inference 2851 | extraterritorial 2852 | territorial 2853 | whimsically 2854 | arbitrary 2855 | nobelist 2856 | laureate 2857 | resigning 2858 | top 2859 | resigning 2860 | office 2861 | wrathful 2862 | angry 2863 | cowboys 2864 | performer 2865 | cowboys 2866 | ranch 2867 | conjoins 2868 | intermarry 2869 | conjoins 2870 | cross-link 2871 | predominance 2872 | obviousness 2873 | predominance 2874 | dominance 2875 | brandish 2876 | expose 2877 | brandish 2878 | hold 2879 | regionalisms 2880 | policy 2881 | regionalisms 2882 | address 2883 | extrasensory 2884 | clairvoyant 2885 | microcomputers 2886 | computer 2887 | subtropical 2888 | figurative 2889 | subtropical 2890 | equatorial 2891 | unbiased 2892 | impartial 2893 | unbiased 2894 | nonpartisan 2895 | adhesion 2896 | scar 2897 | resistive 2898 | defiant 2899 | homogeneous 2900 | homogenized 2901 | dependence 2902 | addiction 2903 | dependence 2904 | helplessness 2905 | disinvestment 2906 | withdrawal 2907 | containership 2908 | ship 2909 | naivety 2910 | artlessness 2911 | transmuted 2912 | become 2913 | baptistic 2914 | protestant 2915 | purveying 2916 | supply 2917 | secularist 2918 | advocate 2919 | demerit 2920 | mark 2921 | remarriage 2922 | marriage 2923 | interpreted 2924 | reinterpret 2925 | interpreted 2926 | deconstruct 2927 | clericalism 2928 | policy 2929 | irresolution 2930 | doubt 2931 | irresolution 2932 | volatility 2933 | transmutes 2934 | become 2935 | transmutes 2936 | work 2937 | footballers 2938 | player 2939 | excommunicated 2940 | oust 2941 | containers 2942 | cargo 2943 | commutation 2944 | travel 2945 | transponder 2946 | device 2947 | cooperator 2948 | spouse 2949 | buggered 2950 | copulate 2951 | remarkable 2952 | extraordinary 2953 | remarkable 2954 | significant 2955 | suppleness 2956 | gracefulness 2957 | suppleness 2958 | bendability 2959 | purgatory 2960 | situation 2961 | prehistorical 2962 | past 2963 | technology 2964 | aeronautical 
2965 | technology 2966 | science 2967 | transfusing 2968 | pour 2969 | transfusing 2970 | lend 2971 | prolapse 2972 | descend 2973 | circularize 2974 | canvass 2975 | circularize 2976 | poll 2977 | greenness 2978 | profusion 2979 | greenness 2980 | ripeness 2981 | formalisms 2982 | philosophic 2983 | formalisms 2984 | imitation 2985 | interpenetrate 2986 | spiritize 2987 | worsens 2988 | inflame 2989 | worsens 2990 | tumble 2991 | pathfinders 2992 | hunt 2993 | demanded 2994 | clamor 2995 | demanded 2996 | cost 2997 | unequivocal 2998 | unambiguous 2999 | unequivocal 3000 | explicit 3001 | intending 3002 | aim 3003 | intending 3004 | plan 3005 | foreclosed 3006 | obstruct 3007 | disturbance 3008 | storm 3009 | disturbance 3010 | agitation 3011 | eldership 3012 | position 3013 | homophony 3014 | pronunciation 3015 | homophony 3016 | music 3017 | contrarily 3018 | brown 3019 | partnerships 3020 | relationship 3021 | partnerships 3022 | copartnership 3023 | removes 3024 | empty 3025 | removes 3026 | out 3027 | skidding 3028 | skid 3029 | sportive 3030 | playful 3031 | nurturance 3032 | care 3033 | microvolts 3034 | potential 3035 | asteroidal 3036 | angular 3037 | asteroidal 3038 | child 3039 | exterminator 3040 | killer 3041 | talkativeness 3042 | communicativeness 3043 | difference 3044 | distinction 3045 | difference 3046 | inflection 3047 | gumption 3048 | fortitude 3049 | competes 3050 | race 3051 | scheduled 3052 | calendar 3053 | scheduled 3054 | program 3055 | undisclosed 3056 | covert 3057 | abstractionist 3058 | nonrepresentational 3059 | abstractionist 3060 | painter 3061 | merchantable 3062 | salable 3063 | exacted 3064 | necessitate 3065 | developments 3066 | advancement 3067 | syphons 3068 | draw 3069 | syphons 3070 | tube 3071 | organismal 3072 | system 3073 | sanctifying 3074 | spiritualize 3075 | cuteness 3076 | beauty 3077 | subserving 3078 | help 3079 | cofactors 3080 | compound 3081 | combusting 3082 | ablaze 3083 | combusting 3084 | change 3085 | 
shoulders 3086 | thrust 3087 | shoulders 3088 | raise 3089 | consonant 3090 | letter 3091 | auditive 3092 | analyze 3093 | auditive 3094 | learn 3095 | clients 3096 | case 3097 | clients 3098 | guest 3099 | interesting 3100 | fascinate 3101 | interesting 3102 | refer 3103 | exhibited 3104 | flaunt 3105 | exhibited 3106 | possess 3107 | remakes 3108 | recast 3109 | remakes 3110 | creation 3111 | florescence 3112 | growth 3113 | autopilots 3114 | guidance 3115 | autopilots 3116 | unconsciousness 3117 | separationist 3118 | separatist 3119 | falsifier 3120 | deceiver 3121 | manacles 3122 | shackle 3123 | microcircuits 3124 | chip 3125 | purposeless 3126 | worthless 3127 | cofounders 3128 | founder 3129 | specialism 3130 | career 3131 | specialism 3132 | concentration 3133 | apocalyptical 3134 | prophetic 3135 | copilots 3136 | pilot 3137 | reprehensible 3138 | wrong 3139 | abashed 3140 | upset 3141 | unshaped 3142 | unformed 3143 | boastful 3144 | proud 3145 | commingled 3146 | blend 3147 | trioxide 3148 | oxide 3149 | despoil 3150 | destroy 3151 | bachelors 3152 | live 3153 | bachelors 3154 | man 3155 | macroeconomist 3156 | economist 3157 | placidity 3158 | calmness 3159 | placidity 3160 | composure 3161 | affordable 3162 | cheap 3163 | wallpapered 3164 | cover 3165 | friendship 3166 | brotherhood 3167 | practicable 3168 | practical 3169 | practicable 3170 | possible 3171 | householders 3172 | warrior 3173 | blurting 3174 | talk 3175 | confirmable 3176 | empirical 3177 | positioners 3178 | actuator 3179 | inadvertence 3180 | omission 3181 | reassessments 3182 | appraisal 3183 | reclaim 3184 | save 3185 | reclaim 3186 | get 3187 | broadcasters 3188 | disk 3189 | broadcasters 3190 | mechanical 3191 | inclosure 3192 | document 3193 | inclosure 3194 | insertion 3195 | regularize 3196 | decide 3197 | regularize 3198 | arrange 3199 | interlayers 3200 | layer 3201 | disembodied 3202 | rid 3203 | interviewing 3204 | converse 3205 | pledged 3206 | donate 3207 | pledged 3208 
| guarantee 3209 | insidiously 3210 | dangerous 3211 | insidiously 3212 | seductive 3213 | spiritualist 3214 | psychic 3215 | microphallus 3216 | penis 3217 | interceptor 3218 | fighter 3219 | surroundings 3220 | touch 3221 | surroundings 3222 | cover 3223 | reviewers 3224 | critic 3225 | reviewers 3226 | writer 3227 | gladness 3228 | happiness 3229 | followed 3230 | tailgate 3231 | disestablishing 3232 | deprive 3233 | dissolved 3234 | state 3235 | dissolved 3236 | integrity 3237 | grassroots 3238 | common 3239 | grassroots 3240 | basic 3241 | slaughterers 3242 | skilled 3243 | corruptive 3244 | evil 3245 | autographic 3246 | picture 3247 | autographic 3248 | written 3249 | predetermine 3250 | prejudice 3251 | predetermine 3252 | determine 3253 | autoregulation 3254 | organic 3255 | incurved 3256 | curved 3257 | knifing 3258 | injure 3259 | immigrating 3260 | inch 3261 | immigrating 3262 | migrate 3263 | noticeable 3264 | broad 3265 | noticeable 3266 | perceptible 3267 | synchronic 3268 | synchronized 3269 | explorers 3270 | person 3271 | circumstances 3272 | possession 3273 | circumstances 3274 | providence 3275 | expounding 3276 | premise 3277 | inharmonious 3278 | incongruous 3279 | disavowed 3280 | deny 3281 | regularise 3282 | even 3283 | regularise 3284 | decide 3285 | possessor 3286 | holder 3287 | consultive 3288 | informative 3289 | distressful 3290 | heavy 3291 | advised 3292 | inform 3293 | advised 3294 | hash 3295 | deposes 3296 | oust 3297 | deposes 3298 | declare 3299 | wordless 3300 | inarticulate 3301 | demoralise 3302 | bastardize 3303 | muscularity 3304 | strength 3305 | muscularity 3306 | condition 3307 | unspecialised 3308 | generalized 3309 | appearances 3310 | manifestation 3311 | disarranged 3312 | randomize 3313 | sniffers 3314 | person 3315 | irritatingly 3316 | worsen 3317 | irritatingly 3318 | fret 3319 | exaction 3320 | demand 3321 | sailings 3322 | travel 3323 | sailings 3324 | swan 3325 | objector 3326 | dissenter 3327 | earmuffs 3328 
| covering 3329 | synoptic 3330 | same 3331 | infolding 3332 | organic 3333 | smallish 3334 | small 3335 | digitise 3336 | change 3337 | receptions 3338 | tea 3339 | receptions 3340 | greeting 3341 | corpulence 3342 | fleshiness 3343 | disfigure 3344 | scar 3345 | refurbishments 3346 | improvement 3347 | censorships 3348 | deletion 3349 | censorships 3350 | censoring 3351 | depressor 3352 | nerve 3353 | depressor 3354 | muscle 3355 | grocery 3356 | greengrocery 3357 | fruiterer 3358 | seller 3359 | unionise 3360 | enroll 3361 | unionise 3362 | join 3363 | malevolence 3364 | vindictiveness 3365 | malevolence 3366 | evil 3367 | unprecedented 3368 | new 3369 | reclassifications 3370 | categorization 3371 | embracement 3372 | cuddle 3373 | autoloading 3374 | automatic 3375 | abductor 3376 | muscle 3377 | cliffhanger 3378 | episode 3379 | cliffhanger 3380 | contest 3381 | solemnity 3382 | seriousness 3383 | delimited 3384 | determine 3385 | moralist 3386 | stickler 3387 | insecurities 3388 | insecureness 3389 | insecurities 3390 | anxiety 3391 | repeating 3392 | replicate 3393 | churchs 3394 | perform 3395 | discovery 3396 | rediscovery 3397 | discovery 3398 | disclosure 3399 | submerging 3400 | cover 3401 | submerging 3402 | sink 3403 | literalness 3404 | concreteness 3405 | acknowledgement 3406 | admission 3407 | acknowledgement 3408 | acceptance 3409 | rompers 3410 | garment 3411 | rompers 3412 | person 3413 | enfolding 3414 | cocoon 3415 | enfolding 3416 | change 3417 | antifeminist 3418 | chauvinist 3419 | omniscience 3420 | wisdom 3421 | astonish 3422 | dazzle 3423 | circumpolar 3424 | polar 3425 | ascendence 3426 | predominance 3427 | aerialist 3428 | ropewalker 3429 | precociously 3430 | early 3431 | precociously 3432 | intelligent 3433 | suspenseful 3434 | tense 3435 | banished 3436 | expel 3437 | relocation 3438 | transportation 3439 | relocation 3440 | change 3441 | indexical 3442 | cross-index 3443 | indexical 3444 | supply 3445 | absconding 3446 | flee 3447 
| encoded 3448 | code 3449 | shanked 3450 | hit 3451 | hypermarkets 3452 | supermarket 3453 | prejudge 3454 | evaluate 3455 | genuinely 3456 | sincere 3457 | genuinely 3458 | attested 3459 | discoverys 3460 | disclosure 3461 | discoverys 3462 | self-discovery 3463 | palestinians 3464 | arab 3465 | parasitical 3466 | dependent 3467 | interconnect 3468 | intercommunicate 3469 | interconnect 3470 | connect 3471 | nondescripts 3472 | person 3473 | amorphous 3474 | unformed 3475 | amorphous 3476 | inorganic 3477 | uncreative 3478 | sterile 3479 | foreigners 3480 | gringo 3481 | unaffected 3482 | unimpressed 3483 | unaffected 3484 | insensitive 3485 | gravitated 3486 | move 3487 | gravitated 3488 | tend 3489 | respectable 3490 | reputable 3491 | respectable 3492 | worthy 3493 | reproduce 3494 | photocopy 3495 | reproduce 3496 | propagate 3497 | schnauzer 3498 | giant 3499 | stimuli 3500 | stimulation 3501 | astronomical 3502 | large 3503 | microbalance 3504 | balance 3505 | subjoined 3506 | append 3507 | unquestioned 3508 | uncontroversial 3509 | loveless 3510 | unloving 3511 | loveless 3512 | unloved 3513 | postmarks 3514 | marker 3515 | postmarks 3516 | stamp 3517 | presenting 3518 | bring 3519 | presenting 3520 | argue 3521 | fiddled 3522 | embezzle 3523 | fiddled 3524 | slack 3525 | transfused 3526 | breathe 3527 | transfused 3528 | pour 3529 | interchanging 3530 | shift 3531 | interchanging 3532 | trade 3533 | antisubmarine 3534 | defensive 3535 | displeases 3536 | repel 3537 | reproachful 3538 | unfavorable 3539 | independently 3540 | worker 3541 | independently 3542 | individualist 3543 | extrajudicial 3544 | illegal 3545 | exterminated 3546 | destroy 3547 | exterminated 3548 | kill 3549 | intercede 3550 | negociate 3551 | postdates 3552 | follow 3553 | postdates 3554 | chronologize 3555 | comport 3556 | about 3557 | comport 3558 | misbehave 3559 | stockers 3560 | animal 3561 | ceremonious 3562 | formal 3563 | authorship 3564 | initiation 3565 | slacken 3566 | 
decrease 3567 | slacken 3568 | weaken 3569 | freshen 3570 | regenerate 3571 | freshen 3572 | wash 3573 | observed 3574 | comment 3575 | observed 3576 | discover 3577 | hydrolysed 3578 | change 3579 | enjoins 3580 | instruct 3581 | replacements 3582 | stand-in 3583 | replacements 3584 | supplanting 3585 | bengali 3586 | ethnic 3587 | transsexual 3588 | person 3589 | automates 3590 | change 3591 | amateurish 3592 | unprofessional 3593 | sponsorship 3594 | support 3595 | ejector 3596 | person 3597 | ejector 3598 | mechanism 3599 | rehashing 3600 | recycle 3601 | preassembled 3602 | produce 3603 | fascinate 3604 | matter 3605 | fascinate 3606 | interest 3607 | riskless 3608 | safe 3609 | incombustible 3610 | fireproof 3611 | rareness 3612 | scarcity 3613 | lengthy 3614 | long 3615 | lordship 3616 | authority 3617 | lordship 3618 | title 3619 | grinder 3620 | sandwich 3621 | grinder 3622 | wisdom 3623 | algebraic 3624 | quadratics 3625 | congeniality 3626 | friendliness 3627 | congeniality 3628 | compatibility 3629 | portioned 3630 | distribute 3631 | piquancy 3632 | spiciness 3633 | piquancy 3634 | quality 3635 | commenting 3636 | note 3637 | commenting 3638 | explain 3639 | reprocessing 3640 | reclaim 3641 | roosted 3642 | settle 3643 | roosted 3644 | sit 3645 | immobilizing 3646 | beat 3647 | immobilizing 3648 | withhold 3649 | promised 3650 | declare 3651 | employments 3652 | state 3653 | employments 3654 | populace 3655 | transposable 3656 | exchangeable 3657 | protractors 3658 | drafting 3659 | religiousness 3660 | piety 3661 | religiousness 3662 | conscientiousness 3663 | concerts 3664 | settle 3665 | concerts 3666 | plan 3667 | postholes 3668 | hole 3669 | liveable 3670 | habitable 3671 | besieging 3672 | attack 3673 | besieging 3674 | distress 3675 | irregardless 3676 | look 3677 | irregardless 3678 | prize 3679 | attendance 3680 | frequency 3681 | attendance 3682 | presence 3683 | computer 3684 | expert 3685 | computer 3686 | server 3687 | subtend 3688 | 
shepherd 3689 | subtend 3690 | suffer 3691 | irrelevance 3692 | inapplicability 3693 | desiccating 3694 | dry 3695 | desiccating 3696 | preserve 3697 | transforming 3698 | transubstantiate 3699 | transforming 3700 | change 3701 | prisoners 3702 | internee 3703 | cosponsors 3704 | sponsor 3705 | unconvincing 3706 | implausible 3707 | unconvincing 3708 | unpersuasive 3709 | analogous 3710 | similar 3711 | preheating 3712 | heat 3713 | irrigate 3714 | hush 3715 | irrigate 3716 | treat 3717 | immortalize 3718 | remind 3719 | immortalize 3720 | change 3721 | unploughed 3722 | untilled 3723 | syntaxes 3724 | system 3725 | syntaxes 3726 | structure 3727 | enforcing 3728 | execute 3729 | enforcing 3730 | compel 3731 | devilish 3732 | playful 3733 | devilish 3734 | evil 3735 | ganging 3736 | group 3737 | dissimulate 3738 | disguise 3739 | enhancement 3740 | improvement 3741 | reconstructs 3742 | construct 3743 | entrench 3744 | fasten 3745 | entrench 3746 | trespass 3747 | spherical 3748 | round 3749 | impolitic 3750 | inexpedient 3751 | londoners 3752 | person 3753 | predetermination 3754 | decision 3755 | destabilization 3756 | change 3757 | reasonable 3758 | moderate 3759 | reasonable 3760 | rational 3761 | pottery 3762 | lusterware 3763 | pottery 3764 | trade 3765 | unenthusiastic 3766 | halfhearted 3767 | virility 3768 | masculinity 3769 | virility 3770 | maleness 3771 | discordance 3772 | dissonance 3773 | discordance 3774 | strife 3775 | plundered 3776 | destroy 3777 | plundered 3778 | steal 3779 | transvestite 3780 | person 3781 | transvestite 3782 | homosexual 3783 | retraction 3784 | motion 3785 | retraction 3786 | withdrawal 3787 | enslaves 3788 | subjugate 3789 | carburettors 3790 | mechanical 3791 | explorer 3792 | diver 3793 | perfectible 3794 | perfect 3795 | stimulates 3796 | prompt 3797 | stimulates 3798 | quicken 3799 | concurrencies 3800 | agreement 3801 | concurrencies 3802 | cooperation 3803 | emulsify 3804 | change 3805 | internationalisms 3806 | scope 
3807 | internationalisms 3808 | doctrine 3809 | cylindric 3810 | rounded 3811 | sexually 3812 | sexy 3813 | afghani 3814 | iranian 3815 | animalism 3816 | doctrine 3817 | animalism 3818 | disposition 3819 | latinist 3820 | classicist 3821 | punjabi 3822 | sanskrit 3823 | punjabi 3824 | indian 3825 | exterminate 3826 | destroy 3827 | exterminate 3828 | kill 3829 | consign 3830 | check 3831 | respectively 3832 | individual 3833 | receiving 3834 | fence 3835 | unicycle 3836 | wheel 3837 | unicycle 3838 | bicycle 3839 | incised 3840 | cut 3841 | incised 3842 | compound 3843 | perfective 3844 | future 3845 | perfective 3846 | aspect 3847 | apprenticeship 3848 | position 3849 | reporters 3850 | reporter 3851 | houseful 3852 | containerful 3853 | irreverence 3854 | evil 3855 | unostentatious 3856 | tasteful 3857 | disadvantaged 3858 | underprivileged 3859 | combatted 3860 | wrestle 3861 | snooper 3862 | eavesdropper 3863 | researchers 3864 | fieldworker 3865 | resides 3866 | populate 3867 | individualize 3868 | distinguish 3869 | individualize 3870 | change 3871 | coinsurance 3872 | insurance 3873 | micrometer 3874 | nanometer 3875 | micrometer 3876 | caliper 3877 | postcode 3878 | address 3879 | encrusted 3880 | coat 3881 | encrusted 3882 | decorate 3883 | undissolved 3884 | unmelted 3885 | fastness 3886 | fast 3887 | remainder 3888 | sell 3889 | remainder 3890 | part 3891 | marginality 3892 | position 3893 | unmanned 3894 | faze 3895 | refuels 3896 | fuel 3897 | sidewinder 3898 | rattlesnake 3899 | sidewinder 3900 | missile 3901 | federalize 3902 | unite 3903 | casteless 3904 | unwanted 3905 | animalize 3906 | change 3907 | reproves 3908 | knock 3909 | characters 3910 | being 3911 | characters 3912 | scratch 3913 | conjecture 3914 | hypothesis 3915 | symmetrical 3916 | balanced 3917 | allurement 3918 | temptation 3919 | allurement 3920 | invitation 3921 | binging 3922 | eat 3923 | extractor 3924 | forceps 3925 | photographer 3926 | paparazzo 3927 | perspectives 3928 | 
eye 3929 | perspectives 3930 | point 3931 | disinflation 3932 | economic 3933 | interplanetary 3934 | international 3935 | interplanetary 3936 | unsettled 3937 | trilogies 3938 | trio 3939 | poisoning 3940 | corrupt 3941 | poisoning 3942 | poison 3943 | bobbers 3944 | float 3945 | portrayer 3946 | painter 3947 | invariable 3948 | hard-and-fast 3949 | invariable 3950 | parameter 3951 | constrict 3952 | astringe 3953 | constrict 3954 | choke 3955 | extraversion 3956 | sociability 3957 | frivolously 3958 | superficial 3959 | remounted 3960 | mount 3961 | remounted 3962 | hop 3963 | reviles 3964 | abuse 3965 | discounters 3966 | mercantile 3967 | confinement 3968 | restraint 3969 | consciousness 3970 | knowing 3971 | consciousness 3972 | self 3973 | believing 3974 | feel 3975 | believing 3976 | believe 3977 | regimental 3978 | control 3979 | regimental 3980 | form 3981 | subdivided 3982 | subdivide 3983 | subdivided 3984 | divide 3985 | sustainable 3986 | continue 3987 | standardise 3988 | gauge 3989 | standardise 3990 | measure 3991 | defrayed 3992 | pay 3993 | devilishly 3994 | cook 3995 | devilishly 3996 | antagonize 3997 | disapproving 3998 | discountenance 3999 | subspaces 4000 | mathematical 4001 | connoting 4002 | imply 4003 | connoting 4004 | express 4005 | inheritance 4006 | transfer 4007 | inheritance 4008 | acquisition 4009 | archery 4010 | sport 4011 | sufficed 4012 | serve 4013 | belligerence 4014 | hostility 4015 | procreating 4016 | brood 4017 | gelatinous 4018 | thick 4019 | villainous 4020 | wicked 4021 | harmony 4022 | congruity 4023 | harmony 4024 | music 4025 | inoffensive 4026 | innocuous 4027 | insurrectionist 4028 | young 4029 | inquisitive 4030 | curious 4031 | inquisitive 4032 | inquiring 4033 | extraterrestrial 4034 | hypothetical 4035 | triclinic 4036 | monoclinic 4037 | murderer 4038 | killer 4039 | concurrency 4040 | agreement 4041 | concurrency 4042 | cooperation 4043 | historically 4044 | real 4045 | nontoxic 4046 | antitoxic 4047 | 
nontoxic 4048 | edible 4049 | strengthened 4050 | sandbag 4051 | strengthened 4052 | brace 4053 | incontrovertible 4054 | undeniable 4055 | incontrovertible 4056 | incontestable 4057 | rumbled 4058 | sound 4059 | intragroup 4060 | intramural 4061 | exceptionally 4062 | extraordinary 4063 | irredeemable 4064 | wicked 4065 | irredeemable 4066 | inconvertible 4067 | snickering 4068 | laugh 4069 | -------------------------------------------------------------------------------- /tic.R: -------------------------------------------------------------------------------- 1 | # installs dependencies, runs R CMD check, runs covr::codecov() 2 | do_package_checks(error_on = "error") # only ERROR failures generate errors 3 | 4 | if (ci_on_ghactions() && ci_has_env("BUILD_PKGDOWN")) { 5 | # creates pkgdown site and pushes to gh-pages branch 6 | # only for the runner with the "BUILD_PKGDOWN" env var set 7 | do_pkgdown() 8 | } 9 | --------------------------------------------------------------------------------