├── .ci
│   ├── build-docs.sh
│   ├── install.sh
│   ├── lint-r-code.R
│   ├── report_to_covr.sh
│   ├── setup.sh
│   └── test.sh
├── .github
│   ├── CODEOWNERS
│   ├── dependabot.yml
│   └── workflows
│       ├── build-docs.yaml
│       └── ci.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── NEWS.md
├── README.md
├── cleanup_local.sh
├── cran-comments.md
├── r-pkg
│   ├── .Rbuildignore
│   ├── DESCRIPTION
│   ├── LICENSE
│   ├── NAMESPACE
│   ├── R
│   │   ├── assertions.R
│   │   ├── chomp_aggs.R
│   │   ├── chomp_hits.R
│   │   ├── es_search.R
│   │   ├── get_fields.R
│   │   ├── helperfuns.R
│   │   ├── logging.R
│   │   ├── parse_date_time.R
│   │   ├── unpack_nested_data.R
│   │   └── uptasticsearch.R
│   ├── _pkgdown.yml
│   ├── inst
│   │   └── testdata
│   │       └── .gitkeep
│   ├── man
│   │   ├── chomp_aggs.Rd
│   │   ├── chomp_hits.Rd
│   │   ├── doc_shared.Rd
│   │   ├── es_search.Rd
│   │   ├── get_fields.Rd
│   │   ├── parse_date_time.Rd
│   │   └── unpack_nested_data.Rd
│   ├── tests
│   │   ├── testthat.R
│   │   └── testthat
│   │       ├── test-assertions.R
│   │       ├── test-chomp_aggs.R
│   │       ├── test-chomp_hits.R
│   │       ├── test-es_search.R
│   │       ├── test-get_fields.R
│   │       ├── test-integration.R
│   │       ├── test-parse_date_time.R
│   │       └── test-unpack_nested_data.R
│   └── vignettes
│       └── FAQ.Rmd
├── setup_local.sh
└── test-data
    ├── aggs_cardinality.json
    ├── aggs_date_histogram.json
    ├── aggs_date_histogram_cardinality.json
    ├── aggs_date_histogram_extended_stats.json
    ├── aggs_date_histogram_histogram.json
    ├── aggs_date_histogram_percentiles.json
    ├── aggs_date_histogram_significant_terms.json
    ├── aggs_date_histogram_stats.json
    ├── aggs_date_histogram_terms.json
    ├── aggs_extended_stats.json
    ├── aggs_histogram.json
    ├── aggs_percentiles.json
    ├── aggs_significant_terms.json
    ├── aggs_stats.json
    ├── aggs_terms.json
    ├── aggs_terms_cardinality.json
    ├── aggs_terms_date_histogram.json
    ├── aggs_terms_date_histogram_cardinality.json
    ├── aggs_terms_date_histogram_extended_stats.json
    ├── aggs_terms_date_histogram_percentiles.json
    ├── aggs_terms_date_histogram_significant_terms.json
    ├── aggs_terms_date_histogram_stats.json
    ├── aggs_terms_date_histogram_terms.json
    ├── aggs_terms_extended_stats.json
    ├── aggs_terms_histogram.json
    ├── aggs_terms_percentiles.json
    ├── aggs_terms_significant_terms.json
    ├── aggs_terms_stats.json
    ├── aggs_terms_terms.json
    ├── empty_terms.json
    ├── es5_shakespeare_mapping.json
    ├── es6_shakespeare_mapping.json
    ├── es7_shakespeare_mapping.json
    ├── es_hits.json
    ├── legacy_shakespeare_mapping.json
    ├── one_index_mapping.json
    ├── one_var_agg.json
    ├── sample.json
    ├── sample_es7.json
    ├── three_var_agg.json
    └── two_index_mapping.json
/.ci/build-docs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # failure is a natural part of life
4 | set -e -u -o pipefail
5 |
6 | # setup LaTeX stuff
7 | brew install basictex
8 | export PATH="/Library/TeX/texbin:$PATH"
9 | sudo tlmgr --verify-repo=none update --self
10 | sudo tlmgr --verify-repo=none install inconsolata helvetic rsfs
11 |
12 | # install dependencies
13 | Rscript -e "install.packages(c('assertthat', 'curl', 'data.table', 'futile.logger', 'jsonlite', 'knitr', 'markdown', 'pkgdown', 'purrr', 'roxygen2', 'stringr'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
14 |
15 | cp NEWS.md ./r-pkg/
16 | cp README.md ./r-pkg/
17 |
18 | # build the docs
19 | pushd ./r-pkg
20 | R CMD INSTALL --with-keep.source .
21 | Rscript -e "roxygen2::roxygenize()"
22 | Rscript -e "pkgdown::build_site()"
23 | popd
24 |
--------------------------------------------------------------------------------
/.ci/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # failure is a natural part of life
4 | set -e -u -o pipefail
5 |
6 | R CMD INSTALL \
7 | --clean \
8 | ./r-pkg
9 |
--------------------------------------------------------------------------------
/.ci/lint-r-code.R:
--------------------------------------------------------------------------------
1 |
2 | library(lintr) # nolint[unused_import]
3 |
4 | args <- commandArgs(
5 | trailingOnly = TRUE
6 | )
7 | SOURCE_DIR <- args[[1L]]
8 |
9 | FILES_TO_LINT <- list.files(
10 | path = SOURCE_DIR
11 | , pattern = "\\.r$"
12 | , all.files = TRUE
13 | , ignore.case = TRUE
14 | , full.names = TRUE
15 | , recursive = TRUE
16 | , include.dirs = FALSE
17 | )
18 |
19 | # text to use for pipe operators from packages like 'magrittr'
20 | pipe_text <- paste0(
21 | "For consistency and the sake of being explicit, this project's code "
22 | , "does not use the pipe operator."
23 | )
24 |
25 | # text to use for functions that should only be called interactively
26 | interactive_text <- paste0(
27 | "Functions like '?', 'help', and 'install.packages()' should only be used "
28 | , "interactively, not in package code."
29 | )
30 |
31 | LINTERS_TO_USE <- list(
32 | "absolute_path" = lintr::absolute_path_linter()
33 | , "any_duplicated" = lintr::any_duplicated_linter()
34 | , "any_is_na" = lintr::any_is_na_linter()
35 | , "assignment" = lintr::assignment_linter()
36 | , "backport" = lintr::backport_linter()
37 | , "boolean_arithmetic" = lintr::boolean_arithmetic_linter()
38 | , "braces" = lintr::brace_linter()
39 | , "class_equals" = lintr::class_equals_linter()
40 | , "commas" = lintr::commas_linter()
41 | , "conjunct_test" = lintr::conjunct_test_linter()
42 | , "duplicate_argument" = lintr::duplicate_argument_linter()
43 | , "empty_assignment" = lintr::empty_assignment_linter()
44 | , "equals_na" = lintr::equals_na_linter()
45 | , "fixed_regex" = lintr::fixed_regex_linter()
46 | , "for_loop_index" = lintr::for_loop_index_linter()
47 | , "function_left" = lintr::function_left_parentheses_linter()
48 | , "function_return" = lintr::function_return_linter()
49 | , "implicit_assignment" = lintr::implicit_assignment_linter()
50 | , "infix_spaces" = lintr::infix_spaces_linter()
51 | , "inner_combine" = lintr::inner_combine_linter()
52 | , "is_numeric" = lintr::is_numeric_linter()
53 | , "lengths" = lintr::lengths_linter()
54 | , "length_levels" = lintr::length_levels_linter()
55 | , "length_test" = lintr::length_test_linter()
56 | , "line_length" = lintr::line_length_linter(length = 150L)
57 | , "literal_coercion" = lintr::literal_coercion_linter()
58 | , "matrix" = lintr::matrix_apply_linter()
59 | , "missing_argument" = lintr::missing_argument_linter()
60 | , "non_portable_path" = lintr::nonportable_path_linter()
61 | , "numeric_leading_zero" = lintr::numeric_leading_zero_linter()
62 | , "outer_negation" = lintr::outer_negation_linter()
63 | , "package_hooks" = lintr::package_hooks_linter()
64 | , "paren_body" = lintr::paren_body_linter()
65 | , "paste" = lintr::paste_linter()
66 | , "quotes" = lintr::quotes_linter()
67 | , "redundant_equals" = lintr::redundant_equals_linter()
68 | , "regex_subset" = lintr::regex_subset_linter()
69 | , "routine_registration" = lintr::routine_registration_linter()
70 | , "scalar_in" = lintr::scalar_in_linter()
71 | , "semicolon" = lintr::semicolon_linter()
72 | , "seq" = lintr::seq_linter()
73 | , "spaces_inside" = lintr::spaces_inside_linter()
74 | , "spaces_left_parens" = lintr::spaces_left_parentheses_linter()
75 | , "sprintf" = lintr::sprintf_linter()
76 | , "string_boundary" = lintr::string_boundary_linter()
77 | #, "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do"))
78 | , "trailing_blank" = lintr::trailing_blank_lines_linter()
79 | , "trailing_white" = lintr::trailing_whitespace_linter()
80 | , "true_false" = lintr::T_and_F_symbol_linter()
81 | , "undesirable_function" = lintr::undesirable_function_linter(
82 | fun = c(
83 | "cbind" = paste0(
84 | "cbind is an unsafe way to build up a data frame. merge() or direct "
85 | , "column assignment is preferred."
86 | )
87 | , "help" = interactive_text
88 | , "ifelse" = "The use of ifelse() is dangerous because it will silently allow mixing types."
89 | , "install.packages" = interactive_text
90 | , "rbind" = "data.table::rbindlist() is faster and safer than rbind(), and is preferred in this project."
91 | , "require" = paste0(
92 | "library() is preferred to require() because it will raise an error immediately "
93 | , "if a package is missing."
94 | )
95 | )
96 | )
97 | , "undesirable_operator" = lintr::undesirable_operator_linter(
98 | op = c(
99 | "%>%" = pipe_text
100 | , "%.%" = pipe_text
101 | , "%..%" = pipe_text
102 | , "?" = interactive_text
103 | , "??" = interactive_text
104 | )
105 | )
106 | , "unnecessary_concatenation" = lintr::unnecessary_concatenation_linter()
107 | , "unnecessary_lambda" = lintr::unnecessary_lambda_linter()
108 | , "unreachable_code" = lintr::unreachable_code_linter()
109 | , "unused_import" = lintr::unused_import_linter()
110 | , "vector_logic" = lintr::vector_logic_linter()
111 | , "whitespace" = lintr::whitespace_linter()
112 | )
113 |
114 | cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
115 |
116 | results <- NULL
117 |
118 | for (r_file in FILES_TO_LINT) {
119 |
120 | this_result <- lintr::lint(
121 | filename = r_file
122 | , linters = LINTERS_TO_USE
123 | , cache = FALSE
124 | )
125 |
126 | print(
127 | sprintf(
128 | "Found %i linting errors in %s"
129 | , length(this_result)
130 | , r_file
131 | )
132 | , quote = FALSE
133 | )
134 |
135 | results <- c(results, this_result)
136 |
137 | }
138 |
139 | issues_found <- length(results)
140 |
141 | if (issues_found > 0L) {
142 | print(results)
143 | }
144 |
145 | quit(save = "no", status = issues_found)
146 |
--------------------------------------------------------------------------------
/.ci/report_to_covr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # failure is a natural part of life
4 | set -e -u -o pipefail
5 |
6 | Rscript -e " \
7 | Sys.setenv(NOT_CRAN = 'true'); \
8 | covr::codecov('r-pkg/') \
9 | "
10 |
--------------------------------------------------------------------------------
/.ci/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # failure is a natural part of life
4 | set -e -u -o pipefail
5 |
6 | # `devscripts` is required for 'checkbashisms' (https://github.com/r-lib/actions/issues/111)
7 | sudo apt-get update
8 | sudo apt-get install \
9 | --no-install-recommends \
10 | -y \
11 | --allow-downgrades \
12 | libcurl4-openssl-dev \
13 | curl \
14 | devscripts \
15 | texinfo \
16 | texlive-latex-recommended \
17 | texlive-fonts-recommended \
18 | texlive-fonts-extra \
19 | tidy \
20 | qpdf
21 |
22 | Rscript -e "install.packages(c('covr', 'curl', 'data.table', 'futile.logger', 'jsonlite', 'knitr', 'lintr', 'markdown', 'purrr', 'stringr', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
23 | cp test-data/* r-pkg/inst/testdata/
24 |
--------------------------------------------------------------------------------
/.ci/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # failure is a natural part of life
4 | set -e -u -o pipefail
5 |
6 | R CMD build ./r-pkg
7 | export _R_CHECK_CRAN_INCOMING_=false
8 | R CMD check \
9 | --as-cran \
10 | ./*.tar.gz
11 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # This file controls default reviewers for 'uptasticsearch' code.
2 | # See https://help.github.com/en/articles/about-code-owners
3 | # for details
4 | #
5 | # Maintainers are encouraged to use their best discretion in
6 | # setting reviewers on PRs manually, but this file should
7 | # offer a reasonable automatic best-guess.
8 | #
9 | # NOTE: according to GitHub, the LAST rule matched in this
10 | # file will determine who is added to a PR for review
11 |
12 | # Default reviewers for all code
13 | * @jameslamb @austin3dickey
14 |
15 | # community files
16 | LICENSE @jameslamb @austin3dickey @bburns632
17 | CONDUCT.md @jameslamb @austin3dickey @bburns632
18 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 2
3 | updates:
4 | - package-ecosystem: github-actions
5 | directory: /
6 | schedule:
7 | interval: monthly
8 | groups:
9 | ci-dependencies:
10 | patterns:
11 | - "*"
12 | commit-message:
13 | prefix: "[ci]"
14 | labels:
15 | - maintenance
16 |
--------------------------------------------------------------------------------
/.github/workflows/build-docs.yaml:
--------------------------------------------------------------------------------
1 | name: build-docs
2 |
3 | concurrency:
4 | group: docs-build-on-${{ github.event_name }}-from-${{ github.ref_name }}
5 | cancel-in-progress: true
6 |
7 | on:
8 | # run only when called by other workflows
9 | workflow_call:
10 | inputs:
11 | deploy:
12 | required: true
13 | type: boolean
14 | default: false
15 | description: "set to true to publish docs"
16 |
17 | jobs:
18 | build:
19 | runs-on: macos-latest
20 | steps:
21 | - uses: actions/checkout@v4
22 | with:
23 | fetch-depth: 0
24 | - name: set up R
25 | uses: r-lib/actions/setup-r@v2
26 | with:
27 | r-version: release
28 | - name: set up pandoc
29 | uses: r-lib/actions/setup-pandoc@v2
30 | - name: build docs
31 | run: |
32 | .ci/build-docs.sh
33 | - uses: actions/upload-pages-artifact@v3
34 | with:
35 | path: ./r-pkg/docs
36 |
37 | deploy:
38 | needs:
39 | - build
40 | if: inputs.deploy
41 |
42 | # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
43 | permissions:
44 | pages: write # to deploy to Pages
45 | id-token: write # to verify the deployment originates from an appropriate source
46 |
47 | # Deploy to the github-pages environment
48 | environment:
49 | name: github-pages
50 | url: ${{ steps.deployment.outputs.page_url }}
51 |
52 | runs-on: ubuntu-latest
53 | steps:
54 | - name: Deploy to GitHub Pages
55 | id: deployment
56 | uses: actions/deploy-pages@v4
57 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: ci
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 | # run by clicking buttons in the GitHub Actions UI
11 | workflow_dispatch:
12 | inputs:
13 | deploy-docs:
14 | description: 'Update the docs site?'
15 | required: true
16 | type: boolean
17 |
18 | # automatically cancel in-progress builds if another commit is pushed
19 | concurrency:
20 | group: ${{ github.workflow }}-${{ github.ref }}
21 | cancel-in-progress: true
22 |
23 | env:
24 | # parallelize compilation (extra important for Linux, where CRAN doesn't supply pre-compiled binaries)
25 | MAKEFLAGS: "-j4"
26 |
27 | jobs:
28 | build-docs:
29 | uses: ./.github/workflows/build-docs.yaml
30 | with:
31 | deploy: ${{ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags')) || (github.event_name == 'workflow_dispatch' && inputs.deploy-docs == true) }}
32 | secrets: inherit
33 | lint:
34 | name: lint
35 | runs-on: ubuntu-latest
36 | timeout-minutes: 30
37 | steps:
38 | - name: checkout repository
39 | uses: actions/checkout@v4
40 | with:
41 | fetch-depth: 0
42 | - uses: pre-commit/action@v3.0.1
43 | - name: set up R
44 | uses: r-lib/actions/setup-r@v2
45 | - name: run lintr
46 | run: |
47 | Rscript -e "install.packages('lintr')"
48 | Rscript ./.ci/lint-r-code.R $(pwd)
49 | test:
50 | name: test (ES ${{ matrix.es_version }})
51 | runs-on: ubuntu-latest
52 | timeout-minutes: 60
53 | strategy:
54 | fail-fast: false
55 | matrix:
56 | es_version:
57 | - 1.7.6
58 | - 2.4.6
59 | - 5.6.16
60 | - 6.8.15
61 | - 7.0.1
62 | - 7.17.22
63 | - 8.0.1
64 | - 8.5.3
65 | - 8.10.4
66 | - 8.15.5
67 | - 8.17.2
68 | steps:
69 | - name: checkout repository
70 | uses: actions/checkout@v4
71 | with:
72 | fetch-depth: 1
73 | - name: set up R
74 | uses: r-lib/actions/setup-r@v2
75 | with:
76 | r-version: release
77 | - name: set up pandoc
78 | uses: r-lib/actions/setup-pandoc@v2
79 | - name: run tests
80 | shell: bash
81 | run: |
82 | export ES_VERSION=${{ matrix.es_version }}
83 | $GITHUB_WORKSPACE/.ci/setup.sh
84 | $GITHUB_WORKSPACE/.ci/install.sh
85 | $GITHUB_WORKSPACE/setup_local.sh ${{ matrix.es_version }}
86 | $GITHUB_WORKSPACE/.ci/test.sh
87 | $GITHUB_WORKSPACE/.ci/report_to_covr.sh
88 | all-successful:
89 | if: always()
90 | runs-on: ubuntu-latest
91 | needs:
92 | - build-docs
93 | - lint
94 | - test
95 | steps:
96 | - name: Decide whether the needed jobs succeeded or failed
97 | uses: re-actors/alls-green@v1.2.2
98 | with:
99 | jobs: ${{ toJSON(needs) }}
100 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 |
8 | # Example code in package build process
9 | *-Ex.R
10 |
11 | # Output files from R CMD build
12 | *.tar.gz
13 | *.Rcheck/
14 |
15 | # RStudio files
16 | *.Rproj
17 | .Rproj.user/
18 |
19 | # produced vignettes
20 | vignettes/*.html
21 | vignettes/*.pdf
22 |
23 | # Temporary files created by R markdown
24 | *.utf8.md
25 | *.knit.md
26 | .Rproj.user
27 |
28 | # Data files
29 | *.Rda
30 | *.pdf
31 | *.csv
32 |
33 | # system files
34 | *.DS_Store
35 |
36 | # misc testing files
37 | sandbox/
38 | lib/
39 | coverage.html
40 |
41 | # shared files copied into package at build time
42 | r-pkg/NEWS.md
43 | r-pkg/README.md
44 | r-pkg/inst/testdata/*.json
45 |
46 | # Python stuff
47 | **/.pytest_cache/
48 | **/__pycache__/
49 | **/dist/
50 | **/htmlcov/
51 | **/*.egg-info/
52 |
53 | # As long as we're storing the pkgdown site
54 | # at the repo root, should protect against
55 | # people committing files in r-pkg
56 | r-pkg/docs/
57 |
58 | # backup files from command-line tools
59 | *.bak
60 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | exclude: |
3 | (?x)^(
4 | test-data/.*
5 | )$
6 | repos:
7 | - repo: https://github.com/pre-commit/pre-commit-hooks
8 | rev: v5.0.0
9 | hooks:
10 | - id: end-of-file-fixer
11 | - id: trailing-whitespace
12 | - repo: https://github.com/maxwinterstein/shfmt-py
13 | rev: v3.7.0.1
14 | hooks:
15 | - id: shfmt
16 | args: ["--indent=4", "--space-redirects", "--write"]
17 | - repo: https://github.com/shellcheck-py/shellcheck-py
18 | rev: v0.10.0.1
19 | hooks:
20 | - id: shellcheck
21 | args: ["--exclude=SC2002"]
22 | - repo: https://github.com/codespell-project/codespell
23 | rev: v2.4.1
24 | hooks:
25 | - id: codespell
26 | # additional_dependencies: [tomli]
27 | # args: ["--toml", "pyproject.toml"]
28 |
--------------------------------------------------------------------------------
/CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | **Examples of behavior that contributes to creating a positive environment include:**
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | **Examples of unacceptable behavior by participants include:**
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team (see the "Maintainer" field in file `r-pkg/DESCRIPTION`). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at http://contributor-covenant.org/version/1/4.
44 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2018, Uptake
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: build
2 | build:
3 | cp test-data/* r-pkg/inst/testdata/
4 | R CMD BUILD r-pkg/
5 |
6 | .PHONY: coverage
7 | coverage:
8 | echo "Calculating test coverage..."
9 | Rscript -e "Sys.setenv(NOT_CRAN = 'true'); coverage <- covr::package_coverage('r-pkg/'); print(coverage); covr::report(coverage, './coverage.html')"
10 | echo "Done calculating coverage"
11 | open coverage.html
12 |
13 | .PHONY: install
14 | install: build
15 | R CMD INSTALL r-pkg/
16 |
17 | .PHONY: test
18 | test: build
19 | R CMD CHECK --as-cran uptasticsearch_*.tar.gz
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # uptasticsearch
2 |
3 | [](https://github.com/uptake/uptasticsearch/actions/workflows/ci.yml)
4 | [](https://app.codecov.io/gh/uptake/uptasticsearch)
5 | [](https://cran.r-project.org/package=uptasticsearch)
6 | [](https://cran.r-project.org/package=uptasticsearch)
7 |
8 | ## Introduction
9 |
10 | `uptasticsearch` tackles the issue of getting data out of Elasticsearch and into a tabular format in R.
11 | It should work for all versions of Elasticsearch from 1.0.0 onwards, but [is not regularly tested against all of them](https://github.com/uptake/uptasticsearch/blob/main/CONTRIBUTING.md#gha).
12 | If you run into a problem, please [open an issue](https://github.com/uptake/uptasticsearch/issues).
13 |
14 | # Table of contents
15 |
16 | * [How it Works](#howitworks)
17 | * [Installation](#installation)
18 | * [R](#rinstallation)
19 | * [Usage Examples](#examples)
20 | * [Get a Batch of Documents](#example1)
21 | * [Aggregation Results](#example2)
22 |
23 | ## How it Works
24 |
25 | The core functionality of this package is the `es_search()` function.
26 | This returns a `data.table` containing the parsed result of any given query. Note that this includes `aggs` queries.
27 |
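   | For orientation, here is a minimal sketch of that workflow. The host and index names
   | below are placeholders and other arguments are omitted; see the full examples further
   | down for realistic queries.
   |
   | ```r
   | library(uptasticsearch)
   |
   | # match every document in a (hypothetical) index and parse the hits into a data.table
   | resultDT <- es_search(
   |     es_host = "http://localhost:9200"
   |     , es_index = "survey_results"
   |     , query_body = '{"query": {"match_all": {}}}'
   | )
   | ```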
28 | ## Installation
29 |
30 | ### R
31 |
32 | 
33 |
34 | Releases of this package can be installed from CRAN:
35 |
36 | ```r
37 | install.packages(
38 | 'uptasticsearch'
39 |     , repos = "https://cran.rstudio.com"
40 | )
41 | ```
42 |
43 | or from `conda-forge`
44 |
45 | ```shell
46 | conda install -c conda-forge r-uptasticsearch
47 | ```
48 |
49 | To use the development version of the package, which has the newest changes, you can install directly from GitHub
50 |
51 | ```r
52 | remotes::install_github(
53 | "uptake/uptasticsearch"
54 | , subdir = "r-pkg"
55 | )
56 | ```
57 |
58 | ## Usage Examples
59 |
60 | The examples presented here pertain to a fictional Elasticsearch index holding some information on a movie theater business.
61 |
62 | ### Example 1: Get a Batch of Documents
63 |
64 | The most common use case for this package will be the case where you have an Elasticsearch query and want to get a data frame representation of many resulting documents.
65 |
66 | In the example below, we use `uptasticsearch` to look for all survey results in which customers said their satisfaction was "low" or "very low" and mentioned food in their comments.
67 |
68 | ```r
69 | library(uptasticsearch)
70 |
71 | # Build your query in an R string
72 | qbody <- '{
73 | "query": {
74 | "filtered": {
75 | "filter": {
76 | "bool": {
77 | "must": [
78 | {
79 | "exists": {
80 | "field": "customer_comments"
81 | }
82 | },
83 | {
84 | "terms": {
85 | "overall_satisfaction": ["very low", "low"]
86 | }
87 | }
88 | ]
89 | }
90 | }
91 | },
92 | "query": {
93 | "match_phrase": {
94 | "customer_comments": "food"
95 | }
96 | }
97 | }
98 | }'
99 |
100 | # Execute the query, parse into a data.table
101 | commentDT <- es_search(
102 | es_host = 'http://mydb.mycompany.com:9200'
103 | , es_index = "survey_results"
104 | , query_body = qbody
105 | , scroll = "1m"
106 | , n_cores = 4
107 | )
108 | ```
109 |
110 | ### Example 2: Aggregation Results
111 |
112 | Elasticsearch ships with a rich set of aggregations for creating summarized views of your data.
113 | `uptasticsearch` has built-in support for these aggregations.
114 |
115 | In the example below, we use `uptasticsearch` to create daily timeseries of summary statistics like total revenue and average payment amount.
116 |
117 | ```r
118 | library(uptasticsearch)
119 |
120 | # Build your query in an R string
121 | qbody <- '{
122 | "query": {
123 | "filtered": {
124 | "filter": {
125 | "bool": {
126 | "must": [
127 | {
128 | "exists": {
129 | "field": "pmt_amount"
130 | }
131 | }
132 | ]
133 | }
134 | }
135 | }
136 | },
137 | "aggs": {
138 | "timestamp": {
139 | "date_histogram": {
140 | "field": "timestamp",
141 | "interval": "day"
142 | },
143 | "aggs": {
144 | "revenue": {
145 | "extended_stats": {
146 | "field": "pmt_amount"
147 | }
148 | }
149 | }
150 | }
151 | },
152 | "size": 0
153 | }'
154 |
155 | # Execute the query, parse result into a data.table
156 | revenueDT <- es_search(
157 | es_host = 'http://mydb.mycompany.com:9200'
158 | , es_index = "transactions"
159 | , size = 1000
160 | , query_body = qbody
161 | , n_cores = 1
162 | )
163 | ```
164 |
165 | In the example above, we used the [date_histogram](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html) and [extended_stats](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-extendedstats-aggregation.html) aggregations.
166 | `es_search()` has built-in support for many other aggregations and combinations of aggregations, with more on the way.
167 | Please see the table below for the current status of the package.
168 | Note that names of the form "agg1 - agg2" refer to the ability to handle aggregations nested inside other aggregations.
169 |
170 | |Agg type | support? |
171 | |:------------------------------------------|:--------:|
172 | |["cardinality"][1] |YES |
173 | |["date_histogram"][2] |YES |
174 | |date_histogram - cardinality |YES |
175 | |date_histogram - extended_stats |YES |
176 | |date_histogram - histogram |YES |
177 | |date_histogram - percentiles |YES |
178 | |date_histogram - significant_terms |YES |
179 | |date_histogram - stats |YES |
180 | |date_histogram - terms |YES |
181 | |["extended_stats"][3] |YES |
182 | |["histogram"][4] |YES |
183 | |["percentiles"][5] |YES |
184 | |["significant terms"][6] |YES |
185 | |["stats"][7] |YES |
186 | |["terms"][8] |YES |
187 | |terms - cardinality |YES |
188 | |terms - date_histogram |YES |
189 | |terms - date_histogram - cardinality |YES |
190 | |terms - date_histogram - extended_stats |YES |
191 | |terms - date_histogram - histogram |YES |
192 | |terms - date_histogram - percentiles |YES |
193 | |terms - date_histogram - significant_terms |YES |
194 | |terms - date_histogram - stats |YES |
195 | |terms - date_histogram - terms |YES |
196 | |terms - extended_stats |YES |
197 | |terms - histogram |YES |
198 | |terms - percentiles |YES |
199 | |terms - significant_terms |YES |
200 | |terms - stats |YES |
201 | |terms - terms |YES |
202 |
203 | [1]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html
204 | [2]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html
205 | [3]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-extendedstats-aggregation.html
206 | [4]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-histogram-aggregation.html
207 | [5]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html
208 | [6]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html
209 | [7]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html
210 | [8]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html
211 |
--------------------------------------------------------------------------------
/cleanup_local.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e -u -o pipefail
4 |
5 | # Remove testing directory
6 | echo "removing testing directory"
7 | rm -r ./sandbox
8 |
9 | # Kill the running container
10 | echo "killing running container"
11 | docker kill "$(docker ps -ql)"
12 |
13 | echo "done cleaning up test environment"
14 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | # CRAN Submission History
2 |
3 | ## v0.0.2 - Submission 1 - (July 17, 2017)
4 |
5 | ### Test environments
6 | * Alpine 3.5 (on Jenkins CI), R 3.4.0
7 | * local CentOS 7.3, R 3.4.0
8 | * local OS X, R 3.3.2
9 | * local Windows 10, R 3.3.2
10 | * Windows via `devtools::build_win()`
11 |
12 | ### `R CMD check` results
13 | * There were no ERRORs, WARNINGs.
14 | * One NOTE from `checking CRAN incoming feasibility ...` can be safely ignored since it's a note that notifies CRAN that this is a new maintainer/submission.
15 |
16 | ### CRAN Response
17 | * Automatic checking upon CRAN submission yielded two notes. One was the "incoming feasibility..." item we mentioned above, which is not an issue.
18 | * The other note said that `Author field differs from that derived from Authors@R`. This did not arise when running `R CMD check --as-cran` locally, but it looks like "fnd" is not a supported tag for an author. Removed that tag.
19 |
20 | ## v0.0.2 - Submission 2 - (July 17, 2017)
21 |
22 | ### CRAN Response
23 | * Need to use the [CRAN preferred method](https://cran.r-project.org/web/licenses/BSD_3_clause) of declaring the BSD 3-Clause license
24 | * Need to quote software names
25 |
26 | ## v0.0.2 - Submission 3 - (July 18, 2017)
27 |
28 | ### CRAN Response
29 | * No lingering issues. v0.0.2 released to CRAN!
30 |
31 | ## v0.1.0 - Submission 1 - (August 28, 2017)
32 |
33 | ### `R CMD check` results
34 | * No issues
35 |
36 | ### CRAN Response
37 | * Need to use CRAN canonical form (http://cran.r-project.org/package=uptasticsearch)
38 |
39 | ## v0.1.0 - Submission 2 - (August 28, 2017)
40 |
41 | ### `R CMD check` results
42 | * No issues
43 |
44 | ### CRAN Response
45 | * CRAN canonical form uses HTTPS (https://cran.r-project.org/package=uptasticsearch)
46 |
47 | ## v0.1.0 - Submission 3 - (August 29, 2017)
48 |
49 | ### `R CMD check` results
50 | * No issues
51 |
52 | ### CRAN Response
53 | * CRAN URLs are still missing HTTPS (submitter error)
54 |
55 | ## v0.1.0 - Submission 4 - (August 29, 2017)
56 |
57 | ### `R CMD check` results
58 | * No issues
59 |
60 | ### CRAN Response
61 | * Still missing HTTPS in CRAN URLs (we'd been editing the README at the repo root, not the one built with the package)
62 | * Reviewers asked if examples in "\dontrun" could be run instead
63 |
64 | ## v0.1.0 - Submission 5 - (August 29, 2017)
65 |
66 | ### `R CMD check` results
67 | * No issues
68 |
69 | ### CRAN Response
70 | * No lingering issues. v0.1.0 released to CRAN!
71 |
72 | ## v0.2.0 - Submission 1 - (April 12, 2018)
73 |
74 | ### `R CMD check` results
75 | * No issues
76 |
77 | ### CRAN Response
78 | * No issues. v0.2.0 released to CRAN!
79 |
80 | ## v0.3.0 - Submission 1 - (June 18, 2018)
81 |
82 | ### `R CMD check` results
83 | * No issues
84 |
85 | ### CRAN Response
86 | * No issues. v0.3.0 released to CRAN!
87 |
88 | ## v0.3.1 - Submission 1 - (January 28, 2019)
89 |
90 | ### `R CMD check` results
91 | * Issues on several platforms, of the form `premature EOF...`. This is a result of forgetting to put the test data in the package tarball before upload.
92 |
93 | ### CRAN Response
94 | * Upload a new version with this fixed or your package comes down in 7 days
95 |
96 | ## v0.3.1 - Submission 2 - (January 29, 2019)
97 |
98 | ### `R CMD check` results
99 | * Empty links in `NEWS.md`
100 |
101 | ### CRAN Response
102 | * Upload a new version with this fixed or your package comes down in 7 days
103 |
104 | ## v0.3.1 - Submission 3 - (January 30, 2019)
105 |
106 | ### `R CMD check` results
107 | * No issues
108 |
109 | ### CRAN Response
110 | * No issues. v0.3.1 released to CRAN!
111 |
112 | ## v0.4.0 - Submission 1 - (September 9, 2019)
113 |
114 | In this submission, we changed maintainer from `james.lamb@uptake.com` to `jaylamb20@gmail.com`. Added this note in the initial submission:
115 |
116 | > This is a release to add support for Elasticsearch 7.x, a major release stream that has been General Availability since April 2019.
117 |
118 | > You may see that the maintainer email is changing from "james.lamb@uptake.com" to "jaylamb20@gmail.com". This is a contact info update only, not an actual maintainer change. The "uptake.com" address is tied to the company that holds copyright over this project (https://github.com/uptake/uptasticsearch/blob/master/LICENSE#L3). I no longer work there but have received their permission to continue on as the maintainer. If you need confirmation you can contact my coauthors who still work there (austin.dickey@uptake.com, nick.paras@uptake.com) or that company's legal team (dennis.lee@uptake.com)
119 |
120 | ### `R CMD check` results
121 | * No issues
122 |
123 | ### CRAN Response
124 | * Release was auto-accepted, but the response email said "We are waiting for confirmation from the old maintainer address now.". I responded and re-iterated the message above about changed maintainer email. No response yet. We are blocked until they respond.
125 | * CRAN seems ok with the maintainer change, but noted that we have one bad link in `README.md` (`./CONTRIBUTING.md`), which needs to be changed to a fully-specified URL.
126 |
127 | ## v0.4.0 - Submission 2 - (September 11, 2019)
128 |
129 | ### `R CMD check` results
130 | * No issues
131 |
132 | ### CRAN Response
133 | * No issues. v0.4.0 released to CRAN!
134 |
135 | ## v1.0.0 - Submission 1 - (February 24, 2025)
136 |
137 | Submitted with the following comments.
138 |
139 | > This is the first release of 'uptasticsearch' since 2019.
140 | > It mainly seeks to preserve the package on CRAN by removing use of deprecated-and-soon-to-be-removed functionality in 'testthat' (https://github.com/uptake/uptasticsearch/issues/223).
141 |
142 | ### `R CMD check` results
143 |
144 | * No issues
145 |
146 | ### CRAN Response
147 |
148 | * No issues. v1.0.0 released to CRAN!
149 |
--------------------------------------------------------------------------------
/r-pkg/.Rbuildignore:
--------------------------------------------------------------------------------
1 |
2 | # Files currently checked into the repo
3 | ^docs$
4 | ^_pkgdown\.yml$
5 | ^cran-comments\.md$
6 | ^tests/testthat/test-integration_tests\.R$
7 | ^inst/testdata/shakespeare_mapping\.json$
8 | ^CONDUCT\.md$
9 | ^LICENSE\.MD$
10 | ^.travis\.yml$
11 | ^setup_local.sh$
12 | ^cleanup_local.sh$
13 | ^coverage.sh$
14 |
15 | # History files
16 | ^\.Rhistory*
17 | ^\.Rapp\.history*
18 |
19 | # Session Data files
20 | ^\.RData$
21 |
22 | # Example code in package build process
23 | .*-Ex\.R
24 |
25 | # Output files from R CMD build
26 | .*\.tar\.gz
27 |
28 | # Output files from R CMD check
29 | .*\.Rcheck/
30 |
31 | # RStudio files
32 | .*\.Rproj
33 | \.Rproj\.user/
34 |
35 | # produced vignettes
36 | vignettes/*\.html
37 | vignettes/*\.pdf
38 |
39 | # Temporary files created by R markdown
40 | .*\.utf8\.md
41 | .*\.knit\.md
42 | ^\.Rproj\.user$
43 |
44 | # Data files
45 | .*\.Rda
46 | .*\.pdf
47 | .*\.csv
48 |
49 | # system files
50 | .*\.DS_Store
51 | ^.*\.Rproj$
52 |
53 | # Temporary files generated by local testing
54 | ^lib$
55 | ^sandbox$
56 | ^coverage.html$
57 |
58 | # Stuff
59 | .Rbuildignore
60 | .*\.gitkeep
61 |
--------------------------------------------------------------------------------
/r-pkg/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: uptasticsearch
2 | Type: Package
3 | Title: Get Data Frame Representations of 'Elasticsearch' Results
4 | Version: 1.0.0.9999
5 | Authors@R: c(
6 | person("James", "Lamb", email = "jaylamb20@gmail.com", role = c("aut", "cre")),
7 | person("Nick", "Paras", role = c("aut")),
8 | person("Austin", "Dickey", role = c("aut")),
9 | person("Michael", "Frasco", email = "mfrasco6@gmail.com", role = c("ctb")),
10 | person("Weiwen", "Gu", role = c("ctb")),
11 | person("Will", "Dearden", role = c("ctb")),
12 | person("Uptake Technologies Inc.", role = c("cph")))
13 | Maintainer: James Lamb <jaylamb20@gmail.com>
14 | Description:
15 | 'Elasticsearch' is an open-source, distributed, document-based datastore
16 | ().
17 | It provides an 'HTTP' 'API' for querying the database and extracting datasets, but that
18 | 'API' was not designed for common data science workflows like pulling large batches of
19 | records and normalizing those documents into a data frame that can be used as a training
20 | dataset for statistical models. 'uptasticsearch' provides an interface for 'Elasticsearch'
21 | that is explicitly designed to make these data science workflows easy and fun.
22 | Depends:
23 | R (>= 3.3.0)
24 | Imports:
25 | curl,
26 | data.table,
27 | futile.logger,
28 | jsonlite,
29 | purrr,
30 | stats,
31 | stringr
32 | Suggests:
33 | knitr,
34 | markdown,
35 | testthat
36 | License: BSD_3_clause + file LICENSE
37 | URL: https://github.com/uptake/uptasticsearch
38 | BugReports: https://github.com/uptake/uptasticsearch/issues
39 | RoxygenNote: 7.3.2
40 | VignetteBuilder: knitr
41 | Encoding: UTF-8
42 |
--------------------------------------------------------------------------------
/r-pkg/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2017
2 | COPYRIGHT HOLDER: Uptake Technologies Inc.
3 | ORGANIZATION: Uptake Technologies Inc.
4 |
--------------------------------------------------------------------------------
/r-pkg/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(chomp_aggs)
4 | export(chomp_hits)
5 | export(es_search)
6 | export(get_fields)
7 | export(parse_date_time)
8 | export(unpack_nested_data)
9 | importFrom(curl,curl_fetch_memory)
10 | importFrom(curl,handle_setheaders)
11 | importFrom(curl,handle_setopt)
12 | importFrom(curl,new_handle)
13 | importFrom(data.table,":=")
14 | importFrom(data.table,as.data.table)
15 | importFrom(data.table,copy)
16 | importFrom(data.table,data.table)
17 | importFrom(data.table,is.data.table)
18 | importFrom(data.table,rbindlist)
19 | importFrom(data.table,setcolorder)
20 | importFrom(data.table,setkeyv)
21 | importFrom(data.table,setnames)
22 | importFrom(data.table,uniqueN)
23 | importFrom(futile.logger,flog.debug)
24 | importFrom(futile.logger,flog.fatal)
25 | importFrom(futile.logger,flog.info)
26 | importFrom(futile.logger,flog.warn)
27 | importFrom(jsonlite,fromJSON)
28 | importFrom(parallel,clusterMap)
29 | importFrom(parallel,detectCores)
30 | importFrom(parallel,makeForkCluster)
31 | importFrom(parallel,makePSOCKcluster)
32 | importFrom(parallel,stopCluster)
33 | importFrom(purrr,map2)
34 | importFrom(purrr,map_if)
35 | importFrom(purrr,map_int)
36 | importFrom(purrr,map_lgl)
37 | importFrom(purrr,simplify)
38 | importFrom(stats,runif)
39 | importFrom(stringr,str_extract)
40 | importFrom(stringr,str_replace_all)
41 | importFrom(stringr,str_split)
42 | importFrom(stringr,str_split_fixed)
43 |
--------------------------------------------------------------------------------
/r-pkg/R/assertions.R:
--------------------------------------------------------------------------------
1 |
2 | # [title] assert something and raise an exception if it isn't true
3 | # [name] .assert
4 | # [description] If the condition passed to .assert() does not evaluate to TRUE,
5 | # issues a FATAL-level log message and then raises an R exception,
6 | # both with the content of `msg`.
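   | # [example] typical usage (mirroring a real call in parse_date_time.R):
   | #   .assert(.is_string(assume_tz), "Argument 'assume_tz' must be a non-empty string")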
7 | .assert <- function(expr, msg) {
8 | res <- eval(expr, envir = parent.frame())
9 | if (isTRUE(res)) {
10 | return(invisible(TRUE))
11 | }
12 | .log_fatal(msg)
13 | }
14 |
15 | # [title] check if an object is a count
16 | # [name] .is_count
17 | # [description] Returns TRUE if `x` is a single positive integer
18 | # and FALSE otherwise.
19 | .is_count <- function(x) {
20 | return(
21 | length(x) == 1 &&
22 | is.numeric(x) &&
23 | !is.na(x) &&
24 | x > 0 &&
25 | trunc(x) == x
26 | )
27 | }
28 |
29 | # [title] check if an object is a scalar logical
30 | # [name] .is_flag
31 | # [description] Returns TRUE if `x` is `TRUE` or `FALSE`
32 | # and `FALSE` otherwise.
33 | .is_flag <- function(x) {
34 | return(
35 | is.logical(x) &&
36 | length(x) == 1L &&
37 | !is.na(x)
38 | )
39 | }
40 |
41 | # [title] check if an object is a string
42 | # [name] .is_string
43 | # [description] Returns TRUE if `x` is a non-empty string
44 | # and FALSE otherwise.
45 | .is_string <- function(x) {
46 | return(
47 | is.character(x) &&
48 | length(x) == 1L &&
49 | !is.na(x) &&
50 | x != ""
51 | )
52 | }
53 |
54 | # [title] check if an object is a writeable filepath that exists
55 | # [name] .is_writeable
56 | # [description] Returns TRUE if `x` is a filepath that already exists
57 | # and is writeable, and FALSE otherwise.
58 | .is_writeable <- function(x) {
59 | return(
60 | .is_string(x) &&
61 | file.exists(x) &&
62 | file.access(x, mode = 2L)[[1L]] == 0L
63 | )
64 | }
65 |
--------------------------------------------------------------------------------
/r-pkg/R/chomp_hits.R:
--------------------------------------------------------------------------------
1 | #' @title Hits to data.tables
2 | #' @name chomp_hits
3 | #' @description A function for converting Elasticsearch docs into R data.tables. It
4 | #' uses \code{\link[jsonlite]{fromJSON}} with \code{flatten = TRUE} to convert a
5 | #' JSON into an R data.frame, and formats it into a data.table.
6 | #' @importFrom jsonlite fromJSON
7 | #' @importFrom data.table as.data.table setnames
8 | #' @export
9 | #' @param hits_json A character vector. If its length is greater than 1, its elements will be pasted
10 | #' together. This can contain a JSON returned from a \code{search} query in
11 | #' Elasticsearch, or a filepath or URL pointing at one.
12 | #' @param keep_nested_data_cols a boolean (default TRUE); whether to keep columns that are nested
13 | #' arrays in the original JSON. A warning will be given if these
14 | #' columns are deleted.
15 | #' @examples
16 | #' # A sample raw result from a hits query:
17 | #' result <- '[{"_source":{"timestamp":"2017-01-01","cust_name":"Austin","details":{
18 | #' "cust_class":"big_spender","location":"chicago","pastPurchases":[{"film":"The Notebook",
19 | #' "pmt_amount":6.25},{"film":"The Town","pmt_amount":8.00},{"film":"Zootopia","pmt_amount":7.50,
20 | #' "matinee":true}]}}},{"_source":{"timestamp":"2017-02-02","cust_name":"James","details":{
21 | #' "cust_class":"peasant","location":"chicago","pastPurchases":[{"film":"Minions",
22 | #' "pmt_amount":6.25,"matinee":true},{"film":"Rogue One","pmt_amount":10.25},{"film":"Bridesmaids",
23 | #' "pmt_amount":8.75},{"film":"Bridesmaids","pmt_amount":6.25,"matinee":true}]}}},{"_source":{
24 | #' "timestamp":"2017-03-03","cust_name":"Nick","details":{"cust_class":"critic","location":"cannes",
25 | #' "pastPurchases":[{"film":"Aala Kaf Ifrit","pmt_amount":0,"matinee":true},{
26 | #' "film":"Dopo la guerra (Apres la Guerre)","pmt_amount":0,"matinee":true},{
27 | #' "film":"Avengers: Infinity War","pmt_amount":12.75}]}}}]'
28 | #'
29 | #' # Chomp into a data.table
30 | #' sampleChompedDT <- chomp_hits(hits_json = result, keep_nested_data_cols = TRUE)
31 | #' print(sampleChompedDT)
32 | #'
33 | #' # (Note: use es_search() to get here in one step)
34 | #'
35 | #' # Unpack by details.pastPurchases
36 | #' unpackedDT <- unpack_nested_data(chomped_df = sampleChompedDT
37 | #' , col_to_unpack = "details.pastPurchases")
38 | #' print(unpackedDT)
39 | chomp_hits <- function(hits_json = NULL, keep_nested_data_cols = TRUE) {
40 |
41 | # If nothing was passed to hits_json, return NULL and warn
42 | if (is.null(hits_json)) {
43 | msg <- "You did not pass any input data to chomp_hits. Returning NULL."
44 | .log_warn(msg)
45 | return(invisible(NULL))
46 | }
47 |
48 | if (!is.character(hits_json)) {
49 |         msg <- paste0("The first argument of chomp_hits must be a character vector. "
50 | , "You may have passed an R list. In that case, if you already "
51 | , "used jsonlite::fromJSON(), you can just call "
52 | , "data.table::as.data.table().")
53 | .log_fatal(msg)
54 | }
55 |
56 | # Parse the input JSON to a list object
57 | jsonList <- jsonlite::fromJSON(hits_json, flatten = TRUE)
58 |
59 | # If this came from a raw query result, we need to grab the hits.hits element.
60 | # Otherwise, just assume we have a list of hits
61 | if (all(c("took", "timed_out", "_shards", "hits") %in% names(jsonList))) {
62 | batchDT <- data.table::as.data.table(jsonList[["hits"]][["hits"]])
63 | } else {
64 | batchDT <- data.table::as.data.table(jsonList)
65 | }
66 |
67 | # Strip "_source" from all the column names because blegh
68 | data.table::setnames(batchDT, gsub("_source.", "", names(batchDT), fixed = TRUE))
69 |
70 | # Warn the user if there's nested data
71 | colTypes <- sapply(batchDT, mode)
72 | if (any(colTypes == "list")) {
73 | if (keep_nested_data_cols) {
74 | msg <- paste(
75 | "Keeping the following nested data columns."
76 | , "Consider using unpack_nested_data for one:\n"
77 | , toString(names(colTypes)[colTypes == "list"])
78 | )
79 | .log_info(msg)
80 | } else {
81 |
82 | msg <- paste(
83 | "Deleting the following nested data columns:\n"
84 | , toString(names(colTypes)[colTypes == "list"])
85 | )
86 | .log_warn(msg)
87 | batchDT <- batchDT[, !names(colTypes[colTypes == "list"]), with = FALSE]
88 | }
89 | }
90 |
91 | return(batchDT)
92 | }
93 |
--------------------------------------------------------------------------------
/r-pkg/R/helperfuns.R:
--------------------------------------------------------------------------------
1 | # [title] Extract the content of an HTTP response into a different format
2 | # [name] .content
3 | # [description] Mainly here to make mocking easier in testing.
4 | # [references] https://testthat.r-lib.org/reference/local_mocked_bindings.html#namespaced-calls
5 | #' @importFrom jsonlite fromJSON
6 | .content <- function(response, as) {
7 | text_content <- rawToChar(response$content)
8 | if (as == "text") {
9 | return(text_content)
10 | }
11 |
12 | # if not plain text, assume we want to parse JSON into an R list
13 | return(jsonlite::fromJSON(
14 | txt = text_content
15 | , simplifyVector = FALSE
16 | , simplifyDataFrame = FALSE
17 | , simplifyMatrix = FALSE
18 | ))
19 | }
20 |
21 | # [title] Get a random length-n string
22 | # [name] .random_string
23 | # [description] Get a random length-n string of lowercase letters.
24 | # Note that this uses sample() and so might produce deterministic
25 | # results in programs where set.seed() is used to control randomness.
26 | .random_string <- function(num_characters) {
27 | return(
28 | paste(
29 | sample(letters, replace = TRUE, size = num_characters)
30 | , collapse = ""
31 | )
32 | )
33 | }
34 |
35 | # [title] List out HTTP codes that should be treated as retryable
36 | # [name] .should_retry
37 | # [description] This is here because {curl} doesn't ship a retry mechanism, so this library
38 | # implements its own.
39 | .should_retry <- function(response) {
40 | retryable_error_codes <- c(
41 | # 408 - timeout
42 | 408L
43 | # 422 - unprocessable entity
44 | , 422L
45 | # 425 - too early
46 | , 425L
47 | # 429 - too many requests
48 | , 429L
49 | # 500 - internal server error
50 | , 500L
51 | # 502 - bad gateway
52 | , 502L
53 | # 503 - service unavailable
54 | , 503L
55 | # 504 - gateway timeout
56 | , 504L
57 | )
58 | return(response$status_code %in% retryable_error_codes)
59 | }
60 |
61 | # [title] Retry an HTTP request a couple of times (if necessary)
62 | # [name] .retry
63 | # [description] Implements exponential backoff with jitter around failed requests.
64 | # See .should_retry() for details on which status codes are considered retryable.
65 | # This is here because {curl} does not have a built-in retry API.
66 | #' @importFrom curl curl_fetch_memory
67 | #' @importFrom stats runif
68 | .retry <- function(handle, url) {
69 |
70 | max_retries <- 3L
71 | attempt_count <- 1L
72 | while (attempt_count <= max_retries) {
73 |
74 | # if this isn't the 1st attempt, apply backoff
75 | if (attempt_count > 1L) {
76 | # exponential backoff with jitter
77 | #
78 | # 1.45s + {jitter}
79 | # 2.10s + {jitter}
80 | # 3.05s + {jitter}
81 | # etc., etc.
82 | #
83 | # ref: https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
84 | sleep_seconds <- 1.45 ** (attempt_count - 1L) + stats::runif(n = 1L, min = 0.1, max = 0.5)
85 | .log_debug(sprintf("Sleeping for %.2f seconds before retrying.", sleep_seconds))
86 | Sys.sleep(sleep_seconds)
87 | }
88 |
89 | # execute request
90 | response <- curl::curl_fetch_memory(
91 | url = url
92 | , handle = handle
93 | )
94 |
95 | # check if the response should be retried
96 | if (.should_retry(response)) {
97 | .log_debug(sprintf(
98 | "Request failed (status code %i): '%s %s'"
99 | , response$status_code
100 | , response$method
101 | , response$url
102 | ))
103 | attempt_count <- attempt_count + 1L
104 | } else {
105 | break
106 | }
107 | }
108 | return(response)
109 | }
110 |
111 | # [title] Execute an HTTP request and return the result
112 | # [name] .request
113 | # [description] Mainly here to make mocking easier in testing, but this
114 | # also centralizes the mechanism for HTTP request execution in one place.
115 | # [references] https://testthat.r-lib.org/reference/local_mocked_bindings.html#namespaced-calls
116 | #' @importFrom curl handle_setheaders handle_setopt new_handle
117 | .request <- function(verb, url, body) {
118 | handle <- curl::new_handle()
119 |
120 | # set headers
121 | #
122 | # This can safely be hard-coded here because every payload this library
123 | # posts and every response body it receives is JSON data.
124 | curl::handle_setheaders(
125 | handle = handle
126 | , "Accept" = "application/json" # nolint[non_portable_path]
127 | , "Content-Type" = "application/json" # nolint[non_portable_path]
128 | )
129 |
130 | # set HTTP method
131 | curl::handle_setopt(handle = handle, customrequest = verb)
132 |
133 | # add body
134 | if (!is.null(body)) {
135 | curl::handle_setopt(
136 | handle = handle
137 | , copypostfields = body
138 | )
139 | }
140 |
141 | # actually execute request
142 | response <- .retry(
143 | handle = handle
144 | , url = url
145 | )
146 |
147 | return(invisible(response))
148 | }
149 |
150 | # [title] Raise an exception if an HTTP response indicates an error
151 | # [name] .stop_for_status
152 | # [description] Responses with status codes above 300 (most 3xx, plus all 4xx and 5xx) are treated as errors.
153 | # curl should automatically follow redirects (which is what most
154 | # 3xx responses are), so if that's working well then this code should
155 | # never actually see a 3xx response.
156 | .stop_for_status <- function(response) {
157 | if (response$status_code <= 300L) {
158 | return(invisible(NULL))
159 | }
160 | .log_fatal(sprintf(
161 | "Request failed (status code %i): '%s %s'"
162 | , response$status_code
163 | , response$method
164 | , response$url
165 | ))
166 | }
167 |
--------------------------------------------------------------------------------
/r-pkg/R/logging.R:
--------------------------------------------------------------------------------
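   | # Internal logging wrappers around {futile.logger}. In addition to logging,
   | # .log_warn() raises an R warning and .log_fatal() raises an R error with the
   | # same message, so callers get both a log entry and a catchable condition.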
1 | #' @importFrom futile.logger flog.debug
2 | .log_debug <- function(...) {
3 | futile.logger::flog.debug(...)
4 | }
5 |
6 | #' @importFrom futile.logger flog.info
7 | .log_info <- function(...) {
8 | futile.logger::flog.info(...)
9 | }
10 |
11 | #' @importFrom futile.logger flog.warn
12 | .log_warn <- function(...) {
13 | futile.logger::flog.warn(...)
14 | warning(...)
15 | }
16 |
17 | #' @importFrom futile.logger flog.fatal
18 | .log_fatal <- function(...) {
19 | futile.logger::flog.fatal(...)
20 | stop(...)
21 | }
22 |
--------------------------------------------------------------------------------
/r-pkg/R/parse_date_time.R:
--------------------------------------------------------------------------------
1 | #' @title Parse date-times from Elasticsearch records
2 | #' @name parse_date_time
3 | #' @description Given a data.table with date-time strings,
4 | #' this function converts those date-times to type POSIXct with the appropriate
5 | #' time zone. The assumption is that dates are of the form "2016-07-25T22:15:19Z",
6 | #' where "T" is just a separator and the last letter is a military timezone.
7 | #'
8 | #' This is a side-effect-free function: it returns a new data.table and the
9 | #' input data.table is unmodified.
10 | #' @importFrom data.table copy is.data.table
11 | #' @importFrom purrr map2 simplify
12 | #' @importFrom stringr str_extract
13 | #' @export
14 | #' @param input_df a data.table with one or more date-time columns you want to convert
15 | #' @param date_cols Character vector of column names to convert. Columns should have
16 | #' string dates of the form "2016-07-25T22:15:19Z".
17 | #' @param assume_tz Timezone to assume when a timezone cannot be parsed from a value. Default is UTC
18 | #' @references \url{https://www.timeanddate.com/time/zones/military}
19 | #' @references \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones}
20 | #' @examples
21 | #' # Sample es_search(), chomp_hits(), or chomp_aggs() output:
22 | #' someDT <- data.table::data.table(id = 1:5
23 | #' , company = c("Apple", "Apple", "Banana", "Banana", "Cucumber")
24 | #' , timestamp = c("2015-03-14T09:26:53B", "2015-03-14T09:26:54B"
25 | #' , "2031-06-28T08:53:07Z", "2031-06-28T08:53:08Z"
26 | #' , "2000-01-01"))
27 | #'
28 | #' # Note that the date field is character right now
29 | #' str(someDT)
30 | #'
31 | #' # Let's fix that!
32 | #' someDT <- parse_date_time(input_df = someDT
33 | #' , date_cols = "timestamp"
34 | #' , assume_tz = "UTC")
35 | #' str(someDT)
36 | parse_date_time <- function(input_df
37 | , date_cols
38 | , assume_tz = "UTC"
39 | ) {
40 |
41 | # Break if input_df isn't actually a data.table
42 | if (!data.table::is.data.table(input_df)) {
43 | msg <- paste("parse_date_time expects to receive a data.table object."
44 | , "You provided an object of class"
45 | , toString(class(input_df))
46 | , "to input_df.")
47 | .log_fatal(msg)
48 | }
49 |
50 | # Break if date_cols is not a character vector
51 | if (!identical(class(date_cols), "character")) {
52 | msg <- paste("The date_cols argument in parse_date_time expects",
53 | "a character vector of column names. You gave an object",
54 | "of class", toString(class(date_cols)))
55 | .log_fatal(msg)
56 | }
57 |
58 | # Break if any of the date_cols are not actually in this DT
59 | if (!all(date_cols %in% names(input_df))) {
60 | not_there <- date_cols[!(date_cols %in% names(input_df))]
61 | msg <- paste("The following columns, which you passed to date_cols,",
62 | "do not actually exist in input_df:",
63 | toString(not_there))
64 | .log_fatal(msg)
65 | }
66 |
67 | # Other input checks we don't have explicit error messages for
68 | .assert(.is_string(assume_tz), "Argument 'assume_tz' must be a non-empty string")
69 |
70 | # Work on a copy of the DT to avoid side effects
71 | outDT <- data.table::copy(input_df)
72 |
73 | # Map one-letter TZs to valid timezones to be passed to lubridate functions
74 | # Military (one-letter) times:
75 | # Mapping UTC to etc --> https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
76 | tzHash <- vector("character")
77 | # nolint start
78 | tzHash["A"] <- "Etc/GMT-1" # UTC +1
79 | tzHash["B"] <- "Etc/GMT-2" # UTC +2
80 | tzHash["C"] <- "Etc/GMT-3" # UTC +3
81 | tzHash["D"] <- "Etc/GMT-4" # UTC +4
82 | tzHash["E"] <- "Etc/GMT-5" # UTC +5
83 | tzHash["F"] <- "Etc/GMT-6" # UTC +6
84 | tzHash["G"] <- "Etc/GMT-7" # UTC +7
85 | tzHash["H"] <- "Etc/GMT-8" # UTC +8
86 | tzHash["I"] <- "Etc/GMT-9" # UTC +9
87 | tzHash["K"] <- "Etc/GMT-10" # UTC +10
88 | tzHash["L"] <- "Etc/GMT-11" # UTC +11
89 | tzHash["M"] <- "Etc/GMT-12" # UTC +12
90 | tzHash["N"] <- "Etc/GMT+1" # UTC -1
91 | tzHash["O"] <- "Etc/GMT+2" # UTC -2
92 | tzHash["P"] <- "Etc/GMT+3" # UTC -3
93 | tzHash["Q"] <- "Etc/GMT+4" # UTC -4
94 | tzHash["R"] <- "Etc/GMT+5" # UTC -5
95 | tzHash["S"] <- "Etc/GMT+6" # UTC -6
96 | tzHash["T"] <- "Etc/GMT+7" # UTC -7
97 | tzHash["U"] <- "Etc/GMT+8" # UTC -8
98 | tzHash["V"] <- "Etc/GMT+9" # UTC -9
99 | tzHash["W"] <- "Etc/GMT+10" # UTC -10
100 | tzHash["X"] <- "Etc/GMT+11" # UTC -11
101 | tzHash["Y"] <- "Etc/GMT+12" # UTC -12
102 | tzHash["Z"] <- "UTC" # UTC
103 | # nolint end
104 |
105 | # Parse dates, return POSIXct UTC dates
106 | for (dateCol in date_cols) {
107 |
108 | # Grab this vector to work on
109 | dateVec <- outDT[[dateCol]]
110 |
111 | # Parse out timestamps and military timezone strings
112 | dateTimes <- paste0(
113 | stringr::str_extract(dateVec, "^\\d{4}-\\d{2}-\\d{2}") # nolint[non_portable_path]
114 | , " "
115 | , stringr::str_extract(dateVec, "\\d{2}:\\d{2}:\\d{2}")
116 | )
117 | tzKeys <- stringr::str_extract(dateVec, "[A-Za-z]{1}$")
118 |
119 | # Grab a vector of timezones
120 | timeZones <- tzHash[tzKeys]
121 | timeZones[is.na(timeZones)] <- assume_tz
122 |
123 | # Combine the timestamp and timezone vector to convert to POSIXct
124 | dateTimes <- purrr::map2(
125 | dateTimes
126 | , timeZones
127 | , function(dateTime, timeZone) {
128 | return(as.POSIXct(dateTime, tz = timeZone))
129 | }
130 | )
131 |
132 | utcDates <- as.POSIXct.numeric(
133 | purrr::simplify(dateTimes)
134 | , origin = "1970-01-01"
135 | , tz = "UTC"
136 | )
137 |
138 | # Put back in the data.table
139 | outDT[, (dateCol) := utcDates]
140 | }
141 |
142 | return(outDT)
143 | }
144 |
--------------------------------------------------------------------------------
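
Editor's note: the Etc/GMT zone names used in the mapping above have an inverted sign convention, so "Etc/GMT-2" means UTC+2, which is why the military letter "B" (UTC+2) maps to it. A minimal worked sketch (not part of the package source), consistent with the package's own tests: a "B" timestamp is two hours ahead of UTC, so it parses to a UTC clock time two hours earlier.

    dt <- data.table::data.table(
        id = c("zulu", "bravo")
        , timestamp = c("2015-03-04T15:25:00Z", "2015-03-04T15:25:00B")
    )
    parsed <- uptasticsearch::parse_date_time(dt, date_cols = "timestamp")
    print(parsed)
    # "zulu"  -> 2015-03-04 15:25:00 UTC
    # "bravo" -> 2015-03-04 13:25:00 UTC
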
/r-pkg/R/unpack_nested_data.R:
--------------------------------------------------------------------------------
1 | #' @title Unpack a nested data.table
2 | #' @name unpack_nested_data
3 | #' @description After calling a \code{chomp_*} function or \code{es_search}, if
4 | #' you had a nested array in the JSON, its corresponding column in the
5 | #' resulting data.table is a data.frame itself (or a list of vectors). This
6 | #' function expands that nested column out, adding its data to the original
7 | #' data.table, and duplicating metadata down the rows as necessary.
8 | #'
9 | #' This is a side-effect-free function: it returns a new data.table and the
10 | #' input data.table is unmodified.
11 | #' @importFrom data.table as.data.table copy is.data.table rbindlist setnames
12 | #' @importFrom purrr map_if map_lgl map_int
13 | #' @export
14 | #' @param chomped_df a data.table
15 | #' @param col_to_unpack a character vector of length one: the column name to unpack
16 | #' @examples
17 | #' # A sample raw result from a hits query:
18 | #' result <- '[{"_source":{"timestamp":"2017-01-01","cust_name":"Austin","details":{
19 | #' "cust_class":"big_spender","location":"chicago","pastPurchases":[{"film":"The Notebook",
20 | #' "pmt_amount":6.25},{"film":"The Town","pmt_amount":8.00},{"film":"Zootopia","pmt_amount":7.50,
21 | #' "matinee":true}]}}},{"_source":{"timestamp":"2017-02-02","cust_name":"James","details":{
22 | #' "cust_class":"peasant","location":"chicago","pastPurchases":[{"film":"Minions",
23 | #' "pmt_amount":6.25,"matinee":true},{"film":"Rogue One","pmt_amount":10.25},{"film":"Bridesmaids",
24 | #' "pmt_amount":8.75},{"film":"Bridesmaids","pmt_amount":6.25,"matinee":true}]}}},{"_source":{
25 | #' "timestamp":"2017-03-03","cust_name":"Nick","details":{"cust_class":"critic","location":"cannes",
26 | #' "pastPurchases":[{"film":"Aala Kaf Ifrit","pmt_amount":0,"matinee":true},{
27 | #' "film":"Dopo la guerra (Apres la Guerre)","pmt_amount":0,"matinee":true},{
28 | #' "film":"Avengers: Infinity War","pmt_amount":12.75}]}}}]'
29 | #'
30 | #' # Chomp into a data.table
31 | #' sampleChompedDT <- chomp_hits(hits_json = result, keep_nested_data_cols = TRUE)
32 | #' print(sampleChompedDT)
33 | #'
34 | #' # (Note: use es_search() to get here in one step)
35 | #'
36 | #' # Unpack by details.pastPurchases
37 | #' unpackedDT <- unpack_nested_data(chomped_df = sampleChompedDT
38 | #' , col_to_unpack = "details.pastPurchases")
39 | #' print(unpackedDT)
40 | unpack_nested_data <- function(chomped_df, col_to_unpack) {
41 |
42 | # Input checks
43 | if (!data.table::is.data.table(chomped_df)) {
44 | msg <- "For unpack_nested_data, chomped_df must be a data.table"
45 | .log_fatal(msg)
46 | }
47 | if (!.is_string(col_to_unpack)) {
48 | msg <- "For unpack_nested_data, col_to_unpack must be a character of length 1"
49 | .log_fatal(msg)
50 | }
51 | if (!(col_to_unpack %in% names(chomped_df))) {
52 | msg <- "For unpack_nested_data, col_to_unpack must be one of the column names"
53 | .log_fatal(msg)
54 | }
55 |
56 | inDT <- data.table::copy(chomped_df)
57 |
58 | # Define a column name to store original row ID
59 | repeat {
60 | joinCol <- .random_string(36L)
61 | if (!(joinCol %in% names(inDT))) {
62 | break
63 | }
64 | }
65 | inDT[, (joinCol) := .I]
66 |
67 | # Take out the packed column
68 | listDT <- inDT[[col_to_unpack]]
69 | inDT[, (col_to_unpack) := NULL]
70 |
71 | # Check for empty column
72 | if (all(purrr::map_int(listDT, NROW) == 0)) {
73 | msg <- "The column given to unpack_nested_data had no data in it."
74 | .log_fatal(msg)
75 | }
76 |
77 | listDT[lengths(listDT) == 0] <- NA
78 |
79 | is_df <- purrr::map_lgl(listDT, is.data.frame)
80 | is_list <- purrr::map_lgl(listDT, is.list)
81 | is_atomic <- purrr::map_lgl(listDT, is.atomic)
82 | is_na <- is.na(listDT)
83 |
84 | # Bind packed column into one data.table
85 | if (all(is_atomic)) {
86 | newDT <- data.table::as.data.table(unlist(listDT))
87 | newDT[, (joinCol) := rep(seq_along(listDT), lengths(listDT))]
88 | } else if (all(is_df | is_list | is_na)) {
89 | # Find name to use for NA columns
90 | first_df <- min(which(is_df))
91 | col_name <- names(listDT[[first_df]])[1]
92 |
93 | .prep_na_row <- function(x, col_name) {
94 | x <- data.table::as.data.table(x)
95 | names(x) <- col_name
96 | return(x)
97 | }
98 |
99 | # If the packed column contains data frames or lists, bind them together with rbindlist
100 | newDT <- purrr::map_if(listDT, is_na, .prep_na_row, col_name = col_name)
101 | newDT <- data.table::rbindlist(newDT, fill = TRUE, idcol = joinCol)
102 | } else {
103 | msg <- paste0("Each row in column ", col_to_unpack, " must be a data frame or a vector.")
104 | .log_fatal(msg)
105 | }
106 |
107 | # Join it back in
108 | outDT <- inDT[newDT, on = joinCol]
109 | outDT[, (joinCol) := NULL]
110 |
111 | # If the column was all atomic vectors, the unpacked column is named "V1"; rename it
112 | if ("V1" %in% names(outDT)) {
113 | data.table::setnames(outDT, "V1", col_to_unpack)
114 | }
115 |
116 | return(outDT)
117 | }
118 |
--------------------------------------------------------------------------------
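
Editor's note: the unpacking mechanism above boils down to two data.table operations: rbindlist(idcol = ...) records which parent row each unpacked element came from, and a join on that id duplicates the parent row's metadata down to every unpacked row. A minimal sketch (not part of the package source; column names are illustrative only):

    library(data.table)
    parent <- data.table(row_id = 1:2, cust_name = c("Austin", "James"))
    nested <- list(
        data.table(film = c("The Notebook", "The Town"), pmt_amount = c(6.25, 8.00))
        , data.table(film = "Minions", pmt_amount = 6.25)
    )
    # one row per nested record, tagged with the parent row it came from
    unpacked <- rbindlist(nested, fill = TRUE, idcol = "row_id")
    # join back so parent metadata is repeated down the unpacked rows
    result <- parent[unpacked, on = "row_id"][, row_id := NULL]
    print(result)
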
/r-pkg/R/uptasticsearch.R:
--------------------------------------------------------------------------------
1 | # Globals to make R CMD check not spit out "no visible binding for global
2 | # variable" notes.
3 | # Basically, R CMD check doesn't like it when you don't quote the "V1" in
4 | # a call like DT[, V1].
5 | # See: http://stackoverflow.com/a/12429344
6 | # Also: see hadley's comments on his own post there. They're great.
7 |
8 | utils::globalVariables(c(
9 | "."
10 | , ".I"
11 | , ".id"
12 | , "alias"
13 | , "field"
14 | , "index"
15 | , "V1"
16 | , "V2"
17 | ))
18 |
19 |
20 | # NULL object for common parameter documentation
21 | #' @param es_host A string identifying an Elasticsearch host. This should be of the form
22 | #' \code{[transfer_protocol][hostname]:[port]}. For example, \code{'http://myindex.thing.com:9200'}.
23 | #' @param es_index The name of an Elasticsearch index to be queried. Note that passing
24 | #' \code{NULL} is not supported. Technically, not passing an index
25 | #' to Elasticsearch is legal and results in searching over all indexes.
26 | #' To be sure that this very expensive query is not executed by accident,
27 | #' uptasticsearch forbids this. If you want to execute a query over
28 | #' all indexes in the cluster, set this argument to \code{"_all"}.
29 | #' @name doc_shared
30 | #' @title NULL Object For Common Documentation
31 | #' @description This is a NULL object with documentation so that later functions can call
32 | #' inheritParams
33 | #' @keywords internal
34 | NULL
35 |
--------------------------------------------------------------------------------
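
Editor's note: the globalVariables() call above exists because data.table's non-standard evaluation lets you refer to columns as bare symbols, which R CMD check cannot resolve when it statically analyzes the package. A minimal sketch (not part of the package source) of the kind of expression that triggers the NOTE:

    library(data.table)
    dt <- data.table(V1 = 1:3)
    # valid at run time, but inside a package R CMD check reports
    # "no visible binding for global variable 'V1'" for the bare V1 below
    dt[, sum(V1)]
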
/r-pkg/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | template:
2 | bootstrap: 5
3 | params:
4 | bootswatch: flatly
5 |
6 | # this needs to be specified and to match 'URL:' in DESCRIPTION,
7 | # or pkgdown raises this error:
8 | #
9 | # URLs not ok.
10 | # In _pkgdown.yml, url is missing.
11 | #
12 | url: https://github.com/uptake/uptasticsearch
13 |
14 | repo:
15 | url:
16 | home: https://github.com/uptake/uptasticsearch/
17 | source: https://github.com/uptake/uptasticsearch/tree/main/r-pkg/
18 | issue: https://github.com/uptake/uptasticsearch/issues
19 | user: https://github.com/
20 |
21 | reference:
22 | - title: Main function
23 | contents:
24 | - es_search
25 | - title: Parse raw JSON into data.table
26 | contents:
27 | - starts_with("chomp_")
28 | - title: Utilities
29 | contents:
30 | - unpack_nested_data
31 | - parse_date_time
32 | - title: Exploratory functions
33 | contents:
34 | - get_fields
35 |
--------------------------------------------------------------------------------
/r-pkg/inst/testdata/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uptake/uptasticsearch/62d4739912db1e56cba7771f9903d6e551e557dc/r-pkg/inst/testdata/.gitkeep
--------------------------------------------------------------------------------
/r-pkg/man/chomp_aggs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/chomp_aggs.R
3 | \name{chomp_aggs}
4 | \alias{chomp_aggs}
5 | \title{Aggs query to data.table}
6 | \usage{
7 | chomp_aggs(aggs_json = NULL)
8 | }
9 | \arguments{
10 | \item{aggs_json}{A character vector. If its length is greater than 1, its elements will be pasted
11 | together. This can contain a JSON returned from an \code{aggs} query in
12 | Elasticsearch, or a filepath or URL pointing at one.}
13 | }
14 | \value{
15 | A data.table representation of the result or NULL if the aggregation result is empty.
16 | }
17 | \description{
18 | Given some raw JSON from an aggs query in Elasticsearch, parse the
19 | aggregations into a data.table.
20 | }
21 | \examples{
22 | # A sample raw result from an aggs query combining date_histogram and extended_stats:
23 | result <- '{"aggregations":{"dateTime":{"buckets":[{"key_as_string":"2016-12-01T00:00:00.000Z",
24 | "key":1480550400000,"doc_count":123,"num_potatoes":{"count":120,"min":0,"max":40,"avg":15,
25 | "sum":1800,"sum_of_squares":28000,"variance":225,"std_deviation":15,"std_deviation_bounds":{
26 | "upper":26,"lower":13}}},{"key_as_string":"2017-01-01T00:00:00.000Z","key":1483228800000,
27 | "doc_count":134,"num_potatoes":{"count":131,"min":0,"max":39,"avg":16,"sum":2096,
28 | "sum_of_squares":34000,"variance":225,"std_deviation":15,"std_deviation_bounds":{"upper":26,
29 | "lower":13}}}]}}}'
30 |
31 | # Parse into a data.table
32 | aggDT <- chomp_aggs(aggs_json = result)
33 | print(aggDT)
34 | }
35 |
--------------------------------------------------------------------------------
/r-pkg/man/chomp_hits.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/chomp_hits.R
3 | \name{chomp_hits}
4 | \alias{chomp_hits}
5 | \title{Hits to data.tables}
6 | \usage{
7 | chomp_hits(hits_json = NULL, keep_nested_data_cols = TRUE)
8 | }
9 | \arguments{
10 | \item{hits_json}{A character vector. If its length is greater than 1, its elements will be pasted
11 | together. This can contain a JSON returned from a \code{search} query in
12 | Elasticsearch, or a filepath or URL pointing at one.}
13 |
14 | \item{keep_nested_data_cols}{a boolean (default TRUE); whether to keep columns that are nested
15 | arrays in the original JSON. A warning will be given if these
16 | columns are deleted.}
17 | }
18 | \description{
19 | A function for converting Elasticsearch docs into R data.tables. It
20 | uses \code{\link[jsonlite]{fromJSON}} with \code{flatten = TRUE} to convert a
21 | JSON into an R data.frame, and formats it into a data.table.
22 | }
23 | \examples{
24 | # A sample raw result from a hits query:
25 | result <- '[{"_source":{"timestamp":"2017-01-01","cust_name":"Austin","details":{
26 | "cust_class":"big_spender","location":"chicago","pastPurchases":[{"film":"The Notebook",
27 | "pmt_amount":6.25},{"film":"The Town","pmt_amount":8.00},{"film":"Zootopia","pmt_amount":7.50,
28 | "matinee":true}]}}},{"_source":{"timestamp":"2017-02-02","cust_name":"James","details":{
29 | "cust_class":"peasant","location":"chicago","pastPurchases":[{"film":"Minions",
30 | "pmt_amount":6.25,"matinee":true},{"film":"Rogue One","pmt_amount":10.25},{"film":"Bridesmaids",
31 | "pmt_amount":8.75},{"film":"Bridesmaids","pmt_amount":6.25,"matinee":true}]}}},{"_source":{
32 | "timestamp":"2017-03-03","cust_name":"Nick","details":{"cust_class":"critic","location":"cannes",
33 | "pastPurchases":[{"film":"Aala Kaf Ifrit","pmt_amount":0,"matinee":true},{
34 | "film":"Dopo la guerra (Apres la Guerre)","pmt_amount":0,"matinee":true},{
35 | "film":"Avengers: Infinity War","pmt_amount":12.75}]}}}]'
36 |
37 | # Chomp into a data.table
38 | sampleChompedDT <- chomp_hits(hits_json = result, keep_nested_data_cols = TRUE)
39 | print(sampleChompedDT)
40 |
41 | # (Note: use es_search() to get here in one step)
42 |
43 | # Unpack by details.pastPurchases
44 | unpackedDT <- unpack_nested_data(chomped_df = sampleChompedDT
45 | , col_to_unpack = "details.pastPurchases")
46 | print(unpackedDT)
47 | }
48 |
--------------------------------------------------------------------------------
/r-pkg/man/doc_shared.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/uptasticsearch.R
3 | \name{doc_shared}
4 | \alias{doc_shared}
5 | \title{NULL Object For Common Documentation}
6 | \arguments{
7 | \item{es_host}{A string identifying an Elasticsearch host. This should be of the form
8 | \code{[transfer_protocol][hostname]:[port]}. For example, \code{'http://myindex.thing.com:9200'}.}
9 |
10 | \item{es_index}{The name of an Elasticsearch index to be queried. Note that passing
11 | \code{NULL} is not supported. Technically, not passing an index
12 | to Elasticsearch is legal and results in searching over all indexes.
13 | To be sure that this very expensive query is not executed by accident,
14 | uptasticsearch forbids this. If you want to execute a query over
15 | all indexes in the cluster, set this argument to \code{"_all"}.}
16 | }
17 | \description{
18 | This is a NULL object with documentation so that later functions can call
19 | inheritParams
20 | }
21 | \keyword{internal}
22 |
--------------------------------------------------------------------------------
/r-pkg/man/es_search.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/es_search.R
3 | \name{es_search}
4 | \alias{es_search}
5 | \title{Execute an Elasticsearch query and get a data.table}
6 | \usage{
7 | es_search(
8 | es_host,
9 | es_index,
10 | size = 10000,
11 | query_body = "{}",
12 | scroll = "5m",
13 | max_hits = Inf,
14 | n_cores = ceiling(parallel::detectCores()/2),
15 | break_on_duplicates = TRUE,
16 | ignore_scroll_restriction = FALSE,
17 | intermediates_dir = getwd()
18 | )
19 | }
20 | \arguments{
21 | \item{es_host}{A string identifying an Elasticsearch host. This should be of the form
22 | \code{[transfer_protocol][hostname]:[port]}. For example, \code{'http://myindex.thing.com:9200'}.}
23 |
24 | \item{es_index}{The name of an Elasticsearch index to be queried. Note that passing
25 | \code{NULL} is not supported. Technically, not passing an index
26 | to Elasticsearch is legal and results in searching over all indexes.
27 | To be sure that this very expensive query is not executed by accident,
28 | uptasticsearch forbids this. If you want to execute a query over
29 | all indexes in the cluster, set this argument to \code{"_all"}.}
30 |
31 | \item{size}{Number of records per page of results.
32 | See \href{https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#request-body-search-from-size}{Elasticsearch docs} for more.
33 | Note that this will be reset to 0 if you submit a \code{query_body} with
34 | an "aggs" request in it. Also see \code{max_hits}.}
35 |
36 | \item{query_body}{String with a valid Elasticsearch query. Default is an empty query.}
37 |
38 | \item{scroll}{How long should the scroll context be held open? This should be a
39 | duration string like "1m" (for one minute) or "15s" (for 15 seconds).
40 | The scroll context will be refreshed every time you ask Elasticsearch
41 | for another record, so this parameter should just be the amount of
42 | time you expect to pass between requests. See the
43 | \href{https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#request-body-search-scroll}{Elasticsearch scroll/pagination docs}
44 | for more information.}
45 |
46 | \item{max_hits}{Integer. If specified, \code{es_search} will stop pulling data as soon
47 | as it has pulled this many hits. Default is \code{Inf}, meaning that
48 | all possible hits will be pulled.}
49 |
50 | \item{n_cores}{Number of cores to distribute fetching and processing over.}
51 |
52 | \item{break_on_duplicates}{Boolean, defaults to TRUE. \code{es_search} uses the size of the
53 | final object it returns to check whether or not some data were lost
54 | during the processing. If you have duplicates in the source data, you
55 | will have to set this flag to FALSE and just trust that no data have
56 | been lost. Sorry :( .}
57 |
58 | \item{ignore_scroll_restriction}{There is a cost associated with keeping an
59 | Elasticsearch scroll context open. By default,
60 | this function does not allow arguments to \code{scroll}
61 | which exceed one hour. This is done to prevent
62 | costly mistakes made by novice Elasticsearch users.
63 | If you understand the cost of keeping the context
64 | open for a long time and would like to pass a \code{scroll}
65 | value longer than an hour, set \code{ignore_scroll_restriction}
66 | to \code{TRUE}.}
67 |
68 | \item{intermediates_dir}{When scrolling over search results, this function writes
69 | intermediate results to disk. By default, `es_search` will create a temporary
70 | directory in whatever working directory the function is called from. If you
71 | want to change this behavior, provide a path here. `es_search` will create
72 | and write to a temporary directory under whatever path you provide.}
73 | }
74 | \description{
75 | Given a query and some optional parameters, \code{es_search} gets results
76 | from HTTP requests to Elasticsearch and returns a data.table
77 | representation of those results.
78 | }
79 | \examples{
80 | \dontrun{
81 |
82 | ###=== Example 1: Get low-scoring food survey results ===###
83 |
84 | query_body <- '{"query":{"filtered":{"filter":{"bool":{"must":[
85 | {"exists":{"field":"customer_comments"}},
86 | {"terms":{"overall_satisfaction":["very low","low"]}}]}}},
87 | "query":{"match_phrase":{"customer_comments":"food"}}}}'
88 |
89 | # Execute the query, parse into a data.table
90 | commentDT <- es_search(es_host = 'http://mydb.mycompany.com:9200'
91 | , es_index = "survey_results"
92 | , query_body = query_body
93 | , scroll = "1m"
94 | , n_cores = 4)
95 |
96 | ###=== Example 2: Time series agg features ===###
97 |
98 | # Create query that will give you daily summary stats for revenue
99 | query_body <- '{"query":{"filtered":{"filter":{"bool":{"must":[
100 | {"exists":{"field":"pmt_amount"}}]}}}},
101 | "aggs":{"timestamp":{"date_histogram":{"field":"timestamp","interval":"day"},
102 | "aggs":{"revenue":{"extended_stats":{"field":"pmt_amount"}}}}},"size":0}'
103 |
104 | # Execute the query and get the result
105 | resultDT <- es_search(es_host = "http://es.custdb.mycompany.com:9200"
106 | , es_index = 'ticket_sales'
107 | , query_body = query_body)
108 | }
109 | }
110 | \references{
111 | \href{https://www.elastic.co/guide/en/elasticsearch/reference/6.7/search-request-scroll.html}{Elasticsearch 6 scrolling strategy}
112 | }
113 |
--------------------------------------------------------------------------------
/r-pkg/man/get_fields.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_fields.R
3 | \name{get_fields}
4 | \alias{get_fields}
5 | \title{Get the names and data types of the indexed fields in an index}
6 | \usage{
7 | get_fields(es_host, es_indices = "_all")
8 | }
9 | \arguments{
10 | \item{es_host}{A string identifying an Elasticsearch host. This should be of the form
11 | \code{[transfer_protocol][hostname]:[port]}. For example, \code{'http://myindex.thing.com:9200'}.}
12 |
13 | \item{es_indices}{A character vector that contains the names of indices for
14 | which to get mappings. Default is \code{'_all'}, which means
15 | get the mapping for all indices. Names of indices can be
16 | treated as regular expressions.}
17 | }
18 | \value{
19 | A data.table containing four columns: index, type, field, and data_type
20 | }
21 | \description{
22 | For a given Elasticsearch index, return the mapping from field name
23 | to data type for all indexed fields.
24 | }
25 | \examples{
26 | \dontrun{
27 | # get the mapping for all indexed fields in the ticket_sales and customers indices
28 | mappingDT <- get_fields(es_host = "http://es.custdb.mycompany.com:9200"
29 | , es_indices = c("ticket_sales", "customers"))
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/r-pkg/man/parse_date_time.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/parse_date_time.R
3 | \name{parse_date_time}
4 | \alias{parse_date_time}
5 | \title{Parse date-times from Elasticsearch records}
6 | \usage{
7 | parse_date_time(input_df, date_cols, assume_tz = "UTC")
8 | }
9 | \arguments{
10 | \item{input_df}{a data.table with one or more date-time columns you want to convert}
11 |
12 | \item{date_cols}{Character vector of column names to convert. Columns should have
13 | string dates of the form "2016-07-25T22:15:19Z".}
14 |
15 | \item{assume_tz}{Timezone to assume when a timezone cannot be parsed from a value. Default is UTC}
16 | }
17 | \description{
18 | Given a data.table with date-time strings,
19 | this function converts those date-times to type POSIXct with the appropriate
20 | time zone. The assumption is that dates are of the form "2016-07-25T22:15:19Z",
21 | where "T" is just a separator and the last letter is a military timezone.
22 |
23 | This is a side-effect-free function: it returns a new data.table and the
24 | input data.table is unmodified.
25 | }
26 | \examples{
27 | # Sample es_search(), chomp_hits(), or chomp_aggs() output:
28 | someDT <- data.table::data.table(id = 1:5
29 | , company = c("Apple", "Apple", "Banana", "Banana", "Cucumber")
30 | , timestamp = c("2015-03-14T09:26:53B", "2015-03-14T09:26:54B"
31 | , "2031-06-28T08:53:07Z", "2031-06-28T08:53:08Z"
32 | , "2000-01-01"))
33 |
34 | # Note that the date field is character right now
35 | str(someDT)
36 |
37 | # Let's fix that!
38 | someDT <- parse_date_time(input_df = someDT
39 | , date_cols = "timestamp"
40 | , assume_tz = "UTC")
41 | str(someDT)
42 | }
43 | \references{
44 | \url{https://www.timeanddate.com/time/zones/military}
45 |
46 | \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones}
47 | }
48 |
--------------------------------------------------------------------------------
/r-pkg/man/unpack_nested_data.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/unpack_nested_data.R
3 | \name{unpack_nested_data}
4 | \alias{unpack_nested_data}
5 | \title{Unpack a nested data.table}
6 | \usage{
7 | unpack_nested_data(chomped_df, col_to_unpack)
8 | }
9 | \arguments{
10 | \item{chomped_df}{a data.table}
11 |
12 | \item{col_to_unpack}{a character vector of length one: the column name to unpack}
13 | }
14 | \description{
15 | After calling a \code{chomp_*} function or \code{es_search}, if
16 | you had a nested array in the JSON, its corresponding column in the
17 | resulting data.table is a data.frame itself (or a list of vectors). This
18 | function expands that nested column out, adding its data to the original
19 | data.table, and duplicating metadata down the rows as necessary.
20 |
21 | This is a side-effect-free function: it returns a new data.table and the
22 | input data.table is unmodified.
23 | }
24 | \examples{
25 | # A sample raw result from a hits query:
26 | result <- '[{"_source":{"timestamp":"2017-01-01","cust_name":"Austin","details":{
27 | "cust_class":"big_spender","location":"chicago","pastPurchases":[{"film":"The Notebook",
28 | "pmt_amount":6.25},{"film":"The Town","pmt_amount":8.00},{"film":"Zootopia","pmt_amount":7.50,
29 | "matinee":true}]}}},{"_source":{"timestamp":"2017-02-02","cust_name":"James","details":{
30 | "cust_class":"peasant","location":"chicago","pastPurchases":[{"film":"Minions",
31 | "pmt_amount":6.25,"matinee":true},{"film":"Rogue One","pmt_amount":10.25},{"film":"Bridesmaids",
32 | "pmt_amount":8.75},{"film":"Bridesmaids","pmt_amount":6.25,"matinee":true}]}}},{"_source":{
33 | "timestamp":"2017-03-03","cust_name":"Nick","details":{"cust_class":"critic","location":"cannes",
34 | "pastPurchases":[{"film":"Aala Kaf Ifrit","pmt_amount":0,"matinee":true},{
35 | "film":"Dopo la guerra (Apres la Guerre)","pmt_amount":0,"matinee":true},{
36 | "film":"Avengers: Infinity War","pmt_amount":12.75}]}}}]'
37 |
38 | # Chomp into a data.table
39 | sampleChompedDT <- chomp_hits(hits_json = result, keep_nested_data_cols = TRUE)
40 | print(sampleChompedDT)
41 |
42 | # (Note: use es_search() to get here in one step)
43 |
44 | # Unpack by details.pastPurchases
45 | unpackedDT <- unpack_nested_data(chomped_df = sampleChompedDT
46 | , col_to_unpack = "details.pastPurchases")
47 | print(unpackedDT)
48 | }
49 |
--------------------------------------------------------------------------------
/r-pkg/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(uptasticsearch) # nolint[unused_import]
3 |
4 | testthat::test_check(
5 | package = "uptasticsearch"
6 | , stop_on_failure = TRUE
7 | , stop_on_warning = FALSE
8 | , reporter = testthat::SummaryReporter$new()
9 | )
10 |
--------------------------------------------------------------------------------
/r-pkg/tests/testthat/test-assertions.R:
--------------------------------------------------------------------------------
1 | test_that(".is_count() works", {
2 | expect_true(.is_count(1L))
3 | expect_true(.is_count(8L))
4 | expect_false(.is_count(-2L))
5 | expect_false(.is_count(0))
6 | expect_false(.is_count(15.2))
7 | expect_false(.is_count(NA))
8 | expect_false(.is_count(NA_character_))
9 | expect_false(.is_count(NA_integer_))
10 | expect_false(.is_count(NA_real_))
11 | expect_false(.is_count(c(1L, 2L)))
12 | expect_false(.is_count("a-number"))
13 | expect_false(.is_count(NULL))
14 | expect_false(.is_count(TRUE))
15 | })
16 |
17 | test_that(".is_flag() works", {
18 | expect_true(.is_flag(TRUE))
19 | expect_true(.is_flag(FALSE))
20 | expect_false(.is_flag(-1))
21 | expect_false(.is_flag(-1L))
22 | expect_false(.is_flag(0))
23 | expect_false(.is_flag(0L))
24 | expect_false(.is_flag(1))
25 | expect_false(.is_flag(1L))
26 | expect_false(.is_flag(15.2))
27 | expect_false(.is_flag(NA))
28 | expect_false(.is_flag(NA_character_))
29 | expect_false(.is_flag(NA_integer_))
30 | expect_false(.is_flag(NA_real_))
31 | expect_false(.is_flag(c(1L, 2L)))
32 | expect_false(.is_flag("a-number"))
33 | expect_false(.is_flag(NULL))
34 | })
35 |
36 | test_that(".is_string() works", {
37 | expect_true(.is_string("abc"))
38 | expect_true(.is_string(" "))
39 | expect_false(.is_string(""))
40 | expect_false(.is_string(-2L))
41 | expect_false(.is_string(0))
42 | expect_false(.is_string(15.2))
43 | expect_false(.is_string(NA))
44 | expect_false(.is_string(NA_character_))
45 | expect_false(.is_string(NA_integer_))
46 | expect_false(.is_string(NA_real_))
47 | expect_false(.is_string(c(1L, 2L)))
48 | expect_false(.is_string(NULL))
49 | expect_false(.is_string(TRUE))
50 | })
51 |
52 | test_that(".is_writeable() works", {
53 | expect_true(.is_writeable(getwd()))
54 | expect_false(.is_writeable(file.path(tempdir(), "some-nonsense")))
55 | expect_false(.is_writeable(""))
56 | expect_false(.is_writeable(-2L))
57 | expect_false(.is_writeable(0))
58 | expect_false(.is_writeable(15.2))
59 | expect_false(.is_writeable(NA))
60 | expect_false(.is_writeable(NA_character_))
61 | expect_false(.is_writeable(NA_integer_))
62 | expect_false(.is_writeable(NA_real_))
63 | expect_false(.is_writeable(c(1L, 2L)))
64 | expect_false(.is_writeable("a-number"))
65 | expect_false(.is_writeable(NULL))
66 | expect_false(.is_writeable(TRUE))
67 | })
68 |
--------------------------------------------------------------------------------
/r-pkg/tests/testthat/test-chomp_hits.R:
--------------------------------------------------------------------------------
1 |
2 | # Configure logger (suppress all logs in testing)
3 | loggerOptions <- futile.logger::logger.options()
4 | if (!identical(loggerOptions, list())) {
5 | origLogThreshold <- loggerOptions[[1]][["threshold"]]
6 | } else {
7 | origLogThreshold <- futile.logger::INFO
8 | }
9 | futile.logger::flog.threshold(0)
10 |
11 | # This is effectively a test of running elastic::Search(raw = TRUE) and passing it through chomp_hits()
12 | test_that("chomp_hits should work from a one-element character vector", {
13 | # nolint start
14 | jsonString <- '{"took": 54, "timed_out": false, "_shards": {"total": 16,"successful": 16, "failed": 0},
15 | "hits": {
16 | "total": 46872,
17 | "max_score": 0.882234,
18 | "hits": [
19 | {"_index": "redsawx", "_type": "ballplayer", "_id": "abc123", "_score": 0.882234,
20 | "_source": {"name": "David Ortiz", "stats" : {"yrs_played": 20, "final_season": {"avg": 0.315, "HR": 38, "R": 79},
21 | "full_career": {"avg": 0.286, "HR": 541, "R": 1419}}}},
22 | {"_index": "redsawx", "_type": "ballplayer", "_id": "def567", "_score": 0.882234,
23 | "_source": {"name": "Kevin Youkilis", "stats" : {"yrs_played": 10, "final_season": {"avg": 0.219, "HR": 2, "R": 12},
24 | "full_career": {"avg": 0.281, "HR": 150, "R": 653}}}},
25 | {"_index": "redsawx", "_type": "ballplayer", "_id": "abc567", "_score": 0.882234,
26 | "_source": {"name": "Trot Nixon", "stats" : {"yrs_played": 12, "final_season": {"avg": 0.171, "HR": 1, "R": 2},
27 | "full_career": {"avg": 0.274, "HR": 137, "R": 579}}}},
28 | {"_index": "redsawx", "_type": "ballplayer", "_id": "def123", "_score": 0.882234,
29 | "_source": {"name": "Manny Ramirez", "stats" : {"yrs_played": 19, "final_season": {"avg": 0.059, "HR": 0, "R": 0},
30 | "full_career": {"avg": 0.312, "HR": 555, "R": 1544}}}},
31 | {"_index": "redsawx", "_type": "ballplayer", "_id": "ghi890", "_score": 0.882234,
32 | "_source": {"name": "Jason Varitek", "stats" : {"yrs_played": 15, "final_season": {"avg": 0.221, "HR": 11, "R": 32},
33 | "full_career": {"avg": 0.256, "HR": "193", "R": 664}}}}
34 | ]}}'
35 | # nolint end
36 | chompDT <- chomp_hits(hits_json = jsonString)
37 | expect_true(data.table::is.data.table(chompDT))
38 | expect_equivalent(dim(chompDT), c(5, 12))
39 | expect_true(all(c("_id", "_index", "_score", "name", "stats.final_season.avg",
40 | "stats.final_season.HR", "stats.final_season.R", "stats.full_career.avg",
41 | "stats.full_career.HR", "stats.full_career.R", "stats.yrs_played", "_type") %in%
42 | names(chompDT)))
43 | expect_identical(chompDT$stats.full_career.R, as.integer(c(1419, 653, 579, 1544, 664)))
44 | expect_identical(chompDT$stats.full_career.HR, as.character(c(541, 150, 137, 555, 193)))
45 | })
46 |
47 | # What if we're passing the hits array, not the entire result?
48 | test_that("chomp_hits should work with just the hits array", {
49 | # nolint start
50 | jsonString <- '[
51 | {"_index": "redsawx", "_type": "ballplayer", "_id": "abc123", "_score": 0.882234,
52 | "_source": {"name": "David Ortiz", "stats" : {"yrs_played": 20, "final_season": {"avg": 0.315, "HR": 38, "R": 79},
53 | "full_career": {"avg": 0.286, "HR": 541, "R": 1419}}}},
54 | {"_index": "redsawx", "_type": "ballplayer", "_id": "def567", "_score": 0.882234,
55 | "_source": {"name": "Kevin Youkilis", "stats" : {"yrs_played": 10, "final_season": {"avg": 0.219, "HR": 2, "R": 12},
56 | "full_career": {"avg": 0.281, "HR": 150, "R": 653}}}},
57 | {"_index": "redsawx", "_type": "ballplayer", "_id": "abc567", "_score": 0.882234,
58 | "_source": {"name": "Trot Nixon", "stats" : {"yrs_played": 12, "final_season": {"avg": 0.171, "HR": 1, "R": 2},
59 | "full_career": {"avg": 0.274, "HR": 137, "R": 579}}}},
60 | {"_index": "redsawx", "_type": "ballplayer", "_id": "def123", "_score": 0.882234,
61 | "_source": {"name": "Manny Ramirez", "stats" : {"yrs_played": 19, "final_season": {"avg": 0.059, "HR": 0, "R": 0},
62 | "full_career": {"avg": 0.312, "HR": 555, "R": 1544}}}},
63 | {"_index": "redsawx", "_type": "ballplayer", "_id": "ghi890", "_score": 0.882234,
64 | "_source": {"name": "Jason Varitek", "stats" : {"yrs_played": 15, "final_season": {"avg": 0.221, "HR": 11, "R": 32},
65 | "full_career": {"avg": 0.256, "HR": "193", "R": 664}}}}
66 | ]'
67 | # nolint end
68 | chompDT <- chomp_hits(hits_json = jsonString)
69 | expect_true(data.table::is.data.table(chompDT))
70 | expect_equivalent(dim(chompDT), c(5, 12))
71 | expect_true(all(c("_id", "_index", "_score", "name", "stats.final_season.avg",
72 | "stats.final_season.HR", "stats.final_season.R", "stats.full_career.avg",
73 | "stats.full_career.HR", "stats.full_career.R", "stats.yrs_played", "_type") %in%
74 | names(chompDT)))
75 | expect_identical(chompDT$stats.full_career.R, as.integer(c(1419, 653, 579, 1544, 664)))
76 | expect_identical(chompDT$stats.full_career.HR, as.character(c(541, 150, 137, 555, 193)))
77 | })
78 |
79 | # This tests the type of data representation you'd get from reading in a JSON file with readLines
80 | test_that("chomp_hits should work from a multi-element character vector", {
81 | test_json <- system.file("testdata", "es_hits.json", package = "uptasticsearch")
82 | jsonVec <- suppressWarnings(readLines(test_json))
83 | chompDT <- chomp_hits(hits_json = jsonVec)
84 | expect_true(data.table::is.data.table(chompDT))
85 | expect_equivalent(dim(chompDT), c(5, 12))
86 | expect_true(all(c("_id", "_index", "_score", "name", "stats.final_season.avg",
87 | "stats.final_season.HR", "stats.final_season.R", "stats.full_career.avg",
88 | "stats.full_career.HR", "stats.full_career.R", "stats.yrs_played", "_type") %in%
89 | names(chompDT)))
90 | expect_identical(chompDT$stats.full_career.R, as.integer(c(1419, 653, 579, 1544, 664)))
91 | expect_identical(chompDT$stats.full_career.HR, as.character(c(541, 150, 137, 555, 193)))
92 | })
93 |
94 | # In case you need to have a non-R, non-Python process run queries for you and store them in a file
95 | test_that("chomp_hits should work from a file", {
96 | test_json <- system.file("testdata", "es_hits.json", package = "uptasticsearch")
97 | chompDT <- chomp_hits(hits_json = test_json)
98 | expect_true(data.table::is.data.table(chompDT))
99 | expect_equivalent(dim(chompDT), c(5, 12))
100 | expect_true(all(c("_id", "_index", "_score", "name", "stats.final_season.avg",
101 | "stats.final_season.HR", "stats.final_season.R", "stats.full_career.avg",
102 | "stats.full_career.HR", "stats.full_career.R", "stats.yrs_played", "_type") %in%
103 | names(chompDT)))
104 | expect_identical(chompDT$stats.full_career.R, as.integer(c(1419, 653, 579, 1544, 664)))
105 | expect_identical(chompDT$stats.full_career.HR, as.character(c(541, 150, 137, 555, 193)))
106 | })
107 |
108 | # Should warn and return null if you don't provide any data
109 | test_that("chomp_hits should return NULL if you do not provide data", {
110 | result <- suppressWarnings(chomp_hits(hits_json = NULL))
111 | expect_true(is.null(result))
112 | expect_warning(chomp_hits(hits_json = NULL),
113 | regexp = "You did not pass any input data to chomp_hits")
114 | })
115 |
116 | # Should break if you pass the wrong kind of input
117 | test_that("chomp_hits should break if you pass the wrong input", {
118 | expect_error(chomp_hits(hits_json = data.frame(a = 1:5)),
119 | regexp = "The first argument of chomp_hits must be a character vector")
120 | })
121 |
122 | # Should warn and delete if the resulting data is nested and keep_nested_data_cols = FALSE
123 | test_that("chomp_hits should warn and delete if the resulting data is nested with keep_nested_data_cols = FALSE", {
124 | expect_warning({
125 | chomped <- chomp_hits(
126 | hits_json = '[{"test1":[{"a":1}],"test2":2}]'
127 | , keep_nested_data_cols = FALSE
128 | )
129 | }, regexp = "Deleting the following nested data columns:")
130 | expect_equal(names(chomped), "test2")
131 | })
132 |
133 | ##### TEST TEAR DOWN #####
134 | futile.logger::flog.threshold(origLogThreshold)
135 |
--------------------------------------------------------------------------------
/r-pkg/tests/testthat/test-es_search.R:
--------------------------------------------------------------------------------
1 |
2 | # Configure logger (suppress all logs in testing)
3 | loggerOptions <- futile.logger::logger.options()
4 | if (!identical(loggerOptions, list())) {
5 | origLogThreshold <- loggerOptions[[1]][["threshold"]]
6 | } else {
7 | origLogThreshold <- futile.logger::INFO
8 | }
9 | futile.logger::flog.threshold(0)
10 |
11 | # Should reject NULL index
12 | test_that("es_search should reject NULL index", {
13 | expect_error({
14 | es_search(
15 | es_host = "http://mycompany.com:9200"
16 | , es_index = NULL
17 | )
18 | }, regexp = "You passed NULL to es_index")
19 | })
20 |
21 | # Should reject bad queries
22 | test_that("es_search should reject malformed queries", {
23 | # Length greater than 1
24 | expect_error({
25 | es_search(
26 | es_host = "http://mycompany.com:9200"
27 | , es_index = "_all"
28 | , query = c(
29 | '{"_source": {"include": ["stuff.*"]},'
30 | , '{"aggs": {"superman": {"terms": {"field": "hi"}}}}}'
31 | )
32 | )
33 | }, regexp = "You gave an object of length 2")
34 |
35 | # Specified as a list (like you might get from jsonlite::fromJSON)
36 | expect_error({
37 | es_search(
38 | es_host = "http://mycompany.com:9200"
39 | , es_index = "_all"
40 | , query = list(
41 | '{"_source": {"include": ["stuff.*"]},{"aggs": {"superman": {"terms": {"field": "hi"}}}}}'
42 | )
43 | )
44 | }, regexp = "query_body should be a single string")
45 | })
46 |
47 | #---- .ConvertToSec
48 |
49 | # .ConvertToSec should work for seconds
50 | test_that(".ConvertToSec should work for seconds",
51 | expect_identical(60, uptasticsearch:::.ConvertToSec("60s")))
52 |
53 | # .ConvertToSec should work for minutes
54 | test_that(".ConvertToSec should work for minutes",
55 | expect_identical(600, uptasticsearch:::.ConvertToSec("10m")))
56 |
57 | # .ConvertToSec should work for hours
58 | test_that(".ConvertToSec should work for hours",
59 | expect_identical(72000, uptasticsearch:::.ConvertToSec("20h")))
60 |
61 | # .ConvertToSec should work for days
62 | test_that(".ConvertToSec should work for days",
63 | expect_identical(172800, uptasticsearch:::.ConvertToSec("2d")))
64 |
65 | # .ConvertToSec should work for weeks
66 | test_that(".ConvertToSec should work for weeks",
67 | expect_identical(3024000, uptasticsearch:::.ConvertToSec("5w")))
68 |
69 | # .ConvertToSec should break on unsupported timeStrings
70 | test_that(".ConvertToSec should break on unsupported timeStrings",
71 | expect_error(uptasticsearch:::.ConvertToSec("50Y")
72 | , regexp = "Could not figure out units of datemath"))
73 |
74 | #---- ValidateAndFormatHost
75 |
76 | # .ValidateAndFormatHost should break if you give it a non-character input
77 | test_that(".ValidateAndFormatHost should break if you give it a non-character input",
78 | expect_error(uptasticsearch:::.ValidateAndFormatHost(9200)
79 | , regexp = "es_host should be a string"))
80 |
81 | # .ValidateAndFormatHost should break if you give it a multi-element vector
82 | test_that(".ValidateAndFormatHost should break if you give it a multi-element vector",
83 | expect_error(uptasticsearch:::.ValidateAndFormatHost(c("http://", "mydb.mycompany.com:9200"))
84 | , regexp = "es_host should be length 1"))
85 |
86 | # .ValidateAndFormatHost should warn you and drop trailing slashes if you have them
87 | test_that(".ValidateAndFormatHost should handle trailing slashes", {
88 | # single slash
89 | newHost <- uptasticsearch:::.ValidateAndFormatHost("http://mydb.mycompany.com:9200/")
90 | expect_identical(newHost, "http://mydb.mycompany.com:9200")
91 |
92 | # objectively ridiculous number of slashes
93 | newHost2 <- uptasticsearch:::.ValidateAndFormatHost("http://mydb.mycompany.com:9200/////////")
94 | expect_identical(newHost2, "http://mydb.mycompany.com:9200")
95 | })
96 |
97 | # .ValidateAndFormatHost should break if you don't have a port
98 | test_that(".ValidateAndFormatHost should break if you don't have a port",
99 | expect_error(uptasticsearch:::.ValidateAndFormatHost("http://mydb.mycompany.com")
100 | , regexp = "No port found in es_host"))
101 |
102 | # .ValidateAndFormatHost should warn if you don't have a valid transfer protocol
103 | test_that(".ValidateAndFormatHost should warn and use http if you don't give a transfer protocol", {
104 | # no transfer protocol given
105 | expect_warning({
106 | hostWithTransfer <- uptasticsearch:::.ValidateAndFormatHost("mydb.mycompany.com:9200")
107 | }, regexp = "You did not provide a transfer protocol")
108 | expect_identical(hostWithTransfer, "http://mydb.mycompany.com:9200")
109 | })
110 |
111 | #---- .major_version
112 | test_that(".major_version should correctly parse semver version strings", {
113 |
114 | # yay random tests
115 | for (i in 1:50) {
116 | v1 <- as.character(sample(0:9, size = 1))
117 | v2 <- as.character(sample(0:9, size = 1))
118 | v3 <- as.character(sample(0:9, size = 1))
119 | test_version <- paste0(v1, ".", v2, ".", v3)
120 | expect_identical(
121 | uptasticsearch:::.major_version(test_version)
122 | , v1
123 | , info = paste0("version that broke this: ", test_version)
124 | )
125 | }
126 | })
127 |
128 | ##### TEST TEAR DOWN #####
129 | futile.logger::flog.threshold(origLogThreshold)
130 |
--------------------------------------------------------------------------------
/r-pkg/tests/testthat/test-get_fields.R:
--------------------------------------------------------------------------------
1 | # Configure logger (suppress all logs in testing)
2 | loggerOptions <- futile.logger::logger.options()
3 | if (!identical(loggerOptions, list())) {
4 | origLogThreshold <- loggerOptions[[1]][["threshold"]]
5 | } else {
6 | origLogThreshold <- futile.logger::INFO
7 | }
8 | futile.logger::flog.threshold(0)
9 |
10 |
11 | #--- get_fields
12 |
13 | # Gives an informative error if es_indices is NULL or an empty string
14 | test_that("get_fields should give an informative error if es_indices is NULL or an empty string", {
15 | expect_error(get_fields(es_host = "http://es.custdb.mycompany.com:9200"
16 | , es_indices = NULL),
17 | regexp = "Argument 'es_indices' must be a non-empty character vector")
18 | expect_error(get_fields(es_host = "http://es.custdb.mycompany.com:9200"
19 | , es_indices = ""),
20 | regexp = "get_fields must be passed a valid es_indices")
21 | })
22 |
23 | # works as expected when mocked
24 | test_that("get_fields works as expected when mocked", {
25 |
26 | test_json <- system.file("testdata", "two_index_mapping.json", package = "uptasticsearch")
27 | aliasDT <- data.table::data.table(
28 | alias = c("alias1", "alias2")
29 | , index = c("company", "otherIndex")
30 | )
31 | testthat::with_mocked_bindings(
32 | `.content` = function(...) {
33 | return(jsonlite::fromJSON(txt = test_json))
34 | },
35 | `.get_aliases` = function(...) {
36 | return(aliasDT)
37 | },
38 | `.get_es_version` = function(...) {
39 | return("6")
40 | }
41 | ,
42 | `.request` = function(...) {
43 | return(NULL)
44 | },
45 | `.stop_for_status` = function(...) {
46 | return(NULL)
47 | },
48 | {
49 | outDT <- get_fields(
50 | es_host = "http://db.mycompany.com:9200"
51 | , es_indices = c("company", "hotel")
52 | )
53 | data.table::setkey(outDT, NULL)
54 | expected <- data.table::data.table(
55 | index = c(rep("alias1", 3), rep("hotel", 5))
56 | , type = c(rep("building", 3), rep("bed_room", 2), rep("conference_room", 3))
57 | , field = c("id", "address", "address.keyword", "num_beds", "description"
58 | , "num_people", "purpose", "purpose.keyword")
59 | , data_type = c("long", "text", "keyword", "integer", "text", "integer"
60 | , "text", "keyword")
61 | )
62 | expect_identical(outDT, expected)
63 | }
64 | )
65 | })
66 |
67 | #--- .flatten_mapping
68 |
69 | # Works if one index is passed
70 | test_that(".flatten_mapping should work if the mapping for one index is provided", {
71 | test_json <- system.file("testdata", "one_index_mapping.json", package = "uptasticsearch")
72 | mapping <- jsonlite::fromJSON(txt = test_json)
73 | mappingDT <- uptasticsearch:::.flatten_mapping(mapping = mapping)
74 | expected <- data.table::data.table(
75 | index = rep("basketball", 5)
76 | , type = rep("players", 5)
77 | , field = c("team", "name.first", "name.last", "age", "position")
78 | , data_type = c("keyword", "text", "text", "integer", "keyword")
79 | )
80 | expect_identical(mappingDT, expected)
81 | })
82 |
83 | # works if multiple indices are passed
84 | test_that(".flatten_mapping should work if the mapping for multiple indices are provided", {
85 | test_json <- system.file("testdata", "two_index_mapping.json", package = "uptasticsearch")
86 | mapping <- jsonlite::fromJSON(txt = test_json)
87 | mappingDT <- uptasticsearch:::.flatten_mapping(mapping = mapping)
88 | expected <- data.table::data.table(
89 | index = c(rep("company", 3), rep("hotel", 5))
90 | , type = c(rep("building", 3), rep("bed_room", 2), rep("conference_room", 3))
91 | , field = c("id", "address", "address.keyword", "num_beds", "description"
92 | , "num_people", "purpose", "purpose.keyword")
93 | , data_type = c("long", "text", "keyword", "integer", "text", "integer"
94 | , "text", "keyword")
95 | )
96 | expect_identical(mappingDT, expected)
97 | })
98 |
99 | ##### TEST TEAR DOWN #####
100 | futile.logger::flog.threshold(origLogThreshold)
101 |
--------------------------------------------------------------------------------
/r-pkg/tests/testthat/test-parse_date_time.R:
--------------------------------------------------------------------------------
1 | # Configure logger (suppress all logs in testing)
2 | loggerOptions <- futile.logger::logger.options()
3 | if (!identical(loggerOptions, list())) {
4 | origLogThreshold <- loggerOptions[[1]][["threshold"]]
5 | } else {
6 | origLogThreshold <- futile.logger::INFO
7 | }
8 | futile.logger::flog.threshold(0)
9 |
10 | # Correctly adjusts UTC date-times
11 | test_that("parse_date_time should transform the indicated date_cols to POSIXct with timezone UTC if they're given in UTC", {
12 | testDT <- data.table::data.table(
13 | id = c("a", "b", "c")
14 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
15 | )
16 | newDT <- parse_date_time(testDT, date_cols = "dateTime")
17 |
18 | expect_true(inherits(newDT$dateTime, "POSIXct"))
19 | expect_identical(
20 | newDT
21 | , data.table::data.table(
22 | id = c("a", "b", "c")
23 | , dateTime = as.POSIXct(c("2016-07-16 21:15:00", "2015-04-16 02:15:00", "2015-03-04 15:25:00"), tz = "UTC")
24 | )
25 | )
26 | })
27 |
28 | # Correctly adjusts non-UTC date-times
29 | test_that("parse_date_time should transform the indicated date_cols to POSIXct with timezone UTC correctly even if the dates are not specified in UTC", { # nolint[line_length]
30 | testDT <- data.table::data.table(
31 | id = c("a", "b", "c")
32 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00A", "2015-03-04T15:25:00B")
33 | )
34 | newDT <- parse_date_time(testDT, date_cols = "dateTime")
35 |
36 | expect_true(inherits(newDT$dateTime, "POSIXct"))
37 | expect_identical(
38 | newDT
39 | , data.table::data.table(
40 | id = c("a", "b", "c")
41 | , dateTime = as.POSIXct(c("2016-07-16 21:15:00", "2015-04-16 01:15:00", "2015-03-04 13:25:00"), tz = "UTC")
42 | )
43 | )
44 | })
45 |
46 | # Returns object of class POSIXct
47 | test_that("parse_date_time should transform the indicated date_cols to class POSIXct", {
48 | testDT <- data.table::data.table(
49 | id = c("a", "b", "c")
50 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
51 | )
52 | newDT <- parse_date_time(testDT, date_cols = "dateTime")
53 |
54 | expect_true(inherits(newDT$dateTime, "POSIXct"))
55 | expect_identical(
56 | newDT
57 | , data.table::data.table(
58 | id = c("a", "b", "c")
59 | , dateTime = as.POSIXct(c("2016-07-16 21:15:00", "2015-04-16 02:15:00", "2015-03-04 15:25:00"), tz = "UTC")
60 | )
61 | )
62 | })
63 |
64 | # Works for one date column
65 | test_that("parse_date_time should perform adjustments only on the columns you ask it to", {
66 | testDT <- data.table::data.table(
67 | id = c("a", "b", "c")
68 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
69 | , otherDate = c("2014-03-11T12:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
70 | )
71 | newDT <- parse_date_time(testDT, date_cols = "dateTime")
72 |
73 | expect_true(all(c("dateTime", "otherDate") %in% names(newDT)))
74 | expect_true(inherits(newDT$dateTime, "POSIXct"))
75 | expect_true(is.character(newDT$otherDate))
76 | expect_identical(
77 | newDT
78 | , data.table::data.table(
79 | id = c("a", "b", "c")
80 | , dateTime = as.POSIXct(c("2016-07-16 21:15:00", "2015-04-16 02:15:00", "2015-03-04 15:25:00"), tz = "UTC")
81 | , otherDate = c("2014-03-11T12:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
82 | )
83 | )
84 | })
85 |
86 | # works for multiple date columns
87 | test_that("parse_date_time should perform adjustments for multiple data columns if asked", {
88 | testDT <- data.table::data.table(
89 | id = c("a", "b", "c")
90 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
91 | , otherDate = c("2014-03-11T12:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
92 | )
93 | newDT <- parse_date_time(testDT, date_cols = c("dateTime", "otherDate"))
94 |
95 | expect_true(all(c("dateTime", "otherDate") %in% names(newDT)))
96 | expect_true(inherits(newDT$dateTime, "POSIXct"))
97 | expect_true(inherits(newDT$otherDate, "POSIXct"))
98 | expect_identical(
99 | newDT
100 | , data.table::data.table(
101 | id = c("a", "b", "c")
102 | , dateTime = as.POSIXct(c("2016-07-16 21:15:00", "2015-04-16 02:15:00", "2015-03-04 15:25:00"), tz = "UTC")
103 | , otherDate = as.POSIXct(c("2014-03-11 12:15:00", "2015-04-16 02:15:00", "2015-03-04 15:25:00"), tz = "UTC")
104 | )
105 | )
106 | })
107 |
108 | # Gives an informative error if date_cols is not character vector
109 | test_that("parse_date_time should give an informative error if you pass non-character stuff to date_cols", {
110 | testDT <- data.table::data.table(
111 | id = c("a", "b", "c")
112 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
113 | )
114 |
115 | expect_error({
116 | parse_date_time(testDT, date_cols = list("dateTime"))
117 | }, regexp = "The date_cols argument in parse_date_time expects a character vector")
118 | })
119 |
120 | # Gives informative error if inputDT is not a data.table
121 | test_that("parse_date_time should give an informative error if you don't pass it a data.table", {
122 | testDF <- data.frame(
123 | id = c("a", "b", "c")
124 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
125 | )
126 |
127 | expect_error({
128 | parse_date_time(testDF, date_cols = "dateTime")
129 | }, regexp = "parse_date_time expects to receive a data\\.table object") # nolint[non_portable_path]
130 | })
131 |
132 | # Gives informative error if you ask to adjust date_cols that don't exist
133 | test_that("parse_date_time should give an informative error if you give it dateCol names that don't exist in the DT", {
134 | testDT <- data.table::data.table(
135 | id = c("a", "b", "c")
136 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
137 | )
138 |
139 | expect_error({
140 | parse_date_time(testDT, date_cols = c("dateTime", "dateTyme"))
141 | }, regexp = "do not actually exist in input_df")
142 | })
143 |
144 | # Does not have side effects (works on a copy)
145 | test_that("parse_date_time should leave the original DT unchanged", {
146 | testDT <- data.table::data.table(
147 | id = c("a", "b", "c")
148 | , dateTime = c("2016-07-16T21:15:00Z", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
149 | )
150 |
151 | beforeDT <- data.table::copy(testDT)
152 | origAddress <- data.table::address(testDT)
153 | newDT <- parse_date_time(testDT, date_cols = "dateTime")
154 |
155 | expect_identical(testDT, beforeDT)
156 | expect_identical(origAddress, data.table::address(testDT))
157 | expect_true(origAddress != data.table::address(newDT))
158 | })
159 |
160 | # Substitutes in assume_tz if missing a timezone
161 | test_that("parse_date_time should substitute assume_tz when a timestamp is missing a timezone", {
162 |
163 | testDT <- data.table::data.table(
164 | id = c("a", "b", "c")
165 | , dateTime = c("2016-07-16T21:15:00", "2015-04-16T02:15:00Z", "2015-03-04T15:25:00Z")
166 | )
167 | beforeDT <- data.table::copy(testDT)
168 | origAddress <- data.table::address(testDT)
169 | newDT <- parse_date_time(testDT, date_cols = "dateTime", assume_tz = "UTC")
170 |
171 | expect_identical(newDT[id == "a", dateTime], as.POSIXct("2016-07-16 21:15:00", tz = "UTC"))
172 | })
173 |
174 | ##### TEST TEAR DOWN #####
175 | futile.logger::flog.threshold(origLogThreshold)
176 |
--------------------------------------------------------------------------------
/r-pkg/tests/testthat/test-unpack_nested_data.R:
--------------------------------------------------------------------------------
1 |
2 | # Configure logger (suppress all logs in testing)
3 | loggerOptions <- futile.logger::logger.options()
4 | if (!identical(loggerOptions, list())) {
5 | origLogThreshold <- loggerOptions[[1]][["threshold"]]
6 | } else {
7 | origLogThreshold <- futile.logger::INFO
8 | }
9 | futile.logger::flog.threshold(0)
10 |
11 | #--- unpack_nested_data
12 |
13 | # Should work with result of chomp_hits
14 | test_that("unpack_nested_data should work with the result of chomp_hits", {
15 | # nolint start
16 | test_json <- '[{"_source":{"dateTime":"2017-01-01","username":"Austin1","details":{
17 | "interactions":400,"userType":"active","appData":[{"appName":"farmville","minutes":500},
18 | {"appName":"candy_crush","value":350},{"appName":"angry_birds","typovalue":422}]}}},
19 | {"_source":{"dateTime":"2017-02-02","username":"Austin2","details":{"interactions":5,
20 | "userType":"very_active","appData":[{"appName":"minesweeper","value":28},{"appName":
21 | "pokemon_go","value":190},{"appName":"pokemon_stay","value":1},{"appName":"block_dude",
22 | "value":796}]}}}]'
23 | # nolint end
24 | sampleChompedDT <- chomp_hits(test_json
25 | , keep_nested_data_cols = TRUE)
26 | unpackedDT <- unpack_nested_data(chomped_df = sampleChompedDT
27 | , col_to_unpack = "details.appData")
28 | expect_true(data.table::is.data.table(unpackedDT))
29 | expect_equivalent(dim(unpackedDT), c(7, 8))
30 | expect_named(unpackedDT, c("dateTime", "username", "details.interactions",
31 | "details.userType", "appName", "minutes", "value", "typovalue"))
32 | expect_identical(unpackedDT$appName, c("farmville", "candy_crush", "angry_birds",
33 | "minesweeper", "pokemon_go", "pokemon_stay",
34 | "block_dude"))
35 | expect_identical(unpackedDT$username, c(rep("Austin1", 3), rep("Austin2", 4)))
36 | expect_true(sum(is.na(unpackedDT$minutes)) == 6)
37 | })
38 |
39 | # Should work if the array is a simple array rather than an array of maps
40 | test_that("unpack_nested_data should work if the array is a simple array", {
41 | # nolint start
42 | test_json <- '[{"_source":{"dateTime":"2017-01-01","username":"Austin1","details":{
43 | "interactions":400,"userType":"active","minutes":[500,350,422]}}},
44 | {"_source":{"dateTime":"2017-02-02","username":"Austin2","details":{"interactions":0,
45 | "userType":"never","minutes":[]}}},
46 | {"_source":{"dateTime":"2017-03-03","username":"Austin3","details":{"interactions":5,
47 | "userType":"very_active","minutes":[28,190,1,796]}}}]'
48 | # nolint end
49 | sampleChompedDT <- chomp_hits(test_json
50 | , keep_nested_data_cols = TRUE)
51 | unpackedDT <- unpack_nested_data(chomped_df = sampleChompedDT
52 | , col_to_unpack = "details.minutes")
53 | expect_true(data.table::is.data.table(unpackedDT))
54 | expect_equivalent(dim(unpackedDT), c(8, 5))
55 | expect_named(unpackedDT, c("dateTime", "username", "details.interactions",
56 | "details.userType", "details.minutes"))
57 | expect_equivalent(unpackedDT$details.minutes, c(500, 350, 422, NA, 28, 190, 1, 796))
58 | expect_identical(unpackedDT$username, c(rep("Austin1", 3), "Austin2", rep("Austin3", 4)))
59 | })
60 |
61 | # Should break if chomped_df is not a data.table
62 | test_that("unpack_nested_data should break if you don't pass a data.table", {
63 | expect_error(unpack_nested_data(chomped_df = 42
64 | , col_to_unpack = "blah"),
65 | regexp = "chomped_df must be a data.table")
66 | })
67 |
68 | # Should break if col_to_unpack is not a string
69 | test_that("unpack_nested_data should break if col_to_unpack is not a string", {
70 | expect_error(unpack_nested_data(chomped_df = data.table::data.table(wow = 7)
71 | , col_to_unpack = 8),
72 | regexp = "col_to_unpack must be a character of length 1")
73 | })
74 |
75 | # Should break if col_to_unpack is not of length 1
76 | test_that("unpack_nested_data should break if col_to_unpack is not of length 1", {
77 | expect_error(unpack_nested_data(chomped_df = data.table::data.table(wow = 7)
78 | , col_to_unpack = c("a", "b")),
79 | regexp = "col_to_unpack must be a character of length 1")
80 | })
81 |
82 | # Should break if col_to_unpack is not one of the column names
83 | test_that("unpack_nested_data should break if col_to_unpack is not one of the column names", {
84 | expect_error(unpack_nested_data(chomped_df = data.table::data.table(wow = 7)
85 | , col_to_unpack = "a"),
86 | regexp = "col_to_unpack must be one of the column names")
87 | })
88 |
89 | # Should break if the column doesn't include any data
90 | test_that("unpack_nested_data should break if the column doesn't include any data", {
91 | expect_error(unpack_nested_data(chomped_df = data.table::data.table(wow = 7, dang = list())
92 | , col_to_unpack = "dang"),
93 | regexp = "The column given to unpack_nested_data had no data in it")
94 | })
95 |
96 | test_that("unpack_nested_data should break if the column contains something that is not a dataframe or vector", {
97 | DT <- data.table::data.table(x = 1:2, y = list(list(2), 3))
98 | expect_error(unpack_nested_data(chomped_df = DT, col_to_unpack = "y")
99 | , regexp = "must be a data frame or a vector")
100 | })
101 |
102 | test_that("unpack_nested_data should handle NA and empty rows", {
103 | DT <- data.table::data.table(x = 1:2, y = list(z = NA, data.table::data.table(w = 5:6, z = 7:8)))
104 | DT2 <- data.table::data.table(x = 1:2, y = list(z = list(), data.table::data.table(w = 5:6, z = 7:8)))
105 | unpackedDT <- data.table::data.table(
106 | x = c(1, 2, 2)
107 | , w = c(NA, 5, 6)
108 | , z = c(NA, 7, 8)
109 | )
110 | expect_equal(unpack_nested_data(DT, col_to_unpack = "y"), unpackedDT)
111 | expect_equal(unpack_nested_data(DT2, col_to_unpack = "y"), unpackedDT)
112 | })
113 |
114 |
115 | ##### TEST TEAR DOWN #####
116 | futile.logger::flog.threshold(origLogThreshold)
117 |
--------------------------------------------------------------------------------
/r-pkg/vignettes/FAQ.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Frequently Asked Questions"
3 | author: "Stephanie Kirmer"
4 | date: "`r Sys.Date()`"
5 | output:
6 | markdown::html_format:
7 | options:
8 | toc: true
9 | toc_depth: 2
10 | number_sections: true
11 | vignette: >
12 | %\VignetteIndexEntry{FAQ - Help with Uptasticsearch Functionalities}
13 | %\VignetteEngine{knitr::knitr}
14 | %\VignetteEncoding{UTF-8}
15 | ---
16 |
17 | # Introduction
18 |
19 | Welcome to uptasticsearch! This package exists to help R users connect to Elasticsearch clusters smoothly and easily. However, sometimes things go wrong! This FAQ is an ongoing project to catalog common errors and questions people have, and to provide simple, useful answers.
20 |
21 | If your question is not covered here and searching online doesn't help, please open an issue on GitHub so somebody can help you.
22 |
23 | ***
24 |
25 | # Questions
26 |
27 | ## Query Syntax Problems
28 | Developing your own Elasticsearch queries is sometimes tough. If the queries you write are being rejected as invalid, there can be many reasons. You are probably in this situation if your error is `Bad Request (HTTP 400)` or similar.
29 |
30 | ### Troubleshooting Guide
31 |
32 | * Are all your **brackets and curly braces** present and correctly paired? Check just to make sure.
33 | * Are you **quoting** things correctly? This syntax calls for an awful lot of quotation marks, so don't forget any. A quick programmatic check is sketched just below this list.
34 |
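35 | For example, one cheap way to catch unbalanced braces or missing quotation marks before Elasticsearch ever sees the request is to validate the query body as JSON on the R side. Below is a minimal sketch using `jsonlite` (already a dependency of this package); the `speaker`/`HAMLET` query is purely illustrative:
36 | 
37 | ```r
38 | # an illustrative query body -- substitute your own fields and values
39 | query_body <- '{
40 |   "query": {
41 |     "match": {"speaker": "HAMLET"}
42 |   },
43 |   "size": 10
44 | }'
45 | 
46 | # jsonlite::validate() returns FALSE (with the parse error attached as an
47 | # attribute) when the JSON is malformed, which catches most bracket and
48 | # quoting mistakes early
49 | jsonlite::validate(query_body)
50 | ```
51 | 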
35 | ## Query Returns No Results
36 |
37 | Even after you have verified that your query is correctly structured and written, you might still run into trouble. What should you do if you get the message `Query is syntactically valid but 0 documents were matched. Returning NULL`?
38 |
39 | ### Troubleshooting Guide
40 |
41 | * Are your **search terms** named and described correctly? All spelled right?
42 | * Are you looking in the **correct index**? Perhaps your document is in a different one.
43 | * If you are passing **dates or datetimes**, is your input formatted exactly the way the index expects?
44 | * IMPORTANT: **Are the terms you are using indexed**? This is a tricky one. A term may exist and hold data in your documents, but if your particular Elasticsearch cluster has not indexed it, you won't be able to search on it. That does not mean the documents aren't there, only that this term can't be used for searching. A quick way to check which fields are indexed is sketched just after this list.
45 | * Are you sure the **document exists**? It might just not be there.
46 |
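47 | If you suspect that a term is missing, misspelled, or simply not indexed, it can help to list the fields that Elasticsearch actually knows about before reworking the query. Here is a minimal sketch using this package's `get_fields()`; the host URL and the `shakespeare` index are placeholders, and you should check `?get_fields` for the exact arguments your installed version accepts:
48 | 
49 | ```r
50 | library(uptasticsearch)
51 | 
52 | # placeholders -- point these at your own cluster and index
53 | es_host <- "http://localhost:9200"
54 | 
55 | # get_fields() asks the cluster for its mappings and returns a data.table of
56 | # the indexed fields, so you can confirm that the term you are querying
57 | # exists and is spelled the way you expect
58 | fieldDT <- get_fields(es_host = es_host, es_indices = "shakespeare")
59 | print(fieldDT)
60 | ```
61 | 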
47 | ***
48 |
49 | # Contribute to this Guide!
50 |
51 | We are always happy to get more questions and answers to add to this guide. If you have figured out the solution to a tricky issue, please submit a PR on GitHub adding it here.
52 |
--------------------------------------------------------------------------------
/setup_local.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | echo "collecting arguments..."
6 |
7 | ES_VERSION=${1}
8 | echo "Elasticsearch version: $ES_VERSION"
9 |
10 | WDIR=$(pwd)
11 | TESTDIR=${WDIR}/sandbox
12 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample.json
13 | ES_HOST="127.0.0.1"
14 | ES_PORT="9200"
15 |
16 | echo "Starting up Elasticsearch..."
17 |
18 | case "${ES_VERSION}" in
19 |
20 | 1.7.6)
21 | docker run --rm -d -p "${ES_PORT}:9200" elasticsearch:1.7.6
22 | MAPPING_FILE=$(pwd)/test-data/legacy_shakespeare_mapping.json
23 | ;;
24 | 2.4.6)
25 | docker run --rm -d -p "${ES_PORT}:9200" elasticsearch:2.4.6
26 | MAPPING_FILE=$(pwd)/test-data/legacy_shakespeare_mapping.json
27 | ;;
28 | 5.6.16)
29 | docker run --rm -d -p "${ES_PORT}:9200" \
30 | -e "xpack.security.enabled=false" \
31 | docker.elastic.co/elasticsearch/elasticsearch:5.6.16
32 | MAPPING_FILE=$(pwd)/test-data/es5_shakespeare_mapping.json
33 | ;;
34 | 6.8.15)
35 | docker run --rm -d -p "${ES_PORT}:9200" \
36 | -e "discovery.type=single-node" \
37 | -e "xpack.security.enabled=false" \
38 | docker.elastic.co/elasticsearch/elasticsearch:6.8.15
39 | MAPPING_FILE=$(pwd)/test-data/es6_shakespeare_mapping.json
40 | ;;
41 | 7.0.1)
42 | docker run --rm -d -p "${ES_PORT}:9200" \
43 | -e "discovery.type=single-node" \
44 | -e "xpack.security.enabled=false" \
45 | docker.elastic.co/elasticsearch/elasticsearch:7.0.1
46 | MAPPING_FILE=$(pwd)/test-data/es7_shakespeare_mapping.json
47 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample_es7.json
48 | ;;
49 | 7.17.22)
50 | docker run --rm -d -p "${ES_PORT}:9200" \
51 | -e "discovery.type=single-node" \
52 | -e "xpack.security.enabled=false" \
53 | docker.elastic.co/elasticsearch/elasticsearch:7.17.22
54 | MAPPING_FILE=$(pwd)/test-data/es7_shakespeare_mapping.json
55 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample_es7.json
56 | ;;
57 | 8.0.1)
58 | docker run --rm -d -p "${ES_PORT}:9200" \
59 | -e "discovery.type=single-node" \
60 | -e "xpack.security.enabled=false" \
61 | docker.elastic.co/elasticsearch/elasticsearch:8.0.1
62 | MAPPING_FILE=$(pwd)/test-data/es7_shakespeare_mapping.json
63 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample_es7.json
64 | ;;
65 | 8.5.3)
66 | docker run --rm -d -p "${ES_PORT}:9200" \
67 | -e "discovery.type=single-node" \
68 | -e "xpack.security.enabled=false" \
69 | docker.elastic.co/elasticsearch/elasticsearch:8.5.3
70 | MAPPING_FILE=$(pwd)/test-data/es7_shakespeare_mapping.json
71 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample_es7.json
72 | ;;
73 | 8.10.4)
74 | docker run --rm -d -p "${ES_PORT}:9200" \
75 | -e "discovery.type=single-node" \
76 | -e "xpack.security.enabled=false" \
77 | docker.elastic.co/elasticsearch/elasticsearch:8.10.4
78 | MAPPING_FILE=$(pwd)/test-data/es7_shakespeare_mapping.json
79 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample_es7.json
80 | ;;
81 | 8.15.5)
82 | docker run --rm -d -p "${ES_PORT}:9200" \
83 | -e "discovery.type=single-node" \
84 | -e "xpack.security.enabled=false" \
85 | docker.elastic.co/elasticsearch/elasticsearch:8.15.5
86 | MAPPING_FILE=$(pwd)/test-data/es7_shakespeare_mapping.json
87 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample_es7.json
88 | ;;
89 | 8.17.2)
90 | docker run --rm -d -p "${ES_PORT}:9200" \
91 | -e "discovery.type=single-node" \
92 | -e "xpack.security.enabled=false" \
93 | docker.elastic.co/elasticsearch/elasticsearch:8.17.2
94 | MAPPING_FILE=$(pwd)/test-data/es7_shakespeare_mapping.json
95 | SAMPLE_DATA_FILE=$(pwd)/test-data/sample_es7.json
96 | ;;
97 | *)
98 | echo "Did not recognize version ${ES_VERSION}. Not starting Elasticsearch"
99 | exit 1
100 | ;;
101 | esac
102 |
103 | echo "Elasticsearch v${ES_VERSION} is now running at http://${ES_HOST}:9200"
104 |
105 | echo "Setting up local testing environment"
106 |
107 | # Creating testing directory
108 | mkdir -p "${TESTDIR}"
109 |
110 | # Get data
111 | cp "${MAPPING_FILE}" "${TESTDIR}/shakespeare_mapping.json"
112 | cp "${SAMPLE_DATA_FILE}" "${TESTDIR}/sample.json"
113 | cd "${TESTDIR}"
114 |
115 | # give the cluster a chance to finish starting up
116 | sleep 30
117 |
118 | # Create shakespeare index and shakespeare mapping
119 | curl -X PUT "http://${ES_HOST}:9200/shakespeare" \
120 | -H 'Content-Type: application/json' \
121 | -d @shakespeare_mapping.json
122 |
123 | # Upload data
124 | curl -X POST "http://${ES_HOST}:9200/shakespeare/_bulk" \
125 | -H 'Content-Type: application/json' \
126 | --data-binary @sample.json
127 |
128 | # Add an intentionally empty index
129 | curl -X PUT "http://${ES_HOST}:9200/empty_index" \
130 | -H 'Content-Type: application/json' \
131 | -d @shakespeare_mapping.json
132 |
133 | # Refresh all indices
134 | curl -X POST "http://${ES_HOST}:9200/_refresh"
135 |
136 | # Check that we got something
137 | curl -X GET "http://${ES_HOST}:9200/shakespeare/_search?size=1"
138 |
139 | cd "${WDIR}"
140 |
141 | echo ""
142 | echo "Your local environment is ready."
143 |
--------------------------------------------------------------------------------
/test-data/aggs_cardinality.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 30,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 12,
6 | "successful": 12,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2651,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "number_of_things": {
16 | "value": 777
17 | }
18 | }
19 | }
--------------------------------------------------------------------------------
/test-data/aggs_date_histogram.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 41,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2627223,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "report_week": {
16 | "buckets": [
17 | {
18 | "key_as_string": "2017-02-27T00:00:00.000Z",
19 | "key": 1488153600000,
20 | "doc_count": 201674
21 | },
22 | {
23 | "key_as_string": "2017-03-06T00:00:00.000Z",
24 | "key": 1488758400000,
25 | "doc_count": 295596
26 | },
27 | {
28 | "key_as_string": "2017-03-13T00:00:00.000Z",
29 | "key": 1489363200000,
30 | "doc_count": 277618
31 | },
32 | {
33 | "key_as_string": "2017-03-20T00:00:00.000Z",
34 | "key": 1489968000000,
35 | "doc_count": 259233
36 | },
37 | {
38 | "key_as_string": "2017-03-27T00:00:00.000Z",
39 | "key": 1490572800000,
40 | "doc_count": 265538
41 | },
42 | {
43 | "key_as_string": "2017-04-03T00:00:00.000Z",
44 | "key": 1491177600000,
45 | "doc_count": 299502
46 | },
47 | {
48 | "key_as_string": "2017-04-10T00:00:00.000Z",
49 | "key": 1491782400000,
50 | "doc_count": 303826
51 | },
52 | {
53 | "key_as_string": "2017-04-17T00:00:00.000Z",
54 | "key": 1492387200000,
55 | "doc_count": 305400
56 | },
57 | {
58 | "key_as_string": "2017-04-24T00:00:00.000Z",
59 | "key": 1492992000000,
60 | "doc_count": 325883
61 | },
62 | {
63 | "key_as_string": "2017-05-01T00:00:00.000Z",
64 | "key": 1493596800000,
65 | "doc_count": 92953
66 | }
67 | ]
68 | }
69 | }
70 | }
--------------------------------------------------------------------------------
/test-data/aggs_date_histogram_cardinality.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 38,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2627223,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "report_week": {
16 | "buckets": [
17 | {
18 | "key_as_string": "2017-02-27T00:00:00.000Z",
19 | "key": 1488153600000,
20 | "doc_count": 201674,
21 | "num_customers": {
22 | "value": 4
23 | }
24 | },
25 | {
26 | "key_as_string": "2017-03-06T00:00:00.000Z",
27 | "key": 1488758400000,
28 | "doc_count": 295596,
29 | "num_customers": {
30 | "value": 5
31 | }
32 | },
33 | {
34 | "key_as_string": "2017-03-13T00:00:00.000Z",
35 | "key": 1489363200000,
36 | "doc_count": 277618,
37 | "num_customers": {
38 | "value": 5
39 | }
40 | },
41 | {
42 | "key_as_string": "2017-03-20T00:00:00.000Z",
43 | "key": 1489968000000,
44 | "doc_count": 259233,
45 | "num_customers": {
46 | "value": 5
47 | }
48 | },
49 | {
50 | "key_as_string": "2017-03-27T00:00:00.000Z",
51 | "key": 1490572800000,
52 | "doc_count": 265538,
53 | "num_customers": {
54 | "value": 5
55 | }
56 | },
57 | {
58 | "key_as_string": "2017-04-03T00:00:00.000Z",
59 | "key": 1491177600000,
60 | "doc_count": 299502,
61 | "num_customers": {
62 | "value": 5
63 | }
64 | },
65 | {
66 | "key_as_string": "2017-04-10T00:00:00.000Z",
67 | "key": 1491782400000,
68 | "doc_count": 303826,
69 | "num_customers": {
70 | "value": 5
71 | }
72 | },
73 | {
74 | "key_as_string": "2017-04-17T00:00:00.000Z",
75 | "key": 1492387200000,
76 | "doc_count": 305400,
77 | "num_customers": {
78 | "value": 4
79 | }
80 | },
81 | {
82 | "key_as_string": "2017-04-24T00:00:00.000Z",
83 | "key": 1492992000000,
84 | "doc_count": 325883,
85 | "num_customers": {
86 | "value": 4
87 | }
88 | },
89 | {
90 | "key_as_string": "2017-05-01T00:00:00.000Z",
91 | "key": 1493596800000,
92 | "doc_count": 92953,
93 | "num_customers": {
94 | "value": 4
95 | }
96 | }
97 | ]
98 | }
99 | }
100 | }
--------------------------------------------------------------------------------
/test-data/aggs_date_histogram_extended_stats.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 27,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2627223,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "report_week": {
16 | "buckets": [
17 | {
18 | "key_as_string": "2017-02-27T00:00:00.000Z",
19 | "key": 1488153600000,
20 | "doc_count": 201674,
21 | "some_score": {
22 | "count": 201674,
23 | "min": 0,
24 | "max": 3,
25 | "avg": 1.572527941132719,
26 | "sum": 317138,
27 | "sum_of_squares": 575494,
28 | "variance": 0.3807413638101676,
29 | "std_deviation": 0.6170424327468635,
30 | "std_deviation_bounds": {
31 | "upper": 2.806612806626446,
32 | "lower": 0.338443075638992
33 | }
34 | }
35 | },
36 | {
37 | "key_as_string": "2017-03-06T00:00:00.000Z",
38 | "key": 1488758400000,
39 | "doc_count": 295596,
40 | "some_score": {
41 | "count": 295596,
42 | "min": 0,
43 | "max": 7,
44 | "avg": 1.5650110285660157,
45 | "sum": 462611,
46 | "sum_of_squares": 832155,
47 | "variance": 0.3659172758225649,
48 | "std_deviation": 0.604910965202785,
49 | "std_deviation_bounds": {
50 | "upper": 2.7748329589715857,
51 | "lower": 0.35518909816044575
52 | }
53 | }
54 | },
55 | {
56 | "key_as_string": "2017-03-13T00:00:00.000Z",
57 | "key": 1489363200000,
58 | "doc_count": 277618,
59 | "some_score": {
60 | "count": 277618,
61 | "min": 0,
62 | "max": 7,
63 | "avg": 1.5557384607626306,
64 | "sum": 431901,
65 | "sum_of_squares": 772061,
66 | "variance": 0.36069708397207323,
67 | "std_deviation": 0.6005806223747759,
68 | "std_deviation_bounds": {
69 | "upper": 2.7568997055121827,
70 | "lower": 0.3545772160130787
71 | }
72 | }
73 | },
74 | {
75 | "key_as_string": "2017-03-20T00:00:00.000Z",
76 | "key": 1489968000000,
77 | "doc_count": 259233,
78 | "some_score": {
79 | "count": 259233,
80 | "min": 0,
81 | "max": 7,
82 | "avg": 1.5482635312633808,
83 | "sum": 401361,
84 | "sum_of_squares": 717589,
85 | "variance": 0.37100369485597195,
86 | "std_deviation": 0.609100726363031,
87 | "std_deviation_bounds": {
88 | "upper": 2.766464983989443,
89 | "lower": 0.3300620785373187
90 | }
91 | }
92 | },
93 | {
94 | "key_as_string": "2017-03-27T00:00:00.000Z",
95 | "key": 1490572800000,
96 | "doc_count": 265538,
97 | "some_score": {
98 | "count": 265538,
99 | "min": 0,
100 | "max": 7,
101 | "avg": 1.5432329836031,
102 | "sum": 409787,
103 | "sum_of_squares": 729499,
104 | "variance": 0.36568093963288295,
105 | "std_deviation": 0.6047155857367023,
106 | "std_deviation_bounds": {
107 | "upper": 2.7526641550765047,
108 | "lower": 0.3338018121296955
109 | }
110 | }
111 | },
112 | {
113 | "key_as_string": "2017-04-03T00:00:00.000Z",
114 | "key": 1491177600000,
115 | "doc_count": 299502,
116 | "some_score": {
117 | "count": 299502,
118 | "min": 0,
119 | "max": 7,
120 | "avg": 1.539488884882238,
121 | "sum": 461080,
122 | "sum_of_squares": 818386,
123 | "variance": 0.3624632388381306,
124 | "std_deviation": 0.6020491996823271,
125 | "std_deviation_bounds": {
126 | "upper": 2.743587284246892,
127 | "lower": 0.33539048551758377
128 | }
129 | }
130 | },
131 | {
132 | "key_as_string": "2017-04-10T00:00:00.000Z",
133 | "key": 1491782400000,
134 | "doc_count": 303826,
135 | "some_score": {
136 | "count": 303826,
137 | "min": 0,
138 | "max": 7,
139 | "avg": 1.5399274584795244,
140 | "sum": 467870,
141 | "sum_of_squares": 831860,
142 | "variance": 0.36657211693925107,
143 | "std_deviation": 0.6054519939179746,
144 | "std_deviation_bounds": {
145 | "upper": 2.7508314463154733,
146 | "lower": 0.3290234706435753
147 | }
148 | }
149 | },
150 | {
151 | "key_as_string": "2017-04-17T00:00:00.000Z",
152 | "key": 1492387200000,
153 | "doc_count": 305400,
154 | "some_score": {
155 | "count": 305400,
156 | "min": 0,
157 | "max": 3,
158 | "avg": 1.5349738048461035,
159 | "sum": 468781,
160 | "sum_of_squares": 829427,
161 | "variance": 0.35972640730333577,
162 | "std_deviation": 0.5997719627519578,
163 | "std_deviation_bounds": {
164 | "upper": 2.734517730350019,
165 | "lower": 0.33542987934218793
166 | }
167 | }
168 | },
169 | {
170 | "key_as_string": "2017-04-24T00:00:00.000Z",
171 | "key": 1492992000000,
172 | "doc_count": 325883,
173 | "some_score": {
174 | "count": 325883,
175 | "min": 0,
176 | "max": 3,
177 | "avg": 1.506402604615767,
178 | "sum": 490911,
179 | "sum_of_squares": 851439,
180 | "variance": 0.34346495817661304,
181 | "std_deviation": 0.5860588350810975,
182 | "std_deviation_bounds": {
183 | "upper": 2.678520274777962,
184 | "lower": 0.3342849344535721
185 | }
186 | }
187 | },
188 | {
189 | "key_as_string": "2017-05-01T00:00:00.000Z",
190 | "key": 1493596800000,
191 | "doc_count": 92953,
192 | "some_score": {
193 | "count": 92953,
194 | "min": 0,
195 | "max": 3,
196 | "avg": 1.5381429324497327,
197 | "sum": 142975,
198 | "sum_of_squares": 252321,
199 | "variance": 0.34861719614213066,
200 | "std_deviation": 0.5904381391323994,
201 | "std_deviation_bounds": {
202 | "upper": 2.7190192107145315,
203 | "lower": 0.35726665418493386
204 | }
205 | }
206 | }
207 | ]
208 | }
209 | }
210 | }
--------------------------------------------------------------------------------
/test-data/aggs_date_histogram_histogram.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 64,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2627223,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "report_week": {
16 | "buckets": [
17 | {
18 | "key_as_string": "2017-02-27T00:00:00.000Z",
19 | "key": 1488153600000,
20 | "doc_count": 201674,
21 | "num_customers": {
22 | "buckets": [
23 | {
24 | "key": 0,
25 | "doc_count": 96724
26 | },
27 | {
28 | "key": 2,
29 | "doc_count": 104950
30 | }
31 | ]
32 | }
33 | },
34 | {
35 | "key_as_string": "2017-03-06T00:00:00.000Z",
36 | "key": 1488758400000,
37 | "doc_count": 295596,
38 | "num_customers": {
39 | "buckets": [
40 | {
41 | "key": 0,
42 | "doc_count": 141532
43 | },
44 | {
45 | "key": 2,
46 | "doc_count": 154061
47 | },
48 | {
49 | "key": 6,
50 | "doc_count": 3
51 | }
52 | ]
53 | }
54 | },
55 | {
56 | "key_as_string": "2017-03-13T00:00:00.000Z",
57 | "key": 1489363200000,
58 | "doc_count": 277618,
59 | "num_customers": {
60 | "buckets": [
61 | {
62 | "key": 0,
63 | "doc_count": 137844
64 | },
65 | {
66 | "key": 2,
67 | "doc_count": 139770
68 | },
69 | {
70 | "key": 6,
71 | "doc_count": 4
72 | }
73 | ]
74 | }
75 | },
76 | {
77 | "key_as_string": "2017-03-20T00:00:00.000Z",
78 | "key": 1489968000000,
79 | "doc_count": 259233,
80 | "num_customers": {
81 | "buckets": [
82 | {
83 | "key": 0,
84 | "doc_count": 131999
85 | },
86 | {
87 | "key": 2,
88 | "doc_count": 127233
89 | },
90 | {
91 | "key": 6,
92 | "doc_count": 1
93 | }
94 | ]
95 | }
96 | },
97 | {
98 | "key_as_string": "2017-03-27T00:00:00.000Z",
99 | "key": 1490572800000,
100 | "doc_count": 265538,
101 | "num_customers": {
102 | "buckets": [
103 | {
104 | "key": 0,
105 | "doc_count": 135852
106 | },
107 | {
108 | "key": 2,
109 | "doc_count": 129683
110 | },
111 | {
112 | "key": 6,
113 | "doc_count": 3
114 | }
115 | ]
116 | }
117 | },
118 | {
119 | "key_as_string": "2017-04-03T00:00:00.000Z",
120 | "key": 1491177600000,
121 | "doc_count": 299502,
122 | "num_customers": {
123 | "buckets": [
124 | {
125 | "key": 0,
126 | "doc_count": 152149
127 | },
128 | {
129 | "key": 2,
130 | "doc_count": 147352
131 | },
132 | {
133 | "key": 6,
134 | "doc_count": 1
135 | }
136 | ]
137 | }
138 | },
139 | {
140 | "key_as_string": "2017-04-10T00:00:00.000Z",
141 | "key": 1491782400000,
142 | "doc_count": 303826,
143 | "num_customers": {
144 | "buckets": [
145 | {
146 | "key": 0,
147 | "doc_count": 152587
148 | },
149 | {
150 | "key": 2,
151 | "doc_count": 151237
152 | },
153 | {
154 | "key": 6,
155 | "doc_count": 2
156 | }
157 | ]
158 | }
159 | },
160 | {
161 | "key_as_string": "2017-04-17T00:00:00.000Z",
162 | "key": 1492387200000,
163 | "doc_count": 305400,
164 | "num_customers": {
165 | "buckets": [
166 | {
167 | "key": 0,
168 | "doc_count": 155831
169 | },
170 | {
171 | "key": 2,
172 | "doc_count": 149569
173 | }
174 | ]
175 | }
176 | },
177 | {
178 | "key_as_string": "2017-04-24T00:00:00.000Z",
179 | "key": 1492992000000,
180 | "doc_count": 325883,
181 | "num_customers": {
182 | "buckets": [
183 | {
184 | "key": 0,
185 | "doc_count": 174351
186 | },
187 | {
188 | "key": 2,
189 | "doc_count": 151532
190 | }
191 | ]
192 | }
193 | },
194 | {
195 | "key_as_string": "2017-05-01T00:00:00.000Z",
196 | "key": 1493596800000,
197 | "doc_count": 92953,
198 | "num_customers": {
199 | "buckets": [
200 | {
201 | "key": 0,
202 | "doc_count": 47062
203 | },
204 | {
205 | "key": 2,
206 | "doc_count": 45891
207 | }
208 | ]
209 | }
210 | }
211 | ]
212 | }
213 | }
214 | }
--------------------------------------------------------------------------------
/test-data/aggs_date_histogram_percentiles.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 793,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2627223,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "report_week": {
16 | "buckets": [
17 | {
18 | "key_as_string": "2017-02-27T00:00:00.000Z",
19 | "key": 1488153600000,
20 | "doc_count": 201674,
21 | "some_score": {
22 | "values": {
23 | "1.0": -34.088625462317765,
24 | "5.0": -22.21437155815218,
25 | "25.0": 0,
26 | "50.0": 0,
27 | "75.0": 0.9635160402586782,
28 | "95.0": 39.92845350581187,
29 | "99.0": 54.1784650053889
30 | }
31 | }
32 | },
33 | {
34 | "key_as_string": "2017-03-06T00:00:00.000Z",
35 | "key": 1488758400000,
36 | "doc_count": 295596,
37 | "some_score": {
38 | "values": {
39 | "1.0": -34.08282723410825,
40 | "5.0": -22.21305824228935,
41 | "25.0": 0,
42 | "50.0": 0,
43 | "75.0": 11.410704161577858,
44 | "95.0": 39.92881631128357,
45 | "99.0": 54.41136809871141
46 | }
47 | }
48 | },
49 | {
50 | "key_as_string": "2017-03-13T00:00:00.000Z",
51 | "key": 1489363200000,
52 | "doc_count": 277618,
53 | "some_score": {
54 | "values": {
55 | "1.0": -34.08776755675079,
56 | "5.0": -22.204749690684626,
57 | "25.0": 0,
58 | "50.0": 0,
59 | "75.0": 0,
60 | "95.0": 40.69975507593814,
61 | "99.0": 55.713896441756184
62 | }
63 | }
64 | },
65 | {
66 | "key_as_string": "2017-03-20T00:00:00.000Z",
67 | "key": 1489968000000,
68 | "doc_count": 259233,
69 | "some_score": {
70 | "values": {
71 | "1.0": -34.090477892822264,
72 | "5.0": -22.183271306999618,
73 | "25.0": 0,
74 | "50.0": 0,
75 | "75.0": 0.0000076293945,
76 | "95.0": 41.17598738972316,
77 | "99.0": 55.81825399052243
78 | }
79 | }
80 | },
81 | {
82 | "key_as_string": "2017-03-27T00:00:00.000Z",
83 | "key": 1490572800000,
84 | "doc_count": 265538,
85 | "some_score": {
86 | "values": {
87 | "1.0": -33.6658307712262,
88 | "5.0": -22.1560300289784,
89 | "25.0": 0,
90 | "50.0": 0,
91 | "75.0": 0,
92 | "95.0": 40.98001281894075,
93 | "99.0": 53.650719571905
94 | }
95 | }
96 | },
97 | {
98 | "key_as_string": "2017-04-03T00:00:00.000Z",
99 | "key": 1491177600000,
100 | "doc_count": 299502,
101 | "some_score": {
102 | "values": {
103 | "1.0": -25.785169981452103,
104 | "5.0": -22.198657666424893,
105 | "25.0": 0,
106 | "50.0": 0,
107 | "75.0": 0,
108 | "95.0": 40.17350207009979,
109 | "99.0": 53.226301237661175
110 | }
111 | }
112 | },
113 | {
114 | "key_as_string": "2017-04-10T00:00:00.000Z",
115 | "key": 1491782400000,
116 | "doc_count": 303826,
117 | "some_score": {
118 | "values": {
119 | "1.0": -23.592878827369006,
120 | "5.0": -22.19212706309159,
121 | "25.0": 0,
122 | "50.0": 0,
123 | "75.0": 0,
124 | "95.0": 40.54370418041331,
125 | "99.0": 52.52955001574485
126 | }
127 | }
128 | },
129 | {
130 | "key_as_string": "2017-04-17T00:00:00.000Z",
131 | "key": 1492387200000,
132 | "doc_count": 305400,
133 | "some_score": {
134 | "values": {
135 | "1.0": -23.491448460820184,
136 | "5.0": -22.20308940649364,
137 | "25.0": 0,
138 | "50.0": 0,
139 | "75.0": 0,
140 | "95.0": 42.20244370485052,
141 | "99.0": 54.15284094789408
142 | }
143 | }
144 | },
145 | {
146 | "key_as_string": "2017-04-24T00:00:00.000Z",
147 | "key": 1492992000000,
148 | "doc_count": 325883,
149 | "some_score": {
150 | "values": {
151 | "1.0": -23.50024845031057,
152 | "5.0": -22.20040238272332,
153 | "25.0": 0,
154 | "50.0": 0,
155 | "75.0": 0,
156 | "95.0": 41.52234592261954,
157 | "99.0": 56.313834199795735
158 | }
159 | }
160 | },
161 | {
162 | "key_as_string": "2017-05-01T00:00:00.000Z",
163 | "key": 1493596800000,
164 | "doc_count": 92953,
165 | "some_score": {
166 | "values": {
167 | "1.0": -24.467312120035082,
168 | "5.0": -22.212801839940852,
169 | "25.0": 0,
170 | "50.0": 0,
171 | "75.0": 0,
172 | "95.0": 41.001332251036345,
173 | "99.0": 55.490378534226195
174 | }
175 | }
176 | }
177 | ]
178 | }
179 | }
180 | }
--------------------------------------------------------------------------------
/test-data/aggs_date_histogram_stats.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 21,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2627223,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "report_week": {
16 | "buckets": [
17 | {
18 | "key_as_string": "2017-02-27T00:00:00.000Z",
19 | "key": 1488153600000,
20 | "doc_count": 201674,
21 | "some_score": {
22 | "count": 201674,
23 | "min": 0,
24 | "max": 3,
25 | "avg": 1.572527941132719,
26 | "sum": 317138
27 | }
28 | },
29 | {
30 | "key_as_string": "2017-03-06T00:00:00.000Z",
31 | "key": 1488758400000,
32 | "doc_count": 295596,
33 | "some_score": {
34 | "count": 295596,
35 | "min": 0,
36 | "max": 7,
37 | "avg": 1.5650110285660157,
38 | "sum": 462611
39 | }
40 | },
41 | {
42 | "key_as_string": "2017-03-13T00:00:00.000Z",
43 | "key": 1489363200000,
44 | "doc_count": 277618,
45 | "some_score": {
46 | "count": 277618,
47 | "min": 0,
48 | "max": 7,
49 | "avg": 1.5557384607626306,
50 | "sum": 431901
51 | }
52 | },
53 | {
54 | "key_as_string": "2017-03-20T00:00:00.000Z",
55 | "key": 1489968000000,
56 | "doc_count": 259233,
57 | "some_score": {
58 | "count": 259233,
59 | "min": 0,
60 | "max": 7,
61 | "avg": 1.5482635312633808,
62 | "sum": 401361
63 | }
64 | },
65 | {
66 | "key_as_string": "2017-03-27T00:00:00.000Z",
67 | "key": 1490572800000,
68 | "doc_count": 265538,
69 | "some_score": {
70 | "count": 265538,
71 | "min": 0,
72 | "max": 7,
73 | "avg": 1.5432329836031,
74 | "sum": 409787
75 | }
76 | },
77 | {
78 | "key_as_string": "2017-04-03T00:00:00.000Z",
79 | "key": 1491177600000,
80 | "doc_count": 299502,
81 | "some_score": {
82 | "count": 299502,
83 | "min": 0,
84 | "max": 7,
85 | "avg": 1.539488884882238,
86 | "sum": 461080
87 | }
88 | },
89 | {
90 | "key_as_string": "2017-04-10T00:00:00.000Z",
91 | "key": 1491782400000,
92 | "doc_count": 303826,
93 | "some_score": {
94 | "count": 303826,
95 | "min": 0,
96 | "max": 7,
97 | "avg": 1.5399274584795244,
98 | "sum": 467870
99 | }
100 | },
101 | {
102 | "key_as_string": "2017-04-17T00:00:00.000Z",
103 | "key": 1492387200000,
104 | "doc_count": 305400,
105 | "some_score": {
106 | "count": 305400,
107 | "min": 0,
108 | "max": 3,
109 | "avg": 1.5349738048461035,
110 | "sum": 468781
111 | }
112 | },
113 | {
114 | "key_as_string": "2017-04-24T00:00:00.000Z",
115 | "key": 1492992000000,
116 | "doc_count": 325883,
117 | "some_score": {
118 | "count": 325883,
119 | "min": 0,
120 | "max": 3,
121 | "avg": 1.506402604615767,
122 | "sum": 490911
123 | }
124 | },
125 | {
126 | "key_as_string": "2017-05-01T00:00:00.000Z",
127 | "key": 1493596800000,
128 | "doc_count": 92953,
129 | "some_score": {
130 | "count": 92953,
131 | "min": 0,
132 | "max": 3,
133 | "avg": 1.5381429324497327,
134 | "sum": 142975
135 | }
136 | }
137 | ]
138 | }
139 | }
140 | }
--------------------------------------------------------------------------------
/test-data/aggs_date_histogram_terms.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 44,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 103069,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "report_week": {
16 | "buckets": [
17 | {
18 | "key_as_string": "2017-02-27T00:00:00.000Z",
19 | "key": 1488153600000,
20 | "doc_count": 7838,
21 | "theater_number": {
22 | "doc_count_error_upper_bound": 0,
23 | "sum_other_doc_count": 0,
24 | "buckets": [
25 | {
26 | "key": 0,
27 | "doc_count": 5273
28 | },
29 | {
30 | "key": 1,
31 | "doc_count": 2055
32 | },
33 | {
34 | "key": 3,
35 | "doc_count": 510
36 | }
37 | ]
38 | }
39 | },
40 | {
41 | "key_as_string": "2017-03-06T00:00:00.000Z",
42 | "key": 1488758400000,
43 | "doc_count": 11608,
44 | "theater_number": {
45 | "doc_count_error_upper_bound": 0,
46 | "sum_other_doc_count": 0,
47 | "buckets": [
48 | {
49 | "key": 0,
50 | "doc_count": 8025
51 | },
52 | {
53 | "key": 1,
54 | "doc_count": 2931
55 | },
56 | {
57 | "key": 3,
58 | "doc_count": 652
59 | }
60 | ]
61 | }
62 | },
63 | {
64 | "key_as_string": "2017-03-13T00:00:00.000Z",
65 | "key": 1489363200000,
66 | "doc_count": 12043,
67 | "theater_number": {
68 | "doc_count_error_upper_bound": 0,
69 | "sum_other_doc_count": 0,
70 | "buckets": [
71 | {
72 | "key": 0,
73 | "doc_count": 8306
74 | },
75 | {
76 | "key": 1,
77 | "doc_count": 3009
78 | },
79 | {
80 | "key": 3,
81 | "doc_count": 728
82 | }
83 | ]
84 | }
85 | },
86 | {
87 | "key_as_string": "2017-03-20T00:00:00.000Z",
88 | "key": 1489968000000,
89 | "doc_count": 11918,
90 | "theater_number": {
91 | "doc_count_error_upper_bound": 0,
92 | "sum_other_doc_count": 0,
93 | "buckets": [
94 | {
95 | "key": 0,
96 | "doc_count": 8118
97 | },
98 | {
99 | "key": 1,
100 | "doc_count": 3098
101 | },
102 | {
103 | "key": 3,
104 | "doc_count": 700
105 | },
106 | {
107 | "key": 2,
108 | "doc_count": 2
109 | }
110 | ]
111 | }
112 | },
113 | {
114 | "key_as_string": "2017-03-27T00:00:00.000Z",
115 | "key": 1490572800000,
116 | "doc_count": 11580,
117 | "theater_number": {
118 | "doc_count_error_upper_bound": 0,
119 | "sum_other_doc_count": 0,
120 | "buckets": [
121 | {
122 | "key": 0,
123 | "doc_count": 8126
124 | },
125 | {
126 | "key": 1,
127 | "doc_count": 2834
128 | },
129 | {
130 | "key": 3,
131 | "doc_count": 619
132 | }
133 | ]
134 | }
135 | },
136 | {
137 | "key_as_string": "2017-04-03T00:00:00.000Z",
138 | "key": 1491177600000,
139 | "doc_count": 11404,
140 | "theater_number": {
141 | "doc_count_error_upper_bound": 0,
142 | "sum_other_doc_count": 0,
143 | "buckets": [
144 | {
145 | "key": 0,
146 | "doc_count": 7976
147 | },
148 | {
149 | "key": 1,
150 | "doc_count": 2753
151 | },
152 | {
153 | "key": 3,
154 | "doc_count": 675
155 | }
156 | ]
157 | }
158 | },
159 | {
160 | "key_as_string": "2017-04-10T00:00:00.000Z",
161 | "key": 1491782400000,
162 | "doc_count": 10583,
163 | "theater_number": {
164 | "doc_count_error_upper_bound": 0,
165 | "sum_other_doc_count": 0,
166 | "buckets": [
167 | {
168 | "key": 0,
169 | "doc_count": 7267
170 | },
171 | {
172 | "key": 1,
173 | "doc_count": 2706
174 | },
175 | {
176 | "key": 3,
177 | "doc_count": 610
178 | }
179 | ]
180 | }
181 | },
182 | {
183 | "key_as_string": "2017-04-17T00:00:00.000Z",
184 | "key": 1492387200000,
185 | "doc_count": 11358,
186 | "theater_number": {
187 | "doc_count_error_upper_bound": 0,
188 | "sum_other_doc_count": 0,
189 | "buckets": [
190 | {
191 | "key": 0,
192 | "doc_count": 7916
193 | },
194 | {
195 | "key": 1,
196 | "doc_count": 2756
197 | },
198 | {
199 | "key": 3,
200 | "doc_count": 686
201 | }
202 | ]
203 | }
204 | },
205 | {
206 | "key_as_string": "2017-04-24T00:00:00.000Z",
207 | "key": 1492992000000,
208 | "doc_count": 11303,
209 | "theater_number": {
210 | "doc_count_error_upper_bound": 0,
211 | "sum_other_doc_count": 0,
212 | "buckets": [
213 | {
214 | "key": 0,
215 | "doc_count": 7864
216 | },
217 | {
218 | "key": 1,
219 | "doc_count": 2812
220 | },
221 | {
222 | "key": 3,
223 | "doc_count": 614
224 | }
225 | ]
226 | }
227 | },
228 | {
229 | "key_as_string": "2017-05-01T00:00:00.000Z",
230 | "key": 1493596800000,
231 | "doc_count": 3434,
232 | "theater_number": {
233 | "doc_count_error_upper_bound": 0,
234 | "sum_other_doc_count": 0,
235 | "buckets": [
236 | {
237 | "key": 0,
238 | "doc_count": 2436
239 | },
240 | {
241 | "key": 1,
242 | "doc_count": 810
243 | },
244 | {
245 | "key": 3,
246 | "doc_count": 188
247 | }
248 | ]
249 | }
250 | }
251 | ]
252 | }
253 | }
254 | }
--------------------------------------------------------------------------------
/test-data/aggs_extended_stats.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 194,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 92958,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "affinity_score": {
16 | "count": 59068,
17 | "min": -37.73445,
18 | "max": 70.62504577636719,
19 | "avg": 1.6455430412652865,
20 | "sum": 97198.93636145795,
21 | "sum_of_squares": 21853691.855166968,
22 | "variance": 367.26733293524387,
23 | "std_deviation": 19.164220123324714,
24 | "std_deviation_bounds": {
25 | "upper": 39.97398328791471,
26 | "lower": -36.682897205384144
27 | }
28 | }
29 | }
30 | }
--------------------------------------------------------------------------------
/test-data/aggs_histogram.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 36,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 2627232,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "affinity_score": {
16 | "buckets": [
17 | {
18 | "key": -50,
19 | "doc_count": 21470
20 | },
21 | {
22 | "key": -25,
23 | "doc_count": 331525
24 | },
25 | {
26 | "key": 0,
27 | "doc_count": 1096008
28 | },
29 | {
30 | "key": 25,
31 | "doc_count": 263747
32 | },
33 | {
34 | "key": 50,
35 | "doc_count": 27445
36 | }
37 | ]
38 | }
39 | }
40 | }
--------------------------------------------------------------------------------
/test-data/aggs_percentiles.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 76,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 92958,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "affinity_score": {
16 | "values": {
17 | "1.0": -24.4674287519375,
18 | "5.0": -22.212802690289852,
19 | "25.0": 0,
20 | "50.0": 0,
21 | "65.489756": 0,
22 | "75.0": 0,
23 | "95.0": 40.997696236818356,
24 | "99.0": 55.490141729049355
25 | }
26 | }
27 | }
28 | }
--------------------------------------------------------------------------------
/test-data/aggs_significant_terms.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 343,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 103104,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "top_tweet_keywords": {
16 | "doc_count": 103104,
17 | "buckets": [
18 | {
19 | "key": "no",
20 | "doc_count": 72807,
21 | "score": 137.9584061112563,
22 | "bg_count": 384901
23 | },
24 | {
25 | "key": "cont",
26 | "doc_count": 66740,
27 | "score": 135.87842669458297,
28 | "bg_count": 328493
29 | },
30 | {
31 | "key": "sa",
32 | "doc_count": 64397,
33 | "score": 125.67996557134086,
34 | "bg_count": 330583
35 | },
36 | {
37 | "key": "norm",
38 | "doc_count": 65314,
39 | "score": 125.59086038715985,
40 | "bg_count": 340281
41 | },
42 | {
43 | "key": "nor",
44 | "doc_count": 65314,
45 | "score": 125.58381289257261,
46 | "bg_count": 340300
47 | }
48 | ]
49 | }
50 | }
51 | }
--------------------------------------------------------------------------------
/test-data/aggs_stats.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 137,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 92958,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "affinity_score": {
16 | "count": 59068,
17 | "min": -37.73445,
18 | "max": 70.62504577636719,
19 | "avg": 1.6455430412652863,
20 | "sum": 97198.93636145793
21 | }
22 | }
23 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 17,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 120468,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "magic_number": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 2988,
18 | "buckets": [
19 | {
20 | "key": 3,
21 | "doc_count": 24996
22 | },
23 | {
24 | "key": 9,
25 | "doc_count": 22329
26 | },
27 | {
28 | "key": 19,
29 | "doc_count": 21830
30 | },
31 | {
32 | "key": 8,
33 | "doc_count": 13440
34 | },
35 | {
36 | "key": 5,
37 | "doc_count": 11663
38 | },
39 | {
40 | "key": 2,
41 | "doc_count": 9896
42 | },
43 | {
44 | "key": 4,
45 | "doc_count": 6860
46 | },
47 | {
48 | "key": 1,
49 | "doc_count": 3676
50 | },
51 | {
52 | "key": 14,
53 | "doc_count": 1561
54 | },
55 | {
56 | "key": 6,
57 | "doc_count": 1229
58 | }
59 | ]
60 | }
61 | }
62 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_cardinality.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 5,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 120468,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "customerNumber": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 51313,
18 | "buckets": [
19 | {
20 | "key": 3,
21 | "doc_count": 24996,
22 | "purchase_types": {
23 | "value": 4
24 | }
25 | },
26 | {
27 | "key": 9,
28 | "doc_count": 22329,
29 | "purchase_types": {
30 | "value": 4
31 | }
32 | },
33 | {
34 | "key": 19,
35 | "doc_count": 21830,
36 | "purchase_types": {
37 | "value": 2
38 | }
39 | }
40 | ]
41 | }
42 | }
43 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_date_histogram.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 85,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 3299133,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "customerNumber": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 1610025,
18 | "buckets": [
19 | {
20 | "key": 3,
21 | "doc_count": 635876,
22 | "purchase_date": {
23 | "buckets": [
24 | {
25 | "key_as_string": "2017-02-27T00:00:00.000Z",
26 | "key": 1488153600000,
27 | "doc_count": 40944
28 | },
29 | {
30 | "key_as_string": "2017-03-06T00:00:00.000Z",
31 | "key": 1488758400000,
32 | "doc_count": 67822
33 | },
34 | {
35 | "key_as_string": "2017-03-13T00:00:00.000Z",
36 | "key": 1489363200000,
37 | "doc_count": 69171
38 | },
39 | {
40 | "key_as_string": "2017-03-20T00:00:00.000Z",
41 | "key": 1489968000000,
42 | "doc_count": 62926
43 | },
44 | {
45 | "key_as_string": "2017-03-27T00:00:00.000Z",
46 | "key": 1490572800000,
47 | "doc_count": 70643
48 | },
49 | {
50 | "key_as_string": "2017-04-03T00:00:00.000Z",
51 | "key": 1491177600000,
52 | "doc_count": 76538
53 | },
54 | {
55 | "key_as_string": "2017-04-10T00:00:00.000Z",
56 | "key": 1491782400000,
57 | "doc_count": 80135
58 | },
59 | {
60 | "key_as_string": "2017-04-17T00:00:00.000Z",
61 | "key": 1492387200000,
62 | "doc_count": 72677
63 | },
64 | {
65 | "key_as_string": "2017-04-24T00:00:00.000Z",
66 | "key": 1492992000000,
67 | "doc_count": 70024
68 | },
69 | {
70 | "key_as_string": "2017-05-01T00:00:00.000Z",
71 | "key": 1493596800000,
72 | "doc_count": 24996
73 | }
74 | ]
75 | }
76 | },
77 | {
78 | "key": 5,
79 | "doc_count": 529046,
80 | "purchase_date": {
81 | "buckets": [
82 | {
83 | "key_as_string": "2017-02-27T00:00:00.000Z",
84 | "key": 1488153600000,
85 | "doc_count": 41429
86 | },
87 | {
88 | "key_as_string": "2017-03-06T00:00:00.000Z",
89 | "key": 1488758400000,
90 | "doc_count": 60928
91 | },
92 | {
93 | "key_as_string": "2017-03-13T00:00:00.000Z",
94 | "key": 1489363200000,
95 | "doc_count": 65796
96 | },
97 | {
98 | "key_as_string": "2017-03-20T00:00:00.000Z",
99 | "key": 1489968000000,
100 | "doc_count": 63584
101 | },
102 | {
103 | "key_as_string": "2017-03-27T00:00:00.000Z",
104 | "key": 1490572800000,
105 | "doc_count": 60740
106 | },
107 | {
108 | "key_as_string": "2017-04-03T00:00:00.000Z",
109 | "key": 1491177600000,
110 | "doc_count": 81163
111 | },
112 | {
113 | "key_as_string": "2017-04-10T00:00:00.000Z",
114 | "key": 1491782400000,
115 | "doc_count": 65028
116 | },
117 | {
118 | "key_as_string": "2017-04-17T00:00:00.000Z",
119 | "key": 1492387200000,
120 | "doc_count": 40006
121 | },
122 | {
123 | "key_as_string": "2017-04-24T00:00:00.000Z",
124 | "key": 1492992000000,
125 | "doc_count": 38709
126 | },
127 | {
128 | "key_as_string": "2017-05-01T00:00:00.000Z",
129 | "key": 1493596800000,
130 | "doc_count": 11663
131 | }
132 | ]
133 | }
134 | },
135 | {
136 | "key": 19,
137 | "doc_count": 524186,
138 | "purchase_date": {
139 | "buckets": [
140 | {
141 | "key_as_string": "2017-02-27T00:00:00.000Z",
142 | "key": 1488153600000,
143 | "doc_count": 49385
144 | },
145 | {
146 | "key_as_string": "2017-03-06T00:00:00.000Z",
147 | "key": 1488758400000,
148 | "doc_count": 42337
149 | },
150 | {
151 | "key_as_string": "2017-03-13T00:00:00.000Z",
152 | "key": 1489363200000,
153 | "doc_count": 48440
154 | },
155 | {
156 | "key_as_string": "2017-03-20T00:00:00.000Z",
157 | "key": 1489968000000,
158 | "doc_count": 29970
159 | },
160 | {
161 | "key_as_string": "2017-03-27T00:00:00.000Z",
162 | "key": 1490572800000,
163 | "doc_count": 37824
164 | },
165 | {
166 | "key_as_string": "2017-04-03T00:00:00.000Z",
167 | "key": 1491177600000,
168 | "doc_count": 94017
169 | },
170 | {
171 | "key_as_string": "2017-04-10T00:00:00.000Z",
172 | "key": 1491782400000,
173 | "doc_count": 79809
174 | },
175 | {
176 | "key_as_string": "2017-04-17T00:00:00.000Z",
177 | "key": 1492387200000,
178 | "doc_count": 47820
179 | },
180 | {
181 | "key_as_string": "2017-04-24T00:00:00.000Z",
182 | "key": 1492992000000,
183 | "doc_count": 72754
184 | },
185 | {
186 | "key_as_string": "2017-05-01T00:00:00.000Z",
187 | "key": 1493596800000,
188 | "doc_count": 21830
189 | }
190 | ]
191 | }
192 | }
193 | ]
194 | }
195 | }
196 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_extended_stats.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 418,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 120468,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "campaign_status": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 55,
18 | "buckets": [
19 | {
20 | "key": "market_to",
21 | "doc_count": 107736,
22 | "some_score": {
23 | "count": 74717,
24 | "min": -175.1807861328125,
25 | "max": 151.98361,
26 | "avg": 4.2240326056588025,
27 | "sum": 315607.04419700877,
28 | "sum_of_squares": 187165555.7172291,
29 | "variance": 2487.150464713055,
30 | "std_deviation": 49.87133911088667,
31 | "std_deviation_bounds": {
32 | "upper": 103.96671082743215,
33 | "lower": -95.51864561611454
34 | }
35 | }
36 | },
37 | {
38 | "key": "maybe",
39 | "doc_count": 10548,
40 | "some_score": {
41 | "count": 10456,
42 | "min": -90.16599,
43 | "max": 148.19164,
44 | "avg": 117.63368726205155,
45 | "sum": 1229977.834012011,
46 | "sum_of_squares": 169895700.71152127,
47 | "variance": 2410.9480533757473,
48 | "std_deviation": 49.10140581873137,
49 | "std_deviation_bounds": {
50 | "upper": 215.8364988995143,
51 | "lower": 19.430875624588808
52 | }
53 | }
54 | },
55 | {
56 | "key": "ignore",
57 | "doc_count": 2129,
58 | "some_score": {
59 | "count": 2127,
60 | "min": -90.16599,
61 | "max": 148.35457,
62 | "avg": 75.17809260768585,
63 | "sum": 159903.80297654783,
64 | "sum_of_squares": 24416683.030253433,
65 | "variance": 5827.654029977135,
66 | "std_deviation": 76.33907276078965,
67 | "std_deviation_bounds": {
68 | "upper": 227.85623812926514,
69 | "lower": -77.50005291389344
70 | }
71 | }
72 | }
73 | ]
74 | }
75 | }
76 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_histogram.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 295,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 120468,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "campaign_status": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 55,
18 | "buckets": [
19 | {
20 | "key": "ignore",
21 | "doc_count": 107736,
22 | "affinity_score": {
23 | "buckets": [
24 | {
25 | "key": -50,
26 | "doc_count": 5262
27 | },
28 | {
29 | "key": 0,
30 | "doc_count": 66695
31 | },
32 | {
33 | "key": 50,
34 | "doc_count": 2760
35 | }
36 | ]
37 | }
38 | },
39 | {
40 | "key": "maybe",
41 | "doc_count": 10548,
42 | "affinity_score": {
43 | "buckets": [
44 | {
45 | "key": -50,
46 | "doc_count": 9099
47 | },
48 | {
49 | "key": 0,
50 | "doc_count": 1357
51 | }
52 | ]
53 | }
54 | },
55 | {
56 | "key": "market_to",
57 | "doc_count": 2129,
58 | "affinity_score": {
59 | "buckets": [
60 | {
61 | "key": -50,
62 | "doc_count": 1097
63 | },
64 | {
65 | "key": 0,
66 | "doc_count": 1030
67 | }
68 | ]
69 | }
70 | }
71 | ]
72 | }
73 | }
74 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_percentiles.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 142,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 120468,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "campaign_status": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 55,
18 | "buckets": [
19 | {
20 | "key": "maybe",
21 | "doc_count": 107736,
22 | "some_score": {
23 | "values": {
24 | "1.0": -112.12122535844742,
25 | "5.0": -86.76868939171038,
26 | "25.0": 0,
27 | "50.0": 0,
28 | "60.58934": 0,
29 | "75.0": 0,
30 | "95.0": 117.86637213803532,
31 | "99.0": 129.32387561889348
32 | }
33 | }
34 | },
35 | {
36 | "key": "ignore",
37 | "doc_count": 10548,
38 | "some_score": {
39 | "values": {
40 | "1.0": 0,
41 | "5.0": 0,
42 | "25.0": 117.86835624469992,
43 | "50.0": 148.06826928571428,
44 | "60.58934": 148.0793833809623,
45 | "75.0": 148.09198967492816,
46 | "95.0": 148.10842188873627,
47 | "99.0": 148.1524185
48 | }
49 | }
50 | },
51 | {
52 | "key": "market_to",
53 | "doc_count": 2129,
54 | "some_score": {
55 | "values": {
56 | "1.0": -90.1644744,
57 | "5.0": 0,
58 | "25.0": 0,
59 | "50.0": 148.1146593939394,
60 | "60.58934": 148.15548840481475,
61 | "75.0": 148.1812391941392,
62 | "95.0": 148.31047099999998,
63 | "99.0": 148.3409206
64 | }
65 | }
66 | }
67 | ]
68 | }
69 | }
70 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_significant_terms.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 236,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 3433,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "popularity_score": {
16 | "doc_count_error_upper_bound": 11,
17 | "sum_other_doc_count": 2625,
18 | "buckets": [
19 | {
20 | "key": "summaries",
21 | "doc_count": 388,
22 | "comment_term": {
23 | "doc_count": 388,
24 | "buckets": [
25 | {
26 | "key": "suggeste",
27 | "doc_count": 375,
28 | "score": 4473.743549520632,
29 | "bg_count": 22318
30 | },
31 | {
32 | "key": "suggested",
33 | "doc_count": 375,
34 | "score": 4473.743549520632,
35 | "bg_count": 22318
36 | },
37 | {
38 | "key": "strong",
39 | "doc_count": 376,
40 | "score": 4065.286271318061,
41 | "bg_count": 24691
42 | },
43 | {
44 | "key": "stron",
45 | "doc_count": 376,
46 | "score": 4065.286271318061,
47 | "bg_count": 24691
48 | },
49 | {
50 | "key": "stro",
51 | "doc_count": 376,
52 | "score": 4064.133797943521,
53 | "bg_count": 24698
54 | },
55 | {
56 | "key": "heavy",
57 | "doc_count": 376,
58 | "score": 3803.2106294847563,
59 | "bg_count": 26392
60 | },
61 | {
62 | "key": "action",
63 | "doc_count": 367,
64 | "score": 2914.3560174102226,
65 | "bg_count": 32810
66 | },
67 | {
68 | "key": "actio",
69 | "doc_count": 367,
70 | "score": 2914.3560174102226,
71 | "bg_count": 32810
72 | },
73 | {
74 | "key": "gorgeous",
75 | "doc_count": 4,
76 | "score": 2840.6390424062065,
77 | "bg_count": 4
78 | },
79 | {
80 | "key": "suggest",
81 | "doc_count": 371,
82 | "score": 2748.930000046098,
83 | "bg_count": 35546
84 | }
85 | ]
86 | }
87 | },
88 | {
89 | "key": "opinion",
90 | "doc_count": 230,
91 | "comment_term": {
92 | "doc_count": 230,
93 | "buckets": [
94 | {
95 | "key": "check",
96 | "doc_count": 3,
97 | "score": 1957.5013232514177,
98 | "bg_count": 9
99 | },
100 | {
101 | "key": "sealer",
102 | "doc_count": 6,
103 | "score": 1531.941678310183,
104 | "bg_count": 46
105 | },
106 | {
107 | "key": "scrape",
108 | "doc_count": 6,
109 | "score": 1531.941678310183,
110 | "bg_count": 46
111 | },
112 | {
113 | "key": "splines",
114 | "doc_count": 26,
115 | "score": 1341.9556298577054,
116 | "bg_count": 986
117 | },
118 | {
119 | "key": "doesn'",
120 | "doc_count": 6,
121 | "score": 1304.9834908632638,
122 | "bg_count": 54
123 | },
124 | {
125 | "key": "doesn't",
126 | "doc_count": 6,
127 | "score": 1304.9834908632638,
128 | "bg_count": 54
129 | },
130 | {
131 | "key": "love",
132 | "doc_count": 26,
133 | "score": 1292.1523001831285,
134 | "bg_count": 1024
135 | },
136 | {
137 | "key": "miles",
138 | "doc_count": 17,
139 | "score": 1240.5437448180944,
140 | "bg_count": 456
141 | },
142 | {
143 | "key": "mile",
144 | "doc_count": 17,
145 | "score": 1240.5437448180944,
146 | "bg_count": 456
147 | },
148 | {
149 | "key": "doesn",
150 | "doc_count": 6,
151 | "score": 1194.3894588446378,
152 | "bg_count": 59
153 | }
154 | ]
155 | }
156 | },
157 | {
158 | "key": "reviews",
159 | "doc_count": 190,
160 | "comment_term": {
161 | "doc_count": 190,
162 | "buckets": [
163 | {
164 | "key": "hey",
165 | "doc_count": 13,
166 | "score": 16157.088567867037,
167 | "bg_count": 30
168 | },
169 | {
170 | "key": "whoa",
171 | "doc_count": 6,
172 | "score": 14750.373787099328,
173 | "bg_count": 7
174 | },
175 | {
176 | "key": "only",
177 | "doc_count": 14,
178 | "score": 10410.191831332715,
179 | "bg_count": 54
180 | },
181 | {
182 | "key": "no",
183 | "doc_count": 14,
184 | "score": 10410.191831332715,
185 | "bg_count": 54
186 | },
187 | {
188 | "key": "not",
189 | "doc_count": 5,
190 | "score": 10243.310743965174,
191 | "bg_count": 7
192 | },
193 | {
194 | "key": "first",
195 | "doc_count": 3,
196 | "score": 8604.387340720223,
197 | "bg_count": 3
198 | },
199 | {
200 | "key": "fly",
201 | "doc_count": 4,
202 | "score": 2415.2500014579387,
203 | "bg_count": 19
204 | },
205 | {
206 | "key": "sizes",
207 | "doc_count": 4,
208 | "score": 2415.2500014579387,
209 | "bg_count": 19
210 | },
211 | {
212 | "key": "unacc",
213 | "doc_count": 9,
214 | "score": 2370.5534939793097,
215 | "bg_count": 98
216 | },
217 | {
218 | "key": "unacce",
219 | "doc_count": 9,
220 | "score": 2370.5534939793097,
221 | "bg_count": 98
222 | }
223 | ]
224 | }
225 | }
226 | ]
227 | }
228 | }
229 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_stats.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 4,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 64,
6 | "successful": 64,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 120468,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "customerNumber": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 51313,
18 | "buckets": [
19 | {
20 | "key": 3,
21 | "doc_count": 24996,
22 | "some_score": {
23 | "count": 24996,
24 | "min": 0,
25 | "max": 2,
26 | "avg": 0.06052968474955993,
27 | "sum": 1513
28 | }
29 | },
30 | {
31 | "key": 9,
32 | "doc_count": 22329,
33 | "some_score": {
34 | "count": 22329,
35 | "min": 0,
36 | "max": 1,
37 | "avg": 0.009807873169420932,
38 | "sum": 219
39 | }
40 | },
41 | {
42 | "key": 19,
43 | "doc_count": 21830,
44 | "some_score": {
45 | "count": 21830,
46 | "min": 0,
47 | "max": 0,
48 | "avg": 0,
49 | "sum": 0
50 | }
51 | }
52 | ]
53 | }
54 | }
55 | }
--------------------------------------------------------------------------------
/test-data/aggs_terms_terms.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 9,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 5,
6 | "successful": 5,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 120468,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "magic_number": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 51313,
18 | "buckets": [
19 | {
20 | "key": 3,
21 | "doc_count": 24996,
22 | "customerType": {
23 | "doc_count_error_upper_bound": 0,
24 | "sum_other_doc_count": 0,
25 | "buckets": [
26 | {
27 | "key": "type_a",
28 | "doc_count": 24996
29 | }
30 | ]
31 | }
32 | },
33 | {
34 | "key": 9,
35 | "doc_count": 22329,
36 | "customerType": {
37 | "doc_count_error_upper_bound": 0,
38 | "sum_other_doc_count": 0,
39 | "buckets": [
40 | {
41 | "key": "type_a",
42 | "doc_count": 22329
43 | }
44 | ]
45 | }
46 | },
47 | {
48 | "key": 19,
49 | "doc_count": 21830,
50 | "customerType": {
51 | "doc_count_error_upper_bound": 0,
52 | "sum_other_doc_count": 0,
53 | "buckets": [
54 | {
55 | "key": "type_a",
56 | "doc_count": 21830
57 | }
58 | ]
59 | }
60 | }
61 | ]
62 | }
63 | }
64 | }
--------------------------------------------------------------------------------
/test-data/empty_terms.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 15,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 5,
6 | "successful": 5,
7 | "skipped": 0,
8 | "failed": 0
9 | },
10 | "hits": {
11 | "total": 48,
12 | "max_score": 0.0,
13 | "hits": []
14 | },
15 | "aggregations": {
16 | "blegh": {
17 | "doc_count_error_upper_bound": 0,
18 | "sum_other_doc_count": 0,
19 | "buckets": []
20 | }
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/test-data/es5_shakespeare_mapping.json:
--------------------------------------------------------------------------------
1 | {
2 | "mappings": {
3 | "_default_": {
4 | "properties": {
5 | "speaker": {
6 | "type": "string",
7 | "fielddata": true
8 | },
9 | "play_name": {
10 | "type": "string",
11 | "fielddata": true
12 | },
13 | "line_id": {
14 | "type": "integer"
15 | },
16 | "line_number": {
17 | "type": "string",
18 | "fielddata": true
19 | },
20 | "speech_number": {
21 | "type": "integer"
22 | },
23 | "text_entry": {
24 | "type": "string",
25 | "fielddata": true
26 | }
27 | }
28 | }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/test-data/es6_shakespeare_mapping.json:
--------------------------------------------------------------------------------
1 | {
2 | "mappings": {
3 | "_default_": {
4 | "properties": {
5 | "speaker": {
6 | "type": "text",
7 | "fielddata": true
8 | },
9 | "play_name": {
10 | "type": "text",
11 | "fielddata": true
12 | },
13 | "line_id": {
14 | "type": "integer"
15 | },
16 | "line_number": {
17 | "type": "text",
18 | "fielddata": true
19 | },
20 | "speech_number": {
21 | "type": "integer"
22 | },
23 | "text_entry": {
24 | "type": "text",
25 | "fielddata": true
26 | }
27 | }
28 | }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/test-data/es7_shakespeare_mapping.json:
--------------------------------------------------------------------------------
1 | {
2 | "mappings": {
3 | "properties": {
4 | "speaker": {
5 | "type": "keyword"
6 | },
7 | "play_name": {
8 | "type": "keyword"
9 | },
10 | "line_id": {
11 | "type": "integer"
12 | },
13 | "line_number": {
14 | "type": "keyword"
15 | },
16 | "speech_number": {
17 | "type": "keyword"
18 | },
19 | "text_entry": {
20 | "type": "text"
21 | }
22 | }
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/test-data/es_hits.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 54,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 16,
6 | "successful": 16,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 46872,
11 | "max_score": 0.882234,
12 | "hits": [
13 | {"_index": "staging_redsawx", "_type": "ballplayer", "_id": "abc123", "_score": 0.882234,
14 | "_source": {"name": "David Ortiz", "stats" : {"yrs_played": 20, "final_season": {"avg": 0.315, "HR": 38, "R": 79},
15 | "full_career": {"avg": 0.286, "HR": 541, "R": 1419}}}},
16 | {"_index": "staging_redsawx", "_type": "ballplayer", "_id": "def567", "_score": 0.882234,
17 | "_source": {"name": "Kevin Youkilis", "stats" : {"yrs_played": 10, "final_season": {"avg": 0.219, "HR": 2, "R": 12},
18 | "full_career": {"avg": 0.281, "HR": 150, "R": 653}}}},
19 | {"_index": "staging_redsawx", "_type": "ballplayer", "_id": "abc567", "_score": 0.882234,
20 | "_source": {"name": "Trot Nixon", "stats" : {"yrs_played": 12, "final_season": {"avg": 0.171, "HR": 1, "R": 2},
21 | "full_career": {"avg": 0.274, "HR": 137, "R": 579}}}},
22 | {"_index": "staging_redsawx", "_type": "ballplayer", "_id": "def123", "_score": 0.882234,
23 | "_source": {"name": "Manny Ramirez", "stats" : {"yrs_played": 19, "final_season": {"avg": 0.059, "HR": 0, "R": 0},
24 | "full_career": {"avg": 0.312, "HR": 555, "R": 1544}}}},
25 | {"_index": "staging_redsawx", "_type": "ballplayer", "_id": "ghi890", "_score": 0.882234,
26 | "_source": {"name": "Jason Varitek", "stats" : {"yrs_played": 15, "final_season": {"avg": 0.221, "HR": 11, "R": 32},
27 | "full_career": {"avg": 0.256, "HR": "193", "R": 664}}}}
28 | ]
29 | }
30 | }
--------------------------------------------------------------------------------
/test-data/legacy_shakespeare_mapping.json:
--------------------------------------------------------------------------------
1 | {
2 | "mappings": {
3 | "_default_": {
4 | "properties": {
5 | "speaker": {
6 | "type": "string"
7 | },
8 | "play_name": {
9 | "type": "string"
10 | },
11 | "line_id": {
12 | "type": "integer"
13 | },
14 | "line_number": {
15 | "type": "string"
16 | },
17 | "speech_number": {
18 | "type": "integer"
19 | },
20 | "text_entry": {
21 | "type": "string"
22 | }
23 | }
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/test-data/one_index_mapping.json:
--------------------------------------------------------------------------------
1 | {
2 | "basketball": {
3 | "mappings": {
4 | "players": {
5 | "properties": {
6 | "team": {
7 | "type": "keyword"
8 | },
9 | "name": {
10 | "properties": {
11 | "first": {
12 | "type": "text"
13 | },
14 | "last": {
15 | "type": "text"
16 | }
17 | }
18 | },
19 | "age": {
20 | "type": "integer"
21 | },
22 | "position": {
23 | "type": "keyword"
24 | }
25 | }
26 | }
27 | }
28 | }
29 | }
--------------------------------------------------------------------------------
/test-data/one_var_agg.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 5,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 16,
6 | "successful": 16,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 110207,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "some_variable": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 0,
18 | "buckets": [
19 | {
20 | "key": "level1",
21 | "doc_count": 62159
22 | },
23 | {
24 | "key": "level2",
25 | "doc_count": 21576
26 | },
27 | {
28 | "key": "level3",
29 | "doc_count": 10575
30 | }
31 | ]
32 | }
33 | }
34 | }
--------------------------------------------------------------------------------
/test-data/three_var_agg.json:
--------------------------------------------------------------------------------
1 | {
2 | "took": 494,
3 | "timed_out": false,
4 | "_shards": {
5 | "total": 16,
6 | "successful": 16,
7 | "failed": 0
8 | },
9 | "hits": {
10 | "total": 11335918,
11 | "max_score": 0,
12 | "hits": []
13 | },
14 | "aggregations": {
15 | "a_grouping_var": {
16 | "doc_count_error_upper_bound": 0,
17 | "sum_other_doc_count": 526088,
18 | "buckets": [
19 | {
20 | "key": 0,
21 | "doc_count": 3403964,
22 | "another_one": {
23 | "doc_count_error_upper_bound": 23422,
24 | "sum_other_doc_count": 2941783,
25 | "buckets": [
26 | {
27 | "key": 2915,
28 | "doc_count": 188629,
29 | "yet_another_one": {
30 | "doc_count_error_upper_bound": 0,
31 | "sum_other_doc_count": 0,
32 | "buckets": [
33 | {
34 | "key": "lupe_fiasco",
35 | "doc_count": 168098
36 | },
37 | {
38 | "key": "tech_n9ne",
39 | "doc_count": 20531
40 | }
41 | ]
42 | }
43 | },
44 | {
45 | "key": 3952,
46 | "doc_count": 146357,
47 | "yet_another_one": {
48 | "doc_count_error_upper_bound": 0,
49 | "sum_other_doc_count": 0,
50 | "buckets": [
51 | {
52 | "key": "lupe_fiasco",
53 | "doc_count": 145484
54 | },
55 | {
56 | "key": "tech_n9ne",
57 | "doc_count": 873
58 | }
59 | ]
60 | }
61 | },
62 | {
63 | "key": 2632,
64 | "doc_count": 127195,
65 | "yet_another_one": {
66 | "doc_count_error_upper_bound": 0,
67 | "sum_other_doc_count": 0,
68 | "buckets": [
69 | {
70 | "key": "lupe_fiasco",
71 | "doc_count": 121318
72 | },
73 | {
74 | "key": "tech_n9ne",
75 | "doc_count": 5877
76 | }
77 | ]
78 | }
79 | }
80 | ]
81 | }
82 | },
83 | {
84 | "key": 2,
85 | "doc_count": 3360049,
86 | "another_one": {
87 | "doc_count_error_upper_bound": 13449,
88 | "sum_other_doc_count": 2105828,
89 | "buckets": [
90 | {
91 | "key": 2349,
92 | "doc_count": 542582,
93 | "yet_another_one": {
94 | "doc_count_error_upper_bound": 0,
95 | "sum_other_doc_count": 0,
96 | "buckets": [
97 | {
98 | "key": "childish_gambino",
99 | "doc_count": 485820
100 | },
101 | {
102 | "key": "tech_n9ne",
103 | "doc_count": 56762
104 | }
105 | ]
106 | }
107 | },
108 | {
109 | "key": 2201,
110 | "doc_count": 505387,
111 | "yet_another_one": {
112 | "doc_count_error_upper_bound": 0,
113 | "sum_other_doc_count": 0,
114 | "buckets": [
115 | {
116 | "key": "childish_gambino",
117 | "doc_count": 470503
118 | },
119 | {
120 | "key": "tech_n9ne",
121 | "doc_count": 34884
122 | }
123 | ]
124 | }
125 | },
126 | {
127 | "key": 2247,
128 | "doc_count": 206252,
129 | "yet_another_one": {
130 | "doc_count_error_upper_bound": 0,
131 | "sum_other_doc_count": 0,
132 | "buckets": [
133 | {
134 | "key": "childish_gambino",
135 | "doc_count": 188375
136 | },
137 | {
138 | "key": "tech_n9ne",
139 | "doc_count": 17877
140 | }
141 | ]
142 | }
143 | }
144 | ]
145 | }
146 | },
147 | {
148 | "key": 1,
149 | "doc_count": 2600800,
150 | "another_one": {
151 | "doc_count_error_upper_bound": 17346,
152 | "sum_other_doc_count": 1692470,
153 | "buckets": [
154 | {
155 | "key": 2126,
156 | "doc_count": 433735,
157 | "yet_another_one": {
158 | "doc_count_error_upper_bound": 0,
159 | "sum_other_doc_count": 0,
160 | "buckets": [
161 | {
162 | "key": "lupe_fiasco",
163 | "doc_count": 405476
164 | },
165 | {
166 | "key": "tech_n9ne",
167 | "doc_count": 28259
168 | }
169 | ]
170 | }
171 | },
172 | {
173 | "key": 777,
174 | "doc_count": 277387,
175 | "yet_another_one": {
176 | "doc_count_error_upper_bound": 0,
177 | "sum_other_doc_count": 0,
178 | "buckets": [
179 | {
180 | "key": "lupe_fiasco",
181 | "doc_count": 241894
182 | },
183 | {
184 | "key": "tech_n9ne",
185 | "doc_count": 35493
186 | }
187 | ]
188 | }
189 | },
190 | {
191 | "key": 663,
192 | "doc_count": 197208,
193 | "yet_another_one": {
194 | "doc_count_error_upper_bound": 0,
195 | "sum_other_doc_count": 0,
196 | "buckets": [
197 | {
198 | "key": "lupe_fiasco",
199 | "doc_count": 193540
200 | },
201 | {
202 | "key": "tech_n9ne",
203 | "doc_count": 3668
204 | }
205 | ]
206 | }
207 | }
208 | ]
209 | }
210 | }
211 | ]
212 | }
213 | }
214 | }
--------------------------------------------------------------------------------
/test-data/two_index_mapping.json:
--------------------------------------------------------------------------------
1 | {
2 | "company": {
3 | "mappings": {
4 | "building": {
5 | "properties": {
6 | "id": {
7 | "type": "long"
8 | },
9 | "address": {
10 | "type": "text",
11 | "fields": {
12 | "keyword": {
13 | "type": "keyword",
14 | "ignore_above": 256
15 | }
16 | }
17 | }
18 | }
19 | }
20 | }
21 | },
22 | "hotel": {
23 | "mappings": {
24 | "bed_room": {
25 | "properties": {
26 | "num_beds": {
27 | "type": "integer"
28 | },
29 | "description": {
30 | "type": "text"
31 | }
32 | }
33 | },
34 | "conference_room": {
35 | "properties": {
36 | "num_people": {
37 | "type": "integer"
38 | },
39 | "purpose": {
40 | "type": "text",
41 | "fields": {
42 | "keyword": {
43 | "type": "keyword",
44 | "ignore_above": 256
45 | }
46 | }
47 | }
48 | }
49 | }
50 | }
51 | }
52 | }
--------------------------------------------------------------------------------
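
Note (not a file in this repository): the fixtures above are exercised by the package's unit tests (for example r-pkg/tests/testthat/test-chomp_aggs.R and test-chomp_hits.R). As a hedged illustration only, the minimal R sketch below shows one way such an aggregation response could be flattened locally; it assumes chomp_aggs() accepts the raw response as a JSON string via an aggs_json argument, which is how the test fixtures appear to be consumed.

# Hedged sketch: flatten one of the aggregation fixtures above into a data.table.
# Assumption: chomp_aggs() takes the raw JSON response as a string (aggs_json).
library(uptasticsearch)

# aggs_terms_stats.json: a "terms" aggregation with a nested "stats" aggregation
response_json <- paste(
    readLines("test-data/aggs_terms_stats.json")
    , collapse = "\n"
)

# Expected result: one row per terms bucket, with the stats metrics
# (count / min / max / avg / sum) spread across columns
agg_dt <- chomp_aggs(aggs_json = response_json)
print(agg_dt)

The same pattern would apply to the other aggs_*.json fixtures, while es_hits.json is the analogous input for chomp_hits(); the exact argument names should be confirmed against man/chomp_aggs.Rd and man/chomp_hits.Rd.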