├── .circleci └── config.yml ├── .codeclimate.yml ├── .codeinventory.yml ├── .csslintrc ├── .dockerignore ├── .env ├── .eslintignore ├── .eslintrc ├── .github └── pull_request_template.md ├── .gitignore ├── .rubocop.yml ├── .rubocop_todo.yml ├── .ruby-version ├── .simplecov ├── Capfile ├── Dockerfile ├── Dockerfile.dev ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── README.md ├── Rakefile ├── app ├── classes │ ├── document_query.rb │ ├── document_search.rb │ ├── document_search_results.rb │ └── query_parser.rb ├── controllers │ └── api │ │ ├── base.rb │ │ └── v1 │ │ ├── base.rb │ │ ├── collections.rb │ │ └── documents.rb ├── models │ ├── collection.rb │ └── document.rb ├── repositories │ ├── collection_repository.rb │ ├── concerns │ │ └── repository.rb │ └── document_repository.rb └── templates │ ├── collections.rb │ └── documents.rb ├── appspec.yml ├── bin ├── bundle ├── rails ├── rake ├── secure_docker ├── setup └── update ├── buildspec_i14y.yml ├── cicd-scripts └── fetch_env_vars.sh ├── config.ru ├── config ├── access_control.yml ├── application.rb ├── boot.rb ├── deploy.rb ├── deploy │ ├── development.rb │ ├── production.rb │ └── staging.rb ├── elasticsearch.yml ├── environment.rb ├── environments │ ├── development.rb │ ├── production.rb │ └── test.rb ├── initializers │ ├── access_control.rb │ ├── application_controller_renderer.rb │ ├── backtrace_silencers.rb │ ├── cookies_serializer.rb │ ├── elasticsearch.rb │ ├── filter_parameter_logging.rb │ ├── inflections.rb │ ├── locales.rb │ ├── mime_types.rb │ ├── session_store.rb │ └── wrap_parameters.rb ├── locales │ ├── analysis │ │ ├── en_protwords.txt │ │ ├── en_synonyms.txt │ │ ├── es_protwords.txt │ │ └── es_synonyms.txt │ └── en.yml ├── newrelic.yml ├── puma.rb └── routes.rb ├── lib ├── ext │ └── string.rb ├── namespaced_index.rb ├── read_only_access_control.rb ├── serde.rb ├── tasks │ └── i14y.rake ├── templatable.rb └── validations │ └── max_bytes.rb ├── public ├── 404.html ├── 422.html ├── 500.html ├── 
favicon.ico └── robots.txt ├── spec ├── classes │ ├── document_query_spec.rb │ ├── document_search_results_spec.rb │ ├── document_search_spec.rb │ └── query_parser_spec.rb ├── config │ └── initializers │ │ └── filter_parameter_logging_spec.rb ├── lib │ ├── serde_spec.rb │ └── validations │ │ └── max_bytes_spec.rb ├── models │ ├── collection_spec.rb │ └── document_spec.rb ├── rails_helper.rb ├── repositories │ ├── collection_repository_spec.rb │ └── document_repository_spec.rb ├── requests │ └── api │ │ └── v1 │ │ ├── collections_spec.rb │ │ └── documents_spec.rb ├── spec_helper.rb ├── support │ ├── document_crud.rb │ ├── shared_examples │ │ ├── read_only_mode.rb │ │ └── repository_behavior.rb │ └── shoulda.rb └── test_services.rb └── vendor └── stream2es /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | ruby: circleci/ruby@1.4.0 5 | # aws-ecr: circleci/aws-ecr@8.2.1 6 | # aws-eks: circleci/aws-eks@1.1.0 7 | # aws-cli: circleci/aws-cli@3.1.4 8 | # kubernetes: circleci/kubernetes@1.3.1 9 | docker: circleci/docker@1.5.0 10 | 11 | jobs: 12 | build_and_test: 13 | parameters: 14 | ruby_version: 15 | type: string 16 | elasticsearch_version: 17 | type: string 18 | 19 | docker: 20 | - image: cimg/ruby:<< parameters.ruby_version >> 21 | 22 | - image: docker.elastic.co/elasticsearch/elasticsearch:<< parameters.elasticsearch_version >> 23 | environment: 24 | bootstrap.memory_lock: true 25 | discovery.type: single-node 26 | xpack.security.enabled: false 27 | ES_JAVA_OPTS: '-Xms512m -Xmx512m' 28 | 29 | working_directory: ~/app 30 | 31 | steps: 32 | - setup_remote_docker 33 | - checkout 34 | # Install gems with Bundler 35 | - ruby/install-deps: 36 | key: gems-ruby-<< parameters.ruby_version >>-v{{ .Environment.CACHE_VERSION }} 37 | - run: 38 | name: Setup Code Climate test-reporter 39 | command: | 40 | curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > 
./cc-test-reporter 41 | chmod +x ./cc-test-reporter 42 | - run: 43 | name: Wait for Elasticsearch 44 | command: dockerize -wait tcp://localhost:9200 -timeout 1m 45 | - run: 46 | name: Install Elasticsearch Plugins 47 | command: | 48 | until curl -sS localhost:9200; do sleep 1; done 49 | elasticsearch_container_id=$(docker ps -qf "ancestor=docker.elastic.co/elasticsearch/elasticsearch:<< parameters.elasticsearch_version >>") 50 | docker exec $elasticsearch_container_id elasticsearch-plugin install analysis-kuromoji 51 | docker exec $elasticsearch_container_id elasticsearch-plugin install analysis-icu 52 | docker exec $elasticsearch_container_id elasticsearch-plugin install analysis-smartcn 53 | # Restart Elasticsearch to apply the plugins 54 | docker restart $elasticsearch_container_id 55 | # Wait for Elasticsearch to be ready after restart 56 | dockerize -wait tcp://localhost:9200 -timeout 1m 57 | - run: 58 | name: RSpec 59 | environment: 60 | CC_TEST_REPORTER_ID: 09d5a7d453407f367679c8f86c4c582ec3583bed3c7a06286d61d16e89290bd1 61 | command: | 62 | bundle exec rake i14y:setup 63 | ./cc-test-reporter before-build 64 | bundle exec rspec spec 65 | ./cc-test-reporter after-build --exit-code $? 66 | - store_artifacts: 67 | path: coverage 68 | destination: ~/coverage 69 | 70 | # update-staging: 71 | # docker: 72 | # - image: cimg/aws:2023.01 73 | # steps: 74 | # - checkout 75 | # - setup_remote_docker 76 | # - aws-cli/setup: 77 | # aws-region: STAGING_AWS_REGION 78 | # aws-access-key-id: CIRCLE_CI_USER_ACCESS_KEY_ID 79 | # aws-secret-access-key: CIRCLE_CI_USER_SECRET_ACCESS_KEY 80 | # - run: docker build -t i14y . 
81 | # - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:latest 82 | # - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:${CIRCLE_BUILD_NUM} 83 | # - run: aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin 213305845712.dkr.ecr.us-east-2.amazonaws.com 84 | # - run: docker push 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:latest 85 | # - run: docker push 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:${CIRCLE_BUILD_NUM} 86 | # - run: 87 | # name: update i14y k8s deployment 88 | # command: | 89 | # aws ssm send-command \ 90 | # --document-name "searchgov-deployment-ssm-document" \ 91 | # --targets "Key=tag:Name,Values=jumphost-staging-search-instance" \ 92 | # --parameters '{"DeploymentName":["search-staging-i14y-deploy"], "Namespace":["search"]}' \ 93 | # --comment "restart i14y staging deployment" 94 | 95 | # update-prod: 96 | # docker: 97 | # - image: cimg/aws:2023.01 98 | # steps: 99 | # - checkout 100 | # - setup_remote_docker 101 | # - aws-cli/setup: 102 | # aws-region: PROD_AWS_REGION 103 | # aws-access-key-id: CIRCLE_CI_USER_ACCESS_KEY_ID 104 | # aws-secret-access-key: CIRCLE_CI_USER_SECRET_ACCESS_KEY 105 | # - run: docker build -t i14y . 
106 | # - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:latest 107 | # - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:${CIRCLE_BUILD_NUM} 108 | # - run: aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 213305845712.dkr.ecr.us-east-1.amazonaws.com 109 | # - run: docker push 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:latest 110 | # - run: docker push 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:${CIRCLE_BUILD_NUM} 111 | # - run: 112 | # name: update i14y k8s deployment 113 | # command: | 114 | # aws ssm send-command \ 115 | # --document-name "searchgov-deployment-ssm-document" \ 116 | # --targets "Key=tag:Name,Values=jumphost-prod-search-instance" \ 117 | # --parameters '{"DeploymentName":["search-prod-i14y-deploy"], "Namespace":["search"]}' \ 118 | # --comment "restart i14y prod deployment" 119 | 120 | workflows: 121 | build_and_test: 122 | jobs: 123 | - build_and_test: 124 | name: "Ruby << matrix.ruby_version >>, ES << matrix.elasticsearch_version >>" 125 | matrix: 126 | parameters: 127 | ruby_version: 128 | - 3.3.7 129 | elasticsearch_version: 130 | - 7.17.7 131 | # not yet compatible with Elasticsearch 8 132 | 133 | # build_image_and_deploy: 134 | # jobs: 135 | # - update-staging: 136 | # context: 137 | # - aws-client-keys 138 | # filters: 139 | # branches: 140 | # only: 141 | # - main 142 | # - update-prod: 143 | # context: 144 | # - aws-client-keys 145 | # filters: 146 | # branches: 147 | # only: 148 | # - production 149 | -------------------------------------------------------------------------------- /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | plugins: 3 | brakeman: 4 | enabled: true 5 | bundler-audit: 6 | enabled: true 7 | csslint: 8 | enabled: true 9 | duplication: 10 | enabled: true 11 | config: 12 | languages: 13 | - ruby 14 | exclude_patterns: 15 | - "spec/" 
16 | eslint: 17 | enabled: true 18 | fixme: 19 | enabled: true 20 | rubocop: 21 | enabled: true 22 | channel: rubocop-1-65-0 23 | exclude_patterns: 24 | - bin/ 25 | - vendor/ 26 | - coverage/ -------------------------------------------------------------------------------- /.codeinventory.yml: -------------------------------------------------------------------------------- 1 | name: i14y 2 | description: 'An API for indexing agency web content in real time.' 3 | license: 'https://creativecommons.org/publicdomain/zero/1.0' 4 | openSourceProject: 1 5 | governmentWideReuseProject: 1 6 | tags: 7 | - GSA 8 | - DigitalGovSearch 9 | - websites 10 | - search 11 | - indexing 12 | contact: 13 | email: search@gsa.gov 14 | -------------------------------------------------------------------------------- /.csslintrc: -------------------------------------------------------------------------------- 1 | --exclude-exts=.min.css 2 | --ignore=adjoining-classes,box-model,ids,order-alphabetical,unqualified-attributes 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # Logs 6 | log/* 7 | 8 | # Temporary files 9 | tmp/* 10 | 11 | # Editor temp files 12 | *.swp 13 | *.swo 14 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | I14Y_ADMIN_USER=dev 2 | I14Y_ADMIN_PASSWORD=devpwd -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | **/*{.,-}min.js 2 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | ecmaFeatures: 2 | modules: true 3 | jsx: true 4 | 5 
| env: 6 | amd: true 7 | browser: true 8 | es6: true 9 | jquery: true 10 | node: true 11 | 12 | # http://eslint.org/docs/rules/ 13 | rules: 14 | # Possible Errors 15 | comma-dangle: [2, never] 16 | no-cond-assign: 2 17 | no-console: 0 18 | no-constant-condition: 2 19 | no-control-regex: 2 20 | no-debugger: 2 21 | no-dupe-args: 2 22 | no-dupe-keys: 2 23 | no-duplicate-case: 2 24 | no-empty: 2 25 | no-empty-character-class: 2 26 | no-ex-assign: 2 27 | no-extra-boolean-cast: 2 28 | no-extra-parens: 0 29 | no-extra-semi: 2 30 | no-func-assign: 2 31 | no-inner-declarations: [2, functions] 32 | no-invalid-regexp: 2 33 | no-irregular-whitespace: 2 34 | no-negated-in-lhs: 2 35 | no-obj-calls: 2 36 | no-regex-spaces: 2 37 | no-sparse-arrays: 2 38 | no-unexpected-multiline: 2 39 | no-unreachable: 2 40 | use-isnan: 2 41 | valid-jsdoc: 0 42 | valid-typeof: 2 43 | 44 | # Best Practices 45 | accessor-pairs: 2 46 | block-scoped-var: 0 47 | complexity: [2, 6] 48 | consistent-return: 0 49 | curly: 0 50 | default-case: 0 51 | dot-location: 0 52 | dot-notation: 0 53 | eqeqeq: 2 54 | guard-for-in: 2 55 | no-alert: 2 56 | no-caller: 2 57 | no-case-declarations: 2 58 | no-div-regex: 2 59 | no-else-return: 0 60 | no-empty-label: 2 61 | no-empty-pattern: 2 62 | no-eq-null: 2 63 | no-eval: 2 64 | no-extend-native: 2 65 | no-extra-bind: 2 66 | no-fallthrough: 2 67 | no-floating-decimal: 0 68 | no-implicit-coercion: 0 69 | no-implied-eval: 2 70 | no-invalid-this: 0 71 | no-iterator: 2 72 | no-labels: 0 73 | no-lone-blocks: 2 74 | no-loop-func: 2 75 | no-magic-number: 0 76 | no-multi-spaces: 0 77 | no-multi-str: 0 78 | no-native-reassign: 2 79 | no-new-func: 2 80 | no-new-wrappers: 2 81 | no-new: 2 82 | no-octal-escape: 2 83 | no-octal: 2 84 | no-proto: 2 85 | no-redeclare: 2 86 | no-return-assign: 2 87 | no-script-url: 2 88 | no-self-compare: 2 89 | no-sequences: 0 90 | no-throw-literal: 0 91 | no-unused-expressions: 2 92 | no-useless-call: 2 93 | no-useless-concat: 2 94 | no-void: 2 95 | 
no-warning-comments: 0 96 | no-with: 2 97 | radix: 2 98 | vars-on-top: 0 99 | wrap-iife: 2 100 | yoda: 0 101 | 102 | # Strict 103 | strict: 0 104 | 105 | # Variables 106 | init-declarations: 0 107 | no-catch-shadow: 2 108 | no-delete-var: 2 109 | no-label-var: 2 110 | no-shadow-restricted-names: 2 111 | no-shadow: 0 112 | no-undef-init: 2 113 | no-undef: 0 114 | no-undefined: 0 115 | no-unused-vars: 0 116 | no-use-before-define: 0 117 | 118 | # Node.js and CommonJS 119 | callback-return: 2 120 | global-require: 2 121 | handle-callback-err: 2 122 | no-mixed-requires: 0 123 | no-new-require: 0 124 | no-path-concat: 2 125 | no-process-exit: 2 126 | no-restricted-modules: 0 127 | no-sync: 0 128 | 129 | # Stylistic Issues 130 | array-bracket-spacing: 0 131 | block-spacing: 0 132 | brace-style: 0 133 | camelcase: 0 134 | comma-spacing: 0 135 | comma-style: 0 136 | computed-property-spacing: 0 137 | consistent-this: 0 138 | eol-last: 0 139 | func-names: 0 140 | func-style: 0 141 | id-length: 0 142 | id-match: 0 143 | indent: 0 144 | jsx-quotes: 0 145 | key-spacing: 0 146 | linebreak-style: 0 147 | lines-around-comment: 0 148 | max-depth: 0 149 | max-len: 0 150 | max-nested-callbacks: 0 151 | max-params: 0 152 | max-statements: [2, 30] 153 | new-cap: 0 154 | new-parens: 0 155 | newline-after-var: 0 156 | no-array-constructor: 0 157 | no-bitwise: 0 158 | no-continue: 0 159 | no-inline-comments: 0 160 | no-lonely-if: 0 161 | no-mixed-spaces-and-tabs: 0 162 | no-multiple-empty-lines: 0 163 | no-negated-condition: 0 164 | no-nested-ternary: 0 165 | no-new-object: 0 166 | no-plusplus: 0 167 | no-restricted-syntax: 0 168 | no-spaced-func: 0 169 | no-ternary: 0 170 | no-trailing-spaces: 0 171 | no-underscore-dangle: 0 172 | no-unneeded-ternary: 0 173 | object-curly-spacing: 0 174 | one-var: 0 175 | operator-assignment: 0 176 | operator-linebreak: 0 177 | padded-blocks: 0 178 | quote-props: 0 179 | quotes: 0 180 | require-jsdoc: 0 181 | semi-spacing: 0 182 | semi: 0 183 | 
sort-vars: 0 184 | space-after-keywords: 0 185 | space-before-blocks: 0 186 | space-before-function-paren: 0 187 | space-before-keywords: 0 188 | space-in-parens: 0 189 | space-infix-ops: 0 190 | space-return-throw-case: 0 191 | space-unary-ops: 0 192 | spaced-comment: 0 193 | wrap-regex: 0 194 | 195 | # ECMAScript 6 196 | arrow-body-style: 0 197 | arrow-parens: 0 198 | arrow-spacing: 0 199 | constructor-super: 0 200 | generator-star-spacing: 0 201 | no-arrow-condition: 0 202 | no-class-assign: 0 203 | no-const-assign: 0 204 | no-dupe-class-members: 0 205 | no-this-before-super: 0 206 | no-var: 0 207 | object-shorthand: 0 208 | prefer-arrow-callback: 0 209 | prefer-const: 0 210 | prefer-reflect: 0 211 | prefer-spread: 0 212 | prefer-template: 0 213 | require-yield: 0 214 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | - Brief summary of the changes included in this PR 3 | - Any additional information or context which may help the reviewer 4 | 5 | ### Checklist 6 | Please ensure you have addressed all concerns below before marking a PR "ready for review" or before requesting a re-review. If you cannot complete an item below, replace the checkbox with the ⚠️ `:warning:` emoji and explain why the step was not completed. 7 | 8 | #### Functionality Checks 9 | 10 | - [ ] You have merged the latest changes from the target branch (usually `main`) into your branch. 11 | 12 | - [ ] Your primary commit message is of the format **SRCH-#### \<description\>** matching the associated Jira ticket. 13 | 14 | - [ ] PR title is either of the format **SRCH-#### \<description\>** matching the associated Jira ticket (e.g. "SRCH-123 implement feature X"), or **Release - SRCH-####, SRCH-####, SRCH-####** matching the Jira ticket numbers in the release. 15 | 16 | - [ ] Automated checks pass. 
If Code Climate checks do not pass, explain reason for failures: 17 | 18 | #### Process Checks 19 | 20 | - [ ] You have specified at least one "Reviewer". 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.rbc 2 | capybara-*.html 3 | 4 | #auto-generated by RubyMine 5 | .idea 6 | 7 | .rspec 8 | .rspec-local 9 | # Ignore the YAML file that is downloaded when running `rubocop` locally 10 | .rubocop*default-yml 11 | /log 12 | /tmp 13 | /db/*.sqlite3 14 | /db/*.sqlite3-journal 15 | /public/system 16 | /coverage/ 17 | /spec/tmp 18 | **.orig 19 | 20 | ## Environment normalisation: 21 | /.bundle 22 | /vendor/bundle 23 | 24 | # these should all be checked in to normalise the environment: 25 | # Gemfile.lock, .ruby-version, .ruby-gemset 26 | 27 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 28 | .rvmrc 29 | 30 | # if using bower-rails ignore default bower_components path bower.json files 31 | /vendor/assets/bower_components 32 | *.bowerrc 33 | bower.json 34 | 35 | # Ignore pow environment settings 36 | .powenv 37 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | inherit_from: .rubocop_todo.yml 2 | 3 | AllCops: 4 | NewCops: enable 5 | 6 | require: 7 | - rubocop-performance 8 | - rubocop-rails 9 | - rubocop-rake 10 | - rubocop-rspec 11 | 12 | inherit_mode: 13 | merge: 14 | - Exclude 15 | -------------------------------------------------------------------------------- /.rubocop_todo.yml: -------------------------------------------------------------------------------- 1 | # This configuration was generated by 2 | # `rubocop --auto-gen-config` 3 | # on 2025-04-01 21:22:59 UTC using RuboCop version 1.75.1. 
4 | # The point is for the user to remove these configuration records 5 | # one by one as the offenses are removed from the code base. 6 | # Note that changes in the inspected code, or installation of new 7 | # versions of RuboCop, may require this file to be generated again. 8 | 9 | # Offense count: 1 10 | RSpec/BeforeAfterAll: 11 | Exclude: 12 | - 'spec/requests/api/v1/documents_spec.rb' 13 | 14 | # Offense count: 2 15 | # Configuration parameters: Prefixes, AllowedPatterns. 16 | # Prefixes: when, with, without 17 | RSpec/ContextWording: 18 | Exclude: 19 | - 'spec/classes/query_parser_spec.rb' 20 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | ruby-3.3.7 2 | -------------------------------------------------------------------------------- /.simplecov: -------------------------------------------------------------------------------- 1 | SimpleCov.start 'rails' do 2 | minimum_coverage 100 3 | add_filter '/templates/' 4 | add_filter '/lib/templatable.rb' 5 | end 6 | -------------------------------------------------------------------------------- /Capfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'capistrano/setup' 4 | 5 | require 'capistrano/deploy' 6 | 7 | require 'capistrano/scm/git' 8 | install_plugin Capistrano::SCM::Git 9 | 10 | SSHKit.config.command_map[:bundle] = 'bin/bundle' 11 | 12 | require "capistrano/rbenv" 13 | require "capistrano/bundler" 14 | 15 | require 'capistrano/puma' 16 | require 'capistrano/puma/workers' 17 | 18 | install_plugin Capistrano::Puma, load_hooks: false 19 | install_plugin Capistrano::Puma::Systemd 20 | 21 | Dir.glob('lib/capistrano/tasks/*.rake').each { |r| import r } 22 | -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | ARG RUBY_VERSION=3.3.4 2 | FROM public.ecr.aws/docker/library/ruby:$RUBY_VERSION-slim as base 3 | 4 | WORKDIR /rails 5 | 6 | RUN apt-get update -qq && \ 7 | apt-get install -y build-essential libcurl4-openssl-dev curl && \ 8 | apt-get clean && \ 9 | rm -rf /var/lib/apt/lists/* 10 | 11 | ENV RAILS_ENV="production" \ 12 | BUNDLE_DEPLOYMENT="1" \ 13 | BUNDLE_PATH="/usr/local/bundle" \ 14 | RAILS_LOG_TO_STDOUT="1" 15 | 16 | FROM base as build 17 | 18 | RUN gem install bundler -v 2.4.7 19 | 20 | COPY Gemfile Gemfile.lock ./ 21 | 22 | RUN bundle install && \ 23 | rm -rf ~/.bundle/ "${BUNDLE_PATH}"/ruby/*/cache "${BUNDLE_PATH}"/ruby/*/bundler/gems/*/.git 24 | 25 | COPY . . 26 | 27 | FROM base 28 | 29 | COPY --from=build "${BUNDLE_PATH}" "${BUNDLE_PATH}" 30 | COPY --from=build /rails /rails 31 | 32 | RUN groupadd --system --gid 1000 rails && \ 33 | useradd --uid 1000 --gid 1000 --create-home --shell /bin/bash rails 34 | 35 | RUN mkdir -p /rails/log /rails/tmp && \ 36 | chown -R rails:rails /rails/log /rails/tmp 37 | 38 | RUN bin/secure_docker 39 | 40 | USER 1000:1000 41 | 42 | EXPOSE 3200 43 | CMD ["bundle", "exec", "rails", "server", "-b", "0.0.0.0", "-p", "3200"] 44 | -------------------------------------------------------------------------------- /Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM ruby:3.3.4 2 | WORKDIR /usr/src/app 3 | EXPOSE 3200 4 | 5 | ENV OPENSSL_CONF /etc/ssl/ 6 | 7 | RUN apt install -y curl \ 8 | && gem install bundler:2.4.7 9 | 10 | COPY Gemfile* /usr/src/app/ 11 | ENV BUNDLE_PATH /gems 12 | RUN bundle install 13 | 14 | COPY . 
/usr/src/app/ 15 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | source 'https://rubygems.org' 3 | 4 | gem 'dotenv', '~> 3.1' 5 | gem 'elasticsearch', '~> 6.0' 6 | gem 'elasticsearch-dsl', '~> 0.1.9' 7 | gem 'elasticsearch-persistence', '~> 6.0' 8 | gem 'grape', '~> 1.7.0' 9 | gem 'jbuilder', '~> 2.7' 10 | gem 'newrelic_rpm', '~> 9.10' 11 | gem 'puma', '~> 5.6' 12 | gem 'rack', '~> 2.2.8' 13 | gem 'rack-cors', '~> 1.0.5' 14 | gem 'rails', '~> 7.1.0' 15 | gem 'rails_semantic_logger', '~> 4.14' 16 | gem 'rake', '~> 13.0.0' 17 | gem 'typhoeus', '~> 1.4.0' 18 | gem 'virtus', '~> 1.0' # Virtus is no longer supported. Consider replacing with ActiveModel::Attributes 19 | 20 | group :development, :test do 21 | gem 'awesome_print', '~> 1.8' #To enable in Pry: https://github.com/awesome-print/awesome_print#pry-integration 22 | gem 'capistrano', require: false 23 | gem 'capistrano3-puma', require: false 24 | gem 'capistrano-newrelic', require: false 25 | gem 'capistrano-rails', require: false 26 | gem 'capistrano-rbenv', require: false 27 | gem 'debug' 28 | gem 'listen' 29 | gem 'pry-byebug', '~> 3.4' 30 | gem 'pry-rails', '~> 0.3' 31 | gem 'rspec-rails', '~> 3.7' 32 | gem 'rubocop', require: false 33 | gem 'rubocop-performance', require: false 34 | gem 'rubocop-rails', require: false 35 | gem 'rubocop-rake', require: false 36 | gem 'rubocop-rspec', require: false 37 | end 38 | 39 | group :test do 40 | gem 'codeclimate-test-reporter', '~> 1.0.8', require: nil 41 | gem 'shoulda', '~> 4.0' 42 | gem 'simplecov', '~> 0.13.0', require: false 43 | end 44 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | actioncable (7.1.5.1) 5 | 
actionpack (= 7.1.5.1) 6 | activesupport (= 7.1.5.1) 7 | nio4r (~> 2.0) 8 | websocket-driver (>= 0.6.1) 9 | zeitwerk (~> 2.6) 10 | actionmailbox (7.1.5.1) 11 | actionpack (= 7.1.5.1) 12 | activejob (= 7.1.5.1) 13 | activerecord (= 7.1.5.1) 14 | activestorage (= 7.1.5.1) 15 | activesupport (= 7.1.5.1) 16 | mail (>= 2.7.1) 17 | net-imap 18 | net-pop 19 | net-smtp 20 | actionmailer (7.1.5.1) 21 | actionpack (= 7.1.5.1) 22 | actionview (= 7.1.5.1) 23 | activejob (= 7.1.5.1) 24 | activesupport (= 7.1.5.1) 25 | mail (~> 2.5, >= 2.5.4) 26 | net-imap 27 | net-pop 28 | net-smtp 29 | rails-dom-testing (~> 2.2) 30 | actionpack (7.1.5.1) 31 | actionview (= 7.1.5.1) 32 | activesupport (= 7.1.5.1) 33 | nokogiri (>= 1.8.5) 34 | racc 35 | rack (>= 2.2.4) 36 | rack-session (>= 1.0.1) 37 | rack-test (>= 0.6.3) 38 | rails-dom-testing (~> 2.2) 39 | rails-html-sanitizer (~> 1.6) 40 | actiontext (7.1.5.1) 41 | actionpack (= 7.1.5.1) 42 | activerecord (= 7.1.5.1) 43 | activestorage (= 7.1.5.1) 44 | activesupport (= 7.1.5.1) 45 | globalid (>= 0.6.0) 46 | nokogiri (>= 1.8.5) 47 | actionview (7.1.5.1) 48 | activesupport (= 7.1.5.1) 49 | builder (~> 3.1) 50 | erubi (~> 1.11) 51 | rails-dom-testing (~> 2.2) 52 | rails-html-sanitizer (~> 1.6) 53 | activejob (7.1.5.1) 54 | activesupport (= 7.1.5.1) 55 | globalid (>= 0.3.6) 56 | activemodel (7.1.5.1) 57 | activesupport (= 7.1.5.1) 58 | activerecord (7.1.5.1) 59 | activemodel (= 7.1.5.1) 60 | activesupport (= 7.1.5.1) 61 | timeout (>= 0.4.0) 62 | activestorage (7.1.5.1) 63 | actionpack (= 7.1.5.1) 64 | activejob (= 7.1.5.1) 65 | activerecord (= 7.1.5.1) 66 | activesupport (= 7.1.5.1) 67 | marcel (~> 1.0) 68 | activesupport (7.1.5.1) 69 | base64 70 | benchmark (>= 0.3) 71 | bigdecimal 72 | concurrent-ruby (~> 1.0, >= 1.0.2) 73 | connection_pool (>= 2.2.5) 74 | drb 75 | i18n (>= 1.6, < 2) 76 | logger (>= 1.4.2) 77 | minitest (>= 5.1) 78 | mutex_m 79 | securerandom (>= 0.3) 80 | tzinfo (~> 2.0) 81 | airbrussh (1.5.3) 82 | sshkit (>= 1.6.1, != 1.7.0) 
83 | ast (2.4.3) 84 | awesome_print (1.9.2) 85 | axiom-types (0.1.1) 86 | descendants_tracker (~> 0.0.4) 87 | ice_nine (~> 0.11.0) 88 | thread_safe (~> 0.3, >= 0.3.1) 89 | base64 (0.2.0) 90 | benchmark (0.4.0) 91 | bigdecimal (3.1.9) 92 | builder (3.3.0) 93 | byebug (12.0.0) 94 | capistrano (3.19.2) 95 | airbrussh (>= 1.0.0) 96 | i18n 97 | rake (>= 10.0.0) 98 | sshkit (>= 1.9.0) 99 | capistrano-bundler (2.1.1) 100 | capistrano (~> 3.1) 101 | capistrano-newrelic (0.10.1) 102 | capistrano (~> 3.0) 103 | newrelic_rpm 104 | capistrano-rails (1.7.0) 105 | capistrano (~> 3.1) 106 | capistrano-bundler (>= 1.1, < 3) 107 | capistrano-rbenv (2.2.0) 108 | capistrano (~> 3.1) 109 | sshkit (~> 1.3) 110 | capistrano3-puma (5.2.0) 111 | capistrano (~> 3.7) 112 | capistrano-bundler 113 | puma (>= 4.0, < 6.0) 114 | codeclimate-test-reporter (1.0.9) 115 | simplecov (<= 0.13) 116 | coderay (1.1.3) 117 | coercible (1.0.0) 118 | descendants_tracker (~> 0.0.1) 119 | concurrent-ruby (1.3.5) 120 | connection_pool (2.5.0) 121 | crass (1.0.6) 122 | date (3.4.1) 123 | debug (1.10.0) 124 | irb (~> 1.10) 125 | reline (>= 0.3.8) 126 | descendants_tracker (0.0.4) 127 | thread_safe (~> 0.3, >= 0.3.1) 128 | diff-lcs (1.6.1) 129 | docile (1.1.5) 130 | dotenv (3.1.7) 131 | drb (2.2.1) 132 | dry-core (1.1.0) 133 | concurrent-ruby (~> 1.0) 134 | logger 135 | zeitwerk (~> 2.6) 136 | dry-inflector (1.2.0) 137 | dry-logic (1.6.0) 138 | bigdecimal 139 | concurrent-ruby (~> 1.0) 140 | dry-core (~> 1.1) 141 | zeitwerk (~> 2.6) 142 | dry-types (1.8.2) 143 | bigdecimal (~> 3.0) 144 | concurrent-ruby (~> 1.0) 145 | dry-core (~> 1.0) 146 | dry-inflector (~> 1.0) 147 | dry-logic (~> 1.4) 148 | zeitwerk (~> 2.6) 149 | elasticsearch (6.8.3) 150 | elasticsearch-api (= 6.8.3) 151 | elasticsearch-transport (= 6.8.3) 152 | elasticsearch-api (6.8.3) 153 | multi_json 154 | elasticsearch-dsl (0.1.10) 155 | elasticsearch-model (7.1.1) 156 | activesupport (> 3) 157 | elasticsearch (> 1) 158 | hashie 159 | 
elasticsearch-persistence (6.1.2) 160 | activemodel (> 4) 161 | activesupport (> 4) 162 | elasticsearch (~> 6) 163 | elasticsearch-model (>= 5) 164 | hashie 165 | elasticsearch-transport (6.8.3) 166 | faraday (~> 1) 167 | multi_json 168 | equalizer (0.0.11) 169 | erubi (1.13.1) 170 | ethon (0.16.0) 171 | ffi (>= 1.15.0) 172 | faraday (1.10.4) 173 | faraday-em_http (~> 1.0) 174 | faraday-em_synchrony (~> 1.0) 175 | faraday-excon (~> 1.1) 176 | faraday-httpclient (~> 1.0) 177 | faraday-multipart (~> 1.0) 178 | faraday-net_http (~> 1.0) 179 | faraday-net_http_persistent (~> 1.0) 180 | faraday-patron (~> 1.0) 181 | faraday-rack (~> 1.0) 182 | faraday-retry (~> 1.0) 183 | ruby2_keywords (>= 0.0.4) 184 | faraday-em_http (1.0.0) 185 | faraday-em_synchrony (1.0.0) 186 | faraday-excon (1.1.0) 187 | faraday-httpclient (1.0.1) 188 | faraday-multipart (1.1.0) 189 | multipart-post (~> 2.0) 190 | faraday-net_http (1.0.2) 191 | faraday-net_http_persistent (1.2.0) 192 | faraday-patron (1.0.0) 193 | faraday-rack (1.0.0) 194 | faraday-retry (1.0.3) 195 | ffi (1.17.1-aarch64-linux-gnu) 196 | ffi (1.17.1-aarch64-linux-musl) 197 | ffi (1.17.1-arm-linux-gnu) 198 | ffi (1.17.1-arm-linux-musl) 199 | ffi (1.17.1-arm64-darwin) 200 | ffi (1.17.1-x86_64-darwin) 201 | ffi (1.17.1-x86_64-linux-gnu) 202 | ffi (1.17.1-x86_64-linux-musl) 203 | globalid (1.2.1) 204 | activesupport (>= 6.1) 205 | grape (1.7.1) 206 | activesupport 207 | builder 208 | dry-types (>= 1.1) 209 | mustermann-grape (~> 1.0.0) 210 | rack (>= 1.3.0, < 3) 211 | rack-accept 212 | hashie (5.0.0) 213 | i18n (1.14.7) 214 | concurrent-ruby (~> 1.0) 215 | ice_nine (0.11.2) 216 | io-console (0.8.0) 217 | irb (1.15.1) 218 | pp (>= 0.6.0) 219 | rdoc (>= 4.0.0) 220 | reline (>= 0.4.2) 221 | jbuilder (2.13.0) 222 | actionview (>= 5.0.0) 223 | activesupport (>= 5.0.0) 224 | json (2.10.2) 225 | language_server-protocol (3.17.0.4) 226 | lint_roller (1.1.0) 227 | listen (3.9.0) 228 | rb-fsevent (~> 0.10, >= 0.10.3) 229 | rb-inotify (~> 0.9, 
>= 0.9.10) 230 | logger (1.7.0) 231 | loofah (2.24.0) 232 | crass (~> 1.0.2) 233 | nokogiri (>= 1.12.0) 234 | mail (2.8.1) 235 | mini_mime (>= 0.1.1) 236 | net-imap 237 | net-pop 238 | net-smtp 239 | marcel (1.0.4) 240 | method_source (1.1.0) 241 | mini_mime (1.1.5) 242 | minitest (5.25.5) 243 | multi_json (1.15.0) 244 | multipart-post (2.4.1) 245 | mustermann (3.0.3) 246 | ruby2_keywords (~> 0.0.1) 247 | mustermann-grape (1.0.2) 248 | mustermann (>= 1.0.0) 249 | mutex_m (0.3.0) 250 | net-imap (0.5.6) 251 | date 252 | net-protocol 253 | net-pop (0.1.2) 254 | net-protocol 255 | net-protocol (0.2.2) 256 | timeout 257 | net-scp (4.1.0) 258 | net-ssh (>= 2.6.5, < 8.0.0) 259 | net-sftp (4.0.0) 260 | net-ssh (>= 5.0.0, < 8.0.0) 261 | net-smtp (0.5.1) 262 | net-protocol 263 | net-ssh (7.3.0) 264 | newrelic_rpm (9.17.0) 265 | nio4r (2.7.4) 266 | nokogiri (1.18.7-aarch64-linux-gnu) 267 | racc (~> 1.4) 268 | nokogiri (1.18.7-aarch64-linux-musl) 269 | racc (~> 1.4) 270 | nokogiri (1.18.7-arm-linux-gnu) 271 | racc (~> 1.4) 272 | nokogiri (1.18.7-arm-linux-musl) 273 | racc (~> 1.4) 274 | nokogiri (1.18.7-arm64-darwin) 275 | racc (~> 1.4) 276 | nokogiri (1.18.7-x86_64-darwin) 277 | racc (~> 1.4) 278 | nokogiri (1.18.7-x86_64-linux-gnu) 279 | racc (~> 1.4) 280 | nokogiri (1.18.7-x86_64-linux-musl) 281 | racc (~> 1.4) 282 | ostruct (0.6.1) 283 | parallel (1.26.3) 284 | parser (3.3.7.4) 285 | ast (~> 2.4.1) 286 | racc 287 | pp (0.6.2) 288 | prettyprint 289 | prettyprint (0.2.0) 290 | prism (1.4.0) 291 | pry (0.15.2) 292 | coderay (~> 1.1) 293 | method_source (~> 1.0) 294 | pry-byebug (3.11.0) 295 | byebug (~> 12.0) 296 | pry (>= 0.13, < 0.16) 297 | pry-rails (0.3.11) 298 | pry (>= 0.13.0) 299 | psych (5.2.3) 300 | date 301 | stringio 302 | puma (5.6.9) 303 | nio4r (~> 2.0) 304 | racc (1.8.1) 305 | rack (2.2.13) 306 | rack-accept (0.4.5) 307 | rack (>= 0.4) 308 | rack-cors (1.0.6) 309 | rack (>= 1.6.0) 310 | rack-session (1.0.2) 311 | rack (< 3) 312 | rack-test (2.2.0) 313 | rack 
(>= 1.3) 314 | rackup (1.0.1) 315 | rack (< 3) 316 | webrick 317 | rails (7.1.5.1) 318 | actioncable (= 7.1.5.1) 319 | actionmailbox (= 7.1.5.1) 320 | actionmailer (= 7.1.5.1) 321 | actionpack (= 7.1.5.1) 322 | actiontext (= 7.1.5.1) 323 | actionview (= 7.1.5.1) 324 | activejob (= 7.1.5.1) 325 | activemodel (= 7.1.5.1) 326 | activerecord (= 7.1.5.1) 327 | activestorage (= 7.1.5.1) 328 | activesupport (= 7.1.5.1) 329 | bundler (>= 1.15.0) 330 | railties (= 7.1.5.1) 331 | rails-dom-testing (2.2.0) 332 | activesupport (>= 5.0.0) 333 | minitest 334 | nokogiri (>= 1.6) 335 | rails-html-sanitizer (1.6.2) 336 | loofah (~> 2.21) 337 | nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0) 338 | rails_semantic_logger (4.17.0) 339 | rack 340 | railties (>= 5.1) 341 | semantic_logger (~> 4.16) 342 | railties (7.1.5.1) 343 | actionpack (= 7.1.5.1) 344 | activesupport (= 7.1.5.1) 345 | irb 346 | rackup (>= 1.0.0) 347 | rake (>= 12.2) 348 | thor (~> 1.0, >= 1.2.2) 349 | zeitwerk (~> 2.6) 350 | rainbow (3.1.1) 351 | rake (13.0.6) 352 | rb-fsevent (0.11.2) 353 | rb-inotify (0.11.1) 354 | ffi (~> 1.0) 355 | rdoc (6.13.1) 356 | psych (>= 4.0.0) 357 | regexp_parser (2.10.0) 358 | reline (0.6.0) 359 | io-console (~> 0.5) 360 | rspec-core (3.9.3) 361 | rspec-support (~> 3.9.3) 362 | rspec-expectations (3.9.4) 363 | diff-lcs (>= 1.2.0, < 2.0) 364 | rspec-support (~> 3.9.0) 365 | rspec-mocks (3.9.1) 366 | diff-lcs (>= 1.2.0, < 2.0) 367 | rspec-support (~> 3.9.0) 368 | rspec-rails (3.9.1) 369 | actionpack (>= 3.0) 370 | activesupport (>= 3.0) 371 | railties (>= 3.0) 372 | rspec-core (~> 3.9.0) 373 | rspec-expectations (~> 3.9.0) 374 | rspec-mocks (~> 3.9.0) 375 | rspec-support (~> 3.9.0) 376 | rspec-support (3.9.4) 377 | rubocop (1.75.1) 378 | json (~> 2.3) 379 | language_server-protocol (~> 3.17.0.2) 380 | lint_roller (~> 1.1.0) 381 | parallel (~> 1.10) 382 | parser (>= 3.3.0.2) 383 | rainbow (>= 2.2.2, < 4.0) 384 | 
regexp_parser (>= 2.9.3, < 3.0) 385 | rubocop-ast (>= 1.43.0, < 2.0) 386 | ruby-progressbar (~> 1.7) 387 | unicode-display_width (>= 2.4.0, < 4.0) 388 | rubocop-ast (1.43.0) 389 | parser (>= 3.3.7.2) 390 | prism (~> 1.4) 391 | rubocop-performance (1.25.0) 392 | lint_roller (~> 1.1) 393 | rubocop (>= 1.75.0, < 2.0) 394 | rubocop-ast (>= 1.38.0, < 2.0) 395 | rubocop-rails (2.31.0) 396 | activesupport (>= 4.2.0) 397 | lint_roller (~> 1.1) 398 | rack (>= 1.1) 399 | rubocop (>= 1.75.0, < 2.0) 400 | rubocop-ast (>= 1.38.0, < 2.0) 401 | rubocop-rake (0.7.1) 402 | lint_roller (~> 1.1) 403 | rubocop (>= 1.72.1) 404 | rubocop-rspec (3.5.0) 405 | lint_roller (~> 1.1) 406 | rubocop (~> 1.72, >= 1.72.1) 407 | ruby-progressbar (1.13.0) 408 | ruby2_keywords (0.0.5) 409 | securerandom (0.4.1) 410 | semantic_logger (4.16.1) 411 | concurrent-ruby (~> 1.0) 412 | shoulda (4.0.0) 413 | shoulda-context (~> 2.0) 414 | shoulda-matchers (~> 4.0) 415 | shoulda-context (2.0.0) 416 | shoulda-matchers (4.5.1) 417 | activesupport (>= 4.2.0) 418 | simplecov (0.13.0) 419 | docile (~> 1.1.0) 420 | json (>= 1.8, < 3) 421 | simplecov-html (~> 0.10.0) 422 | simplecov-html (0.10.2) 423 | sshkit (1.24.0) 424 | base64 425 | logger 426 | net-scp (>= 1.1.2) 427 | net-sftp (>= 2.1.2) 428 | net-ssh (>= 2.8.0) 429 | ostruct 430 | stringio (3.1.6) 431 | thor (1.3.2) 432 | thread_safe (0.3.6) 433 | timeout (0.4.3) 434 | typhoeus (1.4.1) 435 | ethon (>= 0.9.0) 436 | tzinfo (2.0.6) 437 | concurrent-ruby (~> 1.0) 438 | unicode-display_width (3.1.4) 439 | unicode-emoji (~> 4.0, >= 4.0.4) 440 | unicode-emoji (4.0.4) 441 | virtus (1.0.5) 442 | axiom-types (~> 0.1) 443 | coercible (~> 1.0) 444 | descendants_tracker (~> 0.0, >= 0.0.3) 445 | equalizer (~> 0.0, >= 0.0.9) 446 | webrick (1.9.1) 447 | websocket-driver (0.7.7) 448 | base64 449 | websocket-extensions (>= 0.1.0) 450 | websocket-extensions (0.1.5) 451 | zeitwerk (2.7.2) 452 | 453 | PLATFORMS 454 | aarch64-linux-gnu 455 | aarch64-linux-musl 456 | arm-linux-gnu 
457 | arm-linux-musl 458 | arm64-darwin 459 | x86_64-darwin 460 | x86_64-linux-gnu 461 | x86_64-linux-musl 462 | 463 | DEPENDENCIES 464 | awesome_print (~> 1.8) 465 | capistrano 466 | capistrano-newrelic 467 | capistrano-rails 468 | capistrano-rbenv 469 | capistrano3-puma 470 | codeclimate-test-reporter (~> 1.0.8) 471 | debug 472 | dotenv (~> 3.1) 473 | elasticsearch (~> 6.0) 474 | elasticsearch-dsl (~> 0.1.9) 475 | elasticsearch-persistence (~> 6.0) 476 | grape (~> 1.7.0) 477 | jbuilder (~> 2.7) 478 | listen 479 | newrelic_rpm (~> 9.10) 480 | pry-byebug (~> 3.4) 481 | pry-rails (~> 0.3) 482 | puma (~> 5.6) 483 | rack (~> 2.2.8) 484 | rack-cors (~> 1.0.5) 485 | rails (~> 7.1.0) 486 | rails_semantic_logger (~> 4.14) 487 | rake (~> 13.0.0) 488 | rspec-rails (~> 3.7) 489 | rubocop 490 | rubocop-performance 491 | rubocop-rails 492 | rubocop-rake 493 | rubocop-rspec 494 | shoulda (~> 4.0) 495 | simplecov (~> 0.13.0) 496 | typhoeus (~> 1.4.0) 497 | virtus (~> 1.0) 498 | 499 | BUNDLED WITH 500 | 2.6.3 501 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. 
These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. 
other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | i14y 2 | ==== 3 | 4 | [![CircleCI](https://circleci.com/gh/GSA/i14y.svg?style=shield)](https://circleci.com/gh/GSA/i14y) 5 | [![Code Climate](https://codeclimate.com/github/GSA/i14y/badges/gpa.svg)](https://codeclimate.com/github/GSA/i14y) 6 | [![Test Coverage](https://codeclimate.com/github/GSA/i14y/badges/coverage.svg)](https://codeclimate.com/github/GSA/i14y) 7 | 8 | Search engine for agencies' published content 9 | 10 | ## Dependencies/Prerequisites 11 | 12 | * Ruby 13 | 14 | Use [rvm](https://rvm.io/) to install the version of Ruby specified in `.ruby-version`. 15 | 16 | ### Docker 17 | 18 | Docker can be used to: 1) run just the required services (MySQL, Elasticsearch, etc.) while [running the i14y application in your local machine](https://github.com/GSA/i14y#development), and/or 2) run the entire `i14y` application in a Docker container. Please refer to [searchgov-services](https://github.com/GSA/search-services) for detailed instructions on centralized configuration for the services. 19 | 20 | When running in a Docker container (option 2 above), the `i14y` application is configured to run on port [3200](http://localhost:3200/). 
Required dependencies - ([Ruby](https://github.com/GSA/i14y#dependenciesprerequisites), and Gems) - are installed using Docker. However, other data or configuration may need to be set up manually, which can be done in the running container using `bash`. 21 | 22 | To perform operations on the i14y application running in a Docker container, run the command below in `search-services`. 23 | 24 | $ docker compose run i14y bash 25 | 26 | For example, to set up the DB in Docker: 27 | 28 | $ docker compose run i14y bash 29 | $ bin/rails i14y:setup 30 | 31 | The Elasticsearch service provided by `searchgov-services` is configured to run on the default port, [9200](http://localhost:9200/). To use a different host (with or without port) or set of hosts, set the `ES_HOSTS` environment variable. For example, use the following command to run the specs using Elasticsearch running on `localhost:9207`: 32 | 33 | ES_HOSTS=localhost:9207 bundle exec rspec spec 34 | 35 | Verify that Elasticsearch 7.17.x is running on the expected port (port 9200 by default): 36 | 37 | ``` 38 | $ curl localhost:9200 39 | { 40 | "name" : "002410188f61", 41 | "cluster_name" : "es7-docker-cluster", 42 | "cluster_uuid" : "l3cAhBd4Sqa3B4SkpUilPQ", 43 | "version" : { 44 | "number" : "7.17.7", 45 | "build_flavor" : "default", 46 | "build_type" : "docker", 47 | "build_hash" : "78dcaaa8cee33438b91eca7f5c7f56a70fec9e80", 48 | "build_date" : "2022-10-17T15:29:54.167373105Z", 49 | "build_snapshot" : false, 50 | "lucene_version" : "8.11.1", 51 | "minimum_wire_compatibility_version" : "6.8.0", 52 | "minimum_index_compatibility_version" : "6.0.0-beta1" 53 | }, 54 | "tagline" : "You Know, for Search" 55 | } 56 | ``` 57 | 58 | ## Development 59 | 60 | - `bundle install`. 61 | - Run `bundle exec rake i14y:setup` to create the necessary indexes, index templates, and dynamic field templates. 
62 | 63 | If you ever want to start from scratch with your indexes/templates, you can clear everything out: 64 | `bundle exec rake i14y:clear_all` 65 | 66 | - Run the Rails server on port 8081 for compatibility with the 67 | search-gov app: 68 | ``` 69 | $ rails s -p 8081 70 | ``` 71 | 72 | You should see the default Rails index page on [http://localhost:8081/](http://localhost:8081/). 73 | 74 | ### Code Quality 75 | 76 | We use [Rubocop](https://rubocop.org/) for static code analysis. Settings specific to I14Y are configured via [.rubocop.yml](.rubocop.yml). Settings that can be shared among all Search.gov repos should be configured via the [searchgov_style](https://github.com/GSA/searchgov_style) gem. 77 | 78 | ## Basic Usage 79 | 80 | ### Create a collection for storing documents 81 | ``` 82 | $ curl -u dev:devpwd -XPOST http://localhost:8081/api/v1/collections \ 83 | -H "Content-Type:application/json" -d \ 84 | '{"handle":"test_collection","description":"my test collection","token":"test_collection_token"}' 85 | ``` 86 | 87 | ### Create a document within that collection 88 | Use the collection handle and token for authorization: 89 | 90 | ``` 91 | curl http://localhost:8081/api/v1/documents \ 92 | -XPOST \ 93 | -H "Content-Type:application/json" \ 94 | -u test_collection:test_collection_token \ 95 | -d '{"document_id":"1", 96 | "title":"a doc about rutabagas", 97 | "path": "http://www.foo.gov/rutabagas.html", 98 | "created": "2020-05-12T22:35:09Z", 99 | "description":"Lots of very important info on rutabagas", 100 | "content":"rutabagas", 101 | "promote": false, 102 | "language" : "en", 103 | "tags" : "tag1, another tag" 104 | }' 105 | ``` 106 | 107 | ### Search for a document within a collection 108 | ``` 109 | $ curl -u dev:devpwd http://localhost:8081/api/v1/collections/search?handles=test_collection&query=rutabaga 110 | ``` 111 | 112 | ## Tests 113 | ``` 114 | # Fire up Elasticsearch in search-services 115 | $ docker-compose up elasticsearch7 116 | 117 | $ 
bundle exec rake i14y:setup 118 | $ rake 119 | ``` 120 | 121 | ## Code Quality 122 | 123 | We use [Rubocop](https://rubocop.org/) for static code analysis. Settings specific to i14y are configured via [.rubocop.yml](.rubocop.yml). 124 | 125 | ### Running RuboCop Locally 126 | 127 | Basic commands you should frequently use: 128 | 129 | - **Generate or update the RuboCop TODO file**. Use this when RuboCop identifies many issues: 130 | ```bash 131 | bundle exec rubocop --auto-gen-config 132 | ``` 133 | - **Autocorrect easy-to-fix offenses** (safe corrections only): 134 | ```bash 135 | bundle exec rubocop -a 136 | ``` 137 | - **Autocorrect all possible offenses, including some more complex cases** (use with caution, review changes carefully): 138 | ```bash 139 | bundle exec rubocop -A 140 | ``` 141 | - **Disable offenses that cannot be automatically corrected** when running autocorrections. Useful if you'd like to quickly apply auto-fixes without manually addressing harder issues immediately: 142 | ```bash 143 | bundle exec rubocop -a --disable-uncorrectable 144 | ``` 145 | 146 | or 147 | 148 | ```bash 149 | bundle exec rubocop -A --disable-uncorrectable 150 | ``` 151 | 152 | It is recommended to always review diff changes after running autocorrection commands to ensure code correctness and maintainability. -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # Add your own tasks in files placed in lib/tasks ending in .rake, 2 | # for example lib/tasks/yourtask.rake, and they will automatically be available to Rake. 
3 | 4 | require File.expand_path('../config/application', __FILE__) 5 | 6 | Rails.application.load_tasks 7 | -------------------------------------------------------------------------------- /app/classes/document_query.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class DocumentQuery 4 | include Elasticsearch::DSL 5 | 6 | HIGHLIGHT_OPTIONS = { 7 | pre_tags: ["\ue000"], 8 | post_tags: ["\ue001"] 9 | }.freeze 10 | 11 | DEFAULT_STOPWORDS = %w[ 12 | a an and are as at be but by for if in into is it 13 | no not of on or such that the their then there these 14 | they this to was will with 15 | ].freeze 16 | 17 | FILTERABLE_TEXT_FIELDS = %i[audience 18 | content_type 19 | mime_type 20 | searchgov_custom1 21 | searchgov_custom2 22 | searchgov_custom3 23 | tags].freeze 24 | 25 | FILTERABLE_DATE_FIELDS = %i[created 26 | changed].freeze 27 | 28 | attr_reader :audience, 29 | :content_type, 30 | :date_range, 31 | :date_range_created, 32 | :excluded_sites, 33 | :ignore_tags, 34 | :thumbnail_url, 35 | :included_sites, 36 | :language, 37 | :mime_type, 38 | :searchgov_custom1, 39 | :searchgov_custom2, 40 | :searchgov_custom3, 41 | :site_filters, 42 | :tags 43 | attr_accessor :query, 44 | :search 45 | 46 | def initialize(options) 47 | @options = options 48 | @date_range = { gte: @options[:min_timestamp], lt: @options[:max_timestamp] } 49 | @date_range_created = { gte: @options[:min_timestamp_created], lt: @options[:max_timestamp_created] } 50 | @excluded_sites = [] 51 | @ignore_tags = options[:ignore_tags] 52 | @included_sites = [] 53 | @search = Search.new 54 | parse_filters 55 | parse_query(options[:query]) if options[:query] 56 | end 57 | 58 | def body 59 | search.source source_fields 60 | search.sort { by :changed, order: 'desc' } if @options[:sort_by_date] 61 | if query.present? 62 | query_options 63 | end 64 | build_search_query 65 | search.explain true if Rails.logger.debug? 
# scoring details 66 | search 67 | end 68 | 69 | def query_options 70 | set_highlight_options 71 | search.suggest(:suggestion, suggestion_hash) 72 | FILTERABLE_TEXT_FIELDS.each do |facet| 73 | search.aggregation(facet, aggregation_hash(facet)) 74 | end 75 | FILTERABLE_DATE_FIELDS.each do |date_facet| 76 | search.aggregation(date_facet, date_aggregation_hash(date_facet)) 77 | end 78 | end 79 | 80 | def full_text_fields 81 | @full_text_fields ||= begin 82 | %w[title description content].index_with { |field| suffixed(field) } 83 | end 84 | end 85 | 86 | def common_terms_hash 87 | { 88 | query: query, 89 | cutoff_frequency: 0.05, 90 | minimum_should_match: { low_freq: '3<90%', high_freq: '2<90%' } 91 | } 92 | end 93 | 94 | def source_fields 95 | default_fields = %w[title path created changed thumbnail_url] 96 | fields = (@options[:include] || default_fields) + ['language'] # new array: #body runs several times per search, so never mutate the caller's :include option 97 | fields.map { |field| full_text_fields[field] || field } 98 | end 99 | 100 | def timestamp_filters_present? 101 | @options[:min_timestamp].present? || @options[:max_timestamp].present? 102 | end 103 | 104 | def created_timestamp_filters_present? 105 | @options[:min_timestamp_created].present? || @options[:max_timestamp_created].present? 106 | end 107 | 108 | def boosted_fields 109 | full_text_fields.values.map do |field| 110 | if /title/ === field 111 | "#{field}^2" 112 | elsif /description/ === field 113 | "#{field}^1.5" 114 | else 115 | field.to_s 116 | end 117 | end 118 | end 119 | 120 | def functions 121 | [ 122 | # Prefer more recent documents 123 | { 124 | gauss: { 125 | changed: { origin: 'now', scale: '1825d', offset: '30d', decay: 0.3 } 126 | } 127 | }, 128 | 129 | # Avoid pdfs, etc. 
130 | { 131 | filter: { 132 | terms: { 133 | extension: %w[doc docx pdf ppt pptx xls xlsx] 134 | } 135 | }, 136 | weight: '.75' 137 | }, 138 | 139 | # Prefer documents that have been clicked more often 140 | { 141 | field_value_factor: { 142 | field: 'click_count', modifier: 'log1p', factor: 2, missing: 1 143 | } 144 | } 145 | ] 146 | end 147 | 148 | private 149 | 150 | def suffixed(field) 151 | [field, language].compact.join('_') 152 | end 153 | 154 | def parse_query(query) 155 | site_params_parser = QueryParser.new(query) 156 | @site_filters = site_params_parser.site_filters 157 | @included_sites = @site_filters[:included_sites] 158 | @excluded_sites = @site_filters[:excluded_sites] 159 | @query = site_params_parser.stripped_query 160 | end 161 | 162 | def parse_filters 163 | @audience = @options[:audience] 164 | @content_type = @options[:content_type] 165 | @language = @options[:language] || 'en' 166 | @mime_type = @options[:mime_type] 167 | @searchgov_custom1 = @options[:searchgov_custom1] 168 | @searchgov_custom2 = @options[:searchgov_custom2] 169 | @searchgov_custom3 = @options[:searchgov_custom3] 170 | @tags = @options[:tags] 171 | end 172 | 173 | def set_highlight_options 174 | highlight_fields = highlight_fields_hash 175 | search.highlight do 176 | pre_tags HIGHLIGHT_OPTIONS[:pre_tags] 177 | post_tags HIGHLIGHT_OPTIONS[:post_tags] 178 | fields highlight_fields 179 | end 180 | end 181 | 182 | def aggregation_hash(facet_field) 183 | { 184 | terms: { 185 | field: facet_field 186 | } 187 | } 188 | end 189 | 190 | def date_aggregation_hash(date_facet_field) 191 | { 192 | date_range: { 193 | field: date_facet_field, 194 | format: '8M/d/u', 195 | ranges: [ 196 | { 197 | key: 'Last Week', 198 | from: 'now-1w', 199 | to: 'now' 200 | }, 201 | { 202 | key: 'Last Month', 203 | from: 'now-1M', 204 | to: 'now' 205 | }, 206 | { 207 | key: 'Last Year', 208 | from: 'now-12M', 209 | to: 'now' 210 | } 211 | ] 212 | } 213 | } 214 | end 215 | 216 | def suggestion_hash 217 | { 
text: query_without_stopwords, 218 | phrase: { 219 | field: 'bigrams', 220 | size: 1, 221 | highlight: suggestion_highlight, 222 | collate: { query: { source: { multi_match: { query: '{{suggestion}}', 223 | type: 'phrase', 224 | fields: "*_#{language}" } } } } 225 | } } 226 | end 227 | 228 | def highlight_fields_hash 229 | { 230 | full_text_fields['title'] => { 231 | number_of_fragments: 0, 232 | type: 'fvh' 233 | }, 234 | full_text_fields['description'] => { 235 | fragment_size: 75, 236 | number_of_fragments: 2, 237 | type: 'fvh' 238 | }, 239 | full_text_fields['content'] => { 240 | fragment_size: 75, 241 | number_of_fragments: 2, 242 | type: 'fvh' 243 | } 244 | } 245 | end 246 | 247 | def suggestion_highlight 248 | { 249 | pre_tag: HIGHLIGHT_OPTIONS[:pre_tags].first, 250 | post_tag: HIGHLIGHT_OPTIONS[:post_tags].first 251 | } 252 | end 253 | 254 | # Temporary fix for https://github.com/elastic/elasticsearch/issues/34282 255 | def query_without_stopwords 256 | (query.downcase.split(/ +/) - DEFAULT_STOPWORDS).join(' ') 257 | end 258 | 259 | # Disabling length-related cops, as this method is intended to mimic the structure 260 | # of a complex Elasticsearch query using the Elasticsearch DSL 261 | # https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-dsl 262 | # rubocop:disable Metrics/MethodLength, Metrics/BlockLength 263 | def build_search_query 264 | doc_query = self 265 | 266 | search.query do 267 | function_score do 268 | functions doc_query.functions 269 | 270 | query do 271 | bool do 272 | if doc_query.query.present? 
273 | must do 274 | bool do 275 | # prefer bigram matches 276 | should { match bigrams: { operator: 'and', query: doc_query.query } } 277 | should { term promote: true } 278 | 279 | # prefer_word_form_matches 280 | must do 281 | bool do 282 | should do 283 | bool do 284 | must do 285 | simple_query_string do 286 | query doc_query.query 287 | fields doc_query.boosted_fields 288 | end 289 | end 290 | 291 | unless doc_query.query.match(/".*"/) 292 | must do 293 | bool do 294 | doc_query.full_text_fields.values.each do |field| 295 | should { common({ field => doc_query.common_terms_hash }) } 296 | end 297 | end 298 | end 299 | end 300 | end 301 | end 302 | 303 | should { match(audience: { operator: 'and', query: doc_query.query }) } 304 | should { match(basename: { operator: 'and', query: doc_query.query }) } 305 | should { match(searchgov_custom1: { operator: 'and', query: doc_query.query.downcase }) } 306 | should { match(searchgov_custom2: { operator: 'and', query: doc_query.query.downcase }) } 307 | should { match(searchgov_custom3: { operator: 'and', query: doc_query.query.downcase }) } 308 | should { match(tags: { operator: 'and', query: doc_query.query.downcase }) } 309 | end 310 | end 311 | end 312 | end 313 | end 314 | 315 | filter do 316 | bool do 317 | must { term language: doc_query.language } if doc_query.language.present? 318 | 319 | minimum_should_match '100%' 320 | should do 321 | bool do 322 | if doc_query.included_sites.any? 323 | minimum_should_match 1 324 | 325 | doc_query.included_sites.each do |site_filter| 326 | should do 327 | bool do 328 | must { term domain_name: site_filter.domain_name } 329 | must { term url_path: site_filter.url_path } if site_filter.url_path.present? 330 | end 331 | end 332 | end 333 | end 334 | end 335 | end 336 | 337 | FILTERABLE_TEXT_FIELDS.each do |field| 338 | next if doc_query.send(field).blank? 
339 | 340 | should do 341 | bool do 342 | doc_query.send(field).each do |field_value| 343 | minimum_should_match 1 344 | should { term "#{field}": field_value.downcase } 345 | end 346 | end 347 | end 348 | end 349 | 350 | must { range changed: doc_query.date_range } if doc_query.timestamp_filters_present? 351 | must { range created: doc_query.date_range_created } if doc_query.created_timestamp_filters_present? 352 | 353 | if doc_query.ignore_tags.present? 354 | must_not do 355 | terms tags: doc_query.ignore_tags 356 | end 357 | end 358 | 359 | doc_query.excluded_sites.each do |site_filter| 360 | if site_filter.url_path.present? 361 | must_not { regexp path: { value: "https?:\/\/#{site_filter.domain_name}#{site_filter.url_path}/.*" } } 362 | else 363 | must_not { term domain_name: site_filter.domain_name } 364 | end 365 | end 366 | end 367 | end 368 | end 369 | end 370 | end 371 | end 372 | end 373 | # rubocop:enable Metrics/MethodLength, Metrics/BlockLength 374 | end 375 | -------------------------------------------------------------------------------- /app/classes/document_search.rb: -------------------------------------------------------------------------------- 1 | class DocumentSearch 2 | NO_HITS = { "hits" => { "total" => 0, "hits" => [] }} 3 | 4 | attr_reader :doc_query, :offset, :size, :indices 5 | 6 | def initialize(options) 7 | @offset = options[:offset] || 0 8 | @size = options[:size] 9 | @doc_query = DocumentQuery.new(options) 10 | @indices = options[:handles].map { |handle| DocumentRepository.index_namespace(handle) } 11 | end 12 | 13 | def search 14 | i14y_search_results = execute_client_search 15 | if i14y_search_results.total.zero? && i14y_search_results.suggestion.present? 16 | suggestion = i14y_search_results.suggestion 17 | doc_query.query = suggestion['text'] 18 | i14y_search_results = execute_client_search 19 | i14y_search_results.override_suggestion(suggestion) if i14y_search_results.results.present? 
20 | end 21 | i14y_search_results 22 | rescue StandardError => error 23 | Rails.logger.error <<~ERROR_DETAILS 24 | Problem in DocumentSearch#search(): #{error} 25 | Query: #{doc_query.body.to_json} 26 | Backtrace: #{error.backtrace} 27 | ERROR_DETAILS 28 | NewRelic::Agent.notice_error(error, custom_params: { indices: indices }) # :custom_params is itself a notice_error option; do not nest it under another key 29 | DocumentSearchResults.new(NO_HITS) 30 | end 31 | 32 | private 33 | 34 | def execute_client_search 35 | params = { 36 | index: indices, 37 | body: doc_query.body, 38 | from: offset, 39 | size: size, 40 | # For compatibility with ES 6. This parameter will be removed in ES 8. 41 | # https://www.elastic.co/guide/en/elasticsearch/reference/current/breaking-changes-7.0.html#hits-total-now-object-search-response 42 | rest_total_hits_as_int: true 43 | } 44 | Rails.logger.debug "Query: *****\n#{doc_query.body.to_json}\n*****" 45 | result = ES.client.search(params) 46 | DocumentSearchResults.new(result, offset) 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /app/classes/document_search_results.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class DocumentSearchResults 4 | attr_reader :total, :offset, :results, :suggestion, :aggregations 5 | 6 | def initialize(result, offset = 0) 7 | @total = result['hits']['total'] 8 | @offset = offset 9 | @results = extract_hits(result['hits']['hits']) 10 | @suggestion = extract_suggestion(result['suggest']) 11 | @aggregations = extract_aggregations(result['aggregations']) 12 | end 13 | 14 | def override_suggestion(suggestion) 15 | @suggestion = suggestion 16 | end 17 | 18 | private 19 | 20 | def extract_suggestion(suggest) 21 | return unless suggest && total.zero? 
22 | 23 | suggest['suggestion'].first['options'].first.except('score') 24 | rescue NoMethodError 25 | nil 26 | end 27 | 28 | def extract_hits(hits) 29 | hits.map do |hit| 30 | highlight = hit['highlight'] 31 | source = deserialized(hit) 32 | if highlight.present? 33 | source['title'] = highlight["title_#{source['language']}"].first if highlight["title_#{source['language']}"] 34 | %w[description content].each do |optional_field| 35 | language_field = "#{optional_field}_#{source['language']}" 36 | source[optional_field] = highlight[language_field].join('...') if highlight[language_field] 37 | end 38 | end 39 | %w[created_at created changed updated_at updated].each do |date| 40 | source[date] = Time.parse(source[date]).utc.to_s if source[date].present? 41 | end 42 | source 43 | end 44 | end 45 | 46 | def extract_aggregations(aggregations) 47 | return unless aggregations 48 | 49 | aggregations.filter_map do |field, data| 50 | if data['buckets'].present? && !data['buckets'].all? { |b| b['doc_count'].zero? } 51 | { "#{field}": extract_aggregation_rows(data['buckets']) } 52 | end 53 | end 54 | end 55 | 56 | def extract_aggregation_rows(rows) 57 | rows.filter_map do |term_hash| 58 | next if term_hash['doc_count'].zero? 
59 | 60 | { agg_key: term_hash['key'], 61 | doc_count: term_hash['doc_count'], 62 | to: term_hash['to'] || nil, 63 | from: term_hash['from'] || nil, 64 | to_as_string: term_hash['to_as_string'] || nil, 65 | from_as_string: term_hash['from_as_string'] || nil }.compact 66 | end 67 | end 68 | 69 | def deserialized(hit) 70 | Serde.deserialize_hash(ActiveSupport::HashWithIndifferentAccess.new(hit['_source']), 71 | hit['_source']['language']) 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /app/classes/query_parser.rb: -------------------------------------------------------------------------------- 1 | class QueryParser 2 | SiteFilter = Struct.new(:domain_name, :url_path) 3 | attr_reader :site_filters, :query, :stripped_query 4 | 5 | def initialize(query) 6 | @query = query 7 | @site_filters = extract_site_filters 8 | end 9 | 10 | private 11 | def extract_site_filters 12 | site_filters = { included_sites: [], excluded_sites: [] } 13 | @stripped_query = @query.gsub(/\(?(-?site:\S+)\b\/?\)?/i) do 14 | match = $1 15 | if match.first == '-' 16 | site_filters[:excluded_sites] << extract_site_filter(match) 17 | else 18 | site_filters[:included_sites] << extract_site_filter(match) 19 | end 20 | nil 21 | end.squish 22 | 23 | site_filters 24 | end 25 | 26 | def extract_site_filter(site_param) 27 | domain_name, url_path = site_param.split('/', 2) 28 | domain_name.sub!(/\A-?site:/i, '') 29 | url_path = url_path.present? ? "/#{url_path}" : nil 30 | SiteFilter.new domain_name, url_path 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /app/controllers/api/base.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Api 4 | class Base < Grape::API 5 | rescue_from ReadOnlyAccessControl::DisallowedUpdate do 6 | message = 'The i14y API is currently in read-only mode.' 
7 | message += " #{I14y::Application.config.maintenance_message}" if I14y::Application.config.maintenance_message 8 | rack_response({ developer_message: message, status: 503 }.to_json, 503) 9 | end 10 | 11 | rescue_from Elasticsearch::Persistence::Repository::DocumentNotFound, 12 | Elasticsearch::Transport::Transport::Errors::NotFound do |_e| 13 | rack_response( 14 | { developer_message: 'Resource could not be found.', status: 400 }.to_json, 15 | 400 16 | ) 17 | end 18 | 19 | rescue_from :all do |e| 20 | Rails.logger.error "#{e}\n\n#{e.backtrace.join("\n")}" 21 | 22 | rack_response({ developer_message: "Something unexpected happened and we've been alerted.", status: 500 }.to_json, 500) 23 | end 24 | 25 | mount Api::V1::Base 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /app/controllers/api/v1/base.rb: -------------------------------------------------------------------------------- 1 | module Api 2 | module V1 3 | class Base < Grape::API 4 | mount Api::V1::Documents 5 | mount Api::V1::Collections 6 | end 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /app/controllers/api/v1/collections.rb: -------------------------------------------------------------------------------- 1 | module Api 2 | module V1 3 | class Collections < Grape::API 4 | ADMIN_USER = ENV['I14Y_ADMIN_USER'] 5 | ADMIN_PASSWORD = ENV['I14Y_ADMIN_PASSWORD'] 6 | 7 | prefix 'api' 8 | version 'v1' 9 | default_format :json 10 | format :json 11 | rescue_from Grape::Exceptions::ValidationErrors do |e| 12 | rack_response({ developer_message: e.message, status: 400 }.to_json, 400) 13 | end 14 | 15 | http_basic do |admin_user, admin_password| 16 | error_hash = { developer_message: 'Unauthorized', status: 400 } 17 | error!(error_hash, 400) unless auth?(admin_user, admin_password) 18 | true 19 | end 20 | 21 | helpers ReadOnlyAccessControl 22 | 23 | helpers do 24 | def ok(user_message) 25 | { status: 200, 
developer_message: 'OK', user_message: user_message } 26 | end 27 | 28 | def auth?(admin_user, admin_password) 29 | admin_user == ADMIN_USER && admin_password == ADMIN_PASSWORD 30 | end 31 | end 32 | 33 | resource :collections do 34 | desc 'Create a collection' 35 | params do 36 | requires :handle, 37 | allow_blank: false, 38 | type: String, 39 | regexp: /^[a-z0-9._]+$/, 40 | desc: 'Immutable name of the logical index used when authenticating Document API calls' 41 | requires :token, 42 | type: String, 43 | allow_blank: false, 44 | desc: 'Token to be used when authenticating Document API calls' 45 | end 46 | post do 47 | check_updates_allowed 48 | handle = params[:handle] 49 | collection = Collection.new(id: handle, token: params[:token]) 50 | error!(collection.errors.messages, 400) unless collection.valid? 51 | ES.collection_repository.save(collection) 52 | documents_index_name = [DocumentRepository.index_namespace(handle), 'v1'].join('-') 53 | DocumentRepository.new.create_index!( 54 | index: documents_index_name, 55 | include_type_name: true 56 | ) 57 | ES.client.indices.put_alias( 58 | index: documents_index_name, 59 | name: DocumentRepository.index_namespace(handle) 60 | ) 61 | ok('Your collection was successfully created.') 62 | end 63 | 64 | desc 'Delete a collection' 65 | delete ':handle' do 66 | check_updates_allowed 67 | handle = params.delete(:handle) 68 | collection = ES.collection_repository.find(handle) 69 | error!(collection.errors.messages, 400) unless ES.collection_repository.delete(handle) 70 | ES.client.indices.delete( 71 | index: [DocumentRepository.index_namespace(handle), '*'].join('-') 72 | ) 73 | ok('Your collection was successfully deleted.') 74 | end 75 | 76 | desc 'Search for documents in collections' 77 | params do 78 | requires :handles, 79 | allow_blank: false, 80 | type: String, 81 | desc: 'Restrict results to this comma-separated list of document collections' 82 | optional :language, 83 | type: Symbol, 84 | values: SUPPORTED_LOCALES, 
85 | allow_blank: false, 86 | desc: 'Restrict results to documents in a particular language' 87 | optional :query, 88 | allow_blank: true, 89 | type: String, 90 | desc: 'Search term. See documentation on supported query syntax.' 91 | optional :size, 92 | allow_blank: false, 93 | type: Integer, 94 | default: 20, 95 | values: 1..1000, 96 | desc: 'Number of results to return' 97 | optional :offset, 98 | allow_blank: false, 99 | type: Integer, 100 | default: 0, 101 | desc: 'Offset of results' 102 | optional :min_timestamp, 103 | type: DateTime, 104 | allow_blank: false, 105 | desc: 'Return documents that were changed at or after this time', 106 | documentation: { example: '2013-02-27T10:00:00Z' } 107 | optional :max_timestamp, 108 | type: DateTime, 109 | allow_blank: false, 110 | desc: 'Return documents that were changed before this time', 111 | documentation: { example: '2013-02-27T10:01:00Z' } 112 | optional :min_timestamp_created, 113 | type: DateTime, 114 | allow_blank: false, 115 | desc: 'Return documents that were created at or after this time', 116 | documentation: { example: '2013-02-27T10:00:00Z' } 117 | optional :max_timestamp_created, 118 | type: DateTime, 119 | allow_blank: false, 120 | desc: 'Return documents that were created before this time', 121 | documentation: { example: '2013-02-27T10:01:00Z' } 122 | optional :sort_by_date, 123 | type: Boolean, 124 | desc: 'Whether to order documents by created date instead of relevance' 125 | optional :searchgov_custom1, 126 | type: String, 127 | allow_blank: false, 128 | desc: 'Comma-separated list of custom content' 129 | optional :searchgov_custom2, 130 | type: String, 131 | allow_blank: false, 132 | desc: 'Comma-separated list of custom content' 133 | optional :searchgov_custom3, 134 | type: String, 135 | allow_blank: false, 136 | desc: 'Comma-separated list of custom content' 137 | optional :tags, 138 | type: String, 139 | allow_blank: false, 140 | desc: 'Comma-separated list of category tags' 141 | optional 
:ignore_tags, 142 | type: String, 143 | allow_blank: false, 144 | desc: 'Comma-separated list of category tags to exclude' 145 | optional :include, 146 | type: String, 147 | allow_blank: false, 148 | desc: 'Comma-separated list of fields to include in results', 149 | documentation: { example: 'title,path,description,content,updated_at' } 150 | end 151 | get :search do 152 | handles = params.delete(:handles).split(',') 153 | valid_collections = ES.collection_repository.find(handles).compact 154 | error!('Could not find all the specified collection handles', 400) unless valid_collections.size == handles.size 155 | arr_params = %i[include ignore_tags] << DocumentQuery::FILTERABLE_TEXT_FIELDS 156 | arr_params.flatten.compact.each do |key| 157 | params[key] = params[key].extract_array if params[key].present? 158 | end 159 | document_search = DocumentSearch.new(params.merge(handles: valid_collections.collect(&:id))) 160 | document_search_results = document_search.search 161 | metadata_hash = { total: document_search_results.total, 162 | offset: document_search_results.offset, 163 | suggestion: document_search_results.suggestion, 164 | aggregations: document_search_results.aggregations } 165 | { status: 200, developer_message: 'OK', metadata: metadata_hash, results: document_search_results.results } 166 | end 167 | 168 | desc 'Get collection info and stats' 169 | get ':handle' do 170 | handle = params.delete(:handle) 171 | collection = ES.collection_repository.find(handle) 172 | { status: 200, developer_message: 'OK' }.merge(collection.as_json(root: true, methods: [:document_total, :last_document_sent])) 173 | end 174 | end 175 | end 176 | end 177 | end 178 | -------------------------------------------------------------------------------- /app/controllers/api/v1/documents.rb: -------------------------------------------------------------------------------- 1 | module Api 2 | module V1 3 | class Documents < Grape::API 4 | prefix 'api' 5 | version 'v1' 6 | default_format 
:json 7 | format :json 8 | 9 | # Eventually, the validation logic should all be moved to the model classes, 10 | # and the validation itself should happen during serialization: 11 | # https://www.elastic.co/blog/activerecord-to-repository-changing-persistence-patterns-with-the-elasticsearch-rails-gem 12 | rescue_from Grape::Exceptions::ValidationErrors do |e| 13 | rack_response({ developer_message: e.message, status: 400 }.to_json, 400) 14 | end 15 | rescue_from Elasticsearch::Transport::Transport::Errors::Conflict do |_e| 16 | rack_response({ developer_message: 'Document already exists with that ID', status: 422 }.to_json, 422) 17 | end 18 | 19 | http_basic do |collection_handle, token| 20 | error_hash = { developer_message: 'Unauthorized', status: 400 } 21 | error!(error_hash, 400) unless auth?(collection_handle, token) 22 | @collection_handle = collection_handle 23 | true 24 | end 25 | 26 | helpers ReadOnlyAccessControl 27 | 28 | helpers do 29 | def ok(user_message) 30 | { status: 200, developer_message: 'OK', user_message: user_message } 31 | end 32 | 33 | def auth?(collection_handle, token) 34 | ES.collection_repository.find(collection_handle).token == token 35 | rescue Elasticsearch::Persistence::Repository::DocumentNotFound, Elasticsearch::Transport::Transport::Errors::BadRequest 36 | false 37 | end 38 | 39 | def document_repository 40 | index_name = DocumentRepository.index_namespace(@collection_handle) 41 | DocumentRepository.new(index_name: index_name) 42 | end 43 | end 44 | 45 | before do 46 | check_updates_allowed 47 | end 48 | 49 | resource :documents do 50 | desc 'Create a document' 51 | params do 52 | requires :document_id, 53 | allow_blank: false, 54 | type: String, 55 | regexp: { value: %r{^[^/]+$}, message: "cannot contain any of the following characters: ['/']" }, 56 | max_bytes: 512, 57 | desc: 'User-assigned document ID' 58 | requires :title, 59 | type: String, 60 | allow_blank: false, 61 | desc: 'Document title' 62 | requires :path, 63 | type: 
String, 64 | allow_blank: false, 65 | regexp: %r{^https?://[^\s/$.?#].[^\s]*$}, 66 | desc: 'Document link URL' 67 | optional :audience, 68 | type: String, 69 | allow_blank: false, 70 | desc: 'Document audience' 71 | optional :changed, 72 | type: DateTime, 73 | allow_blank: false, 74 | desc: 'When document was modified', 75 | documentation: { example: '2013-02-27T10:00:01Z' } 76 | optional :content, 77 | type: String, 78 | allow_blank: false, 79 | desc: 'Document content/body' 80 | optional :content_type, 81 | type: String, 82 | allow_blank: false, 83 | desc: 'Document content type' 84 | optional :created, 85 | type: DateTime, 86 | allow_blank: true, 87 | desc: 'When document was initially created', 88 | documentation: { example: '2013-02-27T10:00:00Z' } 89 | optional :description, 90 | type: String, 91 | allow_blank: false, 92 | desc: 'Document description' 93 | optional :thumbnail_url, 94 | type: String, 95 | allow_blank: false, 96 | desc: 'Document thumbnail_url' 97 | optional :language, 98 | type: Symbol, 99 | values: SUPPORTED_LOCALES, 100 | default: :en, 101 | allow_blank: false, 102 | desc: 'Two-letter locale describing language of document (defaults to :en)' 103 | optional :mime_type, 104 | type: String, 105 | allow_blank: false, 106 | desc: 'Document MIME type' 107 | optional :promote, 108 | type: Boolean, 109 | desc: 'Whether to promote the document in the relevance ranking' 110 | optional :searchgov_custom1, 111 | type: String, 112 | allow_blank: false, 113 | desc: 'Document custom field 1' 114 | optional :searchgov_custom2, 115 | type: String, 116 | allow_blank: false, 117 | desc: 'Document custom field 2' 118 | optional :searchgov_custom3, 119 | type: String, 120 | allow_blank: false, 121 | desc: 'Document custom field 3' 122 | optional :tags, 123 | type: String, 124 | allow_blank: false, 125 | desc: 'Comma-separated list of category tags' 126 | end 127 | 128 | post do 129 | id = params.delete(:document_id) 130 | document = Document.new(params.merge(id: 
id)) 131 | if document.invalid? 132 | error!({ developer_message: document.errors.full_messages.join(', '), status: 400 }, 400) 133 | end 134 | document_repository.save(document, op_type: :create) 135 | ok('Your document was successfully created.') 136 | end 137 | 138 | desc 'Update a document' 139 | params do 140 | optional :title, 141 | type: String, 142 | allow_blank: false, 143 | desc: 'Document title' 144 | optional :path, 145 | type: String, 146 | allow_blank: false, 147 | regexp: %r{^https?://[^\s/$.?#].[^\s]*$}, 148 | desc: 'Document link URL' 149 | optional :audience, 150 | type: String, 151 | allow_blank: false, 152 | desc: 'Document audience' 153 | optional :changed, 154 | type: DateTime, 155 | allow_blank: false, 156 | desc: 'When document was modified', 157 | documentation: { example: '2013-02-27T10:00:01Z' } 158 | optional :click_count, 159 | type: Integer, 160 | allow_blank: false, 161 | desc: 'Count of clicks' 162 | optional :content, 163 | type: String, 164 | allow_blank: false, 165 | desc: 'Document content/body' 166 | optional :content_type, 167 | type: String, 168 | allow_blank: false, 169 | desc: 'Document content type' 170 | optional :created, 171 | type: DateTime, 172 | allow_blank: true, 173 | desc: 'When document was initially created', 174 | documentation: { example: '2013-02-27T10:00:00Z' } 175 | optional :description, 176 | type: String, 177 | allow_blank: false, 178 | desc: 'Document description' 179 | optional :thumbnail_url, 180 | type: String, 181 | allow_blank: false, 182 | desc: 'Document thumbnail_url' 183 | optional :language, 184 | type: Symbol, 185 | values: SUPPORTED_LOCALES, 186 | allow_blank: false, 187 | desc: 'Two-letter locale describing language of document' 188 | optional :mime_type, 189 | type: String, 190 | allow_blank: false, 191 | desc: 'Document MIME type' 192 | optional :promote, 193 | type: Boolean, 194 | desc: 'Whether to promote the document in the relevance ranking' 195 | optional :searchgov_custom1, 196 | 
type: String, 197 | allow_blank: false, 198 | desc: 'Document custom field 1' 199 | optional :searchgov_custom2, 200 | type: String, 201 | allow_blank: false, 202 | desc: 'Document custom field 2' 203 | optional :searchgov_custom3, 204 | type: String, 205 | allow_blank: false, 206 | desc: 'Document custom field 3' 207 | optional :tags, 208 | type: String, 209 | allow_blank: false, 210 | desc: 'Comma-separated list of category tags' 211 | 212 | at_least_one_of :audience, 213 | :changed, 214 | :click_count, 215 | :content, 216 | :content_type, 217 | :created, 218 | :description, 219 | :document_id, 220 | :handle, 221 | :thumbnail_url, 222 | :language, 223 | :mime_type, 224 | :path, 225 | :promote, 226 | :searchgov_custom1, 227 | :searchgov_custom2, 228 | :searchgov_custom3, 229 | :tags, 230 | :title 231 | end 232 | 233 | put ':document_id', requirements: { document_id: /.*/ } do 234 | id = params.delete(:document_id) 235 | # SRCH-5096 Ensure that existing attributes are not overwritten on put or else the weekly 236 | # searchgov ClickMonitorJob and (infrequent) `searchgov:promote` task will delete metadata 237 | document = document_repository.find(id, _source: %w[audience 238 | changed 239 | content_type 240 | created 241 | created_at 242 | language 243 | mime_type 244 | path 245 | searchgov_custom1 246 | searchgov_custom2 247 | searchgov_custom3 248 | tags]) 249 | document.attributes = document.attributes.merge(params) 250 | if document.invalid? 
251 | error!({ developer_message: document.errors.full_messages.join(', '), status: 400 }, 400) 252 | end 253 | document_repository.update(document) 254 | ok('Your document was successfully updated.') 255 | end 256 | 257 | desc 'Delete a document' 258 | delete ':document_id', requirements: { document_id: /.*/ } do 259 | id = params[:document_id] 260 | error!(document.errors.messages, 400) unless document_repository.delete(id) 261 | ok('Your document was successfully deleted.') 262 | end 263 | end 264 | end 265 | end 266 | end 267 | -------------------------------------------------------------------------------- /app/models/collection.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class Collection 4 | include ActiveModel::Serializers::JSON 5 | include ActiveModel::Validations 6 | include Virtus.model 7 | 8 | attribute :id, String 9 | attribute :token, String 10 | attribute :created_at, Time, default: proc { Time.now.utc } 11 | attribute :updated_at, Time, default: proc { Time.now.utc } 12 | 13 | validates :token, presence: true 14 | 15 | def document_total 16 | document_repository.count 17 | end 18 | 19 | def last_document_sent 20 | document_repository.search("*:*", {size:1, sort: "updated_at:desc"}). 
21 | results.first.updated_at.utc.to_s 22 | rescue 23 | nil 24 | end 25 | 26 | private 27 | 28 | def document_repository 29 | @document_repository = DocumentRepository.new( 30 | index_name: DocumentRepository.index_namespace(id) 31 | ) 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /app/models/document.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'mini_mime' 4 | 5 | class Document 6 | include Virtus.model 7 | include ActiveModel::Validations 8 | 9 | attribute :audience, String 10 | attribute :changed, DateTime, default: ->(doc, _attr) { doc.created } 11 | attribute :click_count, Integer 12 | attribute :content, String 13 | attribute :content_type, String 14 | attribute :created_at, Time, default: proc { Time.now.utc } 15 | attribute :created, DateTime 16 | attribute :description, String 17 | attribute :id, String 18 | attribute :thumbnail_url, String 19 | attribute :language, String, mapping: { type: 'keyword' } 20 | attribute :mime_type, String 21 | attribute :path, String, mapping: { type: 'keyword' } 22 | attribute :promote, Boolean 23 | attribute :searchgov_custom1, String 24 | attribute :searchgov_custom2, String 25 | attribute :searchgov_custom3, String 26 | attribute :tags, String, mapping: { type: 'keyword' } 27 | attribute :title, String 28 | attribute :updated_at, Time, default: proc { Time.now.utc } 29 | attribute :updated, DateTime 30 | 31 | validates :thumbnail_url, format: { with: URI::DEFAULT_PARSER.make_regexp }, allow_blank: true 32 | validates :language, presence: true 33 | validates :path, presence: true 34 | 35 | validate :mime_type_is_valid 36 | 37 | private 38 | 39 | def mime_type_is_valid 40 | return unless mime_type 41 | 42 | errors.add(:mime_type, 'is invalid') unless MiniMime.lookup_by_content_type(mime_type) 43 | end 44 | end 45 | 
-------------------------------------------------------------------------------- /app/repositories/collection_repository.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class CollectionRepository 4 | include Repository 5 | 6 | klass Collection 7 | client ES.client 8 | index_name index_namespace 9 | settings number_of_shards: 1, number_of_replicas: 1 10 | 11 | def deserialize(hash) 12 | klass.new(source_hash(hash)) 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /app/repositories/concerns/repository.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'active_support/concern' 4 | 5 | module Repository 6 | extend ActiveSupport::Concern 7 | 8 | included do 9 | include Elasticsearch::Persistence::Repository 10 | include Elasticsearch::Persistence::Repository::DSL 11 | 12 | extend NamespacedIndex 13 | 14 | client ES.client 15 | settings number_of_shards: 1, number_of_replicas: 1 16 | end 17 | 18 | def source_hash(hash) 19 | hash['_source'].merge(id: hash['_id']) 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /app/repositories/document_repository.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class DocumentRepository 4 | include Repository 5 | 6 | klass Document 7 | 8 | def serialize(document) 9 | document_hash = ActiveSupport::HashWithIndifferentAccess.new(super) 10 | Serde.serialize_hash(document_hash, document_hash[:language]) 11 | end 12 | 13 | def deserialize(hash) 14 | doc_hash = source_hash(hash) 15 | deserialized_hash = Serde.deserialize_hash(doc_hash, 16 | doc_hash['language']) 17 | klass.new deserialized_hash 18 | end 19 | end 20 | -------------------------------------------------------------------------------- 
/app/templates/collections.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class Collections 4 | include Templatable 5 | 6 | def body 7 | Jbuilder.encode do |json| 8 | json.index_patterns "*-#{I14y::APP_NAME}-collections-*" 9 | json.mappings do 10 | dynamic_templates(json) 11 | end 12 | end 13 | end 14 | 15 | def dynamic_templates(json) 16 | json.dynamic_templates do 17 | string_fields_template(json, "keyword") 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /app/templates/documents.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class Documents 4 | include Templatable 5 | LIGHT_STEMMERS = { 6 | de: 'german', 7 | es: 'spanish', 8 | fr: 'french', 9 | it: 'italian', 10 | pt: 'portuguese' 11 | }.freeze 12 | STANDARD_STEMMERS = { 13 | bn: 'bengali', 14 | en: 'english', 15 | fi: 'finnish', 16 | hi: 'hindi', 17 | hu: 'hungarian', 18 | ru: 'russian', 19 | sv: 'swedish' 20 | }.freeze 21 | 22 | def initialize 23 | @synonym_filter_locales = Set.new 24 | @protected_filter_locales = Set.new 25 | end 26 | 27 | def body 28 | Jbuilder.encode do |json| 29 | json.index_patterns("*-#{I14y::APP_NAME}-documents-*") 30 | json.settings do 31 | json.analysis do 32 | char_filter(json) 33 | filter(json) 34 | analyzer(json) 35 | tokenizer(json) 36 | end 37 | end 38 | json.mappings do 39 | dynamic_templates(json) 40 | properties(json) 41 | end 42 | end 43 | end 44 | 45 | def char_filter(json) 46 | json.char_filter do 47 | json.quotes do 48 | json.type('mapping') 49 | json.mappings(['\\u0091=>\\u0027', '\\u0092=>\\u0027', '\\u2018=>\\u0027', '\\u2019=>\\u0027', '\\u201B=>\\u0027']) 50 | end 51 | end 52 | end 53 | 54 | def filter(json) 55 | json.filter do 56 | json.bigrams_filter do 57 | json.type('shingle') 58 | end 59 | language_synonyms(json) 60 | language_protwords(json) 
61 | language_stemmers(json) 62 | end 63 | end 64 | 65 | def analyzer(json) 66 | json.analyzer do 67 | generic_analyzers(json) 68 | french_analyzer(json) 69 | japanese_analyzer(json) 70 | korean_analyzer(json) 71 | chinese_analyzer(json) 72 | bigrams_analyzer(json) 73 | url_path_analyzer(json) 74 | domain_name_analyzer(json) 75 | default_analyzer(json) 76 | end 77 | end 78 | 79 | def default_analyzer(json) 80 | json.default do 81 | json.type('custom') 82 | json.filter(%w[icu_normalizer icu_folding]) 83 | json.tokenizer('icu_tokenizer') 84 | json.char_filter(%w[html_strip quotes]) 85 | end 86 | end 87 | 88 | def domain_name_analyzer(json) 89 | json.domain_name_analyzer do 90 | json.type('custom') 91 | json.filter('lowercase') 92 | json.tokenizer('domain_name_tokenizer') 93 | end 94 | end 95 | 96 | def url_path_analyzer(json) 97 | json.url_path_analyzer do 98 | json.type('custom') 99 | json.filter('lowercase') 100 | json.tokenizer('url_path_tokenizer') 101 | end 102 | end 103 | 104 | def bigrams_analyzer(json) 105 | json.bigrams_analyzer do 106 | json.type('custom') 107 | json.filter(%w[icu_normalizer icu_folding bigrams_filter]) 108 | json.tokenizer('icu_tokenizer') 109 | json.char_filter(%w[html_strip quotes]) 110 | end 111 | end 112 | 113 | def generic_analyzers(json) 114 | GENERIC_ANALYZER_LOCALES.each do |locale| 115 | generic_analyzer(json, locale) 116 | end 117 | end 118 | 119 | def chinese_analyzer(json) 120 | json.zh_analyzer do 121 | json.type('custom') 122 | json.filter(%w[smartcn_word icu_normalizer icu_folding]) 123 | json.tokenizer('smartcn_sentence') 124 | json.char_filter(['html_strip']) 125 | end 126 | end 127 | 128 | def korean_analyzer(json) 129 | json.ko_analyzer do 130 | json.type('cjk') 131 | json.filter([]) 132 | end 133 | end 134 | 135 | def japanese_analyzer(json) 136 | json.ja_analyzer do 137 | json.type('custom') 138 | json.filter(%w[kuromoji_baseform ja_pos_filter icu_normalizer icu_folding cjk_width]) 139 | 
json.tokenizer('kuromoji_tokenizer') 140 | json.char_filter(['html_strip']) 141 | end 142 | end 143 | 144 | def french_analyzer(json) 145 | json.fr_analyzer do 146 | json.type('custom') 147 | json.filter(%w[icu_normalizer elision fr_stem_filter icu_folding]) 148 | json.tokenizer('icu_tokenizer') 149 | json.char_filter(%w[html_strip quotes]) 150 | end 151 | end 152 | 153 | def tokenizer(json) 154 | json.tokenizer do 155 | json.kuromoji do 156 | json.type('kuromoji_tokenizer') 157 | json.mode('search') 158 | json.char_filter(['html_strip']) 159 | end 160 | json.url_path_tokenizer do 161 | json.type('PathHierarchy') 162 | end 163 | json.domain_name_tokenizer do 164 | json.type('PathHierarchy') 165 | json.delimiter('.') 166 | json.reverse(true) 167 | end 168 | end 169 | end 170 | 171 | def filter_array(locale) 172 | array = ['icu_normalizer'] 173 | array << "#{locale}_protected_filter" if @protected_filter_locales.include?(locale) 174 | array << "#{locale}_stem_filter" 175 | array << "#{locale}_synonym" if @synonym_filter_locales.include?(locale) 176 | array << 'icu_folding' 177 | array 178 | end 179 | 180 | def properties(json) 181 | json.properties do 182 | %w[updated created changed].each { |field| date(json, field) } 183 | %w[audience content_type document_id extension thumbnail_url language mime_type path 184 | searchgov_custom1 searchgov_custom2 searchgov_custom3 tags].each { |field| keyword(json, field) } 185 | basename(json) 186 | url_path(json) 187 | domain_name(json) 188 | promote(json) 189 | bigrams(json) 190 | click_count(json) 191 | end 192 | end 193 | 194 | def basename(json) 195 | json.basename do 196 | json.type('text') 197 | end 198 | end 199 | 200 | def bigrams(json) 201 | json.bigrams do 202 | json.analyzer('bigrams_analyzer') 203 | json.type('text') 204 | end 205 | end 206 | 207 | def promote(json) 208 | json.promote do 209 | json.type('boolean') 210 | end 211 | end 212 | 213 | def domain_name(json) 214 | json.domain_name do 215 | json.type('text') 
216 | json.analyzer('domain_name_analyzer') 217 | end 218 | end 219 | 220 | def url_path(json) 221 | json.url_path do 222 | json.type('text') 223 | json.analyzer('url_path_analyzer') 224 | end 225 | end 226 | 227 | def click_count(json) 228 | json.click_count do 229 | json.type('integer') 230 | end 231 | end 232 | 233 | def dynamic_templates(json) 234 | json.dynamic_templates do 235 | language_templates(json) 236 | string_fields_template(json, 'text') 237 | end 238 | end 239 | 240 | def language_stemmers(json) 241 | light_stemmers(json) 242 | standard_stemmers(json) 243 | japanese_position_filter(json) 244 | end 245 | 246 | def japanese_position_filter(json) 247 | json.ja_pos_filter do 248 | json.type('kuromoji_part_of_speech') 249 | json.stoptags(['\\u52a9\\u8a5e-\\u683c\\u52a9\\u8a5e-\\u4e00\\u822c', '\\u52a9\\u8a5e-\\u7d42\\u52a9\\u8a5e']) 250 | end 251 | end 252 | 253 | def light_stemmers(json) 254 | LIGHT_STEMMERS.each do |locale, language| 255 | generic_stemmer(json, locale, language, 'light') 256 | end 257 | end 258 | 259 | def standard_stemmers(json) 260 | STANDARD_STEMMERS.each do |locale, language| 261 | generic_stemmer(json, locale, language, 'standard') 262 | end 263 | end 264 | 265 | def language_templates(json) 266 | LANGUAGE_ANALYZER_LOCALES.each do |locale| 267 | json.child! 
do 268 | json.set!(locale) do 269 | json.match("*_#{locale}") 270 | json.match_mapping_type('string') 271 | json.mapping do 272 | json.analyzer("#{locale}_analyzer") 273 | json.type('text') 274 | json.term_vector('with_positions_offsets') 275 | json.copy_to('bigrams') 276 | end 277 | end 278 | end 279 | end 280 | end 281 | 282 | def language_synonyms(json) 283 | parse_configuration_file(json, 'synonyms') 284 | end 285 | 286 | def language_protwords(json) 287 | parse_configuration_file(json, 'protwords') 288 | end 289 | 290 | def synonyms_filter(json, locale, lines) 291 | @synonym_filter_locales.add(locale) 292 | linguistic_filter(json, locale, lines, 'synonym', 'synonyms', 'synonym') 293 | end 294 | 295 | def protwords_filter(json, locale, lines) 296 | @protected_filter_locales.add(locale) 297 | linguistic_filter(json, locale, lines, 'protected_filter', 'keywords', 'keyword_marker') 298 | end 299 | end 300 | -------------------------------------------------------------------------------- /appspec.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 0.0 3 | os: linux 4 | # files: 5 | # - source: / 6 | # destination: /home/search/cicd_temp 7 | 8 | hooks: 9 | 10 | BeforeInstall: 11 | - location: cicd-scripts/fetch_env_vars.sh 12 | timeout: 300 13 | runas: search 14 | 15 | 16 | -------------------------------------------------------------------------------- /bin/bundle: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__) 3 | load Gem.bin_path('bundler', 'bundle') 4 | -------------------------------------------------------------------------------- /bin/rails: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | APP_PATH = File.expand_path('../config/application', __dir__) 3 | require_relative '../config/boot' 4 | require 'rails/commands' 5 | 
-------------------------------------------------------------------------------- /bin/rake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require_relative "../config/boot" 3 | require "rake" 4 | Rake.application.run 5 | -------------------------------------------------------------------------------- /bin/secure_docker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Based on Search.gov GSA Container Security Benchmark https://docs.google.com/spreadsheets/d/1_UeKZHJGF8ZfoCSnDCux5lx1fUKSFVlYE6MmmVdeS-U/edit#gid=594625648 4 | 5 | # There is no need for the container to mount volumes or devices with fstab. Removing default items reduces the attack surface. 6 | rm -f /etc/fstab 7 | 8 | # Be informative after successful login. 9 | echo "echo -e '************WARNING************'" >> /home/rails/.bashrc 10 | echo "echo -e 'This is a U.S. General Services Administration Federal Government computer system that is FOR OFFICIAL USE ONLY. This system is subject to monitoring. Therefore, no expectation of privacy is to be assumed. Individuals found performing unauthorized activities may be subject to disciplinary action including criminal prosecution.\n'" >> /home/rails/.bashrc 11 | 12 | # Remove kernel tunable items since they are not needed. 13 | rm -fr /etc/sysctl* /etc/modprobe.d /etc/modules /etc/mdev.conf /etc/acpi 14 | 15 | # Remove suid & sgid files to enforce simple permission sets. 16 | find /bin /etc /lib /sbin /usr -xdev -type f -a \( -perm /4000 -o -perm /2000 \) -delete 17 | 18 | # Check for calls out of the dockerfile to download software externally 19 | apt-get remove -y --auto-remove curl 20 | 21 | # Remove any broken symlinks, if any. 22 | find /bin /etc /lib /sbin /usr -xdev -type l -exec test ! 
-e {} \; -delete 23 | 24 | # Remove existing crontabs 25 | rm -rf /etc/cron.d /etc/cron.daily 26 | 27 | # Remove init scripts since we do not use them. 28 | rm -fr /etc/init.d /lib/rc /etc/conf.d /etc/inittab /etc/runlevels /etc/rc.conf /etc/logrotate.d 29 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require "fileutils" 3 | 4 | # path to your application root. 5 | APP_ROOT = File.expand_path("..", __dir__) 6 | 7 | def system!(*args) 8 | system(*args, exception: true) 9 | end 10 | 11 | FileUtils.chdir APP_ROOT do 12 | # This script is a way to set up or update your development environment automatically. 13 | # This script is idempotent, so that you can run it at any time and get an expectable outcome. 14 | # Add necessary setup steps to this file. 15 | 16 | puts '== Installing dependencies ==' 17 | system! 'gem install bundler --conservative' 18 | system('bundle check') || system!('bundle install') 19 | 20 | puts "\n== Removing old logs and tempfiles ==" 21 | system! 'bin/rails log:clear tmp:clear' 22 | 23 | puts "\n== Restarting application server ==" 24 | system! 'bin/rails restart' 25 | end 26 | -------------------------------------------------------------------------------- /bin/update: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'fileutils' 3 | include FileUtils 4 | 5 | # path to your application root. 6 | APP_ROOT = File.expand_path('..', __dir__) 7 | 8 | def system!(*args) 9 | system(*args) || abort("\n== Command #{args} failed ==") 10 | end 11 | 12 | chdir APP_ROOT do 13 | # This script is a way to update your development environment automatically. 14 | # Add necessary update steps to this file. 15 | 16 | puts '== Installing dependencies ==' 17 | system! 
'gem install bundler --conservative' 18 | system('bundle check') || system!('bundle install') 19 | 20 | puts "\n== Removing old logs and tempfiles ==" 21 | system! 'bin/rails log:clear tmp:clear' 22 | 23 | puts "\n== Restarting application server ==" 24 | system! 'bin/rails restart' 25 | end 26 | -------------------------------------------------------------------------------- /buildspec_i14y.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 0.2 3 | env: 4 | parameter-store: 5 | API_SERVER_ADDRESSES: "DEPLOY_I14Y_SERVER_ADDRESS" 6 | DEPLOYMENT_PATH: "DEPLOY_I14Y_DEPLOYMENT_PATH" 7 | I14Y_THREADS: "I14Y_THREADS" 8 | I14Y_WORKERS: "I14Y_WORKERS" 9 | SERVER_DEPLOYMENT_USER: "DEPLOY_SERVER_DEPLOYMENT_USER" 10 | # SSH_KEY_PATH: "DEPLOY_SSH_KEY_PATH" - defined below 11 | 12 | # deployment variables shared with subsequent stages - might not need to be exported, as this is the final stage 13 | exported-variables: 14 | - DEPLOYMENT_PATH 15 | - I14Y_THREADS 16 | - I14Y_WORKERS 17 | - SERVER_ADDRESS 18 | - SERVER_DEPLOYMENT_USER 19 | - SSH_KEY_PATH 20 | 21 | phases: 22 | install: 23 | runtime-versions: 24 | python: 3.x 25 | commands: 26 | - export PATH="$HOME/.rbenv/bin:$PATH" 27 | - eval "$(rbenv init -)" 28 | 29 | pre_build: 30 | commands: 31 | - aws secretsmanager get-secret-value --secret-id $SEARCH_SECRETSMANAGER_KEY_SECRET_NAME --region $SEARCH_AWS_REGION --query 'SecretString' --output text > $SEARCH_ENV_EC2_KEY 32 | build: 33 | commands: 34 | - CURRENT_LOCATION=$(pwd) # would look something like this - /codebuild/output/src559980389/src - a temp dir created by codebuild 35 | - SSH_KEY_PATH="${CURRENT_LOCATION}/${SEARCH_ENV_EC2_KEY}" 36 | - echo $SSH_KEY_PATH 37 | - echo "deploying i14y app with capistrano" 38 | - bundle install 39 | - cap $SEARCH_ENV puma:config puma:systemd:config puma:systemd:enable 40 | - cap $SEARCH_ENV deploy 41 | - cap $SEARCH_ENV --tasks 42 | # - cap $SEARCH_ENV resque:start 43 | - cap $SEARCH_ENV 
puma:restart 44 | 45 | artifacts: 46 | files: 47 | - '**/*' 48 | -------------------------------------------------------------------------------- /cicd-scripts/fetch_env_vars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | # Move to a writable location 4 | cd /home/search/cicd_temp 5 | 6 | # Leave PARAM_PATH empty to fetch all parameters in the region 7 | PARAM_PATH="" 8 | 9 | # Clear the .env file if it exists 10 | > .env 11 | 12 | echo "Starting the script" 13 | # Fetch all parameter names in the region 14 | REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/region) 15 | echo $REGION 16 | if [ -n "$PARAM_PATH" ]; then 17 | PARAM_KEYS=$(aws ssm get-parameters-by-path --path "$PARAM_PATH" --recursive --query "Parameters[*].Name" --output text --region $REGION) 18 | else 19 | PARAM_KEYS=$(aws ssm describe-parameters --query "Parameters[*].Name" --output text --region $REGION) 20 | fi 21 | echo "Fetched parameter keys: $PARAM_KEYS" 22 | 23 | # Loop through each parameter key 24 | for PARAM in $PARAM_KEYS; do 25 | # Exclude parameters that start with "DEPLOY_" or match "*_EC2_PEM_KEY" or match LOGIN_DOT_GOV_PEM 26 | if [[ $PARAM != DEPLOY_* && ! 
$PARAM =~ .*_EC2_PEM_KEY$ && $PARAM != "LOGIN_DOT_GOV_PEM" ]]; then 27 | # Fetch the parameter value from SSM 28 | VALUE=$(aws ssm get-parameter --name "$PARAM" --with-decryption --query "Parameter.Value" --output text --region $REGION) 29 | 30 | # Rename parameters that start with "SEARCH_AWS_" to "AWS_" 31 | if [[ $PARAM == SEARCH_AWS_* ]]; then 32 | PARAM=${PARAM/SEARCH_AWS_/AWS_} 33 | fi 34 | 35 | # Write the key=value pair to the .env file 36 | echo "$PARAM=$VALUE" >> .env 37 | fi 38 | done 39 | 40 | # Output the result 41 | # echo ".env file created with the following content:" 42 | # cat .env 43 | cp /home/search/cicd_temp/.env /home/search/i14y/shared/ 44 | 45 | 46 | # create puma folders and files 47 | 48 | # Create directories if they do not already exist 49 | [ ! -d /home/search/i14y/shared/tmp/pids/ ] && mkdir -p /home/search/i14y/shared/tmp/pids/ 50 | [ ! -d /home/search/i14y/shared/log ] && mkdir -p /home/search/i14y/shared/log 51 | 52 | # Create log files if they do not already exist 53 | [ ! -f /home/search/i14y/shared/log/puma_access.log ] && touch /home/search/i14y/shared/log/puma_access.log 54 | [ ! -f /home/search/i14y/shared/log/puma_error.log ] && touch /home/search/i14y/shared/log/puma_error.log 55 | 56 | 57 | sudo chown -R search:search /home/search/i14y/ 58 | sudo chmod -R 755 /home/search/i14y/ 59 | 60 | find /home/search/i14y/ -type d -exec chmod 2755 {} \; 61 | 62 | umask 022 63 | 64 | sudo rm -rf /home/search/cicd_temp/* 65 | -------------------------------------------------------------------------------- /config.ru: -------------------------------------------------------------------------------- 1 | # This file is used by Rack-based servers to start the application. 
2 | 3 | require_relative "config/environment" 4 | require 'rack/cors' 5 | 6 | NewRelic::Agent.manual_start 7 | 8 | use Rack::Cors do 9 | allow do 10 | origins '*' 11 | resource '*', headers: :any, methods: [:get, :post, :put, :delete, :options] 12 | end 13 | end 14 | 15 | run Rails.application 16 | Rails.application.load_server 17 | -------------------------------------------------------------------------------- /config/access_control.yml: -------------------------------------------------------------------------------- 1 | default: &DEFAULT 2 | updates_allowed: true 3 | maintenance_message: Please try again in one hour. 4 | 5 | development: 6 | <<: *DEFAULT 7 | 8 | test: 9 | <<: *DEFAULT 10 | 11 | staging: 12 | <<: *DEFAULT 13 | 14 | production: 15 | <<: *DEFAULT 16 | -------------------------------------------------------------------------------- /config/application.rb: -------------------------------------------------------------------------------- 1 | require_relative "boot" 2 | 3 | # require "rails" 4 | # Pick the frameworks you want: 5 | require "active_model/railtie" 6 | # require "active_job/railtie" 7 | # require "active_record/railtie" 8 | # require "active_storage/engine" 9 | require "action_controller/railtie" 10 | # require "action_mailer/railtie" 11 | # require "action_mailbox/engine" 12 | # require "action_text/engine" 13 | # require "action_view/railtie" 14 | # require "action_cable/engine" 15 | # require "rails/test_unit/railtie" 16 | 17 | # Require the gems listed in Gemfile, including any gems 18 | # you've limited to :test, :development, or :production. 19 | Bundler.require(*Rails.groups) 20 | 21 | module I14y 22 | APP_NAME = 'i14y' 23 | class Application < Rails::Application 24 | # Initialize configuration defaults for originally generated Rails version. 25 | config.load_defaults 7.1 26 | 27 | # Please, add to the `ignore` list any other `lib` subdirectories that do 28 | # not contain `.rb` files, or that should not be reloaded or eager loaded. 
29 | # Common ones are `templates`, `generators`, or `middleware`, for example. 30 | # config.autoload_lib(ignore: %w(assets tasks)) 31 | 32 | # Set default cache format 33 | config.active_support.cache_format_version = 7.1 34 | 35 | # Configuration for the application, engines, and railties goes here. 36 | # 37 | # These settings can be overridden in specific environments using the files 38 | # in config/environments, which are processed later. 39 | 40 | config.semantic_logger.application = ENV.fetch('APP_NAME', APP_NAME) 41 | 42 | # config.time_zone = "Central Time (US & Canada)" 43 | # config.eager_load_paths << Rails.root.join("extras") 44 | config.eager_load_paths += Dir[config.root.join('lib', '**/')] 45 | require 'ext/string' 46 | Dir["#{Rails.root}/lib/validations/*.rb"].each { |filename| require filename } 47 | 48 | # Only loads a smaller set of middleware suitable for API only apps. 49 | # Middleware like session, flash, cookies can be added back manually. 50 | # Skip views, helpers and assets when generating a new resource. 51 | config.api_only = true 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /config/boot.rb: -------------------------------------------------------------------------------- 1 | ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__) 2 | 3 | require "bundler/setup" # Set up gems listed in the Gemfile. 
4 | -------------------------------------------------------------------------------- /config/deploy.rb: -------------------------------------------------------------------------------- 1 | # config valid for current version and patch releases of Capistrano 2 | lock '~> 3.19.1' 3 | 4 | I14Y_THREADS = ENV.fetch('I14Y_THREADS') { 5 } 5 | 6 | set :application, 'i14y' 7 | set :branch, ENV.fetch('SEARCH_ENV', 'staging') 8 | set :deploy_to, ENV.fetch('DEPLOYMENT_PATH') 9 | set :format, :pretty 10 | set :puma_access_log, "#{release_path}/log/puma.access.log" 11 | set :puma_bind, 'tcp://0.0.0.0:8081' 12 | set :puma_error_log, "#{release_path}/log/puma.error.log" 13 | set :puma_threads, [ENV.fetch('I14Y_MIN_THREADS', I14Y_THREADS), I14Y_THREADS] 14 | set :puma_workers, ENV.fetch('I14Y_WORKERS') { 0 } 15 | set :rails_env, 'production' 16 | set :rbenv_type, :user 17 | set :repo_url, 'https://github.com/GSA/i14y.git' 18 | set :user, ENV.fetch('SERVER_DEPLOYMENT_USER', 'search') 19 | 20 | append :linked_files, '.env' 21 | append :linked_dirs, 'log', 'tmp' 22 | 23 | API_SERVER_ADDRESSES = JSON.parse(ENV.fetch('API_SERVER_ADDRESSES', '[]')) 24 | 25 | role :app, API_SERVER_ADDRESSES, user: ENV['SERVER_DEPLOYMENT_USER'] 26 | role :db, API_SERVER_ADDRESSES, user: ENV['SERVER_DEPLOYMENT_USER'] 27 | role :web, API_SERVER_ADDRESSES, user: ENV['SERVER_DEPLOYMENT_USER'] 28 | 29 | set :ssh_options, { 30 | auth_methods: %w(publickey), 31 | forward_agent: false, 32 | keys: [ENV['SSH_KEY_PATH']], 33 | user: ENV['SERVER_DEPLOYMENT_USER'] 34 | } 35 | -------------------------------------------------------------------------------- /config/deploy/development.rb: -------------------------------------------------------------------------------- 1 | set :branch, 'main' 2 | -------------------------------------------------------------------------------- /config/deploy/production.rb: -------------------------------------------------------------------------------- 1 | # server-based syntax 2 | # 
====================== 3 | # Defines a single server with a list of roles and multiple properties. 4 | # You can define all roles on a single server, or split them: 5 | 6 | # server "example.com", user: "deploy", roles: %w{app db web}, my_property: :my_value 7 | # server "example.com", user: "deploy", roles: %w{app web}, other_property: :other_value 8 | # server "db.example.com", user: "deploy", roles: %w{db} 9 | 10 | 11 | 12 | # role-based syntax 13 | # ================== 14 | 15 | # Defines a role with one or multiple servers. The primary server in each 16 | # group is considered to be the first unless any hosts have the primary 17 | # property set. Specify the username and a domain or IP for the server. 18 | # Don't use `:all`, it's a meta role. 19 | 20 | # role :app, %w{deploy@example.com}, my_property: :my_value 21 | # role :web, %w{user1@primary.com user2@additional.com}, other_property: :other_value 22 | # role :db, %w{deploy@example.com} 23 | 24 | 25 | 26 | # Configuration 27 | # ============= 28 | # You can set any configuration variable like in config/deploy.rb 29 | # These variables are then only loaded and set in this stage. 30 | # For available Capistrano configuration variables see the documentation page. 31 | # http://capistranorb.com/documentation/getting-started/configuration/ 32 | # Feel free to add new variables to customise your setup. 33 | 34 | 35 | 36 | # Custom SSH Options 37 | # ================== 38 | # You may pass any option but keep in mind that net/ssh understands a 39 | # limited set of options, consult the Net::SSH documentation. 
40 | # http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start 41 | # 42 | # Global options 43 | # -------------- 44 | # set :ssh_options, { 45 | # keys: %w(/home/user_name/.ssh/id_rsa), 46 | # forward_agent: false, 47 | # auth_methods: %w(password) 48 | # } 49 | # 50 | # The server-based syntax can be used to override options: 51 | # ------------------------------------ 52 | # server "example.com", 53 | # user: "user_name", 54 | # roles: %w{web app}, 55 | # ssh_options: { 56 | # user: "user_name", # overrides user setting above 57 | # keys: %w(/home/user_name/.ssh/id_rsa), 58 | # forward_agent: false, 59 | # auth_methods: %w(publickey password) 60 | # # password: "please use keys" 61 | # } 62 | -------------------------------------------------------------------------------- /config/deploy/staging.rb: -------------------------------------------------------------------------------- 1 | # config/deploy/staging.rb 2 | 3 | # Server-based syntax 4 | # ====================== 5 | # Defines a single server with a list of roles and multiple properties. 6 | # You can define all roles on a single server, or split them: 7 | 8 | # Configuration 9 | # ============= 10 | # You can set any configuration variable like in config/deploy.rb. 11 | # These variables are then only loaded and set in this stage. 12 | # For available Capistrano configuration variables see the documentation page. 13 | # http://capistranorb.com/documentation/getting-started/configuration/ 14 | # Feel free to add new variables to customize your setup. 15 | 16 | # Custom SSH Options 17 | # ================== 18 | # You may pass any option but keep in mind that net/ssh understands a limited set of options, consult the Net/SSH documentation. 
19 | # http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start 20 | 21 | # Global options 22 | # -------------- 23 | -------------------------------------------------------------------------------- /config/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | default: &DEFAULT 2 | hosts: 3 | <% ENV.fetch('ES_HOSTS', 'localhost:9200').split(',').each do |host| %> 4 | - <%= host %> 5 | <% end %> 6 | user: <%= ENV['ES_USERNAME'] || 'elastic' %> 7 | password: <%= ENV['ES_PASSWORD'] || 'changeme' %> 8 | number_of_shards: 1 9 | log: true 10 | log_level: <%= ENV.fetch('ES_LOG_LEVEL', 'ERROR') %> 11 | 12 | development: 13 | <<: *DEFAULT 14 | 15 | test: 16 | <<: *DEFAULT 17 | 18 | staging: 19 | <<: *DEFAULT 20 | 21 | production: 22 | <<: *DEFAULT 23 | number_of_shards: 3 24 | 25 | -------------------------------------------------------------------------------- /config/environment.rb: -------------------------------------------------------------------------------- 1 | # Load the Rails application. 2 | require_relative "application" 3 | 4 | # Initialize the Rails application. 5 | Rails.application.initialize! 6 | -------------------------------------------------------------------------------- /config/environments/development.rb: -------------------------------------------------------------------------------- 1 | require "active_support/core_ext/integer/time" 2 | 3 | Rails.application.configure do 4 | # Settings specified here will take precedence over those in config/application.rb. 5 | 6 | # In the development environment your application's code is reloaded any time 7 | # it changes. This slows down response time but is perfect for development 8 | # since you don't have to restart the web server when you make code changes. 9 | config.enable_reloading = true 10 | 11 | # Do not eager load code on boot. 12 | config.eager_load = false 13 | 14 | # Show full error reports. 
15 | config.consider_all_requests_local = true 16 | 17 | # Enable/disable caching. By default caching is disabled. 18 | # Run rails dev:cache to toggle caching. 19 | if Rails.root.join('tmp', 'caching-dev.txt').exist? 20 | config.cache_store = :memory_store 21 | config.public_file_server.headers = { 22 | 'Cache-Control' => "public, max-age=#{2.days.to_i}" 23 | } 24 | else 25 | config.action_controller.perform_caching = false 26 | 27 | config.cache_store = :null_store 28 | end 29 | 30 | # Print deprecation notices to the Rails logger. 31 | config.active_support.deprecation = :log 32 | 33 | # Raise exceptions for disallowed deprecations. 34 | config.active_support.disallowed_deprecation = :raise 35 | 36 | # Tell Active Support which deprecation messages to disallow. 37 | config.active_support.disallowed_deprecation_warnings = [] 38 | 39 | 40 | # Raises error for missing translations. 41 | # config.i18n.raise_on_missing_translations = true 42 | 43 | # Annotate rendered view with file names. 44 | # config.action_view.annotate_rendered_view_with_filenames = true 45 | end 46 | -------------------------------------------------------------------------------- /config/environments/production.rb: -------------------------------------------------------------------------------- 1 | require "active_support/core_ext/integer/time" 2 | 3 | Rails.application.configure do 4 | # Settings specified here will take precedence over those in config/application.rb. 5 | 6 | # Code is not reloaded between requests. 7 | config.enable_reloading = false 8 | 9 | # Eager load code on boot. This eager loads most of Rails and 10 | # your application in memory, allowing both threaded web servers 11 | # and those relying on copy on write to perform better. 12 | # Rake tasks automatically ignore this option for performance. 13 | config.eager_load = true 14 | 15 | # Full error reports are disabled and caching is turned on. 
16 | config.consider_all_requests_local = false 17 | 18 | # Ensures that a master key has been made available in ENV["RAILS_MASTER_KEY"], config/master.key, or an environment 19 | # key such as config/credentials/production.key. This key is used to decrypt credentials (and other encrypted files). 20 | # config.require_master_key = true 21 | 22 | # Disable serving static files from `public/`, relying on NGINX/Apache to do so instead. 23 | # config.public_file_server.enabled = false 24 | 25 | # Enable serving of images, stylesheets, and JavaScripts from an asset server. 26 | # config.asset_host = "http://assets.example.com" 27 | 28 | # Specifies the header that your server uses for sending files. 29 | # config.action_dispatch.x_sendfile_header = "X-Sendfile" # for Apache 30 | # config.action_dispatch.x_sendfile_header = "X-Accel-Redirect" # for NGINX 31 | 32 | # Assume all access to the app is happening through a SSL-terminating reverse proxy. 33 | # Can be used together with config.force_ssl for Strict-Transport-Security and secure cookies. 34 | # config.assume_ssl = true 35 | 36 | # Force all access to the app over SSL, use Strict-Transport-Security, and use secure cookies. 37 | config.force_ssl = false 38 | 39 | # Prepend all log lines with the following tags. 40 | config.log_tags = [ :request_id ] 41 | 42 | # The "info" level includes generic and useful information about system operation, but avoids logging too much 43 | # information to avoid inadvertent exposure of personally identifiable information (PII). Note that the level 44 | # defaults to "debug" (log everything) here; set RAILS_LOG_LEVEL to override it. 45 | config.log_level = ENV.fetch("RAILS_LOG_LEVEL", "debug") 46 | 47 | # Use a different cache store in production. 48 | # config.cache_store = :mem_cache_store 49 | 50 | # Enable locale fallbacks for I18n (makes lookups for any locale fall back to 51 | # the I18n.default_locale when a translation cannot be found). 
52 | config.i18n.fallbacks = true 53 | 54 | # Send deprecation notices to registered listeners. 55 | config.active_support.deprecation = :notify 56 | 57 | # Log disallowed deprecations. 58 | config.active_support.disallowed_deprecation = :log 59 | 60 | # Tell Active Support which deprecation messages to disallow. 61 | config.active_support.disallowed_deprecation_warnings = [] 62 | 63 | # Enable protection against DNS rebinding and other `Host` header attacks. 64 | config.hosts << ENV['I14Y_ALLOWED_HOSTS'] if ENV['I14Y_ALLOWED_HOSTS'].present? 65 | 66 | # Skip DNS rebinding protection for the default health check endpoint. 67 | config.host_authorization = { exclude: ->(request) { request.path == "/up" } } 68 | 69 | config.rails_semantic_logger.format = :json 70 | end 71 | -------------------------------------------------------------------------------- /config/environments/test.rb: -------------------------------------------------------------------------------- 1 | require "active_support/core_ext/integer/time" 2 | 3 | # The test environment is used exclusively to run your application's 4 | # test suite. You never need to work with it otherwise. Remember that 5 | # your test database is "scratch space" for the test suite and is wiped 6 | # and recreated between test runs. Don't rely on the data there! 7 | 8 | Rails.application.configure do 9 | # Settings specified here will take precedence over those in config/application.rb. 10 | 11 | # While tests run files are not watched, reloading is not necessary. 12 | config.enable_reloading = false 13 | 14 | # Eager loading loads your entire application. When running a single test locally, 15 | # this is usually not necessary, and can slow down your test suite. However, it's 16 | # recommended that you enable it in continuous integration systems to ensure eager 17 | # loading is working properly before deploying your code. 18 | config.eager_load = ENV['CI'].present? 
19 | 20 | # Configure public file server for tests with Cache-Control for performance. 21 | config.public_file_server.enabled = true 22 | config.public_file_server.headers = { 23 | 'Cache-Control' => "public, max-age=#{1.hour.to_i}" 24 | } 25 | 26 | # Show full error reports and disable caching. 27 | config.consider_all_requests_local = true 28 | config.action_controller.perform_caching = false 29 | config.cache_store = :null_store 30 | 31 | # Render exception templates for rescuable exceptions and raise for other exceptions. 32 | config.action_dispatch.show_exceptions = false 33 | 34 | # Disable request forgery protection in test environment. 35 | config.action_controller.allow_forgery_protection = false 36 | 37 | # Print deprecation notices to the stderr. 38 | config.active_support.deprecation = :stderr 39 | 40 | # Raise exceptions for disallowed deprecations. 41 | config.active_support.disallowed_deprecation = :raise 42 | 43 | # Tell Active Support which deprecation messages to disallow. 44 | config.active_support.disallowed_deprecation_warnings = [] 45 | 46 | # Raises error for missing translations. 47 | config.i18n.raise_on_missing_translations = true 48 | 49 | # Annotate rendered view with file names. 
50 | # config.action_view.annotate_rendered_view_with_filenames = true 51 | end 52 | -------------------------------------------------------------------------------- /config/initializers/access_control.rb: -------------------------------------------------------------------------------- 1 | config = Rails.application.config_for(:access_control) 2 | I14y::Application.config.updates_allowed = !!config[:updates_allowed] 3 | I14y::Application.config.maintenance_message = config[:maintenance_message] 4 | -------------------------------------------------------------------------------- /config/initializers/application_controller_renderer.rb: -------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | # ActiveSupport::Reloader.to_prepare do 4 | # ApplicationController.renderer.defaults.merge!( 5 | # http_host: 'example.org', 6 | # https: false 7 | # ) 8 | # end 9 | -------------------------------------------------------------------------------- /config/initializers/backtrace_silencers.rb: -------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | # You can add backtrace silencers for libraries that you're using but don't wish to see in your backtraces. 4 | # Rails.backtrace_cleaner.add_silencer { |line| /my_noisy_library/.match?(line) } 5 | 6 | # You can also remove all the silencers if you're trying to debug a problem that might stem from framework code 7 | # by setting BACKTRACE=1 before calling your invocation, like "BACKTRACE=1 ./bin/rails runner 'MyClass.perform'". 8 | Rails.backtrace_cleaner.remove_silencers! 
if ENV["BACKTRACE"] 9 | -------------------------------------------------------------------------------- /config/initializers/cookies_serializer.rb: -------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | # Specify a serializer for the signed and encrypted cookie jars. 4 | # Valid options are :json, :marshal, and :hybrid. 5 | Rails.application.config.action_dispatch.cookies_serializer = :json 6 | -------------------------------------------------------------------------------- /config/initializers/elasticsearch.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module ES 4 | ES_CONFIG = Rails.application.config_for(:elasticsearch).freeze 5 | 6 | def self.client 7 | Elasticsearch::Client.new(ES_CONFIG.merge({randomize_hosts: true, 8 | retry_on_failure: true, 9 | reload_connections: false, 10 | reload_on_failure: false, 11 | transport_options: { 12 | ssl: { 13 | verify: false 14 | } 15 | }, 16 | logger: Rails.logger # Explicitly set Rails logger 17 | })) 18 | end 19 | 20 | def self.collection_repository 21 | CollectionRepository.new 22 | end 23 | end 24 | 25 | if Rails.env.development? 26 | logger = ActiveSupport::Logger.new(STDERR) 27 | logger.level = Logger::DEBUG 28 | logger.formatter = proc { |_s, _d, _p, m| "\e[2m#{m}\n\e[0m" } 29 | ES.client.transport.logger = logger 30 | end 31 | -------------------------------------------------------------------------------- /config/initializers/filter_parameter_logging.rb: -------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | # Configure parameters to be partially matched (e.g. passw matches password) and filtered from the log file. 4 | # Use this to limit dissemination of sensitive information. 
5 | # See the ActiveSupport::ParameterFilter documentation for supported notations and behaviors. 6 | Rails.application.config.filter_parameters += [ 7 | :password, :passw, :secret, :token, :_key, :crypt, :salt, :certificate, :otp, :ssn, :query 8 | ] 9 | -------------------------------------------------------------------------------- /config/initializers/inflections.rb: -------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | # Add new inflection rules using the following format. Inflections 4 | # are locale specific, and you may define rules for as many different 5 | # locales as you wish. All of these examples are active by default: 6 | # ActiveSupport::Inflector.inflections(:en) do |inflect| 7 | # inflect.plural /^(ox)$/i, '\1en' 8 | # inflect.singular /^(ox)en/i, '\1' 9 | # inflect.irregular 'person', 'people' 10 | # inflect.uncountable %w( fish sheep ) 11 | # end 12 | 13 | # These inflection rules are supported but not enabled by default: 14 | # ActiveSupport::Inflector.inflections(:en) do |inflect| 15 | # inflect.acronym 'RESTful' 16 | # end 17 | -------------------------------------------------------------------------------- /config/initializers/locales.rb: -------------------------------------------------------------------------------- 1 | SUPPORTED_LOCALES = [:ar, :be, :bg, :bn, :ca, :cs, :da, :de, :el, :en, :es, :et, :fa, :fi, :fr, :he, :hi, :hr, :ht, 2 | :hu, :hy, :id, :it, :ja, :ka, :km, :ko, :lt, :lv, :mk, :nl, :pl, :ps, :pt, :ro, :ru, :sk, :so, 3 | :sq, :sr, :sw, :th, :tr, :uk, :ur, :uz, :vi, :zh] 4 | LANGUAGE_ANALYZER_LOCALES = [:bn, :de, :en, :es, :fi, :fr, :hi, :hu, :it, :ja, :ko, :pt, :ru, :sv, :zh] 5 | GENERIC_ANALYZER_LOCALES = LANGUAGE_ANALYZER_LOCALES - [:fr, :ja, :ko, :zh] -------------------------------------------------------------------------------- /config/initializers/mime_types.rb: 
-------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | # Add new mime types for use in respond_to blocks: 4 | # Mime::Type.register "text/richtext", :rtf 5 | -------------------------------------------------------------------------------- /config/initializers/session_store.rb: -------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | Rails.application.config.session_store :cookie_store, key: '_i14y_session' 4 | -------------------------------------------------------------------------------- /config/initializers/wrap_parameters.rb: -------------------------------------------------------------------------------- 1 | # Be sure to restart your server when you modify this file. 2 | 3 | # This file contains settings for ActionController::ParamsWrapper which 4 | # is enabled by default. 5 | 6 | # Enable parameter wrapping for JSON. You can disable this by setting :format to an empty array. 
7 | ActiveSupport.on_load(:action_controller) do 8 | wrap_parameters format: [:json] 9 | end 10 | -------------------------------------------------------------------------------- /config/locales/analysis/en_protwords.txt: -------------------------------------------------------------------------------- 1 | # Import these from https://github.com/GSA/punchcard 2 | gas 3 | fevs 4 | -------------------------------------------------------------------------------- /config/locales/analysis/en_synonyms.txt: -------------------------------------------------------------------------------- 1 | # Import these from https://github.com/GSA/punchcard 2 | gas, petrol -------------------------------------------------------------------------------- /config/locales/analysis/es_protwords.txt: -------------------------------------------------------------------------------- 1 | # Import these from https://github.com/GSA/punchcard 2 | ronaldo -------------------------------------------------------------------------------- /config/locales/analysis/es_synonyms.txt: -------------------------------------------------------------------------------- 1 | # Import these from https://github.com/GSA/punchcard -------------------------------------------------------------------------------- /config/locales/en.yml: -------------------------------------------------------------------------------- 1 | # Files in the config/locales directory are used for internationalization 2 | # and are automatically loaded by Rails. If you want to use locales other 3 | # than English, add the necessary files in this directory. 4 | # 5 | # To use the locales, use `I18n.t`: 6 | # 7 | # I18n.t 'hello' 8 | # 9 | # In views, this is aliased to just `t`: 10 | # 11 | # <%= t('hello') %> 12 | # 13 | # To use a different locale, set it with `I18n.locale`: 14 | # 15 | # I18n.locale = :es 16 | # 17 | # This would use the information in config/locales/es.yml. 
18 | # 19 | # The following keys must be escaped otherwise they will not be retrieved by 20 | # the default I18n backend: 21 | # 22 | # true, false, on, off, yes, no 23 | # 24 | # Instead, surround them with single quotes. 25 | # 26 | # en: 27 | # 'true': 'foo' 28 | # 29 | # To learn more, please read the Rails Internationalization guide 30 | # available at https://guides.rubyonrails.org/i18n.html. 31 | 32 | en: 33 | hello: "Hello world" 34 | -------------------------------------------------------------------------------- /config/puma.rb: -------------------------------------------------------------------------------- 1 | # Puma can serve each request in a thread from an internal thread pool. 2 | # The `threads` method setting takes two numbers: a minimum and maximum. 3 | # Any libraries that use thread pools should be configured to match 4 | # the maximum value specified for Puma. Default is set to 5 threads for minimum 5 | # and maximum; this matches the default thread size of Active Record. 6 | # 7 | max_threads_count = ENV.fetch("RAILS_MAX_THREADS") { 5 } 8 | min_threads_count = ENV.fetch("RAILS_MIN_THREADS") { max_threads_count } 9 | threads min_threads_count, max_threads_count 10 | 11 | # Specifies the `worker_timeout` threshold that Puma will use to wait before 12 | # terminating a worker in development environments. 13 | # 14 | worker_timeout 3600 if ENV.fetch("RAILS_ENV", "development") == "development" 15 | 16 | # Specifies the `port` that Puma will listen on to receive requests; default is 3000. 17 | # 18 | port ENV.fetch("PORT") { 3000 } 19 | 20 | # Specifies the `environment` that Puma will run in. 21 | # 22 | environment ENV.fetch("RAILS_ENV") { "development" } 23 | 24 | # Specifies the `pidfile` that Puma will use. 25 | pidfile ENV.fetch("PIDFILE") { "tmp/pids/server.pid" } 26 | 27 | # Specifies the number of `workers` to boot in clustered mode. 28 | # Workers are forked web server processes. 
If using threads and workers together 29 | # the concurrency of the application would be max `threads` * `workers`. 30 | # Workers do not work on JRuby or Windows (both of which do not support 31 | # processes). 32 | # 33 | # workers ENV.fetch("WEB_CONCURRENCY") { 2 } 34 | 35 | # Use the `preload_app!` method when specifying a `workers` number. 36 | # This directive tells Puma to first boot the application and load code 37 | # before forking the application. This takes advantage of Copy On Write 38 | # process behavior so workers use less memory. 39 | # 40 | # preload_app! 41 | 42 | # Allow puma to be restarted by `rails restart` command. 43 | plugin :tmp_restart 44 | -------------------------------------------------------------------------------- /config/routes.rb: -------------------------------------------------------------------------------- 1 | Rails.application.routes.draw do 2 | # For details on the DSL available within this file, see https://guides.rubyonrails.org/routing.html 3 | mount Api::Base => '/' 4 | 5 | # Reveal health status on /up that returns 200 if the app boots with no exceptions, otherwise 500. 6 | # Can be used by load balancers and uptime monitors to verify that the app is live. 
# Converts document hashes between their plain form (+title+, +tags+ as a
# comma-delimited string, ...) and their serialized form (language-suffixed
# text fields, array/downcased fields, fields derived from the URL).
module Serde
  # Text fields that are stored under a language-suffixed key, e.g. +title_en+.
  LANGUAGE_FIELDS = %i[title description content].freeze
  # Fields converted to arrays via String#extract_array when given as strings.
  ARRAY_FIELDS = %i[searchgov_custom1 searchgov_custom2 searchgov_custom3 tags].freeze
  # Fields that are downcased when present.
  STRING_FIELDS = %i[audience content_type].freeze
  # Fields dropped from the hash returned by .deserialize_hash.
  MISC_FIELDS = %w[basename extension url_path domain_name bigrams].freeze

  # Serializes +hash+ in place and returns it.
  #
  # @param hash [Hash] document attributes; mutated by this call
  # @param language [String, Symbol] suffix for the language fields (e.g. 'en')
  # @return [Hash] the same hash, with +updated_at+ set to the current UTC time
  def self.serialize_hash(hash, language)
    serialize_language(hash, language)
    hash.merge!(uri_params_hash(hash[:path])) if hash[:path].present?
    serialize_array_fields(hash)
    serialize_string_fields(hash)
    hash[:updated_at] = Time.now.utc
    hash
  end

  # Moves every non-blank language field to "<field>_<language>", storing the
  # text content of the value (markup removed by Loofah, whitespace squished).
  # Blank fields are left untouched.
  def self.serialize_language(hash, language)
    LANGUAGE_FIELDS.each do |key|
      value = hash[key]
      next if value.blank?

      sanitized_value = Loofah.fragment(value).text(encode_special_chars: false).squish
      hash.store("#{key}_#{language}", sanitized_value)
      hash.delete(key)
    end
  end

  # Replaces present, non-array values of the array fields with the result of
  # String#extract_array. Values that are already arrays are kept as they are.
  def self.serialize_array_fields(hash)
    ARRAY_FIELDS.each do |field|
      next if hash[field].is_a?(Array)

      hash[field] = hash[field].extract_array if hash[field].present?
    end
  end

  # Downcases the present values of the string fields.
  def self.serialize_string_fields(hash)
    STRING_FIELDS.each do |field|
      hash[field] = hash[field].downcase if hash[field].present?
    end
  end

  # Reverses the language suffixing done by .serialize_language.
  #
  # The "<field>_<language>" keys found in +hash+ are renamed in place (so
  # +hash+ itself is mutated); the return value is a copy without MISC_FIELDS.
  #
  # @param hash [Hash] serialized document with String keys
  # @param language [String, Symbol] language suffix to strip
  # @return [Hash] copy of +hash+ without the misc fields
  def self.deserialize_hash(hash, language)
    suffix = "_#{language}"
    suffixed_fields = LANGUAGE_FIELDS.map { |key| "#{key}#{suffix}" }
    (suffixed_fields & hash.keys).each do |field|
      hash[field.chomp(suffix)] = hash.delete(field)
    end
    hash.except(*MISC_FIELDS)
  end

  # Derives the URL-based fields from +path+.
  #
  # @param path [String] the document URL
  # @return [Hash] +:basename+ (file name without extension), +:extension+
  #   (downcased, without the leading dot; '' when there is none),
  #   +:url_path+ and +:domain_name+
  def self.uri_params_hash(path)
    uri = URI.parse(path)
    {
      basename: File.basename(uri.path, '.*'),
      # File.extname returns "" or ".ext"; strip only a literal leading dot.
      extension: File.extname(uri.path).delete_prefix('.').downcase,
      url_path: uri.path,
      domain_name: uri.host
    }
  end
end
include_type_name: false 14 | ) 15 | end 16 | es_collections_index_name = [CollectionRepository.index_namespace, 'v1'].join('-') 17 | CollectionRepository.new.create_index!( 18 | index: es_collections_index_name, 19 | include_type_name: true 20 | ) 21 | ES.client.indices.put_alias( 22 | index: es_collections_index_name, 23 | name: CollectionRepository.index_name 24 | ) 25 | end 26 | 27 | desc "Copies data from one version of the i14y index to the next (e.g., collections, documents) and updates the alias" 28 | task :reindex, [:entity_name] => [:environment] do |_t, args| 29 | entity_name = args.entity_name 30 | persistence_model_klass = entity_name.singularize.camelize.constantize 31 | klass = entity_name.camelize.constantize 32 | template_generator = klass.new 33 | ES.client.indices.put_template(name: entity_name, 34 | body: template_generator.body, 35 | order: 0) 36 | 37 | wildcard = [persistence_model_klass.index_namespace, '*'].join 38 | aliases = ES.client.indices.get_alias(name: wildcard) 39 | aliases.each do |old_es_index_name, alias_names| 40 | alias_name = alias_names['aliases'].keys.first 41 | persistence_model_klass.index_name = old_es_index_name 42 | new_es_index_name = next_version(old_es_index_name) 43 | puts "Beginning copy of #{persistence_model_klass.count} #{entity_name} from #{old_es_index_name} to #{new_es_index_name}" 44 | persistence_model_klass.create_index!(index: new_es_index_name) 45 | persistence_model_klass.index_name = new_es_index_name 46 | since_timestamp = Time.now 47 | host_hash = ES.client.transport.hosts.first 48 | base_url = "#{host_hash[:protocol]}://#{host_hash[:host]}:#{host_hash[:port]}/" 49 | old_es_index_url = base_url + old_es_index_name 50 | new_es_index_url = base_url + new_es_index_name 51 | stream2es(old_es_index_url, new_es_index_url) 52 | move_alias(alias_name, old_es_index_name, new_es_index_name) 53 | stream2es(old_es_index_url, new_es_index_url, since_timestamp) 54 | puts "New #{new_es_index_name} index now 
# Returns the name of the next version of a versioned index by incrementing
# the digits that follow "-v" (e.g. "test-i14y-documents-v1" becomes
# "test-i14y-documents-v2"). Anything after those digits is dropped.
#
# @param index_name [String] index name containing a "-v<digits>" part
# @return [String] the index name with its version number incremented
# @raise [ArgumentError] if +index_name+ has no "-v<digits>" part
def next_version(index_name)
  matches = index_name.match(/(.*-v)(\d+)/)
  # Fail with a clear message instead of a NoMethodError on nil.
  raise ArgumentError, "no version suffix found in index name #{index_name.inspect}" unless matches

  "#{matches[1]}#{matches[2].succ}"
end
field do 10 | json.type "keyword" 11 | json.index true 12 | end 13 | end 14 | 15 | def string_fields_template(json, type) 16 | json.child! do 17 | json.string_fields do 18 | json.mapping do 19 | json.type type 20 | json.index true 21 | end 22 | json.match_mapping_type "string" 23 | json.match "*" 24 | end 25 | end 26 | end 27 | 28 | def linguistic_filter(json, locale, lines, name, field, type) 29 | json.set! "#{locale}_#{name}" do 30 | json.type type 31 | json.set! field, lines 32 | end 33 | end 34 | 35 | def parse_configuration_file(json, type) 36 | LANGUAGE_ANALYZER_LOCALES.map do |locale| 37 | [locale, Rails.root.join("config", "locales", "analysis", "#{locale}_#{type}.txt")] 38 | end.select do |locale_file_array| 39 | File.exist? locale_file_array.last 40 | end.each do |locale, file| 41 | lines = get_lines_from(file) 42 | send("#{type}_filter", json, locale, lines) if lines.any? 43 | end 44 | end 45 | 46 | def get_lines_from(file) 47 | File.readlines(file).map(&:chomp).reject { |line| line.starts_with?("#") } 48 | end 49 | 50 | def generic_stemmer(json, locale, language, degree) 51 | json.set! "#{locale}_stem_filter" do 52 | json.type "stemmer" 53 | stemmer_name = degree == "standard" ? '' : "#{degree}_" 54 | json.name "#{stemmer_name}#{language}" 55 | end 56 | end 57 | 58 | def generic_analyzer(json, locale) 59 | json.set! 
# Grape parameter validator that rejects values whose size in bytes (not
# characters) exceeds the limit given as the validator option.
class MaxBytes < Grape::Validations::Validators::Base
  # @param attr_name [Symbol, String] name of the parameter being validated
  # @param params [Hash] parameters containing +attr_name+
  # @raise [Grape::Exceptions::Validation] if the value is longer than the limit
  def validate_param!(attr_name, params)
    value = params[attr_name]
    # nil and other values without a byte size cannot exceed the limit; skip
    # them instead of failing with a NoMethodError.
    return unless value.respond_to?(:bytesize)
    return if value.bytesize <= max_bytes

    raise Grape::Exceptions::Validation.new(
      params: [@scope.full_name(attr_name)],
      message: "cannot be more than #{max_bytes} bytes long"
    )
  end

  private

  # The configured limit: the first element of the (possibly array-wrapped)
  # validator option.
  def max_bytes
    @max_bytes ||= [@option].flatten.first
  end
end
60 |
61 |

The page you were looking for doesn't exist.

62 |

You may have mistyped the address or the page may have moved.

63 |
64 |

If you are the application owner, check the logs for more information.

65 |
66 | 67 | 68 | -------------------------------------------------------------------------------- /public/422.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | The change you wanted was rejected (422) 5 | 6 | 55 | 56 | 57 | 58 | 59 |
60 |
61 |

The change you wanted was rejected.

62 |

Maybe you tried to change something you didn't have access to.

63 |
64 |

If you are the application owner, check the logs for more information.

65 |
66 | 67 | 68 | -------------------------------------------------------------------------------- /public/500.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | We're sorry, but something went wrong (500) 5 | 6 | 55 | 56 | 57 | 58 | 59 |
60 |
61 |

We're sorry, but something went wrong.

62 |
63 |

If you are the application owner, check the logs for more information.

64 |
65 | 66 | 67 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GSA/i14y/f3be9325fa45d5dec1e716a4a9ffeb6e0cf56b22/public/favicon.ico -------------------------------------------------------------------------------- /public/robots.txt: -------------------------------------------------------------------------------- 1 | # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file 2 | # 3 | # To ban all spiders from the entire site uncomment the next two lines: 4 | # User-agent: * 5 | # Disallow: / 6 | -------------------------------------------------------------------------------- /spec/classes/document_query_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe DocumentQuery do 6 | let(:query) { 'test' } 7 | let(:options) do 8 | { query: query } 9 | end 10 | let(:document_query) { described_class.new(options) } 11 | let(:body) { document_query.body.to_hash } 12 | 13 | describe '#body' do 14 | context 'when a query includes stopwords' do 15 | let(:suggestion_hash) { body[:suggest][:suggestion] } 16 | let(:query) { 'this document IS about the theater' } 17 | 18 | it 'strips the stopwords from the query' do 19 | expect(suggestion_hash[:text]).to eq 'document about theater' 20 | end 21 | end 22 | 23 | it 'contains aggregations' do 24 | expect(body[:aggregations]).to match( 25 | hash_including(:audience, 26 | :changed, 27 | :content_type, 28 | :created, 29 | :mime_type, 30 | :searchgov_custom1, 31 | :searchgov_custom2, 32 | :searchgov_custom3, 33 | :tags) 34 | ) 35 | end 36 | 37 | context 'when the query is blank' do 38 | let(:query) { '' } 39 | 40 | it 'does not contain aggregations' do 41 | expect(body[:aggregations]).to be_nil 42 | end 43 | end 44 | 
end 45 | end 46 | -------------------------------------------------------------------------------- /spec/classes/document_search_results_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe DocumentSearchResults do 6 | let(:document_search_results) { described_class.new(result) } 7 | 8 | describe '#suggestion' do 9 | subject(:suggestion) { document_search_results.suggestion } 10 | 11 | context 'when no hits and suggestions are present' do 12 | let(:result) do 13 | { 'hits' => { 'total' => 0, 'hits' => [] }, 14 | 'aggregations' => {}, 15 | 'suggest' => suggestion_hash } 16 | end 17 | let(:suggestion_hash) do 18 | { 'suggestion' => 19 | [{ 'text' => 'blue', 20 | 'options' => [{ 'text' => 'bulk', 21 | 'highlighted' => 'bulk' }] }] } 22 | end 23 | 24 | it { is_expected.to match(hash_including({ 'text' => 'bulk', 'highlighted' => 'bulk' })) } 25 | end 26 | end 27 | 28 | describe '#results' do 29 | subject(:results) { document_search_results.results } 30 | 31 | context 'when hits are present' do 32 | let(:result) do 33 | { 'hits' => { 'total' => 1, 'hits' => [hits] }, 34 | 'aggregations' => {}, 35 | 'suggest' => [] } 36 | end 37 | let(:hits) do 38 | { '_type' => '_doc', 39 | '_source' => { 'path' => 'https://search.gov/about/', 40 | 'created' => '2021-02-03T00:00:00.000-05:00', 41 | 'language' => 'en', 42 | 'title_en' => 'About Search.gov | Search.gov' }, 43 | 'highlight' => { 'content_en' => ['Some highlighted content'] } } 44 | end 45 | 46 | it { 47 | is_expected.to match(array_including({ 'path' => 'https://search.gov/about/', 48 | 'created' => '2021-02-03 05:00:00 UTC', 49 | 'language' => 'en', 50 | 'title' => 'About Search.gov | Search.gov', 51 | 'content' => 'Some highlighted content' })) 52 | } 53 | end 54 | end 55 | 56 | describe '#aggregations' do 57 | subject(:aggregations) { document_search_results.aggregations } 58 | 59 | context 
'when aggregations are present' do 60 | let(:result) do 61 | { 'hits' => { 'total' => 1, 'hits' => [hits] }, 62 | 'aggregations' => aggregations_hash, 63 | 'suggest' => [] } 64 | end 65 | let(:hits) do 66 | { '_type' => '_doc', 67 | '_source' => { 'path' => 'https://search.gov/about/', 68 | 'created' => '2021-02-03T00:00:00.000-05:00', 69 | 'language' => 'en', 70 | 'title_en' => 'About Search.gov | Search.gov' }, 71 | 'highlight' => { 'content_en' => ['Some highlighted content'] } } 72 | end 73 | let(:aggregations_hash) do 74 | { 'content_type' => { 'doc_count_error_upper_bound' => 0, 75 | 'sum_other_doc_count' => 0, 76 | 'buckets' => [{ 'key' => 'article', 77 | 'doc_count' => 1 }] }, 78 | 'tags' => { 'doc_count_error_upper_bound' => 0, 79 | 'sum_other_doc_count' => 0, 80 | 'buckets' => [] } } 81 | end 82 | 83 | it { is_expected.to match(array_including({ content_type: [{ agg_key: 'article', doc_count: 1 }] })) } 84 | 85 | it { is_expected.not_to include(hash_including(:tags)) } 86 | end 87 | end 88 | end 89 | -------------------------------------------------------------------------------- /spec/classes/query_parser_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe QueryParser do 6 | let(:query) { 'govt site stuff' } 7 | let(:parsed_query) { described_class.new(query) } 8 | 9 | context 'one or more site: params in query string' do 10 | let(:query) do 11 | '(site:agency.gov/archives/2015 Site:archive.agency.gov/ site:archive2.agency.gov) govt site stuff' 12 | end 13 | 14 | it 'should extract an array of SiteFilter instances' do 15 | site_filters = parsed_query.site_filters 16 | expect(site_filters[:included_sites].size).to eq(3) 17 | expect(site_filters[:included_sites][0].domain_name).to eq('agency.gov') 18 | expect(site_filters[:included_sites][0].url_path).to eq('/archives/2015') 19 | expect(site_filters[:included_sites][1].domain_name).to 
eq('archive.agency.gov') 20 | expect(site_filters[:included_sites][1].url_path).to be_nil 21 | expect(site_filters[:included_sites][2].domain_name).to eq('archive2.agency.gov') 22 | expect(site_filters[:included_sites][2].url_path).to be_nil 23 | end 24 | 25 | it 'should make the resulting query available' do 26 | expect(parsed_query.stripped_query).to eq('govt site stuff') 27 | end 28 | 29 | it 'does not alter the original query' do #because here there be bugs 30 | expect { described_class.new(query) }.not_to change { query } 31 | end 32 | end 33 | 34 | context 'one or more -site: params in query string' do 35 | let(:site_params_parser) { described_class.new('(-site:excluded3.agency.gov/archives/2016 -site:excluded.agency.gov -Site:excluded2.agency.gov) govt site stuff') } 36 | 37 | it 'should extract an array of :excluded_sites as SiteFilters' do 38 | site_filters = site_params_parser.site_filters 39 | expect(site_filters[:excluded_sites].size).to eq(3) 40 | expect(site_filters[:excluded_sites][0].domain_name).to eq('excluded3.agency.gov') 41 | expect(site_filters[:excluded_sites][0].url_path).to eq('/archives/2016') 42 | expect(site_filters[:excluded_sites][1].domain_name).to eq('excluded.agency.gov') 43 | expect(site_filters[:excluded_sites][1].url_path).to be_nil 44 | expect(site_filters[:excluded_sites][2].domain_name).to eq('excluded2.agency.gov') 45 | expect(site_filters[:excluded_sites][2].url_path).to be_nil 46 | end 47 | 48 | it 'should make the resulting query available' do 49 | expect(site_params_parser.stripped_query).to eq('govt site stuff') 50 | end 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /spec/config/initializers/filter_parameter_logging_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe 'ActiveSupport::ParameterFilter' do 6 | let(:config) { I14y::Application.config 
} 7 | let(:parameter_filter) { ActiveSupport::ParameterFilter.new(config.filter_parameters) } 8 | 9 | it 'filters query from logs' do 10 | expect(config.filter_parameters.to_s).to match(/:query\b/) 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /spec/lib/serde_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe Serde do 6 | describe '.serialize_hash' do 7 | subject(:serialize_hash) do 8 | described_class.serialize_hash(original_hash, 'en') 9 | end 10 | 11 | let(:original_hash) do 12 | ActiveSupport::HashWithIndifferentAccess.new( 13 | { 'title' => 'my title', 14 | 'description' => 'my description', 15 | 'content' => 'my content', 16 | 'path' => 'http://www.foo.gov/bar.html', 17 | 'promote' => false, 18 | 'audience' => 'Everyone', 19 | 'content_type' => 'EVENT', 20 | 'tags' => 'this that', 21 | 'searchgov_custom1' => 'this, Custom, CONTENT', 22 | 'searchgov_custom2' => 'That custom, Content', 23 | 'searchgov_custom3' => '123', 24 | 'created' => '2018-01-01T12:00:00Z', 25 | 'changed' => '2018-02-01T12:00:00Z', 26 | 'created_at' => '2018-01-01T12:00:00Z', 27 | 'updated_at' => '2018-02-01T12:00:00Z' } 28 | ) 29 | end 30 | 31 | it 'stores the language fields with the language suffix' do 32 | expect(serialize_hash).to match(hash_including( 33 | { 'title_en' => 'my title', 34 | 'description_en' => 'my description', 35 | 'content_en' => 'my content' } 36 | )) 37 | end 38 | 39 | it 'stores downcased audience' do 40 | expect(serialize_hash).to match(hash_including( 41 | { 'audience' => 'everyone' } 42 | )) 43 | end 44 | 45 | it 'stores downcased content_type' do 46 | expect(serialize_hash).to match(hash_including( 47 | { 'content_type' => 'event' } 48 | )) 49 | end 50 | 51 | it 'stores tags as a downcased array' do 52 | expect(serialize_hash).to match(hash_including( 53 | { 'tags' => ['this that'] } 
54 | )) 55 | end 56 | 57 | it 'stores searchgov_custom fields as downcased arrays' do 58 | expect(serialize_hash).to match(hash_including( 59 | { 'searchgov_custom1' => %w[this custom content], 60 | 'searchgov_custom2' => ['that custom', 'content'], 61 | 'searchgov_custom3' => ['123'] } 62 | )) 63 | end 64 | 65 | it 'updates the updated_at value' do 66 | expect(serialize_hash[:updated_at]).to be > 1.second.ago 67 | end 68 | 69 | context 'when language fields contain HTML/CSS' do 70 | let(:html) do 71 | <<~HTML 72 |
73 |

hello & goodbye!

74 | HTML 75 | end 76 | 77 | let(:original_hash) do 78 | ActiveSupport::HashWithIndifferentAccess.new( 79 | title: 'foo', 80 | description: html, 81 | content: "this is html" 82 | ) 83 | end 84 | 85 | it 'sanitizes the language fields' do 86 | expect(serialize_hash).to match(hash_including( 87 | title_en: 'foo', 88 | description_en: 'hello & goodbye!', 89 | content_en: 'this is html' 90 | )) 91 | end 92 | end 93 | 94 | context 'when the tags are a comma-delimited list' do 95 | let(:original_hash) do 96 | { tags: 'this, that' } 97 | end 98 | 99 | it 'converts the tags to an array' do 100 | expect(serialize_hash).to match(hash_including(tags: %w[this that])) 101 | end 102 | end 103 | end 104 | 105 | describe '.deserialize_hash' do 106 | subject(:deserialize_hash) do 107 | described_class.deserialize_hash(original_hash, :en) 108 | end 109 | 110 | let(:original_hash) do 111 | ActiveSupport::HashWithIndifferentAccess.new( 112 | { 'created_at' => '2018-08-09T21:36:50.087Z', 113 | 'updated_at' => '2018-08-09T21:36:50.087Z', 114 | 'path' => 'http://www.foo.gov/bar.html', 115 | 'language' => 'en', 116 | 'created' => '2018-08-09T19:36:50.087Z', 117 | 'updated' => '2018-08-09T14:36:50.087-07:00', 118 | 'changed' => '2018-08-09T14:36:50.087-07:00', 119 | 'promote' => true, 120 | 'tags' => 'this that', 121 | 'title_en' => 'my title', 122 | 'description_en' => 'my description', 123 | 'content_en' => 'my content', 124 | 'basename' => 'bar', 125 | 'extension' => 'html', 126 | 'url_path' => '/bar.html', 127 | 'domain_name' => 'www.foo.gov' } 128 | ) 129 | end 130 | let(:language_field_keys) { %i[title description content] } 131 | 132 | it 'removes the language suffix from the text fields' do 133 | expect(deserialize_hash).to eq( 134 | { 'created_at' => '2018-08-09T21:36:50.087Z', 135 | 'updated_at' => '2018-08-09T21:36:50.087Z', 136 | 'path' => 'http://www.foo.gov/bar.html', 137 | 'language' => 'en', 138 | 'created' => '2018-08-09T19:36:50.087Z', 139 | 'title' => 'my title', 140 | 
# "µ" occupies two bytes in UTF-8, so byte size and character count differ.
TWO_BYTE_CHARACTER = "\u00b5"

describe MaxBytes do
  subject(:validator) { described_class.new(attrs, options, required, scope.new) }

  let(:attrs) { nil }
  # Validator option: a limit of 10 bytes.
  let(:options) { [10] }
  let(:required) { false }
  # Minimal stand-in for the validator's scope; only #full_name is called.
  let(:scope) do
    Struct.new(:opts) do
      def full_name(name); end
    end
  end

  describe '#validate_param!' do
    let(:params) { { some_param: value_to_validate } }

    context 'when the value of the param being validated has fewer than the maximum number of bytes' do
      let(:value_to_validate) { TWO_BYTE_CHARACTER }

      it 'does not raise a validation exception' do
        expect { validator.validate_param!(:some_param, params) }.not_to raise_error
      end
    end

    context 'when the value of the param being validated has exactly the maximum number of bytes' do
      let(:value_to_validate) { TWO_BYTE_CHARACTER * 5 }

      it 'does not raise a validation exception' do
        expect { validator.validate_param!(:some_param, params) }.not_to raise_error
      end
    end

    context 'when the value of the param being validated has more than the maximum number of bytes' do
      # Five two-byte characters plus one ASCII character: 11 bytes, 6 characters.
      let(:value_to_validate) { TWO_BYTE_CHARACTER * 5 + 'z' }

      it 'raises a validation exception' do
        expect { validator.validate_param!(:some_param, params) }.
          to raise_error(Grape::Exceptions::Validation, 'cannot be more than 10 bytes long')
      end
    end
  end
end
validate_presence_of(:token) } 32 | end 33 | 34 | describe '#last_document_sent' do 35 | subject(:last_document_sent) { collection.last_document_sent } 36 | 37 | context 'when something goes wrong' do 38 | before do 39 | allow_any_instance_of(DocumentRepository). 40 | to receive(:search).and_raise(StandardError) 41 | end 42 | 43 | it { is_expected.to be nil } 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /spec/models/document_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe Document do 6 | subject(:document) { described_class.new(valid_params) } 7 | 8 | let(:valid_params) do 9 | { 10 | id: 'a123', 11 | title: 'My Title', 12 | path: 'http://www.agency.gov/page1.html', 13 | audience: 'Everyone', 14 | changed: DateTime.new(2020, 1, 2), 15 | click_count: 5, 16 | content: 'some content', 17 | content_type: 'article', 18 | created: DateTime.new(2020, 1, 1), 19 | description: 'My Description', 20 | thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png', 21 | language: 'en', 22 | mime_type: 'text/html', 23 | promote: true, 24 | searchgov_custom1: 'custom content with spaces', 25 | searchgov_custom2: 'comma, separated, custom, content', 26 | searchgov_custom3: '', 27 | tags: 'this,that' 28 | } 29 | end 30 | 31 | describe 'attributes' do 32 | it do 33 | is_expected.to have_attributes( 34 | id: 'a123', 35 | title: 'My Title', 36 | path: 'http://www.agency.gov/page1.html', 37 | audience: 'Everyone', 38 | changed: DateTime.new(2020, 1, 2), 39 | click_count: 5, 40 | content: 'some content', 41 | content_type: 'article', 42 | created: DateTime.new(2020, 1, 1), 43 | description: 'My Description', 44 | thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png', 45 | language: 'en', 46 | mime_type: 'text/html', 47 | promote: true, 48 | searchgov_custom1: 'custom content 
with spaces', 49 | searchgov_custom2: 'comma, separated, custom, content', 50 | searchgov_custom3: '', 51 | tags: 'this,that' 52 | ) 53 | end 54 | 55 | it 'sets default timestamps' do 56 | expect(document.created_at).to be_a Time 57 | expect(document.updated_at).to be_a Time 58 | end 59 | 60 | context 'with the minimum required params' do 61 | subject(:document) do 62 | described_class.new( 63 | language: 'en', 64 | path: 'https://foo.gov' 65 | ) 66 | end 67 | 68 | it { is_expected.to be_valid } 69 | end 70 | end 71 | 72 | describe 'validations' do 73 | it { is_expected.to validate_presence_of(:path) } 74 | it { is_expected.to validate_presence_of(:language) } 75 | it { is_expected.to be_valid } 76 | 77 | context 'when the MIME type is invalid' do 78 | subject(:document) do 79 | described_class.new(valid_params.merge(mime_type: 'text/not_a_valid_mime_type')) 80 | end 81 | 82 | it { is_expected.to be_invalid } 83 | 84 | it 'generates an error message' do 85 | document.valid? 86 | expect(document.errors.messages[:mime_type]).to include 'is invalid' 87 | end 88 | end 89 | 90 | context 'when the thumbnail_url is invalid' do 91 | subject(:document) do 92 | described_class.new(valid_params.merge(thumbnail_url: 'invalid thumbnail url')) 93 | end 94 | 95 | it { is_expected.to be_invalid } 96 | 97 | it 'generates an error message' do 98 | document.valid? 
99 | expect(document.errors.messages[:thumbnail_url]).to include 'is invalid' 100 | end 101 | end 102 | end 103 | end 104 | -------------------------------------------------------------------------------- /spec/rails_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # This file is copied to spec/ when you run 'rails generate rspec:install' 4 | ENV['RAILS_ENV'] ||= 'test' 5 | require 'spec_helper' 6 | require File.expand_path('../../config/environment', __FILE__) 7 | require 'rspec/rails' 8 | require 'test_services' 9 | 10 | # Requires supporting ruby files with custom matchers and macros, etc, in 11 | # spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are 12 | # run as spec files by default. This means that files in spec/support that end 13 | # in _spec.rb will both be required and run as specs, causing the specs to be 14 | # run twice. It is recommended that you do not name files matching this glob to 15 | # end with _spec.rb. You can configure this pattern with the --pattern 16 | # option on the command line or in ~/.rspec, .rspec or `.rspec-local`. 17 | Dir[Rails.root.join('spec/support/**/*.rb')].each { |f| require f } 18 | 19 | RSpec.configure do |config| 20 | # RSpec Rails can automatically mix in different behaviours to your tests 21 | # based on their file location, for example enabling you to call `get` and 22 | # `post` in specs under `spec/controllers`. 23 | # 24 | # You can disable this behaviour by removing the line below, and instead 25 | # explicitly tag your specs with their type, e.g.: 26 | # 27 | # RSpec.describe UsersController, :type => :controller do 28 | # # ... 29 | # end 30 | # 31 | # The different available types are documented in the features, such as in 32 | # https://relishapp.com/rspec/rspec-rails/docs 33 | config.include DocumentCrud 34 | config.include TestServices 35 | 36 | config.infer_spec_type_from_file_location! 
37 | 38 | config.before(:suite) do 39 | TestServices::delete_es_indexes 40 | TestServices::create_collections_index 41 | end 42 | 43 | config.after(:suite) do 44 | TestServices::delete_es_indexes 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /spec/repositories/collection_repository_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe CollectionRepository do 6 | subject(:repository) { described_class.new } 7 | 8 | it_behaves_like 'a repository' 9 | 10 | describe '.klass' do 11 | subject(:klass) { described_class.klass } 12 | 13 | it { is_expected.to eq(Collection) } 14 | end 15 | 16 | it 'uses the collections index namespace' do 17 | expect(repository.index_name).to eq('test-i14y-collections') 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /spec/repositories/document_repository_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe DocumentRepository do 6 | subject(:repository) { described_class.new } 7 | 8 | it_behaves_like 'a repository' 9 | 10 | describe '.klass' do 11 | subject(:klass) { described_class.klass } 12 | 13 | it { is_expected.to eq(Document) } 14 | end 15 | 16 | describe '.index_namespace' do 17 | subject(:index_namespace) { described_class.index_namespace('agency_blogs') } 18 | 19 | it 'returns the ES index namespace for the specified collection handle' do 20 | expect(index_namespace).to eq 'test-i14y-documents-agency_blogs' 21 | end 22 | end 23 | 24 | describe '.serialize' do 25 | subject(:serialize) { repository.serialize(document) } 26 | 27 | let(:document) do 28 | Document.new( 29 | language: 'en', 30 | path: 'http://www.agency.gov/page1.html' 31 | ) 32 | end 33 | 34 | it 'serializes the document' do 
35 | expect(serialize).to match(hash_including( 36 | language: 'en', 37 | path: 'http://www.agency.gov/page1.html' 38 | )) 39 | end 40 | end 41 | 42 | describe 'deserialization' do 43 | context 'when a document has been persisted' do 44 | let(:document_params) do 45 | { 46 | id: 'a123', 47 | language: 'en', 48 | path: 'http://www.agency.gov/page1.html', 49 | title: 'My Title', 50 | created: DateTime.new(2020, 1, 1), 51 | changed: DateTime.new(2020, 1, 2), 52 | description: 'My Description', 53 | content: 'some content', 54 | promote: true, 55 | tags: 'this,that', 56 | click_count: 5 57 | } 58 | end 59 | 60 | before do 61 | create_document(document_params, repository) 62 | end 63 | 64 | it 'deserializes the document' do 65 | document = repository.find('a123') 66 | expect(document.id).to eq('a123') 67 | expect(document.language).to eq('en') 68 | expect(document.path).to eq('http://www.agency.gov/page1.html') 69 | expect(document.title).to eq('My Title') 70 | expect(document.description).to eq('My Description') 71 | expect(document.content).to eq('some content') 72 | expect(document.promote).to eq(true) 73 | expect(document.tags).to eq(%w[this that]) 74 | expect(document.click_count).to eq(5) 75 | end 76 | end 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /spec/requests/api/v1/collections_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | 5 | describe Api::V1::Collections do 6 | let(:valid_session) do 7 | credentials = ActionController::HttpAuthentication::Basic.encode_credentials( 8 | ENV['I14Y_ADMIN_USER'], ENV['I14Y_ADMIN_PASSWORD'] 9 | ) 10 | { 'HTTP_AUTHORIZATION' => credentials } 11 | end 12 | let(:valid_params) do 13 | { handle: 'agency_blogs', token: 'secret' } 14 | end 15 | let(:allow_updates) { true } 16 | let(:maintenance_message) { nil } 17 | let(:documents_index_name) { 
DocumentRepository.index_namespace('agency_blogs') } 18 | let(:document_repository) do 19 | DocumentRepository.new(index_name: documents_index_name) 20 | end 21 | 22 | before do 23 | I14y::Application.config.updates_allowed = allow_updates 24 | I14y::Application.config.maintenance_message = maintenance_message 25 | end 26 | 27 | after do 28 | I14y::Application.config.updates_allowed = true 29 | end 30 | 31 | describe 'POST /api/v1/collections' do 32 | context 'when successful' do 33 | before do 34 | clear_index(collections_index_name) 35 | post '/api/v1/collections', params: valid_params, headers: valid_session 36 | end 37 | 38 | it 'returns success message as JSON' do 39 | expect(response).to have_http_status(:created) 40 | expect(JSON.parse(response.body)).to match( 41 | hash_including('status' => 200, 42 | 'developer_message' => 'OK', 43 | 'user_message' => 'Your collection was successfully created.') 44 | ) 45 | end 46 | 47 | it 'uses the collection handle as the Elasticsearch ID' do 48 | expect(ES.collection_repository.find('agency_blogs')).to be_present 49 | end 50 | 51 | it 'stores the appropriate fields in the Elasticsearch collection' do 52 | collection = ES.collection_repository.find('agency_blogs') 53 | expect(collection.token).to eq('secret') 54 | expect(collection.created_at).to be_an_instance_of(Time) 55 | expect(collection.updated_at).to be_an_instance_of(Time) 56 | end 57 | 58 | it_behaves_like 'a data modifying request made during read-only mode' 59 | end 60 | 61 | context 'when a required parameter is empty/blank' do 62 | before do 63 | invalid_params = {} 64 | post '/api/v1/collections', params: invalid_params, headers: valid_session 65 | end 66 | 67 | it 'returns failure message as JSON' do 68 | expect(response).to have_http_status(:bad_request) 69 | expect(JSON.parse(response.body)).to match( 70 | hash_including('status' => 400, 71 | 'developer_message' => 'handle is missing, handle is empty, token is missing, token is empty') 72 | ) 73 | end 
74 | end 75 | 76 | context 'when handle uses illegal characters' do 77 | let(:invalid_params) do 78 | { handle: 'agency-blogs', token: 'secret' } 79 | end 80 | 81 | before do 82 | post '/api/v1/collections', params: invalid_params, headers: valid_session 83 | end 84 | 85 | it 'returns failure message as JSON' do 86 | expect(response).to have_http_status(:bad_request) 87 | expect(JSON.parse(response.body)).to match( 88 | hash_including('status' => 400, 89 | 'developer_message' => 'handle is invalid') 90 | ) 91 | end 92 | end 93 | 94 | context 'when authentication/authorization fails' do 95 | before do 96 | bad_credentials = ActionController::HttpAuthentication::Basic.encode_credentials 'nope', 'wrong' 97 | 98 | valid_session = { 'HTTP_AUTHORIZATION' => bad_credentials } 99 | post '/api/v1/collections', params: valid_params, headers: valid_session 100 | end 101 | 102 | it 'returns error message as JSON' do 103 | expect(response).to have_http_status(:bad_request) 104 | expect(JSON.parse(response.body)).to match( 105 | hash_including('status' => 400, 106 | 'developer_message' => 'Unauthorized') 107 | ) 108 | end 109 | end 110 | 111 | context 'when something terrible happens' do 112 | before do 113 | allow(Collection).to receive(:new) { raise_error(Exception) } 114 | post '/api/v1/collections', params: valid_params, headers: valid_session 115 | end 116 | 117 | it 'returns failure message as JSON' do 118 | expect(response).to have_http_status(:internal_server_error) 119 | expect(JSON.parse(response.body)).to match( 120 | hash_including('status' => 500, 121 | 'developer_message' => "Something unexpected happened and we've been alerted.") 122 | ) 123 | end 124 | end 125 | end 126 | 127 | describe 'DELETE /api/v1/collections/{handle}' do 128 | context 'when successful' do 129 | before do 130 | clear_index(collections_index_name) 131 | collection = Collection.new(id: 'agency_blogs', token: 'secret') 132 | ES.collection_repository.save(collection) 133 | delete 
'/api/v1/collections/agency_blogs', headers: valid_session 134 | end 135 | 136 | it 'returns success message as JSON' do 137 | expect(response).to have_http_status(:ok) 138 | expect(JSON.parse(response.body)).to match( 139 | hash_including('status' => 200, 140 | 'developer_message' => 'OK', 141 | 'user_message' => 'Your collection was successfully deleted.') 142 | ) 143 | end 144 | 145 | it 'deletes the collection' do 146 | expect(ES.collection_repository).not_to exist('agency_blogs') 147 | end 148 | 149 | it_behaves_like 'a data modifying request made during read-only mode' 150 | end 151 | end 152 | 153 | describe 'GET /api/v1/collections/{handle}' do 154 | context 'when successful' do 155 | before do 156 | clear_index(collections_index_name) 157 | post '/api/v1/collections', params: valid_params, headers: valid_session 158 | clear_index(documents_index_name) 159 | end 160 | 161 | let(:datetime) { DateTime.now.utc } 162 | let(:hash1) do 163 | { 164 | _id: 'a1', 165 | language: 'en', 166 | title: 'title 1 common content', 167 | description: 'description 1 common content', 168 | created: Time.zone.now, 169 | path: 'http://www.agency.gov/page1.html' 170 | } 171 | end 172 | let(:hash2) do 173 | { 174 | _id: 'a2', 175 | language: 'en', 176 | title: 'title 2 common content', 177 | description: 'description 2 common content', 178 | created: Time.zone.now, 179 | path: 'http://www.agency.gov/page2.html' 180 | } 181 | end 182 | 183 | it 'returns success message with Collection stats as JSON' do 184 | document_repository.save(Document.new(hash1)) 185 | document_repository.save(Document.new(hash2)) 186 | document_repository.refresh_index! 
187 | get '/api/v1/collections/agency_blogs', headers: valid_session 188 | expect(response).to have_http_status(:ok) 189 | expect(JSON.parse(response.body)).to match( 190 | hash_including('status' => 200, 191 | 'developer_message' => 'OK', 192 | 'collection' => { 'document_total' => 2, 193 | 'last_document_sent' => an_instance_of(String), 194 | 'token' => 'secret', 195 | 'id' => 'agency_blogs', 196 | 'created_at' => an_instance_of(String), 197 | 'updated_at' => an_instance_of(String) }) 198 | ) 199 | end 200 | end 201 | end 202 | 203 | describe 'GET /api/v1/collections/search' do 204 | before do 205 | clear_index(collections_index_name) 206 | post '/api/v1/collections', params: valid_params, headers: valid_session 207 | clear_index(documents_index_name) 208 | end 209 | 210 | context 'when valid search parameters are provided' do 211 | let(:valid_search_params) do 212 | { 213 | audience: 'everyone', 214 | content_type: 'article', 215 | handles: 'agency_blogs', 216 | ignore_tags: 'ignored', 217 | include: 'title,description', 218 | language: 'en', 219 | max_timestamp: '2013-02-27T10:01:00Z', 220 | mime_type: 'text/html', 221 | min_timestamp: '2013-02-27T10:00:00Z', 222 | offset: 2**32, 223 | query: 'common content', 224 | searchgov_custom1: 'custom, content', 225 | searchgov_custom2: 'content with spaces', 226 | searchgov_custom3: '123, content, 456', 227 | size: 3, 228 | sort_by_date: 1, 229 | tags: 'Foo, Bar blat' 230 | } 231 | end 232 | 233 | before do 234 | allow(DocumentSearch).to receive(:new) 235 | get '/api/v1/collections/search', params: valid_search_params, headers: valid_session 236 | end 237 | 238 | it 'symbolizes language' do 239 | expect(DocumentSearch).to have_received(:new).with(hash_including(language: Symbol)) 240 | end 241 | 242 | it 'sends the query as a string' do 243 | expect(DocumentSearch).to have_received(:new).with(hash_including(query: String)) 244 | end 245 | 246 | it 'arrayifies audience, content_type, handles, ignore_tags, include, 
mime_type, searchgov_customs, and tags' do 247 | expect(DocumentSearch).to have_received(:new).with(hash_including(audience: Array, 248 | content_type: Array, 249 | handles: Array, 250 | ignore_tags: Array, 251 | include: Array, 252 | mime_type: Array, 253 | searchgov_custom1: Array, 254 | searchgov_custom2: Array, 255 | searchgov_custom3: Array, 256 | tags: Array)) 257 | end 258 | 259 | it 'sends offset and size as an integers' do 260 | expect(DocumentSearch).to have_received(:new).with(hash_including(offset: Integer, 261 | size: Integer)) 262 | end 263 | 264 | it 'sends the sort_by_date as a boolean' do 265 | expect(DocumentSearch).to have_received(:new).with(hash_including(sort_by_date: TrueClass)) 266 | end 267 | 268 | it 'sends min_timestamp and max_timestamp as DateTime' do 269 | expect(DocumentSearch).to have_received(:new).with(hash_including(min_timestamp: DateTime, 270 | max_timestamp: DateTime)) 271 | end 272 | end 273 | 274 | context 'when results exist' do 275 | before do 276 | document_repository.save(Document.new(hash1)) 277 | document_repository.save(Document.new(hash2)) 278 | document_repository.refresh_index! 
279 | valid_params = { language: 'en', query: 'common contentx', handles: 'agency_blogs' } 280 | get '/api/v1/collections/search', params: valid_params, headers: valid_session 281 | end 282 | 283 | let(:datetime) { DateTime.now.utc.to_s } 284 | let(:hash1) do 285 | { _id: 'a1', 286 | language: 'en', 287 | title: 'title 1 common content', 288 | description: 'description 1 common content', 289 | content: 'content 1 common content', 290 | created: datetime, 291 | thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png', 292 | path: 'http://www.agency.gov/page1.html', 293 | promote: true, 294 | updated: datetime, 295 | updated_at: datetime } 296 | end 297 | let(:hash2) do 298 | { _id: 'a2', 299 | language: 'en', 300 | title: 'title 2 common content', 301 | description: 'description 2 common content', 302 | content: 'other unrelated stuff', 303 | thumbnail_url: 'https://search.gov/assets/img/logos/search.png', 304 | created: datetime, 305 | path: 'http://www.agency.gov/page2.html', 306 | promote: false, 307 | tags: 'tag1, tag2', 308 | updated_at: datetime } 309 | end 310 | 311 | describe 'status' do 312 | subject { response.status } 313 | 314 | it { is_expected.to eq(200) } 315 | end 316 | 317 | describe 'body' do 318 | let(:body) { JSON.parse(response.body) } 319 | let(:result1) do 320 | { 321 | 'language' => 'en', 322 | 'created' => datetime, 323 | 'path' => 'http://www.agency.gov/page1.html', 324 | 'title' => 'title 1 common content', 325 | 'description' => 'description 1 common content', 326 | 'content' => 'content 1 common content', 327 | 'thumbnail_url' => 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png', 328 | 'changed' => datetime 329 | } 330 | end 331 | let(:result2) do 332 | { 333 | 'language' => 'en', 334 | 'created' => datetime, 335 | 'path' => 'http://www.agency.gov/page2.html', 336 | 'title' => 'title 2 common content', 337 | 'description' => 'description 2 common content', 338 | 'thumbnail_url' => 
'https://search.gov/assets/img/logos/search.png', 339 | 'changed' => datetime 340 | } 341 | end 342 | 343 | it 'returns highlighted JSON search results' do 344 | expect(body).to match(hash_including('results' => [result1, result2])) 345 | end 346 | 347 | describe 'metadata' do 348 | let(:metadata) { body['metadata'] } 349 | let(:suggestion_hash) do 350 | { 'text' => 'common content', 351 | 'highlighted' => 'common content' } 352 | end 353 | 354 | it 'returns highlighted JSON suggestion' do 355 | expect(metadata['suggestion']).to match(hash_including(suggestion_hash)) 356 | end 357 | 358 | it 'returns a non-zero results total' do 359 | expect(metadata['total']).to be > 0 360 | end 361 | 362 | it 'returns aggregations' do 363 | expect(metadata['aggregations']).not_to be_empty 364 | end 365 | end 366 | end 367 | end 368 | 369 | context 'when no results exist' do 370 | before do 371 | valid_params = { language: 'en', query: 'no hits', handles: 'agency_blogs' } 372 | get '/api/v1/collections/search', params: valid_params, headers: valid_session 373 | end 374 | 375 | describe 'status' do 376 | subject { response.status } 377 | 378 | it { is_expected.to eq(200) } 379 | end 380 | 381 | describe 'body' do 382 | let(:body) { JSON.parse(response.body) } 383 | 384 | it 'returns an empty results array' do 385 | expect(body).to match(hash_including('results' => [])) 386 | end 387 | 388 | describe 'metadata' do 389 | let(:metadata) { body['metadata'] } 390 | 391 | it 'returns zero results total' do 392 | expect(metadata['total']).to eq(0) 393 | end 394 | 395 | it 'returns nil suggestion' do 396 | expect(metadata['suggestion']).to be_nil 397 | end 398 | 399 | it 'returns empty aggregations' do 400 | expect(metadata['aggregations']).to be_empty 401 | end 402 | end 403 | end 404 | end 405 | 406 | context 'when missing required params' do 407 | before do 408 | invalid_params = {} 409 | get '/api/v1/collections/search', params: invalid_params, headers: valid_session 410 | end 411 | 
412 | it 'returns error message as JSON' do 413 | expect(response).to have_http_status(:bad_request) 414 | expect(JSON.parse(response.body)).to match( 415 | hash_including('status' => 400, 416 | 'developer_message' => 'handles is missing, handles is empty') 417 | ) 418 | end 419 | end 420 | 421 | context 'when searching across one or more collection handles that do not exist' do 422 | let(:bad_handle_params) do 423 | { language: 'en', query: 'foo', handles: 'agency_blogs,missing' } 424 | end 425 | 426 | before do 427 | clear_index(collections_index_name) 428 | collection = Collection.new(id: 'agency_blogs', token: 'secret') 429 | ES.collection_repository.save(collection) 430 | get '/api/v1/collections/search', params: bad_handle_params, headers: valid_session 431 | end 432 | 433 | it 'returns error message as JSON' do 434 | expect(response).to have_http_status(:bad_request) 435 | expect(JSON.parse(response.body)).to match( 436 | hash_including('error' => 'Could not find all the specified collection handles') 437 | ) 438 | end 439 | end 440 | end 441 | end 442 | -------------------------------------------------------------------------------- /spec/requests/api/v1/documents_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails_helper' 4 | require 'uri' 5 | 6 | describe Api::V1::Documents do 7 | let(:id) { 'some really!weird@id.name' } 8 | let(:credentials) do 9 | ActionController::HttpAuthentication::Basic.encode_credentials('test_index', 10 | 'test_key') 11 | end 12 | let(:valid_session) do 13 | { HTTP_AUTHORIZATION: credentials } 14 | end 15 | let(:allow_updates) { true } 16 | let(:maintenance_message) { nil } 17 | let(:documents_index_name) { DocumentRepository.index_namespace('test_index') } 18 | let(:document_repository) { DocumentRepository.new(index_name: documents_index_name) } 19 | 20 | before(:all) do 21 | credentials = 
ActionController::HttpAuthentication::Basic.encode_credentials ENV['I14Y_ADMIN_USER'], ENV['I14Y_ADMIN_PASSWORD'] 22 | valid_collection_session = { HTTP_AUTHORIZATION: credentials } 23 | valid_collection_params = { handle: 'test_index', token: 'test_key' } 24 | post '/api/v1/collections', params: valid_collection_params, headers: valid_collection_session 25 | end 26 | 27 | before do 28 | I14y::Application.config.updates_allowed = allow_updates 29 | I14y::Application.config.maintenance_message = maintenance_message 30 | end 31 | 32 | after do 33 | I14y::Application.config.updates_allowed = true 34 | clear_index(documents_index_name) 35 | end 36 | 37 | describe 'POST /api/v1/documents' do 38 | subject(:post_document) do 39 | post '/api/v1/documents', params: document_params, headers: valid_session 40 | document_repository.refresh_index! 41 | end 42 | 43 | let(:valid_params) do 44 | { 45 | document_id: id, 46 | title: 'my title', 47 | path: 'http://www.gov.gov/goo.html', 48 | audience: 'Everyone', 49 | content: 'my content', 50 | content_type: 'article', 51 | description: 'my desc', 52 | thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png', 53 | language: 'hy', 54 | mime_type: 'text/html', 55 | promote: true, 56 | searchgov_custom1: 'custom content with spaces', 57 | searchgov_custom2: 'comma, separated, custom, content', 58 | searchgov_custom3: 123, 59 | tags: 'Foo, Bar blat' 60 | } 61 | end 62 | let(:document_params) { valid_params } 63 | 64 | context 'when successful' do 65 | before do 66 | post_document 67 | end 68 | 69 | it 'returns success message as JSON' do 70 | expect(response).to have_http_status(:created) 71 | expect(response.parsed_body). 
72 | to match(hash_including('status' => 200, 73 | 'developer_message' => 'OK', 74 | 'user_message' => 'Your document was successfully created.')) 75 | end 76 | 77 | it 'uses the collection handle and the document_id in the Elasticsearch ID' do 78 | expect(document_repository.find(id)).to be_present 79 | end 80 | 81 | it 'stores the appropriate fields in the Elasticsearch document' do 82 | document = document_repository.find(id) 83 | expect(document.title).to eq('my title') 84 | expect(document.path).to eq('http://www.gov.gov/goo.html') 85 | expect(document.audience).to eq('everyone') 86 | expect(document.content).to eq('my content') 87 | expect(document.content_type).to eq('article') 88 | expect(document.created_at).to be_an_instance_of(Time) 89 | expect(document.description).to eq('my desc') 90 | expect(document.thumbnail_url).to eq('https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png') 91 | expect(document.language).to eq('hy') 92 | expect(document.mime_type).to eq('text/html') 93 | expect(document.promote).to be_truthy 94 | expect(document.searchgov_custom1).to eq(['custom content with spaces']) 95 | expect(document.searchgov_custom2).to eq(%w[comma separated custom content]) 96 | expect(document.searchgov_custom3).to eq(['123']) 97 | expect(document.tags).to contain_exactly('bar blat', 'foo') 98 | expect(document.updated_at).to be_an_instance_of(Time) 99 | end 100 | 101 | context 'when a "created" value is provided but not "changed"' do 102 | let(:valid_params) do 103 | { document_id: id, 104 | title: 'my title', 105 | path: 'http://www.gov.gov/goo.html', 106 | description: 'my desc', 107 | language: 'hy', 108 | content: 'my content', 109 | created: '2020-01-01T10:00:00Z' } 110 | end 111 | 112 | it 'sets "changed" to be the same as "created"' do 113 | document = document_repository.find(id) 114 | expect(document.changed).to eq '2020-01-01T10:00:00Z' 115 | end 116 | end 117 | 118 | it_behaves_like 'a data modifying request made during read-only mode' 119 | end 
120 | 121 | context 'when attepmting to create an existing document' do 122 | let(:document_params) { valid_params.merge(document_id: 'its_a_dupe') } 123 | 124 | before do 125 | create_document(valid_params.merge(id: 'its_a_dupe'), document_repository) 126 | post_document 127 | end 128 | 129 | it 'returns failure message as JSON' do 130 | expect(response).to have_http_status(:unprocessable_entity) 131 | expect(response.parsed_body). 132 | to match(hash_including('status' => 422, 133 | 'developer_message' => 'Document already exists with that ID')) 134 | end 135 | end 136 | 137 | context 'when language param is invalid' do 138 | let(:document_params) { valid_params.merge(language: 'qq') } 139 | 140 | before { post_document } 141 | 142 | it 'returns failure message as JSON' do 143 | expect(response).to have_http_status(:bad_request) 144 | expect(response.parsed_body). 145 | to match(hash_including('status' => 400, 146 | 'developer_message' => 'language does not have a valid value')) 147 | end 148 | end 149 | 150 | context 'when id contains a slash' do 151 | let(:document_params) { valid_params.merge(document_id: 'a1/234') } 152 | 153 | before { post_document } 154 | 155 | it 'returns failure message as JSON' do 156 | expect(response).to have_http_status(:bad_request) 157 | expect(response.parsed_body). 
158 | to match(hash_including('status' => 400, 159 | 'developer_message' => "document_id cannot contain any of the following characters: ['/']")) 160 | end 161 | end 162 | 163 | context 'when an id is larger than 512 bytes' do 164 | let(:string_with_513_bytes_but_only_257_characters) do 165 | two_byte_character = '\u00b5' 166 | "x#{two_byte_character * 256}" 167 | end 168 | 169 | let(:document_params) do 170 | valid_params.merge(document_id: string_with_513_bytes_but_only_257_characters) 171 | end 172 | 173 | before { post_document } 174 | 175 | it 'returns failure message as JSON' do 176 | expect(response).to have_http_status(:bad_request) 177 | expect(response.parsed_body). 178 | to match(hash_including('status' => 400, 179 | 'developer_message' => 'document_id cannot be more than 512 bytes long')) 180 | end 181 | end 182 | 183 | context 'when a language param is missing' do 184 | let(:document_params) { valid_params.except(:language) } 185 | 186 | before { post_document } 187 | 188 | it 'uses English (en) as default' do 189 | expect(document_repository.find(id).language).to eq('en') 190 | end 191 | end 192 | 193 | context 'when a required parameter is empty/blank' do 194 | let(:document_params) { valid_params.merge(title: ' ') } 195 | 196 | before { post_document } 197 | 198 | it 'returns failure message as JSON' do 199 | expect(response).to have_http_status(:bad_request) 200 | expect(response.parsed_body). 201 | to match(hash_including('status' => 400, 202 | 'developer_message' => 'title is empty')) 203 | end 204 | end 205 | 206 | context 'when the path URL is poorly formatted' do 207 | let(:document_params) { valid_params.merge(path: 'http://www.gov.gov/ goo.html') } 208 | 209 | before { post_document } 210 | 211 | it 'returns failure message as JSON' do 212 | expect(response).to have_http_status(:bad_request) 213 | expect(response.parsed_body). 
214 | to match(hash_including('status' => 400, 215 | 'developer_message' => 'path is invalid')) 216 | end 217 | end 218 | 219 | context 'when authentication/authorization fails' do 220 | let(:credentials) do 221 | ActionController::HttpAuthentication::Basic.encode_credentials('test_index', 222 | 'bad_key') 223 | end 224 | 225 | before { post_document } 226 | 227 | it 'returns error message as JSON' do 228 | expect(response).to have_http_status(:bad_request) 229 | expect(response.parsed_body). 230 | to match(hash_including('status' => 400, 231 | 'developer_message' => 'Unauthorized')) 232 | end 233 | end 234 | 235 | context 'when something terrible happens during authentication' do 236 | before do 237 | allow(ES).to receive(:collection_repository). 238 | and_raise(Elasticsearch::Transport::Transport::Errors::BadRequest) 239 | post_document 240 | end 241 | 242 | it 'returns error message as JSON' do 243 | expect(response).to have_http_status(:bad_request) 244 | expect(response.parsed_body). 245 | to match(hash_including('status' => 400, 246 | 'developer_message' => 'Unauthorized')) 247 | end 248 | end 249 | 250 | context 'when something terrible happens creating the document' do 251 | before do 252 | allow(Document).to receive(:new) { raise_error(Exception) } 253 | post_document 254 | end 255 | 256 | it 'returns failure message as JSON' do 257 | expect(response).to have_http_status(:internal_server_error) 258 | expect(response.parsed_body). 259 | to match(hash_including('status' => 500, 260 | 'developer_message' => "Something unexpected happened and we've been alerted.")) 261 | end 262 | end 263 | 264 | context 'with invalid MIME type param' do 265 | let(:document_params) { valid_params.merge(mime_type: 'not_a_valid/mime_type') } 266 | 267 | before { post_document } 268 | 269 | it 'returns failure message as JSON' do 270 | expect(response).to have_http_status(:bad_request) 271 | expect(response.parsed_body). 
272 | to match(hash_including('status' => 400, 273 | 'developer_message' => 'Mime type is invalid')) 274 | end 275 | end 276 | end 277 | 278 | describe 'PUT /api/v1/documents/{document_id}' do 279 | subject(:put_document) do 280 | put "/api/v1/documents/#{CGI.escape(id)}", 281 | params: update_params, 282 | headers: valid_session 283 | document_repository.refresh_index! 284 | end 285 | 286 | let(:update_params) do 287 | { 288 | changed: '2016-01-01T10:00:01Z', 289 | click_count: 1000, 290 | content_type: 'website', 291 | content: 'new content', 292 | description: 'new desc', 293 | mime_type: 'text/plain', 294 | path: 'http://www.next.gov/updated.html', 295 | promote: false, 296 | searchgov_custom1: 'custom content with spaces', 297 | searchgov_custom2: 'new, comma, separated, custom, content', 298 | tags: 'new category', 299 | thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/new/18F-Logo-M.png', 300 | title: 'new title' 301 | } 302 | end 303 | 304 | context 'when successful' do 305 | before do 306 | create_document({ audience: 'Everyone', 307 | content: 'huge content 4', 308 | created: 2.hours.ago, 309 | description: 'bigger desc 4', 310 | language: 'en', 311 | path: 'http://www.gov.gov/url4.html', 312 | promote: true, 313 | searchgov_custom2: 'comma, separated, custom, content', 314 | searchgov_custom3: 123, 315 | title: 'hi there 4', 316 | updated: Time.zone.now, 317 | id: id }, 318 | document_repository) 319 | 320 | put_document 321 | end 322 | 323 | it 'returns success message as JSON' do 324 | expect(response).to have_http_status(:ok) 325 | expect(response.parsed_body). 
326 | to match(hash_including('status' => 200, 327 | 'developer_message' => 'OK', 328 | 'user_message' => 'Your document was successfully updated.')) 329 | end 330 | 331 | it 'updates the document' do 332 | document = document_repository.find(id) 333 | expect(document.changed).to eq('2016-01-01T10:00:01Z') 334 | expect(document.click_count).to eq(1000) 335 | expect(document.content_type).to eq('website') 336 | expect(document.content).to eq('new content') 337 | expect(document.description).to eq('new desc') 338 | expect(document.mime_type).to eq('text/plain') 339 | expect(document.path).to eq('http://www.next.gov/updated.html') 340 | expect(document.promote).to be_falsey 341 | expect(document.searchgov_custom1).to contain_exactly('custom content with spaces') 342 | expect(document.searchgov_custom2).to contain_exactly('new', 'comma', 'separated', 'custom', 'content') 343 | expect(document.tags).to contain_exactly('new category') 344 | expect(document.thumbnail_url).to eq('https://18f.gsa.gov/assets/img/logos/new/18F-Logo-M.png') 345 | expect(document.title).to eq('new title') 346 | end 347 | 348 | it 'persists unchanged attributes' do 349 | document = document_repository.find(id) 350 | expect(document.audience).to eq('everyone') 351 | expect(document.language).to eq('en') 352 | expect(document.searchgov_custom3).to contain_exactly('123') 353 | end 354 | 355 | it_behaves_like 'a data modifying request made during read-only mode' 356 | end 357 | 358 | context 'when time has passed since the document was created' do 359 | before do 360 | create_document({ 361 | id: id, 362 | language: 'en', 363 | title: 'hi there 4', 364 | description: 'bigger desc 4', 365 | content: 'huge content 4', 366 | path: 'http://www.gov.gov/url4.html' 367 | }, document_repository) 368 | # Overwrite both timestamps directly through the ES client, a year in the 369 | # past, so the specs are not fooled by any automatic timestamping 370 | ES.client.update( 371 | index: documents_index_name, 372 | id: id, 373 | body: { 374 | doc: { 375 | 
updated_at: 1.year.ago, 376 | created_at: 1.year.ago 377 | } 378 | }, 379 | type: '_doc' 380 | ) 381 | document_repository.refresh_index! 382 | end 383 | 384 | it 'updates the updated_at timestamp' do 385 | expect { put_document }.to change { document_repository.find(id).updated_at } 386 | end 387 | 388 | it 'does not update the created_at timestamp' do 389 | expect { put_document }.not_to change { document_repository.find(id).created_at } 390 | end 391 | end 392 | 393 | context 'with invalid MIME type param' do 394 | let(:update_params) { { mime_type: 'not_a_valid/mime_type' } } 395 | 396 | before do 397 | create_document({ 398 | id: id, 399 | language: 'en', 400 | title: 'hi there 4', 401 | description: 'bigger desc 4', 402 | content: 'huge content 4', 403 | created: 2.hours.ago, 404 | updated: Time.zone.now, 405 | promote: true, 406 | path: 'http://www.gov.gov/url4.html' 407 | }, document_repository) 408 | 409 | put_document 410 | end 411 | 412 | it 'returns error message as JSON' do 413 | expect(response).to have_http_status(:bad_request) 414 | expect(response.parsed_body). 415 | to match(hash_including('status' => 400, 416 | 'developer_message' => 'Mime type is invalid')) 417 | end 418 | end 419 | end 420 | 421 | describe 'DELETE /api/v1/documents/{document_id}' do 422 | subject(:delete_document) do 423 | delete "/api/v1/documents/#{CGI.escape(id)}", headers: valid_session 424 | end 425 | 426 | context 'when successful' do 427 | before do 428 | create_document({ 429 | id: id, 430 | language: 'en', 431 | title: 'hi there 4', 432 | description: 'bigger desc 4', 433 | content: 'huge content 4', 434 | created: 2.hours.ago, 435 | updated: Time.zone.now, 436 | promote: true, 437 | path: 'http://www.gov.gov/url4.html' 438 | }, document_repository) 439 | 440 | delete_document 441 | end 442 | 443 | it 'returns success message as JSON' do 444 | expect(response).to have_http_status(:ok) 445 | expect(response.parsed_body). 
446 | to match(hash_including('status' => 200, 447 | 'developer_message' => 'OK', 448 | 'user_message' => 'Your document was successfully deleted.')) 449 | end 450 | 451 | it 'deletes the document' do 452 | expect(document_repository).not_to exist(id) 453 | end 454 | 455 | it_behaves_like 'a data modifying request made during read-only mode' 456 | end 457 | 458 | context 'when document does not exist' do 459 | let(:id) { 'nonexistent' } 460 | 461 | before { delete_document } 462 | 463 | it 'delete returns an error message as JSON' do 464 | expect(response).to have_http_status(:bad_request) 465 | expect(response.parsed_body). 466 | to match(hash_including('status' => 400, 467 | 'developer_message' => 'Resource could not be found.')) 468 | end 469 | end 470 | end 471 | end 472 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'simplecov' 4 | 5 | RSpec.configure do |config| 6 | # The settings below are suggested to provide a good initial experience 7 | # with RSpec, but feel free to customize to your heart's content. 8 | 9 | # This setting allows you to limit a spec run to individual examples 10 | # or groups you care about by tagging them with `:focus` metadata. 11 | # The filter is applied only when at least one example matches it, so 12 | # when nothing is tagged with `:focus`, all examples get run without 13 | # needing a separate `run_all_when_everything_filtered` setting. 14 | config.filter_run_when_matching :focus 15 | 16 | # Many RSpec users commonly either run the entire suite or an individual 17 | # file, and it's useful to allow more verbose output when running an 18 | # individual spec file. 19 | if config.files_to_run.one? 20 | # Use the documentation formatter for detailed output, 21 | # unless a formatter has already been configured 22 | # (e.g. via a command-line flag). 
23 | config.default_formatter = 'doc' 24 | end 25 | 26 | # Print the 10 slowest examples and example groups at the 27 | # end of the spec run, to help surface which specs are running 28 | # particularly slow. 29 | config.profile_examples = 10 30 | 31 | # Run specs in random order to surface order dependencies. If you find an 32 | # order dependency and want to debug it, you can fix the order by providing 33 | # the seed, which is printed after each run. 34 | # --seed 1234 35 | config.order = :random 36 | 37 | # Seed global randomization in this process using the `--seed` CLI option. 38 | # Setting this allows you to use `--seed` to deterministically reproduce 39 | # test failures related to randomization by passing the same `--seed` value 40 | # as the one that triggered the failure. 41 | Kernel.srand config.seed 42 | 43 | # rspec-expectations config goes here. You can use an alternate 44 | # assertion/expectation library such as wrong or the stdlib/minitest 45 | # assertions if you prefer. 46 | config.expect_with :rspec do |expectations| 47 | # Enable only the newer, non-monkey-patching expect syntax. 48 | # For more details, see: 49 | # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax 50 | expectations.syntax = :expect 51 | end 52 | 53 | # rspec-mocks config goes here. You can use an alternate test double 54 | # library (such as bogus or mocha) by changing the `mock_with` option here. 55 | config.mock_with :rspec do |mocks| 56 | # Enable only the newer, non-monkey-patching expect syntax. 57 | # For more details, see: 58 | # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ 59 | mocks.syntax = :expect 60 | 61 | # Prevents you from mocking or stubbing a method that does not exist on 62 | # a real object. This is generally recommended. 
63 | mocks.verify_partial_doubles = true 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /spec/support/document_crud.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DocumentCrud 4 | def create_document(params, repository) 5 | document = Document.new(params) 6 | # Ensure this helper method is only used to create valid docs 7 | document.validate! 8 | repository.save(document) 9 | # Ensure the document is searchable 10 | repository.refresh_index! 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /spec/support/shared_examples/read_only_mode.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | shared_examples 'a data modifying request made during read-only mode' do 4 | let(:allow_updates) { false } 5 | 6 | it 'returns a read-only-mode-releated failure message as JSON' do 7 | expect(response.status).to eq(503) 8 | expect(JSON.parse(response.body)).to match(hash_including({ 9 | 'status' => 503, 10 | 'developer_message' => 'The i14y API is currently in read-only mode.' 11 | })) 12 | end 13 | 14 | context 'when a specific maintenance message is configured' do 15 | let(:maintenance_message) { 'Sorry about that!' } 16 | 17 | it 'additionally includes the specific maintanance message' do 18 | expect(JSON.parse(response.body)).to match(hash_including({ 19 | 'status' => 503, 20 | 'developer_message' => 'The i14y API is currently in read-only mode. Sorry about that!' 
21 | })) 22 | end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /spec/support/shared_examples/repository_behavior.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | shared_examples_for 'a repository' do 4 | describe 'serialization' do 5 | subject(:serialize) { repository.serialize(klass_instance) } 6 | 7 | let(:klass_instance) { repository.klass.new } 8 | 9 | it { is_expected.to be_a Hash } 10 | end 11 | 12 | describe 'deserialization' do 13 | subject(:deserialize) { repository.deserialize(hash) } 14 | 15 | # Backwards compatibility with pre-ES 7 documents: when `_source` has no id, the id must come from the hit's `_id` 16 | context 'when the source does not include the id' do 17 | let(:hash) do 18 | { 19 | '_id' => 'a123', 20 | '_source' => { } 21 | } 22 | end 23 | 24 | it 'sets the id on the deserialized object' do 25 | expect(deserialize.id).to eq 'a123' 26 | end 27 | end 28 | end 29 | 30 | it 'can connect to Elasticsearch' do 31 | expect(repository.client.ping).to be(true) 32 | end 33 | 34 | it 'uses one primary and one replica shard' do 35 | expect(repository.settings.to_hash).to match(hash_including( 36 | number_of_shards: 1, 37 | number_of_replicas: 1 38 | )) 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /spec/support/shoulda.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | Shoulda::Matchers.configure do |config| 4 | config.integrate do |with| 5 | with.test_framework :rspec 6 | with.library :rails 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /spec/test_services.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module TestServices 4 | module_function 5 | 6 | def create_collections_index 7 | 
ES.client.indices.create(index: collections_index_name) 8 | ES.client.indices.put_alias( 9 | index: collections_index_name, 10 | name: ES.collection_repository.index_name 11 | ) 12 | end 13 | 14 | def delete_es_indexes 15 | ES.client.indices.delete(index: [Rails.env, I14y::APP_NAME, '*'].join('-')) 16 | end 17 | 18 | def clear_index(index_name) 19 | ES.client.delete_by_query( 20 | index: index_name, 21 | q: '*:*', 22 | conflicts: 'proceed' 23 | ) 24 | end 25 | 26 | def collections_index_name 27 | [Rails.env, I14y::APP_NAME, 'collections', 'v1'].join('-') 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /vendor/stream2es: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GSA/i14y/f3be9325fa45d5dec1e716a4a9ffeb6e0cf56b22/vendor/stream2es --------------------------------------------------------------------------------