├── .circleci
    └── config.yml
├── .codeclimate.yml
├── .codeinventory.yml
├── .csslintrc
├── .dockerignore
├── .env
├── .eslintignore
├── .eslintrc
├── .github
    └── pull_request_template.md
├── .gitignore
├── .rubocop.yml
├── .rubocop_todo.yml
├── .ruby-version
├── .simplecov
├── Capfile
├── Dockerfile
├── Dockerfile.dev
├── Gemfile
├── Gemfile.lock
├── LICENSE
├── README.md
├── Rakefile
├── app
    ├── classes
    │   ├── document_query.rb
    │   ├── document_search.rb
    │   ├── document_search_results.rb
    │   └── query_parser.rb
    ├── controllers
    │   └── api
    │   │   ├── base.rb
    │   │   └── v1
    │   │       ├── base.rb
    │   │       ├── collections.rb
    │   │       └── documents.rb
    ├── models
    │   ├── collection.rb
    │   └── document.rb
    ├── repositories
    │   ├── collection_repository.rb
    │   ├── concerns
    │   │   └── repository.rb
    │   └── document_repository.rb
    └── templates
    │   ├── collections.rb
    │   └── documents.rb
├── appspec.yml
├── bin
    ├── bundle
    ├── rails
    ├── rake
    ├── secure_docker
    ├── setup
    └── update
├── buildspec_i14y.yml
├── cicd-scripts
    └── fetch_env_vars.sh
├── config.ru
├── config
    ├── access_control.yml
    ├── application.rb
    ├── boot.rb
    ├── deploy.rb
    ├── deploy
    │   ├── development.rb
    │   ├── production.rb
    │   └── staging.rb
    ├── elasticsearch.yml
    ├── environment.rb
    ├── environments
    │   ├── development.rb
    │   ├── production.rb
    │   └── test.rb
    ├── initializers
    │   ├── access_control.rb
    │   ├── application_controller_renderer.rb
    │   ├── backtrace_silencers.rb
    │   ├── cookies_serializer.rb
    │   ├── elasticsearch.rb
    │   ├── filter_parameter_logging.rb
    │   ├── inflections.rb
    │   ├── locales.rb
    │   ├── mime_types.rb
    │   ├── session_store.rb
    │   └── wrap_parameters.rb
    ├── locales
    │   ├── analysis
    │   │   ├── en_protwords.txt
    │   │   ├── en_synonyms.txt
    │   │   ├── es_protwords.txt
    │   │   └── es_synonyms.txt
    │   └── en.yml
    ├── newrelic.yml
    ├── puma.rb
    └── routes.rb
├── lib
    ├── ext
    │   └── string.rb
    ├── namespaced_index.rb
    ├── read_only_access_control.rb
    ├── serde.rb
    ├── tasks
    │   └── i14y.rake
    ├── templatable.rb
    └── validations
    │   └── max_bytes.rb
├── public
    ├── 404.html
    ├── 422.html
    ├── 500.html
    ├── favicon.ico
    └── robots.txt
├── spec
    ├── classes
    │   ├── document_query_spec.rb
    │   ├── document_search_results_spec.rb
    │   ├── document_search_spec.rb
    │   └── query_parser_spec.rb
    ├── config
    │   └── initializers
    │   │   └── filter_parameter_logging_spec.rb
    ├── lib
    │   ├── serde_spec.rb
    │   └── validations
    │   │   └── max_bytes_spec.rb
    ├── models
    │   ├── collection_spec.rb
    │   └── document_spec.rb
    ├── rails_helper.rb
    ├── repositories
    │   ├── collection_repository_spec.rb
    │   └── document_repository_spec.rb
    ├── requests
    │   └── api
    │   │   └── v1
    │   │       ├── collections_spec.rb
    │   │       └── documents_spec.rb
    ├── spec_helper.rb
    ├── support
    │   ├── document_crud.rb
    │   ├── shared_examples
    │   │   ├── read_only_mode.rb
    │   │   └── repository_behavior.rb
    │   └── shoulda.rb
    └── test_services.rb
└── vendor
    └── stream2es


/.circleci/config.yml:
--------------------------------------------------------------------------------
  1 | version: 2.1
  2 | 
  3 | orbs:
  4 |   ruby: circleci/ruby@1.4.0
  5 |   # aws-ecr: circleci/aws-ecr@8.2.1
  6 |   # aws-eks: circleci/aws-eks@1.1.0
  7 |   # aws-cli: circleci/aws-cli@3.1.4
  8 |   # kubernetes: circleci/kubernetes@1.3.1
  9 |   docker: circleci/docker@1.5.0
 10 | 
 11 | jobs:
 12 |   build_and_test:
 13 |     parameters:
 14 |       ruby_version:
 15 |         type: string
 16 |       elasticsearch_version:
 17 |         type: string
 18 | 
 19 |     docker:
 20 |       - image: cimg/ruby:<< parameters.ruby_version >>
 21 | 
 22 |       - image: docker.elastic.co/elasticsearch/elasticsearch:<< parameters.elasticsearch_version >>
 23 |         environment:
 24 |           bootstrap.memory_lock: true
 25 |           discovery.type: single-node
 26 |           xpack.security.enabled: false
 27 |           ES_JAVA_OPTS: '-Xms512m -Xmx512m'
 28 | 
 29 |     working_directory: ~/app
 30 | 
 31 |     steps:
 32 |       - setup_remote_docker
 33 |       - checkout
 34 |       # Install gems with Bundler
 35 |       - ruby/install-deps:
 36 |           key: gems-ruby-<< parameters.ruby_version >>-v{{ .Environment.CACHE_VERSION }}
 37 |       - run:
 38 |           name: Setup Code Climate test-reporter
 39 |           command: |
 40 |             curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
 41 |             chmod +x ./cc-test-reporter
 42 |       - run:
 43 |           name: Wait for Elasticsearch
 44 |           command: dockerize -wait tcp://localhost:9200 -timeout 1m
 45 |       - run:
 46 |           name: Install Elasticsearch Plugins
 47 |           command: |
 48 |             until curl -sS localhost:9200; do sleep 1; done
 49 |             elasticsearch_container_id=$(docker ps -qf "ancestor=docker.elastic.co/elasticsearch/elasticsearch:<< parameters.elasticsearch_version >>")
 50 |             docker exec $elasticsearch_container_id elasticsearch-plugin install analysis-kuromoji
 51 |             docker exec $elasticsearch_container_id elasticsearch-plugin install analysis-icu
 52 |             docker exec $elasticsearch_container_id elasticsearch-plugin install analysis-smartcn
 53 |             # Restart Elasticsearch to apply the plugins
 54 |             docker restart $elasticsearch_container_id
 55 |             # Wait for Elasticsearch to be ready after restart
 56 |             dockerize -wait tcp://localhost:9200 -timeout 1m
 57 |       - run:
 58 |           name: RSpec
 59 |           environment:
 60 |             CC_TEST_REPORTER_ID: 09d5a7d453407f367679c8f86c4c582ec3583bed3c7a06286d61d16e89290bd1
 61 |           command: |
 62 |             bundle exec rake i14y:setup
 63 |             ./cc-test-reporter before-build
 64 |             bundle exec rspec spec
 65 |             ./cc-test-reporter after-build --exit-code $?
 66 |       - store_artifacts:
 67 |           path: coverage
 68 |           destination: ~/coverage
 69 | 
 70 |   # update-staging:
 71 |   #   docker:
 72 |   #     - image: cimg/aws:2023.01
 73 |   #   steps:
 74 |   #     - checkout
 75 |   #     - setup_remote_docker
 76 |   #     - aws-cli/setup:
 77 |   #         aws-region: STAGING_AWS_REGION
 78 |   #         aws-access-key-id: CIRCLE_CI_USER_ACCESS_KEY_ID
 79 |   #         aws-secret-access-key: CIRCLE_CI_USER_SECRET_ACCESS_KEY
 80 |   #     - run: docker build -t i14y .
 81 |   #     - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:latest
 82 |   #     - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:${CIRCLE_BUILD_NUM}
 83 |   #     - run: aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin 213305845712.dkr.ecr.us-east-2.amazonaws.com
 84 |   #     - run: docker push 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:latest
 85 |   #     - run: docker push 213305845712.dkr.ecr.us-east-2.amazonaws.com/i14y:${CIRCLE_BUILD_NUM}
 86 |   #     - run:
 87 |   #         name: update i14y k8s deployment
 88 |   #         command: |
 89 |   #           aws ssm send-command \
 90 |   #           --document-name "searchgov-deployment-ssm-document" \
 91 |   #           --targets "Key=tag:Name,Values=jumphost-staging-search-instance" \
 92 |   #           --parameters '{"DeploymentName":["search-staging-i14y-deploy"], "Namespace":["search"]}' \
 93 |   #           --comment "restart i14y staging deployment"
 94 | 
 95 |   # update-prod:
 96 |   #   docker:
 97 |   #     - image: cimg/aws:2023.01
 98 |   #   steps:
 99 |   #     - checkout
100 |   #     - setup_remote_docker
101 |   #     - aws-cli/setup:
102 |   #         aws-region: PROD_AWS_REGION
103 |   #         aws-access-key-id: CIRCLE_CI_USER_ACCESS_KEY_ID
104 |   #         aws-secret-access-key: CIRCLE_CI_USER_SECRET_ACCESS_KEY
105 |   #     - run: docker build -t i14y .
106 |   #     - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:latest
107 |   #     - run: docker tag i14y:latest 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:${CIRCLE_BUILD_NUM}
108 |   #     - run: aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 213305845712.dkr.ecr.us-east-1.amazonaws.com
109 |   #     - run: docker push 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:latest
110 |   #     - run: docker push 213305845712.dkr.ecr.us-east-1.amazonaws.com/i14y:${CIRCLE_BUILD_NUM}
111 |   #     - run:
112 |   #         name: update i14y k8s deployment
113 |   #         command: |
114 |   #           aws ssm send-command \
115 |   #           --document-name "searchgov-deployment-ssm-document" \
116 |   #           --targets "Key=tag:Name,Values=jumphost-prod-search-instance" \
117 |   #           --parameters '{"DeploymentName":["search-prod-i14y-deploy"], "Namespace":["search"]}' \
118 |   #           --comment "restart i14y prod deployment"
119 | 
120 | workflows:
121 |   build_and_test:
122 |     jobs:
123 |       - build_and_test:
124 |           name: "Ruby << matrix.ruby_version >>, ES << matrix.elasticsearch_version >>"
125 |           matrix:
126 |             parameters:
127 |               ruby_version:
128 |                 - 3.3.7
129 |               elasticsearch_version:
130 |                 - 7.17.7
131 |                 # not yet compatible with Elasticsearch 8
132 | 
133 |   # build_image_and_deploy:
134 |   #   jobs:
135 |   #     - update-staging:
136 |   #         context:
137 |   #           - aws-client-keys
138 |   #         filters:
139 |   #           branches:
140 |   #             only:
141 |   #               - main
142 |   #     - update-prod:
143 |   #         context:
144 |   #           - aws-client-keys
145 |   #         filters:
146 |   #           branches:
147 |   #             only:
148 |   #               - production
149 | 


--------------------------------------------------------------------------------
/.codeclimate.yml:
--------------------------------------------------------------------------------
 1 | version: '2'
 2 | plugins:
 3 |   brakeman:
 4 |     enabled: true
 5 |   bundler-audit:
 6 |     enabled: true
 7 |   csslint:
 8 |     enabled: true
 9 |   duplication:
10 |     enabled: true
11 |     config:
12 |       languages:
13 |         - ruby
14 |     exclude_patterns:
15 |       - "spec/"
16 |   eslint:
17 |     enabled: true
18 |   fixme:
19 |     enabled: true
20 |   rubocop:
21 |     enabled: true
22 |     channel: rubocop-1-65-0
23 | exclude_patterns:
24 |   - bin/
25 |   - vendor/
26 |   - coverage/


--------------------------------------------------------------------------------
/.codeinventory.yml:
--------------------------------------------------------------------------------
 1 | name: i14y
 2 | description: 'An API for indexing agency web content in real time.'
 3 | license: 'https://creativecommons.org/publicdomain/zero/1.0'
 4 | openSourceProject: 1
 5 | governmentWideReuseProject: 1
 6 | tags:
 7 |     - GSA
 8 |     - DigitalGovSearch
 9 |     - websites
10 |     - search
11 |     - indexing
12 | contact:
13 |     email: search@gsa.gov
14 | 


--------------------------------------------------------------------------------
/.csslintrc:
--------------------------------------------------------------------------------
1 | --exclude-exts=.min.css
2 | --ignore=adjoining-classes,box-model,ids,order-alphabetical,unqualified-attributes
3 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Git
 2 | .git
 3 | .gitignore
 4 | 
 5 | # Logs
 6 | log/*
 7 | 
 8 | # Temporary files
 9 | tmp/*
10 | 
11 | # Editor temp files
12 | *.swp
13 | *.swo
14 | 


--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | I14Y_ADMIN_USER=dev
2 | I14Y_ADMIN_PASSWORD=devpwd


--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | **/*{.,-}min.js
2 | 


--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
  1 | ecmaFeatures:
  2 |   modules: true
  3 |   jsx: true
  4 | 
  5 | env:
  6 |   amd: true
  7 |   browser: true
  8 |   es6: true
  9 |   jquery: true
 10 |   node: true
 11 | 
 12 | # http://eslint.org/docs/rules/
 13 | rules:
 14 |   # Possible Errors
 15 |   comma-dangle: [2, never]
 16 |   no-cond-assign: 2
 17 |   no-console: 0
 18 |   no-constant-condition: 2
 19 |   no-control-regex: 2
 20 |   no-debugger: 2
 21 |   no-dupe-args: 2
 22 |   no-dupe-keys: 2
 23 |   no-duplicate-case: 2
 24 |   no-empty: 2
 25 |   no-empty-character-class: 2
 26 |   no-ex-assign: 2
 27 |   no-extra-boolean-cast: 2
 28 |   no-extra-parens: 0
 29 |   no-extra-semi: 2
 30 |   no-func-assign: 2
 31 |   no-inner-declarations: [2, functions]
 32 |   no-invalid-regexp: 2
 33 |   no-irregular-whitespace: 2
 34 |   no-negated-in-lhs: 2
 35 |   no-obj-calls: 2
 36 |   no-regex-spaces: 2
 37 |   no-sparse-arrays: 2
 38 |   no-unexpected-multiline: 2
 39 |   no-unreachable: 2
 40 |   use-isnan: 2
 41 |   valid-jsdoc: 0
 42 |   valid-typeof: 2
 43 | 
 44 |   # Best Practices
 45 |   accessor-pairs: 2
 46 |   block-scoped-var: 0
 47 |   complexity: [2, 6]
 48 |   consistent-return: 0
 49 |   curly: 0
 50 |   default-case: 0
 51 |   dot-location: 0
 52 |   dot-notation: 0
 53 |   eqeqeq: 2
 54 |   guard-for-in: 2
 55 |   no-alert: 2
 56 |   no-caller: 2
 57 |   no-case-declarations: 2
 58 |   no-div-regex: 2
 59 |   no-else-return: 0
 60 |   no-empty-label: 2
 61 |   no-empty-pattern: 2
 62 |   no-eq-null: 2
 63 |   no-eval: 2
 64 |   no-extend-native: 2
 65 |   no-extra-bind: 2
 66 |   no-fallthrough: 2
 67 |   no-floating-decimal: 0
 68 |   no-implicit-coercion: 0
 69 |   no-implied-eval: 2
 70 |   no-invalid-this: 0
 71 |   no-iterator: 2
 72 |   no-labels: 0
 73 |   no-lone-blocks: 2
 74 |   no-loop-func: 2
 75 |   no-magic-number: 0
 76 |   no-multi-spaces: 0
 77 |   no-multi-str: 0
 78 |   no-native-reassign: 2
 79 |   no-new-func: 2
 80 |   no-new-wrappers: 2
 81 |   no-new: 2
 82 |   no-octal-escape: 2
 83 |   no-octal: 2
 84 |   no-proto: 2
 85 |   no-redeclare: 2
 86 |   no-return-assign: 2
 87 |   no-script-url: 2
 88 |   no-self-compare: 2
 89 |   no-sequences: 0
 90 |   no-throw-literal: 0
 91 |   no-unused-expressions: 2
 92 |   no-useless-call: 2
 93 |   no-useless-concat: 2
 94 |   no-void: 2
 95 |   no-warning-comments: 0
 96 |   no-with: 2
 97 |   radix: 2
 98 |   vars-on-top: 0
 99 |   wrap-iife: 2
100 |   yoda: 0
101 | 
102 |   # Strict
103 |   strict: 0
104 | 
105 |   # Variables
106 |   init-declarations: 0
107 |   no-catch-shadow: 2
108 |   no-delete-var: 2
109 |   no-label-var: 2
110 |   no-shadow-restricted-names: 2
111 |   no-shadow: 0
112 |   no-undef-init: 2
113 |   no-undef: 0
114 |   no-undefined: 0
115 |   no-unused-vars: 0
116 |   no-use-before-define: 0
117 | 
118 |   # Node.js and CommonJS
119 |   callback-return: 2
120 |   global-require: 2
121 |   handle-callback-err: 2
122 |   no-mixed-requires: 0
123 |   no-new-require: 0
124 |   no-path-concat: 2
125 |   no-process-exit: 2
126 |   no-restricted-modules: 0
127 |   no-sync: 0
128 | 
129 |   # Stylistic Issues
130 |   array-bracket-spacing: 0
131 |   block-spacing: 0
132 |   brace-style: 0
133 |   camelcase: 0
134 |   comma-spacing: 0
135 |   comma-style: 0
136 |   computed-property-spacing: 0
137 |   consistent-this: 0
138 |   eol-last: 0
139 |   func-names: 0
140 |   func-style: 0
141 |   id-length: 0
142 |   id-match: 0
143 |   indent: 0
144 |   jsx-quotes: 0
145 |   key-spacing: 0
146 |   linebreak-style: 0
147 |   lines-around-comment: 0
148 |   max-depth: 0
149 |   max-len: 0
150 |   max-nested-callbacks: 0
151 |   max-params: 0
152 |   max-statements: [2, 30]
153 |   new-cap: 0
154 |   new-parens: 0
155 |   newline-after-var: 0
156 |   no-array-constructor: 0
157 |   no-bitwise: 0
158 |   no-continue: 0
159 |   no-inline-comments: 0
160 |   no-lonely-if: 0
161 |   no-mixed-spaces-and-tabs: 0
162 |   no-multiple-empty-lines: 0
163 |   no-negated-condition: 0
164 |   no-nested-ternary: 0
165 |   no-new-object: 0
166 |   no-plusplus: 0
167 |   no-restricted-syntax: 0
168 |   no-spaced-func: 0
169 |   no-ternary: 0
170 |   no-trailing-spaces: 0
171 |   no-underscore-dangle: 0
172 |   no-unneeded-ternary: 0
173 |   object-curly-spacing: 0
174 |   one-var: 0
175 |   operator-assignment: 0
176 |   operator-linebreak: 0
177 |   padded-blocks: 0
178 |   quote-props: 0
179 |   quotes: 0
180 |   require-jsdoc: 0
181 |   semi-spacing: 0
182 |   semi: 0
183 |   sort-vars: 0
184 |   space-after-keywords: 0
185 |   space-before-blocks: 0
186 |   space-before-function-paren: 0
187 |   space-before-keywords: 0
188 |   space-in-parens: 0
189 |   space-infix-ops: 0
190 |   space-return-throw-case: 0
191 |   space-unary-ops: 0
192 |   spaced-comment: 0
193 |   wrap-regex: 0
194 | 
195 |   # ECMAScript 6
196 |   arrow-body-style: 0
197 |   arrow-parens: 0
198 |   arrow-spacing: 0
199 |   constructor-super: 0
200 |   generator-star-spacing: 0
201 |   no-arrow-condition: 0
202 |   no-class-assign: 0
203 |   no-const-assign: 0
204 |   no-dupe-class-members: 0
205 |   no-this-before-super: 0
206 |   no-var: 0
207 |   object-shorthand: 0
208 |   prefer-arrow-callback: 0
209 |   prefer-const: 0
210 |   prefer-reflect: 0
211 |   prefer-spread: 0
212 |   prefer-template: 0
213 |   require-yield: 0
214 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ## Summary
 2 | - Brief summary of the changes included in this PR
 3 | - Any additional information or context which may help the reviewer
 4 |  
 5 | ### Checklist
 6 | Please ensure you have addressed all concerns below before marking a PR "ready for review" or before requesting a re-review. If you cannot complete an item below, replace the checkbox with the ⚠️ `:warning:` emoji and explain why the step was not completed.
 7 |  
 8 | #### Functionality Checks
 9 |  
10 | - [ ] You have merged the latest changes from the target branch (usually `main`) into your branch.
11 |   
12 | - [ ] Your primary commit message is of the format **SRCH-#### \<description\>** matching the associated Jira ticket.
13 | 
14 | - [ ] PR title is either of the format **SRCH-#### \<description\>** matching the associated Jira ticket (i.e. "SRCH-123 implement feature X"), or **Release - SRCH-####, SRCH-####, SRCH-####** matching the Jira ticket numbers in the release.
15 |  
16 | - [ ] Automated checks pass. If Code Climate checks do not pass, explain reason for failures:
17 |  
18 | #### Process Checks
19 | 
20 | - [ ] You have specified at least one "Reviewer".
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.rbc
 2 | capybara-*.html
 3 | 
 4 | # Auto-generated by RubyMine
 5 | .idea
 6 | 
 7 | .rspec
 8 | .rspec-local
 9 | # Ignore the YAML file that is downloaded when running `rubocop` locally
10 | .rubocop*default-yml
11 | /log
12 | /tmp
13 | /db/*.sqlite3
14 | /db/*.sqlite3-journal
15 | /public/system
16 | /coverage/
17 | /spec/tmp
18 | **.orig
19 | 
20 | ## Environment normalisation:
21 | /.bundle
22 | /vendor/bundle
23 | 
24 | # these should all be checked in to normalise the environment:
25 | # Gemfile.lock, .ruby-version, .ruby-gemset
26 | 
27 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
28 | .rvmrc
29 | 
30 | # If using bower-rails, ignore the default bower_components path and the .bowerrc and bower.json files
31 | /vendor/assets/bower_components
32 | *.bowerrc
33 | bower.json
34 | 
35 | # Ignore pow environment settings
36 | .powenv
37 | 


--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
 1 | inherit_from: .rubocop_todo.yml
 2 | 
 3 | AllCops:
 4 |   NewCops: enable
 5 | 
 6 | require:
 7 |   - rubocop-performance
 8 |   - rubocop-rails
 9 |   - rubocop-rake
10 |   - rubocop-rspec
11 | 
12 | inherit_mode:
13 |   merge:
14 |     - Exclude
15 | 


--------------------------------------------------------------------------------
/.rubocop_todo.yml:
--------------------------------------------------------------------------------
 1 | # This configuration was generated by
 2 | # `rubocop --auto-gen-config`
 3 | # on 2025-04-01 21:22:59 UTC using RuboCop version 1.75.1.
 4 | # The point is for the user to remove these configuration records
 5 | # one by one as the offenses are removed from the code base.
 6 | # Note that changes in the inspected code, or installation of new
 7 | # versions of RuboCop, may require this file to be generated again.
 8 | 
 9 | # Offense count: 1
10 | RSpec/BeforeAfterAll:
11 |   Exclude:
12 |     - 'spec/requests/api/v1/documents_spec.rb'
13 | 
14 | # Offense count: 2
15 | # Configuration parameters: Prefixes, AllowedPatterns.
16 | # Prefixes: when, with, without
17 | RSpec/ContextWording:
18 |   Exclude:
19 |     - 'spec/classes/query_parser_spec.rb'
20 | 


--------------------------------------------------------------------------------
/.ruby-version:
--------------------------------------------------------------------------------
1 | ruby-3.3.7
2 | 


--------------------------------------------------------------------------------
/.simplecov:
--------------------------------------------------------------------------------
1 | SimpleCov.start 'rails' do # start coverage tracking using SimpleCov's 'rails' profile
2 |   minimum_coverage 100 # require 100% coverage; SimpleCov reports a failure below this threshold
3 |   add_filter '/templates/' # leave files whose path matches /templates/ out of the report
4 |   add_filter '/lib/templatable.rb' # leave lib/templatable.rb out of the report
5 | end
6 | 


--------------------------------------------------------------------------------
/Capfile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'capistrano/setup'
 4 | 
 5 | require 'capistrano/deploy'
 6 | 
 7 | require 'capistrano/scm/git'
 8 | install_plugin Capistrano::SCM::Git
 9 | 
10 | SSHKit.config.command_map[:bundle] = 'bin/bundle'
11 | 
12 | require "capistrano/rbenv"
13 | require "capistrano/bundler"
14 | 
15 | require 'capistrano/puma'
16 | require 'capistrano/puma/workers'
17 | 
18 | install_plugin Capistrano::Puma, load_hooks: false
19 | install_plugin Capistrano::Puma::Systemd
20 | 
21 | Dir.glob('lib/capistrano/tasks/*.rake').each { |r| import r }
22 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Default Ruby version; keep in sync with .ruby-version and the CI matrix.
 2 | ARG RUBY_VERSION=3.3.7
 3 | FROM public.ecr.aws/docker/library/ruby:$RUBY_VERSION-slim AS base
 4 | 
 5 | WORKDIR /rails
 6 | 
 7 | RUN apt-get update -qq && \
 8 |     apt-get install -y build-essential libcurl4-openssl-dev curl && \
 9 |     apt-get clean && \
10 |     rm -rf /var/lib/apt/lists/*
11 | 
12 | ENV RAILS_ENV="production" \
13 |     BUNDLE_DEPLOYMENT="1" \
14 |     BUNDLE_PATH="/usr/local/bundle" \
15 |     RAILS_LOG_TO_STDOUT="1"
16 | 
17 | # Build stage: install the bundle, then copy in the application source.
18 | FROM base AS build
19 | 
20 | RUN gem install bundler -v 2.4.7
21 | 
22 | # Copy only the gem manifests before `bundle install` so the layer can be reused when other files change.
23 | COPY Gemfile Gemfile.lock ./
24 | 
25 | RUN bundle install && \
26 |     rm -rf ~/.bundle/ "${BUNDLE_PATH}"/ruby/*/cache "${BUNDLE_PATH}"/ruby/*/bundler/gems/*/.git
27 | 
28 | COPY . .
29 | 
30 | # Final stage: start again from base and take the gems and application tree from the build stage.
31 | FROM base
32 | 
33 | COPY --from=build "${BUNDLE_PATH}" "${BUNDLE_PATH}"
34 | COPY --from=build /rails /rails
35 | 
36 | RUN groupadd --system --gid 1000 rails && \
37 |     useradd --uid 1000 --gid 1000 --create-home --shell /bin/bash rails
38 | 
39 | RUN mkdir -p /rails/log /rails/tmp && \
40 |     chown -R rails:rails /rails/log /rails/tmp
41 | 
42 | RUN bin/secure_docker
43 | 
44 | # Drop privileges to the rails user (uid/gid 1000) created above.
45 | USER 1000:1000
46 | 
47 | EXPOSE 3200
48 | CMD ["bundle", "exec", "rails", "server", "-b", "0.0.0.0", "-p", "3200"]
49 | 


--------------------------------------------------------------------------------
/Dockerfile.dev:
--------------------------------------------------------------------------------
 1 | # Development image; keep the Ruby version in sync with .ruby-version.
 2 | FROM ruby:3.3.7
 3 | WORKDIR /usr/src/app
 4 | EXPOSE 3200
 5 | 
 6 | ENV OPENSSL_CONF=/etc/ssl/
 7 | 
 8 | # Refresh the package index before installing so the step does not depend on stale or missing apt lists.
 9 | RUN apt-get update -qq \
10 |   && apt-get install -y curl \
11 |   && rm -rf /var/lib/apt/lists/* \
12 |   && gem install bundler:2.4.7
13 | 
14 | # Copy only the gem manifests before `bundle install` so the layer can be reused when other files change.
15 | COPY Gemfile* /usr/src/app/
16 | ENV BUNDLE_PATH=/gems
17 | RUN bundle install
18 | 
19 | COPY . /usr/src/app/
20 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | source 'https://rubygems.org'
 3 | 
 4 | gem 'dotenv',                    '~> 3.1'
 5 | gem 'elasticsearch',             '~> 6.0'
 6 | gem 'elasticsearch-dsl',         '~> 0.1.9'
 7 | gem 'elasticsearch-persistence', '~> 6.0'
 8 | gem 'grape',                     '~> 1.7.0'
 9 | gem 'jbuilder',                  '~> 2.7'
10 | gem 'newrelic_rpm',              '~> 9.10'
11 | gem 'puma',                      '~> 5.6'
12 | gem 'rack',                      '~> 2.2.8'
13 | gem 'rack-cors',                 '~> 1.0.5'
14 | gem 'rails',                     '~> 7.1.0'
15 | gem 'rails_semantic_logger',     '~> 4.14'
16 | gem 'rake',                      '~> 13.0.0'
17 | gem 'typhoeus',                  '~> 1.4.0'
18 | gem 'virtus',                    '~> 1.0' # TODO: Virtus is no longer supported; consider replacing it with ActiveModel::Attributes.
19 | 
20 | group :development, :test do # gems loaded in both the development and test environments
21 |   gem 'awesome_print',       '~> 1.8' # To enable in Pry, see https://github.com/awesome-print/awesome_print#pry-integration
22 |   gem 'capistrano',          require: false
23 |   gem 'capistrano3-puma',    require: false
24 |   gem 'capistrano-newrelic', require: false
25 |   gem 'capistrano-rails',    require: false
26 |   gem 'capistrano-rbenv',    require: false
27 |   gem 'debug'
28 |   gem 'listen'
29 |   gem 'pry-byebug',          '~> 3.4'
30 |   gem 'pry-rails',           '~> 0.3'
31 |   gem 'rspec-rails',         '~> 3.7'
32 |   gem 'rubocop',              require: false
33 |   gem 'rubocop-performance',  require: false
34 |   gem 'rubocop-rails',        require: false
35 |   gem 'rubocop-rake',         require: false
36 |   gem 'rubocop-rspec',        require: false
37 | end
38 | 
39 | group :test do # gems loaded only in the test environment
40 |   gem 'codeclimate-test-reporter', '~> 1.0.8', require: nil
41 |   gem 'shoulda', '~> 4.0'
42 |   gem 'simplecov', '~> 0.13.0', require: false
43 | end
44 | 


--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
  1 | GEM
  2 |   remote: https://rubygems.org/
  3 |   specs:
  4 |     actioncable (7.1.5.1)
  5 |       actionpack (= 7.1.5.1)
  6 |       activesupport (= 7.1.5.1)
  7 |       nio4r (~> 2.0)
  8 |       websocket-driver (>= 0.6.1)
  9 |       zeitwerk (~> 2.6)
 10 |     actionmailbox (7.1.5.1)
 11 |       actionpack (= 7.1.5.1)
 12 |       activejob (= 7.1.5.1)
 13 |       activerecord (= 7.1.5.1)
 14 |       activestorage (= 7.1.5.1)
 15 |       activesupport (= 7.1.5.1)
 16 |       mail (>= 2.7.1)
 17 |       net-imap
 18 |       net-pop
 19 |       net-smtp
 20 |     actionmailer (7.1.5.1)
 21 |       actionpack (= 7.1.5.1)
 22 |       actionview (= 7.1.5.1)
 23 |       activejob (= 7.1.5.1)
 24 |       activesupport (= 7.1.5.1)
 25 |       mail (~> 2.5, >= 2.5.4)
 26 |       net-imap
 27 |       net-pop
 28 |       net-smtp
 29 |       rails-dom-testing (~> 2.2)
 30 |     actionpack (7.1.5.1)
 31 |       actionview (= 7.1.5.1)
 32 |       activesupport (= 7.1.5.1)
 33 |       nokogiri (>= 1.8.5)
 34 |       racc
 35 |       rack (>= 2.2.4)
 36 |       rack-session (>= 1.0.1)
 37 |       rack-test (>= 0.6.3)
 38 |       rails-dom-testing (~> 2.2)
 39 |       rails-html-sanitizer (~> 1.6)
 40 |     actiontext (7.1.5.1)
 41 |       actionpack (= 7.1.5.1)
 42 |       activerecord (= 7.1.5.1)
 43 |       activestorage (= 7.1.5.1)
 44 |       activesupport (= 7.1.5.1)
 45 |       globalid (>= 0.6.0)
 46 |       nokogiri (>= 1.8.5)
 47 |     actionview (7.1.5.1)
 48 |       activesupport (= 7.1.5.1)
 49 |       builder (~> 3.1)
 50 |       erubi (~> 1.11)
 51 |       rails-dom-testing (~> 2.2)
 52 |       rails-html-sanitizer (~> 1.6)
 53 |     activejob (7.1.5.1)
 54 |       activesupport (= 7.1.5.1)
 55 |       globalid (>= 0.3.6)
 56 |     activemodel (7.1.5.1)
 57 |       activesupport (= 7.1.5.1)
 58 |     activerecord (7.1.5.1)
 59 |       activemodel (= 7.1.5.1)
 60 |       activesupport (= 7.1.5.1)
 61 |       timeout (>= 0.4.0)
 62 |     activestorage (7.1.5.1)
 63 |       actionpack (= 7.1.5.1)
 64 |       activejob (= 7.1.5.1)
 65 |       activerecord (= 7.1.5.1)
 66 |       activesupport (= 7.1.5.1)
 67 |       marcel (~> 1.0)
 68 |     activesupport (7.1.5.1)
 69 |       base64
 70 |       benchmark (>= 0.3)
 71 |       bigdecimal
 72 |       concurrent-ruby (~> 1.0, >= 1.0.2)
 73 |       connection_pool (>= 2.2.5)
 74 |       drb
 75 |       i18n (>= 1.6, < 2)
 76 |       logger (>= 1.4.2)
 77 |       minitest (>= 5.1)
 78 |       mutex_m
 79 |       securerandom (>= 0.3)
 80 |       tzinfo (~> 2.0)
 81 |     airbrussh (1.5.3)
 82 |       sshkit (>= 1.6.1, != 1.7.0)
 83 |     ast (2.4.3)
 84 |     awesome_print (1.9.2)
 85 |     axiom-types (0.1.1)
 86 |       descendants_tracker (~> 0.0.4)
 87 |       ice_nine (~> 0.11.0)
 88 |       thread_safe (~> 0.3, >= 0.3.1)
 89 |     base64 (0.2.0)
 90 |     benchmark (0.4.0)
 91 |     bigdecimal (3.1.9)
 92 |     builder (3.3.0)
 93 |     byebug (12.0.0)
 94 |     capistrano (3.19.2)
 95 |       airbrussh (>= 1.0.0)
 96 |       i18n
 97 |       rake (>= 10.0.0)
 98 |       sshkit (>= 1.9.0)
 99 |     capistrano-bundler (2.1.1)
100 |       capistrano (~> 3.1)
101 |     capistrano-newrelic (0.10.1)
102 |       capistrano (~> 3.0)
103 |       newrelic_rpm
104 |     capistrano-rails (1.7.0)
105 |       capistrano (~> 3.1)
106 |       capistrano-bundler (>= 1.1, < 3)
107 |     capistrano-rbenv (2.2.0)
108 |       capistrano (~> 3.1)
109 |       sshkit (~> 1.3)
110 |     capistrano3-puma (5.2.0)
111 |       capistrano (~> 3.7)
112 |       capistrano-bundler
113 |       puma (>= 4.0, < 6.0)
114 |     codeclimate-test-reporter (1.0.9)
115 |       simplecov (<= 0.13)
116 |     coderay (1.1.3)
117 |     coercible (1.0.0)
118 |       descendants_tracker (~> 0.0.1)
119 |     concurrent-ruby (1.3.5)
120 |     connection_pool (2.5.0)
121 |     crass (1.0.6)
122 |     date (3.4.1)
123 |     debug (1.10.0)
124 |       irb (~> 1.10)
125 |       reline (>= 0.3.8)
126 |     descendants_tracker (0.0.4)
127 |       thread_safe (~> 0.3, >= 0.3.1)
128 |     diff-lcs (1.6.1)
129 |     docile (1.1.5)
130 |     dotenv (3.1.7)
131 |     drb (2.2.1)
132 |     dry-core (1.1.0)
133 |       concurrent-ruby (~> 1.0)
134 |       logger
135 |       zeitwerk (~> 2.6)
136 |     dry-inflector (1.2.0)
137 |     dry-logic (1.6.0)
138 |       bigdecimal
139 |       concurrent-ruby (~> 1.0)
140 |       dry-core (~> 1.1)
141 |       zeitwerk (~> 2.6)
142 |     dry-types (1.8.2)
143 |       bigdecimal (~> 3.0)
144 |       concurrent-ruby (~> 1.0)
145 |       dry-core (~> 1.0)
146 |       dry-inflector (~> 1.0)
147 |       dry-logic (~> 1.4)
148 |       zeitwerk (~> 2.6)
149 |     elasticsearch (6.8.3)
150 |       elasticsearch-api (= 6.8.3)
151 |       elasticsearch-transport (= 6.8.3)
152 |     elasticsearch-api (6.8.3)
153 |       multi_json
154 |     elasticsearch-dsl (0.1.10)
155 |     elasticsearch-model (7.1.1)
156 |       activesupport (> 3)
157 |       elasticsearch (> 1)
158 |       hashie
159 |     elasticsearch-persistence (6.1.2)
160 |       activemodel (> 4)
161 |       activesupport (> 4)
162 |       elasticsearch (~> 6)
163 |       elasticsearch-model (>= 5)
164 |       hashie
165 |     elasticsearch-transport (6.8.3)
166 |       faraday (~> 1)
167 |       multi_json
168 |     equalizer (0.0.11)
169 |     erubi (1.13.1)
170 |     ethon (0.16.0)
171 |       ffi (>= 1.15.0)
172 |     faraday (1.10.4)
173 |       faraday-em_http (~> 1.0)
174 |       faraday-em_synchrony (~> 1.0)
175 |       faraday-excon (~> 1.1)
176 |       faraday-httpclient (~> 1.0)
177 |       faraday-multipart (~> 1.0)
178 |       faraday-net_http (~> 1.0)
179 |       faraday-net_http_persistent (~> 1.0)
180 |       faraday-patron (~> 1.0)
181 |       faraday-rack (~> 1.0)
182 |       faraday-retry (~> 1.0)
183 |       ruby2_keywords (>= 0.0.4)
184 |     faraday-em_http (1.0.0)
185 |     faraday-em_synchrony (1.0.0)
186 |     faraday-excon (1.1.0)
187 |     faraday-httpclient (1.0.1)
188 |     faraday-multipart (1.1.0)
189 |       multipart-post (~> 2.0)
190 |     faraday-net_http (1.0.2)
191 |     faraday-net_http_persistent (1.2.0)
192 |     faraday-patron (1.0.0)
193 |     faraday-rack (1.0.0)
194 |     faraday-retry (1.0.3)
195 |     ffi (1.17.1-aarch64-linux-gnu)
196 |     ffi (1.17.1-aarch64-linux-musl)
197 |     ffi (1.17.1-arm-linux-gnu)
198 |     ffi (1.17.1-arm-linux-musl)
199 |     ffi (1.17.1-arm64-darwin)
200 |     ffi (1.17.1-x86_64-darwin)
201 |     ffi (1.17.1-x86_64-linux-gnu)
202 |     ffi (1.17.1-x86_64-linux-musl)
203 |     globalid (1.2.1)
204 |       activesupport (>= 6.1)
205 |     grape (1.7.1)
206 |       activesupport
207 |       builder
208 |       dry-types (>= 1.1)
209 |       mustermann-grape (~> 1.0.0)
210 |       rack (>= 1.3.0, < 3)
211 |       rack-accept
212 |     hashie (5.0.0)
213 |     i18n (1.14.7)
214 |       concurrent-ruby (~> 1.0)
215 |     ice_nine (0.11.2)
216 |     io-console (0.8.0)
217 |     irb (1.15.1)
218 |       pp (>= 0.6.0)
219 |       rdoc (>= 4.0.0)
220 |       reline (>= 0.4.2)
221 |     jbuilder (2.13.0)
222 |       actionview (>= 5.0.0)
223 |       activesupport (>= 5.0.0)
224 |     json (2.10.2)
225 |     language_server-protocol (3.17.0.4)
226 |     lint_roller (1.1.0)
227 |     listen (3.9.0)
228 |       rb-fsevent (~> 0.10, >= 0.10.3)
229 |       rb-inotify (~> 0.9, >= 0.9.10)
230 |     logger (1.7.0)
231 |     loofah (2.24.0)
232 |       crass (~> 1.0.2)
233 |       nokogiri (>= 1.12.0)
234 |     mail (2.8.1)
235 |       mini_mime (>= 0.1.1)
236 |       net-imap
237 |       net-pop
238 |       net-smtp
239 |     marcel (1.0.4)
240 |     method_source (1.1.0)
241 |     mini_mime (1.1.5)
242 |     minitest (5.25.5)
243 |     multi_json (1.15.0)
244 |     multipart-post (2.4.1)
245 |     mustermann (3.0.3)
246 |       ruby2_keywords (~> 0.0.1)
247 |     mustermann-grape (1.0.2)
248 |       mustermann (>= 1.0.0)
249 |     mutex_m (0.3.0)
250 |     net-imap (0.5.6)
251 |       date
252 |       net-protocol
253 |     net-pop (0.1.2)
254 |       net-protocol
255 |     net-protocol (0.2.2)
256 |       timeout
257 |     net-scp (4.1.0)
258 |       net-ssh (>= 2.6.5, < 8.0.0)
259 |     net-sftp (4.0.0)
260 |       net-ssh (>= 5.0.0, < 8.0.0)
261 |     net-smtp (0.5.1)
262 |       net-protocol
263 |     net-ssh (7.3.0)
264 |     newrelic_rpm (9.17.0)
265 |     nio4r (2.7.4)
266 |     nokogiri (1.18.7-aarch64-linux-gnu)
267 |       racc (~> 1.4)
268 |     nokogiri (1.18.7-aarch64-linux-musl)
269 |       racc (~> 1.4)
270 |     nokogiri (1.18.7-arm-linux-gnu)
271 |       racc (~> 1.4)
272 |     nokogiri (1.18.7-arm-linux-musl)
273 |       racc (~> 1.4)
274 |     nokogiri (1.18.7-arm64-darwin)
275 |       racc (~> 1.4)
276 |     nokogiri (1.18.7-x86_64-darwin)
277 |       racc (~> 1.4)
278 |     nokogiri (1.18.7-x86_64-linux-gnu)
279 |       racc (~> 1.4)
280 |     nokogiri (1.18.7-x86_64-linux-musl)
281 |       racc (~> 1.4)
282 |     ostruct (0.6.1)
283 |     parallel (1.26.3)
284 |     parser (3.3.7.4)
285 |       ast (~> 2.4.1)
286 |       racc
287 |     pp (0.6.2)
288 |       prettyprint
289 |     prettyprint (0.2.0)
290 |     prism (1.4.0)
291 |     pry (0.15.2)
292 |       coderay (~> 1.1)
293 |       method_source (~> 1.0)
294 |     pry-byebug (3.11.0)
295 |       byebug (~> 12.0)
296 |       pry (>= 0.13, < 0.16)
297 |     pry-rails (0.3.11)
298 |       pry (>= 0.13.0)
299 |     psych (5.2.3)
300 |       date
301 |       stringio
302 |     puma (5.6.9)
303 |       nio4r (~> 2.0)
304 |     racc (1.8.1)
305 |     rack (2.2.13)
306 |     rack-accept (0.4.5)
307 |       rack (>= 0.4)
308 |     rack-cors (1.0.6)
309 |       rack (>= 1.6.0)
310 |     rack-session (1.0.2)
311 |       rack (< 3)
312 |     rack-test (2.2.0)
313 |       rack (>= 1.3)
314 |     rackup (1.0.1)
315 |       rack (< 3)
316 |       webrick
317 |     rails (7.1.5.1)
318 |       actioncable (= 7.1.5.1)
319 |       actionmailbox (= 7.1.5.1)
320 |       actionmailer (= 7.1.5.1)
321 |       actionpack (= 7.1.5.1)
322 |       actiontext (= 7.1.5.1)
323 |       actionview (= 7.1.5.1)
324 |       activejob (= 7.1.5.1)
325 |       activemodel (= 7.1.5.1)
326 |       activerecord (= 7.1.5.1)
327 |       activestorage (= 7.1.5.1)
328 |       activesupport (= 7.1.5.1)
329 |       bundler (>= 1.15.0)
330 |       railties (= 7.1.5.1)
331 |     rails-dom-testing (2.2.0)
332 |       activesupport (>= 5.0.0)
333 |       minitest
334 |       nokogiri (>= 1.6)
335 |     rails-html-sanitizer (1.6.2)
336 |       loofah (~> 2.21)
337 |       nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0)
338 |     rails_semantic_logger (4.17.0)
339 |       rack
340 |       railties (>= 5.1)
341 |       semantic_logger (~> 4.16)
342 |     railties (7.1.5.1)
343 |       actionpack (= 7.1.5.1)
344 |       activesupport (= 7.1.5.1)
345 |       irb
346 |       rackup (>= 1.0.0)
347 |       rake (>= 12.2)
348 |       thor (~> 1.0, >= 1.2.2)
349 |       zeitwerk (~> 2.6)
350 |     rainbow (3.1.1)
351 |     rake (13.0.6)
352 |     rb-fsevent (0.11.2)
353 |     rb-inotify (0.11.1)
354 |       ffi (~> 1.0)
355 |     rdoc (6.13.1)
356 |       psych (>= 4.0.0)
357 |     regexp_parser (2.10.0)
358 |     reline (0.6.0)
359 |       io-console (~> 0.5)
360 |     rspec-core (3.9.3)
361 |       rspec-support (~> 3.9.3)
362 |     rspec-expectations (3.9.4)
363 |       diff-lcs (>= 1.2.0, < 2.0)
364 |       rspec-support (~> 3.9.0)
365 |     rspec-mocks (3.9.1)
366 |       diff-lcs (>= 1.2.0, < 2.0)
367 |       rspec-support (~> 3.9.0)
368 |     rspec-rails (3.9.1)
369 |       actionpack (>= 3.0)
370 |       activesupport (>= 3.0)
371 |       railties (>= 3.0)
372 |       rspec-core (~> 3.9.0)
373 |       rspec-expectations (~> 3.9.0)
374 |       rspec-mocks (~> 3.9.0)
375 |       rspec-support (~> 3.9.0)
376 |     rspec-support (3.9.4)
377 |     rubocop (1.75.1)
378 |       json (~> 2.3)
379 |       language_server-protocol (~> 3.17.0.2)
380 |       lint_roller (~> 1.1.0)
381 |       parallel (~> 1.10)
382 |       parser (>= 3.3.0.2)
383 |       rainbow (>= 2.2.2, < 4.0)
384 |       regexp_parser (>= 2.9.3, < 3.0)
385 |       rubocop-ast (>= 1.43.0, < 2.0)
386 |       ruby-progressbar (~> 1.7)
387 |       unicode-display_width (>= 2.4.0, < 4.0)
388 |     rubocop-ast (1.43.0)
389 |       parser (>= 3.3.7.2)
390 |       prism (~> 1.4)
391 |     rubocop-performance (1.25.0)
392 |       lint_roller (~> 1.1)
393 |       rubocop (>= 1.75.0, < 2.0)
394 |       rubocop-ast (>= 1.38.0, < 2.0)
395 |     rubocop-rails (2.31.0)
396 |       activesupport (>= 4.2.0)
397 |       lint_roller (~> 1.1)
398 |       rack (>= 1.1)
399 |       rubocop (>= 1.75.0, < 2.0)
400 |       rubocop-ast (>= 1.38.0, < 2.0)
401 |     rubocop-rake (0.7.1)
402 |       lint_roller (~> 1.1)
403 |       rubocop (>= 1.72.1)
404 |     rubocop-rspec (3.5.0)
405 |       lint_roller (~> 1.1)
406 |       rubocop (~> 1.72, >= 1.72.1)
407 |     ruby-progressbar (1.13.0)
408 |     ruby2_keywords (0.0.5)
409 |     securerandom (0.4.1)
410 |     semantic_logger (4.16.1)
411 |       concurrent-ruby (~> 1.0)
412 |     shoulda (4.0.0)
413 |       shoulda-context (~> 2.0)
414 |       shoulda-matchers (~> 4.0)
415 |     shoulda-context (2.0.0)
416 |     shoulda-matchers (4.5.1)
417 |       activesupport (>= 4.2.0)
418 |     simplecov (0.13.0)
419 |       docile (~> 1.1.0)
420 |       json (>= 1.8, < 3)
421 |       simplecov-html (~> 0.10.0)
422 |     simplecov-html (0.10.2)
423 |     sshkit (1.24.0)
424 |       base64
425 |       logger
426 |       net-scp (>= 1.1.2)
427 |       net-sftp (>= 2.1.2)
428 |       net-ssh (>= 2.8.0)
429 |       ostruct
430 |     stringio (3.1.6)
431 |     thor (1.3.2)
432 |     thread_safe (0.3.6)
433 |     timeout (0.4.3)
434 |     typhoeus (1.4.1)
435 |       ethon (>= 0.9.0)
436 |     tzinfo (2.0.6)
437 |       concurrent-ruby (~> 1.0)
438 |     unicode-display_width (3.1.4)
439 |       unicode-emoji (~> 4.0, >= 4.0.4)
440 |     unicode-emoji (4.0.4)
441 |     virtus (1.0.5)
442 |       axiom-types (~> 0.1)
443 |       coercible (~> 1.0)
444 |       descendants_tracker (~> 0.0, >= 0.0.3)
445 |       equalizer (~> 0.0, >= 0.0.9)
446 |     webrick (1.9.1)
447 |     websocket-driver (0.7.7)
448 |       base64
449 |       websocket-extensions (>= 0.1.0)
450 |     websocket-extensions (0.1.5)
451 |     zeitwerk (2.7.2)
452 | 
453 | PLATFORMS
454 |   aarch64-linux-gnu
455 |   aarch64-linux-musl
456 |   arm-linux-gnu
457 |   arm-linux-musl
458 |   arm64-darwin
459 |   x86_64-darwin
460 |   x86_64-linux-gnu
461 |   x86_64-linux-musl
462 | 
463 | DEPENDENCIES
464 |   awesome_print (~> 1.8)
465 |   capistrano
466 |   capistrano-newrelic
467 |   capistrano-rails
468 |   capistrano-rbenv
469 |   capistrano3-puma
470 |   codeclimate-test-reporter (~> 1.0.8)
471 |   debug
472 |   dotenv (~> 3.1)
473 |   elasticsearch (~> 6.0)
474 |   elasticsearch-dsl (~> 0.1.9)
475 |   elasticsearch-persistence (~> 6.0)
476 |   grape (~> 1.7.0)
477 |   jbuilder (~> 2.7)
478 |   listen
479 |   newrelic_rpm (~> 9.10)
480 |   pry-byebug (~> 3.4)
481 |   pry-rails (~> 0.3)
482 |   puma (~> 5.6)
483 |   rack (~> 2.2.8)
484 |   rack-cors (~> 1.0.5)
485 |   rails (~> 7.1.0)
486 |   rails_semantic_logger (~> 4.14)
487 |   rake (~> 13.0.0)
488 |   rspec-rails (~> 3.7)
489 |   rubocop
490 |   rubocop-performance
491 |   rubocop-rails
492 |   rubocop-rake
493 |   rubocop-rspec
494 |   shoulda (~> 4.0)
495 |   simplecov (~> 0.13.0)
496 |   typhoeus (~> 1.4.0)
497 |   virtus (~> 1.0)
498 | 
499 | BUNDLED WITH
500 |    2.6.3
501 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | CC0 1.0 Universal
  2 | 
  3 | Statement of Purpose
  4 | 
  5 | The laws of most jurisdictions throughout the world automatically confer
  6 | exclusive Copyright and Related Rights (defined below) upon the creator and
  7 | subsequent owner(s) (each and all, an "owner") of an original work of
  8 | authorship and/or a database (each, a "Work").
  9 | 
 10 | Certain owners wish to permanently relinquish those rights to a Work for the
 11 | purpose of contributing to a commons of creative, cultural and scientific
 12 | works ("Commons") that the public can reliably and without fear of later
 13 | claims of infringement build upon, modify, incorporate in other works, reuse
 14 | and redistribute as freely as possible in any form whatsoever and for any
 15 | purposes, including without limitation commercial purposes. These owners may
 16 | contribute to the Commons to promote the ideal of a free culture and the
 17 | further production of creative, cultural and scientific works, or to gain
 18 | reputation or greater distribution for their Work in part through the use and
 19 | efforts of others.
 20 | 
 21 | For these and/or other purposes and motivations, and without any expectation
 22 | of additional consideration or compensation, the person associating CC0 with a
 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
 25 | and publicly distribute the Work under its terms, with knowledge of his or her
 26 | Copyright and Related Rights in the Work and the meaning and intended legal
 27 | effect of CC0 on those rights.
 28 | 
 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 30 | protected by copyright and related or neighboring rights ("Copyright and
 31 | Related Rights"). Copyright and Related Rights include, but are not limited
 32 | to, the following:
 33 | 
 34 |   i. the right to reproduce, adapt, distribute, perform, display, communicate,
 35 |   and translate a Work;
 36 | 
 37 |   ii. moral rights retained by the original author(s) and/or performer(s);
 38 | 
 39 |   iii. publicity and privacy rights pertaining to a person's image or likeness
 40 |   depicted in a Work;
 41 | 
 42 |   iv. rights protecting against unfair competition in regards to a Work,
 43 |   subject to the limitations in paragraph 4(a), below;
 44 | 
 45 |   v. rights protecting the extraction, dissemination, use and reuse of data in
 46 |   a Work;
 47 | 
 48 |   vi. database rights (such as those arising under Directive 96/9/EC of the
 49 |   European Parliament and of the Council of 11 March 1996 on the legal
 50 |   protection of databases, and under any national implementation thereof,
 51 |   including any amended or successor version of such directive); and
 52 | 
 53 |   vii. other similar, equivalent or corresponding rights throughout the world
 54 |   based on applicable law or treaty, and any national implementations thereof.
 55 | 
 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
 59 | and Related Rights and associated claims and causes of action, whether now
 60 | known or unknown (including existing as well as future claims and causes of
 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
 62 | duration provided by applicable law or treaty (including future time
 63 | extensions), (iii) in any current or future medium and for any number of
 64 | copies, and (iv) for any purpose whatsoever, including without limitation
 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
 66 | the Waiver for the benefit of each member of the public at large and to the
 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
 68 | shall not be subject to revocation, rescission, cancellation, termination, or
 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
 70 | by the public as contemplated by Affirmer's express Statement of Purpose.
 71 | 
 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
 73 | judged legally invalid or ineffective under applicable law, then the Waiver
 74 | shall be preserved to the maximum extent permitted taking into account
 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
 76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
 77 | non transferable, non sublicensable, non exclusive, irrevocable and
 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
 80 | provided by applicable law or treaty (including future time extensions), (iii)
 81 | in any current or future medium and for any number of copies, and (iv) for any
 82 | purpose whatsoever, including without limitation commercial, advertising or
 83 | promotional purposes (the "License"). The License shall be deemed effective as
 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
 85 | License for any reason be judged legally invalid or ineffective under
 86 | applicable law, such partial invalidity or ineffectiveness shall not
 87 | invalidate the remainder of the License, and in such case Affirmer hereby
 88 | affirms that he or she will not (i) exercise any of his or her remaining
 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
 90 | and causes of action with respect to the Work, in either case contrary to
 91 | Affirmer's express Statement of Purpose.
 92 | 
 93 | 4. Limitations and Disclaimers.
 94 | 
 95 |   a. No trademark or patent rights held by Affirmer are waived, abandoned,
 96 |   surrendered, licensed or otherwise affected by this document.
 97 | 
 98 |   b. Affirmer offers the Work as-is and makes no representations or warranties
 99 |   of any kind concerning the Work, express, implied, statutory or otherwise,
100 |   including without limitation warranties of title, merchantability, fitness
101 |   for a particular purpose, non infringement, or the absence of latent or
102 |   other defects, accuracy, or the present or absence of errors, whether or not
103 |   discoverable, all to the greatest extent permissible under applicable law.
104 | 
105 |   c. Affirmer disclaims responsibility for clearing rights of other persons
106 |   that may apply to the Work or any use thereof, including without limitation
107 |   any person's Copyright and Related Rights in the Work. Further, Affirmer
108 |   disclaims responsibility for obtaining any necessary consents, permissions
109 |   or other rights required for any use of the Work.
110 | 
111 |   d. Affirmer understands and acknowledges that Creative Commons is not a
112 |   party to this document and has no duty or obligation with respect to this
113 |   CC0 or use of the Work.
114 | 
115 | For more information, please see
116 | <http://creativecommons.org/publicdomain/zero/1.0/>
117 | 
118 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | i14y
  2 | ====
  3 | 
  4 | [![CircleCI](https://circleci.com/gh/GSA/i14y.svg?style=shield)](https://circleci.com/gh/GSA/i14y)
  5 | [![Code Climate](https://codeclimate.com/github/GSA/i14y/badges/gpa.svg)](https://codeclimate.com/github/GSA/i14y)
  6 | [![Test Coverage](https://codeclimate.com/github/GSA/i14y/badges/coverage.svg)](https://codeclimate.com/github/GSA/i14y)
  7 | 
  8 | Search engine for agencies' published content
  9 | 
 10 | ## Dependencies/Prerequisites
 11 | 
 12 | * Ruby
 13 | 
 14 | Use [rvm](https://rvm.io/) to install the version of Ruby specified in `.ruby-version`.
 15 | 
 16 | ### Docker
 17 | 
 18 | Docker can be used to: 1) run just the required services (MySQL, Elasticsearch, etc.) while [running the i14y application on your local machine](https://github.com/GSA/i14y#development), and/or 2) run the entire `i14y` application in a Docker container.  Please refer to [searchgov-services](https://github.com/GSA/search-services) for detailed instructions on centralized configuration for the services.
 19 | 
 20 | When running in a Docker container (option 2 above), the `i14y` application is configured to run on port [3200](http://localhost:3200/). Required dependencies - ([Ruby](https://github.com/GSA/i14y#dependenciesprerequisites), and Gems) - are installed using Docker. However, other data or configuration may need to be set up manually, which can be done in the running container using `bash`.
 21 | 
 22 | To use bash to perform any operations on the i14y application running in a Docker container, run the command below in `search-services`.
 23 | 
 24 |     $ docker compose run i14y bash
 25 | 
 26 | For example, to set up the DB in Docker:
 27 | 
 28 |     $ docker compose run i14y bash
 29 |     $ bin/rails i14y:setup
 30 | 
 31 | The Elasticsearch service provided by `searchgov-services` is configured to run on the default port, [9200](http://localhost:9200/). To use a different host (with or without port) or set of hosts, set the `ES_HOSTS` environment variable. For example, use the following command to run the specs using Elasticsearch running on `localhost:9207`:
 32 | 
 33 |     ES_HOSTS=localhost:9207 bundle exec rspec spec
 34 | 
 35 | Verify that Elasticsearch 7.17.x is running on the expected port (port 9200 by default):
 36 | 
 37 | ```
 38 | $ curl localhost:9200
 39 | {
 40 |   "name" : "002410188f61",
 41 |   "cluster_name" : "es7-docker-cluster",
 42 |   "cluster_uuid" : "l3cAhBd4Sqa3B4SkpUilPQ",
 43 |   "version" : {
 44 |     "number" : "7.17.7",
 45 |     "build_flavor" : "default",
 46 |     "build_type" : "docker",
 47 |     "build_hash" : "78dcaaa8cee33438b91eca7f5c7f56a70fec9e80",
 48 |     "build_date" : "2022-10-17T15:29:54.167373105Z",
 49 |     "build_snapshot" : false,
 50 |     "lucene_version" : "8.11.1",
 51 |     "minimum_wire_compatibility_version" : "6.8.0",
 52 |     "minimum_index_compatibility_version" : "6.0.0-beta1"
 53 |   },
 54 |   "tagline" : "You Know, for Search"
 55 | }
 56 | ```
 57 | 
 58 | ## Development
 59 | 
 60 | - `bundle install`.
 61 | - Run `bundle exec rake i14y:setup` to create the necessary indexes, index templates, and dynamic field templates.
 62 | 
 63 | If you ever want to start from scratch with your indexes/templates, you can clear everything out:
 64 | `bundle exec rake i14y:clear_all`
 65 | 
 66 | - Run the Rails server on port 8081 for compatibility with the
 67 |   search-gov app:
 68 | ```
 69 | $ rails s -p 8081
 70 | ```
 71 | 
 72 | You should see the default Rails index page on [http://localhost:8081/](http://localhost:8081/).
 73 | 
 74 | ### Code Quality
 75 | 
 76 | We use [Rubocop](https://rubocop.org/) for static code analysis. Settings specific to I14Y are configured via [.rubocop.yml](.rubocop.yml). Settings that can be shared among all Search.gov repos should be configured via the [searchgov_style](https://github.com/GSA/searchgov_style) gem.
 77 | 
 78 | ## Basic Usage
 79 | 
 80 | ### Create a collection for storing documents
 81 | ```
 82 | $ curl -u dev:devpwd -XPOST http://localhost:8081/api/v1/collections \
 83 |  -H "Content-Type:application/json" -d \
 84 |  '{"handle":"test_collection","description":"my test collection","token":"test_collection_token"}'
 85 | ```
 86 | 
 87 | ### Create a document within that collection
 88 | Use the collection handle and token for authorization:
 89 | 
 90 | ```
 91 | curl http://localhost:8081/api/v1/documents \
 92 |   -XPOST \
 93 |   -H "Content-Type:application/json" \
 94 |   -u test_collection:test_collection_token \
 95 |   -d '{"document_id":"1",
 96 |       "title":"a doc about rutabagas",
 97 |       "path": "http://www.foo.gov/rutabagas.html",
 98 |       "created": "2020-05-12T22:35:09Z",
 99 |       "description":"Lots of very important info on rutabagas",
100 |       "content":"rutabagas",
101 |       "promote": false,
102 |       "language" : "en",
103 |       "tags" : "tag1, another tag"
104 |       }'
105 | ```
106 | 
107 | ### Search for a document within a collection
108 | ```
109 | $ curl -u dev:devpwd 'http://localhost:8081/api/v1/collections/search?handles=test_collection&query=rutabaga'
110 | ```
111 | 
112 | ## Tests
113 | ```
114 | # Fire up Elasticsearch in search-services
115 | $ docker-compose up elasticsearch7
116 | 
117 | $ bundle exec rake i14y:setup
118 | $ rake
119 | ```
120 | 
121 | ## Code Quality
122 | 
123 | We use [Rubocop](https://rubocop.org/) for static code analysis. Settings specific to i14y are configured via [.rubocop.yml](.rubocop.yml).
124 | 
125 | ### Running RuboCop Locally
126 | 
127 | Basic commands you should frequently use:
128 | 
129 | - **Generate or update the RuboCop TODO file**. Use this when RuboCop identifies many issues:
130 | ```bash
131 | bundle exec rubocop --auto-gen-config
132 | ```
133 | - **Autocorrect easy-to-fix offenses** (safe corrections only):
134 | ```bash
135 | bundle exec rubocop -a
136 | ```
137 | - **Autocorrect all possible offenses, including some more complex cases** (use with caution, review changes carefully):
138 | ```bash
139 | bundle exec rubocop -A
140 | ```
141 | - **Disable offenses that cannot be automatically corrected** when running autocorrections. Useful if you'd like to quickly apply auto-fixes without manually addressing harder issues immediately:
142 | ```bash
143 | bundle exec rubocop -a --disable-uncorrectable
144 | ```
145 | 
146 | or
147 | 
148 | ```bash
149 | bundle exec rubocop -A --disable-uncorrectable
150 | ```
151 | 
152 | It is recommended to always review diff changes after running autocorrection commands to ensure code correctness and maintainability.


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
# Add your own tasks in files placed in lib/tasks ending in .rake,
# for example lib/tasks/yourtask.rake, and they will automatically be available to Rake.

# Load the application definition, resolved relative to this Rakefile.
require_relative 'config/application'

# Ask the Rails application to load its Rake tasks.
Rails.application.load_tasks
7 | 


--------------------------------------------------------------------------------
/app/classes/document_query.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | class DocumentQuery
  4 |   include Elasticsearch::DSL
  5 | 
  6 |   HIGHLIGHT_OPTIONS = {
  7 |     pre_tags: ["\ue000"],
  8 |     post_tags: ["\ue001"]
  9 |   }.freeze
 10 | 
 11 |   DEFAULT_STOPWORDS = %w[
 12 |     a an and are as at be but by for if in into is it
 13 |     no not of on or such that the their then there these
 14 |     they this to was will with
 15 |   ].freeze
 16 | 
 17 |   FILTERABLE_TEXT_FIELDS = %i[audience
 18 |                               content_type
 19 |                               mime_type
 20 |                               searchgov_custom1
 21 |                               searchgov_custom2
 22 |                               searchgov_custom3
 23 |                               tags].freeze
 24 | 
 25 |   FILTERABLE_DATE_FIELDS = %i[created
 26 |                               changed].freeze
 27 | 
 28 |   attr_reader :audience,
 29 |               :content_type,
 30 |               :date_range,
 31 |               :date_range_created,
 32 |               :excluded_sites,
 33 |               :ignore_tags,
 34 |               :thumbnail_url,
 35 |               :included_sites,
 36 |               :language,
 37 |               :mime_type,
 38 |               :searchgov_custom1,
 39 |               :searchgov_custom2,
 40 |               :searchgov_custom3,
 41 |               :site_filters,
 42 |               :tags
 43 |   attr_accessor :query,
 44 |                 :search
 45 | 
 # Builds a query object from the request options.
 #
 # +options+ is read with symbol keys. The timestamp bounds become the
 # :gte / :lt entries of the two date ranges, the site lists start out
 # empty, a fresh Search object is created, the filter values are copied by
 # parse_filters and, when a :query entry is supplied, it is handed to
 # parse_query.
 def initialize(options)
   @options = options
   @search = Search.new
   @ignore_tags = options[:ignore_tags]
   @included_sites = []
   @excluded_sites = []
   @date_range = {
     gte: options[:min_timestamp],
     lt: options[:max_timestamp]
   }
   @date_range_created = {
     gte: options[:min_timestamp_created],
     lt: options[:max_timestamp_created]
   }
   parse_filters

   raw_query = options[:query]
   parse_query(raw_query) if raw_query
 end
 57 | 
 # Assembles the search definition and returns the Search object.
 #
 # Steps, in order: select the source fields; sort by :changed descending
 # when the :sort_by_date option is set; add the query-dependent options
 # (see query_options) when a query is present; call build_search_query;
 # request scoring explanations when the Rails logger is at debug level.
 def body
   search.source(source_fields)
   search.sort { by :changed, order: 'desc' } if @options[:sort_by_date]
   query_options if query.present?
   build_search_query
   search.explain(true) if Rails.logger.debug? # scoring details
   search
 end
 68 | 
 # Adds the options that body applies only when a query is present:
 # highlighting, the :suggestion suggester, one terms aggregation per
 # filterable text field and one date-range aggregation per filterable
 # date field.
 def query_options
   set_highlight_options
   search.suggest(:suggestion, suggestion_hash)
   FILTERABLE_TEXT_FIELDS.each { |text_field| search.aggregation(text_field, aggregation_hash(text_field)) }
   FILTERABLE_DATE_FIELDS.each { |date_field| search.aggregation(date_field, date_aggregation_hash(date_field)) }
 end
 79 | 
 # Maps each base full-text field name ('title', 'description', 'content')
 # to the name produced by suffixed for it. The hash is built once and
 # memoized in @full_text_fields.
 def full_text_fields
   @full_text_fields ||= %w[title description content].to_h { |base_name| [base_name, suffixed(base_name)] }
 end
 85 | 
 # Returns a hash pairing the current query text with a cutoff frequency
 # of 0.05 and separate minimum_should_match rules for low- and
 # high-frequency terms.
 def common_terms_hash
   should_match = { low_freq: '3<90%', high_freq: '2<90%' }
   { query: query, cutoff_frequency: 0.05, minimum_should_match: should_match }
 end
 93 | 
 # Returns the list of fields to request for each hit.
 #
 # Uses the :include option when given, otherwise a default list, and always
 # appends 'language'. Names that are keys of full_text_fields are replaced
 # by their mapped (suffixed) names; all other names pass through unchanged.
 #
 # The list is built with non-mutating concatenation so the caller's
 # :include array is left untouched: appending in place would add another
 # 'language' entry on every call and would raise on a frozen array.
 def source_fields
   default_fields = %w[title path created changed thumbnail_url]
   requested_fields = (@options[:include] || default_fields) + ['language']
   requested_fields.map { |field| full_text_fields[field] || field }
 end
 99 | 
# True when either the :min_timestamp or the :max_timestamp option is present.
def timestamp_filters_present?
  lower_bound = @options[:min_timestamp]
  upper_bound = @options[:max_timestamp]
  lower_bound.present? || upper_bound.present?
end
103 | 
# True when either the :min_timestamp_created or the :max_timestamp_created
# option is present.
def created_timestamp_filters_present?
  lower_bound = @options[:min_timestamp_created]
  upper_bound = @options[:max_timestamp_created]
  lower_bound.present? || upper_bound.present?
end
107 | 
# Returns the mapped full-text field names with boost suffixes: names
# matching /title/ get "^2", names matching /description/ get "^1.5", and
# every other name is returned as a plain string.
def boosted_fields
  full_text_fields.values.map do |field_name|
    case field_name
    when /title/ then "#{field_name}^2"
    when /description/ then "#{field_name}^1.5"
    else field_name.to_s
    end
  end
end
119 | 
# Returns the three scoring function definitions, in a fixed order.
def functions
  # Prefer more recent documents
  recency_decay = {
    gauss: { changed: { origin: 'now', scale: '1825d', offset: '30d', decay: 0.3 } }
  }

  # Avoid pdfs, etc.
  office_file_penalty = {
    filter: { terms: { extension: %w[doc docx pdf ppt pptx xls xlsx] } },
    weight: '.75'
  }

  # Prefer documents that have been clicked more often
  click_boost = {
    field_value_factor: { field: 'click_count', modifier: 'log1p', factor: 2, missing: 1 }
  }

  [recency_decay, office_file_penalty, click_boost]
end
147 | 
148 |   private
149 | 
# Joins +field+ and the current language with an underscore, leaving out
# whichever of the two is nil.
def suffixed(field)
  name_parts = [field, language].reject(&:nil?)
  name_parts.join('_')
end
153 | 
# Runs +query+ through QueryParser and stores the results: the site
# filters hash, its :included_sites and :excluded_sites entries, and the
# parser's stripped query text (kept in @query).
def parse_query(query)
  parser = QueryParser.new(query)
  filters = parser.site_filters
  @site_filters = filters
  @included_sites = filters[:included_sites]
  @excluded_sites = filters[:excluded_sites]
  @query = parser.stripped_query
end
161 | 
# Copies the filter values from the options into instance variables of
# the same name. The language falls back to 'en' when the option is
# missing.
def parse_filters
  @language = @options[:language] || 'en'
  %i[audience content_type mime_type searchgov_custom1 searchgov_custom2 searchgov_custom3].each do |filter_name|
    instance_variable_set(:"@#{filter_name}", @options[filter_name])
  end
  @tags = @options[:tags]
end
172 | 
# Configures highlighting on the search: the pre/post tags from
# HIGHLIGHT_OPTIONS and the per-field settings from highlight_fields_hash.
def set_highlight_options
  # The values are captured in locals before the block is entered.
  # NOTE(review): presumably because the DSL evaluates the block in its own
  # context - confirm against the elasticsearch-dsl gem.
  opening_tags = HIGHLIGHT_OPTIONS[:pre_tags]
  closing_tags = HIGHLIGHT_OPTIONS[:post_tags]
  per_field_settings = highlight_fields_hash
  search.highlight do
    pre_tags opening_tags
    post_tags closing_tags
    fields per_field_settings
  end
end
181 | 
# Returns a terms aggregation definition over +facet_field+.
def aggregation_hash(facet_field)
  { terms: { field: facet_field } }
end
189 | 
190 |   def date_aggregation_hash(date_facet_field)
191 |     {
192 |       date_range: {
193 |         field: date_facet_field,
194 |         format: '8M/d/u',
195 |         ranges: [
196 |           {
197 |             key: 'Last Week',
198 |             from: 'now-1w',
199 |             to: 'now'
200 |           },
201 |           {
202 |             key: 'Last Month',
203 |             from: 'now-1M',
204 |             to: 'now'
205 |           },
206 |           {
207 |             key: 'Last Year',
208 |             from: 'now-12M',
209 |             to: 'now'
210 |           }
211 |         ]
212 |       }
213 |     }
214 |   end
215 | 
216 |   def suggestion_hash
217 |     { text: query_without_stopwords,
218 |       phrase: {
219 |         field: 'bigrams',
220 |         size: 1,
221 |         highlight: suggestion_highlight,
222 |         collate: { query: { source: { multi_match: { query: '{{suggestion}}',
223 |                                                      type: 'phrase',
224 |                                                      fields: "*_#{language}" } } } }
225 |       } }
226 |   end
227 | 
228 |   def highlight_fields_hash
229 |     {
230 |       full_text_fields['title'] => {
231 |         number_of_fragments: 0,
232 |         type: 'fvh'
233 |       },
234 |       full_text_fields['description'] => {
235 |         fragment_size: 75,
236 |         number_of_fragments: 2,
237 |         type: 'fvh'
238 |       },
239 |       full_text_fields['content'] => {
240 |         fragment_size: 75,
241 |         number_of_fragments: 2,
242 |         type: 'fvh'
243 |       }
244 |     }
245 |   end
246 | 
247 |   def suggestion_highlight
248 |     {
249 |       pre_tag: HIGHLIGHT_OPTIONS[:pre_tags].first,
250 |       post_tag: HIGHLIGHT_OPTIONS[:post_tags].first
251 |     }
252 |   end
253 | 
254 |   # Temporary fix for https://github.com/elastic/elasticsearch/issues/34282
255 |   def query_without_stopwords
256 |     (query.downcase.split(/ +/) - DEFAULT_STOPWORDS).join(' ')
257 |   end
258 | 
  # Defines the main query on `search`: a function_score query whose scoring
  # functions come from #functions and whose inner query is a bool made of
  #   * a `must` clause built from the query text (only when a query is present), and
  #   * a `filter` clause covering language, included sites, the filterable
  #     text fields, date ranges, ignored tags and excluded sites.
  #
  # Disabling length-related cops, as this method is intended to mimic the structure
  # of a complex Elasticsearch query using the Elasticsearch DSL
  # https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-dsl
  # rubocop:disable Metrics/MethodLength, Metrics/BlockLength
  def build_search_query
    # Local handle on this object for use inside the DSL blocks below
    # (presumably because the DSL evaluates those blocks in its own context).
    doc_query = self

    search.query do
      function_score do
        functions doc_query.functions

        query do
          bool do
            # Text matching is only added when there is a query string.
            if doc_query.query.present?
              must do
                bool do
                  # prefer bigram matches
                  should { match bigrams: { operator: 'and', query: doc_query.query } }
                  should { term  promote: true }

                  # prefer_word_form_matches
                  must do
                    bool do
                      # Either the full-text fields match ...
                      should do
                        bool do
                          must do
                            simple_query_string do
                              query doc_query.query
                              fields doc_query.boosted_fields
                            end
                          end

                          # Skipped when the query contains a double-quoted phrase.
                          unless doc_query.query.match(/".*"/)
                            must do
                              bool do
                                doc_query.full_text_fields.values.each do |field|
                                  should { common({ field => doc_query.common_terms_hash }) }
                                end
                              end
                            end
                          end
                        end
                      end

                      # ... or one of the metadata fields matches all query terms.
                      should { match(audience: { operator: 'and', query: doc_query.query }) }
                      should { match(basename: { operator: 'and', query: doc_query.query }) }
                      should { match(searchgov_custom1: { operator: 'and', query: doc_query.query.downcase }) }
                      should { match(searchgov_custom2: { operator: 'and', query: doc_query.query.downcase }) }
                      should { match(searchgov_custom3: { operator: 'and', query: doc_query.query.downcase }) }
                      should { match(tags: { operator: 'and', query: doc_query.query.downcase }) }
                    end
                  end
                end
              end
            end

            filter do
              bool do
                must { term language: doc_query.language } if doc_query.language.present?

                # Every `should` group added to this filter bool has to match.
                minimum_should_match '100%'
                should do
                  bool do
                    # A document qualifies when it matches at least one included site.
                    if doc_query.included_sites.any?
                      minimum_should_match 1

                      doc_query.included_sites.each do |site_filter|
                        should do
                          bool do
                            must { term domain_name: site_filter.domain_name }
                            must { term url_path: site_filter.url_path } if site_filter.url_path.present?
                          end
                        end
                      end
                    end
                  end
                end

                # One `should` group per filterable field that has values; within a
                # group, matching any one of the (downcased) values is enough.
                FILTERABLE_TEXT_FIELDS.each do |field|
                  next if doc_query.send(field).blank?

                  should do
                    bool do
                      doc_query.send(field).each do |field_value|
                        minimum_should_match 1
                        should { term "#{field}": field_value.downcase }
                      end
                    end
                  end
                end

                must { range changed: doc_query.date_range } if doc_query.timestamp_filters_present?
                must { range created: doc_query.date_range_created } if doc_query.created_timestamp_filters_present?

                if doc_query.ignore_tags.present?
                  must_not do
                    terms tags: doc_query.ignore_tags
                  end
                end

                # Excluded sites: with a path, exclude by regexp on `path`;
                # otherwise exclude the whole domain.
                doc_query.excluded_sites.each do |site_filter|
                  if site_filter.url_path.present?
                    # NOTE(review): domain_name and url_path are interpolated into the
                    # regexp unescaped, and a '/' is required after the path - confirm
                    # this is intended.
                    must_not { regexp path: { value: "https?:\/\/#{site_filter.domain_name}#{site_filter.url_path}/.*" } }
                  else
                    must_not { term domain_name: site_filter.domain_name }
                  end
                end
              end
            end
          end
        end
      end
    end
  end
  # rubocop:enable Metrics/MethodLength, Metrics/BlockLength
374 | end
375 | 


--------------------------------------------------------------------------------
/app/classes/document_search.rb:
--------------------------------------------------------------------------------
 1 | class DocumentSearch
 2 |   NO_HITS = { "hits" => { "total" => 0, "hits" => [] }}
 3 | 
 4 |   attr_reader :doc_query, :offset, :size, :indices
 5 | 
 6 |   def initialize(options)
 7 |     @offset = options[:offset] || 0
 8 |     @size = options[:size]
 9 |     @doc_query = DocumentQuery.new(options)
10 |     @indices = options[:handles].map { |handle| DocumentRepository.index_namespace(handle) }
11 |   end
12 | 
13 |   def search
14 |     i14y_search_results = execute_client_search
15 |     if i14y_search_results.total.zero? && i14y_search_results.suggestion.present?
16 |       suggestion = i14y_search_results.suggestion
17 |       doc_query.query = suggestion['text']
18 |       i14y_search_results = execute_client_search
19 |       i14y_search_results.override_suggestion(suggestion) if i14y_search_results.results.present?
20 |     end
21 |     i14y_search_results
22 |   rescue StandardError => error
23 |     Rails.logger.error <<~ERROR_DETAILS
24 |       Problem in DocumentSearch#search(): #{error}
25 |       Query: #{doc_query.body.to_json}
26 |       Backtrace: #{error.backtrace}
27 |     ERROR_DETAILS
28 |     NewRelic::Agent.notice_error(error, options: { custom_params: { indices: indices }})
29 |     DocumentSearchResults.new(NO_HITS)
30 |   end
31 | 
32 |   private
33 | 
34 |   def execute_client_search
35 |     params = {
36 |       index: indices,
37 |       body: doc_query.body,
38 |       from: offset,
39 |       size: size,
40 |       # For compatibility with ES 6. This parameter will be removed in ES 8.
41 |       # https://www.elastic.co/guide/en/elasticsearch/reference/current/breaking-changes-7.0.html#hits-total-now-object-search-response
42 |       rest_total_hits_as_int: true
43 |     }
44 |     Rails.logger.debug "Query: *****\n#{doc_query.body.to_json}\n*****"
45 |     result = ES.client.search(params)
46 |     DocumentSearchResults.new(result, offset)
47 |   end
48 | end
49 | 


--------------------------------------------------------------------------------
/app/classes/document_search_results.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | class DocumentSearchResults
 4 |   attr_reader :total, :offset, :results, :suggestion, :aggregations
 5 | 
 6 |   def initialize(result, offset = 0)
 7 |     @total = result['hits']['total']
 8 |     @offset = offset
 9 |     @results = extract_hits(result['hits']['hits'])
10 |     @suggestion = extract_suggestion(result['suggest'])
11 |     @aggregations = extract_aggregations(result['aggregations'])
12 |   end
13 | 
14 |   def override_suggestion(suggestion)
15 |     @suggestion = suggestion
16 |   end
17 | 
18 |   private
19 | 
20 |   def extract_suggestion(suggest)
21 |     return unless suggest && total.zero?
22 | 
23 |     suggest['suggestion'].first['options'].first.except('score')
24 |   rescue NoMethodError
25 |     nil
26 |   end
27 | 
28 |   def extract_hits(hits)
29 |     hits.map do |hit|
30 |       highlight = hit['highlight']
31 |       source =  deserialized(hit)
32 |       if highlight.present?
33 |         source['title'] = highlight["title_#{source['language']}"].first if highlight["title_#{source['language']}"]
34 |         %w[description content].each do |optional_field|
35 |           language_field = "#{optional_field}_#{source['language']}"
36 |           source[optional_field] = highlight[language_field].join('...') if highlight[language_field]
37 |         end
38 |       end
39 |       %w[created_at created changed updated_at updated].each do |date|
40 |         source[date] = Time.parse(source[date]).utc.to_s if source[date].present?
41 |       end
42 |       source
43 |     end
44 |   end
45 | 
46 |   def extract_aggregations(aggregations)
47 |     return unless aggregations
48 | 
49 |     aggregations.filter_map do |field, data|
50 |       if data['buckets'].present? && !data['buckets'].all? { |b| b['doc_count'].zero? }
51 |         { "#{field}": extract_aggregation_rows(data['buckets']) }
52 |       end
53 |     end
54 |   end
55 | 
56 |   def extract_aggregation_rows(rows)
57 |     rows.filter_map do |term_hash|
58 |       next if term_hash['doc_count'].zero?
59 | 
60 |       { agg_key: term_hash['key'],
61 |         doc_count: term_hash['doc_count'],
62 |         to: term_hash['to'] || nil,
63 |         from: term_hash['from'] || nil,
64 |         to_as_string: term_hash['to_as_string'] || nil,
65 |         from_as_string: term_hash['from_as_string'] || nil }.compact
66 |     end
67 |   end
68 | 
69 |   def deserialized(hit)
70 |     Serde.deserialize_hash(ActiveSupport::HashWithIndifferentAccess.new(hit['_source']),
71 |                            hit['_source']['language'])
72 |   end
73 | end
74 | 


--------------------------------------------------------------------------------
/app/classes/query_parser.rb:
--------------------------------------------------------------------------------
 1 | class QueryParser
 2 |   SiteFilter = Struct.new(:domain_name, :url_path)
 3 |   attr_reader :site_filters, :query, :stripped_query
 4 | 
 5 |   def initialize(query)
 6 |     @query = query
 7 |     @site_filters = extract_site_filters
 8 |   end
 9 | 
10 |   private
11 |   def extract_site_filters
12 |     site_filters = { included_sites: [], excluded_sites: [] }
13 |     @stripped_query = @query.gsub(/\(?(-?site:\S+)\b\/?\)?/i) do
14 |       match = $1
15 |       if match.first == '-'
16 |         site_filters[:excluded_sites] << extract_site_filter(match)
17 |       else
18 |         site_filters[:included_sites] << extract_site_filter(match)
19 |       end
20 |       nil
21 |     end.squish
22 | 
23 |     site_filters
24 |   end
25 | 
26 |   def extract_site_filter(site_param)
27 |     domain_name, url_path = site_param.split('/', 2)
28 |     domain_name.sub!(/\A-?site:/i, '')
29 |     url_path = url_path.present? ? "/#{url_path}" : nil
30 |     SiteFilter.new domain_name, url_path
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/app/controllers/api/base.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Api
 4 |   class Base < Grape::API
 5 |     rescue_from ReadOnlyAccessControl::DisallowedUpdate do
 6 |       message = 'The i14y API is currently in read-only mode.'
 7 |       message += " #{I14y::Application.config.maintenance_message}" if I14y::Application.config.maintenance_message
 8 |       rack_response({ developer_message: message, status: 503 }.to_json, 503)
 9 |     end
10 | 
11 |     rescue_from Elasticsearch::Persistence::Repository::DocumentNotFound,
12 |                 Elasticsearch::Transport::Transport::Errors::NotFound do |_e|
13 |                   rack_response(
14 |                     { developer_message: 'Resource could not be found.', status: 400 }.to_json,
15 |                     400
16 |                   )
17 |                 end
18 | 
19 |     rescue_from :all do |e|
20 |       Rails.logger.error "#{e}\n\n#{e.backtrace.join("\n")}"
21 | 
22 |       rack_response({ developer_message: "Something unexpected happened and we've been alerted.", status: 500 }.to_json, 500)
23 |     end
24 | 
25 |     mount Api::V1::Base
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/app/controllers/api/v1/base.rb:
--------------------------------------------------------------------------------
module Api
  module V1
    # Entry point for version 1 of the API: mounts the documents and
    # collections endpoint classes.
    class Base < Grape::API
      mount Api::V1::Documents
      mount Api::V1::Collections
    end
  end
end
9 | 


--------------------------------------------------------------------------------
/app/controllers/api/v1/collections.rb:
--------------------------------------------------------------------------------
  1 | module Api
  2 |   module V1
  3 |     class Collections < Grape::API
  4 |       ADMIN_USER     = ENV['I14Y_ADMIN_USER']
  5 |       ADMIN_PASSWORD = ENV['I14Y_ADMIN_PASSWORD']
  6 | 
  7 |       prefix 'api'
  8 |       version 'v1'
  9 |       default_format :json
 10 |       format :json
 11 |       rescue_from Grape::Exceptions::ValidationErrors do |e|
 12 |         rack_response({ developer_message: e.message, status: 400 }.to_json, 400)
 13 |       end
 14 | 
 15 |       http_basic do |admin_user, admin_password|
 16 |         error_hash = { developer_message: 'Unauthorized', status: 400 }
 17 |         error!(error_hash, 400) unless auth?(admin_user, admin_password)
 18 |         true
 19 |       end
 20 | 
 21 |       helpers ReadOnlyAccessControl
 22 | 
 23 |       helpers do
 24 |         def ok(user_message)
 25 |           { status: 200, developer_message: 'OK', user_message: user_message }
 26 |         end
 27 | 
 28 |         def auth?(admin_user, admin_password)
 29 |           admin_user == ADMIN_USER && admin_password == ADMIN_PASSWORD
 30 |         end
 31 |       end
 32 | 
 33 |       resource :collections do
 34 |         desc 'Create a collection'
 35 |         params do
 36 |           requires :handle,
 37 |                    allow_blank: false,
 38 |                    type: String,
 39 |                    regexp: /^[a-z0-9._]+$/,
 40 |                    desc: 'Immutable name of the logical index used when authenticating Document API calls'
 41 |           requires :token,
 42 |                    type: String,
 43 |                    allow_blank: false,
 44 |                    desc: 'Token to be used when authenticating Document API calls'
 45 |         end
 46 |         post do
 47 |           check_updates_allowed
 48 |           handle = params[:handle]
 49 |           collection = Collection.new(id: handle, token: params[:token])
 50 |           error!(collection.errors.messages, 400) unless collection.valid?
 51 |           ES.collection_repository.save(collection)
 52 |           documents_index_name = [DocumentRepository.index_namespace(handle), 'v1'].join('-')
 53 |           DocumentRepository.new.create_index!(
 54 |             index: documents_index_name,
 55 |             include_type_name: true
 56 |           )
 57 |           ES.client.indices.put_alias(
 58 |             index: documents_index_name,
 59 |             name: DocumentRepository.index_namespace(handle)
 60 |           )
 61 |           ok('Your collection was successfully created.')
 62 |         end
 63 | 
 64 |         desc 'Delete a collection'
 65 |         delete ':handle' do
 66 |           check_updates_allowed
 67 |           handle = params.delete(:handle)
 68 |           collection = ES.collection_repository.find(handle)
 69 |           error!(collection.errors.messages, 400) unless ES.collection_repository.delete(handle)
 70 |           ES.client.indices.delete(
 71 |             index: [DocumentRepository.index_namespace(handle), '*'].join('-')
 72 |           )
 73 |           ok('Your collection was successfully deleted.')
 74 |         end
 75 | 
 76 |         desc 'Search for documents in collections'
 77 |         params do
 78 |           requires :handles,
 79 |                    allow_blank: false,
 80 |                    type: String,
 81 |                    desc: 'Restrict results to this comma-separated list of document collections'
 82 |           optional :language,
 83 |                    type: Symbol,
 84 |                    values: SUPPORTED_LOCALES,
 85 |                    allow_blank: false,
 86 |                    desc: 'Restrict results to documents in a particular language'
 87 |           optional :query,
 88 |                    allow_blank: true,
 89 |                    type: String,
 90 |                    desc: 'Search term. See documentation on supported query syntax.'
 91 |           optional :size,
 92 |                    allow_blank: false,
 93 |                    type: Integer,
 94 |                    default: 20,
 95 |                    values: 1..1000,
 96 |                    desc: 'Number of results to return'
 97 |           optional :offset,
 98 |                    allow_blank: false,
 99 |                    type: Integer,
100 |                    default: 0,
101 |                    desc: 'Offset of results'
102 |           optional :min_timestamp,
103 |                    type: DateTime,
104 |                    allow_blank: false,
105 |                    desc: 'Return documents that were changed at or after this time',
106 |                    documentation: { example: '2013-02-27T10:00:00Z' }
107 |           optional :max_timestamp,
108 |                    type: DateTime,
109 |                    allow_blank: false,
110 |                    desc: 'Return documents that were changed before this time',
111 |                    documentation: { example: '2013-02-27T10:01:00Z' }
112 |           optional :min_timestamp_created,
113 |                    type: DateTime,
114 |                    allow_blank: false,
115 |                    desc: 'Return documents that were created at or after this time',
116 |                    documentation: { example: '2013-02-27T10:00:00Z' }
117 |           optional :max_timestamp_created,
118 |                    type: DateTime,
119 |                    allow_blank: false,
120 |                    desc: 'Return documents that were created before this time',
121 |                    documentation: { example: '2013-02-27T10:01:00Z' }
122 |           optional :sort_by_date,
123 |                    type: Boolean,
124 |                    desc: 'Whether to order documents by created date instead of relevance'
125 |           optional :searchgov_custom1,
126 |                    type: String,
127 |                    allow_blank: false,
128 |                    desc: 'Comma-separated list of custom content'
129 |           optional :searchgov_custom2,
130 |                    type: String,
131 |                    allow_blank: false,
132 |                    desc: 'Comma-separated list of custom content'
133 |           optional :searchgov_custom3,
134 |                    type: String,
135 |                    allow_blank: false,
136 |                    desc: 'Comma-separated list of custom content'
137 |           optional :tags,
138 |                    type: String,
139 |                    allow_blank: false,
140 |                    desc: 'Comma-separated list of category tags'
141 |           optional :ignore_tags,
142 |                    type: String,
143 |                    allow_blank: false,
144 |                    desc: 'Comma-separated list of category tags to exclude'
145 |           optional :include,
146 |                    type: String,
147 |                    allow_blank: false,
148 |                    desc: 'Comma-separated list of fields to include in results',
149 |                    documentation: { example: 'title,path,description,content,updated_at' }
150 |         end
151 |         get :search do
152 |           handles = params.delete(:handles).split(',')
153 |           valid_collections = ES.collection_repository.find(handles).compact
154 |           error!('Could not find all the specified collection handles', 400) unless valid_collections.size == handles.size
155 |           arr_params = %i[include ignore_tags] << DocumentQuery::FILTERABLE_TEXT_FIELDS
156 |           arr_params.flatten.compact.each do |key|
157 |             params[key] = params[key].extract_array if params[key].present?
158 |           end
159 |           document_search = DocumentSearch.new(params.merge(handles: valid_collections.collect(&:id)))
160 |           document_search_results = document_search.search
161 |           metadata_hash = { total: document_search_results.total,
162 |                             offset: document_search_results.offset,
163 |                             suggestion: document_search_results.suggestion,
164 |                             aggregations: document_search_results.aggregations }
165 |           { status: 200, developer_message: 'OK', metadata: metadata_hash, results: document_search_results.results }
166 |         end
167 | 
168 |         desc 'Get collection info and stats'
169 |         get ':handle' do
170 |           handle = params.delete(:handle)
171 |           collection = ES.collection_repository.find(handle)
172 |           { status: 200, developer_message: 'OK' }.merge(collection.as_json(root: true, methods: [:document_total, :last_document_sent]))
173 |         end
174 |       end
175 |     end
176 |   end
177 | end
178 | 


--------------------------------------------------------------------------------
/app/controllers/api/v1/documents.rb:
--------------------------------------------------------------------------------
  1 | module Api
  2 |   module V1
  3 |     class Documents < Grape::API
  4 |       prefix 'api'
  5 |       version 'v1'
  6 |       default_format :json
  7 |       format :json
  8 | 
  9 |       # Eventually, the validation logic should all be moved to the model classes,
 10 |       # and the validation itself should happen during serialization:
 11 |       # https://www.elastic.co/blog/activerecord-to-repository-changing-persistence-patterns-with-the-elasticsearch-rails-gem
 12 |       rescue_from Grape::Exceptions::ValidationErrors do |e|
 13 |         rack_response({ developer_message: e.message, status: 400 }.to_json, 400)
 14 |       end
 15 |       rescue_from Elasticsearch::Transport::Transport::Errors::Conflict do |_e|
 16 |         rack_response({ developer_message: 'Document already exists with that ID', status: 422 }.to_json, 422)
 17 |       end
 18 | 
 19 |       http_basic do |collection_handle, token|
 20 |         error_hash = { developer_message: 'Unauthorized', status: 400 }
 21 |         error!(error_hash, 400) unless auth?(collection_handle, token)
 22 |         @collection_handle = collection_handle
 23 |         true
 24 |       end
 25 | 
 26 |       helpers ReadOnlyAccessControl
 27 | 
 28 |       helpers do
 29 |         def ok(user_message)
 30 |           { status: 200, developer_message: 'OK', user_message: user_message }
 31 |         end
 32 | 
 33 |         def auth?(collection_handle, token)
 34 |           ES.collection_repository.find(collection_handle).token == token
 35 |         rescue Elasticsearch::Persistence::Repository::DocumentNotFound, Elasticsearch::Transport::Transport::Errors::BadRequest
 36 |           false
 37 |         end
 38 | 
 39 |         def document_repository
 40 |           index_name = DocumentRepository.index_namespace(@collection_handle)
 41 |           DocumentRepository.new(index_name: index_name)
 42 |         end
 43 |       end
 44 | 
 45 |       before do
 46 |         check_updates_allowed
 47 |       end
 48 | 
 49 |       resource :documents do
 50 |         desc 'Create a document'
 51 |         params do
 52 |           requires :document_id,
 53 |                    allow_blank: false,
 54 |                    type: String,
 55 |                    regexp: { value: %r{^[^/]+$}, message: "cannot contain any of the following characters: ['/']" },
 56 |                    max_bytes: 512,
 57 |                    desc: 'User-assigned document ID'
 58 |           requires :title,
 59 |                    type: String,
 60 |                    allow_blank: false,
 61 |                    desc: 'Document title'
 62 |           requires :path,
 63 |                    type: String,
 64 |                    allow_blank: false,
 65 |                    regexp: %r{^https?://[^\s/$.?#].[^\s]*$},
 66 |                    desc: 'Document link URL'
 67 |           optional :audience,
 68 |                    type: String,
 69 |                    allow_blank: false,
 70 |                    desc: 'Document audience'
 71 |           optional :changed,
 72 |                    type: DateTime,
 73 |                    allow_blank: false,
 74 |                    desc: 'When document was modified',
 75 |                    documentation: { example: '2013-02-27T10:00:01Z' }
 76 |           optional :content,
 77 |                    type: String,
 78 |                    allow_blank: false,
 79 |                    desc: 'Document content/body'
 80 |           optional :content_type,
 81 |                    type: String,
 82 |                    allow_blank: false,
 83 |                    desc: 'Document content type'
 84 |           optional :created,
 85 |                    type: DateTime,
 86 |                    allow_blank: true,
 87 |                    desc: 'When document was initially created',
 88 |                    documentation: { example: '2013-02-27T10:00:00Z' }
 89 |           optional :description,
 90 |                    type: String,
 91 |                    allow_blank: false,
 92 |                    desc: 'Document description'
 93 |           optional :thumbnail_url,
 94 |                    type: String,
 95 |                    allow_blank: false,
 96 |                    desc: 'Document thumbnail_url'
 97 |           optional :language,
 98 |                    type: Symbol,
 99 |                    values: SUPPORTED_LOCALES,
100 |                    default: :en,
101 |                    allow_blank: false,
102 |                    desc: 'Two-letter locale describing language of document (defaults to :en)'
103 |           optional :mime_type,
104 |                    type: String,
105 |                    allow_blank: false,
106 |                    desc: 'Document MIME type'
107 |           optional :promote,
108 |                    type: Boolean,
109 |                    desc: 'Whether to promote the document in the relevance ranking'
110 |           optional :searchgov_custom1,
111 |                    type: String,
112 |                    allow_blank: false,
113 |                    desc: 'Document custom field 1'
114 |           optional :searchgov_custom2,
115 |                    type: String,
116 |                    allow_blank: false,
117 |                    desc: 'Document custom field 2'
118 |           optional :searchgov_custom3,
119 |                    type: String,
120 |                    allow_blank: false,
121 |                    desc: 'Document custom field 3'
122 |           optional :tags,
123 |                    type: String,
124 |                    allow_blank: false,
125 |                    desc: 'Comma-separated list of category tags'
126 |         end
127 | 
128 |         post do
129 |           id = params.delete(:document_id)
130 |           document = Document.new(params.merge(id: id))
131 |           if document.invalid?
132 |             error!({ developer_message: document.errors.full_messages.join(', '), status: 400 }, 400)
133 |           end
134 |           document_repository.save(document, op_type: :create)
135 |           ok('Your document was successfully created.')
136 |         end
137 | 
138 |         desc 'Update a document'
139 |         params do
140 |           optional :title,
141 |                    type: String,
142 |                    allow_blank: false,
143 |                    desc: 'Document title'
144 |           optional :path,
145 |                    type: String,
146 |                    allow_blank: false,
147 |                    regexp: %r{^https?://[^\s/$.?#].[^\s]*$},
148 |                    desc: 'Document link URL'
149 |           optional :audience,
150 |                    type: String,
151 |                    allow_blank: false,
152 |                    desc: 'Document audience'
153 |           optional :changed,
154 |                    type: DateTime,
155 |                    allow_blank: false,
156 |                    desc: 'When document was modified',
157 |                    documentation: { example: '2013-02-27T10:00:01Z' }
158 |           optional :click_count,
159 |                    type: Integer,
160 |                    allow_blank: false,
161 |                    desc: 'Count of clicks'
162 |           optional :content,
163 |                    type: String,
164 |                    allow_blank: false,
165 |                    desc: 'Document content/body'
166 |           optional :content_type,
167 |                    type: String,
168 |                    allow_blank: false,
169 |                    desc: 'Document content type'
170 |           optional :created,
171 |                    type: DateTime,
172 |                    allow_blank: true,
173 |                    desc: 'When document was initially created',
174 |                    documentation: { example: '2013-02-27T10:00:00Z' }
175 |           optional :description,
176 |                    type: String,
177 |                    allow_blank: false,
178 |                    desc: 'Document description'
179 |           optional :thumbnail_url,
180 |                    type: String,
181 |                    allow_blank: false,
182 |                    desc: 'Document thumbnail_url'
183 |           optional :language,
184 |                    type: Symbol,
185 |                    values: SUPPORTED_LOCALES,
186 |                    allow_blank: false,
187 |                    desc: 'Two-letter locale describing language of document'
188 |           optional :mime_type,
189 |                    type: String,
190 |                    allow_blank: false,
191 |                    desc: 'Document MIME type'
192 |           optional :promote,
193 |                    type: Boolean,
194 |                    desc: 'Whether to promote the document in the relevance ranking'
195 |           optional :searchgov_custom1,
196 |                    type: String,
197 |                    allow_blank: false,
198 |                    desc: 'Document custom field 1'
199 |           optional :searchgov_custom2,
200 |                    type: String,
201 |                    allow_blank: false,
202 |                    desc: 'Document custom field 2'
203 |           optional :searchgov_custom3,
204 |                    type: String,
205 |                    allow_blank: false,
206 |                    desc: 'Document custom field 3'
207 |           optional :tags,
208 |                    type: String,
209 |                    allow_blank: false,
210 |                    desc: 'Comma-separated list of category tags'
211 | 
212 |           at_least_one_of :audience,
213 |                           :changed,
214 |                           :click_count,
215 |                           :content,
216 |                           :content_type,
217 |                           :created,
218 |                           :description,
219 |                           :document_id,
220 |                           :handle,
221 |                           :thumbnail_url,
222 |                           :language,
223 |                           :mime_type,
224 |                           :path,
225 |                           :promote,
226 |                           :searchgov_custom1,
227 |                           :searchgov_custom2,
228 |                           :searchgov_custom3,
229 |                           :tags,
230 |                           :title
231 |         end
232 | 
233 |         put ':document_id', requirements: { document_id: /.*/ } do
234 |           id = params.delete(:document_id)
235 |           # SRCH-5096 Ensure that existing attributes are not overwritten on put or else the weekly
236 |           # searchgov ClickMonitorJob and (infrequent) `searchgov:promote` task will delete metadata
237 |           document = document_repository.find(id, _source: %w[audience
238 |                                                               changed
239 |                                                               content_type
240 |                                                               created
241 |                                                               created_at
242 |                                                               language
243 |                                                               mime_type
244 |                                                               path
245 |                                                               searchgov_custom1
246 |                                                               searchgov_custom2
247 |                                                               searchgov_custom3
248 |                                                               tags])
249 |           document.attributes = document.attributes.merge(params)
250 |           if document.invalid?
251 |             error!({ developer_message: document.errors.full_messages.join(', '), status: 400 }, 400)
252 |           end
253 |           document_repository.update(document)
254 |           ok('Your document was successfully updated.')
255 |         end
256 | 
257 |         desc 'Delete a document'
258 |         delete ':document_id', requirements: { document_id: /.*/ } do
259 |           id = params[:document_id]
260 |           error!(document.errors.messages, 400) unless document_repository.delete(id)
261 |           ok('Your document was successfully deleted.')
262 |         end
263 |       end
264 |     end
265 |   end
266 | end
267 | 


--------------------------------------------------------------------------------
/app/models/collection.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | class Collection
 4 |   include ActiveModel::Serializers::JSON
 5 |   include ActiveModel::Validations
 6 |   include Virtus.model
 7 | 
 8 |   attribute :id, String
 9 |   attribute :token, String
10 |   attribute :created_at, Time, default: proc { Time.now.utc }
11 |   attribute :updated_at, Time, default: proc { Time.now.utc }
12 | 
13 |   validates :token, presence: true
14 | 
15 |   def document_total
16 |     document_repository.count
17 |   end
18 | 
19 |   def last_document_sent
20 |     document_repository.search("*:*", {size:1, sort: "updated_at:desc"}).
21 |       results.first.updated_at.utc.to_s
22 |   rescue
23 |     nil
24 |   end
25 | 
26 |   private
27 | 
28 |   def document_repository
29 |     @document_repository = DocumentRepository.new(
30 |       index_name: DocumentRepository.index_namespace(id)
31 |     )
32 |   end
33 | end
34 | 


--------------------------------------------------------------------------------
/app/models/document.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'mini_mime'
 4 | 
 5 | class Document
 6 |   include Virtus.model
 7 |   include ActiveModel::Validations
 8 | 
 9 |   attribute :audience, String
10 |   attribute :changed, DateTime, default: ->(doc, _attr) { doc.created }
11 |   attribute :click_count, Integer
12 |   attribute :content, String
13 |   attribute :content_type, String
14 |   attribute :created_at, Time, default: proc { Time.now.utc }
15 |   attribute :created, DateTime
16 |   attribute :description, String
17 |   attribute :id, String
18 |   attribute :thumbnail_url, String
19 |   attribute :language, String, mapping: { type: 'keyword' }
20 |   attribute :mime_type, String
21 |   attribute :path, String, mapping: { type: 'keyword' }
22 |   attribute :promote, Boolean
23 |   attribute :searchgov_custom1, String
24 |   attribute :searchgov_custom2, String
25 |   attribute :searchgov_custom3, String
26 |   attribute :tags, String, mapping: { type: 'keyword' }
27 |   attribute :title, String
28 |   attribute :updated_at, Time, default: proc { Time.now.utc }
29 |   attribute :updated, DateTime
30 | 
31 |   validates :thumbnail_url, format: { with: URI::DEFAULT_PARSER.make_regexp }, allow_blank: true
32 |   validates :language, presence: true
33 |   validates :path, presence: true
34 | 
35 |   validate :mime_type_is_valid
36 | 
37 |   private
38 | 
39 |   def mime_type_is_valid
40 |     return unless mime_type
41 | 
42 |     errors.add(:mime_type, 'is invalid') unless MiniMime.lookup_by_content_type(mime_type)
43 |   end
44 | end
45 | 


--------------------------------------------------------------------------------
/app/repositories/collection_repository.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | class CollectionRepository
 4 |   include Repository
 5 | 
 6 |   klass Collection
 7 |   client ES.client
 8 |   index_name index_namespace
 9 |   settings number_of_shards: 1, number_of_replicas: 1
10 | 
11 |   def deserialize(hash)
12 |     klass.new(source_hash(hash))
13 |   end
14 | end
15 | 


--------------------------------------------------------------------------------
/app/repositories/concerns/repository.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'active_support/concern'
 4 | 
 5 | module Repository
 6 |   extend ActiveSupport::Concern
 7 | 
 8 |   included do
 9 |     include Elasticsearch::Persistence::Repository
10 |     include Elasticsearch::Persistence::Repository::DSL
11 | 
12 |     extend NamespacedIndex
13 | 
14 |     client ES.client
15 |     settings number_of_shards: 1, number_of_replicas: 1
16 |   end
17 | 
18 |   def source_hash(hash)
19 |     hash['_source'].merge(id: hash['_id'])
20 |   end
21 | end
22 | 


--------------------------------------------------------------------------------
/app/repositories/document_repository.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | class DocumentRepository
 4 |   include Repository
 5 | 
 6 |   klass Document
 7 | 
 8 |   def serialize(document)
 9 |     document_hash = ActiveSupport::HashWithIndifferentAccess.new(super)
10 |     Serde.serialize_hash(document_hash, document_hash[:language])
11 |   end
12 | 
13 |   def deserialize(hash)
14 |     doc_hash = source_hash(hash)
15 |     deserialized_hash = Serde.deserialize_hash(doc_hash,
16 |                                                doc_hash['language'])
17 |     klass.new deserialized_hash
18 |   end
19 | end
20 | 


--------------------------------------------------------------------------------
/app/templates/collections.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | class Collections
 4 |   include Templatable
 5 | 
 6 |   def body
 7 |     Jbuilder.encode do |json|
 8 |       json.index_patterns "*-#{I14y::APP_NAME}-collections-*"
 9 |       json.mappings do
10 |         dynamic_templates(json)
11 |       end
12 |     end
13 |   end
14 | 
15 |   def dynamic_templates(json)
16 |     json.dynamic_templates do
17 |       string_fields_template(json, "keyword")
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/app/templates/documents.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | class Documents
  4 |   include Templatable
  5 |   LIGHT_STEMMERS = {
  6 |     de: 'german',
  7 |     es: 'spanish',
  8 |     fr: 'french',
  9 |     it: 'italian',
 10 |     pt: 'portuguese'
 11 |   }.freeze
 12 |   STANDARD_STEMMERS = {
 13 |     bn: 'bengali',
 14 |     en: 'english',
 15 |     fi: 'finnish',
 16 |     hi: 'hindi',
 17 |     hu: 'hungarian',
 18 |     ru: 'russian',
 19 |     sv: 'swedish'
 20 |   }.freeze
 21 | 
 22 |   def initialize
 23 |     @synonym_filter_locales = Set.new
 24 |     @protected_filter_locales = Set.new
 25 |   end
 26 | 
 27 |   def body
 28 |     Jbuilder.encode do |json|
 29 |       json.index_patterns("*-#{I14y::APP_NAME}-documents-*")
 30 |       json.settings do
 31 |         json.analysis do
 32 |           char_filter(json)
 33 |           filter(json)
 34 |           analyzer(json)
 35 |           tokenizer(json)
 36 |         end
 37 |       end
 38 |       json.mappings do
 39 |         dynamic_templates(json)
 40 |         properties(json)
 41 |       end
 42 |     end
 43 |   end
 44 | 
 45 |   def char_filter(json)
 46 |     json.char_filter do
 47 |       json.quotes do
 48 |         json.type('mapping')
 49 |         json.mappings(['\\u0091=>\\u0027', '\\u0092=>\\u0027', '\\u2018=>\\u0027', '\\u2019=>\\u0027', '\\u201B=>\\u0027'])
 50 |       end
 51 |     end
 52 |   end
 53 | 
 54 |   def filter(json)
 55 |     json.filter do
 56 |       json.bigrams_filter do
 57 |         json.type('shingle')
 58 |       end
 59 |       language_synonyms(json)
 60 |       language_protwords(json)
 61 |       language_stemmers(json)
 62 |     end
 63 |   end
 64 | 
 65 |   def analyzer(json)
 66 |     json.analyzer do
 67 |       generic_analyzers(json)
 68 |       french_analyzer(json)
 69 |       japanese_analyzer(json)
 70 |       korean_analyzer(json)
 71 |       chinese_analyzer(json)
 72 |       bigrams_analyzer(json)
 73 |       url_path_analyzer(json)
 74 |       domain_name_analyzer(json)
 75 |       default_analyzer(json)
 76 |     end
 77 |   end
 78 | 
 79 |   def default_analyzer(json)
 80 |     json.default do
 81 |       json.type('custom')
 82 |       json.filter(%w[icu_normalizer icu_folding])
 83 |       json.tokenizer('icu_tokenizer')
 84 |       json.char_filter(%w[html_strip quotes])
 85 |     end
 86 |   end
 87 | 
 88 |   def domain_name_analyzer(json)
 89 |     json.domain_name_analyzer do
 90 |       json.type('custom')
 91 |       json.filter('lowercase')
 92 |       json.tokenizer('domain_name_tokenizer')
 93 |     end
 94 |   end
 95 | 
 96 |   def url_path_analyzer(json)
 97 |     json.url_path_analyzer do
 98 |       json.type('custom')
 99 |       json.filter('lowercase')
100 |       json.tokenizer('url_path_tokenizer')
101 |     end
102 |   end
103 | 
104 |   def bigrams_analyzer(json)
105 |     json.bigrams_analyzer do
106 |       json.type('custom')
107 |       json.filter(%w[icu_normalizer icu_folding bigrams_filter])
108 |       json.tokenizer('icu_tokenizer')
109 |       json.char_filter(%w[html_strip quotes])
110 |     end
111 |   end
112 | 
113 |   def generic_analyzers(json)
114 |     GENERIC_ANALYZER_LOCALES.each do |locale|
115 |       generic_analyzer(json, locale)
116 |     end
117 |   end
118 | 
119 |   def chinese_analyzer(json)
120 |     json.zh_analyzer do
121 |       json.type('custom')
122 |       json.filter(%w[smartcn_word icu_normalizer icu_folding])
123 |       json.tokenizer('smartcn_sentence')
124 |       json.char_filter(['html_strip'])
125 |     end
126 |   end
127 | 
128 |   def korean_analyzer(json)
129 |     json.ko_analyzer do
130 |       json.type('cjk')
131 |       json.filter([])
132 |     end
133 |   end
134 | 
135 |   def japanese_analyzer(json)
136 |     json.ja_analyzer do
137 |       json.type('custom')
138 |       json.filter(%w[kuromoji_baseform ja_pos_filter icu_normalizer icu_folding cjk_width])
139 |       json.tokenizer('kuromoji_tokenizer')
140 |       json.char_filter(['html_strip'])
141 |     end
142 |   end
143 | 
144 |   def french_analyzer(json)
145 |     json.fr_analyzer do
146 |       json.type('custom')
147 |       json.filter(%w[icu_normalizer elision fr_stem_filter icu_folding])
148 |       json.tokenizer('icu_tokenizer')
149 |       json.char_filter(%w[html_strip quotes])
150 |     end
151 |   end
152 | 
153 |   def tokenizer(json)
154 |     json.tokenizer do
155 |       json.kuromoji do
156 |         json.type('kuromoji_tokenizer')
157 |         json.mode('search')
158 |         json.char_filter(['html_strip'])
159 |       end
160 |       json.url_path_tokenizer do
161 |         json.type('PathHierarchy')
162 |       end
163 |       json.domain_name_tokenizer do
164 |         json.type('PathHierarchy')
165 |         json.delimiter('.')
166 |         json.reverse(true)
167 |       end
168 |     end
169 |   end
170 | 
171 |   def filter_array(locale)
172 |     array = ['icu_normalizer']
173 |     array << "#{locale}_protected_filter" if @protected_filter_locales.include?(locale)
174 |     array << "#{locale}_stem_filter"
175 |     array << "#{locale}_synonym" if @synonym_filter_locales.include?(locale)
176 |     array << 'icu_folding'
177 |     array
178 |   end
179 | 
180 |   def properties(json)
181 |     json.properties do
182 |       %w[updated created changed].each { |field| date(json, field) }
183 |       %w[audience content_type document_id extension thumbnail_url language mime_type path
184 |          searchgov_custom1 searchgov_custom2 searchgov_custom3 tags].each { |field| keyword(json, field) }
185 |       basename(json)
186 |       url_path(json)
187 |       domain_name(json)
188 |       promote(json)
189 |       bigrams(json)
190 |       click_count(json)
191 |     end
192 |   end
193 | 
194 |   def basename(json)
195 |     json.basename do
196 |       json.type('text')
197 |     end
198 |   end
199 | 
200 |   def bigrams(json)
201 |     json.bigrams do
202 |       json.analyzer('bigrams_analyzer')
203 |       json.type('text')
204 |     end
205 |   end
206 | 
207 |   def promote(json)
208 |     json.promote do
209 |       json.type('boolean')
210 |     end
211 |   end
212 | 
213 |   def domain_name(json)
214 |     json.domain_name do
215 |       json.type('text')
216 |       json.analyzer('domain_name_analyzer')
217 |     end
218 |   end
219 | 
220 |   def url_path(json)
221 |     json.url_path do
222 |       json.type('text')
223 |       json.analyzer('url_path_analyzer')
224 |     end
225 |   end
226 | 
227 |   def click_count(json)
228 |     json.click_count do
229 |       json.type('integer')
230 |     end
231 |   end
232 | 
233 |   def dynamic_templates(json)
234 |     json.dynamic_templates do
235 |       language_templates(json)
236 |       string_fields_template(json, 'text')
237 |     end
238 |   end
239 | 
240 |   def language_stemmers(json)
241 |     light_stemmers(json)
242 |     standard_stemmers(json)
243 |     japanese_position_filter(json)
244 |   end
245 | 
246 |   def japanese_position_filter(json)
247 |     json.ja_pos_filter do
248 |       json.type('kuromoji_part_of_speech')
249 |       json.stoptags(['\\u52a9\\u8a5e-\\u683c\\u52a9\\u8a5e-\\u4e00\\u822c', '\\u52a9\\u8a5e-\\u7d42\\u52a9\\u8a5e'])
250 |     end
251 |   end
252 | 
253 |   def light_stemmers(json)
254 |     LIGHT_STEMMERS.each do |locale, language|
255 |       generic_stemmer(json, locale, language, 'light')
256 |     end
257 |   end
258 | 
259 |   def standard_stemmers(json)
260 |     STANDARD_STEMMERS.each do |locale, language|
261 |       generic_stemmer(json, locale, language, 'standard')
262 |     end
263 |   end
264 | 
265 |   def language_templates(json)
266 |     LANGUAGE_ANALYZER_LOCALES.each do |locale|
267 |       json.child! do
268 |         json.set!(locale) do
269 |           json.match("*_#{locale}")
270 |           json.match_mapping_type('string')
271 |           json.mapping do
272 |             json.analyzer("#{locale}_analyzer")
273 |             json.type('text')
274 |             json.term_vector('with_positions_offsets')
275 |             json.copy_to('bigrams')
276 |           end
277 |         end
278 |       end
279 |     end
280 |   end
281 | 
282 |   def language_synonyms(json)
283 |     parse_configuration_file(json, 'synonyms')
284 |   end
285 | 
286 |   def language_protwords(json)
287 |     parse_configuration_file(json, 'protwords')
288 |   end
289 | 
290 |   def synonyms_filter(json, locale, lines)
291 |     @synonym_filter_locales.add(locale)
292 |     linguistic_filter(json, locale, lines, 'synonym', 'synonyms', 'synonym')
293 |   end
294 | 
295 |   def protwords_filter(json, locale, lines)
296 |     @protected_filter_locales.add(locale)
297 |     linguistic_filter(json, locale, lines, 'protected_filter', 'keywords', 'keyword_marker')
298 |   end
299 | end
300 | 


--------------------------------------------------------------------------------
/appspec.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | version: 0.0
 3 | os: linux
 4 | # files:
 5 | #   - source: /
 6 | #     destination: /home/search/cicd_temp 
 7 | 
 8 | hooks:
 9 | 
10 |   BeforeInstall:
11 |     - location: cicd-scripts/fetch_env_vars.sh
12 |       timeout: 300
13 |       runas: search
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/bin/bundle:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
3 | load Gem.bin_path('bundler', 'bundle')
4 | 


--------------------------------------------------------------------------------
/bin/rails:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | APP_PATH = File.expand_path('../config/application', __dir__)
3 | require_relative '../config/boot'
4 | require 'rails/commands'
5 | 


--------------------------------------------------------------------------------
/bin/rake:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require_relative "../config/boot"
3 | require "rake"
4 | Rake.application.run
5 | 


--------------------------------------------------------------------------------
/bin/secure_docker:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Based on Search.gov GSA Container Security Benchmark https://docs.google.com/spreadsheets/d/1_UeKZHJGF8ZfoCSnDCux5lx1fUKSFVlYE6MmmVdeS-U/edit#gid=594625648
 4 | 
 5 | # There is no need for the container to mount volumes or devices with fstab. Removing default items reduces the attack surface.
 6 | rm -f /etc/fstab
 7 | 
 8 | # Be informative after successful login.
 9 | echo "echo -e '************WARNING************'" >> /home/rails/.bashrc
10 | echo "echo -e 'This is a U.S. General Services Administration Federal Government computer system that is FOR OFFICIAL USE ONLY. This system is subject to monitoring. Therefore, no expectation of privacy is to be assumed. Individuals found performing unauthorized activities may be subject to disciplinary action including criminal prosecution.\n'" >> /home/rails/.bashrc
11 | 
12 | # Remove kernel tunable items since they are not needed.
13 | rm -fr /etc/sysctl* /etc/modprobe.d /etc/modules /etc/mdev.conf /etc/acpi
14 | 
15 | # Remove suid & sgid files to enforce simple permission sets.
16 | find /bin /etc /lib /sbin /usr -xdev -type f -a \( -perm /4000 -o -perm /2000 \) -delete
17 | 
18 | # Remove curl so that software cannot be downloaded from external sources inside the container.
19 | apt-get remove -y --auto-remove curl
20 | 
21 | # Remove any broken symlinks, if any.
22 | find /bin /etc /lib /sbin /usr -xdev -type l -exec test ! -e {} \; -delete
23 | 
24 | # Remove existing crontabs
25 | rm -rf /etc/cron.d /etc/cron.daily
26 | 
27 | # Remove init scripts since we do not use them.
28 | rm -fr /etc/init.d /lib/rc /etc/conf.d /etc/inittab /etc/runlevels /etc/rc.conf /etc/logrotate.d
29 | 


--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require "fileutils"
 3 | 
 4 | # path to your application root.
 5 | APP_ROOT = File.expand_path("..", __dir__)
 6 | 
 7 | def system!(*args)
 8 |   system(*args, exception: true)
 9 | end
10 | 
11 | FileUtils.chdir APP_ROOT do
12 |   # This script is a way to set up or update your development environment automatically.
13 |   # This script is idempotent, so that you can run it at any time and get an expectable outcome.
14 |   # Add necessary setup steps to this file.
15 | 
16 |   puts '== Installing dependencies =='
17 |   system! 'gem install bundler --conservative'
18 |   system('bundle check') || system!('bundle install')
19 | 
20 |   puts "\n== Removing old logs and tempfiles =="
21 |   system! 'bin/rails log:clear tmp:clear'
22 | 
23 |   puts "\n== Restarting application server =="
24 |   system! 'bin/rails restart'
25 | end
26 | 


--------------------------------------------------------------------------------
/bin/update:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'fileutils'
 3 | include FileUtils
 4 | 
 5 | # path to your application root.
 6 | APP_ROOT = File.expand_path('..', __dir__)
 7 | 
 8 | def system!(*args)
 9 |   system(*args) || abort("\n== Command #{args} failed ==")
10 | end
11 | 
12 | chdir APP_ROOT do
13 |   # This script is a way to update your development environment automatically.
14 |   # Add necessary update steps to this file.
15 | 
16 |   puts '== Installing dependencies =='
17 |   system! 'gem install bundler --conservative'
18 |   system('bundle check') || system!('bundle install')
19 | 
20 |   puts "\n== Removing old logs and tempfiles =="
21 |   system! 'bin/rails log:clear tmp:clear'
22 | 
23 |   puts "\n== Restarting application server =="
24 |   system! 'bin/rails restart'
25 | end
26 | 


--------------------------------------------------------------------------------
/buildspec_i14y.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | version: 0.2
 3 | env:
 4 |   parameter-store:
 5 |     API_SERVER_ADDRESSES: "DEPLOY_I14Y_SERVER_ADDRESS"
 6 |     DEPLOYMENT_PATH: "DEPLOY_I14Y_DEPLOYMENT_PATH"
 7 |     I14Y_THREADS: "I14Y_THREADS"
 8 |     I14Y_WORKERS: "I14Y_WORKERS"
 9 |     SERVER_DEPLOYMENT_USER: "DEPLOY_SERVER_DEPLOYMENT_USER"
10 |     # SSH_KEY_PATH: "DEPLOY_SSH_KEY_PATH" - defined below
11 | 
12 |     # deployment variables shared with subsequent stages - these might not need to be exported, as this is the final stage
13 | exported-variables:
14 |     - DEPLOYMENT_PATH
15 |     - I14Y_THREADS
16 |     - I14Y_WORKERS
17 |     - SERVER_ADDRESS
18 |     - SERVER_DEPLOYMENT_USER
19 |     - SSH_KEY_PATH
20 | 
21 | phases:
22 |   install:
23 |     runtime-versions:
24 |       python: 3.x
25 |     commands:
26 |        - export PATH="$HOME/.rbenv/bin:$PATH"
27 |        - eval "$(rbenv init -)"
28 | 
29 |   pre_build:
30 |     commands:
31 |       - aws secretsmanager get-secret-value --secret-id $SEARCH_SECRETSMANAGER_KEY_SECRET_NAME --region $SEARCH_AWS_REGION --query 'SecretString' --output text > $SEARCH_ENV_EC2_KEY
32 |   build:
33 |     commands:
34 |       - CURRENT_LOCATION=$(pwd)  # would look something like this - /codebuild/output/src559980389/src - a temp dir created by codebuild
35 |       - SSH_KEY_PATH="${CURRENT_LOCATION}/${SEARCH_ENV_EC2_KEY}"
36 |       - echo $SSH_KEY_PATH
37 |       - echo "deploying i14y app with capistrano"
38 |       - bundle install
39 |       - cap $SEARCH_ENV puma:config puma:systemd:config puma:systemd:enable
40 |       - cap $SEARCH_ENV deploy
41 |       - cap $SEARCH_ENV --tasks
42 |       # - cap $SEARCH_ENV resque:start
43 |       - cap $SEARCH_ENV puma:restart
44 | 
45 | artifacts:
46 |   files:
47 |     - '**/*'
48 | 


--------------------------------------------------------------------------------
/cicd-scripts/fetch_env_vars.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | # Move to a writable location
 4 | cd /home/search/cicd_temp 
 5 | 
 6 | # Leave PARAM_PATH empty to fetch all parameters in the region
 7 | PARAM_PATH=""
 8 | 
 9 | # Clear the .env file if it exists
10 | > .env
11 | 
12 | echo "Starting the script"
13 | # Fetch all parameter names in the region
14 | REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/region)
15 | echo $REGION
16 | if [ -n "$PARAM_PATH" ]; then
17 |     PARAM_KEYS=$(aws ssm get-parameters-by-path --path "$PARAM_PATH"  --recursive --query "Parameters[*].Name" --output text --region $REGION)
18 | else
19 |     PARAM_KEYS=$(aws ssm describe-parameters  --query "Parameters[*].Name" --output text --region $REGION)
20 | fi
21 | echo "Fetched parameter keys: $PARAM_KEYS"
22 | 
23 | # Loop through each parameter key
24 | for PARAM in $PARAM_KEYS; do
25 |     # Exclude parameters that start with "DEPLOY_" or match "*_EC2_PEM_KEY" or match LOGIN_DOT_GOV_PEM
26 |     if [[ $PARAM != DEPLOY_* && ! $PARAM =~ .*_EC2_PEM_KEY$ && $PARAM != "LOGIN_DOT_GOV_PEM" ]]; then
27 |         # Fetch the parameter value from SSM
28 |         VALUE=$(aws ssm get-parameter --name "$PARAM" --with-decryption --query "Parameter.Value" --output text --region $REGION)
29 |         
30 |         # Rename parameters that start with "SEARCH_AWS_" to "AWS_"
31 |         if [[ $PARAM == SEARCH_AWS_* ]]; then
32 |             PARAM=${PARAM/SEARCH_AWS_/AWS_}
33 |         fi
34 | 
35 |         # Write the key=value pair to the .env file
36 |         echo "$PARAM=$VALUE" >> .env
37 |     fi
38 | done
39 | 
40 | # Output the result
41 | # echo ".env file created with the following content:"
42 | # cat .env
43 | cp /home/search/cicd_temp/.env /home/search/i14y/shared/
44 | 
45 | 
46 | # create puma folders and files
47 | 
48 | # Create  directories if they do not already exist
49 | [ ! -d /home/search/i14y/shared/tmp/pids/ ] && mkdir -p /home/search/i14y/shared/tmp/pids/
50 | [ ! -d /home/search/i14y/shared/log ] && mkdir -p /home/search/i14y/shared/log
51 | 
52 | # Create log files if they do not already exist
53 | [ ! -f /home/search/i14y/shared/log/puma_access.log ] && touch /home/search/i14y/shared/log/puma_access.log
54 | [ ! -f /home/search/i14y/shared/log/puma_error.log ] && touch /home/search/i14y/shared/log/puma_error.log
55 | 
56 | 
57 | sudo chown -R search:search /home/search/i14y/
58 | sudo chmod -R 755 /home/search/i14y/
59 | 
60 | find /home/search/i14y/ -type d -exec chmod 2755 {} \;
61 | 
62 | umask 022
63 | 
64 | sudo rm -rf /home/search/cicd_temp/*
65 | 


--------------------------------------------------------------------------------
/config.ru:
--------------------------------------------------------------------------------
 1 | # This file is used by Rack-based servers to start the application.
 2 | 
 3 | require_relative "config/environment"
 4 | require 'rack/cors'
 5 | 
 6 | NewRelic::Agent.manual_start
 7 | 
 8 | use Rack::Cors do
 9 |   allow do
10 |     origins '*'
11 |     resource '*', headers: :any, methods: [:get, :post, :put, :delete, :options]
12 |   end
13 | end
14 | 
15 | run Rails.application
16 | Rails.application.load_server
17 | 


--------------------------------------------------------------------------------
/config/access_control.yml:
--------------------------------------------------------------------------------
 1 | default: &DEFAULT
 2 |   updates_allowed: true
 3 |   maintenance_message: Please try again in one hour.
 4 | 
 5 | development:
 6 |   <<: *DEFAULT
 7 | 
 8 | test:
 9 |   <<: *DEFAULT
10 | 
11 | staging:
12 |   <<: *DEFAULT
13 | 
14 | production:
15 |   <<: *DEFAULT
16 | 


--------------------------------------------------------------------------------
/config/application.rb:
--------------------------------------------------------------------------------
 1 | require_relative "boot"
 2 | 
 3 | # require "rails"
 4 | # Pick the frameworks you want:
 5 | require "active_model/railtie"
 6 | # require "active_job/railtie"
 7 | # require "active_record/railtie"
 8 | # require "active_storage/engine"
 9 | require "action_controller/railtie"
10 | # require "action_mailer/railtie"
11 | # require "action_mailbox/engine"
12 | # require "action_text/engine"
13 | # require "action_view/railtie"
14 | # require "action_cable/engine"
15 | # require "rails/test_unit/railtie"
16 | 
17 | # Require the gems listed in Gemfile, including any gems
18 | # you've limited to :test, :development, or :production.
19 | Bundler.require(*Rails.groups)
20 | 
# Namespace for the i14y application and its Rails configuration.
module I14y
  # Application name; the fallback for the logger's application name below.
  APP_NAME = 'i14y'

  class Application < Rails::Application
    # Framework defaults and cache format for Rails 7.1.
    config.load_defaults 7.1
    config.active_support.cache_format_version = 7.1

    # Settings here can be overridden per environment by the files in
    # config/environments, which are processed later.

    # Logger application name: the APP_NAME environment variable when set, otherwise
    # the constant above.
    config.semantic_logger.application = ENV.fetch('APP_NAME', APP_NAME)

    # Eager load lib/ and all of its subdirectories, then explicitly require the
    # String extension and every custom validation.
    config.eager_load_paths += Dir[config.root.join('lib', '**/')]
    require 'ext/string'
    Dir["#{Rails.root}/lib/validations/*.rb"].each do |validation_file|
      require validation_file
    end

    # API-only application: a smaller middleware stack, and generators skip views,
    # helpers and assets.
    config.api_only = true
  end
end
54 | 


--------------------------------------------------------------------------------
/config/boot.rb:
--------------------------------------------------------------------------------
1 | ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
2 | 
3 | require "bundler/setup" # Set up gems listed in the Gemfile.
4 | 


--------------------------------------------------------------------------------
/config/deploy.rb:
--------------------------------------------------------------------------------
 1 | # config valid for current version and patch releases of Capistrano
 2 | lock '~> 3.19.1'
 3 | # Thread count reused in :puma_threads below: the I14Y_THREADS string when set, otherwise the Integer 5.
 4 | I14Y_THREADS = ENV.fetch('I14Y_THREADS') { 5 }
 5 | 
 6 | set :application,     'i14y'
 7 | set :branch,          ENV.fetch('SEARCH_ENV', 'staging')
 8 | set :deploy_to,       ENV.fetch('DEPLOYMENT_PATH') # no default: raises KeyError when DEPLOYMENT_PATH is unset
 9 | set :format,          :pretty
10 | set :puma_access_log, "#{release_path}/log/puma.access.log"
11 | set :puma_bind,       'tcp://0.0.0.0:8081'
12 | set :puma_error_log,  "#{release_path}/log/puma.error.log"
13 | set :puma_threads,    [ENV.fetch('I14Y_MIN_THREADS', I14Y_THREADS), I14Y_THREADS] # presumably [min, max]; first falls back to I14Y_THREADS
14 | set :puma_workers,    ENV.fetch('I14Y_WORKERS') { 0 }
15 | set :rails_env,       'production'
16 | set :rbenv_type,      :user
17 | set :repo_url,        'https://github.com/GSA/i14y.git'
18 | set :user,            ENV.fetch('SERVER_DEPLOYMENT_USER', 'search')
19 | 
20 | append :linked_files, '.env'
21 | append :linked_dirs,  'log', 'tmp'
22 | # Server list is a JSON array read from API_SERVER_ADDRESSES; when the variable is unset the list is empty.
23 | API_SERVER_ADDRESSES = JSON.parse(ENV.fetch('API_SERVER_ADDRESSES', '[]'))
24 | # NOTE(review): :user above falls back to 'search', but the roles and ssh_options below pass nil when SERVER_DEPLOYMENT_USER is unset - confirm.
25 | role :app,  API_SERVER_ADDRESSES, user: ENV['SERVER_DEPLOYMENT_USER']
26 | role :db,   API_SERVER_ADDRESSES, user: ENV['SERVER_DEPLOYMENT_USER']
27 | role :web,  API_SERVER_ADDRESSES, user: ENV['SERVER_DEPLOYMENT_USER']
28 | # NOTE(review): keys becomes [nil] when SSH_KEY_PATH is unset - confirm the variable is always provided.
29 | set :ssh_options, {
30 |   auth_methods:  %w(publickey),
31 |   forward_agent: false,
32 |   keys:          [ENV['SSH_KEY_PATH']],
33 |   user:          ENV['SERVER_DEPLOYMENT_USER']
34 | }
35 | 


--------------------------------------------------------------------------------
/config/deploy/development.rb:
--------------------------------------------------------------------------------
1 | set :branch, 'main'
2 | 


--------------------------------------------------------------------------------
/config/deploy/production.rb:
--------------------------------------------------------------------------------
 1 | # server-based syntax
 2 | # ======================
 3 | # Defines a single server with a list of roles and multiple properties.
 4 | # You can define all roles on a single server, or split them:
 5 | 
 6 | # server "example.com", user: "deploy", roles: %w{app db web}, my_property: :my_value
 7 | # server "example.com", user: "deploy", roles: %w{app web}, other_property: :other_value
 8 | # server "db.example.com", user: "deploy", roles: %w{db}
 9 | 
10 | 
11 | 
12 | # role-based syntax
13 | # ==================
14 | 
15 | # Defines a role with one or multiple servers. The primary server in each
16 | # group is considered to be the first unless any hosts have the primary
17 | # property set. Specify the username and a domain or IP for the server.
18 | # Don't use `:all`, it's a meta role.
19 | 
20 | # role :app, %w{deploy@example.com}, my_property: :my_value
21 | # role :web, %w{user1@primary.com user2@additional.com}, other_property: :other_value
22 | # role :db,  %w{deploy@example.com}
23 | 
24 | 
25 | 
26 | # Configuration
27 | # =============
28 | # You can set any configuration variable like in config/deploy.rb
29 | # These variables are then only loaded and set in this stage.
30 | # For available Capistrano configuration variables see the documentation page.
31 | # http://capistranorb.com/documentation/getting-started/configuration/
32 | # Feel free to add new variables to customise your setup.
33 | 
34 | 
35 | 
36 | # Custom SSH Options
37 | # ==================
38 | # You may pass any option but keep in mind that net/ssh understands a
39 | # limited set of options, consult the Net::SSH documentation.
40 | # http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start
41 | #
42 | # Global options
43 | # --------------
44 | #  set :ssh_options, {
45 | #    keys: %w(/home/user_name/.ssh/id_rsa),
46 | #    forward_agent: false,
47 | #    auth_methods: %w(password)
48 | #  }
49 | #
50 | # The server-based syntax can be used to override options:
51 | # ------------------------------------
52 | # server "example.com",
53 | #   user: "user_name",
54 | #   roles: %w{web app},
55 | #   ssh_options: {
56 | #     user: "user_name", # overrides user setting above
57 | #     keys: %w(/home/user_name/.ssh/id_rsa),
58 | #     forward_agent: false,
59 | #     auth_methods: %w(publickey password)
60 | #     # password: "please use keys"
61 | #   }
62 | 


--------------------------------------------------------------------------------
/config/deploy/staging.rb:
--------------------------------------------------------------------------------
 1 | # config/deploy/staging.rb
 2 | 
 3 | # Server-based syntax
 4 | # ======================
 5 | # Defines a single server with a list of roles and multiple properties.
 6 | # You can define all roles on a single server, or split them:
 7 | 
 8 | # Configuration
 9 | # =============
10 | # You can set any configuration variable like in config/deploy.rb.
11 | # These variables are then only loaded and set in this stage.
12 | # For available Capistrano configuration variables see the documentation page.
13 | # http://capistranorb.com/documentation/getting-started/configuration/
14 | # Feel free to add new variables to customize your setup.
15 | 
16 | # Custom SSH Options
17 | # ==================
18 | # You may pass any option but keep in mind that net/ssh understands a limited set of options, consult the Net/SSH documentation.
19 | # http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start
20 | 
21 | # Global options
22 | # --------------
23 | 


--------------------------------------------------------------------------------
/config/elasticsearch.yml:
--------------------------------------------------------------------------------
 1 | default: &DEFAULT
 2 |   hosts:
 3 |   <% ENV.fetch('ES_HOSTS', 'localhost:9200').split(',').each do |host| %>
 4 |     - <%= host %>
 5 |   <% end %>
 6 |   user: <%= ENV['ES_USERNAME'] || 'elastic' %>
 7 |   password: <%= ENV['ES_PASSWORD'] || 'changeme' %>
 8 |   number_of_shards: 1
 9 |   log: true
10 |   log_level: <%= ENV.fetch('ES_LOG_LEVEL', 'ERROR') %>
11 | 
12 | development:
13 |   <<: *DEFAULT
14 | 
15 | test:
16 |   <<: *DEFAULT
17 | 
18 | staging:
19 |   <<: *DEFAULT
20 | 
21 | production:
22 |   <<: *DEFAULT
23 |   number_of_shards: 3
24 | 
25 | 


--------------------------------------------------------------------------------
/config/environment.rb:
--------------------------------------------------------------------------------
1 | # Load the Rails application.
2 | require_relative "application"
3 | 
4 | # Initialize the Rails application.
5 | Rails.application.initialize!
6 | 


--------------------------------------------------------------------------------
/config/environments/development.rb:
--------------------------------------------------------------------------------
 1 | require "active_support/core_ext/integer/time"
 2 | 
 3 | Rails.application.configure do
 4 |   # Settings specified here will take precedence over those in config/application.rb.
 5 | 
 6 |   # In the development environment your application's code is reloaded any time
 7 |   # it changes. This slows down response time but is perfect for development
 8 |   # since you don't have to restart the web server when you make code changes.
 9 |   config.enable_reloading = true
10 | 
11 |   # Do not eager load code on boot.
12 |   config.eager_load = false
13 | 
14 |   # Show full error reports.
15 |   config.consider_all_requests_local = true
16 | 
17 |   # Enable/disable caching. By default caching is disabled.
18 |   # Run rails dev:cache to toggle caching.
19 |   if Rails.root.join('tmp', 'caching-dev.txt').exist?
20 |     config.cache_store = :memory_store
21 |     config.public_file_server.headers = {
22 |       'Cache-Control' => "public, max-age=#{2.days.to_i}"
23 |     }
24 |   else
25 |     config.action_controller.perform_caching = false
26 | 
27 |     config.cache_store = :null_store
28 |   end
29 | 
30 |   # Print deprecation notices to the Rails logger.
31 |   config.active_support.deprecation = :log
32 | 
33 |   # Raise exceptions for disallowed deprecations.
34 |   config.active_support.disallowed_deprecation = :raise
35 | 
36 |   # Tell Active Support which deprecation messages to disallow.
37 |   config.active_support.disallowed_deprecation_warnings = []
38 | 
39 | 
40 |   # Raises error for missing translations.
41 |   # config.i18n.raise_on_missing_translations = true
42 | 
43 |   # Annotate rendered view with file names.
44 |   # config.action_view.annotate_rendered_view_with_filenames = true
45 | end
46 | 


--------------------------------------------------------------------------------
/config/environments/production.rb:
--------------------------------------------------------------------------------
 1 | require "active_support/core_ext/integer/time"
 2 | 
 3 | Rails.application.configure do
 4 |   # Settings specified here will take precedence over those in config/application.rb.
 5 | 
 6 |   # Code is not reloaded between requests.
 7 |   config.enable_reloading = false
 8 | 
 9 |   # Eager load code on boot. This eager loads most of Rails and
10 |   # your application in memory, allowing both threaded web servers
11 |   # and those relying on copy on write to perform better.
12 |   # Rake tasks automatically ignore this option for performance.
13 |   config.eager_load = true
14 | 
15 |   # Full error reports are disabled and caching is turned on.
16 |   config.consider_all_requests_local = false
17 | 
18 |   # Ensures that a master key has been made available in ENV["RAILS_MASTER_KEY"], config/master.key, or an environment
19 |   # key such as config/credentials/production.key. This key is used to decrypt credentials (and other encrypted files).
20 |   # config.require_master_key = true
21 | 
22 |   # Disable serving static files from `public/`, relying on NGINX/Apache to do so instead.
23 |   # config.public_file_server.enabled = false
24 | 
25 |   # Enable serving of images, stylesheets, and JavaScripts from an asset server.
26 |   # config.asset_host = "http://assets.example.com"
27 | 
28 |   # Specifies the header that your server uses for sending files.
29 |   # config.action_dispatch.x_sendfile_header = "X-Sendfile" # for Apache
30 |   # config.action_dispatch.x_sendfile_header = "X-Accel-Redirect" # for NGINX
31 | 
32 |   # Assume all access to the app is happening through a SSL-terminating reverse proxy.
33 |   # Can be used together with config.force_ssl for Strict-Transport-Security and secure cookies.
34 |   # config.assume_ssl = true
35 | 
36 |   # Forcing SSL (HTTPS-only access, Strict-Transport-Security, secure cookies) is turned off for this app.
37 |   config.force_ssl = false
38 | 
39 |   # Prepend all log lines with the following tags.
40 |   config.log_tags = [ :request_id ]
41 | 
42 |   # The log level is read from RAILS_LOG_LEVEL and defaults to "debug", which logs everything.
43 |   # NOTE(review): "info" keeps generic, useful information while avoiding logging too much and the inadvertent
44 |   # exposure of personally identifiable information (PII) - confirm "debug" is the intended production default.
45 |   config.log_level = ENV.fetch("RAILS_LOG_LEVEL", "debug")
46 | 
47 |   # Use a different cache store in production.
48 |   # config.cache_store = :mem_cache_store
49 | 
50 |   # Enable locale fallbacks for I18n (makes lookups for any locale fall back to
51 |   # the I18n.default_locale when a translation cannot be found).
52 |   config.i18n.fallbacks = true
53 | 
54 |   # Send deprecation notices to registered listeners.
55 |   config.active_support.deprecation = :notify
56 | 
57 |   # Log disallowed deprecations.
58 |   config.active_support.disallowed_deprecation = :log
59 | 
60 |   # Tell Active Support which deprecation messages to disallow.
61 |   config.active_support.disallowed_deprecation_warnings = []
62 | 
63 |   # Protect against DNS rebinding and other `Host` header attacks: allow the I14Y_ALLOWED_HOSTS value when it is set.
64 |   config.hosts << ENV['I14Y_ALLOWED_HOSTS'] if ENV['I14Y_ALLOWED_HOSTS'].present?
65 |   
66 |   # Skip DNS rebinding protection for the default health check endpoint.
67 |   config.host_authorization = { exclude: ->(request) { request.path == "/up" } }
68 | 
69 |   config.rails_semantic_logger.format = :json
70 | end
71 | 


--------------------------------------------------------------------------------
/config/environments/test.rb:
--------------------------------------------------------------------------------
 1 | require "active_support/core_ext/integer/time"
 2 | 
 3 | # The test environment is used exclusively to run your application's
 4 | # test suite. You never need to work with it otherwise. Remember that
 5 | # your test database is "scratch space" for the test suite and is wiped
 6 | # and recreated between test runs. Don't rely on the data there!
 7 | 
 8 | Rails.application.configure do
 9 |   # Settings specified here will take precedence over those in config/application.rb.
10 | 
11 |   # While tests run files are not watched, reloading is not necessary.
12 |   config.enable_reloading = false
13 | 
14 |   # Eager loading loads your entire application. When running a single test locally,
15 |   # this is usually not necessary, and can slow down your test suite. However, it's
16 |   # recommended that you enable it in continuous integration systems to ensure eager
17 |   # loading is working properly before deploying your code.
18 |   config.eager_load = ENV['CI'].present?
19 | 
20 |   # Configure public file server for tests with Cache-Control for performance.
21 |   config.public_file_server.enabled = true
22 |   config.public_file_server.headers = {
23 |     'Cache-Control' => "public, max-age=#{1.hour.to_i}"
24 |   }
25 | 
26 |   # Show full error reports and disable caching.
27 |   config.consider_all_requests_local = true
28 |   config.action_controller.perform_caching = false
29 |   config.cache_store = :null_store
30 | 
31 |   # Raise every exception instead of rendering exception templates (:none replaces the deprecated `false`).
32 |   config.action_dispatch.show_exceptions = :none
33 | 
34 |   # Disable request forgery protection in test environment.
35 |   config.action_controller.allow_forgery_protection = false
36 | 
37 |   # Print deprecation notices to the stderr.
38 |   config.active_support.deprecation = :stderr
39 | 
40 |   # Raise exceptions for disallowed deprecations.
41 |   config.active_support.disallowed_deprecation = :raise
42 | 
43 |   # Tell Active Support which deprecation messages to disallow.
44 |   config.active_support.disallowed_deprecation_warnings = []
45 | 
46 |   # Raises error for missing translations.
47 |   config.i18n.raise_on_missing_translations = true
48 | 
49 |   # Annotate rendered view with file names.
50 |   # config.action_view.annotate_rendered_view_with_filenames = true
51 | end
52 | 


--------------------------------------------------------------------------------
/config/initializers/access_control.rb:
--------------------------------------------------------------------------------
1 | config = Rails.application.config_for(:access_control)
2 | I14y::Application.config.updates_allowed = !!config[:updates_allowed]
3 | I14y::Application.config.maintenance_message = config[:maintenance_message]
4 | 


--------------------------------------------------------------------------------
/config/initializers/application_controller_renderer.rb:
--------------------------------------------------------------------------------
1 | # Be sure to restart your server when you modify this file.
2 | 
3 | # ActiveSupport::Reloader.to_prepare do
4 | #   ApplicationController.renderer.defaults.merge!(
5 | #     http_host: 'example.org',
6 | #     https: false
7 | #   )
8 | # end
9 | 


--------------------------------------------------------------------------------
/config/initializers/backtrace_silencers.rb:
--------------------------------------------------------------------------------
1 | # Be sure to restart your server when you modify this file.
2 | 
3 | # You can add backtrace silencers for libraries that you're using but don't wish to see in your backtraces.
4 | # Rails.backtrace_cleaner.add_silencer { |line| /my_noisy_library/.match?(line) }
5 | 
6 | # You can also remove all the silencers if you're trying to debug a problem that might stem from framework code
7 | # by setting BACKTRACE=1 before calling your invocation, like "BACKTRACE=1 ./bin/rails runner 'MyClass.perform'".
8 | Rails.backtrace_cleaner.remove_silencers! if ENV["BACKTRACE"]
9 | 


--------------------------------------------------------------------------------
/config/initializers/cookies_serializer.rb:
--------------------------------------------------------------------------------
1 | # Be sure to restart your server when you modify this file.
2 | 
3 | # Specify a serializer for the signed and encrypted cookie jars.
4 | # Valid options are :json, :marshal, and :hybrid.
5 | Rails.application.config.action_dispatch.cookies_serializer = :json
6 | 


--------------------------------------------------------------------------------
/config/initializers/elasticsearch.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module ES
 4 |   ES_CONFIG = Rails.application.config_for(:elasticsearch).freeze
 5 |   # Builds a new Elasticsearch client on every call from ES_CONFIG merged with the options below.
 6 |   def self.client
 7 |     Elasticsearch::Client.new(ES_CONFIG.merge({randomize_hosts: true,
 8 |                                                retry_on_failure: true,
 9 |                                                reload_connections: false,
10 |                                                reload_on_failure: false,
11 |                                                transport_options: {
12 |                                                  ssl: {
13 |                                                    verify: false # NOTE(review): presumably turns off TLS certificate verification - confirm this is intended
14 |                                                  }
15 |                                                },
16 |                                                logger: Rails.logger # Explicitly set Rails logger
17 |                                                }))
18 |   end
19 |   # Returns a new CollectionRepository on every call.
20 |   def self.collection_repository
21 |     CollectionRepository.new
22 |   end
23 | end
24 | # NOTE(review): ES.client builds a fresh client per call, so the logger set below lands on a throwaway instance - confirm this block is still needed.
25 | if Rails.env.development?
26 |   logger = ActiveSupport::Logger.new(STDERR)
27 |   logger.level = Logger::DEBUG
28 |   logger.formatter = proc { |_s, _d, _p, m| "\e[2m#{m}\n\e[0m" }
29 |   ES.client.transport.logger = logger
30 | end
31 | 


--------------------------------------------------------------------------------
/config/initializers/filter_parameter_logging.rb:
--------------------------------------------------------------------------------
1 | # Be sure to restart your server when you modify this file.
2 | 
3 | # Configure parameters to be partially matched (e.g. passw matches password) and filtered from the log file.
4 | # Use this to limit dissemination of sensitive information.
5 | # See the ActiveSupport::ParameterFilter documentation for supported notations and behaviors.
6 | Rails.application.config.filter_parameters += [
7 |   :password, :passw, :secret, :token, :_key, :crypt, :salt, :certificate, :otp, :ssn, :query
8 | ]
9 | 


--------------------------------------------------------------------------------
/config/initializers/inflections.rb:
--------------------------------------------------------------------------------
 1 | # Be sure to restart your server when you modify this file.
 2 | 
 3 | # Add new inflection rules using the following format. Inflections
 4 | # are locale specific, and you may define rules for as many different
 5 | # locales as you wish. All of these examples are active by default:
 6 | # ActiveSupport::Inflector.inflections(:en) do |inflect|
 7 | #   inflect.plural /^(ox)$/i, '\1en'
 8 | #   inflect.singular /^(ox)en/i, '\1'
 9 | #   inflect.irregular 'person', 'people'
10 | #   inflect.uncountable %w( fish sheep )
11 | # end
12 | 
13 | # These inflection rules are supported but not enabled by default:
14 | # ActiveSupport::Inflector.inflections(:en) do |inflect|
15 | #   inflect.acronym 'RESTful'
16 | # end
17 | 


--------------------------------------------------------------------------------
/config/initializers/locales.rb:
--------------------------------------------------------------------------------
1 | # Locale lists shared across the app; frozen so they cannot be mutated at runtime.
2 | SUPPORTED_LOCALES = %i[ar be bg bn ca cs da de el en es et fa fi fr he hi hr ht
3 |                        hu hy id it ja ka km ko lt lv mk nl pl ps pt ro ru sk so
4 |                        sq sr sw th tr uk ur uz vi zh].freeze
5 | # NOTE(review): :sv appears below but not in SUPPORTED_LOCALES - confirm whether that is intentional.
6 | LANGUAGE_ANALYZER_LOCALES = %i[bn de en es fi fr hi hu it ja ko pt ru sv zh].freeze
7 | # LANGUAGE_ANALYZER_LOCALES without :fr, :ja, :ko and :zh.
8 | GENERIC_ANALYZER_LOCALES = (LANGUAGE_ANALYZER_LOCALES - %i[fr ja ko zh]).freeze


--------------------------------------------------------------------------------
/config/initializers/mime_types.rb:
--------------------------------------------------------------------------------
1 | # Be sure to restart your server when you modify this file.
2 | 
3 | # Add new mime types for use in respond_to blocks:
4 | # Mime::Type.register "text/richtext", :rtf
5 | 


--------------------------------------------------------------------------------
/config/initializers/session_store.rb:
--------------------------------------------------------------------------------
1 | # Be sure to restart your server when you modify this file.
2 | 
3 | Rails.application.config.session_store :cookie_store, key: '_i14y_session'
4 | 


--------------------------------------------------------------------------------
/config/initializers/wrap_parameters.rb:
--------------------------------------------------------------------------------
 1 | # Be sure to restart your server when you modify this file.
 2 | 
 3 | # This file contains settings for ActionController::ParamsWrapper which
 4 | # is enabled by default.
 5 | 
 6 | # Enable parameter wrapping for JSON. You can disable this by setting :format to an empty array.
 7 | ActiveSupport.on_load(:action_controller) do
 8 |   wrap_parameters format: [:json]
 9 | end
10 | 


--------------------------------------------------------------------------------
/config/locales/analysis/en_protwords.txt:
--------------------------------------------------------------------------------
1 | # Import these from https://github.com/GSA/punchcard
2 | gas
3 | fevs
4 | 


--------------------------------------------------------------------------------
/config/locales/analysis/en_synonyms.txt:
--------------------------------------------------------------------------------
1 | # Import these from https://github.com/GSA/punchcard
2 | gas, petrol


--------------------------------------------------------------------------------
/config/locales/analysis/es_protwords.txt:
--------------------------------------------------------------------------------
1 | # Import these from https://github.com/GSA/punchcard
2 | ronaldo


--------------------------------------------------------------------------------
/config/locales/analysis/es_synonyms.txt:
--------------------------------------------------------------------------------
1 | # Import these from https://github.com/GSA/punchcard


--------------------------------------------------------------------------------
/config/locales/en.yml:
--------------------------------------------------------------------------------
 1 | # Files in the config/locales directory are used for internationalization
 2 | # and are automatically loaded by Rails. If you want to use locales other
 3 | # than English, add the necessary files in this directory.
 4 | #
 5 | # To use the locales, use `I18n.t`:
 6 | #
 7 | #     I18n.t 'hello'
 8 | #
 9 | # In views, this is aliased to just `t`:
10 | #
11 | #     <%= t('hello') %>
12 | #
13 | # To use a different locale, set it with `I18n.locale`:
14 | #
15 | #     I18n.locale = :es
16 | #
17 | # This would use the information in config/locales/es.yml.
18 | #
19 | # The following keys must be escaped otherwise they will not be retrieved by
20 | # the default I18n backend:
21 | #
22 | # true, false, on, off, yes, no
23 | #
24 | # Instead, surround them with single quotes.
25 | #
26 | # en:
27 | #   'true': 'foo'
28 | #
29 | # To learn more, please read the Rails Internationalization guide
30 | # available at https://guides.rubyonrails.org/i18n.html.
31 | 
32 | en:
33 |   hello: "Hello world"
34 | 


--------------------------------------------------------------------------------
/config/puma.rb:
--------------------------------------------------------------------------------
 1 | # Puma can serve each request in a thread from an internal thread pool.
 2 | # The `threads` method setting takes two numbers: a minimum and maximum.
 3 | # Any libraries that use thread pools should be configured to match
 4 | # the maximum value specified for Puma. Default is set to 5 threads for minimum
 5 | # and maximum; this matches the default thread size of Active Record.
 6 | #
 7 | max_threads_count = ENV.fetch("RAILS_MAX_THREADS") { 5 }
 8 | min_threads_count = ENV.fetch("RAILS_MIN_THREADS") { max_threads_count }
 9 | threads min_threads_count, max_threads_count
10 | 
11 | # Specifies the `worker_timeout` threshold that Puma will use to wait before
12 | # terminating a worker in development environments.
13 | #
14 | worker_timeout 3600 if ENV.fetch("RAILS_ENV", "development") == "development"
15 | 
16 | # Specifies the `port` that Puma will listen on to receive requests; default is 3000.
17 | #
18 | port ENV.fetch("PORT") { 3000 }
19 | 
20 | # Specifies the `environment` that Puma will run in.
21 | #
22 | environment ENV.fetch("RAILS_ENV") { "development" }
23 | 
24 | # Specifies the `pidfile` that Puma will use.
25 | pidfile ENV.fetch("PIDFILE") { "tmp/pids/server.pid" }
26 | 
27 | # Specifies the number of `workers` to boot in clustered mode.
28 | # Workers are forked web server processes. If using threads and workers together
29 | # the concurrency of the application would be max `threads` * `workers`.
30 | # Workers do not work on JRuby or Windows (both of which do not support
31 | # processes).
32 | #
33 | # workers ENV.fetch("WEB_CONCURRENCY") { 2 }
34 | 
35 | # Use the `preload_app!` method when specifying a `workers` number.
36 | # This directive tells Puma to first boot the application and load code
37 | # before forking the application. This takes advantage of Copy On Write
38 | # process behavior so workers use less memory.
39 | #
40 | # preload_app!
41 | 
42 | # Allow puma to be restarted by `rails restart` command.
43 | plugin :tmp_restart
44 | 


--------------------------------------------------------------------------------
/config/routes.rb:
--------------------------------------------------------------------------------
1 | Rails.application.routes.draw do
2 |   # For details on the DSL available within this file, see https://guides.rubyonrails.org/routing.html
3 |   mount Api::Base => '/'
4 | 
5 |   # Reveal health status on /up that returns 200 if the app boots with no exceptions, otherwise 500.
6 |   # Can be used by load balancers and uptime monitors to verify that the app is live.
7 |   get 'up' => 'rails/health#show', as: :rails_health_check
8 | end
9 | 


--------------------------------------------------------------------------------
/lib/ext/string.rb:
--------------------------------------------------------------------------------
1 | class String
2 |   def extract_array
3 |     split(',').map(&:strip).map(&:downcase)
4 |   end
5 | end
6 | 


--------------------------------------------------------------------------------
/lib/namespaced_index.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | module NamespacedIndex
4 |   def index_namespace(handle = nil)
5 |     [Rails.env, I14y::APP_NAME, klass.to_s.tableize, handle].compact.join('-')
6 |   end
7 | end
8 | 


--------------------------------------------------------------------------------
/lib/read_only_access_control.rb:
--------------------------------------------------------------------------------
1 | module ReadOnlyAccessControl
2 |   class DisallowedUpdate < StandardError; end
3 | 
4 |   def check_updates_allowed
5 |     raise DisallowedUpdate unless I14y::Application.config.updates_allowed
6 |   end
7 | end
8 | 


--------------------------------------------------------------------------------
/lib/serde.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Serde
 4 |   LANGUAGE_FIELDS = %i[title description content].freeze
 5 | 
 6 |   def self.serialize_hash(hash, language)
 7 |     serialize_language(hash, language)
 8 |     hash.merge!(uri_params_hash(hash[:path])) if hash[:path].present?
 9 |     serialize_array_fields(hash)
10 |     serialize_string_fields(hash)
11 |     hash[:updated_at] = Time.now.utc
12 |     hash
13 |   end
14 | 
15 |   def self.serialize_language(hash, language)
16 |     LANGUAGE_FIELDS.each do |key|
17 |       value = hash[key.to_sym]
18 |       next if value.blank?
19 | 
20 |       sanitized_value = Loofah.fragment(value).text(encode_special_chars: false).squish
21 |       hash.store("#{key}_#{language}", sanitized_value)
22 |       hash.delete(key)
23 |     end
24 |   end
25 | 
26 |   def self.serialize_array_fields(hash)
27 |     %i[searchgov_custom1 searchgov_custom2 searchgov_custom3 tags].each do |field|
28 |       next if hash[field].is_a?(Array)
29 | 
30 |       hash[field] = hash[field].extract_array if hash[field].present?
31 |     end
32 |   end
33 | 
34 |   def self.serialize_string_fields(hash)
35 |     %i[audience content_type].each do |field|
36 |       hash[field] = hash[field].downcase if hash[field].present?
37 |     end
38 |   end
39 | 
40 |   def self.deserialize_hash(hash, language)
41 |     derivative_language_fields = LANGUAGE_FIELDS.collect { |key| "#{key}_#{language}" }
42 |     (derivative_language_fields & hash.keys).each do |field|
43 |       hash[field.chomp("_#{language}")] = hash.delete(field)
44 |     end
45 |     misc_fields = %w[basename extension url_path domain_name bigrams]
46 |     hash.except(*misc_fields)
47 |   end
48 | 
49 |   def self.uri_params_hash(path)
50 |     hash = {}
51 |     uri = URI.parse(path)
52 |     hash[:basename] = File.basename(uri.path, '.*')
53 |     hash[:extension] = File.extname(uri.path).sub(/^./, '').downcase
54 |     hash[:url_path] = uri.path
55 |     hash[:domain_name] = uri.host
56 |     hash
57 |   end
58 | end
59 | 


--------------------------------------------------------------------------------
/lib/tasks/i14y.rake:
--------------------------------------------------------------------------------
 1 | namespace :i14y do
 2 |   desc "Creates templates, indexes, and reader/writer aliases for all i14y models"
 3 |   task setup: :environment do
 4 |     Dir[Rails.root.join('app', 'templates', '*.rb')].each do |template_generator|
 5 |       entity_name = File.basename(template_generator, '.rb')
 6 |       klass = entity_name.camelize.constantize
 7 |       template_generator = klass.new
 8 |       ES.client.indices.put_template(
 9 |         name: entity_name,
10 |         body: template_generator.body,
11 |         order: 0,
12 |         create: true,
13 |         include_type_name: false
14 |       )
15 |     end
16 |     es_collections_index_name = [CollectionRepository.index_namespace, 'v1'].join('-')
17 |     CollectionRepository.new.create_index!(
18 |       index: es_collections_index_name,
19 |       include_type_name: true
20 |     )
21 |     ES.client.indices.put_alias(
22 |       index: es_collections_index_name,
23 |       name: CollectionRepository.index_name
24 |     )
25 |   end
26 | 
  # Takes one rake argument, entity_name: the plural entity to reindex
  # (the description below gives "collections" and "documents" as examples).
  desc "Copies data from one version of the i14y index to the next (e.g., collections, documents) and updates the alias"
  task :reindex, [:entity_name] => [:environment] do |_t, args|
    entity_name = args.entity_name
    # Singular form resolves the model class; plural form resolves the
    # template generator class.
    # NOTE(review): the setup task works through CollectionRepository, while
    # this task calls index_namespace/index_name=/count/create_index! on the
    # singular class itself - confirm those class methods still exist.
    persistence_model_klass = entity_name.singularize.camelize.constantize
    klass = entity_name.camelize.constantize
    template_generator = klass.new
    # Re-upload the template (no create: true here, unlike setup) before the
    # new index is created below.
    ES.client.indices.put_template(name: entity_name,
                                   body: template_generator.body,
                                   order: 0)

    # Namespace and '*' are joined with no separator.
    wildcard = [persistence_model_klass.index_namespace, '*'].join
    aliases = ES.client.indices.get_alias(name: wildcard)
    aliases.each do |old_es_index_name, alias_names|
      # Only the first alias of each index is moved.
      alias_name = alias_names['aliases'].keys.first
      # Point the model at the old index before calling count for the log line
      # (presumably count reads from index_name - confirm).
      persistence_model_klass.index_name = old_es_index_name
      new_es_index_name = next_version(old_es_index_name)
      puts "Beginning copy of #{persistence_model_klass.count} #{entity_name} from #{old_es_index_name} to #{new_es_index_name}"
      persistence_model_klass.create_index!(index: new_es_index_name)
      persistence_model_klass.index_name = new_es_index_name
      # Captured before the bulk copy so the second pass below can pick up
      # documents whose updated_at is at or after this moment.
      since_timestamp = Time.now
      # Uses the first configured Elasticsearch host only.
      host_hash = ES.client.transport.hosts.first
      base_url = "#{host_hash[:protocol]}://#{host_hash[:host]}:#{host_hash[:port]}/"
      old_es_index_url = base_url + old_es_index_name
      new_es_index_url = base_url + new_es_index_name
      # Order matters: full copy, then switch the alias, then a catch-up copy
      # filtered by since_timestamp, and only then delete the old index.
      stream2es(old_es_index_url, new_es_index_url)
      move_alias(alias_name, old_es_index_name, new_es_index_name)
      stream2es(old_es_index_url, new_es_index_url, since_timestamp)
      puts "New #{new_es_index_name} index now contains #{persistence_model_klass.count} #{entity_name}"
      ES.client.indices.delete(index: old_es_index_name)
    end
  end
58 | 
59 |   desc "Deletes templates, indexes, and reader/writer aliases for all i14y models. Useful for development."
60 |   task clear_all: :environment do
61 |     Dir[Rails.root.join('app', 'templates', '*.rb')].each do |template_generator|
62 |       entity_name = File.basename(template_generator, '.rb')
63 |       ES.client.indices.delete_template(name: entity_name) rescue Elasticsearch::Transport::Transport::Errors::NotFound
64 |     end
65 |     ES.client.indices.delete(index: [Rails.env, I14y::APP_NAME, '*'].join('-'))
66 |   end
67 | 
68 |   def next_version(index_name)
69 |     matches = index_name.match(/(.*-v)(\d+)/)
70 |     "#{matches[1]}#{matches[2].succ}"
71 |   end
72 | 
73 |   def stream2es(old_es_index_url, new_es_index_url, timestamp = nil)
74 |     options = ["--source #{old_es_index_url}", "--target #{new_es_index_url}"]
75 |     if timestamp.present?
76 |       hash = { query: { filtered: { filter: { range: { updated_at: { gte: timestamp } } } } } }
77 |       options << "--query '#{hash.to_json}'"
78 |     end
79 |     result = `#{Rails.root.join('vendor', 'stream2es')} es #{options.join(' ')}`
80 |     puts "Stream2es completed", result
81 |   end
82 | 
83 |   def move_alias(alias_name, old_index_name, new_index_name)
84 |     update_aliases_hash = { body:
85 |                               { actions: [
86 |                                 { remove: { index: old_index_name, alias: alias_name } },
87 |                                 { add: { index: new_index_name, alias: alias_name } }
88 |                               ] } }
89 |     ES.client.indices.update_aliases(update_aliases_hash)
90 |   end
91 | 
92 | end
93 | 


--------------------------------------------------------------------------------
/lib/templatable.rb:
--------------------------------------------------------------------------------
 1 | module Templatable
 2 |   def date(json, field)
 3 |     json.set! field do
 4 |       json.type "date"
 5 |     end
 6 |   end
 7 | 
 8 |   def keyword(json, field)
 9 |     json.set! field do
10 |       json.type "keyword"
11 |       json.index true
12 |     end
13 |   end
14 | 
15 |   def string_fields_template(json, type)
16 |     json.child! do
17 |       json.string_fields do
18 |         json.mapping do
19 |           json.type type
20 |           json.index true
21 |         end
22 |         json.match_mapping_type "string"
23 |         json.match "*"
24 |       end
25 |     end
26 |   end
27 | 
28 |   def linguistic_filter(json, locale, lines, name, field, type)
29 |     json.set! "#{locale}_#{name}" do
30 |       json.type type
31 |       json.set! field, lines
32 |     end
33 |   end
34 | 
35 |   def parse_configuration_file(json, type)
36 |     LANGUAGE_ANALYZER_LOCALES.map do |locale|
37 |       [locale, Rails.root.join("config", "locales", "analysis", "#{locale}_#{type}.txt")]
38 |     end.select do |locale_file_array|
39 |       File.exist? locale_file_array.last
40 |     end.each do |locale, file|
41 |       lines = get_lines_from(file)
42 |       send("#{type}_filter", json, locale, lines) if lines.any?
43 |     end
44 |   end
45 | 
46 |   def get_lines_from(file)
47 |     File.readlines(file).map(&:chomp).reject { |line| line.starts_with?("#") }
48 |   end
49 | 
50 |   def generic_stemmer(json, locale, language, degree)
51 |     json.set! "#{locale}_stem_filter" do
52 |       json.type "stemmer"
53 |       stemmer_name = degree == "standard" ? '' : "#{degree}_"
54 |       json.name "#{stemmer_name}#{language}"
55 |     end
56 |   end
57 | 
58 |   def generic_analyzer(json, locale)
59 |     json.set! "#{locale}_analyzer" do
60 |       json.type "custom"
61 |       json.filter filter_array(locale)
62 |       json.tokenizer "icu_tokenizer"
63 |       json.char_filter ["html_strip", "quotes"]
64 |     end
65 |   end
66 | 
67 | end
68 | 


--------------------------------------------------------------------------------
/lib/validations/max_bytes.rb:
--------------------------------------------------------------------------------
 1 | class MaxBytes < Grape::Validations::Validators::Base
 2 |   def validate_param!(attr_name, params)
 3 |     if params[attr_name].bytesize > max_bytes
 4 |       raise Grape::Exceptions::Validation.new(
 5 |         params: [@scope.full_name(attr_name)],
 6 |         message: "cannot be more than #{max_bytes} bytes long"
 7 |       )
 8 |     end
 9 |   end
10 | 
11 |   private
12 | 
13 |   def max_bytes
14 |     @max_bytes ||= [@option].flatten.first
15 |   end
16 | end
17 | 


--------------------------------------------------------------------------------
/public/404.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>The page you were looking for doesn't exist (404)</title>
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1">
 6 |   <style>
 7 |   body {
 8 |     background-color: #EFEFEF;
 9 |     color: #2E2F30;
10 |     text-align: center;
11 |     font-family: arial, sans-serif;
12 |     margin: 0;
13 |   }
14 | 
15 |   div.dialog {
16 |     width: 95%;
17 |     max-width: 33em;
18 |     margin: 4em auto 0;
19 |   }
20 | 
21 |   div.dialog > div {
22 |     border: 1px solid #CCC;
23 |     border-right-color: #999;
24 |     border-left-color: #999;
25 |     border-bottom-color: #BBB;
26 |     border-top: #B00100 solid 4px;
27 |     border-top-left-radius: 9px;
28 |     border-top-right-radius: 9px;
29 |     background-color: white;
30 |     padding: 7px 12% 0;
31 |     box-shadow: 0 3px 8px rgba(50, 50, 50, 0.17);
32 |   }
33 | 
34 |   h1 {
35 |     font-size: 100%;
36 |     color: #730E15;
37 |     line-height: 1.5em;
38 |   }
39 | 
40 |   div.dialog > p {
41 |     margin: 0 0 1em;
42 |     padding: 1em;
43 |     background-color: #F7F7F7;
44 |     border: 1px solid #CCC;
45 |     border-right-color: #999;
46 |     border-left-color: #999;
47 |     border-bottom-color: #999;
48 |     border-bottom-left-radius: 4px;
49 |     border-bottom-right-radius: 4px;
50 |     border-top-color: #DADADA;
51 |     color: #666;
52 |     box-shadow: 0 3px 8px rgba(50, 50, 50, 0.17);
53 |   }
54 |   </style>
55 | </head>
56 | 
57 | <body>
58 |   <!-- This file lives in public/404.html -->
59 |   <div class="dialog">
60 |     <div>
61 |       <h1>The page you were looking for doesn't exist.</h1>
62 |       <p>You may have mistyped the address or the page may have moved.</p>
63 |     </div>
64 |     <p>If you are the application owner check the logs for more information.</p>
65 |   </div>
66 | </body>
67 | </html>
68 | 


--------------------------------------------------------------------------------
/public/422.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>The change you wanted was rejected (422)</title>
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1">
 6 |   <style>
 7 |   body {
 8 |     background-color: #EFEFEF;
 9 |     color: #2E2F30;
10 |     text-align: center;
11 |     font-family: arial, sans-serif;
12 |     margin: 0;
13 |   }
14 | 
15 |   div.dialog {
16 |     width: 95%;
17 |     max-width: 33em;
18 |     margin: 4em auto 0;
19 |   }
20 | 
21 |   div.dialog > div {
22 |     border: 1px solid #CCC;
23 |     border-right-color: #999;
24 |     border-left-color: #999;
25 |     border-bottom-color: #BBB;
26 |     border-top: #B00100 solid 4px;
27 |     border-top-left-radius: 9px;
28 |     border-top-right-radius: 9px;
29 |     background-color: white;
30 |     padding: 7px 12% 0;
31 |     box-shadow: 0 3px 8px rgba(50, 50, 50, 0.17);
32 |   }
33 | 
34 |   h1 {
35 |     font-size: 100%;
36 |     color: #730E15;
37 |     line-height: 1.5em;
38 |   }
39 | 
40 |   div.dialog > p {
41 |     margin: 0 0 1em;
42 |     padding: 1em;
43 |     background-color: #F7F7F7;
44 |     border: 1px solid #CCC;
45 |     border-right-color: #999;
46 |     border-left-color: #999;
47 |     border-bottom-color: #999;
48 |     border-bottom-left-radius: 4px;
49 |     border-bottom-right-radius: 4px;
50 |     border-top-color: #DADADA;
51 |     color: #666;
52 |     box-shadow: 0 3px 8px rgba(50, 50, 50, 0.17);
53 |   }
54 |   </style>
55 | </head>
56 | 
57 | <body>
58 |   <!-- This file lives in public/422.html -->
59 |   <div class="dialog">
60 |     <div>
61 |       <h1>The change you wanted was rejected.</h1>
62 |       <p>Maybe you tried to change something you didn't have access to.</p>
63 |     </div>
64 |     <p>If you are the application owner check the logs for more information.</p>
65 |   </div>
66 | </body>
67 | </html>
68 | 


--------------------------------------------------------------------------------
/public/500.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>We're sorry, but something went wrong (500)</title>
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1">
 6 |   <style>
 7 |   body {
 8 |     background-color: #EFEFEF;
 9 |     color: #2E2F30;
10 |     text-align: center;
11 |     font-family: arial, sans-serif;
12 |     margin: 0;
13 |   }
14 | 
15 |   div.dialog {
16 |     width: 95%;
17 |     max-width: 33em;
18 |     margin: 4em auto 0;
19 |   }
20 | 
21 |   div.dialog > div {
22 |     border: 1px solid #CCC;
23 |     border-right-color: #999;
24 |     border-left-color: #999;
25 |     border-bottom-color: #BBB;
26 |     border-top: #B00100 solid 4px;
27 |     border-top-left-radius: 9px;
28 |     border-top-right-radius: 9px;
29 |     background-color: white;
30 |     padding: 7px 12% 0;
31 |     box-shadow: 0 3px 8px rgba(50, 50, 50, 0.17);
32 |   }
33 | 
34 |   h1 {
35 |     font-size: 100%;
36 |     color: #730E15;
37 |     line-height: 1.5em;
38 |   }
39 | 
40 |   div.dialog > p {
41 |     margin: 0 0 1em;
42 |     padding: 1em;
43 |     background-color: #F7F7F7;
44 |     border: 1px solid #CCC;
45 |     border-right-color: #999;
46 |     border-left-color: #999;
47 |     border-bottom-color: #999;
48 |     border-bottom-left-radius: 4px;
49 |     border-bottom-right-radius: 4px;
50 |     border-top-color: #DADADA;
51 |     color: #666;
52 |     box-shadow: 0 3px 8px rgba(50, 50, 50, 0.17);
53 |   }
54 |   </style>
55 | </head>
56 | 
57 | <body>
58 |   <!-- This file lives in public/500.html -->
59 |   <div class="dialog">
60 |     <div>
61 |       <h1>We're sorry, but something went wrong.</h1>
62 |     </div>
63 |     <p>If you are the application owner check the logs for more information.</p>
64 |   </div>
65 | </body>
66 | </html>
67 | 


--------------------------------------------------------------------------------
/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GSA/i14y/f3be9325fa45d5dec1e716a4a9ffeb6e0cf56b22/public/favicon.ico


--------------------------------------------------------------------------------
/public/robots.txt:
--------------------------------------------------------------------------------
1 | # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
2 | #
3 | # To ban all spiders from the entire site uncomment the next two lines:
4 | # User-agent: *
5 | # Disallow: /
6 | 


--------------------------------------------------------------------------------
/spec/classes/document_query_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | describe DocumentQuery do
 6 |   let(:query) { 'test' }
 7 |   let(:options) do
 8 |     { query: query }
 9 |   end
10 |   let(:document_query) { described_class.new(options) }
11 |   let(:body) { document_query.body.to_hash }
12 | 
13 |   describe '#body' do
14 |     context 'when a query includes stopwords' do
15 |       let(:suggestion_hash) { body[:suggest][:suggestion] }
16 |       let(:query) { 'this document IS about the theater' }
17 | 
18 |       it 'strips the stopwords from the query' do
19 |         expect(suggestion_hash[:text]).to eq 'document about theater'
20 |       end
21 |     end
22 | 
23 |     it 'contains aggregations' do
24 |       expect(body[:aggregations]).to match(
25 |         hash_including(:audience,
26 |                        :changed,
27 |                        :content_type,
28 |                        :created,
29 |                        :mime_type,
30 |                        :searchgov_custom1,
31 |                        :searchgov_custom2,
32 |                        :searchgov_custom3,
33 |                        :tags)
34 |       )
35 |     end
36 | 
37 |     context 'when the query is blank' do
38 |       let(:query) { '' }
39 | 
40 |       it 'does not contain aggregations' do
41 |         expect(body[:aggregations]).to be_nil
42 |       end
43 |     end
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/spec/classes/document_search_results_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | describe DocumentSearchResults do
 6 |   let(:document_search_results) { described_class.new(result) }
 7 | 
 8 |   describe '#suggestion' do
 9 |     subject(:suggestion) { document_search_results.suggestion }
10 | 
11 |     context 'when no hits and suggestions are present' do
12 |       let(:result) do
13 |         { 'hits' => { 'total' => 0, 'hits' => [] },
14 |           'aggregations' => {},
15 |           'suggest' => suggestion_hash }
16 |       end
17 |       let(:suggestion_hash) do
18 |         { 'suggestion' =>
19 |                             [{ 'text' => 'blue',
20 |                                'options' => [{ 'text' => 'bulk',
21 |                                                'highlighted' => 'bulk' }] }] }
22 |       end
23 | 
24 |       it { is_expected.to match(hash_including({ 'text' => 'bulk', 'highlighted' => 'bulk' })) }
25 |     end
26 |   end
27 | 
28 |   describe '#results' do
29 |     subject(:results) { document_search_results.results }
30 | 
31 |     context 'when hits are present' do
32 |       let(:result) do
33 |         { 'hits' => { 'total' => 1, 'hits' => [hits] },
34 |           'aggregations' => {},
35 |           'suggest' => [] }
36 |       end
37 |       let(:hits) do
38 |         { '_type' => '_doc',
39 |           '_source' => { 'path' => 'https://search.gov/about/',
40 |                          'created' => '2021-02-03T00:00:00.000-05:00',
41 |                          'language' => 'en',
42 |                          'title_en' => 'About Search.gov | Search.gov' },
43 |           'highlight' => { 'content_en' => ['Some highlighted content'] } }
44 |       end
45 | 
46 |       it {
47 |         is_expected.to match(array_including({ 'path' => 'https://search.gov/about/',
48 |                                                'created' => '2021-02-03 05:00:00 UTC',
49 |                                                'language' => 'en',
50 |                                                'title' => 'About Search.gov | Search.gov',
51 |                                                'content' => 'Some highlighted content' }))
52 |       }
53 |     end
54 |   end
55 | 
56 |   describe '#aggregations' do
57 |     subject(:aggregations) { document_search_results.aggregations }
58 | 
59 |     context 'when aggregations are present' do
60 |       let(:result) do
61 |         { 'hits' => { 'total' => 1, 'hits' => [hits] },
62 |           'aggregations' => aggregations_hash,
63 |           'suggest' => [] }
64 |       end
65 |       let(:hits) do
66 |         { '_type' => '_doc',
67 |           '_source' => { 'path' => 'https://search.gov/about/',
68 |                          'created' => '2021-02-03T00:00:00.000-05:00',
69 |                          'language' => 'en',
70 |                          'title_en' => 'About Search.gov | Search.gov' },
71 |           'highlight' => { 'content_en' => ['Some highlighted content'] } }
72 |       end
73 |       let(:aggregations_hash) do
74 |         { 'content_type' => { 'doc_count_error_upper_bound' => 0,
75 |                               'sum_other_doc_count' => 0,
76 |                               'buckets' => [{ 'key' => 'article',
77 |                                               'doc_count' => 1 }] },
78 |           'tags' => { 'doc_count_error_upper_bound' => 0,
79 |                       'sum_other_doc_count' => 0,
80 |                       'buckets' => [] } }
81 |       end
82 | 
83 |       it { is_expected.to match(array_including({ content_type: [{ agg_key: 'article', doc_count: 1 }] })) }
84 | 
85 |       it { is_expected.not_to include(hash_including(:tags)) }
86 |     end
87 |   end
88 | end
89 | 


--------------------------------------------------------------------------------
/spec/classes/query_parser_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | describe QueryParser do
 6 |   let(:query) { 'govt site stuff' }
 7 |   let(:parsed_query) { described_class.new(query) }
 8 | 
 9 |   context 'one or more site: params in query string' do
10 |     let(:query) do
11 |       '(site:agency.gov/archives/2015 Site:archive.agency.gov/ site:archive2.agency.gov) govt site stuff'
12 |     end
13 | 
14 |     it 'should extract an array of SiteFilter instances' do
15 |       site_filters = parsed_query.site_filters
16 |       expect(site_filters[:included_sites].size).to eq(3)
17 |       expect(site_filters[:included_sites][0].domain_name).to eq('agency.gov')
18 |       expect(site_filters[:included_sites][0].url_path).to eq('/archives/2015')
19 |       expect(site_filters[:included_sites][1].domain_name).to eq('archive.agency.gov')
20 |       expect(site_filters[:included_sites][1].url_path).to be_nil
21 |       expect(site_filters[:included_sites][2].domain_name).to eq('archive2.agency.gov')
22 |       expect(site_filters[:included_sites][2].url_path).to be_nil
23 |     end
24 | 
25 |     it 'should make the resulting query available' do
26 |       expect(parsed_query.stripped_query).to eq('govt site stuff')
27 |     end
28 | 
29 |     it 'does not alter the original query' do #because here there be bugs
30 |       expect { described_class.new(query) }.not_to change { query }
31 |     end
32 |   end
33 | 
34 |   context 'one or more -site: params in query string' do
35 |     let(:site_params_parser) { described_class.new('(-site:excluded3.agency.gov/archives/2016 -site:excluded.agency.gov -Site:excluded2.agency.gov) govt site stuff') }
36 | 
37 |     it 'should extract an array of :excluded_sites as SiteFilters' do
38 |       site_filters = site_params_parser.site_filters
39 |       expect(site_filters[:excluded_sites].size).to eq(3)
40 |       expect(site_filters[:excluded_sites][0].domain_name).to eq('excluded3.agency.gov')
41 |       expect(site_filters[:excluded_sites][0].url_path).to eq('/archives/2016')
42 |       expect(site_filters[:excluded_sites][1].domain_name).to eq('excluded.agency.gov')
43 |       expect(site_filters[:excluded_sites][1].url_path).to be_nil
44 |       expect(site_filters[:excluded_sites][2].domain_name).to eq('excluded2.agency.gov')
45 |       expect(site_filters[:excluded_sites][2].url_path).to be_nil
46 |     end
47 | 
48 |     it 'should make the resulting query available' do
49 |       expect(site_params_parser.stripped_query).to eq('govt site stuff')
50 |     end
51 |   end
52 | end
53 | 


--------------------------------------------------------------------------------
/spec/config/initializers/filter_parameter_logging_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | describe 'ActiveSupport::ParameterFilter' do
 6 |   let(:config) { I14y::Application.config }
 7 |   let(:parameter_filter) { ActiveSupport::ParameterFilter.new(config.filter_parameters) }
 8 | 
 9 |   it 'filters query from logs' do
10 |     expect(config.filter_parameters.to_s).to match(/:query\b/)
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/spec/lib/serde_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'rails_helper'
  4 | 
  5 | describe Serde do
  describe '.serialize_hash' do
    subject(:serialize_hash) { described_class.serialize_hash(original_hash, 'en') }

    # A full document with every field the serializer touches.
    let(:original_hash) do
      ActiveSupport::HashWithIndifferentAccess.new(
        { 'title' => 'my title',
          'description' => 'my description',
          'content' => 'my content',
          'path' => 'http://www.foo.gov/bar.html',
          'promote' => false,
          'audience' => 'Everyone',
          'content_type' => 'EVENT',
          'tags' => 'this that',
          'searchgov_custom1' => 'this, Custom, CONTENT',
          'searchgov_custom2' => 'That custom, Content',
          'searchgov_custom3' => '123',
          'created' => '2018-01-01T12:00:00Z',
          'changed' => '2018-02-01T12:00:00Z',
          'created_at' => '2018-01-01T12:00:00Z',
          'updated_at' => '2018-02-01T12:00:00Z' }
      )
    end

    it 'stores the language fields with the language suffix' do
      suffixed_fields = { 'title_en' => 'my title',
                          'description_en' => 'my description',
                          'content_en' => 'my content' }

      expect(serialize_hash).to match(hash_including(suffixed_fields))
    end

    it 'stores downcased audience' do
      expect(serialize_hash).to match(hash_including({ 'audience' => 'everyone' }))
    end

    it 'stores downcased content_type' do
      expect(serialize_hash).to match(hash_including({ 'content_type' => 'event' }))
    end

    it 'stores tags as a downcased array' do
      expect(serialize_hash).to match(hash_including({ 'tags' => ['this that'] }))
    end

    it 'stores searchgov_custom fields as downcased arrays' do
      custom_fields = { 'searchgov_custom1' => %w[this custom content],
                        'searchgov_custom2' => ['that custom', 'content'],
                        'searchgov_custom3' => ['123'] }

      expect(serialize_hash).to match(hash_including(custom_fields))
    end

    it 'updates the updated_at value' do
      expect(serialize_hash[:updated_at]).to be > 1.second.ago
    end

    context 'when language fields contain HTML/CSS' do
      let(:html) do
        <<~HTML
          <div style="height: 100px; width: 100px;"></div>
          <p>hello & goodbye!</p>
        HTML
      end
      let(:original_hash) do
        ActiveSupport::HashWithIndifferentAccess.new(
          title: '<b><a href="http://foo.com/">foo</a></b><img src="bar.jpg">',
          description: html,
          content: "this <b>is</b> <a href='http://gov.gov/url.html'>html</a>"
        )
      end

      it 'sanitizes the language fields' do
        expect(serialize_hash).to match(
          hash_including(title_en: 'foo',
                         description_en: 'hello & goodbye!',
                         content_en: 'this is html')
        )
      end
    end

    context 'when the tags are a comma-delimited list' do
      let(:original_hash) { { tags: 'this, that' } }

      it 'converts the tags to an array' do
        expect(serialize_hash).to match(hash_including(tags: %w[this that]))
      end
    end
  end
104 | 
105 |   describe '.deserialize_hash' do
106 |     subject(:deserialize_hash) do
107 |       described_class.deserialize_hash(original_hash, :en)
108 |     end
109 | 
110 |     let(:original_hash) do
111 |       ActiveSupport::HashWithIndifferentAccess.new(
112 |         { 'created_at' => '2018-08-09T21:36:50.087Z',
113 |           'updated_at' => '2018-08-09T21:36:50.087Z',
114 |           'path' => 'http://www.foo.gov/bar.html',
115 |           'language' => 'en',
116 |           'created' => '2018-08-09T19:36:50.087Z',
117 |           'updated' => '2018-08-09T14:36:50.087-07:00',
118 |           'changed' => '2018-08-09T14:36:50.087-07:00',
119 |           'promote' => true,
120 |           'tags' => 'this that',
121 |           'title_en' => 'my title',
122 |           'description_en' => 'my description',
123 |           'content_en' => 'my content',
124 |           'basename' => 'bar',
125 |           'extension' => 'html',
126 |           'url_path' => '/bar.html',
127 |           'domain_name' => 'www.foo.gov' }
128 |       )
129 |     end
130 |     let(:language_field_keys) { %i[title description content] }
131 | 
132 |     it 'removes the language suffix from the text fields' do
133 |       expect(deserialize_hash).to eq(
134 |         { 'created_at' => '2018-08-09T21:36:50.087Z',
135 |           'updated_at' => '2018-08-09T21:36:50.087Z',
136 |           'path' => 'http://www.foo.gov/bar.html',
137 |           'language' => 'en',
138 |           'created' => '2018-08-09T19:36:50.087Z',
139 |           'title' => 'my title',
140 |           'description' => 'my description',
141 |           'content' => 'my content',
142 |           'updated' => '2018-08-09T14:36:50.087-07:00',
143 |           'changed' => '2018-08-09T14:36:50.087-07:00',
144 |           'promote' => true,
145 |           'tags' => 'this that' }
146 |       )
147 |     end
148 |   end
149 | 
150 |   describe '.uri_params_hash' do
151 |     subject(:result) { described_class.uri_params_hash(path) }
152 | 
153 |     let(:path) { 'https://www.agency.gov/directory/page1.html' }
154 | 
155 |     it 'computes basename' do
156 |       expect(result[:basename]).to eq('page1')
157 |     end
158 | 
159 |     it 'computes filename extension' do
160 |       expect(result[:extension]).to eq('html')
161 |     end
162 | 
163 |     context 'when the extension has uppercase characters' do
164 |       let(:path) { 'https://www.agency.gov/directory/PAGE1.PDF' }
165 | 
166 |       it 'computes a downcased version of filename extension' do
167 |         expect(result[:extension]).to eq('pdf')
168 |       end
169 |     end
170 | 
171 |     context 'when there is no filename extension' do
172 |       let(:path) { 'https://www.agency.gov/directory/page1' }
173 | 
174 |       it 'computes an empty filename extension' do
175 |         expect(result[:extension]).to eq('')
176 |       end
177 |     end
178 | 
179 |     it 'computes url_path' do
180 |       expect(result[:url_path]).to eq('/directory/page1.html')
181 |     end
182 | 
183 |     it 'computes domain_name' do
184 |       expect(result[:domain_name]).to eq('www.agency.gov')
185 |     end
186 |   end
187 | end
188 | 


--------------------------------------------------------------------------------
/spec/lib/validations/max_bytes_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | TWO_BYTE_CHARACTER = "\u00b5"
 6 | 
 7 | describe MaxBytes do
 8 |   subject(:validator) { described_class.new(attrs, options, required, scope.new) }
 9 | 
10 |   let(:attrs) { nil }
11 |   let(:options) { [10] }
12 |   let(:required) { false }
13 |   let(:scope) do
14 |     Struct.new(:opts) do
15 |       def full_name(name); end
16 |     end
17 |   end
18 | 
19 |   describe 'validate!' do
20 |     let(:params) { { some_param: value_to_validate } }
21 |     context 'when the value of the param being validated has fewer than tha maximum number of bytes' do
22 |       let(:value_to_validate) { TWO_BYTE_CHARACTER }
23 | 
24 |       it 'does not raise a validation exception' do
25 |         expect { validator.validate_param!(:some_param, params) }.to_not raise_error
26 |       end
27 |     end
28 | 
29 |     context 'when the value of the param being validated has exactly the maximum number of bytes' do
30 |       let(:value_to_validate) { TWO_BYTE_CHARACTER * 5 }
31 | 
32 |       it 'does not raise a validation exception' do
33 |         expect { validator.validate_param!(:some_param, params) }.to_not raise_error
34 |       end
35 |     end
36 | 
37 |     context 'when the value of the param being validated has more than tha maximum number of bytes' do
38 |       let(:value_to_validate) { TWO_BYTE_CHARACTER * 5 + 'z' }
39 | 
40 |       it 'raises a validation exception' do
41 |         expect { validator.validate_param!(:some_param, params) }.to raise_error(Grape::Exceptions::Validation, 'cannot be more than 10 bytes long')
42 |       end
43 |     end
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/spec/models/collection_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | describe Collection do # Model specs for Collection, built here from an id and a token.
 6 |   subject(:collection) { described_class.new(collection_params) }
 7 | 
 8 |   let(:id) { 'agency_blogs' }
 9 |   let(:token) { 'secret' }
10 |   let(:collection_params) do
11 |     {
12 |       id: id,
13 |       token: token
14 |     }
15 |   end
16 | 
17 |   it { is_expected.to be_valid }
18 | 
19 |   describe 'attributes' do
20 |     it do # created_at/updated_at are not passed in, yet are expected to be Time instances
21 |       is_expected.to have_attributes(
22 |         id: 'agency_blogs',
23 |         token: 'secret',
24 |         created_at: an_instance_of(Time),
25 |         updated_at: an_instance_of(Time)
26 |       )
27 |     end
28 |   end
29 | 
30 |   describe 'validations' do
31 |     it { is_expected.to validate_presence_of(:token) }
32 |   end
33 | 
34 |   describe '#last_document_sent' do
35 |     subject(:last_document_sent) { collection.last_document_sent }
36 | 
37 |     context 'when something goes wrong' do
38 |       before do # make every DocumentRepository#search call raise StandardError
39 |         allow_any_instance_of(DocumentRepository).
40 |           to receive(:search).and_raise(StandardError)
41 |       end
42 | 
43 |       it { is_expected.to be nil } # the error is expected to be swallowed and nil returned
44 |     end
45 |   end
46 | end
47 | 


--------------------------------------------------------------------------------
/spec/models/document_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'rails_helper'
  4 | 
  5 | describe Document do # Model specs for Document: attribute assignment, defaults and validations.
  6 |   subject(:document) { described_class.new(valid_params) }
  7 | 
  8 |   let(:valid_params) do # attribute set shared by the examples below
  9 |     {
 10 |       id: 'a123',
 11 |       title: 'My Title',
 12 |       path: 'http://www.agency.gov/page1.html',
 13 |       audience: 'Everyone',
 14 |       changed: DateTime.new(2020, 1, 2),
 15 |       click_count: 5,
 16 |       content: 'some content',
 17 |       content_type: 'article',
 18 |       created: DateTime.new(2020, 1, 1),
 19 |       description: 'My Description',
 20 |       thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png',
 21 |       language: 'en',
 22 |       mime_type: 'text/html',
 23 |       promote: true,
 24 |       searchgov_custom1: 'custom content with spaces',
 25 |       searchgov_custom2: 'comma, separated, custom, content',
 26 |       searchgov_custom3: '',
 27 |       tags: 'this,that'
 28 |     }
 29 |   end
 30 | 
 31 |   describe 'attributes' do
 32 |     it do # every value from valid_params is expected to be readable back unchanged
 33 |       is_expected.to have_attributes(
 34 |         id: 'a123',
 35 |         title: 'My Title',
 36 |         path: 'http://www.agency.gov/page1.html',
 37 |         audience: 'Everyone',
 38 |         changed: DateTime.new(2020, 1, 2),
 39 |         click_count: 5,
 40 |         content: 'some content',
 41 |         content_type: 'article',
 42 |         created: DateTime.new(2020, 1, 1),
 43 |         description: 'My Description',
 44 |         thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png',
 45 |         language: 'en',
 46 |         mime_type: 'text/html',
 47 |         promote: true,
 48 |         searchgov_custom1: 'custom content with spaces',
 49 |         searchgov_custom2: 'comma, separated, custom, content',
 50 |         searchgov_custom3: '',
 51 |         tags: 'this,that'
 52 |       )
 53 |     end
 54 | 
 55 |     it 'sets default timestamps' do # created_at/updated_at are absent from valid_params
 56 |       expect(document.created_at).to be_a Time
 57 |       expect(document.updated_at).to be_a Time
 58 |     end
 59 | 
 60 |     context 'with the minimum required params' do # only language and path are supplied
 61 |       subject(:document) do
 62 |         described_class.new(
 63 |           language: 'en',
 64 |           path: 'https://foo.gov'
 65 |         )
 66 |       end
 67 | 
 68 |       it { is_expected.to be_valid }
 69 |     end
 70 |   end
 71 | 
 72 |   describe 'validations' do
 73 |     it { is_expected.to validate_presence_of(:path) }
 74 |     it { is_expected.to validate_presence_of(:language) }
 75 |     it { is_expected.to be_valid }
 76 | 
 77 |     context 'when the MIME type is invalid' do
 78 |       subject(:document) do # valid_params with only mime_type overridden
 79 |         described_class.new(valid_params.merge(mime_type: 'text/not_a_valid_mime_type'))
 80 |       end
 81 | 
 82 |       it { is_expected.to be_invalid }
 83 | 
 84 |       it 'generates an error message' do
 85 |         document.valid? # run validations before inspecting document.errors
 86 |         expect(document.errors.messages[:mime_type]).to include 'is invalid'
 87 |       end
 88 |     end
 89 | 
 90 |     context 'when the thumbnail_url is invalid' do
 91 |       subject(:document) do # valid_params with only thumbnail_url overridden
 92 |         described_class.new(valid_params.merge(thumbnail_url: 'invalid thumbnail url'))
 93 |       end
 94 | 
 95 |       it { is_expected.to be_invalid }
 96 | 
 97 |       it 'generates an error message' do
 98 |         document.valid? # run validations before inspecting document.errors
 99 |         expect(document.errors.messages[:thumbnail_url]).to include 'is invalid'
100 |       end
101 |     end
102 |   end
103 | end
104 | 


--------------------------------------------------------------------------------
/spec/rails_helper.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # This file is copied to spec/ when you run 'rails generate rspec:install'
 4 | ENV['RAILS_ENV'] ||= 'test'
 5 | require 'spec_helper'
 6 | require File.expand_path('../../config/environment', __FILE__)
 7 | require 'rspec/rails'
 8 | require 'test_services'
 9 | 
10 | # Requires supporting ruby files with custom matchers and macros, etc, in
11 | # spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are
12 | # run as spec files by default. This means that files in spec/support that end
13 | # in _spec.rb will both be required and run as specs, causing the specs to be
14 | # run twice. It is recommended that you do not name files matching this glob to
15 | # end with _spec.rb. You can configure this pattern with the --pattern
16 | # option on the command line or in ~/.rspec, .rspec or `.rspec-local`.
17 | Dir[Rails.root.join('spec/support/**/*.rb')].each { |f| require f }
18 | 
19 | RSpec.configure do |config|
20 |   # RSpec Rails can automatically mix in different behaviours to your tests
21 |   # based on their file location, for example enabling you to call `get` and
22 |   # `post` in specs under `spec/controllers`.
23 |   #
24 |   # You can disable this behaviour by removing the line below, and instead
25 |   # explicitly tag your specs with their type, e.g.:
26 |   #
27 |   #     RSpec.describe UsersController, :type => :controller do
28 |   #       # ...
29 |   #     end
30 |   #
31 |   # The different available types are documented in the features, such as in
32 |   # https://relishapp.com/rspec/rspec-rails/docs
33 |   config.include DocumentCrud
34 |   config.include TestServices
35 | 
36 |   config.infer_spec_type_from_file_location!
37 | 
38 |   config.before(:suite) do
39 |     TestServices::delete_es_indexes
40 |     TestServices::create_collections_index
41 |   end
42 | 
43 |   config.after(:suite) do
44 |     TestServices::delete_es_indexes
45 |   end
46 | end
47 | 


--------------------------------------------------------------------------------
/spec/repositories/collection_repository_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | describe CollectionRepository do # Specs for the repository that handles Collection objects.
 6 |   subject(:repository) { described_class.new }
 7 | 
 8 |   it_behaves_like 'a repository' # shared examples defined outside this file
 9 | 
10 |   describe '.klass' do
11 |     subject(:klass) { described_class.klass }
12 | 
13 |     it { is_expected.to eq(Collection) }
14 |   end
15 | 
16 |   it 'uses the collections index namespace' do # the expected name carries the 'test' prefix
17 |     expect(repository.index_name).to eq('test-i14y-collections')
18 |   end
19 | end
20 | 


--------------------------------------------------------------------------------
/spec/repositories/document_repository_spec.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rails_helper'
 4 | 
 5 | describe DocumentRepository do
 6 |   subject(:repository) { described_class.new }
 7 | 
 8 |   it_behaves_like 'a repository'
 9 | 
10 |   describe '.klass' do
11 |     subject(:klass) { described_class.klass }
12 | 
13 |     it { is_expected.to eq(Document) }
14 |   end
15 | 
16 |   describe '.index_namespace' do
17 |     subject(:index_namespace) { described_class.index_namespace('agency_blogs') }
18 | 
19 |     it 'returns the ES index namespace for the specified collection handle' do
20 |       expect(index_namespace).to eq 'test-i14y-documents-agency_blogs'
21 |     end
22 |   end
23 | 
24 |   describe '.serialize' do
25 |     subject(:serialize) { repository.serialize(document) }
26 | 
27 |     let(:document) do
28 |       Document.new(
29 |         language: 'en',
30 |         path: 'http://www.agency.gov/page1.html'
31 |       )
32 |     end
33 | 
34 |     it 'serializes the document' do
35 |       expect(serialize).to match(hash_including(
36 |         language: 'en',
37 |         path: 'http://www.agency.gov/page1.html'
38 |       ))
39 |     end
40 |   end
41 | 
42 |   describe 'deserialization' do
43 |     context 'when a document has been persisted' do
44 |       let(:document_params) do
45 |         {
46 |           id: 'a123',
47 |           language: 'en',
48 |           path: 'http://www.agency.gov/page1.html',
49 |           title: 'My Title',
50 |           created: DateTime.new(2020, 1, 1),
51 |           changed: DateTime.new(2020, 1, 2),
52 |           description: 'My Description',
53 |           content: 'some content',
54 |           promote: true,
55 |           tags: 'this,that',
56 |           click_count: 5
57 |         }
58 |       end
59 | 
60 |       before do
61 |         create_document(document_params, repository)
62 |       end
63 | 
64 |       it 'deserializes the document' do
65 |         document = repository.find('a123')
66 |         expect(document.id).to eq('a123')
67 |         expect(document.language).to eq('en')
68 |         expect(document.path).to eq('http://www.agency.gov/page1.html')
69 |         expect(document.title).to eq('My Title')
70 |         expect(document.description).to eq('My Description')
71 |         expect(document.content).to eq('some content')
72 |         expect(document.promote).to eq(true)
73 |         expect(document.tags).to eq(%w[this that])
74 |         expect(document.click_count).to eq(5)
75 |       end
76 |     end
77 |   end
78 | end
79 | 


--------------------------------------------------------------------------------
/spec/requests/api/v1/collections_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'rails_helper'
  4 | 
  5 | describe Api::V1::Collections do
  6 |   let(:valid_session) do # HTTP Basic auth header built from the admin credentials in ENV
  7 |     credentials = ActionController::HttpAuthentication::Basic.encode_credentials(
  8 |       ENV['I14Y_ADMIN_USER'], ENV['I14Y_ADMIN_PASSWORD']
  9 |     )
 10 |     { 'HTTP_AUTHORIZATION' => credentials }
 11 |   end
 12 |   let(:valid_params) do # request params used to create the 'agency_blogs' collection
 13 |     { handle: 'agency_blogs', token: 'secret' }
 14 |   end
 15 |   let(:allow_updates) { true }
 16 |   let(:maintenance_message) { nil }
 17 |   let(:documents_index_name) { DocumentRepository.index_namespace('agency_blogs') }
 18 |   let(:document_repository) do # repository bound to the 'agency_blogs' documents index
 19 |     DocumentRepository.new(index_name: documents_index_name)
 20 |   end
 21 | 
 22 |   before do # apply the per-example values of allow_updates and maintenance_message
 23 |     I14y::Application.config.updates_allowed = allow_updates
 24 |     I14y::Application.config.maintenance_message = maintenance_message
 25 |   end
 26 | 
 27 |   after do # re-enable updates once each example has finished
 28 |     I14y::Application.config.updates_allowed = true # NOTE(review): maintenance_message is not restored here
 29 |   end
 30 | 
 31 |   describe 'POST /api/v1/collections' do
 32 |     context 'when successful' do
 33 |       before do
 34 |         clear_index(collections_index_name)
 35 |         post '/api/v1/collections', params: valid_params, headers: valid_session
 36 |       end
 37 | 
 38 |       it 'returns success message as JSON' do
 39 |         expect(response).to have_http_status(:created)
 40 |         expect(JSON.parse(response.body)).to match(
 41 |           hash_including('status' => 200,
 42 |                          'developer_message' => 'OK',
 43 |                          'user_message' => 'Your collection was successfully created.')
 44 |         )
 45 |       end
 46 | 
 47 |       it 'uses the collection handle as the Elasticsearch ID' do
 48 |         expect(ES.collection_repository.find('agency_blogs')).to be_present
 49 |       end
 50 | 
 51 |       it 'stores the appropriate fields in the Elasticsearch collection' do
 52 |         collection = ES.collection_repository.find('agency_blogs')
 53 |         expect(collection.token).to eq('secret')
 54 |         expect(collection.created_at).to be_an_instance_of(Time)
 55 |         expect(collection.updated_at).to be_an_instance_of(Time)
 56 |       end
 57 | 
 58 |       it_behaves_like 'a data modifying request made during read-only mode'
 59 |     end
 60 | 
 61 |     context 'when a required parameter is empty/blank' do
 62 |       before do
 63 |         invalid_params = {}
 64 |         post '/api/v1/collections', params: invalid_params, headers: valid_session
 65 |       end
 66 | 
 67 |       it 'returns failure message as JSON' do
 68 |         expect(response).to have_http_status(:bad_request)
 69 |         expect(JSON.parse(response.body)).to match(
 70 |           hash_including('status' => 400,
 71 |                          'developer_message' => 'handle is missing, handle is empty, token is missing, token is empty')
 72 |         )
 73 |       end
 74 |     end
 75 | 
 76 |     context 'when handle uses illegal characters' do
 77 |       let(:invalid_params) do
 78 |         { handle: 'agency-blogs', token: 'secret' }
 79 |       end
 80 | 
 81 |       before do
 82 |         post '/api/v1/collections', params: invalid_params, headers: valid_session
 83 |       end
 84 | 
 85 |       it 'returns failure message as JSON' do
 86 |         expect(response).to have_http_status(:bad_request)
 87 |         expect(JSON.parse(response.body)).to match(
 88 |           hash_including('status' => 400,
 89 |                          'developer_message' => 'handle is invalid')
 90 |         )
 91 |       end
 92 |     end
 93 | 
 94 |     context 'when authentication/authorization fails' do
 95 |       before do
 96 |         bad_credentials = ActionController::HttpAuthentication::Basic.encode_credentials 'nope', 'wrong'
 97 | 
 98 |         valid_session = { 'HTTP_AUTHORIZATION' => bad_credentials }
 99 |         post '/api/v1/collections', params: valid_params, headers: valid_session
100 |       end
101 | 
102 |       it 'returns error message as JSON' do
103 |         expect(response).to have_http_status(:bad_request)
104 |         expect(JSON.parse(response.body)).to match(
105 |           hash_including('status' => 400,
106 |                          'developer_message' => 'Unauthorized')
107 |         )
108 |       end
109 |     end
110 | 
111 |     context 'when something terrible happens' do
112 |       before do
113 |         allow(Collection).to receive(:new) { raise_error(Exception) }
114 |         post '/api/v1/collections', params: valid_params, headers: valid_session
115 |       end
116 | 
117 |       it 'returns failure message as JSON' do
118 |         expect(response).to have_http_status(:internal_server_error)
119 |         expect(JSON.parse(response.body)).to match(
120 |           hash_including('status' => 500,
121 |                          'developer_message' => "Something unexpected happened and we've been alerted.")
122 |         )
123 |       end
124 |     end
125 |   end
126 | 
127 |   describe 'DELETE /api/v1/collections/{handle}' do # Collection deletion endpoint.
128 |     context 'when successful' do
129 |       before do # save the collection directly through the repository, then delete it via the API
130 |         clear_index(collections_index_name)
131 |         collection = Collection.new(id: 'agency_blogs', token: 'secret')
132 |         ES.collection_repository.save(collection)
133 |         delete '/api/v1/collections/agency_blogs', headers: valid_session
134 |       end
135 | 
136 |       it 'returns success message as JSON' do
137 |         expect(response).to have_http_status(:ok)
138 |         expect(JSON.parse(response.body)).to match(
139 |           hash_including('status' => 200,
140 |                          'developer_message' => 'OK',
141 |                          'user_message' => 'Your collection was successfully deleted.')
142 |         )
143 |       end
144 | 
145 |       it 'deletes the collection' do
146 |         expect(ES.collection_repository).not_to exist('agency_blogs')
147 |       end
148 | 
149 |       it_behaves_like 'a data modifying request made during read-only mode'
150 |     end
151 |   end
152 | 
153 |   describe 'GET /api/v1/collections/{handle}' do # Collection details endpoint, including document stats.
154 |     context 'when successful' do
155 |       before do # recreate the collection through the API, then clear its documents index
156 |         clear_index(collections_index_name)
157 |         post '/api/v1/collections', params: valid_params, headers: valid_session
158 |         clear_index(documents_index_name)
159 |       end
160 | 
161 |       let(:datetime) { DateTime.now.utc } # NOTE(review): not referenced by any example in this context
162 |       let(:hash1) do
163 |         {
164 |           _id: 'a1',
165 |           language: 'en',
166 |           title: 'title 1 common content',
167 |           description: 'description 1 common content',
168 |           created: Time.zone.now,
169 |           path: 'http://www.agency.gov/page1.html'
170 |         }
171 |       end
172 |       let(:hash2) do
173 |         {
174 |           _id: 'a2',
175 |           language: 'en',
176 |           title: 'title 2 common content',
177 |           description: 'description 2 common content',
178 |           created: Time.zone.now,
179 |           path: 'http://www.agency.gov/page2.html'
180 |         }
181 |       end
182 | 
183 |       it 'returns success message with Collection stats as JSON' do
184 |         document_repository.save(Document.new(hash1))
185 |         document_repository.save(Document.new(hash2))
186 |         document_repository.refresh_index! # presumably makes both saved documents visible to the request below
187 |         get '/api/v1/collections/agency_blogs', headers: valid_session
188 |         expect(response).to have_http_status(:ok)
189 |         expect(JSON.parse(response.body)).to match(
190 |           hash_including('status' => 200,
191 |                          'developer_message' => 'OK',
192 |                          'collection' => { 'document_total' => 2,
193 |                                            'last_document_sent' => an_instance_of(String),
194 |                                            'token' => 'secret',
195 |                                            'id' => 'agency_blogs',
196 |                                            'created_at' => an_instance_of(String),
197 |                                            'updated_at' => an_instance_of(String) })
198 |         )
199 |       end
200 |     end
201 |   end
202 | 
203 |   describe 'GET /api/v1/collections/search' do
204 |     before do # every search example starts with a fresh 'agency_blogs' collection and an empty documents index
205 |       clear_index(collections_index_name)
206 |       post '/api/v1/collections', params: valid_params, headers: valid_session
207 |       clear_index(documents_index_name)
208 |     end
209 | 
210 |     context 'when valid search parameters are provided' do
211 |       let(:valid_search_params) do
212 |         {
213 |           audience: 'everyone',
214 |           content_type: 'article',
215 |           handles: 'agency_blogs',
216 |           ignore_tags: 'ignored',
217 |           include: 'title,description',
218 |           language: 'en',
219 |           max_timestamp: '2013-02-27T10:01:00Z',
220 |           mime_type: 'text/html',
221 |           min_timestamp: '2013-02-27T10:00:00Z',
222 |           offset: 2**32,
223 |           query: 'common content',
224 |           searchgov_custom1: 'custom, content',
225 |           searchgov_custom2: 'content with spaces',
226 |           searchgov_custom3: '123, content, 456',
227 |           size: 3,
228 |           sort_by_date: 1,
229 |           tags: 'Foo, Bar blat'
230 |         }
231 |       end
232 | 
233 |       before do
234 |         allow(DocumentSearch).to receive(:new)
235 |         get '/api/v1/collections/search', params: valid_search_params, headers: valid_session
236 |       end
237 | 
238 |       it 'symbolizes language' do
239 |         expect(DocumentSearch).to have_received(:new).with(hash_including(language: Symbol))
240 |       end
241 | 
242 |       it 'sends the query as a string' do
243 |         expect(DocumentSearch).to have_received(:new).with(hash_including(query: String))
244 |       end
245 | 
246 |       it 'arrayifies audience, content_type, handles, ignore_tags, include, mime_type, searchgov_customs, and tags' do
247 |         expect(DocumentSearch).to have_received(:new).with(hash_including(audience: Array,
248 |                                                                           content_type: Array,
249 |                                                                           handles: Array,
250 |                                                                           ignore_tags: Array,
251 |                                                                           include: Array,
252 |                                                                           mime_type: Array,
253 |                                                                           searchgov_custom1: Array,
254 |                                                                           searchgov_custom2: Array,
255 |                                                                           searchgov_custom3: Array,
256 |                                                                           tags: Array))
257 |       end
258 | 
259 |       it 'sends offset and size as an integers' do
260 |         expect(DocumentSearch).to have_received(:new).with(hash_including(offset: Integer,
261 |                                                                           size: Integer))
262 |       end
263 | 
264 |       it 'sends the sort_by_date as a boolean' do
265 |         expect(DocumentSearch).to have_received(:new).with(hash_including(sort_by_date: TrueClass))
266 |       end
267 | 
268 |       it 'sends min_timestamp and max_timestamp as DateTime' do
269 |         expect(DocumentSearch).to have_received(:new).with(hash_including(min_timestamp: DateTime,
270 |                                                                           max_timestamp: DateTime))
271 |       end
272 |     end
273 | 
274 |     context 'when results exist' do
275 |       before do
276 |         document_repository.save(Document.new(hash1))
277 |         document_repository.save(Document.new(hash2))
278 |         document_repository.refresh_index!
279 |         valid_params = { language: 'en', query: 'common contentx', handles: 'agency_blogs' }
280 |         get '/api/v1/collections/search', params: valid_params, headers: valid_session
281 |       end
282 | 
283 |       let(:datetime) { DateTime.now.utc.to_s }
284 |       let(:hash1) do
285 |         { _id: 'a1',
286 |           language: 'en',
287 |           title: 'title 1 common content',
288 |           description: 'description 1 common content',
289 |           content: 'content 1 common content',
290 |           created: datetime,
291 |           thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png',
292 |           path: 'http://www.agency.gov/page1.html',
293 |           promote: true,
294 |           updated: datetime,
295 |           updated_at: datetime }
296 |       end
297 |       let(:hash2) do
298 |         { _id: 'a2',
299 |           language: 'en',
300 |           title: 'title 2 common content',
301 |           description: 'description 2 common content',
302 |           content: 'other unrelated stuff',
303 |           thumbnail_url: 'https://search.gov/assets/img/logos/search.png',
304 |           created: datetime,
305 |           path: 'http://www.agency.gov/page2.html',
306 |           promote: false,
307 |           tags: 'tag1, tag2',
308 |           updated_at: datetime }
309 |       end
310 | 
311 |       describe 'status' do
312 |         subject { response.status }
313 | 
314 |         it { is_expected.to eq(200) }
315 |       end
316 | 
317 |       describe 'body' do
318 |         let(:body) { JSON.parse(response.body) }
319 |         let(:result1) do
320 |           {
321 |             'language' => 'en',
322 |             'created' => datetime,
323 |             'path' => 'http://www.agency.gov/page1.html',
324 |             'title' => 'title 1 common content',
325 |             'description' => 'description 1 common content',
326 |             'content' => 'content 1 common content',
327 |             'thumbnail_url' => 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png',
328 |             'changed' => datetime
329 |           }
330 |         end
331 |         let(:result2) do
332 |           {
333 |             'language' => 'en',
334 |             'created' => datetime,
335 |             'path' => 'http://www.agency.gov/page2.html',
336 |             'title' => 'title 2 common content',
337 |             'description' => 'description 2 common content',
338 |             'thumbnail_url' => 'https://search.gov/assets/img/logos/search.png',
339 |             'changed' => datetime
340 |           }
341 |         end
342 | 
343 |         it 'returns highlighted JSON search results' do
344 |           expect(body).to match(hash_including('results' => [result1, result2]))
345 |         end
346 | 
347 |         describe 'metadata' do
348 |           let(:metadata) { body['metadata'] }
349 |           let(:suggestion_hash) do
350 |             { 'text' => 'common content',
351 |               'highlighted' => 'common content' }
352 |           end
353 | 
354 |           it 'returns highlighted JSON suggestion' do
355 |             expect(metadata['suggestion']).to match(hash_including(suggestion_hash))
356 |           end
357 | 
358 |           it 'returns a non-zero results total' do
359 |             expect(metadata['total']).to be > 0
360 |           end
361 | 
362 |           it 'returns aggregations' do
363 |             expect(metadata['aggregations']).not_to be_empty
364 |           end
365 |         end
366 |       end
367 |     end
368 | 
369 |     context 'when no results exist' do
370 |       before do
371 |         valid_params = { language: 'en', query: 'no hits', handles: 'agency_blogs' }
372 |         get '/api/v1/collections/search', params: valid_params, headers: valid_session
373 |       end
374 | 
375 |       describe 'status' do
376 |         subject { response.status }
377 | 
378 |         it { is_expected.to eq(200) }
379 |       end
380 | 
381 |       describe 'body' do
382 |         let(:body) { JSON.parse(response.body) }
383 | 
384 |         it 'returns an empty results array' do
385 |           expect(body).to match(hash_including('results' => []))
386 |         end
387 | 
388 |         describe 'metadata' do
389 |           let(:metadata) { body['metadata'] }
390 | 
391 |           it 'returns zero results total' do
392 |             expect(metadata['total']).to eq(0)
393 |           end
394 | 
395 |           it 'returns nil suggestion' do
396 |             expect(metadata['suggestion']).to be_nil
397 |           end
398 | 
399 |           it 'returns empty aggregations' do
400 |             expect(metadata['aggregations']).to be_empty
401 |           end
402 |         end
403 |       end
404 |     end
405 | 
406 |     context 'when missing required params' do # an empty params hash is sent
407 |       before do
408 |         invalid_params = {}
409 |         get '/api/v1/collections/search', params: invalid_params, headers: valid_session
410 |       end
411 | 
412 |       it 'returns error message as JSON' do # only 'handles' is reported as missing
413 |         expect(response).to have_http_status(:bad_request)
414 |         expect(JSON.parse(response.body)).to match(
415 |           hash_including('status' => 400,
416 |                          'developer_message' => 'handles is missing, handles is empty')
417 |         )
418 |       end
419 |     end
420 | 
421 |     context 'when searching across one or more collection handles that do not exist' do
422 |       let(:bad_handle_params) do # 'agency_blogs' is created below; 'missing' never is
423 |         { language: 'en', query: 'foo', handles: 'agency_blogs,missing' }
424 |       end
425 | 
426 |       before do # reset the collections index so that only 'agency_blogs' is saved
427 |         clear_index(collections_index_name)
428 |         collection = Collection.new(id: 'agency_blogs', token: 'secret')
429 |         ES.collection_repository.save(collection)
430 |         get '/api/v1/collections/search', params: bad_handle_params, headers: valid_session
431 |       end
432 | 
433 |       it 'returns error message as JSON' do # this error body uses an 'error' key rather than 'developer_message'
434 |         expect(response).to have_http_status(:bad_request)
435 |         expect(JSON.parse(response.body)).to match(
436 |           hash_including('error' => 'Could not find all the specified collection handles')
437 |         )
438 |       end
439 |     end
440 |   end
441 | end
442 | 


--------------------------------------------------------------------------------
/spec/requests/api/v1/documents_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'rails_helper'
  4 | require 'uri'
  5 | 
  6 | describe Api::V1::Documents do
  7 |   let(:id) { 'some really!weird@id.name' }
  8 |   let(:credentials) do
  9 |     ActionController::HttpAuthentication::Basic.encode_credentials('test_index',
 10 |                                                                    'test_key')
 11 |   end
 12 |   let(:valid_session) do
 13 |     { HTTP_AUTHORIZATION: credentials }
 14 |   end
 15 |   let(:allow_updates) { true }
 16 |   let(:maintenance_message) { nil }
 17 |   let(:documents_index_name) { DocumentRepository.index_namespace('test_index') }
 18 |   let(:document_repository) { DocumentRepository.new(index_name: documents_index_name) }
 19 | 
 20 |   before(:all) do
 21 |     credentials = ActionController::HttpAuthentication::Basic.encode_credentials ENV['I14Y_ADMIN_USER'], ENV['I14Y_ADMIN_PASSWORD']
 22 |     valid_collection_session = { HTTP_AUTHORIZATION: credentials }
 23 |     valid_collection_params = { handle: 'test_index', token: 'test_key' }
 24 |     post '/api/v1/collections', params: valid_collection_params, headers: valid_collection_session
 25 |   end
 26 | 
 27 |   before do
 28 |     I14y::Application.config.updates_allowed = allow_updates
 29 |     I14y::Application.config.maintenance_message = maintenance_message
 30 |   end
 31 | 
 32 |   after do
 33 |     I14y::Application.config.updates_allowed = true
 34 |     clear_index(documents_index_name)
 35 |   end
 36 | 
 37 |   describe 'POST /api/v1/documents' do
 38 |     subject(:post_document) do
 39 |       post '/api/v1/documents', params: document_params, headers: valid_session
 40 |       document_repository.refresh_index!
 41 |     end
 42 | 
 43 |     let(:valid_params) do
 44 |       {
 45 |         document_id: id,
 46 |         title: 'my title',
 47 |         path: 'http://www.gov.gov/goo.html',
 48 |         audience: 'Everyone',
 49 |         content: 'my content',
 50 |         content_type: 'article',
 51 |         description: 'my desc',
 52 |         thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png',
 53 |         language: 'hy',
 54 |         mime_type: 'text/html',
 55 |         promote: true,
 56 |         searchgov_custom1: 'custom content with spaces',
 57 |         searchgov_custom2: 'comma, separated, custom, content',
 58 |         searchgov_custom3: 123,
 59 |         tags: 'Foo, Bar blat'
 60 |       }
 61 |     end
 62 |     let(:document_params) { valid_params }
 63 | 
 64 |     context 'when successful' do
 65 |       before do
 66 |         post_document
 67 |       end
 68 | 
 69 |       it 'returns success message as JSON' do
 70 |         expect(response).to have_http_status(:created)
 71 |         expect(response.parsed_body).
 72 |           to match(hash_including('status' => 200,
 73 |                                   'developer_message' => 'OK',
 74 |                                   'user_message' => 'Your document was successfully created.'))
 75 |       end
 76 | 
 77 |       it 'uses the collection handle and the document_id in the Elasticsearch ID' do
 78 |         expect(document_repository.find(id)).to be_present
 79 |       end
 80 | 
 81 |       it 'stores the appropriate fields in the Elasticsearch document' do
 82 |         document = document_repository.find(id)
 83 |         expect(document.title).to eq('my title')
 84 |         expect(document.path).to eq('http://www.gov.gov/goo.html')
 85 |         expect(document.audience).to eq('everyone')
 86 |         expect(document.content).to eq('my content')
 87 |         expect(document.content_type).to eq('article')
 88 |         expect(document.created_at).to be_an_instance_of(Time)
 89 |         expect(document.description).to eq('my desc')
 90 |         expect(document.thumbnail_url).to eq('https://18f.gsa.gov/assets/img/logos/18F-Logo-M.png')
 91 |         expect(document.language).to eq('hy')
 92 |         expect(document.mime_type).to eq('text/html')
 93 |         expect(document.promote).to be_truthy
 94 |         expect(document.searchgov_custom1).to eq(['custom content with spaces'])
 95 |         expect(document.searchgov_custom2).to eq(%w[comma separated custom content])
 96 |         expect(document.searchgov_custom3).to eq(['123'])
 97 |         expect(document.tags).to contain_exactly('bar blat', 'foo')
 98 |         expect(document.updated_at).to be_an_instance_of(Time)
 99 |       end
100 | 
101 |       context 'when a "created" value is provided but not "changed"' do
102 |         let(:valid_params) do
103 |           { document_id: id,
104 |             title: 'my title',
105 |             path: 'http://www.gov.gov/goo.html',
106 |             description: 'my desc',
107 |             language: 'hy',
108 |             content: 'my content',
109 |             created: '2020-01-01T10:00:00Z' }
110 |         end
111 | 
112 |         it 'sets "changed" to be the same as "created"' do
113 |           document = document_repository.find(id)
114 |           expect(document.changed).to eq '2020-01-01T10:00:00Z'
115 |         end
116 |       end
117 | 
118 |       it_behaves_like 'a data modifying request made during read-only mode'
119 |     end
120 | 
121 |     context 'when attepmting to create an existing document' do
122 |       let(:document_params) { valid_params.merge(document_id: 'its_a_dupe') }
123 | 
124 |       before do
125 |         create_document(valid_params.merge(id: 'its_a_dupe'), document_repository)
126 |         post_document
127 |       end
128 | 
129 |       it 'returns failure message as JSON' do
130 |         expect(response).to have_http_status(:unprocessable_entity)
131 |         expect(response.parsed_body).
132 |           to match(hash_including('status' => 422,
133 |                                   'developer_message' => 'Document already exists with that ID'))
134 |       end
135 |     end
136 | 
137 |     context 'when language param is invalid' do
138 |       let(:document_params) { valid_params.merge(language: 'qq') }
139 | 
140 |       before { post_document }
141 | 
142 |       it 'returns failure message as JSON' do
143 |         expect(response).to have_http_status(:bad_request)
144 |         expect(response.parsed_body).
145 |           to match(hash_including('status' => 400,
146 |                                   'developer_message' => 'language does not have a valid value'))
147 |       end
148 |     end
149 | 
150 |     context 'when id contains a slash' do
151 |       let(:document_params) { valid_params.merge(document_id: 'a1/234') }
152 | 
153 |       before { post_document }
154 | 
155 |       it 'returns failure message as JSON' do
156 |         expect(response).to have_http_status(:bad_request)
157 |         expect(response.parsed_body).
158 |           to match(hash_including('status' => 400,
159 |                                   'developer_message' => "document_id cannot contain any of the following characters: ['/']"))
160 |       end
161 |     end
162 | 
163 |     context 'when an id is larger than 512 bytes' do
164 |       let(:string_with_513_bytes_but_only_257_characters) do
165 |         two_byte_character = '\u00b5'
166 |         "x#{two_byte_character * 256}"
167 |       end
168 | 
169 |       let(:document_params) do
170 |         valid_params.merge(document_id: string_with_513_bytes_but_only_257_characters)
171 |       end
172 | 
173 |       before { post_document }
174 | 
175 |       it 'returns failure message as JSON' do
176 |         expect(response).to have_http_status(:bad_request)
177 |         expect(response.parsed_body).
178 |           to match(hash_including('status' => 400,
179 |                                   'developer_message' => 'document_id cannot be more than 512 bytes long'))
180 |       end
181 |     end
182 | 
183 |     context 'when a language param is missing' do
184 |       let(:document_params) { valid_params.except(:language) }
185 | 
186 |       before { post_document }
187 | 
188 |       it 'uses English (en) as default' do
189 |         expect(document_repository.find(id).language).to eq('en')
190 |       end
191 |     end
192 | 
193 |     context 'when a required parameter is empty/blank' do
194 |       let(:document_params) { valid_params.merge(title: ' ') }
195 | 
196 |       before { post_document }
197 | 
198 |       it 'returns failure message as JSON' do
199 |         expect(response).to have_http_status(:bad_request)
200 |         expect(response.parsed_body).
201 |           to match(hash_including('status' => 400,
202 |                                   'developer_message' => 'title is empty'))
203 |       end
204 |     end
205 | 
206 |     context 'when the path URL is poorly formatted' do
207 |       let(:document_params) { valid_params.merge(path: 'http://www.gov.gov/ goo.html') }
208 | 
209 |       before { post_document }
210 | 
211 |       it 'returns failure message as JSON' do
212 |         expect(response).to have_http_status(:bad_request)
213 |         expect(response.parsed_body).
214 |           to match(hash_including('status' => 400,
215 |                                   'developer_message' => 'path is invalid'))
216 |       end
217 |     end
218 | 
219 |     context 'when authentication/authorization fails' do
220 |       let(:credentials) do
221 |         ActionController::HttpAuthentication::Basic.encode_credentials('test_index',
222 |                                                                        'bad_key')
223 |       end
224 | 
225 |       before { post_document }
226 | 
227 |       it 'returns error message as JSON' do
228 |         expect(response).to have_http_status(:bad_request)
229 |         expect(response.parsed_body).
230 |           to match(hash_including('status' => 400,
231 |                                   'developer_message' => 'Unauthorized'))
232 |       end
233 |     end
234 | 
235 |     context 'when something terrible happens during authentication' do
236 |       before do
237 |         allow(ES).to receive(:collection_repository).
238 |           and_raise(Elasticsearch::Transport::Transport::Errors::BadRequest)
239 |         post_document
240 |       end
241 | 
242 |       it 'returns error message as JSON' do
243 |         expect(response).to have_http_status(:bad_request)
244 |         expect(response.parsed_body).
245 |           to match(hash_including('status' => 400,
246 |                                   'developer_message' => 'Unauthorized'))
247 |       end
248 |     end
249 | 
250 |     context 'when something terrible happens creating the document' do
251 |       before do
252 |         allow(Document).to receive(:new) { raise_error(Exception) }
253 |         post_document
254 |       end
255 | 
256 |       it 'returns failure message as JSON' do
257 |         expect(response).to have_http_status(:internal_server_error)
258 |         expect(response.parsed_body).
259 |           to match(hash_including('status' => 500,
260 |                                   'developer_message' => "Something unexpected happened and we've been alerted."))
261 |       end
262 |     end
263 | 
264 |     context 'with invalid MIME type param' do
265 |       let(:document_params) { valid_params.merge(mime_type: 'not_a_valid/mime_type') }
266 | 
267 |       before { post_document }
268 | 
269 |       it 'returns failure message as JSON' do
270 |         expect(response).to have_http_status(:bad_request)
271 |         expect(response.parsed_body).
272 |           to match(hash_including('status' => 400,
273 |                                   'developer_message' => 'Mime type is invalid'))
274 |       end
275 |     end
276 |   end
277 | 
278 |   describe 'PUT /api/v1/documents/{document_id}' do
279 |     subject(:put_document) do
280 |       put "/api/v1/documents/#{CGI.escape(id)}",
281 |           params: update_params,
282 |           headers: valid_session
283 |       document_repository.refresh_index!
284 |     end
285 | 
286 |     let(:update_params) do
287 |       {
288 |         changed: '2016-01-01T10:00:01Z',
289 |         click_count: 1000,
290 |         content_type: 'website',
291 |         content: 'new content',
292 |         description: 'new desc',
293 |         mime_type: 'text/plain',
294 |         path: 'http://www.next.gov/updated.html',
295 |         promote: false,
296 |         searchgov_custom1: 'custom content with spaces',
297 |         searchgov_custom2: 'new, comma, separated, custom, content',
298 |         tags: 'new category',
299 |         thumbnail_url: 'https://18f.gsa.gov/assets/img/logos/new/18F-Logo-M.png',
300 |         title: 'new title'
301 |       }
302 |     end
303 | 
304 |     context 'when successful' do
305 |       before do
306 |         create_document({ audience: 'Everyone',
307 |                           content: 'huge content 4',
308 |                           created: 2.hours.ago,
309 |                           description: 'bigger desc 4',
310 |                           language: 'en',
311 |                           path: 'http://www.gov.gov/url4.html',
312 |                           promote: true,
313 |                           searchgov_custom2: 'comma, separated, custom, content',
314 |                           searchgov_custom3: 123,
315 |                           title: 'hi there 4',
316 |                           updated: Time.zone.now,
317 |                           id: id },
318 |                         document_repository)
319 | 
320 |         put_document
321 |       end
322 | 
323 |       it 'returns success message as JSON' do
324 |         expect(response).to have_http_status(:ok)
325 |         expect(response.parsed_body).
326 |           to match(hash_including('status' => 200,
327 |                                   'developer_message' => 'OK',
328 |                                   'user_message' => 'Your document was successfully updated.'))
329 |       end
330 | 
331 |       it 'updates the document' do
332 |         document = document_repository.find(id)
333 |         expect(document.changed).to eq('2016-01-01T10:00:01Z')
334 |         expect(document.click_count).to eq(1000)
335 |         expect(document.content_type).to eq('website')
336 |         expect(document.content).to eq('new content')
337 |         expect(document.description).to eq('new desc')
338 |         expect(document.mime_type).to eq('text/plain')
339 |         expect(document.path).to eq('http://www.next.gov/updated.html')
340 |         expect(document.promote).to be_falsey
341 |         expect(document.searchgov_custom1).to contain_exactly('custom content with spaces')
342 |         expect(document.searchgov_custom2).to contain_exactly('new', 'comma', 'separated', 'custom', 'content')
343 |         expect(document.tags).to contain_exactly('new category')
344 |         expect(document.thumbnail_url).to eq('https://18f.gsa.gov/assets/img/logos/new/18F-Logo-M.png')
345 |         expect(document.title).to eq('new title')
346 |       end
347 | 
348 |       it 'persists unchanged attributes' do
349 |         document = document_repository.find(id)
350 |         expect(document.audience).to eq('everyone')
351 |         expect(document.language).to eq('en')
352 |         expect(document.searchgov_custom3).to contain_exactly('123')
353 |       end
354 | 
355 |       it_behaves_like 'a data modifying request made during read-only mode'
356 |     end
357 | 
358 |     context 'when time has passed since the document was created' do
359 |       before do
360 |         create_document({
361 |                           id: id,
362 |                           language: 'en',
363 |                           title: 'hi there 4',
364 |                           description: 'bigger desc 4',
365 |                           content: 'huge content 4',
366 |                           path: 'http://www.gov.gov/url4.html'
367 |                         }, document_repository)
368 |         # Force-update the timestamps to avoid fooling the specs with any
369 |         # automagic trickery
370 |         ES.client.update(
371 |           index: documents_index_name,
372 |           id: id,
373 |           body: {
374 |             doc: {
375 |               updated_at: 1.year.ago,
376 |               created_at: 1.year.ago
377 |             }
378 |           },
379 |           type: '_doc'
380 |         )
381 |         document_repository.refresh_index!
382 |       end
383 | 
384 |       it 'updates the updated_at timestamp' do
385 |         expect { put_document }.to change { document_repository.find(id).updated_at }
386 |       end
387 | 
388 |       it 'does not update the created_at timestamp' do
389 |         expect { put_document }.not_to change { document_repository.find(id).created_at }
390 |       end
391 |     end
392 | 
393 |     context 'with invalid MIME type param' do
394 |       let(:update_params) { { mime_type: 'not_a_valid/mime_type' } }
395 | 
396 |       before do
397 |         create_document({
398 |                           id: id,
399 |                           language: 'en',
400 |                           title: 'hi there 4',
401 |                           description: 'bigger desc 4',
402 |                           content: 'huge content 4',
403 |                           created: 2.hours.ago,
404 |                           updated: Time.zone.now,
405 |                           promote: true,
406 |                           path: 'http://www.gov.gov/url4.html'
407 |                         }, document_repository)
408 | 
409 |         put_document
410 |       end
411 | 
412 |       it 'returns error message as JSON' do
413 |         expect(response).to have_http_status(:bad_request)
414 |         expect(response.parsed_body).
415 |           to match(hash_including('status' => 400,
416 |                                   'developer_message' => 'Mime type is invalid'))
417 |       end
418 |     end
419 |   end
420 | 
421 |   describe 'DELETE /api/v1/documents/{document_id}' do
422 |     subject(:delete_document) do
423 |       delete "/api/v1/documents/#{CGI.escape(id)}", headers: valid_session
424 |     end
425 | 
426 |     context 'when successful' do
427 |       before do
428 |         create_document({
429 |                           id: id,
430 |                           language: 'en',
431 |                           title: 'hi there 4',
432 |                           description: 'bigger desc 4',
433 |                           content: 'huge content 4',
434 |                           created: 2.hours.ago,
435 |                           updated: Time.zone.now,
436 |                           promote: true,
437 |                           path: 'http://www.gov.gov/url4.html'
438 |                         }, document_repository)
439 | 
440 |         delete_document
441 |       end
442 | 
443 |       it 'returns success message as JSON' do
444 |         expect(response).to have_http_status(:ok)
445 |         expect(response.parsed_body).
446 |           to match(hash_including('status' => 200,
447 |                                   'developer_message' => 'OK',
448 |                                   'user_message' => 'Your document was successfully deleted.'))
449 |       end
450 | 
451 |       it 'deletes the document' do
452 |         expect(document_repository).not_to exist(id)
453 |       end
454 | 
455 |       it_behaves_like 'a data modifying request made during read-only mode'
456 |     end
457 | 
458 |     context 'when document does not exist' do
459 |       let(:id) { 'nonexistent' }
460 | 
461 |       before { delete_document }
462 | 
463 |       it 'delete returns an error message as JSON' do
464 |         expect(response).to have_http_status(:bad_request)
465 |         expect(response.parsed_body).
466 |           to match(hash_including('status' => 400,
467 |                                   'developer_message' => 'Resource could not be found.'))
468 |       end
469 |     end
470 |   end
471 | end
472 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'simplecov'
 4 | 
 5 | RSpec.configure do |config|
 6 | # The settings below are suggested to provide a good initial experience
 7 | # with RSpec, but feel free to customize to your heart's content.
 8 | 
 9 |   # These two settings work together to allow you to limit a spec run
10 |   # to individual examples or groups you care about by tagging them with
11 |   # `:focus` metadata. When nothing is tagged with `:focus`, all examples
12 |   # get run.
13 |   config.filter_run :focus
14 |   config.run_all_when_everything_filtered = true
15 | 
16 |   # Many RSpec users commonly either run the entire suite or an individual
17 |   # file, and it's useful to allow more verbose output when running an
18 |   # individual spec file.
19 |   if config.files_to_run.one?
20 |     # Use the documentation formatter for detailed output,
21 |     # unless a formatter has already been configured
22 |     # (e.g. via a command-line flag).
23 |     config.default_formatter = 'doc'
24 |   end
25 | 
26 |   # Print the 10 slowest examples and example groups at the
27 |   # end of the spec run, to help surface which specs are running
28 |   # particularly slow.
29 |   config.profile_examples = 10
30 | 
31 |   # Run specs in random order to surface order dependencies. If you find an
32 |   # order dependency and want to debug it, you can fix the order by providing
33 |   # the seed, which is printed after each run.
34 |   #     --seed 1234
35 |   config.order = :random
36 | 
37 |   # Seed global randomization in this process using the `--seed` CLI option.
38 |   # Setting this allows you to use `--seed` to deterministically reproduce
39 |   # test failures related to randomization by passing the same `--seed` value
40 |   # as the one that triggered the failure.
41 |   Kernel.srand config.seed
42 | 
43 |   # rspec-expectations config goes here. You can use an alternate
44 |   # assertion/expectation library such as wrong or the stdlib/minitest
45 |   # assertions if you prefer.
46 |   config.expect_with :rspec do |expectations|
47 |     # Enable only the newer, non-monkey-patching expect syntax.
48 |     # For more details, see:
49 |     #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
50 |     expectations.syntax = :expect
51 |   end
52 | 
53 |   # rspec-mocks config goes here. You can use an alternate test double
54 |   # library (such as bogus or mocha) by changing the `mock_with` option here.
55 |   config.mock_with :rspec do |mocks|
56 |     # Enable only the newer, non-monkey-patching expect syntax.
57 |     # For more details, see:
58 |     #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
59 |     mocks.syntax = :expect
60 | 
61 |     # Prevents you from mocking or stubbing a method that does not exist on
62 |     # a real object. This is generally recommended.
63 |     mocks.verify_partial_doubles = true
64 |   end
65 | end
66 | 


--------------------------------------------------------------------------------
/spec/support/document_crud.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module DocumentCrud
 4 |   def create_document(params, repository)
 5 |     document = Document.new(params)
 6 |     # Ensure this helper method is only used to create valid docs
 7 |     document.validate!
 8 |     repository.save(document)
 9 |     # Ensure the document is searchable
10 |     repository.refresh_index!
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/spec/support/shared_examples/read_only_mode.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | shared_examples 'a data modifying request made during read-only mode' do
 4 |   let(:allow_updates) { false }
 5 | 
 6 |   it 'returns a read-only-mode-releated failure message as JSON' do
 7 |     expect(response.status).to eq(503)
 8 |     expect(JSON.parse(response.body)).to match(hash_including({
 9 |       'status' => 503,
10 |       'developer_message' => 'The i14y API is currently in read-only mode.'
11 |     }))
12 |   end
13 | 
14 |   context 'when a specific maintenance message is configured' do
15 |     let(:maintenance_message) { 'Sorry about that!' }
16 | 
17 |     it 'additionally includes the specific maintanance message' do
18 |       expect(JSON.parse(response.body)).to match(hash_including({
19 |         'status' => 503,
20 |         'developer_message' => 'The i14y API is currently in read-only mode. Sorry about that!'
21 |       }))
22 |     end
23 |   end
24 | end
25 | 


--------------------------------------------------------------------------------
/spec/support/shared_examples/repository_behavior.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | shared_examples_for 'a repository' do
 4 |   describe 'serialization' do
 5 |     subject(:serialize) { repository.serialize(klass_instance) }
 6 | 
 7 |     let(:klass_instance) { repository.klass.new }
 8 | 
 9 |     it { is_expected.to be_a Hash }
10 |   end
11 | 
12 |   describe 'deserialization' do
13 |     subject(:deserialize) { repository.deserialize(hash) }
14 | 
15 |     # Ensures backwards compatibility with pre-ES 7 documents
16 |     context 'when the source does not include the id' do
17 |       let(:hash) do
18 |         {
19 |           '_id' => 'a123',
20 |           '_source' => { }
21 |         }
22 |       end
23 | 
24 |       it 'sets the id on the deserialized object' do
25 |         expect(deserialize.id).to eq 'a123'
26 |       end
27 |     end
28 |   end
29 | 
30 |   it 'can connect to Elasticsearch' do
31 |     expect(repository.client.ping).to be(true)
32 |   end
33 | 
34 |   it 'uses one primary and one replica shard' do
35 |     expect(repository.settings.to_hash).to match(hash_including(
36 |       number_of_shards: 1,
37 |       number_of_replicas: 1
38 |     ))
39 |   end
40 | end
41 | 


--------------------------------------------------------------------------------
/spec/support/shoulda.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | Shoulda::Matchers.configure do |config|
4 |   config.integrate do |with|
5 |     with.test_framework :rspec
6 |     with.library :rails
7 |   end
8 | end
9 | 


--------------------------------------------------------------------------------
/spec/test_services.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module TestServices
 4 |   module_function
 5 | 
 6 |   def create_collections_index
 7 |     ES.client.indices.create(index: collections_index_name)
 8 |     ES.client.indices.put_alias(
 9 |       index: collections_index_name,
10 |       name: ES.collection_repository.index_name
11 |     )
12 |   end
13 | 
14 |   def delete_es_indexes
15 |     ES.client.indices.delete(index: [Rails.env, I14y::APP_NAME, '*'].join('-'))
16 |   end
17 | 
18 |   def clear_index(index_name)
19 |     ES.client.delete_by_query(
20 |       index: index_name,
21 |       q: '*:*',
22 |       conflicts: 'proceed'
23 |     )
24 |   end
25 | 
26 |   def collections_index_name
27 |     [Rails.env, I14y::APP_NAME, 'collections', 'v1'].join('-')
28 |   end
29 | end
30 | 


--------------------------------------------------------------------------------
/vendor/stream2es:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GSA/i14y/f3be9325fa45d5dec1e716a4a9ffeb6e0cf56b22/vendor/stream2es


--------------------------------------------------------------------------------