├── .backportrc.json ├── .buildkite ├── pipeline.yml └── scripts │ ├── run_ci_step.sh │ └── run_command.sh ├── .bundler-version ├── .ci └── settings.xml ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── enhancement.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── add-labels-main.yml │ └── backport.yml ├── .gitignore ├── .rspec ├── .rubocop.yml ├── .ruby-version ├── Dockerfile ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── Makefile ├── NOTICE.txt ├── Procfile ├── README.md ├── VERSION ├── bin ├── connectors_service └── list_connectors ├── catalog-info.yaml ├── config └── connectors.yml.example ├── connectors_service.gemspec ├── connectors_utility.gemspec ├── docs ├── CODE_OF_CONDUCT.md ├── CONFIG.md ├── CONTRIBUTING.md ├── DEVELOPING.md ├── INTERNAL.md ├── RELEASING.md ├── SECURITY.md ├── SUPPORT.md └── TERMINOLOGY.md ├── lib ├── app │ ├── app.rb │ ├── config.rb │ ├── console_app.rb │ ├── dispatcher.rb │ ├── menu.rb │ ├── preflight_check.rb │ └── version.rb ├── connectors.rb ├── connectors │ ├── base │ │ ├── adapter.rb │ │ ├── connector.rb │ │ └── custom_client.rb │ ├── connector_status.rb │ ├── crawler │ │ └── scheduler.rb │ ├── example │ │ ├── attachments │ │ │ ├── first_attachment.txt │ │ │ ├── second_attachment.txt │ │ │ └── third_attachment.txt │ │ ├── connector.rb │ │ └── example_advanced_snippet_validator.rb │ ├── gitlab │ │ ├── adapter.rb │ │ ├── connector.rb │ │ ├── custom_client.rb │ │ ├── extractor.rb │ │ └── gitlab_advanced_snippet_validator.rb │ ├── job_trigger_method.rb │ ├── mongodb │ │ ├── connector.rb │ │ ├── mongo_advanced_snippet_against_schema_validator.rb │ │ ├── mongo_advanced_snippet_schema.rb │ │ └── mongo_advanced_snippet_snake_case_transformer.rb │ ├── registry.rb │ ├── sync_status.rb │ └── tolerable_error_helper.rb ├── connectors_service.rb ├── connectors_utility.rb ├── core.rb ├── core │ ├── configuration.rb │ ├── connector_job.rb │ ├── connector_settings.rb │ ├── elastic_connector_actions.rb │ ├── filtering.rb │ 
├── filtering │ │ ├── advanced_snippet │ │ │ ├── advanced_snippet_against_schema_validator.rb │ │ │ └── advanced_snippet_validator.rb │ │ ├── filter_validator.rb │ │ ├── hash_against_schema_validator.rb │ │ ├── post_process_engine.rb │ │ ├── post_process_result.rb │ │ ├── processing_stage.rb │ │ ├── simple_rules │ │ │ ├── simple_rule.rb │ │ │ ├── simple_rules_parser.rb │ │ │ └── validation │ │ │ │ ├── no_conflicting_policies_rules_validator.rb │ │ │ │ ├── simple_rules_schema.rb │ │ │ │ ├── simple_rules_validator.rb │ │ │ │ └── single_rule_against_schema_validator.rb │ │ ├── transform │ │ │ ├── filter_transformer.rb │ │ │ ├── filter_transformer_facade.rb │ │ │ └── transformation_target.rb │ │ ├── validation_job_runner.rb │ │ └── validation_status.rb │ ├── heartbeat.rb │ ├── ingestion.rb │ ├── ingestion │ │ └── es_sink.rb │ ├── job_cleanup.rb │ ├── jobs │ │ ├── consumer.rb │ │ └── producer.rb │ ├── native_scheduler.rb │ ├── scheduler.rb │ ├── single_scheduler.rb │ └── sync_job_runner.rb ├── list_connectors.rb ├── stubs │ ├── app_config.rb │ ├── connectors │ │ └── stats.rb │ └── service_type.rb ├── utility.rb └── utility │ ├── bulk_queue.rb │ ├── common.rb │ ├── constants.rb │ ├── cron.rb │ ├── elasticsearch │ └── index │ │ ├── language_data.yml │ │ ├── mappings.rb │ │ └── text_analysis_settings.rb │ ├── environment.rb │ ├── error_monitor.rb │ ├── errors.rb │ ├── es_client.rb │ ├── exception_tracking.rb │ ├── extension_mapping_util.rb │ ├── filtering.rb │ ├── logger.rb │ └── middleware │ ├── basic_auth.rb │ ├── bearer_auth.rb │ └── restrict_hostnames.rb ├── logo-enterprise-search.png ├── make.bat ├── package.json ├── script ├── run_in_docker.sh └── version.sh ├── spec ├── app │ ├── config_spec.rb │ ├── dispatcher_spec.rb │ └── preflight_check_spec.rb ├── connectors │ ├── base │ │ ├── adapter_spec.rb │ │ ├── connector_spec.rb │ │ └── custom_client_spec.rb │ ├── crawler │ │ └── crawler_scheduler_spec.rb │ ├── example │ │ └── connector_spec.rb │ ├── gitlab │ │ ├── 
adapter_spec.rb │ │ ├── connector_spec.rb │ │ └── extractor_spec.rb │ ├── mongodb │ │ ├── connector_spec.rb │ │ ├── mongo_advanced_snippet_against_schema_validator_spec.rb │ │ └── mongo_advanced_snippet_snake_case_transformer_spec.rb │ ├── registry_spec.rb │ └── tolerable_error_helper_spec.rb ├── core │ ├── configuration_spec.rb │ ├── connector_job_spec.rb │ ├── connector_settings_spec.rb │ ├── elastic_connector_actions_spec.rb │ ├── filtering │ │ ├── advanced_snippet │ │ │ └── advanced_snippet_validator_spec.rb │ │ ├── filter_validator_spec.rb │ │ ├── hash_against_schema_validator_spec.rb │ │ ├── post_process_engine_spec.rb │ │ ├── simple_rule │ │ │ ├── no_conflicting_policies_rules_validator_spec.rb │ │ │ ├── simple_rule_spec.rb │ │ │ └── single_rule_against_schema_validator_spec.rb │ │ ├── transform │ │ │ ├── filter_transformer_facade_spec.rb │ │ │ └── filter_transformer_spec.rb │ │ └── validation_job_runner_spec.rb │ ├── heartbeat_spec.rb │ ├── ingestion │ │ └── es_sink_spec.rb │ ├── job_cleanup_spec.rb │ ├── jobs │ │ ├── consumer_spec.rb │ │ └── producer_spec.rb │ ├── native_scheduler_spec.rb │ ├── scheduler_spec.rb │ ├── single_scheduler_spec.rb │ └── sync_job_runner_spec.rb ├── fixtures │ ├── gitlab │ │ ├── external_user.json │ │ ├── external_users.json │ │ ├── project.json │ │ ├── project_members.json │ │ ├── projects_list.json │ │ ├── simple_project.json │ │ └── user.json │ └── uncrate.com.html ├── repo_spec.rb ├── spec_helper.rb ├── support │ ├── shared_examples.rb │ └── shared_examples │ │ └── filtering.rb └── utility │ ├── bulk_queue_spec.rb │ ├── common_spec.rb │ ├── cron_spec.rb │ ├── elasticsearch │ ├── mappings_spec.rb │ └── text_analysis_settings_spec.rb │ ├── error_monitor_spec.rb │ ├── es_client_spec.rb │ ├── exception_tracking_spec.rb │ ├── filtering_spec.rb │ ├── logger_spec.rb │ └── middleware │ └── restrict_hostnames_spec.rb ├── tests ├── Makefile ├── connectors.yml ├── docker-compose.yml ├── ftest.rb └── loadsample.sh ├── win32 ├── 
getmsys2.vbs └── install.bat └── yarn.lock /.backportrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "targetBranchChoices": [ 3 | { "name": "main", "checked": true }, 4 | "8.16" 5 | ], 6 | "fork": false, 7 | "targetPRLabels": ["backport"], 8 | "branchLabelMapping": { 9 | "^v8.17.0$": "main", 10 | "^v(\\d+).(\\d+)(.\\d+)+$": "$1.$2" 11 | }, 12 | "upstream": "elastic/connectors-ruby" 13 | } 14 | -------------------------------------------------------------------------------- /.buildkite/pipeline.yml: -------------------------------------------------------------------------------- 1 | agents: 2 | provider: "gcp" 3 | machineType: "n1-standard-8" 4 | 5 | defaultTimeoutInMinutes: 45 6 | 7 | steps: 8 | - label: ":safety_vest: Connectors Tests" 9 | commands: 10 | - ".buildkite/scripts/run_command.sh tests" 11 | artifact_paths: 12 | - "coverage/index.html" 13 | - label: ":wrench: Linter" 14 | commands: 15 | - ".buildkite/scripts/run_command.sh linter" 16 | - label: ":package: Docker" 17 | commands: 18 | - ".buildkite/scripts/run_command.sh docker" 19 | - label: ":package: Packaging" 20 | commands: 21 | - ".buildkite/scripts/run_command.sh packaging" 22 | artifact_paths: 23 | - ".gems/*.gem" 24 | -------------------------------------------------------------------------------- /.buildkite/scripts/run_ci_step.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | 5 | export PATH="$PATH:/root/.rbenv/bin:/root/.rbenv/plugins/ruby-build/bin:/ci/.rbenv/shims" 6 | 7 | RUBY_VERSION=$(cat .ruby-version) 8 | echo "---- installing Ruby version $RUBY_VERSION" 9 | rbenv install $RUBY_VERSION 10 | rbenv global $RUBY_VERSION 11 | 12 | case $1 in 13 | 14 | tests) 15 | echo "---- running unit tests" 16 | make install test 17 | ;; 18 | 19 | linter) 20 | echo "---- running linter" 21 | make install lint 22 | ;; 23 | 24 | packaging) 25 | echo "---- running packaging" 
26 | git config --global --add safe.directory '*' 27 | git config --global --add safe.directory /ci 28 | curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.21.1/yq_linux_amd64 29 | chmod +x yq 30 | YQ=`realpath yq` make install build_service build_service_gem 31 | gem install .gems/connectors_service-8.* 32 | ;; 33 | 34 | *) 35 | echo "Usage: run_command {tests|linter|packaging}" 36 | exit 2 37 | ;; 38 | esac 39 | -------------------------------------------------------------------------------- /.buildkite/scripts/run_command.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | 5 | COMMAND_TO_RUN=${1:-} 6 | 7 | if [[ "${COMMAND_TO_RUN:-}" == "" ]]; then 8 | echo "Usage: run_command.sh {tests|linter|docker|packaging}" 9 | exit 2 10 | fi 11 | 12 | function realpath { 13 | echo "$(cd "$(dirname "$1")"; pwd)"/"$(basename "$1")"; 14 | } 15 | 16 | SCRIPT_WORKING_DIR=$(realpath "$(dirname "$0")") 17 | BUILDKITE_DIR=$(realpath "$(dirname "$SCRIPT_WORKING_DIR")") 18 | PROJECT_ROOT=$(realpath "$(dirname "$BUILDKITE_DIR")") 19 | 20 | if [[ "${COMMAND_TO_RUN:-}" == "docker" ]]; then 21 | echo "running docker build" 22 | make build-docker 23 | else 24 | DOCKER_IMAGE="docker.elastic.co/ci-agent-images/enterprise-search/rbenv-buildkite-agent:latest" 25 | SCRIPT_CMD="/ci/.buildkite/scripts/run_ci_step.sh" 26 | 27 | docker run --interactive --rm \ 28 | --sig-proxy=true --init \ 29 | --user "root" \ 30 | --volume "$PROJECT_ROOT:/ci" \ 31 | --workdir /ci \ 32 | --env HOME=/ci \ 33 | --env CI \ 34 | --env GIT_REVISION=${BUILDKITE_COMMIT-} \ 35 | --env BUILD_ID=${BUILDKITE_BUILD_NUMBER-} \ 36 | --entrypoint "${SCRIPT_CMD}" \ 37 | $DOCKER_IMAGE \ 38 | $COMMAND_TO_RUN 39 | fi 40 | -------------------------------------------------------------------------------- /.bundler-version: -------------------------------------------------------------------------------- 1 | 2.3.15 2 | 
-------------------------------------------------------------------------------- /.ci/settings.xml: -------------------------------------------------------------------------------- 1 | 3 | /var/lib/jenkins/.m2/repository 4 | 5 | org.apache.maven.plugins 6 | org.codehaus.mojo 7 | 8 | 9 | 10 | sonatype-nexus-snapshots 11 | ${env.SERVER_USERNAME} 12 | ${env.SERVER_PASSWORD} 13 | 14 | 15 | sonatype-nexus-staging 16 | ${env.SERVER_USERNAME} 17 | ${env.SERVER_PASSWORD} 18 | 19 | 20 | 21 | 22 | 23 | true 24 | 25 | 26 | ${env.KEYPASS} 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve. 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Bug Description 11 | 12 | 13 | ### To Reproduce 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | ## Expected behavior 21 | 22 | 23 | ## Screenshots 24 | 26 | 27 | ## Environment 28 | 29 | 30 | 31 | - OS: [e.g. iOS] 32 | - Browser [e.g. chrome, safari] 33 | - Version [e.g. 22] 34 | 35 | 36 | ## Additional context 37 | 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Question or Discussion 4 | url: https://discuss.elastic.co/c/enterprise-search/workplace-search/ 5 | about: Please ask and answer questions here. 6 | - name: Security Vulnerability 7 | url: https://www.elastic.co/community/security 8 | about: DO NOT file issues related to security. Instead, please follow our security policy here. 
9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement 3 | about: It's not a bug, but some desired feature is missing 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Problem Description 11 | 14 | 15 | ### Proposed Solution 16 | 18 | 19 | 20 | ### Alternatives 21 | 23 | 24 | ### Additional Context 25 | 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Closes https://github.com/elastic/connectors-ruby/issues/### 2 | 3 | 4 | 10 | 11 | ## Checklists 12 | 13 | 15 | 16 | #### Pre-Review Checklist 17 | - [ ] Covered the changes with automated tests 18 | - [ ] Tested the changes locally 19 | - [ ] Added a label for each target release version (example: `v7.13.2`, `v7.14.0`, `v8.0.0`) 20 | - [ ] Considered corresponding documentation changes 21 | - [ ] Contributed any configuration settings changes to the configuration reference 22 | 23 | #### Changes Requiring Extra Attention 24 | 25 | 28 | 29 | - [ ] Security-related changes (encryption, TLS, SSRF, etc) 30 | - [ ] New external service dependencies added. 
31 | 32 | ## Related Pull Requests 33 | 34 | 37 | 38 | ## Release Note 39 | 40 | 44 | 45 | ## For Elastic Internal Use Only 46 | - [ ] Considered corresponding documentation changes to [contribute separately](https://github.com/elastic/enterprise-search-pubs#contribute-docs-changes-for-product-changes) 47 | - [ ] New configuration settings added in this PR follow the [official guidelines](https://github.com/elastic/ent-search/blob/main/doc/enterprise-search-config.md) 48 | - [ ] Built gems (both `connectors_utility` and `connectors_service`) and included into Enterprise Search and tested that Enterprise Search works well with new gem versions. Instruction can be found [here](https://docs.google.com/document/d/10KJOIhe4sauDul8iWeV9Cn-_3uPWa76qG8SwYk6BCAA/edit) 49 | -------------------------------------------------------------------------------- /.github/workflows/add-labels-main.yml: -------------------------------------------------------------------------------- 1 | name: Force backport labels for main 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | types: 8 | - opened 9 | 10 | jobs: 11 | add_labels: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - id: version 16 | uses: juliangruber/read-file-action@ebfa650188272343fef925480eb4d18c5d49b925 17 | with: 18 | path: ./VERSION 19 | - uses: actions-ecosystem/action-add-labels@v1 20 | with: 21 | labels: | 22 | auto-backport 23 | v${{ steps.version.outputs.content }} 24 | -------------------------------------------------------------------------------- /.github/workflows/backport.yml: -------------------------------------------------------------------------------- 1 | name: Backport PR 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | types: 8 | - labeled 9 | - closed 10 | 11 | jobs: 12 | backport: 13 | if: | 14 | github.event.pull_request.merged == true 15 | && contains(github.event.pull_request.labels.*.name, 'auto-backport') 16 | && ( 17 | 
(github.event.action == 'labeled' && github.event.label.name == 'auto-backport') 18 | || (github.event.action == 'closed') 19 | ) 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Checkout Actions 23 | uses: actions/checkout@v2 24 | with: 25 | repository: 'swiftype/kibana-github-actions' 26 | ref: main 27 | path: ./actions 28 | 29 | - name: Install Actions 30 | run: npm install --production --prefix ./actions 31 | 32 | - name: Run Backport 33 | uses: ./actions/backport 34 | with: 35 | github_token: ${{ secrets.GITHUB_TOKEN }} 36 | approver_token: ${{ secrets.REPO_SCOPED_TOKEN }} 37 | auto_approve: 'true' 38 | commit_user: elastic 39 | commit_email: ent-search-backport@users.noreply.github.com 40 | auto_merge: 'true' 41 | auto_merge_method: 'squash' 42 | manual_backport_command_template: 'backport --pr %pullNumber% --autoMerge --autoMergeMethod squash' 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | # jetbrains 26 | *.iml 27 | .idea 28 | 29 | # maven 30 | target 31 | 32 | # temp files/folders for build artifacts 33 | .gems 34 | coverage 35 | tmp 36 | 37 | # bundler state 38 | /.bundle 39 | /vendor/bundle/ 40 | /vendor/ruby/ 41 | /vendor/filebeat/ 42 | /vendor/metricbeat/ 43 | /vendor/jruby/ 44 | 45 | # temp file with auth credentials 46 | ent-search-dev.json 47 | 48 | # vim 49 | *.un~ 50 | *.swp 51 | 52 | #test coverage 53 | /coverage 54 | 55 | config/connectors.yml 56 | .vscode 57 | 58 | # node deps 59 | node_modules 60 | 
-------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --format RspecJunitFormatter --out tmp/test-output-junit.xml 3 | --color 4 | --profile 5 | -r spec_helper 6 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.6.9 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:noble-20241118.1 2 | 3 | WORKDIR /app 4 | COPY . /app 5 | 6 | # installing all system dependencies, yq, ruby-build and rbenv 7 | RUN apt-get update && \ 8 | apt-get install --yes --no-install-recommends \ 9 | uuid-runtime curl ca-certificates git make build-essential \ 10 | libssl-dev libreadline-dev zlib1g-dev && \ 11 | rm -rf /var/lib/apt/lists/* 12 | RUN curl -L https://github.com/mikefarah/yq/releases/download/v4.40.2/yq_linux_amd64.tar.gz | tar -xzvf - && \ 13 | mv yq_linux_amd64 /usr/bin/yq 14 | RUN git clone https://github.com/rbenv/rbenv.git ~/.rbenv && \ 15 | curl -L https://github.com/sstephenson/ruby-build/archive/v20231114.tar.gz | tar -zxvf - -C /tmp/ && \ 16 | cd /tmp/ruby-build-* && \ 17 | ./install.sh 18 | 19 | # put rbenv and its shims on PATH, and run rbenv init from root's .bashrc and /etc/profile.d 20 | ENV PATH=/root/.rbenv/bin:/root/.rbenv/shims:$PATH 21 | RUN echo 'eval "$(rbenv init -)"' >> ~/.bashrc 22 | RUN echo 'eval "$(rbenv init -)"' >> /etc/profile.d/rbenv.sh 23 | 24 | # run the make file to install the app 25 | # override CFLAGS because -w (warning suppression) screws up ruby-build in newer versions 26 | # when compiling ruby 2.6.x, *but* we need newer ruby-build to compile older openssl 27 | # on newer Ubuntu releases 28 | RUN make install RUBY_CFLAGS='' 29 | 30 | CMD ["/bin/bash", "script/run_in_docker.sh"] 31 | 
-------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | ruby File.read(File.join(__dir__, '.ruby-version')).strip 4 | 5 | # Pull gem index from rubygems 6 | source 'https://rubygems.org' 7 | 8 | # Pin the version of bundle we support 9 | gem 'bundler', File.read(File.join(__dir__, '.bundler-version')).strip 10 | 11 | # Dependencies for connectors 12 | gem 'activesupport', '~>6.1.7.3' 13 | gem 'mime-types', '~> 3.6' 14 | gem 'tzinfo-data' 15 | gem 'tzinfo', '~> 2.0' 16 | gem 'fugit', '~> 1.11', '>= 1.11.1' 17 | gem 'remedy', '~> 0.3.0' 18 | gem 'ecs-logging', '~> 1.0.0' 19 | 20 | # Remove this section when gem 'config' is removed 21 | gem 'dry-container', '= 0.9.0' 22 | gem 'dry-core', '= 0.7.1' 23 | gem 'dry-configurable', '= 0.13.0' 24 | gem 'dry-initializer', '= 3.0.4' 25 | gem 'dry-inflector', '= 0.2.1' 26 | gem 'dry-schema', '= 1.8.0' 27 | gem 'dry-validation', '= 1.7.0' 28 | 29 | group :test do 30 | gem 'rspec-collection_matchers', '~> 1.2.0' 31 | gem 'rspec-core', '~> 3.10.1' 32 | gem 'rspec_junit_formatter' 33 | gem 'rubocop', '1.18.4' 34 | gem 'rubocop-performance', '1.11.5' 35 | gem 'rspec-mocks' 36 | gem 'webmock' 37 | gem 'rack', '>= 2.2.6.4' 38 | gem 'rack-test' 39 | gem 'ruby-debug-ide' 40 | gem 'pry-remote' 41 | gem 'pry-nav' 42 | gem 'debase', '0.2.8' 43 | gem 'timecop' 44 | gem 'simplecov', require: false 45 | gem 'simplecov-material' 46 | end 47 | 48 | # Dependencies for the HTTP service 49 | gem 'config', '~> 4.0.0' 50 | gem 'forwardable', '~> 1.3.2' 51 | gem 'faraday', '~> 1.10.0' 52 | gem 'faraday_middleware', '= 1.0.0' 53 | gem 'httpclient', '~> 2.8.3' 54 | gem 'attr_extras', '~> 6.2.5' 55 | gem 'hashie', '~> 5.0.0' 56 | gem 'concurrent-ruby', '~> 1.1.9' 57 | gem 'elasticsearch', '~> 8.8.0' 58 | 59 | # Dependencies for oauth 60 | gem 'signet', '~> 0.16.0' 61 | 62 | # Dependency for mongo 
connector 63 | gem 'mongo', '~> 2.18' 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Elastic License 2.0 2 | 3 | URL: https://www.elastic.co/licensing/elastic-license 4 | 5 | ## Acceptance 6 | 7 | By using the software, you agree to all of the terms and conditions below. 8 | 9 | ## Copyright License 10 | 11 | The licensor grants you a non-exclusive, royalty-free, worldwide, 12 | non-sublicensable, non-transferable license to use, copy, distribute, make 13 | available, and prepare derivative works of the software, in each case subject to 14 | the limitations and conditions below. 15 | 16 | ## Limitations 17 | 18 | You may not provide the software to third parties as a hosted or managed 19 | service, where the service provides users with access to any substantial set of 20 | the features or functionality of the software. 21 | 22 | You may not move, change, disable, or circumvent the license key functionality 23 | in the software, and you may not remove or obscure any functionality in the 24 | software that is protected by the license key. 25 | 26 | You may not alter, remove, or obscure any licensing, copyright, or other notices 27 | of the licensor in the software. Any use of the licensor’s trademarks is subject 28 | to applicable law. 29 | 30 | ## Patents 31 | 32 | The licensor grants you a license, under any patent claims the licensor can 33 | license, or becomes able to license, to make, have made, use, sell, offer for 34 | sale, import and have imported the software, in each case subject to the 35 | limitations and conditions in this license. This license does not cover any 36 | patent claims that you cause to be infringed by modifications or additions to 37 | the software. 
If you or your company make any written claim that the software 38 | infringes or contributes to infringement of any patent, your patent license for 39 | the software granted under these terms ends immediately. If your company makes 40 | such a claim, your patent license ends immediately for work on behalf of your 41 | company. 42 | 43 | ## Notices 44 | 45 | You must ensure that anyone who gets a copy of any part of the software from you 46 | also gets a copy of these terms. 47 | 48 | If you modify the software, you must include in any modified copies of the 49 | software prominent notices stating that you have modified the software. 50 | 51 | ## No Other Rights 52 | 53 | These terms do not imply any licenses other than those expressly granted in 54 | these terms. 55 | 56 | ## Termination 57 | 58 | If you use the software in violation of these terms, such use is not licensed, 59 | and your licenses will automatically terminate. If the licensor provides you 60 | with a notice of your violation, and you cease all violation of this license no 61 | later than 30 days after you receive that notice, your licenses will be 62 | reinstated retroactively. However, if you violate these terms after such 63 | reinstatement, any additional violation of these terms will cause your licenses 64 | to terminate automatically and permanently. 65 | 66 | ## No Liability 67 | 68 | *As far as the law allows, the software comes as is, without any warranty or 69 | condition, and the licensor will not be liable to you for any damages arising 70 | out of these terms or the use or nature of the software, under any kind of 71 | legal claim.* 72 | 73 | ## Definitions 74 | 75 | The **licensor** is the entity offering these terms, and the **software** is the 76 | software the licensor makes available under these terms, including any portion 77 | of it. 78 | 79 | **you** refers to the individual or entity agreeing to these terms. 
80 | 81 | **your company** is any legal entity, sole proprietorship, or other kind of 82 | organization that you work for, plus all organizations that have control over, 83 | are under the control of, or are under common control with that 84 | organization. **control** means ownership of substantially all the assets of an 85 | entity, or the power to direct its management and policies by vote, contract, or 86 | otherwise. Control can be direct or indirect. 87 | 88 | **your licenses** are all the licenses granted to you for the software under 89 | these terms. 90 | 91 | **use** means anything you do with the software requiring one of your licenses. 92 | 93 | **trademark** means trademarks, service marks, and similar rights. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | YQ ?= "yq" 2 | RUBY_CFLAGS ?= -w 3 | .PHONY: test ftest lint autocorrect update_config autocorrect-unsafe install build-docker run-docker exec_app tag exec_cli 4 | .PHONY: build_utility build_service release_utility_dev release_service_dev release_utility release_service build_utility_gem build_service_gem 5 | 6 | config/connectors.yml: 7 | cp config/connectors.yml.example config/connectors.yml 8 | 9 | test: config/connectors.yml 10 | bundle _$(shell cat .bundler-version)_ exec rspec spec --order rand 11 | 12 | ftest: 13 | -cp config/connectors.yml config/connectors.yml.$$(date +%Y%m%d%H%M%S).saved 2>/dev/null 14 | cp tests/connectors.yml config/connectors.yml 15 | rbenv exec bundle exec ruby tests/ftest.rb 16 | 17 | lint: config/connectors.yml 18 | bundle _$(shell cat .bundler-version)_ exec rubocop lib spec 19 | 20 | autocorrect: config/connectors.yml 21 | bundle _$(shell cat .bundler-version)_ exec rubocop lib spec -a 22 | 23 | autocorrect-unsafe: config/connectors.yml 24 | bundle _$(shell cat .bundler-version)_ exec rubocop lib spec -A 25 | 26 | # build will set the 
revision key in the config we use in the Gem 27 | # we can add more build-time info there if we want 28 | update_config_dev: config/connectors.yml 29 | ${YQ} e ".revision = \"$(shell git rev-parse HEAD)\"" -i config/connectors.yml 30 | ${YQ} e ".repository = \"$(shell git config --get remote.origin.url)\"" -i config/connectors.yml 31 | ${YQ} e ".version = \"$(shell script/version.sh)\"" -i config/connectors.yml 32 | 33 | update_config: config/connectors.yml 34 | ${YQ} e ".revision = \"$(shell git rev-parse HEAD)\"" -i config/connectors.yml 35 | ${YQ} e ".repository = \"$(shell git config --get remote.origin.url)\"" -i config/connectors.yml 36 | ${YQ} e ".version = \"$(shell cat VERSION)\"" -i config/connectors.yml 37 | 38 | build_utility: update_config_dev build_utility_gem 39 | 40 | build_service: update_config_dev build_service_gem 41 | 42 | release_utility: update_config build_utility_gem push_gem tag 43 | 44 | release_service: update_config build_service_gem push_gem tag 45 | 46 | release_utility_dev: update_config_dev build_utility_gem push_gem 47 | 48 | release_service_dev: update_config_dev build_service_gem push_gem 49 | 50 | tag: 51 | git tag v$(shell cat VERSION) 52 | git push --tags 53 | 54 | build_utility_gem: 55 | mkdir -p .gems 56 | bundle _$(shell cat .bundler-version)_ exec gem build connectors_utility.gemspec 57 | rm -f .gems/* 58 | mv *.gem .gems/ 59 | echo "DO NOT FORGET TO UPDATE ENT-SEARCH" 60 | 61 | build_service_gem: 62 | mkdir -p .gems 63 | bundle _$(shell cat .bundler-version)_ exec gem build connectors_service.gemspec 64 | rm -f .gems/* 65 | mv *.gem .gems/ 66 | 67 | push_gem: 68 | bundle _$(shell cat .bundler-version)_ exec gem push .gems/* 69 | 70 | install: 71 | RUBY_CFLAGS="$(RUBY_CFLAGS)" rbenv install -s 72 | - gem install bundler -v $(shell cat .bundler-version) && rbenv rehash 73 | bundle _$(shell cat .bundler-version)_ install --jobs 1 74 | 75 | build-docker: 76 | docker build -t connectors . 
77 | 78 | run-docker: 79 | docker run --env "elasticsearch.hosts=http://host.docker.internal:9200" --env "elasticsearch.api_key=$(API_KEY)" --rm -it connectors 80 | 81 | exec_app: 82 | cd lib/app; bundle _$(shell cat .bundler-version)_ exec ruby app.rb 83 | 84 | exec_cli: 85 | cd lib/app; bundle _$(shell cat .bundler-version)_ exec ruby console_app.rb 86 | 87 | run: | update_config_dev exec_app 88 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | connectors 2 | Copyright 2022 Elasticsearch B.V. 3 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: cd lib/app; bundle exec rackup config.ru -p ${PORT} 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elastic Ruby connectors 2 | 3 | > [!IMPORTANT] 4 | > _**Enterprise Search will be discontinued in 9.0.**_ 5 | > 6 | > Starting with Elastic version 9.0, we're deprecating the standalone Enterprise Search product with its included features and functionalities (including [Workplace Search](https://www.elastic.co/guide/en/workplace-search/8.x/index.html) and [App Search](https://www.elastic.co/guide/en/app-search/8.x/index.html)). They remain supported in their current form in version 8.x and will only receive security upgrades and fixes. Workplace Search Connector Packages will continue to be supported in their current form throughout 8.x versions, according to our EOL policy: https://www.elastic.co/support/eol. 7 | > We recommend transitioning to our actively developed [Elastic Stack](https://www.elastic.co/elastic-stack) tools for your search use cases. 
However, if you're still using any Enterprise Search products, we recommend using the latest stable release. 8 | > 9 | > Here are some useful links with more information: 10 | > * Enterprise Search FAQ: https://www.elastic.co/resources/enterprise-search/enterprise-search-faq 11 | > * Migrating to 9.x from Enterprise Search 8.x versions: https://www.elastic.co/guide/en/enterprise-search/current/upgrading-to-9-x.html 12 | ___ 13 | 14 | The home of the Elastic connector service and native connectors written in Ruby. This repository contains the framework for customizing Elastic native connectors, or writing your own connectors for advanced use cases. 15 | 16 | Any connector implementation in this repository is for reference only; for supported versions, please see [connectors-python](https://github.com/elastic/connectors-python). 17 | 18 | **The connector will be operated by an administrative user from within Kibana.** 19 | 20 | > Note: The connector framework is a tech preview feature. Tech preview features are subject to change and are not covered by the support SLA of general release (GA) features. 
21 | 22 | Before getting started, review important information about this feature: 23 | 24 | - [Terminology](docs/TERMINOLOGY.md) 25 | - [Getting help and providing feedback](docs/SUPPORT.md) 26 | - [Understand the connector protocol](https://github.com/elastic/connectors-python/blob/main/docs/CONNECTOR_PROTOCOL.md) 27 | 28 | Build, deploy, and operate your own connector: 29 | 30 | - [Building/Deploying a connector](docs/DEVELOPING.md) 31 | - [Operating a connector](https://www.elastic.co/guide/en/enterprise-search/current/connectors.html) 32 | 33 | How to publish your connector: 34 | 35 | - [Contribute to the repository](docs/CONTRIBUTING.md) 36 | 37 | ## Other guides 38 | 39 | - [Code of Conduct](https://www.elastic.co/community/codeofconduct) 40 | - [Getting Support](docs/SUPPORT.md) 41 | - [Releasing](docs/RELEASING.md) 42 | - [Developer guide](docs/DEVELOPING.md) 43 | - [Security Policy](docs/SECURITY.md) 44 | - [Elastic-internal guide](docs/INTERNAL.md) 45 | - [Connector Protocol](https://github.com/elastic/connectors-python/blob/main/docs/CONNECTOR_PROTOCOL.md) 46 | - [Configuration](docs/CONFIG.md) 47 | - [Terminology](docs/TERMINOLOGY.md) 48 | - [Contributing guide](docs/CONTRIBUTING.md) 49 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 8.17.0.0 2 | -------------------------------------------------------------------------------- /bin/connectors_service: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'connectors_service' 4 | ConnectorsService.run! 5 | -------------------------------------------------------------------------------- /bin/list_connectors: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'list_connectors' 4 | ListConnectors.run! 
5 | -------------------------------------------------------------------------------- /catalog-info.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json 3 | apiVersion: backstage.io/v1alpha1 4 | kind: Resource 5 | metadata: 6 | name: buildkite-pipeline-connectors-ruby 7 | description: Buildkite Pipeline for connectors-ruby 8 | links: 9 | - title: Pipeline 10 | url: https://buildkite.com/elastic/connectors-ruby 11 | 12 | spec: 13 | type: buildkite-pipeline 14 | owner: group:enterprise-search 15 | system: buildkite 16 | implementation: 17 | apiVersion: buildkite.elastic.dev/v1 18 | kind: Pipeline 19 | metadata: 20 | name: connectors-ruby 21 | spec: 22 | repository: elastic/connectors-ruby 23 | pipeline_file: ".buildkite/pipeline.yml" 24 | teams: 25 | enterprise-search: 26 | access_level: MANAGE_BUILD_AND_READ 27 | everyone: 28 | access_level: READ_ONLY 29 | -------------------------------------------------------------------------------- /config/connectors.yml.example: -------------------------------------------------------------------------------- 1 | # general metadata 2 | version: CHANGEME 3 | repository: git@github.com:elastic/connectors-ruby.git 4 | revision: main 5 | 6 | elasticsearch: 7 | cloud_id: CHANGEME 8 | hosts: http://localhost:9200 9 | api_key: CHANGEME 10 | retry_on_failure: 3 11 | request_timeout: 120 12 | disable_warnings: true 13 | trace: false 14 | log: false 15 | 16 | thread_pool: 17 | min_threads: 0 18 | max_threads: 5 19 | max_queue: 100 20 | 21 | log_level: info 22 | ecs_logging: true 23 | 24 | poll_interval: 3 25 | termination_timeout: 60 26 | heartbeat_interval: 1800 27 | job_cleanup_interval: 300 28 | 29 | native_mode: true 30 | connector_id: CHANGEME 31 | service_type: CHANGEME 32 | 33 | max_ingestion_queue_size: 500 34 | max_ingestion_queue_bytes: 5242880 35 | 
-------------------------------------------------------------------------------- /connectors_service.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | 4 | Gem::Specification.new do |s| 5 | s.name = 'connectors_service' 6 | s.version = File.read('VERSION').strip 7 | s.homepage = 'https://github.com/elastic/connectors-ruby' 8 | s.summary = 'Gem containing Elastic connectors service' 9 | s.description = '' 10 | s.authors = ['Elastic'] 11 | s.email = 'ent-search-dev@elastic.co' 12 | s.executables << 'connectors_service' 13 | s.executables << 'list_connectors' 14 | s.files = Dir['lib/**/*'] + %w[config/connectors.yml LICENSE NOTICE.txt] 15 | s.license = 'Elastic-2.0' 16 | Bundler.definition.dependencies.select do |dep| 17 | (dep.groups & [:test, :development]).empty? 18 | end.sort_by(&:name).each do |dep| 19 | if dep.latest_version? 20 | s.add_dependency dep.name 21 | else 22 | s.add_dependency dep.name, dep.requirement.as_list 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /connectors_utility.gemspec: -------------------------------------------------------------------------------- 1 | 2 | Gem::Specification.new do |s| 3 | s.name = 'connectors_utility' 4 | s.version = File.read('VERSION').strip 5 | s.homepage = 'https://github.com/elastic/connectors-ruby' 6 | s.summary = 'Gem containing shared Connector Services libraries' 7 | s.description = '' 8 | s.authors = ['Elastic'] 9 | s.metadata = { 10 | "revision" => `git rev-parse HEAD`.strip, 11 | "repository" => 'https://github.com/elastic/connectors-ruby' 12 | } 13 | s.email = 'ent-search-dev@elastic.co' 14 | s.files = %w[ 15 | LICENSE 16 | NOTICE.txt 17 | lib/connectors_utility.rb 18 | lib/utility/es_client.rb 19 | lib/utility/logger.rb 20 | lib/utility/bulk_queue.rb 21 | lib/utility/common.rb 22 | lib/utility/constants.rb 23 | lib/utility/cron.rb 24 | lib/utility/errors.rb 25 
| lib/utility/es_client.rb 26 | lib/utility/environment.rb 27 | lib/utility/error_monitor.rb 28 | lib/utility/exception_tracking.rb 29 | lib/utility/extension_mapping_util.rb 30 | lib/utility/filtering.rb 31 | lib/utility/logger.rb 32 | lib/utility.rb 33 | lib/utility/elasticsearch/index/text_analysis_settings.rb 34 | lib/utility/elasticsearch/index/mappings.rb 35 | lib/utility/elasticsearch/index/language_data.yml 36 | lib/connectors/sync_status.rb 37 | lib/core/scheduler.rb 38 | lib/connectors/connector_status.rb 39 | lib/connectors/crawler/scheduler.rb 40 | lib/core/elastic_connector_actions.rb 41 | lib/core/connector_settings.rb 42 | lib/core/connector_job.rb 43 | lib/core/filtering/validation_status.rb 44 | lib/connectors/job_trigger_method.rb 45 | ] 46 | s.license = 'Elastic-2.0' 47 | s.add_dependency 'activesupport', '>= 5.2' 48 | s.add_dependency 'ecs-logging', '~> 1.0.0' 49 | s.add_dependency 'fugit', '~> 1.11', '>= 1.11.1' 50 | s.add_dependency 'mime-types', '~> 3.6' 51 | s.add_dependency 'tzinfo' 52 | s.add_dependency 'tzinfo-data' 53 | end 54 | -------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 303 See Other 2 | 3 | Location: https://www.elastic.co/community/codeofconduct 4 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Connectors Contributor's Guide 2 | 3 | Thank you for your interest in contributing to Connectors! 4 | 5 | You may also want to read the [development guide](./DEVELOPING.md). 6 | 7 | ### Before you start 8 | 9 | * Prior to opening a pull request, please: 10 | * Read the entirety of this document 11 | * [Create an issue](https://github.com/elastic/connectors-ruby/issues) to discuss the scope of your proposal. 
12 | * Sign the [Contributor License Agreement](https://www.elastic.co/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. 13 | * Run all tests locally, and ensure they are all passing 14 | * Please write simple code and concise documentation, when appropriate. 15 | 16 | 17 | ### Testing 18 | 19 | It is expected that any contribution will include unit tests. The linter and all unit tests must be passing in order to merge any pull request. Ensure that your tests are passing locally _before_ submitting a pull request. 20 | 21 | ```shell 22 | # ensure code standards 23 | make lint 24 | 25 | # run unit tests 26 | make test 27 | ``` 28 | 29 | ### Branching Strategy 30 | 31 | Our `main` branch holds the latest development code for the next release. If the next release will be a minor release, the expectation is that no breaking changes will be in `main`. If a change would be breaking, we need to put it behind a feature flag, or make it an opt-in change. We will only merge breaking PRs when we are ready to start working on the next major. 32 | 33 | All PRs should be created from a fork, to keep a clean set of branches on `origin`. 34 | 35 | Releases will be performed directly in `main` (or a minor branch for patches). 36 | 37 | We will create branches for all minor releases. 
38 | -------------------------------------------------------------------------------- /docs/INTERNAL.md: -------------------------------------------------------------------------------- 1 | # Elastic Internal Documentation 2 | 3 | ### Testing locally with Enterprise Search and Kibana 4 | 5 | ##### Setup 6 | * clone [kibana](https://github.com/elastic/kibana) 7 | * `cd` into your kibana checkout 8 | * install kibana dependencies with: 9 | ```shell 10 | nvm use && yarn kbn clean && yarn kbn bootstrap 11 | ``` 12 | * clone [ent-search](https://github.com/elastic/ent-search/) 13 | * follow the ent-search [setup steps](https://github.com/elastic/ent-search/#set-up) 14 | 15 | ##### Start Elasticsearch 16 | * `cd` into your kibana checkout 17 | * start elasticsearch with: 18 | ```shell 19 | nvm use && yarn es snapshot -E xpack.security.authc.api_key.enabled=true 20 | ``` 21 | 22 | ##### Start Kibana 23 | * `cd` into your kibana checkout 24 | * start kibana with: 25 | ```shell 26 | nvm use && yarn start --no-base-path 27 | ``` 28 | 29 | ##### Start Enterprise Search 30 | * `cd` into your ent-search checkout 31 | * start Enterprise Search with: 32 | ```shell 33 | script/togo/development start 34 | ``` 35 | 36 | ##### Start Connectors 37 | * `cd` into your connectors checkout 38 | * run `make install` to get the latest dependencies 39 | * run `make run` to start Connectors. 40 | -------------------------------------------------------------------------------- /docs/RELEASING.md: -------------------------------------------------------------------------------- 1 | # Releasing the connector service 2 | 3 | > Note: This is for internal use within Elastic. Only Elastic members can release the connector service. 4 | 5 | The version scheme we use is **MAJOR.MINOR.PATCH.BUILD** and stored in the [VERSION](https://github.com/elastic/connectors-ruby/blob/main/VERSION) file at the root of this repository. 
6 | 7 | ## RubyGem Account 8 | 9 | When releasing Gems, you will be asked for an Email and Password. Look into the Vault in the `ent-search-team/rubygem` secret. 10 | 11 | ## Unified release 12 | 13 | **MAJOR.MINOR.PATCH** should match the Elastic and Enterprise Search version it targets and the *BUILD* number should be set to **0** the day the Connector service release is created to be included with the Enterprise Search distribution. 14 | 15 | For example, when shipping for `8.1.2`, the version is `8.1.2.0`. 16 | 17 | To release the connector service: 18 | 19 | 1. Make sure all tests and linter pass with `make lint test` 20 | 2. Run `make release_service release_utility` 21 | 3. Set the [VERSION](../VERSION) file to the new/incremented version on the release branch 22 | 4. PR these changes to the appropriate connector service release branch 23 | 24 | Two Gems will be published to RubyGems: [connectors_service](https://rubygems.org/gems/connectors_service) and [connectors_utility](https://rubygems.org/gems/connectors_utility) 25 | 26 | > Note: you should be logged into rubygems (your_gem_account_name is the ent-search email from vault under `ent-search-team/rubygem`) 27 | 28 | ```shell 29 | curl -u {your_gem_account_name} https://rubygems.org/api/v1/api_key.yaml > ~/.gem/credentials 30 | Enter host password for user '{your_gem_account_name}': {your_password} 31 | 32 | ``` 33 | 34 | Take care of the branching (minor releases only): 35 | 36 | - Increment the VERSION on main to match the next minor release 37 | - Create a new maintenance branch 38 | - Make sure the `.backportrc.json` is updated. The previous minor is added to `targetBranchChoices` and the new minor is used in `branchLabelMapping` 39 | 40 | After the Elastic unified release is complete 41 | 42 | - Update the **BUILD** version ([example PR](https://github.com/elastic/connectors-ruby/pull/81)). Note that the Connectors project does not immediately bump to the next **PATCH** version. 
That won't happen until that patch release's FF date. 43 | 44 | ## In-Between releases 45 | 46 | Sometimes, we need to release the connector service independently from Enterprise Search. 47 | For instance, if someone wants to use the project as an HTTP Service and we have a bug fix we want them to have as soon as possible. 48 | 49 | In that case, we increment the **BUILD** number, and follow the same release process as for the unified release. 50 | 51 | So `8.1.2.1`, `8.1.2.2` etc. On the next unified release, the version will be bumped to the next **PATCH** value, and **BUILD** set to `0` 52 | 53 | **In-Between releases should never introduce new features since they will eventually be merged into the next PATCH release. New features are always done in Developer previews** 54 | 55 | ## Developer preview releases 56 | 57 | For developer previews, we are adding a `pre` tag using an ISO8601 date. You can use `make build_service build_utility`, and the gems will be generated in directory `.gems/` 58 | -------------------------------------------------------------------------------- /docs/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | Thanks for your interest in the security of our products. Our security policy can be found at [https://www.elastic.co/community/security](https://www.elastic.co/community/security). 4 | 5 | ## Reporting a Vulnerability 6 | Please send security vulnerability reports to security@elastic.co. 7 | -------------------------------------------------------------------------------- /docs/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Getting Support 2 | 3 | ### Official Support Services 4 | If you have an Elastic subscription, you are entitled to Support services. See our welcome page for [working with our support team](https://www.elastic.co/support/welcome). 5 | 6 | ### Where do I report issues with Connectors? 
7 | If something is not working as expected, please open an [issue](https://github.com/elastic/connectors-ruby/issues/new). 8 | 9 | ### Where else can I go to get help? 10 | The Ingestion team at Elastic maintains this repository and is happy to help. Try posting your question to the [Elastic discuss forums](https://discuss.elastic.co/c/enterprise-search/84). Be sure to mention that you're using Connectors and also let us know what service type you're trying to use, and any errors/issues you are encountering. You can also find us in the `#enterprise-search` channel of the [Elastic Community Slack](http://elasticstack.slack.com). 11 | -------------------------------------------------------------------------------- /docs/TERMINOLOGY.md: -------------------------------------------------------------------------------- 1 | # Terminology 2 | 3 | - `connector client` - specific light-weight connector implementation, open-code. Connector clients can be built by Elastic or Community. 4 | - `native connector` - a connector client built and supported by Elastic, made available by default on Elastic Cloud. 5 | - `connector service` - the app that runs the asynchronous loop that calls Elasticsearch on a regular basis to check whether syncs need to happen. 6 | - `connector packages` - a previous version of the connector clients specific to Workplace Search. Refer to the [8.3 branch](https://github.com/elastic/connectors-ruby/tree/8.3) if you're looking for connector packages. Also, read more about them in the [custom connector packages guide](https://www.elastic.co/guide/en/workplace-search/current/custom-connector-package.html). 7 | - `data source` - file/database/service that provides data to be ingested into Elasticsearch. 8 | - `connector index` - `.elastic-connectors`, the index to hold connector definitions, e.g. name, service type, configuration, scheduling, etc. 9 | - `connector job index` - `.elastic-connectors-sync-jobs`, the index to hold sync job history. 
module App
  # Keyboard-driven console menu.
  #
  # Holds a title and a list of MenuItem entries, tracks which entry is
  # currently highlighted and lets the user move the highlight with the
  # arrow keys and confirm with Enter.
  class Menu
    attr_reader :items
    attr_reader :title
    attr_reader :index

    # @param title [String] heading printed above the entries
    # @param items [Array<String, Hash>] each entry is either a bare command
    #   string or a hash with :command and :hint keys; the first entry starts
    #   out selected
    def initialize(title, items)
      super()
      @index = 0
      @title = title
      @items = items.map.with_index do |item, i|
        item.is_a?(String) ? MenuItem.new(item, nil, i == 0) : MenuItem.new(item[:command], item[:hint], i == 0)
      end
    end

    # Highlights the entry at +index+ (un-highlighting all others) and
    # redraws the menu.
    def select_item(index)
      @index = index
      @items.each_with_index { |item, i| item.selected = (i == index) }
      display
    end

    # Runs the interactive loop and returns the command of the entry that is
    # highlighted when the user presses Enter (reported as :control_m).
    #
    # Both :down and :up wrap around the list, so moving up from the first
    # entry lands on the last one, mirroring how moving down from the last
    # entry lands on the first.
    #
    # @return [Object, nil] the selected command, or nil when the key stream
    #   ends (a nil key) or the menu has no entries
    def select_command
      display
      interaction = Remedy::Interaction.new
      interaction.loop do |key|
        break if key.nil?

        case key.to_s.to_sym
        when :down
          select_item(wrapped_index(1))
        when :up
          select_item(wrapped_index(-1))
        when :control_m
          # Safe navigation: an empty menu has nothing to return.
          return @items[@index]&.command
        end
      end
    end

    private

    # Index reached by moving +offset+ positions from the current one,
    # wrapping at both ends of the list. Returns 0 for an empty menu.
    def wrapped_index(offset)
      return 0 if @items.empty?

      (@index + offset) % @items.size
    end

    # Clears the terminal and prints the title followed by one line per entry.
    def display
      clear_screen
      puts(title)
      @items.each do |item|
        # NOTE(review): the unselected prefix is a single space in this copy of
        # the file; confirm whether it was meant to match the width of '--> '.
        print(item.selected ? '--> ' : ' ')
        # Plain-Ruby emptiness check so this file does not silently depend on
        # ActiveSupport's Object#present? having been loaded elsewhere.
        hint_given = !item.hint.to_s.strip.empty?
        puts hint_given ? "#{item.hint} (#{item.command})" : item.command
      end
    end

    # Tries the POSIX command first and falls back to the Windows one.
    def clear_screen
      system('clear') || system('cls')
    end

    # Reads one key press (including multi-byte escape sequences) from STDIN
    # in raw, no-echo mode and always restores the terminal afterwards.
    def read_char
      # STDIN.echo= / raw! / cooked! are provided by io/console.
      require 'io/console'

      STDIN.echo = false
      STDIN.raw!

      input = STDIN.getc
      if input == "\e"
        # Escape sequences arrive as several bytes; grab whatever is already
        # buffered and ignore "nothing more to read" errors.
        begin
          input << STDIN.read_nonblock(3)
        rescue StandardError
          nil
        end
        begin
          input << STDIN.read_nonblock(2)
        rescue StandardError
          nil
        end
      end
      input
    ensure
      STDIN.echo = true
      STDIN.cooked!
    end
  end

  # One selectable entry of a Menu: the command it stands for, an optional
  # human-readable hint and whether it is currently highlighted.
  class MenuItem
    attr_reader :command
    attr_reader :hint
    attr_accessor :selected

    def initialize(command, hint = nil, selected = false)
      super()
      @command = command
      @hint = hint
      @selected = selected
    end
  end
end
# Converts the absolute path of a connector source file into the feature name
# handed to `require`.
#
# Everything up to and including the last "lib/connectors" in the directory
# part is replaced by "connectors", and a trailing ".rb" is dropped from the
# file name, e.g. "/app/lib/connectors/base/adapter.rb" becomes
# "connectors/base/adapter".
def required_path(absolute_path)
  feature_dir = File.dirname(absolute_path).sub(%r{.*lib/connectors}, 'connectors')
  File.join(feature_dir, File.basename(absolute_path, '.rb'))
end
module Connectors
  module Base
    # Collection of class-level helpers that turn third-party objects into
    # Elasticsearch documents: id conversion, MIME/extension lookup and
    # normalisation of enums, dates and paths.
    class Adapter
      # Frozen list of field names made of 'body' plus the thumbnail and
      # sub-extractor reserved fields from Utility::Constants. Memoised.
      def self.fields_to_preserve
        @fields_to_preserve ||= ['body']
          .concat(Utility::Constants::THUMBNAIL_FIELDS)
          .concat(Utility::Constants::SUBEXTRACTOR_RESERVED_FIELDS)
          .map(&:freeze)
          .freeze
      end

      # Defines three singleton helpers for one id namespace:
      #
      # * <method_prefix>_id_to_es_id(id)   -> "<id_prefix>_<id>"
      # * es_id_is_<method_prefix>_id?(id)  -> true when the id carries the prefix
      # * es_id_to_<method_prefix>_id(id)   -> the part after the prefix,
      #   raising ArgumentError when the id does not carry it
      #
      # The prefix is escaped before being put into the pattern so that
      # characters with a regex meaning in +id_prefix+ (".", "+", ...) are
      # matched literally, consistently with how the ids are built.
      #
      # NOTE(review): the pattern is not anchored at the start, so an id that
      # merely contains "<id_prefix>_" somewhere also matches. Left as is to
      # stay compatible with existing ids; confirm whether that is intended.
      def self.generate_id_helpers(method_prefix, id_prefix)
        id_pattern = /#{Regexp.escape(id_prefix.to_s)}_(.+)$/

        define_singleton_method("#{method_prefix}_id_to_es_id") do |id|
          "#{id_prefix}_#{id}"
        end

        define_singleton_method("es_id_is_#{method_prefix}_id?") do |es_id|
          regex_match = id_pattern.match(es_id)
          regex_match.present? && regex_match.size == 2
        end

        define_singleton_method("es_id_to_#{method_prefix}_id") do |es_id|
          regex_match = id_pattern.match(es_id)

          raise ArgumentError, "Invalid id #{es_id} for source with method prefix #{method_prefix}." if regex_match.nil? || regex_match.length != 2
          regex_match[1]
        end
      end

      # MIME type for +file_name+: the first type known to MIME::Types, or
      # otherwise the first one Utility::ExtensionMappingUtil reports for the
      # file's extension (nil when neither knows it).
      def self.mime_type_for_file(file_name)
        ruby_detected_type = MIME::Types.type_for(file_name)
        return ruby_detected_type.first.simplified if ruby_detected_type.present?
        extension = extension_for_file(file_name)
        Utility::ExtensionMappingUtil.get_mime_types(extension)&.first
      end

      # Lower-cased extension of +file_name+ without the leading dot, or nil
      # when the name has no extension.
      #
      # The nil result used to come implicitly from `delete_prefix!` returning
      # nil when nothing was removed; it is spelled out here so the contract
      # is visible.
      def self.extension_for_file(file_name)
        extension = File.extname(file_name.downcase)
        extension.empty? ? nil : extension.delete_prefix('.')
      end

      # File name without directory and without its extension.
      def self.strip_file_extension(file_name)
        File.basename(file_name, File.extname(file_name))
      end

      # Lower-cased string form of +enum+; nil stays nil.
      def self.normalize_enum(enum)
        enum&.to_s&.downcase
      end

      # RFC 3339 string for +date+.
      #
      # Date/Time-like objects are converted directly; anything else is parsed
      # with Time.zone.parse. Returns nil for blank input and for input that
      # cannot be parsed: parse errors are reported through
      # Utility::ExceptionTracking, and a nil parse result is passed through
      # instead of raising NoMethodError.
      def self.normalize_date(date)
        return nil if date.blank?

        case date
        when Date, Time, DateTime, ActiveSupport::TimeWithZone
          date.to_datetime.rfc3339
        else
          begin
            Time.zone.parse(date)&.to_datetime&.rfc3339
          rescue ArgumentError, TypeError => e
            Utility::ExceptionTracking.capture_exception(e)
            nil
          end
        end
      end

      # Ensures +path+ starts with "/"; blank input gives nil.
      def self.normalize_path(path)
        return nil if path.blank?
        return path if path.start_with?('/')
        "/#{path}"
      end

      # Normalised path component of +url+, or nil when the URL is blank,
      # has no scheme or cannot be parsed.
      def self.url_to_path(url)
        return nil if url.blank?
        uri = URI(url)
        return nil if uri.scheme.blank?
        normalize_path(uri.path)
      rescue URI::InvalidURIError, ArgumentError
        nil
      end

      # Builds a symbol-keyed document from +object+ according to +fields+.
      #
      # Each entry of +fields+ must provide :remote (key in the object's JSON
      # form, optionally a dotted path) and :target (key in the result).
      # A blank direct lookup falls back to a dotted-path lookup; nil values
      # are skipped. The document always carries :type.
      def self.es_document_from_configured_object_base(object_type:, object:, fields:)
        object_as_json = object.as_json

        adapted_object = {
          :type => normalize_enum(object_type)
        }

        fields.each do |field_data|
          remote_field_name = field_data.fetch(:remote)

          value = object_as_json[remote_field_name]
          value = object_as_json.dig(*remote_field_name.split('.')) if value.blank?
          next if value.nil?

          adapted_object[field_data.fetch(:target)] = value
        end

        adapted_object.symbolize_keys
      end

      # Instance-level shortcuts to the class-level normalisers.
      delegate :normalize_enum, :normalize_date, :normalize_path, :to => :class
    end
  end
end

module Connectors
  module Base
    # Thin Faraday wrapper used as the base for third-party API clients.
    #
    # Subclasses add middleware through #additional_middleware; every HTTP
    # verb method first gives the optional +ensure_fresh_auth+ callable a
    # chance to refresh credentials.
    class CustomClient
      attr_reader :base_url, :middleware, :ensure_fresh_auth

      MAX_RETRIES = 5

      # @param base_url [String, nil] passed to Faraday.new
      # @param ensure_fresh_auth [#call, nil] invoked with the client before
      #   every request when present
      def initialize(base_url: nil, ensure_fresh_auth: nil)
        @base_url = base_url
        @ensure_fresh_auth = ensure_fresh_auth
        middleware!
      end

      # Rebuilds the middleware list: subclass middleware first, then the
      # defaults, with nil entries removed.
      def middleware!
        @middleware = Array.wrap(additional_middleware)
        @middleware += Array.wrap(default_middleware)
        @middleware.compact!
      end

      def additional_middleware
        [] # define as needed in subclass
      end

      def default_middleware
        [[Faraday::Request::Retry, retry_config]]
      end

      # Options handed to the retry middleware.
      def retry_config
        {
          :retry_statuses => [429],
          :backoff_factor => 2,
          :max => MAX_RETRIES,
          :interval => 0.05
        }
      end

      # One public method per HTTP verb, each forwarding to the Faraday
      # connection after the optional auth refresh.
      [
        :delete,
        :get,
        :head,
        :options,
        :patch,
        :post,
        :put,
      ].each do |http_verb|
        define_method http_verb do |*args, &block|
          ensure_fresh_auth.call(self) if ensure_fresh_auth.present?
          http_client.public_send(http_verb, *args, &block)
        end
      end

      # Drops the memoised connection and builds a new one.
      def http_client!
        @http_client = nil
        http_client
      end

      # Memoised Faraday connection using the configured middleware and the
      # :httpclient adapter.
      def http_client
        @http_client ||= Faraday.new(base_url) do |faraday|
          middleware.each do |middleware_config|
            faraday.use(*middleware_config)
          end

          faraday.adapter :httpclient
        end
      end

      private

      # https://github.com/lostisland/faraday/blob/b09c6db31591dd1a58fffcc0979b0c7d96b5388b/lib/faraday/connection.rb#L171
      METHODS_WITH_BODY = [:post, :put, :patch].freeze

      def send_body?(method)
        METHODS_WITH_BODY.include?(method.to_sym)
      end

      # Performs the request and converts an HTTP 429 answer into
      # Utility::ThrottlingError.
      #
      # Recognised +options+: :body, :params, :headers, :cursors and
      # :retry_multiplier (the historical misspelling :retry_mulitplier is
      # still honoured when the correctly spelled key is absent).
      #
      # The suspension time is the Retry-After header value times the
      # multiplier, falling back to 60 seconds when that is not positive.
      #
      # @return the response for any status other than 429
      # @raise [Utility::ThrottlingError] on status 429
      def request_with_throttling(method, url, options = {})
        response =
          if send_body?(method)
            public_send(method, url, options[:body], options[:headers])
          else
            public_send(method, url, options[:params], options[:headers])
          end

        return response unless response.status == 429

        retry_after = response.headers['Retry-After']
        multiplier = options.fetch(:retry_multiplier) { options.fetch(:retry_mulitplier, 1) }
        retry_after_secs = (retry_after.is_a?(Array) ? retry_after.first.to_i : retry_after.to_i) * multiplier
        retry_after_secs = 60 if retry_after_secs <= 0
        Utility::Logger.warn("Exceeded #{self.class} request limits. Going to sleep for #{retry_after_secs} seconds")
        raise Utility::ThrottlingError.new(:suspend_until => DateTime.now + retry_after_secs.seconds, :cursors => options[:cursors])
      end
    end
  end
end

module Connectors
  # Names of the connector states, plus the subsets used by callers.
  class ConnectorStatus
    CREATED = 'created'
    NEEDS_CONFIGURATION = 'needs_configuration'
    CONFIGURED = 'configured'
    CONNECTED = 'connected'
    ERROR = 'error'

    # Every known status. Frozen so the shared constant cannot be mutated.
    STATUSES = [
      CREATED,
      NEEDS_CONFIGURATION,
      CONFIGURED,
      CONNECTED,
      ERROR
    ].freeze

    # Statuses listed as allowing a sync.
    STATUSES_ALLOWING_SYNC = [
      CONFIGURED,
      CONNECTED,
      ERROR
    ].freeze
  end
end
module Connectors
  module Crawler
    # Scheduler variant for the Elastic crawler: polls the crawler connectors
    # and reports the ones whose regular or custom schedule is due.
    class Scheduler < Core::Scheduler
      # Crawler connector settings, or an empty list when none are returned
      # or the lookup fails (the failure is logged).
      def connector_settings
        fetched = Core::ConnectorSettings.fetch_crawler_connectors
        fetched || []
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, 'Could not retrieve Crawler connectors due to unexpected error.')
        []
      end

      # Polls until shutdown. For every connector whose schedule fired, yields
      # (settings, :sync, schedule_key), where schedule_key is nil for the
      # regular schedule and the custom schedule's key otherwise.
      def when_triggered
        loop do
          begin
            # grab the time right before we iterate over all connectors
            poll_started_at = Time.now
            connector_settings.each do |settings|
              # crawler only supports :sync
              if sync_triggered?(settings, poll_started_at)
                yield settings, :sync, nil
              else
                triggered_key = custom_schedule_triggered(settings, poll_started_at)
                yield settings, :sync, triggered_key if triggered_key
              end
            end
          rescue *Utility::AUTHORIZATION_ERRORS => e
            log_authorization_error(e)
          rescue StandardError => e
            log_standard_error(e)
          ensure
            break if @is_shutting_down

            sleep_for_poll_interval
          end
        end
      end

      private

      # Only the crawler service type is handled by this scheduler.
      def connector_registered?(service_type)
        'elastic-crawler' == service_type
      end

      # custom scheduling has no ordering, so the first-found schedule is returned
      def custom_schedule_triggered(cs, time_at_poll_start)
        triggered = cs.custom_scheduling_settings.find do |_key, schedule|
          label = "#{cs.formatted} - #{schedule[:name]}"
          schedule_triggered?(schedule, label, time_at_poll_start)
        end

        triggered&.first
      end
    end
  end
end
-------------------------------------------------------------------------------- 1 | e1xydGYxXGFuc2lcYW5zaWNwZzEyNTJcY29jb2FydGYyNjM5Clxjb2NvYXRleHRzY2FsaW5nMFxjb2NvYXBsYXRmb3JtMHtcZm9udHRibFxmMFxmc3dpc3NcZmNoYXJzZXQwIEhlbHZldGljYS1Cb2xkO1xmMVxmc3dpc3NcZmNoYXJzZXQwIEhlbHZldGljYTtcZjJcZnN3aXNzXGZjaGFyc2V0MCBIZWx2ZXRpY2EtT2JsaXF1ZTsKfQp7XGNvbG9ydGJsO1xyZWQyNTVcZ3JlZW4yNTVcYmx1ZTI1NTt9CntcKlxleHBhbmRlZGNvbG9ydGJsOzt9ClxwYXBlcncxMTkwMFxwYXBlcmgxNjg0MFxtYXJnbDE0NDBcbWFyZ3IxNDQwXHZpZXd3MTE1MjBcdmlld2g4NDAwXHZpZXdraW5kMApccGFyZFx0eDU2Nlx0eDExMzNcdHgxNzAwXHR4MjI2N1x0eDI4MzRcdHgzNDAxXHR4Mzk2OFx0eDQ1MzVcdHg1MTAyXHR4NTY2OVx0eDYyMzZcdHg2ODAzXHBhcmRpcm5hdHVyYWxccGFydGlnaHRlbmZhY3RvcjAKClxmMFxiXGZzMjQgXGNmMCBFeGFtcGxlIEF0dGFjaG1lbnQgMDFcClwKClxmMVxiMCBUaGlzIGlzIHRoZQpcZjJcaSAgZmlyc3QgClxmMVxpMCBvZiBcdWwgdGhyZWUgXHVsbm9uZSBleGFtcGxlIGF0dGFjaG1lbnRzfQ== -------------------------------------------------------------------------------- /lib/connectors/example/attachments/second_attachment.txt: -------------------------------------------------------------------------------- 1 | e1xydGYxXGFuc2lcYW5zaWNwZzEyNTJcY29jb2FydGYyNjM5Clxjb2NvYXRleHRzY2FsaW5nMFxjb2NvYXBsYXRmb3JtMHtcZm9udHRibFxmMFxmc3dpc3NcZmNoYXJzZXQwIEhlbHZldGljYS1Cb2xkO1xmMVxmc3dpc3NcZmNoYXJzZXQwIEhlbHZldGljYTtcZjJcZnN3aXNzXGZjaGFyc2V0MCBIZWx2ZXRpY2EtT2JsaXF1ZTsKfQp7XGNvbG9ydGJsO1xyZWQyNTVcZ3JlZW4yNTVcYmx1ZTI1NTt9CntcKlxleHBhbmRlZGNvbG9ydGJsOzt9ClxwYXBlcncxMTkwMFxwYXBlcmgxNjg0MFxtYXJnbDE0NDBcbWFyZ3IxNDQwXHZpZXd3MTE1MjBcdmlld2g4NDAwXHZpZXdraW5kMApccGFyZFx0eDU2Nlx0eDExMzNcdHgxNzAwXHR4MjI2N1x0eDI4MzRcdHgzNDAxXHR4Mzk2OFx0eDQ1MzVcdHg1MTAyXHR4NTY2OVx0eDYyMzZcdHg2ODAzXHBhcmRpcm5hdHVyYWxccGFydGlnaHRlbmZhY3RvcjAKClxmMFxiXGZzMjQgXGNmMCBFeGFtcGxlIEF0dGFjaG1lbnQgMDJcClwKClxmMVxiMCBUaGlzIGlzIHRoZQpcZjJcaSAgc2Vjb25kIApcZjFcaTAgb2YgXHVsIHRocmVlIFx1bG5vbmUgZXhhbXBsZSBhdHRhY2htZW50c30= -------------------------------------------------------------------------------- /lib/connectors/example/attachments/third_attachment.txt: 
module Connectors
  module Example
    # Minimal reference connector that yields three documents, each carrying
    # the content of one bundled attachment file.
    class Connector < Connectors::Base::Connector
      def self.service_type
        'example'
      end

      def self.display_name
        'Example Connector'
      end

      # Field 'Foo' won't have a default value. Field 'Bar' will have the default value 'Value'.
      def self.configurable_fields
        {
          'foo' => {
            'label' => 'Foo',
            'value' => nil
          },
          :bar => {
            :label => 'Bar',
            :value => 'Value'
          }
        }
      end

      def initialize(configuration: {}, job_description: nil)
        super
      end

      def do_health_check
        # Do the health check by trying to access 3rd-party system just to verify that everything is set up properly.
        #
        # To emulate unhealthy 3rd-party system situation, uncomment the following line:
        # raise 'something went wrong'
      end

      def self.advanced_snippet_validators
        ExampleAdvancedSnippetValidator
      end

      # Yields one document hash per bundled attachment with the keys
      # :id, :name and :_attachment (the attachment file content).
      #
      # All attachments are loaded before the first document is yielded, so a
      # missing file raises before anything is emitted.
      def yield_documents
        attachments = [
          load_attachment('first_attachment.txt'),
          load_attachment('second_attachment.txt'),
          load_attachment('third_attachment.txt'),
        ]

        attachments.each_with_index do |att, index|
          data = { id: (index + 1).to_s, name: "example document #{index + 1}", _attachment: att }

          # Uncomment one of these two lines to simulate longer running sync jobs
          #
          # sleep(rand(10..60).seconds)
          # sleep(rand(1..10).minutes)

          yield data
        end
      end

      private

      # Returns the content of the attachment stored under the `attachments`
      # directory next to this file.
      #
      # File.read opens and closes the file itself; previously an open File
      # handle was returned and never closed, leaking one descriptor per
      # attachment on every sync.
      #
      # Raises RuntimeError when the attachment does not exist.
      def load_attachment(path)
        attachment_dir = "#{File.dirname(__FILE__)}/attachments"
        attachment_path = "#{attachment_dir}/#{path}"

        unless File.exist?(attachment_path)
          raise "Attachment at location '#{attachment_path}' doesn't exist. Attachments should be located under #{attachment_dir}"
        end

        File.read(attachment_path)
      end
    end
  end
end
module Connectors
  module Example
    # Placeholder validator for the example connector: any non-empty advanced
    # snippet is reported as INVALID because filtering is not implemented.
    class ExampleAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
      # Returns a hash with :state and :errors, after logging it.
      def is_snippet_valid
        # TODO: real filtering validation will follow later
        snippet_given = @advanced_snippet.present? && !@advanced_snippet.empty?

        validation_result = {
          :state => snippet_given ? Core::Filtering::ValidationStatus::INVALID : Core::Filtering::ValidationStatus::VALID,
          :errors => snippet_given ? missing_implementation_errors : []
        }

        log_validation_result(validation_result)
        validation_result
      end

      private

      # The single error reported for every non-empty snippet.
      def missing_implementation_errors
        [
          {
            :ids => ['missing-implementation'],
            :messages => ['Filtering is not implemented yet for the example connector']
          }
        ]
      end
    end
  end
end
The Enterprise Search document ID will be prefixed with the service type, 17 | # in our case - `gitlab`. 18 | generate_id_helpers :gitlab, 'gitlab' 19 | 20 | def self.to_es_document(type, source_doc) 21 | source_doc = source_doc.with_indifferent_access 22 | result = {} 23 | case type.to_sym 24 | when :project 25 | result.merge!( 26 | { 27 | :url => source_doc[:web_url], 28 | :body => source_doc[:description], 29 | :title => source_doc[:name], 30 | :created_at => source_doc[:created_at], 31 | :last_modified_at => source_doc[:last_activity_at], 32 | :visibility => source_doc[:visibility], 33 | :namespace => if source_doc[:namespace].nil? 34 | nil 35 | else 36 | source_doc[:namespace][:name] 37 | end 38 | } 39 | ) 40 | else 41 | # don't remap 42 | result.merge!(source_doc) 43 | end 44 | result[:id] = gitlab_id_to_es_id(source_doc[:id]) 45 | result[:type] = type 46 | result 47 | end 48 | end 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /lib/connectors/gitlab/connector.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 
module Connectors
  module GitLab
    # Connector that syncs GitLab projects page by page through the Extractor.
    class Connector < Connectors::Base::Connector
      def self.service_type
        'gitlab'
      end

      def self.display_name
        'GitLab Connector'
      end

      # The API token is configured under the :api_key field.
      def self.configurable_fields
        {
          :base_url => {
            :label => 'Base URL',
            :value => Connectors::GitLab::DEFAULT_BASE_URL
          },
          :api_key => {
            :label => 'API Key'
          }
        }
      end

      def self.advanced_snippet_validators
        GitLabAdvancedSnippetValidator
      end

      def initialize(configuration: {}, job_description: nil)
        super

        # The configurable field is named :api_key (see configurable_fields).
        # Reading only :api_token always produced nil, so requests went out
        # without a token. :api_token is still honoured as a fallback for any
        # configuration that happens to carry it.
        api_token = @configuration.dig(:api_key, :value) || @configuration.dig(:api_token, :value)

        @extractor = Connectors::GitLab::Extractor.new(
          :base_url => @configuration.dig(:base_url, :value),
          :api_token => api_token
        )
      end

      # Yields one Elasticsearch document per GitLab project, following the
      # next-page link returned by the extractor until there is none.
      def yield_documents
        next_page_link = nil
        loop do
          next_page_link = @extractor.yield_projects_page(next_page_link) do |projects_chunk|
            projects_chunk.each do |project|
              yield Connectors::GitLab::Adapter.to_es_document(:project, project)
            end
          end
          break unless next_page_link.present?
        end
      end

      private

      def do_health_check
        @extractor.health_check
      end
    end
  end
end
module Connectors
  module GitLab
    DEFAULT_BASE_URL = 'https://gitlab.com/api/v4'

    # HTTP client for the GitLab API, authenticated with a bearer token.
    class CustomClient < Connectors::Base::CustomClient
      attr_reader :api_token

      # Error describing a failed request to a GitLab endpoint.
      class ClientError < StandardError
        # NOTE(review): api_token is never assigned on the error and always
        # reads nil; the reader is kept only for interface compatibility.
        attr_reader :status_code, :endpoint, :api_token

        def initialize(status_code, endpoint)
          @status_code = status_code
          @endpoint = endpoint
          # Give the exception a meaningful message; without the super call
          # the message was just the class name.
          super("GitLab request to '#{endpoint}' failed with status code #{status_code}")
        end
      end

      # base_url falls back to DEFAULT_BASE_URL when nil.
      def initialize(base_url:, api_token:, ensure_fresh_auth: nil)
        @api_token = api_token
        super(:base_url => base_url || DEFAULT_BASE_URL, :ensure_fresh_auth => ensure_fresh_auth)
      end

      # Middleware stack: follow redirects, restrict requests to the
      # configured and default hosts, and send the API token as a bearer token.
      def additional_middleware
        [
          ::FaradayMiddleware::FollowRedirects,
          [Utility::Middleware::RestrictHostnames, { :allowed_hosts => [base_url, DEFAULT_BASE_URL] }],
          [Utility::Middleware::BearerAuth, { :bearer_auth_token => api_token }]
        ]
      end
    end
  end
end
module Connectors
  module GitLab
    # Fetches projects from the GitLab REST API one page at a time.
    class Extractor
      PAGE_SIZE = 100 # max is 100

      def initialize(base_url: nil, api_token: nil, owned_only: true)
        super()
        @base_url = base_url
        @api_token = api_token
        # only get projects that user owns
        @owned_only = owned_only
      end

      # Requests one page of projects and yields the parsed JSON body.
      #
      # next_page_link - the 'Link' header value returned by the previous
      #                  call, or nil for the first page.
      #
      # Returns the 'Link' header of the response (nil when absent).
      # Raises RuntimeError when next_page_link contains no http(s) URL.
      def yield_projects_page(next_page_link = nil)
        query_params = {
          :pagination => :keyset,
          :per_page => PAGE_SIZE,
          :order_by => :id,
          :sort => :desc,
          :owned => @owned_only
        }

        if next_page_link.present?
          # the query string of the next-page URL replaces the initial parameters
          if (matcher = /(https?:[^>]*)/.match(next_page_link))
            clean_query = URI.parse(matcher.captures[0]).query
            query_params = Rack::Utils.parse_query(clean_query)
          else
            raise "Next page link has unexpected format: #{next_page_link}"
          end
        end
        response = client.get('projects', query_params)

        projects_chunk = JSON.parse(response.body)
        yield projects_chunk

        # return next link
        response.headers['Link']
      end

      # Verifies connectivity with a simple call that fetches the current user.
      #
      # Raises RuntimeError unless the response is present with status 200.
      def health_check
        response = client.get('user')
        return if response.present? && response.status == 200

        # The response itself may be nil/blank here, so use safe navigation:
        # interpolating response.status directly raised NoMethodError and hid
        # the intended error message.
        raise "Health check failed with response status #{response&.status} and body #{response&.body}"
      end

      private

      def client
        @client ||= Connectors::GitLab::CustomClient.new(base_url: @base_url, api_token: @api_token)
      end
    end
  end
end
module Connectors
  module MongoDB
    # Schema-based advanced snippet validator that defaults to the MongoDB
    # advanced snippet schema.
    class MongoAdvancedSnippetAgainstSchemaValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetAgainstSchemaValidator
      # schema defaults to Connectors::MongoDB::AdvancedSnippet::SCHEMA;
      # both arguments are forwarded to the parent validator.
      def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
        super(advanced_snippet, schema)
      end
    end
  end
end
module Connectors
  module MongoDB
    # Filter transformer that converts the keys of an advanced snippet to snake_case.
    class MongoAdvancedSnippetSnakeCaseTransformer < Core::Filtering::Transform::FilterTransformer
      def initialize(advanced_snippet = {})
        super

        @advanced_snippet = advanced_snippet
        @transformation = lambda { |snippet| snake_case_filter(snippet) }
      end

      private

      # Writes every entry of advanced_snippet into transformed_filter under
      # its snake_cased key and returns transformed_filter. A nil snippet
      # leaves transformed_filter untouched.
      def snake_case_filter(advanced_snippet, transformed_filter = {})
        advanced_snippet&.each do |key, value|
          transformed_filter[key.to_s.underscore] = snake_case_value(value)
        end

        transformed_filter
      end

      # Hashes are converted recursively; for arrays only the hash entries
      # directly inside them are converted; anything else is returned as is.
      def snake_case_value(value)
        case value
        when Hash
          snake_case_filter(value, {})
        when Array
          value.map { |entry| entry.is_a?(Hash) ? snake_case_filter(entry, {}) : entry }
        else
          value
        end
      end
    end
  end
end
module Connectors
  # Registry mapping a service type name to its connector class.
  class Factory
    attr_reader :connectors

    def initialize
      @connectors = {}
    end

    # Stores klass under name, replacing any previous registration.
    def register(name, klass)
      @connectors.store(name, klass)
    end

    def registered?(name)
      @connectors.key?(name)
    end

    # Returns the class registered under name, or nil.
    def connector_class(name)
      @connectors[name]
    end

    # Instantiates the connector registered under name.
    # Raises RuntimeError when nothing is registered under that name.
    def connector(name, configuration, job_description: nil)
      klass = connector_class(name)
      unless klass.present?
        raise "Connector #{name} is not yet registered. You need to register it before use"
      end

      klass.new(configuration: configuration, job_description: job_description)
    end

    # Registered names in sorted order.
    def registered_connectors
      @connectors.keys.sort
    end
  end

  REGISTRY = Factory.new

  require_relative './example/connector'
  REGISTRY.register(Connectors::Example::Connector.service_type, Connectors::Example::Connector)

  # loading plugins (might replace this with a directory scan and conventions on names)
  require_relative './gitlab/connector'
  REGISTRY.register(Connectors::GitLab::Connector.service_type, Connectors::GitLab::Connector)

  require_relative 'mongodb/connector'
  REGISTRY.register(Connectors::MongoDB::Connector.service_type, Connectors::MongoDB::Connector)
end
module Connectors
  # Sync job status values and their groupings.
  class SyncStatus
    PENDING = 'pending'
    IN_PROGRESS = 'in_progress'
    CANCELING = 'canceling'
    CANCELED = 'canceled'
    SUSPENDED = 'suspended'
    COMPLETED = 'completed'
    ERROR = 'error'

    # The group arrays are frozen: the frozen_string_literal pragma only
    # freezes the strings, so without this any caller could accidentally
    # mutate these shared constants.

    # Every known status.
    STATUSES = [
      PENDING,
      IN_PROGRESS,
      CANCELING,
      CANCELED,
      SUSPENDED,
      COMPLETED,
      ERROR
    ].freeze

    PENDING_STATUSES = [
      PENDING,
      SUSPENDED
    ].freeze

    ACTIVE_STATUSES = [
      IN_PROGRESS,
      CANCELING
    ].freeze

    TERMINAL_STATUSES = [
      CANCELED,
      COMPLETED,
      ERROR
    ].freeze
  end
end
module Connectors
  # Wraps the extraction of a single document so that ordinary errors are
  # recorded on the error monitor instead of aborting, while a fixed set of
  # fatal errors is re-raised.
  class TolerableErrorHelper
    def initialize(error_monitor)
      @error_monitor = error_monitor
    end

    # Runs the given block and notes a success on the error monitor.
    # Fatal exceptions (see fatal_exception_classes) are logged and re-raised;
    # any other StandardError is logged and reported to the error monitor.
    def yield_single_document(identifier: nil)
      Utility::Logger.debug("Extracting single document for #{identifier}") if identifier
      yield
      @error_monitor.note_success
    rescue *fatal_exception_classes => e
      Utility::ExceptionTracking.augment_exception(e)
      Utility::Logger.error(extraction_error_message('Encountered a fall-through error during extraction', identifier, e))
      raise
    rescue StandardError => e
      Utility::ExceptionTracking.augment_exception(e)
      Utility::Logger.warn(extraction_error_message('Encountered error during extraction', identifier, e))
      @error_monitor.note_error(e, :id => e.id)
    end

    private

    # Builds the log line shared by both rescue branches.
    def extraction_error_message(prefix, identifier, error)
      "#{prefix}#{identifying_error_message(identifier)}: #{error.class}: #{error.message} {:message_id => #{error.id}}"
    end

    def identifying_error_message(identifier)
      return '' unless identifier.present?

      " of '#{identifier}'"
    end

    # Exceptions that must never be tolerated.
    def fatal_exception_classes
      [
        Utility::ErrorMonitor::MonitoringError,
        Core::ConnectorNotFoundError,
        Core::ConnectorJobNotFoundError,
        Core::ConnectorJobCanceledError,
        Core::ConnectorJobNotRunningError
      ]
    end
  end
end
# Entry point of the connectors service.
class ConnectorsService
  class << self
    # Runs the preflight check and then starts the dispatcher inside the
    # execution environment set up for App::Config. A failed preflight check
    # is logged and terminates the process with exit status -1.
    def run!
      Utility::Environment.set_execution_environment(App::Config) do
        begin
          App::PreflightCheck.run!
          App::Dispatcher.start!
        rescue App::PreflightCheck::CheckFailure => e
          Utility::Logger.error("Preflight check failed: #{e.message}")
          exit(-1)
        end
      end
    end
  end
end
module Core
  # Writes the initial configuration of a freshly created connector.
  class Configuration
    class << self
      # For a connector in CREATED status, stores the connector class's
      # configurable fields and features and moves the connector to
      # CONFIGURED or NEEDS_CONFIGURATION. Does nothing for any other status,
      # or when no connector class is registered for the service type.
      def update(connector_settings, service_type = nil)
        return unless connector_settings.connector_status == Connectors::ConnectorStatus::CREATED

        effective_service_type = connector_settings.service_type || service_type
        connector_class = Connectors::REGISTRY.connector_class(effective_service_type)
        unless connector_class
          Utility::Logger.error("Couldn't find connector for service type #{effective_service_type}")
          return
        end

        configuration = connector_class.configurable_fields_indifferent_access
        doc = {
          :configuration => configuration,
          :features => features_for(connector_class)
        }
        doc[:service_type] = service_type if service_type && connector_settings.needs_service_type?

        # We want to set connector to CONFIGURED status if all configurable fields have default values
        all_defaults_present = configuration.values.all? { |setting| setting[:value].present? }
        if all_defaults_present
          Utility::Logger.debug("All connector configurable fields provided default values for #{connector_settings.formatted}.")
        end
        new_connector_status = all_defaults_present ? Connectors::ConnectorStatus::CONFIGURED : Connectors::ConnectorStatus::NEEDS_CONFIGURATION

        doc[:status] = new_connector_status
        Utility::Logger.info("Changing connector status to #{new_connector_status} for #{connector_settings.formatted}.")
        Core::ElasticConnectorActions.update_connector_fields(connector_settings.id, doc)
      end

      private

      # Builds { feature => { subfeature => { :enabled => flag } } } from the
      # connector class's kibana_features definitions.
      def features_for(connector_class)
        features = {}

        connector_class.kibana_features.each do |feature_definition, _hsh|
          feature = feature_definition[:feature]
          features[feature] = {} unless features.key?(feature)
          features[feature][feature_definition[:subfeature]] = { :enabled => feature_definition[:enabled] }
        end

        features
      end
    end
  end
end
module Core
  module Filtering
    module AdvancedSnippet
      # Advanced snippet validator that delegates to a schema validator.
      class AdvancedSnippetAgainstSchemaValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
        # The schema validator receives the snippet exactly as passed in,
        # with ADVANCED_SNIPPET_ID as its error id.
        def initialize(advanced_snippet, schema)
          super(advanced_snippet)

          @schema = schema
          validator_options = { schema: schema, payload: advanced_snippet, error_id: ADVANCED_SNIPPET_ID }
          @schema_validator = Core::Filtering::SchemaValidator.new(**validator_options)
        end

        # Returns whatever the schema validator reports for the snippet.
        def is_snippet_valid
          @schema_validator.validate_against_schema
        end
      end
    end
  end
end
module Core
  module Filtering
    module AdvancedSnippet
      # Base class for advanced snippet validators. Subclasses override
      # is_snippet_valid and return a hash with :state and :errors.
      class AdvancedSnippetValidator
        ADVANCED_SNIPPET_ID = 'advanced_snippet'

        # A nil snippet is normalised to an empty hash.
        def initialize(advanced_snippet)
          @advanced_snippet = advanced_snippet || {}
        end

        # Raises RuntimeError; subclasses must provide the implementation.
        def is_snippet_valid
          raise 'Advanced Snippet validation not implemented'
        end

        private

        # Logs the outcome of a validation. The example connector's validator
        # calls this helper, but no definition existed in this class, so that
        # call would fail with NoMethodError.
        #
        # validation_result - hash with :state and, optionally, :errors (a
        #                     list of hashes with :ids and :messages).
        def log_validation_result(validation_result)
          Utility::Logger.info("Filtering advanced snippet validation result: #{validation_result[:state]}")

          # Array() tolerates a missing :errors entry
          Array(validation_result[:errors]).each do |error|
            Utility::Logger.warn("Validation error for: '#{error[:ids]}': '#{error[:messages]}'")
          end
        end
      end
    end
  end
end
module Core
  module Filtering
    # Applies the ordered simple rules of a filter to documents after extraction.
    class PostProcessEngine
      attr_reader :rules

      def initialize(filtering)
        @rules = ordered_rules(filtering)
      end

      # Returns a PostProcessResult for the first rule matching the document,
      # or for SimpleRule::DEFAULT_RULE when no rule matches.
      def process(document)
        # stringify once instead of once per rule
        stringified_document = document.stringify_keys
        matching_rule = @rules.find { |rule| rule.match?(stringified_document) }

        PostProcessResult.new(document, matching_rule || SimpleRule::DEFAULT_RULE)
      end

      private

      # Builds SimpleRule objects sorted by their 'order', without the default rule.
      def ordered_rules(job_filtering)
        # A filter without a 'rules' entry yields no rules instead of
        # failing on nil.sort_by.
        job_rules = Utility::Filtering.extract_filter(job_filtering)['rules'] || []
        sorted_rules = job_rules.sort_by { |rule| rule['order'] }.reject { |rule| rule['id'] == Core::Filtering::SimpleRule::DEFAULT_RULE_ID }
        sorted_rules.map { |rule| SimpleRule.new(rule) }
      end
    end
  end
end
module Core
  module Filtering
    # Names of the stages at which filtering can be applied.
    class ProcessingStage
      PRE = 'pre-processing'.freeze
      POST = 'post-processing'.freeze

      # Every known stage. Frozen so the shared constant cannot be mutated by accident.
      ALL = [
        PRE,
        POST
      ].freeze
    end
  end
end
module Core
  module Filtering
    module SimpleRules
      module Validation
        # Rejects rule sets in which the same (rule, field, value) combination
        # is given two different policies.
        class NoConflictingPoliciesRulesValidator < Core::Filtering::SimpleRules::Validation::SimpleRulesValidator
          # @return [Hash] { :state => ..., :errors => [...] }; INVALID with a single
          #   error entry for the first conflict found, VALID with no errors otherwise
          def are_rules_valid
            rule_field_value_to_policy = {}

            @rules.each do |simple_rule|
              rule_field_value = simple_rule.slice('rule', 'field', 'value')
              policy = simple_rule['policy']

              if rule_field_value_to_policy.key?(rule_field_value)
                # Only a *different* policy for the same combination is a conflict;
                # an exact duplicate with the same policy is not.
                return conflicting_rules(rule_field_value) if rule_field_value_to_policy[rule_field_value] != policy
              else
                rule_field_value_to_policy[rule_field_value] = policy
              end
            end

            { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
          end

          private

          # Builds the INVALID result describing the conflicting combination.
          def conflicting_rules(rule_type_value)
            {
              :state => Core::Filtering::ValidationStatus::INVALID,
              :errors => [
                {
                  :ids => [SIMPLE_RULES_ID],
                  :messages => ["Two simple rules with same rule (#{rule_type_value['rule']}), field (#{rule_type_value['field']}), value (#{rule_type_value['value']}) and conflicting policies detected."]
                }
              ]
            }
          end
        end
      end
    end
  end
end
module Core
  module Filtering
    module SimpleRules
      module Validation
        SIMPLE_RULES_ID = 'simple_rules'.freeze

        # Base class for simple rules validators.
        #
        # It only stores the rules; `are_rules_valid` always raises here, so a
        # usable validator has to override it.
        class SimpleRulesValidator
          # @param rules [Array, nil] rules to validate; a nil or false value
          #   is replaced by an empty array
          def initialize(rules)
            @rules = rules || []
          end

          # @raise [RuntimeError] always, in this base implementation
          def are_rules_valid
            raise(RuntimeError, 'Simple rules validation not implemented')
          end
        end
      end
    end
  end
end
module Core
  module Filtering
    module SimpleRules
      module Validation
        # Validates every rule individually against a schema and reports the
        # first rule that does not validate.
        class SingleRuleAgainstSchemaValidator < Core::Filtering::SimpleRules::Validation::SimpleRulesValidator
          # @param rules [Array, nil] rules to validate
          # @param schema [Hash] schema each rule is checked against
          def initialize(rules, schema = Core::Filtering::SimpleRules::Validation::SINGLE_RULE_SCHEMA)
            super(rules)
            @schema = schema
            @schema_validator = SchemaValidator.new(error_id: SIMPLE_RULES_ID)
          end

          # @return [Hash] the validation result of the first rule whose state is
          #   not VALID, or a VALID result with no errors when all rules pass
          def are_rules_valid
            valid_state = Core::Filtering::ValidationStatus::VALID

            @rules.each do |rule|
              outcome = @schema_validator.validate_against_schema(@schema, rule)
              next if outcome[:state] == valid_state

              return outcome
            end

            { :state => valid_state, :errors => [] }
          end
        end
      end
    end
  end
end
module Core
  module Filtering
    module Transform
      # Runs a chain of transformer classes over the rules and over the advanced
      # snippet of a filter, and returns both transformed parts in one hash.
      class FilterTransformerFacade < Core::Filtering::Transform::FilterTransformer
        # @param filter [Hash] filter read through the :rules and :advanced_snippet keys
        # @param filter_transformers [Hash] transformer class (or array of classes)
        #   per TransformationTarget
        def initialize(filter = {},
                       filter_transformers = {
                         Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [],
                         Core::Filtering::Transform::TransformationTarget::RULES => [],
                       })
          super(filter)

          @rule_transformers = as_list(filter_transformers[Core::Filtering::Transform::TransformationTarget::RULES])
          @snippet_transformers = as_list(filter_transformers[Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET])

          @facade = FilterTransformer.new(filter, execute_rule_and_snippet_transformations)
        end

        # @return [Hash] { :rules => ..., :advanced_snippet => ... } after all transformers ran
        def transform
          @facade.transform
        end

        private

        # Wraps a single transformer class into an array; arrays pass through untouched.
        def as_list(transformer_classes)
          return transformer_classes if transformer_classes.is_a?(Array)

          [transformer_classes]
        end

        # Builds the lambda handed to the inner FilterTransformer.
        def execute_rule_and_snippet_transformations
          lambda do |filter|
            transformed_rules = call_transformers(@rule_transformers, filter[:rules])
            transformed_snippet = call_transformers(@snippet_transformers, filter[:advanced_snippet])

            {
              :rules => transformed_rules,
              :advanced_snippet => transformed_snippet
            }
          end
        end

        # Feeds the payload through each transformer in turn; blank entries are skipped.
        def call_transformers(transformer_classes, payload)
          transformer_classes.reduce(payload) do |current, transformer_class|
            transformer_class.present? ? transformer_class.new(current).transform : current
          end
        end
      end
    end
  end
end
module Core
  module Filtering
    # Validates the draft filtering of a connector and stores the validation
    # result through ElasticConnectorActions.
    class ValidationJobRunner
      # @param connector_settings [Object] settings of the connector; `service_type`
      #   is used to look up the connector class in Connectors::REGISTRY
      def initialize(connector_settings)
        @connector_settings = connector_settings
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      # Runs the validation. Any StandardError is logged, recorded in the runner
      # status, and an INVALID validation result is stored in place of the real one.
      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        validation_result = @connector_class.validate_filtering(@connector_settings.filtering[:draft])

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { Core::Filtering::DEFAULT_DOMAIN => validation_result })

        @validation_finished = true
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e)
        # Keep the real failure reason so the final log line does not fall back
        # to the generic "did not finish" message.
        @status[:error] = e.message
        validation_failed_result = { :state => Core::Filtering::ValidationStatus::INVALID,
                                     :errors => [
                                       { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
                                     ] }
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { Core::Filtering::DEFAULT_DOMAIN => validation_failed_result })
      ensure
        # Covers the cases where no error text was recorded (non-StandardError
        # exit, or an exception with a blank message).
        if !@validation_finished && !@status[:error].present?
          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
        end

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end
    end
  end
end
22 | connector_instance = Connectors::REGISTRY.connector(connector_settings.service_type, connector_settings.configuration) 23 | doc[:status] = connector_instance.is_healthy? ? Connectors::ConnectorStatus::CONNECTED : Connectors::ConnectorStatus::ERROR 24 | message = "Health check for 3d party service failed for connector [#{connector_settings.id}], service type [#{connector_settings.service_type}]. Check the application logs for more information." 25 | doc[:error] = doc[:status] == Connectors::ConnectorStatus::ERROR ? message : nil 26 | end 27 | 28 | Core::ElasticConnectorActions.update_connector_fields(connector_settings.id, doc) 29 | end 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/core/ingestion.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | require 'core/ingestion/es_sink' 10 | -------------------------------------------------------------------------------- /lib/core/job_cleanup.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 
module Core
  # Housekeeping for sync jobs: deletes orphaned jobs and marks idle jobs as failed.
  class JobCleanUp
    class << self
      # @param connector_id [String, nil] passed on to the idle-job lookup; the
      #   orphaned-job cleanup always considers all connectors
      def execute(connector_id = nil)
        process_orphaned_jobs
        process_idle_jobs(connector_id)
      end

      private

      # Deletes the jobs returned by ConnectorJob.orphaned_jobs for the ids of all
      # known connectors, plus any content index that only those jobs refer to.
      def process_orphaned_jobs
        Utility::Logger.debug('Start cleaning up orphaned jobs...')
        connectors = ConnectorSettings.fetch_all_connectors
        orphans = ConnectorJob.orphaned_jobs(connectors.map(&:id))
        if orphans.empty?
          Utility::Logger.debug('No orphaned jobs found. Skipping...')
          return
        end

        # delete content indices in case they are re-created by sync job
        orphan_indices = orphans.map(&:index_name)
        live_indices = connectors.map(&:index_name)
        stale_indices = (orphan_indices - live_indices).compact.uniq
        ElasticConnectorActions.delete_indices(stale_indices) if stale_indices.any?

        result = ConnectorJob.delete_jobs(orphans)
        failures = result['failures']
        Utility::Logger.error("Error found when deleting jobs: #{failures}") if failures&.any?
        Utility::Logger.info("Successfully deleted #{result['deleted']} out of #{result['total']} orphaned jobs.")
      end

      # Marks every idle job as errored and refreshes the last-sync data of its
      # connector. A failure on one job is logged and does not stop the others.
      def process_idle_jobs(connector_id = nil)
        scope = connector_id ? "connector #{connector_id}" : 'native connectors'
        Utility::Logger.debug("Start cleaning up idle jobs for #{scope}...")
        idle_jobs = ConnectorJob.idle_jobs(connector_id)
        if idle_jobs.empty?
          Utility::Logger.debug('No idle jobs found. Skipping...')
          return
        end

        marked_count = 0
        idle_jobs.each do |idle_job|
          begin
            idle_job.error!('The job has not seen any update for some time.')
            job_id = idle_job.id
            Utility::Logger.debug("Successfully marked job #{job_id} as error.")

            # Re-read the job so the connector is updated from its stored state.
            refreshed_job = ConnectorJob.fetch_by_id(job_id)
            if refreshed_job.nil?
              Utility::Logger.warn("Could not found job by id #{job_id}")
            elsif refreshed_job.connector.nil?
              Utility::Logger.warn("Could not found connector by id #{refreshed_job.connector_id}")
            end

            refreshed_job&.connector&.update_last_sync!(refreshed_job)
            marked_count += 1
          rescue StandardError => e
            Utility::ExceptionTracking.log_exception(e)
          end
        end
        Utility::Logger.info("Successfully marked #{marked_count} out of #{idle_jobs.count} idle jobs as error.")
      end
    end
  end
end
module Core
  # Scheduler whose connector list comes from ConnectorSettings.fetch_native_connectors.
  class NativeScheduler < Core::Scheduler
    # @return [Array] the fetched connector settings; an empty array when the
    #   fetch returns nothing or fails with an unexpected error
    # @raise one of Utility::AUTHORIZATION_ERRORS, re-raised unchanged
    def connector_settings
      begin
        native_connectors = Core::ConnectorSettings.fetch_native_connectors
        native_connectors || []
      rescue *Utility::AUTHORIZATION_ERRORS => auth_error
        # should be handled by the general scheduler
        raise auth_error
      rescue StandardError => unexpected_error
        Utility::ExceptionTracking.log_exception(unexpected_error, 'Could not retrieve native connectors due to unexpected error.')
        []
      end
    end
  end
end
module Core
  # Scheduler bound to exactly one connector, identified by its id.
  class SingleScheduler < Core::Scheduler
    # @param connector_id [String] id of the connector to schedule
    # @param poll_interval [Object] forwarded to Core::Scheduler
    # @param heartbeat_interval [Object] forwarded to Core::Scheduler
    def initialize(connector_id, poll_interval, heartbeat_interval)
      super(poll_interval, heartbeat_interval)
      @connector_id = connector_id
    end

    # @return [Array] a one-element array with the connector settings, or an empty
    #   array when the connector was not found or the lookup failed unexpectedly
    # @raise one of Utility::AUTHORIZATION_ERRORS, re-raised unchanged
    def connector_settings
      begin
        found = Core::ConnectorSettings.fetch_by_id(@connector_id)
        found.nil? ? [] : [found]
      rescue *Utility::AUTHORIZATION_ERRORS => auth_error
        # should be handled by the general scheduler
        raise auth_error
      rescue StandardError => unexpected_error
        Utility::ExceptionTracking.log_exception(unexpected_error, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
        []
      end
    end
  end
end
# Stub application config that returns fixed values.
class AppConfig
  # @return [Hash] connector settings with a single retry-delay entry
  def self.connectors
    { 'transient_server_error_retry_delay_minutes' => 5 }
  end

  # @return [Integer]
  def self.content_source_sync_max_errors
    1000
  end

  # @return [Integer]
  def self.content_source_sync_max_consecutive_errors
    10
  end

  # @return [Float]
  def self.content_source_sync_max_error_ratio
    0.15
  end

  # @return [Integer]
  def self.content_source_sync_error_ratio_window_size
    100
  end

  # @return [Boolean]
  def self.content_source_sync_thumbnails_enabled?
    true
  end
end
module Connectors
  # Stub stats module: nothing is recorded, only the key helpers do real work.
  module Stats
    class << self
      # Calls the given block and returns its result; key and value are ignored.
      def measure(_key, _value = nil, &block)
        block.call
      end

      # Does nothing and returns nil.
      def increment(key, value = 1)
        # no op
      end

      # @return [String] the key prefixed with "connectors."
      def prefix_key(key)
        "connectors.#{key}"
      end

      # Derives a snake_case key from a class name.
      #
      # @param klass [Class] class whose name is used
      # @param deconstantize [Boolean] when true, the last constant segment is
      #   dropped first (Connectors::GoogleDrive::Adapter -> Connectors::GoogleDrive)
      # @return [String] e.g. "google_drive"
      def class_key(klass, deconstantize = true)
        inflector = ActiveSupport::Inflector
        full_name = klass.name
        scoped_name = deconstantize ? inflector.deconstantize(full_name) : full_name
        # Connectors::GoogleDrive -> GoogleDrive -> google_drive
        inflector.underscore(inflector.demodulize(scoped_name))
      end
    end
  end
end
8 | # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec` 9 | require 'utility/bulk_queue' 10 | require 'utility/common' 11 | require 'utility/constants' 12 | require 'utility/cron' 13 | require 'utility/elasticsearch/index/mappings' 14 | require 'utility/elasticsearch/index/text_analysis_settings' 15 | require 'utility/environment' 16 | require 'utility/error_monitor' 17 | require 'utility/errors' 18 | require 'utility/filtering' 19 | require 'utility/es_client' 20 | require 'utility/exception_tracking' 21 | require 'utility/extension_mapping_util' 22 | require 'utility/logger' 23 | # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec` 24 | # !!!!!!!! 25 | -------------------------------------------------------------------------------- /lib/utility/bulk_queue.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 
module Utility
  # Accumulates newline-delimited bulk operations in a string buffer, bounded by
  # a maximum number of operations and a maximum byte size.
  class BulkQueue
    class QueueOverflowError < StandardError; end

    # 500 items or 5MB
    #
    # @param operation_count_threshold [Integer] maximum number of operations held
    # @param size_threshold [Integer] byte limit; the summed operation and payload
    #   sizes must stay strictly below it
    def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      reset
    end

    # Returns everything buffered so far and empties the queue.
    #
    # @return [String] the buffered operations and payloads, each followed by "\n"
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends an operation line and, if given, its payload line.
    #
    # @param operation [String] serialized operation
    # @param payload [String, nil] serialized document belonging to the operation
    # @raise [QueueOverflowError] when the entry does not fit, see #will_fit?
    def add(operation, payload = nil)
      unless will_fit?(operation, payload)
        raise QueueOverflowError, "Operation does not fit into the queue (limits: #{@operation_count_threshold} operations, #{@size_threshold} bytes)"
      end

      payload_size = get_size(payload)

      # The tracked sizes count operation and payload bytes only, not the
      # newline separators written to the buffer.
      @current_operation_count += 1
      @current_buffer_size += get_size(operation) + payload_size
      @current_data_size += payload_size

      @buffer << operation << "\n"
      @buffer << payload << "\n" if payload
    end

    # @param operation [String] serialized operation
    # @param payload [String, nil] serialized document
    # @return [Boolean] true when adding the entry keeps the queue within the
    #   operation count limit and strictly below the size limit
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      @current_buffer_size + get_size(operation) + get_size(payload) < @size_threshold
    end

    # @return [Hash] current operation count and tracked buffer size in bytes
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Byte size of a string; nil counts as 0.
    def get_size(str)
      return 0 unless str
      str.bytesize
    end

    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0

      # Unary plus yields a mutable string even under frozen_string_literal.
      @buffer = +''
    end
  end
end
module Utility
  class Common
    # Returns the first argument that is not nil.
    #
    # @param args [Array] candidate values, checked in order
    # @return [Object, nil] the first non-nil value (`false` counts as a value),
    #   or nil when there is none
    def self.return_if_present(*args)
      args.find { |candidate| !candidate.nil? }
    end
  end
end
module Utility
  # Shared constant values: field names, index names and ingestion limits.
  class Constants
    THUMBNAIL_FIELDS = %w[_thumbnail_80x100 _thumbnail_310x430].freeze
    SUBEXTRACTOR_RESERVED_FIELDS = %w[_subextracted_as_of _subextracted_version].freeze

    ALLOW_FIELD = '_allow_permissions'.freeze
    DENY_FIELD = '_deny_permissions'.freeze

    CONNECTORS_INDEX = '.elastic-connectors'.freeze
    JOB_INDEX = '.elastic-connectors-sync-jobs'.freeze
    CONTENT_INDEX_PREFIX = 'search-'.freeze

    CRAWLER_SERVICE_TYPE = 'elastic-crawler'.freeze

    # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
    DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
    # Maximum size of either whole BULK Elasticsearch operation or one document in it (5 MiB)
    DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * (1024**2)
  end
end
- agl 61 | - dagl 62 | - degl 63 | - negl 64 | - sugl 65 | - un 66 | - m 67 | - t 68 | - s 69 | - v 70 | - d 71 | articles_case: true 72 | prepended_filters: 73 | - it-elision 74 | ja: 75 | name: Japanese 76 | stemmer: light_english 77 | stop_words: _english_ 78 | postpended_filters: 79 | - cjk_bigram 80 | ko: 81 | name: Korean 82 | stemmer: light_english 83 | stop_words: _english_ 84 | postpended_filters: 85 | - cjk_bigram 86 | nl: 87 | name: Dutch 88 | stemmer: dutch 89 | stop_words: _dutch_ 90 | pt: 91 | name: Portuguese 92 | stemmer: light_portuguese 93 | stop_words: _portuguese_ 94 | pt-br: 95 | name: Portuguese (Brazil) 96 | stemmer: brazilian 97 | stop_words: _brazilian_ 98 | ru: 99 | name: Russian 100 | stemmer: russian 101 | stop_words: _russian_ 102 | th: 103 | name: Thai 104 | stemmer: light_english 105 | stop_words: _thai_ 106 | zh: 107 | name: Chinese 108 | stemmer: light_english 109 | stop_words: _english_ 110 | postpended_filters: 111 | - cjk_bigram 112 | -------------------------------------------------------------------------------- /lib/utility/elasticsearch/index/mappings.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 
module Utility
  module Elasticsearch
    module Index
      # Field-mapping building blocks and the default index mapping used when
      # creating content indices.
      #
      # Every mapping constant is frozen (including nested hashes) because the
      # same objects are embedded by reference in each hash returned from
      # .default_text_fields_mappings; an accidental in-place mutation by one
      # caller would otherwise silently change the mapping for all callers.
      module Mappings
        # Maximum length indexed by the `enum` keyword sub-field.
        ENUM_IGNORE_ABOVE = 2048

        DATE_FIELD_MAPPING = {
          type: 'date'
        }.freeze

        KEYWORD_FIELD_MAPPING = {
          type: 'keyword'
        }.freeze

        # NOTE: the sub-field keys intentionally keep their original types
        # (:stem, :joined, :enum are Symbols; 'prefix' and 'delimiter' are
        # Strings) so that existing lookups keep working.
        TEXT_FIELD_MAPPING = {
          type: 'text',
          analyzer: 'iq_text_base',
          index_options: 'freqs',
          fields: {
            stem: {
              type: 'text',
              analyzer: 'iq_text_stem'
            }.freeze,
            'prefix' => {
              type: 'text',
              analyzer: 'i_prefix',
              search_analyzer: 'q_prefix',
              index_options: 'docs'
            }.freeze,
            'delimiter' => {
              type: 'text',
              analyzer: 'iq_text_delimiter',
              index_options: 'freqs'
            }.freeze,
            joined: {
              type: 'text',
              analyzer: 'i_text_bigram',
              search_analyzer: 'q_text_bigram',
              index_options: 'freqs'
            }.freeze,
            enum: {
              type: 'keyword',
              ignore_above: ENUM_IGNORE_ABOVE
            }.freeze
          }.freeze
        }.freeze

        WORKPLACE_SEARCH_SUBEXTRACTION_STAMP_FIELD_MAPPINGS = {
          _subextracted_as_of: DATE_FIELD_MAPPING,
          _subextracted_version: KEYWORD_FIELD_MAPPING
        }.freeze

        CRAWLER_FIELD_MAPPINGS = {
          additional_urls: KEYWORD_FIELD_MAPPING,
          body_content: TEXT_FIELD_MAPPING,
          domains: KEYWORD_FIELD_MAPPING,
          headings: TEXT_FIELD_MAPPING,
          last_crawled_at: DATE_FIELD_MAPPING,
          links: KEYWORD_FIELD_MAPPING,
          meta_description: TEXT_FIELD_MAPPING,
          meta_keywords: KEYWORD_FIELD_MAPPING,
          title: TEXT_FIELD_MAPPING,
          url: KEYWORD_FIELD_MAPPING,
          url_host: KEYWORD_FIELD_MAPPING,
          url_path: KEYWORD_FIELD_MAPPING,
          url_path_dir1: KEYWORD_FIELD_MAPPING,
          url_path_dir2: KEYWORD_FIELD_MAPPING,
          url_path_dir3: KEYWORD_FIELD_MAPPING,
          url_port: KEYWORD_FIELD_MAPPING,
          url_scheme: KEYWORD_FIELD_MAPPING
        }.freeze

        # Builds the default mapping body for an index.
        #
        # @param connectors_index [Boolean] when truthy, the sub-extraction
        #   stamp fields are added to the properties
        # @param crawler_index [Boolean] when truthy, the crawler fields are
        #   added to the properties
        # @return [Hash] a new hash with :dynamic, :dynamic_templates and
        #   :properties; the top-level hash and :properties are fresh objects,
        #   the individual field mappings are the shared frozen constants
        def self.default_text_fields_mappings(connectors_index:, crawler_index: false)
          properties = { id: KEYWORD_FIELD_MAPPING }
          properties.merge!(WORKPLACE_SEARCH_SUBEXTRACTION_STAMP_FIELD_MAPPINGS) if connectors_index
          properties.merge!(CRAWLER_FIELD_MAPPINGS) if crawler_index

          {
            dynamic: true,
            dynamic_templates: [
              {
                data: {
                  match_mapping_type: 'string',
                  mapping: TEXT_FIELD_MAPPING
                }
              }
            ],
            properties: properties
          }
        end
      end
    end
  end
end
module Utility
  # Elasticsearch client preconfigured from the connector service settings.
  # On top of the stock client it turns item-level failures in a bulk response
  # into an IndexingFailedError.
  class EsClient < ::Elasticsearch::Client
    # Raised by #bulk when the response reports that at least one item failed.
    class IndexingFailedError < StandardError
      # @param message [String] human-readable description
      # @param error [Exception, nil] optional underlying error
      def initialize(message, error = nil)
        super(message)
        @cause = error
      end

      attr_reader :cause
    end

    # Bulk actions whose per-item result can carry an 'error' entry.
    BULK_OPERATIONS = %w[index create update delete].freeze

    # @param es_config [Hash] the `elasticsearch` section of the configuration
    def initialize(es_config, &block)
      super(connection_configs(es_config), &block)
    end

    # Translates the service configuration into options for the underlying
    # Elasticsearch client.
    #
    # @param es_config [Hash] must contain either :cloud_id or :hosts
    # @return [Hash] client options
    # @raise [RuntimeError] when neither :cloud_id nor :hosts is configured
    def connection_configs(es_config)
      configs = {}
      configs[:api_key] = es_config[:api_key] if es_config[:api_key]
      if es_config[:cloud_id]
        configs[:cloud_id] = es_config[:cloud_id]
      elsif es_config[:hosts]
        configs[:hosts] = es_config[:hosts]
      else
        raise 'Either elasticsearch.cloud_id or elasticsearch.hosts should be configured.'
      end
      configs[:retry_on_failure] = es_config[:retry_on_failure] || false
      configs[:request_timeout] = es_config[:request_timeout] || nil
      configs[:log] = es_config[:log] || false
      configs[:trace] = es_config[:trace] || false

      # transport options
      configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
      configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]

      # headers
      # these are necessary for cloud-hosted native connectors
      configs[:headers] = es_config[:headers].to_h if es_config[:headers]

      # if log or trace is activated, we use the application logger
      configs[:logger] = if configs[:log] || configs[:trace]
                           Utility::Logger.logger
                         else
                           # silence!
                           ::Logger.new(IO::NULL)
                         end
      configs
    end

    # Performs a bulk request and raises if any item in the response failed.
    #
    # @return the bulk response when no errors were reported
    # @raise [IndexingFailedError] when the response has `errors` set
    def bulk(arguments = {})
      raise_if_necessary(super(arguments))
    end

    private

    # Inspects a bulk response and raises on failure.
    #
    # The previous implementation used `break` inside a nested `each`, which
    # only left the inner loop, so the item reported as the "first" error was
    # actually the last failing one. It also ignored failed `create`/`update`
    # actions and never logged the trace ID that the exception message tells
    # the user to search for.
    #
    # @param response [Hash] bulk response with 'errors' and 'items'
    # @return [Hash] the response, untouched, when it reports no errors
    # @raise [IndexingFailedError] when the response reports errors
    def raise_if_necessary(response)
      return response unless response['errors']

      # `&.` guards against a malformed response without 'items'.
      first_error = response['items']&.find do |item|
        BULK_OPERATIONS.any? { |op| item.has_key?(op) && item[op].has_key?('error') }
      end

      unless first_error
        raise IndexingFailedError.new('Failed to index documents into Elasticsearch due to unknown error. Try enabling tracing for Elasticsearch and checking the logs.')
      end

      trace_id = Utility::Logger.generate_trace_id
      # The trace ID is written to the log so the ID quoted in the exception can be found there.
      Utility::Logger.error("Failed to index documents into Elasticsearch. Error ID: [#{trace_id}]. First error in response is: #{first_error.to_json}")
      short_message = Utility::Logger.abbreviated_message(first_error.to_json)
      raise IndexingFailedError.new("Failed to index documents into Elasticsearch with an error '#{short_message}'. Look up the error ID [#{trace_id}] in the application logs to see the full error message.")
    end
  end
end
# Renders an exception, followed by every exception in its `cause` chain, as
# one UTF-8 string suitable for logging.
#
# `Exception#full_message` chooses its `highlight` default from
# `$stderr.tty?`, so when the service runs in a terminal the text would
# contain ANSI escape sequences. Highlighting is therefore disabled
# explicitly to keep log output plain.
#
# @param exception [Exception] the exception to render
# @return [String] the formatted message and backtrace, with a "Cause:"
#   section appended for each chained cause, tagged as UTF-8
def generate_stack_trace(exception)
  full_message = exception.full_message(highlight: false)

  cause = exception
  # Stop on a nil cause; the inequality check also guards against an
  # exception that is its own cause.
  while cause.cause != cause && (cause = cause.cause)
    full_message << "Cause:\n#{cause.full_message(highlight: false)}"
  end

  full_message.dup.force_encoding('UTF-8')
end
module Utility
  # Helpers for reading the filtering section of a connector definition.
  class Filtering
    # Picks the single filter object to work with.
    #
    # For now the first element of a filtering array (or the filtering object
    # itself when it is not an array) is assumed to be the only filter.
    #
    # @param filtering [Array, Hash, nil]
    # @return the chosen filter, or an empty hash when nothing usable is given
    def self.extract_filter(filtering)
      return {} unless filtering.present?

      candidate = if filtering.is_a?(Array)
                    filtering.first
                  else
                    filtering
                  end

      return candidate if candidate.present?

      {}
    end

    # Tells whether no advanced snippet value is set on the filter.
    #
    # @param filter [Hash] filter keyed by strings
    # @return [Boolean] true when 'advanced_snippet' => 'value' is missing or blank
    def self.rule_pre_processing_active?(filter)
      snippet_value = filter.dig('advanced_snippet', 'value')
      !snippet_value&.present?
    end
  end
end
# Produces a new random identifier used to correlate a user-facing error
# message with the corresponding log entry.
#
# @return [String] a random UUID
def generate_trace_id
  # SecureRandom is not loaded by default and this file does not require it
  # itself, so load it here rather than rely on another library doing so.
  # `require` is a no-op once the library is loaded.
  require 'securerandom'
  SecureRandom.uuid
end
module Utility
  module Middleware
    # Request middleware that stamps every outgoing request with an HTTP Basic
    # `Authorization` header before handing it to the next app in the stack.
    class BasicAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :basic_auth_token

      # @param app [#call, nil] next middleware in the stack
      # @param options [Hash] must contain :basic_auth_token
      # @raise [KeyError] when :basic_auth_token is missing
      def initialize(app = nil, options = {})
        @basic_auth_token = options.fetch(:basic_auth_token)
        @app = app
      end

      # Sets the header on the request and delegates to the wrapped app.
      #
      # @param env request environment exposing #request_headers
      # @return whatever the wrapped app returns
      def call(env)
        headers = env.request_headers
        headers[AUTHORIZATION] = "Basic #{basic_auth_token}"
        @app.call(env)
      end
    end
  end
end
module Utility
  module Middleware
    # Faraday middleware that only lets a request through when the requested
    # host resolves to at least one IP address that also belongs to one of the
    # configured allowed hosts.
    class RestrictHostnames < Faraday::Middleware
      class AddressNotAllowed < Utility::ClientError; end

      # Matches allowed-host entries given as full URLs. The scheme separator
      # is required: the previous pattern (/\Ahttp/) also matched bare
      # hostnames that merely start with "http" (e.g. "httpbin.org"), which
      # were then parsed as URLs, yielded no hostname and could never be
      # allowed.
      URL_PATTERN = %r{\Ahttps?://}i

      attr_reader :allowed_hosts, :allowed_ips

      # @param app [#call, nil] next middleware in the stack
      # @param options [Hash] :allowed_hosts is a list of URLs, hostnames or
      #   IP literals (may be nil, in which case nothing is allowed)
      def initialize(app = nil, options = {})
        super(app)
        @allowed_hosts = options[:allowed_hosts]
        @allowed_ips = ips_from_hosts(@allowed_hosts)
      end

      # @raise [AddressNotAllowed] when the request URL resolves to no allowed IP
      def call(env)
        raise AddressNotAllowed.new("Address not allowed for #{env[:url]}") if denied?(env)
        @app.call(env)
      end

      private

      # Resolves each allowed host entry to its IP addresses.
      #
      # @param hosts [Array<String>, nil]
      # @return [Array<IPAddr>] empty when hosts is nil
      def ips_from_hosts(hosts)
        hosts&.flat_map do |host|
          if URL_PATTERN.match(host)
            lookup_ips(Addressable::URI.parse(host).hostname)
          elsif Resolv::IPv4::Regex.match(host) || Resolv::IPv6::Regex.match(host)
            IPAddr.new(host)
          else
            lookup_ips(host)
          end
        end || []
      end

      # Decides whether the request must be rejected. On a first mismatch the
      # allowed hosts are resolved again, in case DNS for one of them has
      # changed, before the request is finally denied.
      def denied?(env)
        requested_ips = lookup_ips(env[:url].hostname)
        return false unless none_allowed?(requested_ips)

        Utility::Logger.warn("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
          "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}. Retrying.")
        @allowed_ips = ips_from_hosts(@allowed_hosts) # maybe the IP has changed for an allowed host. Re-do allowed_hosts DNS lookup

        still_denied = none_allowed?(requested_ips)
        if still_denied
          Utility::Logger.error("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
            "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}")
        end
        still_denied
      end

      # True when none of the given IPs is in the current allow-list
      # (and therefore also true for an empty list of IPs).
      def none_allowed?(ips)
        ips.none? { |ip| @allowed_ips.include?(ip) }
      end

      def lookup_ips(hostname)
        addr_infos(hostname).map { |a| IPAddr.new(a.ip_address) }
      end

      def addr_infos(hostname)
        Addrinfo.getaddrinfo(hostname, nil, :UNSPEC, :STREAM)
      rescue SocketError
        # In case of invalid hostname, return an empty list of addresses
        []
      end
    end
  end
end
#!/bin/bash

# Starts the connectors application from lib/app, relative to this script's
# own location, with every current shell variable exported to the process.

# Absolute path of the directory that contains this script.
DIR="$(dirname "${BASH_SOURCE[0]}")"
DIR="$(realpath "${DIR}")"

echo "$DIR"

# Export every variable known to this shell so the Ruby process inherits it.
for var in $(compgen -v)
do
  export "$var"
done

# Quoted so that paths containing spaces work; abort instead of launching
# app.rb from the wrong directory if the change of directory fails.
cd "$DIR/../lib/app" || exit 1
rbenv exec bundle exec ruby app.rb
# Specs for Connectors::Base::CustomClient: the HTTP verb helpers, the retry
# behaviour on timeouts, the optional auth-refresh hook and the throttling
# wrapper. All HTTP traffic is stubbed via stub_request.
describe Connectors::Base::CustomClient do
  let(:base_url) { 'http://localhost' }
  let(:client) { described_class.new(:base_url => base_url) }

  describe '#get' do
    it 'makes http request' do
      get_request = stub_request(:get, 'http://localhost').to_return(:status => 200)
      client.get('')
      expect(get_request).to have_been_requested.at_least_once
    end

    context 'retries' do
      it 'retries on timeout response' do
        # First call times out, second one succeeds: two requests in total.
        stubbed_request = stub_request(:get, 'http://localhost')
                            .to_timeout.then
                            .to_return(:status => 200)

        client.get('')
        expect(stubbed_request).to have_been_requested.twice
      end

      it 'only retries MAX_RETRIES times' do
        max = Connectors::Base::CustomClient::MAX_RETRIES
        # Chain more timeouts (max + 5) than the client is expected to attempt,
        # followed by a success that should therefore never be reached.
        stubbed_request = (max + 5).times.each_with_object(stub_request(:get, 'http://localhost')) do |_, stub|
          stub.to_timeout.then
        end.to_return(:status => 200)
        expect { client.get('') }.to raise_error(Faraday::TimeoutError)
        expect(stubbed_request).to have_been_requested.times(max + 1) # original + "retries"
      end
    end

    context 'ensuring auth is fresh' do
      # The lambda forwards to a double so the specs can assert whether the
      # refresh hook was invoked.
      let(:refresh_lambda) { ->(_client) { refresh_double.the_big_red_button } }
      let(:refresh_double) { double(:the_big_red_button => :kaboom) }

      it 'does not require refresh logic' do
        # The default client is built without :ensure_fresh_auth.
        expect(refresh_double).not_to receive(:the_big_red_button)

        stub_request(:get, 'http://localhost').to_return(:status => 200)
        client.get('')
      end

      context 'auth refresh logic is provided' do
        let(:client) { described_class.new(:base_url => base_url, :ensure_fresh_auth => refresh_lambda) }

        it 'will use refresh logic when supplied' do
          expect(refresh_double).to receive(:the_big_red_button)

          stub_request(:get, 'http://localhost').to_return(:status => 200)
          client.get('')
        end
      end
    end
  end

  describe '#post' do
    it 'makes http request' do
      post_request = stub_request(:post, 'http://localhost').to_return(:status => 200)
      client.post('', {})
      expect(post_request).to have_been_requested.at_least_once
    end
  end

  describe '#put' do
    it 'makes http request' do
      put_request = stub_request(:put, 'http://localhost').to_return(:status => 200)
      client.put('', {})
      expect(put_request).to have_been_requested.at_least_once
    end
  end

  describe '#delete' do
    it 'makes http request' do
      delete_request = stub_request(:delete, 'http://localhost').to_return(:status => 200)
      client.delete('')
      expect(delete_request).to have_been_requested.at_least_once
    end
  end

  describe '#request_with_throttling' do
    let(:url) { '/test' }

    context 'when request is successful' do
      it 'returns 200' do
        stub_request(:get, "#{base_url}#{url}").to_return(:status => 200)
        # Invoked through `send` — presumably because the method is not public; verify against the client class.
        response = client.send(:request_with_throttling, :get, url)
        expect(response).to be_success
      end
    end

    context 'when rate limit is reached' do
      it 'raises ThrottlingError' do
        # A 429 response with a Retry-After header of 0.
        stub_request(:get, "#{base_url}#{url}").to_return(:status => 429, :headers => { 'Retry-After': 0 })
        expect { client.send(:request_with_throttling, :get, url) }.to raise_error(Utility::ThrottlingError)
      end
    end
  end
end
# Specs for the example connector: shared connector behaviour, the health
# check and the documents it yields.
describe Connectors::Example::Connector do
  let(:configuration) do
    { foo: { label: 'Foo', value: 'something' } }
  end

  subject { described_class.new(configuration: configuration) }

  it_behaves_like 'a connector'

  describe '#is_healthy?' do
    it 'returns ok' do
      healthy = subject.is_healthy?
      expect(healthy).to eq(true)
    end
  end

  describe '#yield_documents' do
    # Collect everything the connector yields before each example runs.
    before do
      @documents = []

      subject.yield_documents do |document|
        @documents << document
      end
    end

    it 'returns three documents' do
      expect(@documents.size).to be 3
    end

    it 'returns attachments' do
      every_doc_has_attachment = @documents.all? { |document| document.has_key?(:_attachment) }
      expect(every_doc_has_attachment).to be true
    end
  end
end
# Pending: the example below is skipped via `xit` until permissions are
# handled by the adapter.
context 'with permissions' do
  let(:permissions) do
    { _allow_permissions: %w[something something_else] }
  end
  let(:project_with_permissions) { project_hash.merge(permissions) }

  # TODO: permissions
  xit 'fills in permissions' do
    document = described_class.to_es_document(:project, project_with_permissions)
    expected_permissions = permissions[:_allow_permissions]

    expect(document[:_allow_permissions]).to eq(expected_permissions)
  end
end
# Health check: the connector queries "#{base_url}/user" and reports healthy
# only on a successful response.
describe '#is_healthy?' do
  it 'correctly returns true on 200' do
    stub_request(:get, "#{base_url}/user")
      .to_return(:status => 200, :body => user_json)
    result = subject.is_healthy?

    expect(result).to eq(true)
  end

  it 'correctly returns false on 401' do
    stub_request(:get, "#{base_url}/user")
      .to_return(:status => 401, :body => '{ "error": "wrong token" }')
    result = subject.is_healthy?

    expect(result).to eq(false)
  end

  it 'correctly returns false on 400' do
    # This example previously stubbed a 401 as well, so the 400 case named in
    # its description was never exercised.
    stub_request(:get, "#{base_url}/user")
      .to_return(:status => 400, :body => '{ "error": "wrong token" }')
    result = subject.is_healthy?

    expect(result).to eq(false)
  end
end
# Specs for the transformer that converts camelCase keys of a MongoDB
# advanced snippet to snake_case, including keys nested in hashes and in
# hashes inside arrays.
describe Connectors::MongoDB::MongoAdvancedSnippetSnakeCaseTransformer do
  let(:advanced_snippet) {
    {
      'allowDiskUse' => false,
      'nested' => {
        'maxTimeMS' => 10
      },
      'arrayWithHashes' => [
        {
          'shouldChangeToo' => 10
        },
        {
          'shouldChangeToo' => 10
        },
        {
          'nested' => {
            'someKey' => 'value'
          }
        }
      ]
    }
  }

  subject { described_class.new(advanced_snippet) }

  describe '#transform' do
    shared_examples_for 'does not throw error' do
      # Previously described with an empty string, which produced a nameless
      # example in the output.
      it 'does not raise' do
        expect { subject.transform }.to_not raise_exception
      end
    end

    context 'when advanced snippet is empty' do
      context 'when advanced snippet is nil' do
        let(:advanced_snippet) {
          nil
        }

        it_behaves_like 'does not throw error'
      end

      # Previously this context was also named "is nil", which made the two
      # cases indistinguishable in reports.
      context 'when advanced snippet is an empty hash' do
        let(:advanced_snippet) {
          {}
        }

        it_behaves_like 'does not throw error'
      end
    end

    context 'when filter contains camel case keys' do
      it 'transforms all keys to snake_case' do
        expect(subject.transform).to eq({
                                          'allow_disk_use' => false,
                                          'nested' => {
                                            'max_time_ms' => 10
                                          },
                                          'array_with_hashes' => [
                                            {
                                              'should_change_too' => 10
                                            },
                                            {
                                              'should_change_too' => 10
                                            },
                                            {
                                              'nested' => {
                                                'some_key' => 'value'
                                              }
                                            }
                                          ]
                                        })
      end
    end
  end
end
do 47 | context 'when called against previously registered service type' do 48 | it 'should return that my-connector is registered' do 49 | expect(subject.registered?(registered_connector)).to be_truthy 50 | end 51 | end 52 | 53 | context 'when called against non-registered service type' do 54 | it 'should return that non-registered service type is not registered' do 55 | expect(subject.registered?(unregistered_connector)).to be_falsey 56 | end 57 | end 58 | end 59 | 60 | describe '#connector' do 61 | context 'when called against previously registered service type' do 62 | it 'should return the corresponding connector instance' do 63 | connector_instance = subject.connector(registered_connector, configuration, job_description) 64 | 65 | expect(connector_instance).to be_a(MyConnector) 66 | end 67 | end 68 | 69 | context 'when called against non-registered service type' do 70 | it 'should raise an exception, that non-registered connector is not registered' do 71 | expect { subject.connector(unregistered_connector, configuration, job_description) }.to raise_exception 72 | end 73 | end 74 | end 75 | 76 | describe '#registered_connectors' do 77 | let(:registered_connectors) { 78 | %w[a-connector b-connector c-connector] 79 | } 80 | 81 | before(:each) do 82 | registered_connectors.each { |connector| subject.register(connector, MyConnector) } 83 | end 84 | 85 | it 'returns registered connectors' do 86 | expect(subject.registered_connectors).to include('a-connector', 'b-connector', 'c-connector') 87 | end 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /spec/connectors/tolerable_error_helper_spec.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. 
Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | require 'connectors/tolerable_error_helper' 10 | 11 | describe Connectors::TolerableErrorHelper do 12 | let(:error_monitor) { double } 13 | subject { described_class.new(error_monitor) } 14 | 15 | before(:each) do 16 | allow(Utility::Logger).to receive(:debug) 17 | allow(Utility::Logger).to receive(:error) 18 | allow(Utility::Logger).to receive(:warn) 19 | 20 | allow(error_monitor).to receive(:note_success) 21 | allow(error_monitor).to receive(:note_error) 22 | end 23 | 24 | describe '#yield_single_document' do 25 | context 'when no errors happen' do 26 | it 'notes success to error monitor' do 27 | expect(error_monitor).to receive(:note_success) 28 | 29 | subject.yield_single_document(identifier: 'hello!') do 30 | { :bring => 'a_towel' } 31 | end 32 | end 33 | end 34 | 35 | context 'when an error happens' do 36 | let(:error) { StandardError.new } 37 | let(:unique_error_id) { 'hey im an error' } 38 | 39 | before(:each) do 40 | allow(Utility::ExceptionTracking).to receive(:augment_exception).with(error) # this method actually populates id of the error 41 | allow(error).to receive(:id).and_return(unique_error_id) 42 | end 43 | 44 | it 'augments the error' do 45 | expect(Utility::ExceptionTracking).to receive(:augment_exception).with(error) 46 | 47 | subject.yield_single_document(identifier: 'hello!') do 48 | raise error 49 | end 50 | end 51 | 52 | it 'notes failure to error monitor' do 53 | expect(error_monitor).to receive(:note_error).with(error, { :id => unique_error_id }) 54 | 55 | subject.yield_single_document(identifier: 'hello!') do 56 | raise error 57 | end 58 | end 59 | end 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /spec/core/filtering/advanced_snippet/advanced_snippet_validator_spec.rb: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 5 | # 6 | # frozen_string_literal: true 7 | 8 | require 'core/filtering/advanced_snippet/advanced_snippet_validator' 9 | 10 | describe Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator do 11 | subject { described_class.new({}) } 12 | 13 | describe '#is_snippet_valid' do 14 | it 'should raise an exception' do 15 | expect { subject.is_snippet_valid }.to raise_exception(StandardError, 'Advanced Snippet validation not implemented') 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /spec/core/filtering/simple_rule/no_conflicting_policies_rules_validator_spec.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 
5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | require 'core/filtering/simple_rules/simple_rule' 10 | require 'core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator' 11 | 12 | describe Core::Filtering::SimpleRules::Validation::NoConflictingPoliciesRulesValidator do 13 | let(:field) { 'foo' } 14 | let(:value) { 'bar' } 15 | let(:rule) { Core::Filtering::SimpleRule::Rule::EQUALS } 16 | 17 | let(:simple_rule_including) { 18 | { 19 | 'id' => 'test', 20 | 'field' => field, 21 | 'value' => value, 22 | 'policy' => Core::Filtering::SimpleRule::Policy::INCLUDE, 23 | 'rule' => rule 24 | } 25 | } 26 | 27 | let(:simple_rule_excluding) { 28 | { 29 | 'id' => 'test', 30 | 'field' => field, 31 | 'value' => value, 32 | 'policy' => Core::Filtering::SimpleRule::Policy::EXCLUDE, 33 | 'rule' => rule 34 | } 35 | } 36 | 37 | let(:simple_rules) { 38 | [] 39 | } 40 | 41 | subject { described_class.new(simple_rules) } 42 | 43 | describe '#are_rules_valid' do 44 | context 'when one simple rule uses include policy and another simple rule uses exclude policy for the same fields' do 45 | context 'when include rule comes before the exclude rule' do 46 | let(:simple_rules) do 47 | [ 48 | simple_rule_including, 49 | simple_rule_excluding 50 | ] 51 | end 52 | 53 | it_behaves_like 'simple rules are invalid' 54 | end 55 | 56 | context 'when exclude rule comes before the include rule' do 57 | let(:simple_rules) do 58 | [ 59 | simple_rule_excluding, 60 | simple_rule_including 61 | ] 62 | end 63 | 64 | it_behaves_like 'simple rules are invalid' 65 | end 66 | end 67 | 68 | context 'when simple rules do not conflict' do 69 | let(:simple_rules) do 70 | [ 71 | { 72 | 'id' => 'test', 73 | 'field' => field, 74 | 'value' => value, 75 | 'policy' => Core::Filtering::SimpleRule::Policy::INCLUDE, 76 | 'rule' => rule 77 | }, 78 | { 79 | 'id' => 'test', 80 | 'field' => 'another-field', 81 | 'value' => 'another-value', 82 | 'policy' => Core::Filtering::SimpleRule::Policy::EXCLUDE, 83 | 
'rule' => Core::Filtering::SimpleRule::Rule::CONTAINS 84 | } 85 | ] 86 | end 87 | 88 | it_behaves_like 'simple rules are valid' 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /spec/core/filtering/transform/filter_transformer_spec.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | require 'core/filtering/transform/filter_transformer' 10 | 11 | describe Core::Filtering::Transform::FilterTransformer do 12 | let(:filter) { 13 | { 14 | 'key' => 'value' 15 | } 16 | } 17 | 18 | describe '#transform' do 19 | subject { described_class.new(filter) } 20 | 21 | context 'when transformation is not specified' do 22 | it 'returns the original filter' do 23 | expect(subject.transform).to eq(filter) 24 | end 25 | end 26 | 27 | context 'when transformation is present' do 28 | subject { 29 | described_class.new(filter, lambda { |filter| 30 | filter['key'] = 'transformed value' 31 | filter 32 | }) 33 | } 34 | 35 | it 'applies transformation' do 36 | expect(subject.transform).to eq({ 'key' => 'transformed value' }) 37 | end 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /spec/core/heartbeat_spec.rb: -------------------------------------------------------------------------------- 1 | require 'core/heartbeat' 2 | require 'connectors/connector_status' 3 | 4 | describe Core::Heartbeat do 5 | describe '.send' do 6 | let(:connector_settings) { double } 7 | let(:connector_instance) { double } 8 | let(:connector_id) { '123' } 9 | let(:service_type) { 'mongo' } 10 | let(:configured) { false } 11 | let(:is_healthy) { true } 12 | 
let(:configuration) { {} } 13 | 14 | before(:each) do 15 | allow(Core::ElasticConnectorActions).to receive(:update_connector_fields) 16 | allow(Connectors::REGISTRY).to receive(:connector).and_return(connector_instance) 17 | allow(connector_settings).to receive(:id).and_return(connector_id) 18 | allow(connector_settings).to receive(:service_type).and_return(service_type) 19 | allow(connector_settings).to receive(:connector_status_allows_sync?).and_return(configured) 20 | allow(connector_settings).to receive(:configuration).and_return(configuration) 21 | allow(connector_instance).to receive(:is_healthy?).and_return(is_healthy) 22 | end 23 | 24 | describe '.send' do 25 | it 'updates last_seen' do 26 | expect(Core::ElasticConnectorActions).to receive(:update_connector_fields).with(connector_id, hash_including(:last_seen => anything)) 27 | 28 | described_class.send(connector_settings) 29 | end 30 | 31 | context 'when it is configured' do 32 | let(:configured) { true } 33 | context 'when remote source is up' do 34 | let(:is_healthy) { true } 35 | 36 | it 'updates status' do 37 | expect(Core::ElasticConnectorActions) 38 | .to receive(:update_connector_fields) 39 | .with( 40 | connector_id, 41 | hash_including( 42 | :status => Connectors::ConnectorStatus::CONNECTED 43 | ) 44 | ) 45 | 46 | described_class.send(connector_settings) 47 | end 48 | end 49 | 50 | context 'when remote source is down' do 51 | let(:is_healthy) { false } 52 | 53 | it 'updates status' do 54 | expect(Core::ElasticConnectorActions) 55 | .to receive(:update_connector_fields) 56 | .with( 57 | connector_id, 58 | hash_including( 59 | :status => Connectors::ConnectorStatus::ERROR, 60 | :error => /Health check for 3d party service failed/ 61 | ) 62 | ) 63 | 64 | described_class.send(connector_settings) 65 | end 66 | end 67 | end 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /spec/core/job_cleanup_spec.rb: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | require 'core' 10 | 11 | describe Core::JobCleanUp do 12 | describe '.execute' do 13 | let(:connectors) { [] } 14 | let(:orphaned_jobs) { [] } 15 | let(:idle_jobs) { [] } 16 | let(:job1) { double } 17 | let(:job2) { double } 18 | 19 | before(:each) do 20 | allow(Core::ConnectorSettings).to receive(:fetch_all_connectors).and_return(connectors) 21 | allow(Core::ConnectorJob).to receive(:orphaned_jobs).and_return(orphaned_jobs) 22 | allow(Core::ConnectorJob).to receive(:idle_jobs).and_return(idle_jobs) 23 | end 24 | 25 | it 'should not clean up orphaned jobs' do 26 | expect(Core::ElasticConnectorActions).to_not receive(:delete_indices) 27 | expect(Core::ConnectorJob).to_not receive(:delete_jobs) 28 | 29 | described_class.execute 30 | end 31 | 32 | it 'should not mark idle jobs error' do 33 | expect_any_instance_of(Core::ConnectorJob).to_not receive(:error!) 34 | expect(Core::ConnectorJob).to_not receive(:fetch_by_id) 35 | expect(Core::ConnectorSettings).to_not receive(:fetch_by_id) 36 | expect_any_instance_of(Core::ConnectorSettings).to_not receive(:update_last_sync!) 
37 | 38 | described_class.execute 39 | end 40 | 41 | context 'with orphaned jobs' do 42 | let(:index_name) { 'index_name' } 43 | let(:orphaned_jobs) { [job1, job2] } 44 | 45 | before(:each) do 46 | allow(job1).to receive(:index_name).and_return(:index_name) 47 | allow(job2).to receive(:index_name).and_return(:index_name) 48 | allow(Core::ConnectorJob).to receive(:delete_jobs).and_return({}) 49 | end 50 | 51 | it 'should clean up orphaned jobs' do 52 | expect(Core::ElasticConnectorActions).to receive(:delete_indices) 53 | expect(Core::ConnectorJob).to receive(:delete_jobs) 54 | 55 | described_class.execute 56 | end 57 | end 58 | 59 | context 'with idle jobs' do 60 | let(:idle_jobs) { [job1, job2] } 61 | let(:connector) { double } 62 | let(:connector_id) { '1' } 63 | let(:id1) { '1' } 64 | let(:id2) { '2' } 65 | 66 | before(:each) do 67 | allow(job1).to receive(:id).and_return(id1) 68 | allow(job2).to receive(:id).and_return(id2) 69 | allow(Core::ConnectorJob).to receive(:fetch_by_id).with(id1).and_return(job1) 70 | allow(Core::ConnectorJob).to receive(:fetch_by_id).with(id2).and_return(job2) 71 | allow(job1).to receive(:connector_id).and_return(connector_id) 72 | allow(job2).to receive(:connector_id).and_return(connector_id) 73 | allow(job1).to receive(:connector).and_return(connector) 74 | allow(job2).to receive(:connector).and_return(connector) 75 | end 76 | 77 | it 'should mark idle jobs error' do 78 | expect(job1).to receive(:error!) 79 | expect(job2).to receive(:error!) 
80 | expect(connector).to receive(:update_last_sync!).twice 81 | 82 | described_class.execute 83 | end 84 | end 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /spec/core/jobs/producer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'core/jobs/producer' 2 | 3 | describe Core::Jobs::Producer do 4 | describe '#enqueue_job' do 5 | let(:job_type) { 'unsupported_type' } 6 | let(:connector_settings) { double } 7 | 8 | context 'when the job type is unsupported' do 9 | it 'raise UnsupportedJobType error' do 10 | expect { described_class.enqueue_job(job_type: job_type, connector_settings: connector_settings) } 11 | .to raise_error(Core::Jobs::UnsupportedJobType) 12 | end 13 | end 14 | 15 | context 'when the job type is supported' do 16 | let(:job_type) { Core::Jobs::Producer::JOB_TYPES.first } 17 | 18 | context 'when connector_settings is not a kind of Core::ConnectorSettings' do 19 | it 'raise ArgumentError' do 20 | expect { described_class.enqueue_job(job_type: job_type, connector_settings: connector_settings) } 21 | .to raise_error(ArgumentError) 22 | end 23 | end 24 | 25 | context 'when connector_settings is a kind of Core::ConnectorSettings' do 26 | let(:fake_es_response) { {} } 27 | let(:fake_connector_meta) { {} } 28 | 29 | let(:connector_settings) { Core::ConnectorSettings.new(fake_es_response, fake_connector_meta) } 30 | it 'execute Core::ElasticConnectorActions.create_job' do 31 | allow(Core::ElasticConnectorActions).to receive(:create_job).with(connector_settings: connector_settings) 32 | 33 | described_class.enqueue_job(job_type: job_type, connector_settings: connector_settings) 34 | 35 | expect(Core::ElasticConnectorActions).to have_received(:create_job).with(connector_settings: connector_settings) 36 | end 37 | end 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- 
/spec/core/native_scheduler_spec.rb: -------------------------------------------------------------------------------- 1 | require 'core/connector_settings' 2 | require 'core/native_scheduler' 3 | 4 | describe Core::NativeScheduler do 5 | subject { described_class.new(poll_interval, heartbeat_interval) } 6 | 7 | let(:poll_interval) { 999 } 8 | let(:heartbeat_interval) { 999 } 9 | 10 | describe '#connector_settings' do 11 | context 'when elasticsearch query runs successfully' do 12 | let(:connector_settings) { double } 13 | before(:each) do 14 | allow(Core::ConnectorSettings).to receive(:fetch_native_connectors).and_return(connector_settings) 15 | end 16 | 17 | it 'fetches crawler connectors' do 18 | expect(subject.connector_settings).to eq(connector_settings) 19 | end 20 | end 21 | 22 | context 'when elasticsearch query fails' do 23 | before(:each) do 24 | allow(Core::ConnectorSettings).to receive(:fetch_native_connectors).and_raise(StandardError) 25 | end 26 | 27 | it 'fetches crawler connectors' do 28 | expect(subject.connector_settings).to be_empty 29 | end 30 | end 31 | 32 | context 'when authorization error appears' do 33 | before(:each) do 34 | allow(Core::ConnectorSettings).to receive(:fetch_native_connectors).and_raise(Elastic::Transport::Transport::Errors::Unauthorized, 'Unauthorized') 35 | end 36 | 37 | it 'rethrows error' do 38 | expect { subject.connector_settings }.to raise_error(Elastic::Transport::Transport::Errors::Unauthorized, 'Unauthorized') 39 | end 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /spec/core/single_scheduler_spec.rb: -------------------------------------------------------------------------------- 1 | require 'core/connector_settings' 2 | require 'core/single_scheduler' 3 | 4 | describe Core::SingleScheduler do 5 | subject { described_class.new(connector_id, poll_interval, heartbeat_interval) } 6 | 7 | let(:connector_id) { '123' } 8 | let(:poll_interval) { 999 } 9 | 
let(:heartbeat_interval) { 999 } 10 | 11 | describe '#connector_settings' do 12 | context 'when elasticsearch query runs successfully' do 13 | let(:connector_setting) { double } 14 | before(:each) do 15 | allow(Core::ConnectorSettings).to receive(:fetch_by_id).with(connector_id).and_return(connector_setting) 16 | end 17 | 18 | it 'fetches the connector' do 19 | expect(subject.connector_settings).to eq([connector_setting]) 20 | end 21 | 22 | context 'when connector does not exist' do 23 | let(:connector_setting) { nil } 24 | 25 | it 'fetches no connector' do 26 | expect(subject.connector_settings).to be_empty 27 | end 28 | end 29 | end 30 | 31 | context 'when elasticsearch query fails' do 32 | before(:each) do 33 | allow(Core::ConnectorSettings).to receive(:fetch_by_id).with(connector_id).and_raise(StandardError) 34 | end 35 | 36 | it 'fetches no connector' do 37 | expect(subject.connector_settings).to be_empty 38 | end 39 | end 40 | 41 | context 'when authorization error appears' do 42 | before(:each) do 43 | allow(Core::ConnectorSettings).to receive(:fetch_by_id).and_raise(Elastic::Transport::Transport::Errors::Unauthorized, 'Unauthorized') 44 | end 45 | 46 | it 'rethrows error' do 47 | expect { subject.connector_settings }.to raise_error(Elastic::Transport::Transport::Errors::Unauthorized, 'Unauthorized') 48 | end 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /spec/fixtures/gitlab/external_user.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 11422639, 3 | "username": "PCHINC1", 4 | "name": "Pramod Chincholkar", 5 | "state": "active", 6 | "avatar_url": "https://secure.gravatar.com/avatar/0b90a654999fd081e97490cc5aa75d80?s=80&d=identicon", 7 | "web_url": "https://gitlab.com/PCHINC1", 8 | "created_at": "2022-04-22T20:35:32.978Z", 9 | "bio": "", 10 | "location": null, 11 | "public_email": "", 12 | "skype": "", 13 | "linkedin": "", 14 | "twitter": 
"", 15 | "website_url": "", 16 | "organization": null, 17 | "job_title": "", 18 | "pronouns": null, 19 | "bot": false, 20 | "work_information": null, 21 | "followers": 0, 22 | "following": 0, 23 | "is_followed": false, 24 | "local_time": null 25 | } 26 | -------------------------------------------------------------------------------- /spec/fixtures/gitlab/external_users.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 11422639, 4 | "username": "PCHINC1", 5 | "name": "Pramod Chincholkar", 6 | "state": "active", 7 | "avatar_url": "https://secure.gravatar.com/avatar/0b90a654999fd081e97490cc5aa75d80?s=80&d=identicon", 8 | "web_url": "https://gitlab.com/PCHINC1" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /spec/fixtures/gitlab/project_members.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 33, 4 | "username": "someone", 5 | "name": "someone", 6 | "state": "active", 7 | "avatar_url": "https://secure.gravatar.com/avatar/fb24ce8723e1cc23986de0669e8bd4da?s=80&d=identicon", 8 | "web_url": "https://gitlab.com/someone", 9 | "access_level": 50, 10 | "created_at": "2018-03-13T09:13:19.346Z", 11 | "expires_at": null, 12 | "membership_state": "active" 13 | }, 14 | { 15 | "id": 22, 16 | "username": "yetsomeone", 17 | "name": "Yet Someone", 18 | "state": "active", 19 | "avatar_url": "https://secure.gravatar.com/avatar/efa724b7092f8d0bdc5ce00c2d3f86eb?s=80&d=identicon", 20 | "web_url": "https://gitlab.com/yetsomeone", 21 | "access_level": 30, 22 | "created_at": "2018-04-15T07:55:32.423Z", 23 | "created_by": { 24 | "id": 33, 25 | "username": "someone", 26 | "name": "someone", 27 | "state": "active", 28 | "avatar_url": "https://secure.gravatar.com/avatar/fb24ce8723e1cc23986de0669e8bd4da?s=80&d=identicon", 29 | "web_url": "https://gitlab.com/someone" 30 | }, 31 | "expires_at": null, 32 | "membership_state": 
"active" 33 | }, 34 | { 35 | "id": 11, 36 | "username": "someoneelse", 37 | "name": "MC", 38 | "state": "active", 39 | "avatar_url": "https://secure.gravatar.com/avatar/bac1e2c06f6feba833cefbfa6b686708?s=80&d=identicon", 40 | "web_url": "https://gitlab.com/someoneelse", 41 | "access_level": 40, 42 | "created_at": "2018-07-09T19:01:19.109Z", 43 | "created_by": { 44 | "id": 33, 45 | "username": "someone", 46 | "name": "someone", 47 | "state": "active", 48 | "avatar_url": "https://secure.gravatar.com/avatar/fb24ce8723e1cc23986de0669e8bd4da?s=80&d=identicon", 49 | "web_url": "https://gitlab.com/someone" 50 | }, 51 | "expires_at": null, 52 | "membership_state": "active" 53 | } 54 | ] 55 | -------------------------------------------------------------------------------- /spec/fixtures/gitlab/simple_project.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 35879607, 3 | "description": "My awesome project", 4 | "name": "project-to-test-milestones-34bfb1ff-426fa7e1ce896959", 5 | "name_with_namespace": "gitlab-qa-sandbox-group / group-to-test-milestones-883f203d / project-to-test-milestones-34bfb1ff-426fa7e1ce896959", 6 | "path": "project-to-test-milestones-34bfb1ff-426fa7e1ce896959", 7 | "path_with_namespace": "gitlab-qa-sandbox-group/group-to-test-milestones-883f203d/project-to-test-milestones-34bfb1ff-426fa7e1ce896959", 8 | "created_at": "2022-05-04T12:54:42.416Z", 9 | "default_branch": "main", 10 | "tag_list": [], 11 | "topics": [], 12 | "ssh_url_to_repo": "git@gitlab.com:gitlab-qa-sandbox-group/group-to-test-milestones-883f203d/project-to-test-milestones-34bfb1ff-426fa7e1ce896959.git", 13 | "http_url_to_repo": "https://gitlab.com/gitlab-qa-sandbox-group/group-to-test-milestones-883f203d/project-to-test-milestones-34bfb1ff-426fa7e1ce896959.git", 14 | "web_url": "https://gitlab.com/gitlab-qa-sandbox-group/group-to-test-milestones-883f203d/project-to-test-milestones-34bfb1ff-426fa7e1ce896959", 15 | "readme_url": null, 16 
| "avatar_url": null, 17 | "forks_count": 0, 18 | "star_count": 0, 19 | "last_activity_at": "2022-05-04T12:54:42.416Z", 20 | "visibility": "public", 21 | "namespace": { 22 | "id": 52760903, 23 | "name": "group-to-test-milestones-883f203d", 24 | "path": "group-to-test-milestones-883f203d", 25 | "kind": "group", 26 | "full_path": "gitlab-qa-sandbox-group/group-to-test-milestones-883f203d", 27 | "parent_id": 4909902, 28 | "avatar_url": null, 29 | "web_url": "https://gitlab.com/groups/gitlab-qa-sandbox-group/group-to-test-milestones-883f203d" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /spec/fixtures/gitlab/user.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 1, 3 | "username": "sytses", 4 | "name": "Sid Sijbrandij", 5 | "state": "active", 6 | "avatar_url": "https://secure.gravatar.com/avatar/78b060780d36f51a6763ac9831a4f022?s=80&d=identicon", 7 | "web_url": "https://gitlab.com/sytses", 8 | "created_at": "2012-09-14T14:10:29.000Z", 9 | "bio": "", 10 | "location": "", 11 | "public_email": "", 12 | "skype": "", 13 | "linkedin": "", 14 | "twitter": "sytses", 15 | "website_url": "", 16 | "organization": null, 17 | "job_title": "", 18 | "pronouns": null, 19 | "bot": false, 20 | "work_information": null, 21 | "followers": 93, 22 | "following": 0, 23 | "is_followed": false, 24 | "local_time": null 25 | } 26 | -------------------------------------------------------------------------------- /spec/repo_spec.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 
5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | describe 'Repository' do 10 | let(:gemspecs) { Dir[File.join(__dir__, '../*.gemspec')].to_a } 11 | 12 | context 'NOTICE file' do 13 | let(:notice_file) { 'NOTICE.txt' } 14 | let(:notice_path) { File.join(__dir__, "../#{notice_file}") } 15 | it 'exists' do 16 | expect(File.exist?(notice_path)).to be(true) 17 | end 18 | 19 | it 'is included in our Gemspecs' do 20 | gemspecs.each do |gemspec| 21 | expect(File.read(gemspec)).to include(notice_file) 22 | end 23 | end 24 | end 25 | 26 | context 'LICENSE file' do 27 | let(:license_file) { 'LICENSE' } 28 | let(:license_path) { File.join(__dir__, "../#{license_file}") } 29 | it 'exists' do 30 | expect(File.exist?(license_path)).to be(true) 31 | end 32 | 33 | it 'is included in our Gemspecs' do 34 | gemspecs.each do |gemspec| 35 | expect(File.read(gemspec)).to include(license_file) 36 | end 37 | end 38 | end 39 | 40 | context 'License header' do 41 | let(:license_header) do 42 | %{# 43 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 44 | # or more contributor license agreements. Licensed under the Elastic License; 45 | # you may not use this file except in compliance with the Elastic License. 
46 | #} 47 | end 48 | let(:code_files) { Dir['lib/{[!vendor/]**/*,*}.rb'] } 49 | 50 | it 'prefixes all code files' do 51 | code_files.each do |code_file| 52 | expect(File.read(code_file)).to start_with(license_header), "License header missing from #{code_file}" 53 | end 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'webmock/rspec' 4 | require 'rack/test' 5 | require 'active_support/time_with_zone' 6 | require 'active_support/values/time_zone' 7 | require 'active_support/core_ext/time/zones' 8 | require 'simplecov' 9 | require 'simplecov-material' 10 | 11 | # debug utils 12 | require 'pry' 13 | require 'pry-nav' 14 | 15 | Dir['./spec/support/**/*.rb'].sort.each { |f| require f } 16 | 17 | # Enable coverage report 18 | SimpleCov.add_filter('spec') 19 | SimpleCov.formatter = SimpleCov::Formatter::MaterialFormatter 20 | SimpleCov.start 21 | 22 | $LOAD_PATH << '../lib' 23 | 24 | def connectors_fixture_path(fixture_name) 25 | File.join('spec/fixtures', fixture_name) 26 | end 27 | 28 | def connectors_fixture_raw(fixture_name) 29 | File.read(connectors_fixture_path(fixture_name), :encoding => 'utf-8') 30 | end 31 | 32 | def connectors_fixture_binary(fixture_name) 33 | File.read(connectors_fixture_path(fixture_name), :mode => 'rb') 34 | end 35 | 36 | def connectors_fixture_json(fixture_name) 37 | JSON.parse(connectors_fixture_raw(fixture_name)) 38 | end 39 | 40 | def get_class_specific_private_methods(klass) 41 | (klass.private_methods - Object.private_methods).sort 42 | end 43 | 44 | def random_string 45 | SecureRandom.hex 46 | end 47 | 48 | Time.zone = ActiveSupport::TimeZone.new('UTC') 49 | ENV['APP_ENV'] = 'test' 50 | -------------------------------------------------------------------------------- /spec/support/shared_examples.rb: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | shared_examples 'implements all private methods of base class' do 10 | it '' do 11 | base_class_private_methods = get_class_specific_private_methods(base_class_instance) 12 | specific_class_private_methods = get_class_specific_private_methods(concrete_class_instance) 13 | 14 | expect(specific_class_private_methods).to include(*base_class_private_methods) 15 | end 16 | end 17 | 18 | shared_examples 'does not populate updated_at' do 19 | it 'returns document that does not have updated_at field' do 20 | expect(document.with_indifferent_access).to_not include(have_key(:updated_at)) 21 | end 22 | end 23 | 24 | shared_examples 'a connector' do 25 | it 'implements display_name class method' do 26 | expect(described_class.display_name).to_not be_nil 27 | end 28 | 29 | it 'implements service_type class method' do 30 | expect(described_class.service_type).to_not be_nil 31 | end 32 | 33 | it 'implements configurable_fields class method' do 34 | expect(described_class.configurable_fields).to_not be_nil 35 | end 36 | 37 | it 'implements validate filtering class method' do 38 | expect(described_class.validate_filtering).to_not be_nil 39 | end 40 | 41 | it 'configurable_fields class method returns valid configuration' do 42 | # expected configurable fields format: 43 | # { 44 | # 'key' => { 45 | # 'label' => '', 46 | # 'value' => '' 47 | # } 48 | # } 49 | configurable_fields = described_class.configurable_fields.with_indifferent_access 50 | 51 | expect(configurable_fields).to respond_to(:keys) 52 | expect(configurable_fields).to respond_to(:[]) 53 | 54 | configurable_fields.each_key do |field_name| 55 | 
field_definition = configurable_fields[field_name] 56 | 57 | # is a hash too 58 | expect(field_definition).to respond_to(:keys) 59 | expect(field_definition).to respond_to(:[]) 60 | 61 | expect(field_definition['label']).to_not be_nil 62 | if field_definition['value'] 63 | expect(field_definition['value']).to_not be_nil 64 | end 65 | end 66 | end 67 | end 68 | -------------------------------------------------------------------------------- /spec/support/shared_examples/filtering.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | require 'core/filtering/validation_status' 10 | 11 | shared_examples 'a schema validator' do 12 | it 'defines validate_against_schema method' do 13 | expect(described_class.method_defined?(:validate_against_schema)).to eq true 14 | end 15 | end 16 | 17 | shared_examples 'an advanced snippet validator' do 18 | it 'defines is_snippet_valid method' do 19 | expect(described_class.method_defined?(:is_snippet_valid)).to eq true 20 | end 21 | end 22 | 23 | shared_examples_for 'filtering is valid' do 24 | it '' do 25 | validation_result = described_class.validate_filtering(filtering) 26 | 27 | expect(validation_result[:state]).to eq(Core::Filtering::ValidationStatus::VALID) 28 | expect(validation_result[:errors]).to be_empty 29 | end 30 | end 31 | 32 | shared_examples_for 'filtering is invalid' do 33 | it '' do 34 | validation_result = described_class.validate_filtering(filtering) 35 | 36 | expect(validation_result[:state]).to eq(Core::Filtering::ValidationStatus::INVALID) 37 | expect(validation_result[:errors]).to_not be_empty 38 | expect(validation_result[:errors]).to be_an(Array) 39 | end 40 | end 41 | 42 | 
shared_examples_for 'simple rules are valid' do 43 | it '' do 44 | validation_result = subject.are_rules_valid 45 | 46 | expect(validation_result[:state]).to eq(Core::Filtering::ValidationStatus::VALID) 47 | expect(validation_result[:errors]).to be_empty 48 | end 49 | end 50 | 51 | shared_examples_for 'simple rules are invalid' do 52 | it '' do 53 | validation_result = subject.are_rules_valid 54 | 55 | expect(validation_result[:state]).to eq(Core::Filtering::ValidationStatus::INVALID) 56 | expect(validation_result[:errors]).to_not be_empty 57 | expect(validation_result[:errors]).to be_an(Array) 58 | expect(validation_result[:errors][0][:ids]).to eq(['simple_rules']) 59 | expect(validation_result[:errors][0][:messages]).to be_an(Array) 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /spec/utility/common_spec.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 3 | # or more contributor license agreements. Licensed under the Elastic License; 4 | # you may not use this file except in compliance with the Elastic License. 
5 | # 6 | 7 | # frozen_string_literal: true 8 | 9 | require 'utility/common' 10 | 11 | RSpec.describe Utility::Common do 12 | describe '.return_if_present' do 13 | context 'no argument is present' do 14 | it 'returns nil' do 15 | expect(Utility::Common.return_if_present).to be_nil 16 | end 17 | end 18 | 19 | context 'one non-nil argument is present' do 20 | it 'returns the non-nil argument' do 21 | expect(Utility::Common.return_if_present('one')).to eq('one') 22 | end 23 | end 24 | 25 | context 'first argument is nil and second is present' do 26 | it 'returns the second argument' do 27 | expect(Utility::Common.return_if_present(nil, 'second')).to eq('second') 28 | end 29 | end 30 | 31 | context 'first and second argument are present' do 32 | it 'returns the first argument' do 33 | expect(Utility::Common.return_if_present('first', 'second')).to eq('first') 34 | end 35 | end 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /spec/utility/cron_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # 4 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 5 | # or more contributor license agreements. Licensed under the Elastic License; 6 | # you may not use this file except in compliance with the Elastic License. 7 | # 8 | 9 | require 'fugit' 10 | require 'spec_helper' 11 | require 'utility/cron' 12 | 13 | # see http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html 14 | RSpec.describe Utility::Cron do 15 | it 'supports various expressions' do 16 | conversions = [ 17 | ['0 15 10 * * ? 2005', '15 10 * * *'], 18 | ['0 0 12 * * ?', '0 12 * * *'], 19 | ['0 15 10 ? * *', '15 10 * * *'], 20 | ['0 15 10 * * ?', '15 10 * * *'], 21 | ['0 15 10 * * ? *', '15 10 * * *'], 22 | ['0 15 10 * * ? 
2005', '15 10 * * *'], 23 | ['0 * 14 * * ?', '* 14 * * *'], 24 | ['0 0/5 14 * * ?', '0/5 14 * * *'], 25 | ['0 0/5 14,18 * * ?', '0/5 14,18 * * *'], 26 | ['0 0-5 14 * * ?', '0-5 14 * * *'], 27 | ['0 10,44 14 ? 3 WED', '10,44 14 * 3 WED'], 28 | ['0 15 10 ? * MON-FRI', '15 10 * * MON-FRI'], 29 | ['0 15 10 15 * ?', '15 10 15 * *'], 30 | ['0 0 12 1/5 * ?', '0 12 1/5 * *'], 31 | ['0 11 11 11 11 ?', '11 11 11 11 *'], 32 | ['0 20 6 ? * 5', '20 6 * * 4'], # every Thursday at 6:20 AM 33 | ['0 0 12 ? * 2', '0 12 * * 1'], # every Monday at 12:00 PM 34 | ['0 0 0 ? * 7', '0 0 * * 6'], # every Saturday at 12:00 AM 35 | ['0 0 0 ? * 0', '0 0 * * 0'], # every Sunday at 12:00 AM 36 | ] 37 | 38 | conversions.each do |quartz, crontab| 39 | expect(subject.quartz_to_crontab(quartz)).to eq(crontab) 40 | next_time = Fugit::Cron.do_parse(crontab).next_time 41 | print "### next_time for scheduler [#{quartz}], cron [#{crontab}]: [#{next_time}]\n" 42 | expect(next_time).to be > Time.now 43 | end 44 | 45 | unsupported = ['0 15 10 ? * 6#3', '0 15 10 L * ?', 46 | '0 15 10 L-2 * ?', 47 | '0 15 10 ? * 6L', 48 | '0 15 10 ? * 6L 2002-2005', 49 | '0 15 10 ? * 6#3'] 50 | 51 | unsupported.each do |quartz| 52 | expect { subject.quartz_to_crontab(quartz) }.to raise_error(Exception) 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /spec/utility/elasticsearch/mappings_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # 4 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 5 | # or more contributor license agreements. Licensed under the Elastic License; 6 | # you may not use this file except in compliance with the Elastic License. 
7 | # 8 | 9 | require 'spec_helper' 10 | require 'utility/elasticsearch/index/mappings' 11 | 12 | describe Utility::Elasticsearch::Index::Mappings do 13 | describe '#default_text_fields_mappings' do 14 | let(:crawler_index) { false } 15 | subject { Utility::Elasticsearch::Index::Mappings.default_text_fields_mappings(connectors_index: connectors_index, crawler_index: crawler_index) } 16 | 17 | context 'when the index is a connectors index' do 18 | let(:connectors_index) { true } 19 | 20 | it { is_expected.to be_kind_of(Hash) } 21 | it { is_expected.to include(dynamic: true, dynamic_templates: Array, properties: Hash) } 22 | it { is_expected.to include(properties: { id: Hash, _subextracted_as_of: Hash, _subextracted_version: Hash }) } 23 | end 24 | 25 | context 'when the index is not a connectors index' do 26 | let(:connectors_index) { false } 27 | 28 | it { is_expected.to be_kind_of(Hash) } 29 | it { is_expected.to include(dynamic: true, dynamic_templates: Array, properties: Hash) } 30 | it { is_expected.not_to include(properties: { id: Hash, _subextracted_as_of: Hash, _subextracted_version: Hash }) } 31 | 32 | context 'when the index is a crawler index' do 33 | let(:crawler_index) { true } 34 | let(:expected_props) do 35 | { 36 | properties: { 37 | id: Hash, 38 | additional_urls: Hash, 39 | body_content: Hash, 40 | domains: Hash, 41 | headings: Hash, 42 | last_crawled_at: Hash, 43 | links: Hash, 44 | meta_description: Hash, 45 | meta_keywords: Hash, 46 | title: Hash, 47 | url: Hash, 48 | url_host: Hash, 49 | url_path: Hash, 50 | url_path_dir1: Hash, 51 | url_path_dir2: Hash, 52 | url_path_dir3: Hash, 53 | url_port: Hash, 54 | url_scheme: Hash 55 | } 56 | } 57 | end 58 | 59 | it { is_expected.to include(expected_props) } 60 | end 61 | end 62 | end 63 | end 64 | -------------------------------------------------------------------------------- /spec/utility/es_client_spec.rb: -------------------------------------------------------------------------------- 1 | require 
'elasticsearch' 2 | require 'utility/es_client' 3 | require 'utility/environment' 4 | 5 | RSpec.describe Utility::EsClient do 6 | let(:host) { 'http://notreallyaserver' } 7 | let(:config) do 8 | { 9 | :service_type => 'example', 10 | :log_level => 'INFO', 11 | :connector_id => '1', 12 | :elasticsearch => { 13 | :api_key => 'key', 14 | :hosts => host, 15 | :disable_warnings => disable_warnings 16 | } 17 | } 18 | end 19 | 20 | let(:subject) { described_class.new(config[:elasticsearch]) } 21 | 22 | before(:each) do 23 | stub_request(:get, "#{host}:9200/") 24 | .to_return(status: 403, body: '', headers: {}) 25 | stub_request(:get, "#{host}:9200/_cluster/health") 26 | end 27 | 28 | context 'when wrapped in Utility::Environment.set_execution_environment' do 29 | around(:each) do |example| 30 | Utility::Environment.set_execution_environment(config) do 31 | example.run 32 | end 33 | end 34 | 35 | context 'when disable_warnings=false' do 36 | let(:disable_warnings) { false } 37 | it 'receives warnings from elasticsearch client' do 38 | expect { 39 | subject.cluster.health 40 | }.to output(/#{Elasticsearch::SECURITY_PRIVILEGES_VALIDATION_WARNING}/).to_stderr 41 | end 42 | end 43 | 44 | context 'when disable_warnings=true' do 45 | let(:disable_warnings) { true } 46 | it 'receives no warnings from elasticsearch client' do 47 | expect { 48 | subject.cluster.health 49 | }.to_not output.to_stderr 50 | end 51 | end 52 | end 53 | 54 | context 'when Elasticsearch::Client arguments are presented' do 55 | let(:disable_warnings) { false } 56 | 57 | before(:example) do 58 | # remove api_key to force Elasticsearch::Client pickup TLS options 59 | config[:elasticsearch].delete(:api_key) 60 | end 61 | 62 | context 'when transport_options is presented' do 63 | let(:transport_options) { { ssl: { verify: false } } } 64 | 65 | it 'configures Elasticsearch client with transport_options' do 66 | config[:elasticsearch][:transport_options] = transport_options 67 | 
expect(subject.transport.options[:transport_options][:ssl]).to eq(transport_options[:ssl]) 68 | end 69 | end 70 | 71 | context 'when ca_fingerprint is presented' do 72 | let(:ca_fingerprint) { '64F2593F...' } 73 | 74 | it 'configures Elasticsearch client with ca_fingerprint' do 75 | config[:elasticsearch][:ca_fingerprint] = ca_fingerprint 76 | # there is no other way to get ca_fingerprint variable 77 | expect(subject.instance_variable_get(:@transport).instance_variable_get(:@ca_fingerprint)).to eq(ca_fingerprint) 78 | end 79 | end 80 | end 81 | 82 | describe '#connection_configs' do 83 | let(:disable_warnings) { false } 84 | context 'when headers are present' do 85 | let(:headers) do 86 | { 87 | :something => 'something' 88 | } 89 | end 90 | 91 | it 'configures Elasticsearch client with headers' do 92 | config[:elasticsearch][:headers] = headers 93 | 94 | result = subject.connection_configs(config[:elasticsearch]) 95 | 96 | expect(result[:headers]).to eq(headers) 97 | end 98 | end 99 | 100 | context 'when headers are not present' do 101 | it 'configures Elasticsearch client with headers' do 102 | config[:elasticsearch][:headers] = nil 103 | 104 | result = subject.connection_configs(config[:elasticsearch]) 105 | 106 | expect(result).to_not have_key(:headers) 107 | end 108 | end 109 | end 110 | end 111 | -------------------------------------------------------------------------------- /spec/utility/exception_tracking_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # 4 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 5 | # or more contributor license agreements. Licensed under the Elastic License; 6 | # you may not use this file except in compliance with the Elastic License. 
7 | # 8 | 9 | require 'spec_helper' 10 | require 'utility/exception_tracking' 11 | 12 | RSpec.describe Utility::ExceptionTracking do 13 | let(:message) { 'this is a test message' } 14 | let(:exception) { StandardError.new(message) } 15 | 16 | describe '.log_exception' do 17 | it 'can log an exception' do 18 | expect { described_class.log_exception(exception) }.to output(/#{message}/).to_stdout_from_any_process 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /spec/utility/logger_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # 4 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 5 | # or more contributor license agreements. Licensed under the Elastic License; 6 | # you may not use this file except in compliance with the Elastic License. 7 | # 8 | 9 | require 'spec_helper' 10 | require 'utility/logger' 11 | 12 | RSpec.describe Utility::Logger do 13 | let(:message) { 'this is a test message' } 14 | let(:long_message) { 'This is a really long test message - it is longer than the max. This is a really long test message - it is longer than the max.' } 15 | let(:message_with_breaks) { 'This is a message with line breaks.\nThis is a message with line breaks.' } 16 | let(:message_with_tabs) { 'This is a message with tabs.\t\t\tThis is a message with tabs.' } 17 | let(:message_with_many_spaces) { ' This is a message with a lot of spaces. 
' } 18 | let(:logger) { ::Logger.new(STDOUT) } 19 | 20 | before do 21 | stub_const('Utility::Logger::MAX_SHORT_MESSAGE_LENGTH', 100) 22 | allow(described_class).to receive(:logger).and_return(logger) 23 | end 24 | 25 | describe '.info' do 26 | it 'can give the connectors logger' do 27 | expect { described_class.info(message) }.to output(/#{message}/).to_stdout_from_any_process 28 | end 29 | 30 | context 'with ecs logging' do 31 | let(:logger) { EcsLogging::Logger.new(STDOUT) } 32 | 33 | it 'outputs ecs fields' do 34 | expect { described_class.info(message) }.to output(/@timestamp/).to_stdout_from_any_process 35 | expect { described_class.info(message) }.to output(/ecs\.version/).to_stdout_from_any_process 36 | end 37 | end 38 | end 39 | 40 | describe '.abbreviated_message' do 41 | it 'can shorten a long message' do 42 | expect(described_class.abbreviated_message(long_message).length).to eq(100) 43 | end 44 | 45 | it 'can clean line breaks' do 46 | expect(described_class.abbreviated_message(message_with_breaks).match(/\n/)).to be_falsey 47 | end 48 | 49 | it 'can clean tabs' do 50 | expect(described_class.abbreviated_message(message_with_tabs).match(/\t/)).to be_falsey 51 | end 52 | 53 | it 'can clean extra spaces' do 54 | msg = described_class.abbreviated_message(message_with_many_spaces) 55 | expect(msg.match(/\s{2,}/)).to be_falsey 56 | expect(msg.match(/^\s/)).to be_falsey 57 | expect(msg.match(/\s$/)).to be_falsey 58 | end 59 | end 60 | 61 | describe '.log_stacktrace' do 62 | let(:stacktrace) { 'stacktrace' } 63 | it 'outputs stacktrace' do 64 | expect { described_class.log_stacktrace(stacktrace) }.to output(/#{stacktrace}/).to_stdout_from_any_process 65 | end 66 | 67 | context 'with ecs logging' do 68 | let(:logger) { EcsLogging::Logger.new(STDOUT) } 69 | 70 | it 'outputs error.stack_trace' do 71 | expect { described_class.log_stacktrace(stacktrace) }.to output(/"error":\{"stack_trace":/).to_stdout_from_any_process 72 | expect { 
described_class.log_stacktrace(stacktrace) }.to output(/@timestamp/).to_stdout_from_any_process 73 | end 74 | end 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: run-stack stop-stack load-data 2 | 3 | export VERSION=8.4.0-SNAPSHOT 4 | 5 | run-stack: 6 | docker-compose up -d 7 | 8 | stop-stack: 9 | docker-compose down --volumes 10 | 11 | load-data: 12 | ./loadsample.sh 13 | -------------------------------------------------------------------------------- /tests/connectors.yml: -------------------------------------------------------------------------------- 1 | # general metadata 2 | version: "CHANGEME" 3 | repository: "git@github.com:elastic/connectors-ruby.git" 4 | revision: "main" 5 | 6 | elasticsearch: 7 | hosts: http://localhost:9200 8 | api_key: "3YTZkm4VQYSSkSlMQlSmlg" 9 | 10 | connector_id: "mongo" 11 | service_type: "mongo" 12 | log_level: info 13 | 14 | poll_interval: 3 15 | -------------------------------------------------------------------------------- /tests/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | elasticsearch: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:${VERSION} 6 | container_name: elasticsearch 7 | environment: 8 | - cluster.name=docker-cluster 9 | - bootstrap.memory_lock=true 10 | - ES_JAVA_OPTS=-Xms512m -Xmx512m 11 | - ELASTIC_PASSWORD=changeme 12 | - xpack.security.enabled=true 13 | - xpack.security.authc.api_key.enabled=true 14 | - discovery.type=single-node 15 | - action.destructive_requires_name=false 16 | ulimits: 17 | memlock: 18 | soft: -1 19 | hard: -1 20 | volumes: 21 | - esdata:/usr/share/elasticsearch/data 22 | ports: 23 | - 9200:9200 24 | networks: 25 | - esnet 26 | 27 | mongo: 28 | container_name: mongo 29 | image: mongo:latest 30 | volumes: 31 | - 
mongo:/data/db 32 | networks: 33 | - mongo-network 34 | ports: 35 | - 27018:27017 36 | restart: always 37 | kibana: 38 | image: docker.elastic.co/kibana/kibana:${VERSION} 39 | ports: 40 | - 5601:5601 41 | extra_hosts: 42 | - "host.docker.internal:host-gateway" 43 | depends_on: 44 | - "elasticsearch" 45 | environment: 46 | ELASTICSEARCH_URL: http://host.docker.internal:9200 47 | ELASTICSEARCH_HOSTS: http://host.docker.internal:9200 48 | ENTERPRISESEARCH_HOST: http://host.docker.internal:3002 49 | ELASTICSEARCH_USERNAME: kibana_system 50 | ELASTICSEARCH_PASSWORD: changeme 51 | networks: 52 | - esnet 53 | 54 | enterprise_search: 55 | image: docker.elastic.co/enterprise-search/enterprise-search:${VERSION} 56 | depends_on: 57 | - "elasticsearch" 58 | environment: 59 | - ENT_SEARCH_DEFAULT_PASSWORD=changeme 60 | - elasticsearch.username=elastic 61 | - elasticsearch.password=changeme 62 | - elasticsearch.host=http://host.docker.internal:9200 63 | - allow_es_settings_modification=true 64 | - kibana.host=http://host.docker.internal:5601 65 | - kibana.external_url=http://localhost:5601 66 | - secret_management.encryption_keys=["4a2cd3f81d39bf28738c10db0ca782095ffac07279561809eecc722e0c20eb09"] 67 | - JAVA_OPTS=-Xms2g -Xmx2g 68 | - email.account.enabled=true 69 | - email.account.smtp.auth=plain 70 | - email.account.smtp.starttls.enable=false 71 | - email.account.smtp.host=host.docker.internal 72 | - email.account.smtp.port=1025 73 | - email.account.email_defaults.from=local@example.com 74 | - DEBUG=true 75 | ports: 76 | - 3002:3002 77 | extra_hosts: 78 | - "host.docker.internal:host-gateway" 79 | networks: 80 | - esnet 81 | 82 | networks: 83 | mongo-network: 84 | driver: bridge 85 | esnet: 86 | 87 | volumes: 88 | esdata: 89 | driver: local 90 | mongo: 91 | driver: local 92 | -------------------------------------------------------------------------------- /tests/loadsample.sh: -------------------------------------------------------------------------------- 1 | if ! 
[ -f sampledata.archive ]; then 2 | curl -L -o sampledata.archive https://atlas-education.s3.amazonaws.com/sampledata.archive 3 | fi 4 | docker exec -i mongo sh -c 'mongorestore --drop --archive' < sampledata.archive 5 | -------------------------------------------------------------------------------- /win32/getmsys2.vbs: -------------------------------------------------------------------------------- 1 | ' Create an HTTP object 2 | myURL = "https://github.com/msys2/msys2-installer/releases/download/2022-03-19/msys2-x86_64-20220319.exe" 3 | Set objHTTP = CreateObject( "WinHttp.WinHttpRequest.5.1" ) 4 | Set FSO = CreateObject("Scripting.FileSystemObject") 5 | scriptPath = FSO.GetParentFolderName(WScript.ScriptFullName) 6 | target = FSO.BuildPath(scriptPath, "msys2-x86_64-20220319.exe") 7 | 8 | ' Download the specified URL 9 | objHTTP.Open "GET", myURL, False 10 | objHTTP.Send 11 | intStatus = objHTTP.Status 12 | 13 | If intStatus = 200 Then 14 | WScript.Echo " " & intStatus & " A OK " +myURL 15 | Else 16 | WScript.Echo "OOPS" +myURL 17 | End If 18 | 19 | Set adoStream = CreateObject("ADODB.Stream") 20 | adoStream.Open 21 | adoStream.Type = 1 22 | adoStream.Write objHTTP.ResponseBody 23 | adoStream.Position = 0 24 | Set fileSystem = CreateObject("Scripting.FileSystemObject") 25 | If fileSystem.FileExists(target) Then fileSystem.DeleteFile target 26 | adoStream.SaveToFile target 27 | adoStream.Close 28 | 29 | -------------------------------------------------------------------------------- /win32/install.bat: -------------------------------------------------------------------------------- 1 | rem 2 | rem Development environment installer for Windows 3 | rem 4 | rem This script installs: 5 | rem - MSYS2 6 | rem - MinGW along with cmake and all the libs required to compile some gems 7 | rem - rbenv-win 8 | rem - Ruby within rbenv 9 | rem - Bundler 10 | rem 11 | 12 | @echo off 13 | setlocal 14 | 15 | for /f "delims=" %%x in (%~dp0..\.ruby-version) do set RUBY_VERSION=%%x 
echo "Set HOME, PATH and RBENV_ROOT for %RUBY_VERSION%"
rem Store the paths without embedded quotes (set "name=value") and quote them
rem where they are used. With the quotes inside the value, RBENV_ROOT ended up
rem double-quoted and PATH / the registry data got quotes in mid-entry.
set "instpath=%USERPROFILE%\.rbenv-win"
set "RBENV_ROOT=%instpath%"
set "HOME=%~dp0"
set "newpaths=C:\MSYS2\usr\bin;C:\MSYS2\usr\local\bin;%instpath%\versions\%RUBY_VERSION%\bin;%instpath%\bin;%instpath%\shims"
set "PATH=%newpaths%;%PATH%"
setx RBENV_ROOT "%instpath%"
rem Left unquoted on purpose: %~dp0 ends with a backslash, which would escape
rem a closing quote.
setx HOME %~dp0

rem Get current path from registry.
rem "reg query" prints "    Path    <type>    <value>". Splitting into tokens
rem keeps the value intact for both REG_SZ and REG_EXPAND_SZ, whereas a fixed
rem 22-character offset is only correct for REG_SZ.
set "orgtype=REG_SZ"
set "orgpath="
for /f "skip=2 tokens=2,*" %%a in ('reg query HKCU\Environment /v Path') do (
  set "orgtype=%%a"
  set "orgpath=%%b"
)

rem Set it back with all our new locations, preserving the original value type
reg add HKCU\Environment /v Path /t %orgtype% /d "%newpaths%;%orgpath%" /f

rem check new PATH environment
for /f "skip=2 tokens=2,*" %%a in ('reg query HKCU\Environment /v Path') do set "orgpath=%%b"
echo New PATH user local environment variable :"%orgpath%"
rem "echo." prints an empty line; a bare "echo" prints "ECHO is off."
echo.

echo "Installing MSYS2"
if not exist "%~dp0msys2-x86_64-20220319.exe" (
  cscript "%~dp0getmsys2.vbs"
)

if not exist C:\MSYS2\ (
  call "%~dp0msys2-x86_64-20220319.exe" install --root C:\MSYS2 --confirm-command

  echo "Installing MinGW"
  pacman -Sy
  pacman --needed --noconfirm -S mingw-w64-i686-toolchain
  pacman --needed --noconfirm -S mingw-w64-i686-cmake
  pacman --needed --noconfirm -S base-devel
  pacman --needed --noconfirm -S git
)

echo "Cloning rbenv-win"
if exist "%instpath%\" (
  rem pushd also switches drive, so "git pull" always runs inside the
  rem rbenv-win checkout; a plain "cd" stays put when it is on another drive.
  pushd "%instpath%"
  git pull
  popd
) else (
  git clone https://github.com/tarekziade/rbenv-win.git "%instpath%"
)

echo "Installing Ruby"
call rbenv install -s "%RUBY_VERSION%"
call rbenv global "%RUBY_VERSION%"
--------------------------------------------------------------------------------