├── .editorconfig ├── .github └── workflows │ ├── code-analysis.yml │ └── tests.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTORS.md ├── LICENSE.GPL ├── MANIFEST.in ├── Makefile ├── README.md ├── docker-compose.dev.yaml ├── docker ├── elasticsearch.Dockerfile ├── plone.Dockerfile └── worker.Dockerfile ├── docs ├── Makefile ├── conf.py ├── config.rst ├── index.rst ├── install.rst └── make.bat ├── instance.yaml ├── pyproject.toml ├── scripts └── populate.py ├── setup.py └── src └── collective ├── __init__.py └── elasticsearch ├── __init__.py ├── browser ├── __init__.py ├── configure.zcml ├── controlpanel.py ├── controlpanel_layout.pt ├── search.py └── utilviews.py ├── configure.zcml ├── indexes.py ├── interfaces.py ├── local.py ├── manager.py ├── mapping.py ├── patches ├── __init__.py └── configure.zcml ├── profiles.zcml ├── profiles ├── default │ ├── browserlayer.xml │ ├── controlpanel.xml │ ├── metadata.xml │ └── registry.xml ├── docker-dev │ └── registry.xml └── uninstall │ └── browserlayer.xml ├── query.py ├── queueprocessor.py ├── redis ├── __init__.py ├── configure.zcml ├── fetch.py ├── restapi.py └── tasks.py ├── result.py ├── services ├── __init__.py ├── configure.zcml ├── controlpanel.py └── elasticsearch.py ├── setuphandlers.py ├── testing.py ├── tests ├── __init__.py ├── assets │ ├── image.png │ ├── test.pdf │ └── test2.docx ├── test_controlpanel.py ├── test_file_schema.xml ├── test_processor.py ├── test_redis.py ├── test_search.py └── test_services.py ├── upgrades.py └── utils.py /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_style = space 3 | end_of_line = lf 4 | insert_final_newline = true 5 | trim_trailing_whitespace = true 6 | charset = utf-8 7 | 8 | [{*.py,*.cfg}] 9 | indent_size = 4 10 | 11 | [{*.html,*.dtml,*.pt,*.zpt,*.xml,*.zcml,*.js}] 12 | indent_size = 2 13 | 14 | [Makefile] 15 | indent_style = tab 16 | -------------------------------------------------------------------------------- /.github/workflows/code-analysis.yml: -------------------------------------------------------------------------------- 1 | name: Code Analysis 2 | on: 3 | push: 4 | 5 | jobs: 6 | black: 7 | name: Black 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout codebase 12 | uses: actions/checkout@v2 13 | 14 | - name: Run check 15 | uses: plone/code-analysis-action@v2 16 | with: 17 | check: 'black' 18 | 19 | flake8: 20 | name: flake8 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Checkout codebase 25 | uses: actions/checkout@v2 26 | 27 | - name: Run check 28 | uses: plone/code-analysis-action@v2 29 | with: 30 | check: 'flake8' 31 | 32 | isort: 33 | runs-on: ubuntu-latest 34 | steps: 35 | - name: Checkout codebase 36 | uses: actions/checkout@v2 37 | 38 | - name: Run check 39 | uses: plone/code-analysis-action@v2 40 | with: 41 | check: 'isort' 42 | 43 | pyroma: 44 | name: pyroma 45 | runs-on: ubuntu-latest 46 | 47 | steps: 48 | - name: Checkout codebase 49 | uses: actions/checkout@v2 50 | 51 | - name: Run check 52 | uses: plone/code-analysis-action@v2 53 | with: 54 | check: 'pyroma' 55 | 56 | zpretty: 57 | name: zpretty 58 | runs-on: ubuntu-latest 59 | 60 | steps: 61 | - name: Checkout codebase 62 | uses: actions/checkout@v2 63 | 64 | - name: Run check 65 | uses: plone/code-analysis-action@v2 66 | with: 67 | check: 'zpretty' 68 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: 
-------------------------------------------------------------------------------- 1 | name: Tests 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | services: 7 | redis: 8 | image: redis:7.0.5 9 | # Set health checks to wait until redis has started 10 | options: >- 11 | --health-cmd "redis-cli ping" 12 | --health-interval 10s 13 | --health-timeout 5s 14 | --health-retries 5 15 | ports: 16 | - 6379:6379 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python: ["3.8", "3.9", "3.10"] 21 | plone: ["6.0-latest", "5.2-latest"] 22 | exclude: 23 | - plone: "5.2-latest" 24 | python: "3.9" 25 | - plone: "5.2-latest" 26 | python: "3.10" 27 | - plone: "6.0-latest" 28 | python: "3.8" 29 | - plone: "6.0-latest" 30 | python: "3.9" 31 | 32 | steps: 33 | # git checkout 34 | - uses: actions/checkout@v2 35 | 36 | - name: Setup elasticsearch docker container with ingest attachment plugin 37 | run: | 38 | docker container create --name elastictest \ 39 | -e "discovery.type=single-node" \ 40 | -e "cluster.name=docker-cluster" \ 41 | -e "http.cors.enabled=true" \ 42 | -e "http.cors.allow-origin=*" \ 43 | -e "http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization" \ 44 | -e "http.cors.allow-credentials=true" \ 45 | -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ 46 | -p 9200:9200 \ 47 | -p 9300:9300 \ 48 | elasticsearch:7.17.7; \ 49 | docker start elastictest; \ 50 | docker exec elastictest /bin/sh -c "bin/elasticsearch-plugin install ingest-attachment -b"; \ 51 | docker restart elastictest 52 | 53 | - name: Setup Plone ${{ matrix.plone }} with Python ${{ matrix.python }} 54 | id: setup 55 | uses: plone/setup-plone@v1.0.0 56 | with: 57 | python-version: ${{ matrix.python }} 58 | plone-version: ${{ matrix.plone }} 59 | 60 | - name: Install package 61 | run: | 62 | pip install -e ".[test, redis]" 63 | 64 | # test 65 | - name: test 66 | run: | 67 | zope-testrunner --auto-color --auto-progress --test-path src 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*project 2 | .coverage 3 | .coverage.* 4 | .installed.cfg 5 | .mr.developer.cfg 6 | .tox/ 7 | .vscode/ 8 | *.egg-info 9 | *.log 10 | *.mo 11 | *.py? 
12 | *.swp
13 | /.settings
14 | /compiled-doc.rst
15 | /local.cfg
16 | /pyvenv.cfg
17 | bin/
18 | buildout-cache/
19 | develop-eggs/
20 | dist/*
21 | eggs/
22 | etc
23 | htmlcov/
24 | include/
25 | inituser
26 | lib/
27 | lib64
28 | local/
29 | log.html
30 | node_modules/
31 | output.xml
32 | parts/
33 | pip-selfcheck.json
34 | report.html
35 | reports/
36 | test.plone_addon/
37 | var/
38 | venv/
39 |
-------------------------------------------------------------------------------- /CHANGELOG.md: --------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## 5.0.1 (unreleased)
4 |
5 | - Update elasticsearch to 7.17.7 (ready for 8.x; Apple Silicon images are available) @maethu
6 |
7 | - Control-Panel: Fix potential issue with bool fields @maethu
8 |
9 | - Tests: Wait for elasticsearch service @maethu
10 |
11 | - Fix restricted object lookup @maethu
12 |
13 | - Add support for the highlight feature of elasticsearch @instification
14 |
15 | - Use _old_searchResults when patching safeSearchResults @instification
16 |
17 | - Handle negative term filters (fixes #101) @instification
18 |
19 | - Check the add-on is installed before processing the queue (fixes #108) @instification
20 |
21 | - Add support for an optional ES host in the worker via the PLONE_ELASTICSEARCH_HOST env variable @maethu
22 |
23 | - [Issue #118](https://github.com/collective/collective.elasticsearch/issues/118) Fix **ComponentLookupError** when adding a Plone Site (6.1) (@andreclimaco)
24 |
25 | ## 5.0.0 (2022-10-11)
26 |
27 | - Rename `master` branch to `main` @ericof
28 |
29 | - Drop support for Python 3.7 when using Plone 6.0 @ericof
30 |
31 | - Add support for plone.restapi and Volto @ericof
32 |
33 |
34 | ## 5.0.0a2 (2022-09-23)
35 |
36 | - Implement IIndexQueueProcessor support @ericof
37 |
38 | - Refactor ElasticSearchCatalog methods into ElasticSearchManager object @ericof
39 |
40 | - Breaking: Remove collective.elasticsearch.es @ericof
41 |
42 | - Breaking: Remove collective.elasticsearch.hooks @ericof
43 |
44 | - Refactor moveObjectsByDelta to reduce the number of calls to ElasticSearch @ericof
45 |
46 | - Reindex operations on the catalog send only the updated indexes to ElasticSearch @ericof
47 |
48 | - Remove collective.celery support (as it is not Python-3 compatible yet) @ericof
49 |
50 | ## 5.0.0a1 (2022-09-14)
51 |
52 | - Refactor hook.index_batch to reduce the number of calls to Elastic Search @ericof
53 |
54 | - Implement plone/code-analysis-action @ericof
55 |
56 | - Add support for Plone 6.0 @ericof
57 |
58 | - Support Python 3.7, 3.8, 3.9 and 3.10 @ericof
59 |
60 | - Drop support for Plone versions 4.3, 5.0 and 5.1 @ericof, @andreclimaco
61 |
62 | - Drop support for Python 2.7 @ericof, @andreclimaco
63 |
64 | ## 4.0.0 (2021-04-28)
65 |
66 | - BREAKING: Make changes for ES 7.x @bduncan137
67 |
68 | - Slow down tests to allow them to complete correctly @bduncan137
69 |
70 |
71 | ## 3.0.5 (2021-04-28)
72 |
73 | - [Issue #76](https://github.com/collective/collective.elasticsearch/issues/76) In 5.1+ we want to patch _unindexObject not unindexObject @ewohnlich
74 |
75 | - Add explicit error logging if the ES bulk indexing action fails. @nazrulworld
76 |
77 | - Fix commit hook bug when content has been moved @instification
78 |
79 |
80 | ## 3.0.4 (2019-08-21)
81 |
82 | - [Issue #63](https://github.com/collective/collective.elasticsearch/issues/63) Now ensuring unicode values work for both the Python 2 and Python 3 cases.
@nazrulworld
83 |
84 | - Now possible to search by indexes other than `Title`, `Description` and `SearchableText`. @nazrulworld
85 |
86 |
87 | ## 3.0.3 (2019-03-12)
88 |
89 | - Add missing logger import @nazrulworld
90 |
91 |
92 | ## 3.0.2 (2019-01-31)
93 |
94 | - Fix Zope DateTime conversion to also handle the datetime.date type @ewohnlich
95 |
96 |
97 | ## 3.0.1 (2019-01-28)
98 |
99 | - Fix sortable_title search issue @ewohnlich
100 |
101 |
102 | ## 3.0.0 (2019-01-28)
103 |
104 | - Fix date queries to work with `min:max` as well as `minmax` @vangheem
105 |
106 | - Fix sort order parsing and implementation @vangheem
107 |
108 | - Handle upgrades with missing `es_only_indexes` properly @vangheem
109 |
110 | - Add IReindexActive to the request as a flag for other code @lucid-0
111 |
112 |
113 | ## 2.0.2 (2018-11-27)
114 |
115 |
116 | - Python 3 support @vangheem
117 |
118 | - Support ES 6 @lucid-0
119 |
120 | - Fix error causing "Server Status" on @@elastic-controlpanel to be empty. @fulv
121 |
122 |
123 | ## 2.0.1 (2018-01-05)
124 |
125 | - Prevent a critical error when the query value happens to be None. @thomasdesvenain
126 |
127 | - Minor code cleanup: readability, pep8, 80 cols, zca decorators. @jensens
128 |
129 | - Fix date criteria: 'minmax' instead of 'min:max' + string-to-date conversion @ebrehault
130 |
131 |
132 | ## 2.0.0a6 (2017-03-29)
133 |
134 | - Gracefully handle upgrades in the settings interface so it doesn't break for people upgrading. @vangheem
135 |
136 |
137 | ## 2.0.0a5 (2017-03-29)
138 |
139 | - Run indexing as admin, as it is possible to initiate a reindex or index on an object that you do not have permissions for @vangheem
140 |
141 |
142 | ## 2.0.0a4 (2017-03-27)
143 |
144 | - released
145 |
146 |
147 | ## 2.0.0a3 (2017-03-27)
148 |
149 | - Add a method to set the body of the request during index creation. @Gagaro
150 |
151 | - Fixed getting a brain from a lazy list with negative indexes. @thomasdesvenain
152 |
153 | - The list of indexes that force an ES search is configurable. @thomasdesvenain
154 |
155 | - Works under Plone 4.3. @thomasdesvenain
156 |
157 | - Works with Archetypes content. @thomasdesvenain
158 |
159 | ## 2.0.0a2 (2016-07-19)
160 |
161 | - We can pass a custom results factory and custom query parameters to the IElasticSearchCatalog.search() method, so it can be used as a public interface for custom needs. @thomasdesvenain
162 |
163 | - Prevent unindexing before reindexing when the uid is unchanged, for instance on rename. Use a set for the to-remove list. @thomasdesvenain
164 |
165 | - Fix indexing when removing the Title and Description indexes from Plone @vangheem
166 |
167 | ## 2.0.0a1 (2016-06-06)
168 |
169 | - upgrade to elasticsearch 2.x @vangheem
170 |
171 | ## 1.0.1a4 (2016-05-22)
172 |
173 | - provide a better search query @vangheem
174 |
175 | ## 1.0.1a3 (2016-03-22)
176 |
177 | - make sure to get the alias definition right @vangheem
178 |
179 | ## 1.0.1a2 (2016-03-18)
180 |
181 | - create the index as an alias so you can potentially work on an existing index without needing downtime @vangheem
182 |
183 | ## 1.0.1a1 (2016-02-25)
184 |
185 | - change default sorting to descending.
[Issue #12](https://github.com/collective/collective.elasticsearch/issues/12) @neilferreira 186 | 187 | ## 1.0.0a1 (2016-02-25) 188 | 189 | - Initial release 190 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | ## Contributors 2 | 3 | - Nathan Van Gheem, vangheem@gmail.com 4 | - Wesley Barroso, wesleybl@gmail.com 5 | - André Climaco, andre.climaco@gmail.com 6 | - Érico Andrei, ericof@plone.org 7 | - Jon Pentland, jon.pentland@pretagov.co.uk 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src/collective 2 | graft docs 3 | include *.md 4 | global-exclude *.pyc 5 | # added by check_manifest.py 6 | include *.GPL 7 | include *.txt 8 | include tox.ini 9 | recursive-include scripts *.py 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ### Defensive settings for make: 2 | # https://tech.davis-hansson.com/p/make/ 3 | SHELL:=bash 4 | .ONESHELL: 5 | .SHELLFLAGS:=-xeu -o pipefail -O inherit_errexit -c 6 | .SILENT: 7 | .DELETE_ON_ERROR: 8 | MAKEFLAGS+=--warn-undefined-variables 9 | MAKEFLAGS+=--no-builtin-rules 10 | 11 | # We like colors 12 | # From: https://coderwall.com/p/izxssa/colored-makefile-for-golang-projects 13 | RED=`tput setaf 1` 14 | GREEN=`tput setaf 2` 15 | RESET=`tput sgr0` 16 | YELLOW=`tput setaf 3` 17 | 18 | PLONE5=5.2-latest 19 | PLONE6=6.0-latest 20 | 21 | INSTANCE_YAML=instance.yaml 22 | 23 | ELASTIC_SEARCH_IMAGE=elasticsearch:7.17.7 24 | ELASTIC_SEARCH_CONTAINER=elastictest 25 | 26 | REDIS_IMAGE=redis:7.0.5 27 | REDIS_CONTAINER=redistest 28 | 29 | ELASTIC_SEARCH_CONTAINERS=$$(docker ps -q -a -f "name=${ELASTIC_SEARCH_CONTAINER}" | wc -l) 30 | REDIS_CONTAINERS=$$(docker ps -q -a -f "name=${REDIS_CONTAINER}" | wc -l) 31 | 32 | # Default env for elasticsearch with redis queue 33 | DEFAULT_ENV_ES_REDIS=PLONE_REDIS_DSN=redis://localhost:6379/0 \ 34 | PLONE_BACKEND=http://localhost:8080/Plone \ 35 | PLONE_USERNAME=admin \ 36 | PLONE_PASSWORD=admin 37 | 38 | ifndef LOG_LEVEL 39 | LOG_LEVEL=INFO 40 | endif 41 | 42 | CODE_QUALITY_VERSION=2.0.0 43 | CURRENT_USER=$$(whoami) 44 | USER_INFO=$$(id -u ${CURRENT_USER}):$$(getent group ${CURRENT_USER}|cut -d: -f3) 45 | BASE_FOLDER=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) 46 | LINT=docker run -e LOG_LEVEL="${LOG_LEVEL}" --rm -v "${BASE_FOLDER}":/github/workspace plone/code-quality:${CODE_QUALITY_VERSION} check 47 | FORMAT=docker run --user="${USER_INFO}" -e LOG_LEVEL="${LOG_LEVEL}" --rm -v "${BASE_FOLDER}":/github/workspace plone/code-quality:${CODE_QUALITY_VERSION} format 48 | 49 | all: build 50 | 51 | # Add the following 'help' target to your Makefile 52 | # And add help text after each target name starting with '\#\#' 53 | .PHONY: help 54 | help: ## This help message 55 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 56 | 57 | bin/pip: 58 | @echo "$(GREEN)==> Setup Virtual Env$(RESET)" 59 | python3 -m venv . 
60 | 	bin/pip install -U pip wheel
61 |
62 | .PHONY: cookiecutter
63 | cookiecutter: bin/pip
64 | 	@echo "$(GREEN)Install cookiecutter$(RESET)"
65 | 	bin/pip install git+https://github.com/cookiecutter/cookiecutter.git#egg=cookiecutter
66 |
67 | .PHONY: instance
68 | instance: cookiecutter ## create configuration for a Zope (Plone) instance
69 | 	@echo "$(GREEN)Create Plone/Zope configuration$(RESET)"
70 | 	rm -fr ./etc
71 | 	bin/cookiecutter -f --no-input --config-file ${INSTANCE_YAML} https://github.com/bluedynamics/cookiecutter-zope-instance
72 |
73 | .PHONY: build-plone-5
74 | build-plone-5: bin/pip ## Build Plone 5.2
75 | 	@echo "$(GREEN)==> Build with Plone 5.2$(RESET)"
76 | 	bin/pip install Paste Plone -c https://dist.plone.org/release/$(PLONE5)/constraints.txt
77 | 	bin/pip install "zest.releaser[recommended]"
78 | 	bin/pip install -e ".[test, redis]"
79 | 	make instance
80 |
81 | .PHONY: build-plone-6
82 | build-plone-6: bin/pip ## Build Plone 6.0
83 | 	@echo "$(GREEN)==> Build with Plone 6.0$(RESET)"
84 | 	bin/pip install Plone -c https://dist.plone.org/release/$(PLONE6)/constraints.txt
85 | 	bin/pip install "zest.releaser[recommended]"
86 | 	bin/pip install -e ".[test, redis]"
87 | 	make instance
88 |
89 | .PHONY: build
90 | build: build-plone-6 ## Build Plone 6.0
91 |
92 | .PHONY: clean
93 | clean: ## Remove the old virtualenv and create a new one
94 | 	@echo "$(RED)==> Cleaning environment and build$(RESET)"
95 | 	rm -rf bin lib lib64 include share etc var inituser pyvenv.cfg .installed.cfg
96 |
97 | .PHONY: format
98 | format: ## Format the codebase according to our standards
99 | 	@echo "$(GREEN)==> Format codebase$(RESET)"
100 | 	$(FORMAT)
101 |
102 | .PHONY: format-black
103 | format-black: ## Format the codebase with black
104 | 	@echo "$(GREEN)==> Format codebase with black$(RESET)"
105 | 	$(FORMAT) black ${CODEPATH}
106 |
107 | .PHONY: format-isort
108 | format-isort: ## Format the codebase with isort
109 | 	@echo "$(GREEN)==> Format codebase with isort$(RESET)"
110 | 	$(FORMAT) isort ${CODEPATH}
111 |
112 | .PHONY: format-zpretty
113 | format-zpretty: ## Format the codebase with zpretty
114 | 	@echo "$(GREEN)==> Format codebase with zpretty$(RESET)"
115 | 	$(FORMAT) zpretty ${CODEPATH}
116 |
117 | .PHONY: lint
118 | lint: ## check code style
119 | 	$(LINT)
120 |
121 | .PHONY: lint-black
122 | lint-black: ## validate black formatting
123 | 	$(LINT) black ${CODEPATH}
124 |
125 | .PHONY: lint-flake8
126 | lint-flake8: ## validate with flake8
127 | 	$(LINT) flake8 ${CODEPATH}
128 |
129 | .PHONY: lint-isort
130 | lint-isort: ## validate using isort
131 | 	$(LINT) isort ${CODEPATH}
132 |
133 | .PHONY: lint-pyroma
134 | lint-pyroma: ## validate using pyroma
135 | 	$(LINT) pyroma ${CODEPATH}
136 |
137 | .PHONY: lint-zpretty
138 | lint-zpretty: ## validate ZCML/XML using zpretty
139 | 	$(LINT) zpretty ${CODEPATH}
140 |
141 | .PHONY: elastic
142 | elastic: ## Create Elastic Search container
143 | 	@if [ $(ELASTIC_SEARCH_CONTAINERS) -eq 0 ]; then \
144 | 		docker container create --name $(ELASTIC_SEARCH_CONTAINER) \
145 | 			-e "discovery.type=single-node" \
146 | 			-e "cluster.name=docker-cluster" \
147 | 			-e "http.cors.enabled=true" \
148 | 			-e "http.cors.allow-origin=*" \
149 | 			-e "http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization" \
150 | 			-e "http.cors.allow-credentials=true" \
151 | 			-e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
152 | 			-p 9200:9200 \
153 | 			-p 9300:9300 \
154 | 			$(ELASTIC_SEARCH_IMAGE); \
155 | 		docker start $(ELASTIC_SEARCH_CONTAINER); \
156 | 		docker exec
$(ELASTIC_SEARCH_CONTAINER) /bin/sh -c "bin/elasticsearch-plugin install ingest-attachment -b"; \
157 | 		docker stop $(ELASTIC_SEARCH_CONTAINER);fi
158 |
159 | .PHONY: start-elastic
160 | start-elastic: elastic ## Start Elastic Search
161 | 	@echo "$(GREEN)==> Start Elastic Search$(RESET)"
162 | 	@docker start $(ELASTIC_SEARCH_CONTAINER)
163 |
164 | .PHONY: stop-elastic
165 | stop-elastic: ## Stop Elastic Search
166 | 	@echo "$(GREEN)==> Stop Elastic Search$(RESET)"
167 | 	@docker stop $(ELASTIC_SEARCH_CONTAINER)
168 |
169 | .PHONY: redis
170 | redis: ## Create Redis container
171 | 	@if [ $(REDIS_CONTAINERS) -eq 0 ]; then \
172 | 		docker container create --name $(REDIS_CONTAINER) \
173 | 			-p 6379:6379 \
174 | 			$(REDIS_IMAGE);fi
175 |
176 |
177 | .PHONY: start-redis
178 | start-redis: redis ## Start redis
179 | 	@echo "$(GREEN)==> Start redis$(RESET)"
180 | 	@docker start $(REDIS_CONTAINER)
181 |
182 | .PHONY: stop-redis
183 | stop-redis: ## Stop redis
184 | 	@echo "$(GREEN)==> Stop redis$(RESET)"
185 | 	@docker stop $(REDIS_CONTAINER)
186 |
187 |
188 | .PHONY: test
189 | test: ## run tests
190 | 	make start-elastic
191 | 	make start-redis
192 | 	PYTHONWARNINGS=ignore ./bin/zope-testrunner --auto-color --auto-progress --test-path src/
193 | 	make stop-elastic
194 | 	make stop-redis
195 |
196 | .PHONY: start
197 | start: ## Start a Plone instance on localhost:8080
198 | 	PYTHONWARNINGS=ignore ./bin/runwsgi instance/etc/zope.ini
199 |
200 | .PHONY: populate
201 | populate: ## Populate site with Wikipedia content
202 | 	PYTHONWARNINGS=ignore ./bin/zconsole run etc/zope.conf scripts/populate.py
203 |
204 | .PHONY: start-redis-support
205 | start-redis-support: ## Start a Plone instance on localhost:8080 with redis queue support
206 | 	@echo "$(GREEN)==> Set the env variables PLONE_REDIS_DSN, PLONE_BACKEND, PLONE_USERNAME and PLONE_PASSWORD before starting the instance$(RESET)"
207 | 	PYTHONWARNINGS=ignore \
208 | 	$(DEFAULT_ENV_ES_REDIS) \
209 | 	./bin/runwsgi instance/etc/zope.ini
210 |
211 |
212 | .PHONY: worker
213 | worker: ## Start a worker for the redis queue
214 | 	$(DEFAULT_ENV_ES_REDIS) ./bin/rq worker normal low --with-scheduler
215 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

collective.elasticsearch

2 | 3 |
4 | 5 | [![PyPI](https://img.shields.io/pypi/v/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 6 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 7 | [![PyPI - Wheel](https://img.shields.io/pypi/wheel/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 8 | [![PyPI - License](https://img.shields.io/pypi/l/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 9 | [![PyPI - Status](https://img.shields.io/pypi/status/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 10 | 11 | 12 | [![PyPI - Plone Versions](https://img.shields.io/pypi/frameworkversions/plone/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 13 | 14 | [![Code analysis checks](https://github.com/collective/collective.elasticsearch/actions/workflows/code-analysis.yml/badge.svg)](https://github.com/collective/collective.elasticsearch/actions/workflows/code-analysis.yml) 15 | [![Tests](https://github.com/collective/collective.elasticsearch/actions/workflows/tests.yml/badge.svg)](https://github.com/collective/collective.elasticsearch/actions/workflows/tests.yml) 16 | ![Code Style](https://img.shields.io/badge/Code%20Style-Black-000000) 17 | 18 | [![GitHub contributors](https://img.shields.io/github/contributors/collective/collective.elasticsearch)](https://github.com/collective/collective.elasticsearch) 19 | [![GitHub Repo stars](https://img.shields.io/github/stars/collective/collective.elasticsearch?style=social)](https://github.com/collective/collective.elasticsearch) 20 | 21 |
22 |
23 | ## Introduction
24 |
25 | This package aims to index all fields the portal_catalog indexes and allows you to delete the `Title`, `Description` and `SearchableText` indexes, which can provide a significant improvement in performance and RAM usage.
26 |
27 | ElasticSearch queries are then ONLY used when `Title`, `Description` or `SearchableText` are part of the query. Otherwise, Plone's default catalog is used, because it is faster on normal queries than ElasticSearch.
28 |
29 |
30 | ## Install Elastic Search
31 |
32 | For comprehensive documentation about the different ways of installing Elastic Search, please read [their documentation](https://www.elastic.co/guide/en/elasticsearch/reference/7.7/install-elasticsearch.html).
33 |
34 | A quick start using Docker would be:
35 |
36 | ```shell
37 | docker run \
38 | -e "discovery.type=single-node" \
39 | -e "cluster.name=docker-cluster" \
40 | -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
41 | -p 9200:9200 \
42 | elasticsearch:7.7.0
43 | ```
44 |
45 | ### Test the installation
46 |
47 | Run, on your shell:
48 |
49 | ```shell
50 | curl http://localhost:9200/
51 | ```
52 | You should see the Hudsucker Proxy reference: "You Know, for Search".
53 |
54 | ## Install collective.elasticsearch
55 |
56 | First, add `collective.elasticsearch` to your package dependencies, or install it with `pip` (the same one used by your Plone installation):
57 |
58 | ```shell
59 | pip install collective.elasticsearch
60 | ```
61 |
62 | Restart Plone, go to the `Control Panel`, click `Add-ons`, and select `Elastic Search`.
63 |
64 | Now, go to `Add-on Configuration` and:
65 |
66 | - Check "Enable"
67 | - Click "Convert Catalog"
68 | - Click "Rebuild Catalog"
69 |
70 | You now have an insanely scalable modern search engine. Now live the life of the mind!
71 |
72 |
73 | ## Redis queue integration with blob indexing support
74 |
75 | ### TL;DR
76 |
77 | ```shell
78 | docker-compose -f docker-compose.dev.yaml up -d
79 | ```
80 |
81 | Your Plone site should be up and running at http://localhost:8080/Plone
82 |
83 | - Go to `Add-on Configuration`
84 | - Check "Enable"
85 | - Click "Convert Catalog"
86 | - Click "Rebuild Catalog"
87 |
88 | ### Why
89 |
90 | A queue that runs heavy, time-consuming jobs asynchronously improves the responsiveness of the website and lowers
91 | the risk of database conflicts. This implementation aims to have almost zero performance impact on any given Plone
92 | installation, including installations that already use collective.elasticsearch.
93 |
94 | ### How does it work
95 |
96 | - Instead of indexing/reindexing/unindexing data while committing to the DB, jobs are added to a queue in an after-commit hook.
97 | - No data is extracted from any object at this point; this all happens later.
98 | - One or more workers execute the jobs, which gather the necessary data via the REST API.
99 | - The extraction of the data and the indexing in elasticsearch happen via the queue.
100 |
101 | Workflow:
102 |
103 | 1. Content gets created/updated
104 | 2. Data is committed to the DB and the Plone catalog is updated
105 | 3. Jobs are created via after-commit hooks
106 | 4. The website is ready to use again - the request is done
107 | 5. A worker picks up the jobs
108 | 6. Each job collects the values to index via the Plone REST API and indexes them in elasticsearch
109 |
110 | There are two queues: one for normal indexing jobs and one for the heavy lifting of indexing binaries.
111 | Jobs from the second queue only get pulled when the normal indexing queue is empty.
112 |
113 | Trade-off: instead of waiting for a fully indexed document in elasticsearch, at least a partial document is available there quickly.
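The following is a minimal, illustrative python-rq sketch of that two-queue idea. The job functions (`index_content`, `index_blobs`) and the wiring are assumptions made for this example only, not the package's actual hook implementation:

```python
# Illustrative sketch only: two rq queues with different priorities.
from redis import Redis
from rq import Queue

connection = Redis.from_url("redis://localhost:6379/0")
normal = Queue("normal", connection=connection)  # index/reindex/unindex jobs
low = Queue("low", connection=connection)        # expensive blob extraction


def index_content(uid):
    """Hypothetical job: fetch the index data for ``uid`` via the REST API
    and send it to elasticsearch."""


def index_blobs(uid):
    """Hypothetical job: push the blobs of ``uid`` through the ingest pipeline."""


# Enqueued from the after-commit hook: the cheap job goes to ``normal``, the
# heavy one to ``low``. A worker started with ``rq worker normal low`` drains
# ``normal`` first and only pulls from ``low`` when ``normal`` is empty.
normal.enqueue(index_content, "<content-uid>")
low.enqueue(index_blobs, "<content-uid>")
```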
114 |
115 | ### Requirements
116 |
117 | There are a couple of things that need to be done manually if you want redis queue support.
118 |
119 |
120 | 1. Install the redis extra of collective.elasticsearch
121 | ```shell
122 | pip install collective.elasticsearch[redis]
123 | ```
124 |
125 |
126 | 2. Install the ingest-attachment plugin for elasticsearch - by default the elasticsearch image does not have any plugins installed.
127 |
128 | ```shell
129 | docker exec CONTAINER_NAME /bin/sh -c "bin/elasticsearch-plugin install ingest-attachment -b"; \
130 | docker restart CONTAINER_NAME
131 | ```
132 |
133 | The container needs to be restarted, otherwise the plugin is not available.
134 |
135 | 3. Communication between the Redis server, Plone and the Redis worker is configured via environment variables.
136 |
137 | ```shell
138 | export PLONE_REDIS_DSN=redis://localhost:6379/0
139 | export PLONE_BACKEND=http://localhost:8080/Plone
140 | export PLONE_USERNAME=admin
141 | export PLONE_PASSWORD=admin
142 | ```
143 | This is an example configuration for local development only.
144 | You can use the `start-redis-support` command to spin up a Plone instance with the environment variables already set:
145 |
146 | ```shell
147 | make start-redis-support
148 | ```
149 |
150 | 4. Start a Redis server
151 |
152 | Start your own or use the `start-redis` command:
153 | ```shell
154 | make start-redis
155 | ```
156 |
157 | 5. Start a redis worker
158 |
159 | The redis worker does the "job" and indexes everything via two queues:
160 |
161 | - normal: normal indexing/reindexing/unindexing jobs - does basically the same thing as without redis support, just via a queue.
162 | - low: holds jobs for expensive blob indexing
163 |
164 | The priority is handled by the python-rq worker.
165 |
166 | The rq worker needs to be started with the same environment variables present as described in step 3.
167 |
168 | ```shell
169 | ./bin/rq worker normal low --with-scheduler
170 | ```
171 |
172 | `--with-scheduler` is needed in order to retry failed jobs after a certain time period.
173 |
174 | Or use the `worker` command:
175 | ```shell
176 | make worker
177 | ```
178 |
179 | 6. Go to the control panel and repeat the following steps:
180 |
181 | - Check "Enable"
182 | - Click "Convert Catalog"
183 | - Click "Rebuild Catalog"
184 |
185 | ### Technical documentation for elasticsearch
186 |
187 | #### Pipeline
188 |
189 | If you hit "Convert" in the control panel and meet all the requirements to index blobs as well,
190 | collective.elasticsearch installs a default pipeline for the plone index.
191 | This pipeline converts the binary data to text (if possible) and extends the SearchableText index with the extracted data.
192 | The setup uses multiple nested processors in order to extract the binary data from all blob fields.
193 |
194 | The binary data is not stored in the index permanently; as a last step, the pipeline removes the binary itself.
195 |
196 | #### ingest-attachment plugin
197 |
198 | The ingest-attachment plugin is used to extract text data from any binary with Tika.
199 |
200 |
201 | ### Note on Performance
202 |
203 | Putting all the jobs into a queue is much faster than actually calculating all index values and sending them to elasticsearch.
204 | This feature aims to have a minimal impact on the responsiveness of the Plone site.
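To check that the plugin and the pipeline are in place, you can ask Elasticsearch directly. This assumes Elasticsearch listens on localhost:9200, as in the examples above:

```shell
# the ingest-attachment plugin should show up in this list
curl http://localhost:9200/_cat/plugins

# list the ingest pipelines, including the one created by "Convert Catalog"
curl "http://localhost:9200/_ingest/pipeline?pretty"
```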
205 |
206 |
207 | ## Compatibility
208 |
209 | - Python 3
210 | - Plone 5.2 and above
211 | - Tested with Elastic Search 7.17.7
212 |
213 | ## State
214 |
215 | Support for all index column types is done EXCEPT for the DateRecurringIndex index column type. If you are doing a full text search along with a query that contains a DateRecurringIndex column, it will not work.
216 |
217 |
218 | ## Search Highlighting
219 |
220 | If you want to make use of the [Elasticsearch highlight](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html) feature, you can enable it in the control panel.
221 |
222 | When enabled, it will replace the description of search results with the highlighted fragments from elastic search.
223 |
224 | ### Highlight Threshold
225 |
226 | This is the number of characters to show in the description. Fragments will be added until this threshold is met.
227 |
228 | ### Pre/Post Tags
229 |
230 | Highlighted terms can be wrapped in HTML, which can be used to enhance the results further, such as by adding a custom background color. Note that the default Plone search results will not render HTML, so to use this feature you will need to create a custom search result view.
231 |
232 | ## Developing this package
233 |
234 | Create the virtual environment and install all dependencies:
235 |
236 | ```shell
237 | make build
238 | ```
239 |
240 | Start Plone in the foreground:
241 |
242 | ```shell
243 | make start
244 | ```
245 |
246 |
247 | ### Running tests
248 |
249 | ```shell
250 | make test
251 | ```
252 |
253 |
254 | ### Formatting the codebase
255 |
256 | ```shell
257 | make format
258 | ```
259 |
260 | ### Linting the codebase
261 |
262 | ```shell
263 | make lint
264 | ```
265 |
266 | ## License
267 |
268 | The project is licensed under the GPLv2.
269 |
-------------------------------------------------------------------------------- /docker-compose.dev.yaml: --------------------------------------------------------------------------------
1 | version: "3.8"
2 |
3 | services:
4 |   redis:
5 |     image: redis:7.0.5
6 |     command: redis-server --appendonly yes
7 |     ports:
8 |       - 6379:6379
9 |     volumes:
10 |       - redis_data:/data
11 |
12 |   elasticsearch:
13 |     build:
14 |       context: .
15 |       dockerfile: docker/elasticsearch.Dockerfile
16 |     ports:
17 |       - 9200:9200
18 |       - 9300:9300
19 |     environment:
20 |       - discovery.type=single-node
21 |       - cluster.name=docker-cluster
22 |       - http.cors.enabled=true
23 |       - http.cors.allow-origin=*
24 |       - http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization
25 |       - http.cors.allow-credentials=true
26 |       - ES_JAVA_OPTS=-Xms512m -Xmx512m
27 |     volumes:
28 |       - elasticsearch_data:/usr/share/elasticsearch/data
29 |
30 |   worker:
31 |     build:
32 |       context: .
33 |       dockerfile: docker/worker.Dockerfile
34 |     environment:
35 |       - PLONE_REDIS_DSN=redis://redis:6379/0
36 |       - PLONE_BACKEND=http://plone:8080/Plone
37 |       - PLONE_USERNAME=admin
38 |       - PLONE_PASSWORD=admin
39 |
40 |   plone:
41 |     build:
42 |       context: .
43 | dockerfile: docker/plone.Dockerfile 44 | environment: 45 | - PLONE_REDIS_DSN=redis://redis:6379/0 46 | - PLONE_BACKEND=http://127.0.0.1:8080/Plone 47 | - PLONE_USERNAME=admin 48 | - PLONE_PASSWORD=admin 49 | ports: 50 | - "8080:8080" 51 | depends_on: 52 | - redis 53 | - elasticsearch 54 | - worker 55 | volumes: 56 | - plone_data:/data 57 | 58 | volumes: 59 | redis_data: 60 | elasticsearch_data: 61 | plone_data: 62 | -------------------------------------------------------------------------------- /docker/elasticsearch.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM elasticsearch:7.17.7 2 | 3 | RUN bin/elasticsearch-plugin install ingest-attachment -b 4 | -------------------------------------------------------------------------------- /docker/plone.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM plone/plone-backend:6.0.0b3 2 | 3 | WORKDIR /app 4 | 5 | RUN /app/bin/pip install git+https://github.com/collective/collective.elasticsearch.git@mle-redis-rq#egg=collective.elasticsearch[redis] 6 | 7 | ENV PROFILES="collective.elasticsearch:default collective.elasticsearch:docker-dev" 8 | ENV TYPE="classic" 9 | ENV SITE="Plone" 10 | -------------------------------------------------------------------------------- /docker/worker.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM plone/plone-backend:6.0.0b3 2 | 3 | WORKDIR /app 4 | 5 | RUN /app/bin/pip install git+https://github.com/collective/collective.elasticsearch.git@mle-redis-rq#egg=collective.elasticsearch[redis] 6 | 7 | CMD /app/bin/rq worker normal low --with-scheduler --url=$PLONE_REDIS_DSN 8 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/collectiveelasticsearch.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/collectiveelasticsearch.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/collectiveelasticsearch" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/collectiveelasticsearch" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # collective.elasticsearch documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Mar 13 15:04:25 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 
11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # The suffix(es) of source filenames. 37 | # You can specify multiple suffix as a list of string: 38 | # source_suffix = ['.rst', '.md'] 39 | source_suffix = '.rst' 40 | 41 | # The encoding of source files. 42 | #source_encoding = 'utf-8-sig' 43 | 44 | # The master toctree document. 45 | master_doc = 'index' 46 | 47 | # General information about the project. 48 | project = u'collective.elasticsearch' 49 | copyright = u'Nathan Van Gheem (vangheem)' 50 | author = u'Nathan Van Gheem (vangheem)' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = u'3.0' 58 | # The full version, including alpha/beta/rc tags. 59 | release = u'3.0' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | #today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | #today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This patterns also effect to html_static_path and html_extra_path 77 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 78 | 79 | # The reST default role (used for this markup: `text`) to use for all 80 | # documents. 81 | #default_role = None 82 | 83 | # If true, '()' will be appended to :func: etc. cross-reference text. 84 | #add_function_parentheses = True 85 | 86 | # If true, the current module name will be prepended to all description 87 | # unit titles (such as .. function::). 88 | #add_module_names = True 89 | 90 | # If true, sectionauthor and moduleauthor directives will be shown in the 91 | # output. They are ignored by default. 92 | #show_authors = False 93 | 94 | # The name of the Pygments (syntax highlighting) style to use. 95 | pygments_style = 'sphinx' 96 | 97 | # A list of ignored prefixes for module index sorting. 98 | #modindex_common_prefix = [] 99 | 100 | # If true, keep warnings as "system message" paragraphs in the built documents. 
101 | #keep_warnings = False 102 | 103 | # If true, `todo` and `todoList` produce output, else they produce nothing. 104 | todo_include_todos = False 105 | 106 | 107 | # -- Options for HTML output ---------------------------------------------- 108 | 109 | # The theme to use for HTML and HTML Help pages. See the documentation for 110 | # a list of builtin themes. 111 | html_theme = 'alabaster' 112 | 113 | # Theme options are theme-specific and customize the look and feel of a theme 114 | # further. For a list of options available for each theme, see the 115 | # documentation. 116 | #html_theme_options = {} 117 | 118 | # Add any paths that contain custom themes here, relative to this directory. 119 | #html_theme_path = [] 120 | 121 | # The name for this set of Sphinx documents. 122 | # " v documentation" by default. 123 | #html_title = u'bobtemplates.plone v3.0' 124 | 125 | # A shorter title for the navigation bar. Default is the same as html_title. 126 | #html_short_title = None 127 | 128 | # The name of an image file (relative to this directory) to place at the top 129 | # of the sidebar. 130 | #html_logo = None 131 | 132 | # The name of an image file (relative to this directory) to use as a favicon of 133 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 134 | # pixels large. 135 | #html_favicon = None 136 | 137 | # Add any paths that contain custom static files (such as style sheets) here, 138 | # relative to this directory. They are copied after the builtin static files, 139 | # so a file named "default.css" will overwrite the builtin "default.css". 140 | html_static_path = ['_static'] 141 | 142 | # Add any extra paths that contain custom files (such as robots.txt or 143 | # .htaccess) here, relative to this directory. These files are copied 144 | # directly to the root of the documentation. 145 | #html_extra_path = [] 146 | 147 | # If not None, a 'Last updated on:' timestamp is inserted at every page 148 | # bottom, using the given strftime format. 149 | # The empty string is equivalent to '%b %d, %Y'. 150 | #html_last_updated_fmt = None 151 | 152 | # If true, SmartyPants will be used to convert quotes and dashes to 153 | # typographically correct entities. 154 | #html_use_smartypants = True 155 | 156 | # Custom sidebar templates, maps document names to template names. 157 | #html_sidebars = {} 158 | 159 | # Additional templates that should be rendered to pages, maps page names to 160 | # template names. 161 | #html_additional_pages = {} 162 | 163 | # If false, no module index is generated. 164 | #html_domain_indices = True 165 | 166 | # If false, no index is generated. 167 | #html_use_index = True 168 | 169 | # If true, the index is split into individual pages for each letter. 170 | #html_split_index = False 171 | 172 | # If true, links to the reST sources are added to the pages. 173 | #html_show_sourcelink = True 174 | 175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 176 | #html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 179 | #html_show_copyright = True 180 | 181 | # If true, an OpenSearch description file will be output, and all pages will 182 | # contain a tag referring to it. The value of this option must be the 183 | # base URL from which the finished HTML is served. 184 | #html_use_opensearch = '' 185 | 186 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 
187 | #html_file_suffix = None 188 | 189 | # Language to be used for generating the HTML full-text search index. 190 | # Sphinx supports the following languages: 191 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 192 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 193 | #html_search_language = 'en' 194 | 195 | # A dictionary with options for the search language support, empty by default. 196 | # 'ja' uses this config value. 197 | # 'zh' user can custom change `jieba` dictionary path. 198 | #html_search_options = {'type': 'default'} 199 | 200 | # The name of a javascript file (relative to the configuration directory) that 201 | # implements a search results scorer. If empty, the default will be used. 202 | #html_search_scorer = 'scorer.js' 203 | 204 | # Output file base name for HTML help builder. 205 | htmlhelp_basename = 'collective.elasticsearchdoc' 206 | 207 | # -- Options for LaTeX output --------------------------------------------- 208 | 209 | latex_elements = { 210 | # The paper size ('letterpaper' or 'a4paper'). 211 | #'papersize': 'letterpaper', 212 | 213 | # The font size ('10pt', '11pt' or '12pt'). 214 | #'pointsize': '10pt', 215 | 216 | # Additional stuff for the LaTeX preamble. 217 | #'preamble': '', 218 | 219 | # Latex figure (float) alignment 220 | #'figure_align': 'htbp', 221 | } 222 | 223 | # Grouping the document tree into LaTeX files. List of tuples 224 | # (source start file, target name, title, 225 | # author, documentclass [howto, manual, or own class]). 226 | latex_documents = [ 227 | ('index', 'collectiveelasticsearch.tex', u'collective.elasticsearch Documentation', 228 | u'Nathan Van Gheem', 'manual'), 229 | ] 230 | 231 | # The name of an image file (relative to this directory) to place at the top of 232 | # the title page. 233 | #latex_logo = None 234 | 235 | # For "manual" documents, if this is true, then toplevel headings are parts, 236 | # not chapters. 237 | #latex_use_parts = False 238 | 239 | # If true, show page references after internal links. 240 | #latex_show_pagerefs = False 241 | 242 | # If true, show URL addresses after external links. 243 | #latex_show_urls = False 244 | 245 | # Documents to append as an appendix to all manuals. 246 | #latex_appendices = [] 247 | 248 | # If false, no module index is generated. 249 | #latex_domain_indices = True 250 | 251 | 252 | # -- Options for manual page output --------------------------------------- 253 | 254 | # One entry per manual page. List of tuples 255 | # (source start file, name, description, authors, manual section). 256 | man_pages = [ 257 | ('index', 'collectiveelasticsearch', u'collective.elasticsearch Documentation', 258 | [u'Nathan Van Gheem'], 1) 259 | ] 260 | 261 | # If true, show URL addresses after external links. 262 | #man_show_urls = False 263 | 264 | 265 | # -- Options for Texinfo output ------------------------------------------- 266 | 267 | # Grouping the document tree into Texinfo files. List of tuples 268 | # (source start file, target name, title, author, 269 | # dir menu entry, description, category) 270 | texinfo_documents = [ 271 | ('index', 'collectiveelasticsearch', u'collective.elasticsearch Documentation', 272 | u'Nathan Van Gheem', 'collectiveelasticsearch', 'One line description of project.', 273 | 'Miscellaneous'), 274 | ] 275 | 276 | # Documents to append as an appendix to all manuals. 277 | #texinfo_appendices = [] 278 | 279 | # If false, no module index is generated. 
280 | #texinfo_domain_indices = True
281 |
282 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
283 | #texinfo_show_urls = 'footnote'
284 |
285 | # If true, do not generate a @detailmenu in the "Top" node's menu.
286 | #texinfo_no_detailmenu = False
287 |
-------------------------------------------------------------------------------- /docs/config.rst: --------------------------------------------------------------------------------
1 | Configuration
2 | =============
3 |
4 | Basic configuration
5 | -------------------
6 |
7 | - Go to the Control Panel
8 | - Add "Elastic Search" in Add-on Products
9 | - Click "Elastic Search" in "Add-on Configuration"
10 | - Enable
11 | - Click "Convert Catalog"
12 | - Click "Rebuild Catalog"
13 |
14 |
15 | Changing the index used for elasticsearch
16 | -----------------------------------------
17 |
18 | The index used for elasticsearch is the path to the portal_catalog by default, so you don't have anything to do if
19 | you have several Plone sites on the same instance (the Plone site ids would be different).
20 |
21 | However, if you want to use the same elasticsearch instance with several Plone instances, you may
22 | end up having conflicts. In that case, you may want to manually set the index used by adding the following code
23 | to the ``__init__.py`` file of your module::
24 |
25 |     from Products.CMFPlone.CatalogTool import CatalogTool
26 |     from collective.elasticsearch.es import CUSTOM_INDEX_NAME_ATTR
27 |
28 |     setattr(CatalogTool, CUSTOM_INDEX_NAME_ATTR, "my_elasticsearch_custom_index")
29 |
30 |
31 | Adding custom indexes which are not in the catalog
32 | --------------------------------------------------
33 |
34 | An adapter is used to define the mapping between the indexes and the elasticsearch properties. You can override
35 | the ``_default_mapping`` attribute to add your own indexes::
36 |
46 |     @implementer(IMappingProvider)
47 |     class MyMappingAdapter(object):
48 |
49 |         _default_mapping = {
50 |             'SearchableText': {'store': False, 'type': 'text', 'index': True},
51 |             'Title': {'store': False, 'type': 'text', 'index': True},
52 |             'Description': {'store': False, 'type': 'text', 'index': True},
53 |             'MyOwnIndex': {'store': False, 'type': 'text', 'index': True},
54 |         }
55 |
56 |
57 | Changing the settings of the index
58 | ----------------------------------
59 |
60 | If you want to customize your elasticsearch index, you can override the ``get_index_creation_body`` method on the ``MappingAdapter``::
61 |
62 |     @implementer(IMappingProvider)
63 |     class MyMappingAdapter(object):
64 |
65 |         def get_index_creation_body(self):
66 |             return {
67 |                 "settings": {
68 |                     "number_of_shards": 1,
69 |                     "number_of_replicas": 0
70 |                 }
71 |             }
72 |
73 |
74 | Changing the query made to elasticsearch
75 | ----------------------------------------
76 |
77 | The query generation is handled by another adapter.
78 |
85 | You will have to override the ``__call__`` method to change the query. Look at the original adapter to get a better
86 | idea of what you need to change.
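As a rough sketch only - the interface name (``IQueryAssembler``), the constructor signature and the helper
``build_base_query`` below are assumptions made for illustration; check ``interfaces.py`` and the original adapter
in ``query.py`` for the real names::

    from zope.interface import implementer
    from collective.elasticsearch.interfaces import IQueryAssembler  # assumed name


    @implementer(IQueryAssembler)
    class MyQueryAdapter(object):

        def __init__(self, request, es):  # assumed signature
            self.request = request
            self.es = es

        def __call__(self, dquery):
            # reuse or copy the original adapter's logic to build the base query
            query = build_base_query(dquery)  # hypothetical helper
            # example tweak: only ever return published content
            return {
                "bool": {
                    "must": [query],
                    "filter": [{"term": {"review_state": "published"}}],
                }
            }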
5 | 
6 | Welcome to collective.elasticsearch's documentation!
7 | ====================================================
8 | 
9 | Overview
10 | --------
11 | 
12 | This package aims to index all fields the portal_catalog indexes
13 | and allows you to delete the `Title`, `Description` and `SearchableText`
14 | indexes, which can significantly improve performance and reduce RAM usage.
15 | 
16 | ElasticSearch queries are then ONLY used when `Title`, `Description` or `SearchableText`
17 | are part of the query. Otherwise, Plone's default catalog is used,
18 | because it is faster than ElasticSearch for normal
19 | queries.
20 | 
21 | 
22 | Compatibility
23 | -------------
24 | 
25 | Only unit tested with Plone 5 with Dexterity types and Archetypes.
26 | 
27 | It should also work with Plone 4.3 and Plone 5.1.
28 | 
29 | Deployed with Elasticsearch 7.6.0.
30 | 
31 | State
32 | -----
33 | 
34 | All index column types are supported EXCEPT for the DateRecurringIndex
35 | column type. A full text search combined with a query that contains
36 | a DateRecurringIndex column will not work.
37 | 
38 | 
39 | Celery support
40 | --------------
41 | 
42 | This package comes with Celery support: all indexing operations are pushed
43 | to Celery and run asynchronously.
44 | 
45 | Please see the instructions for collective.celery to see how this works.
46 | 
47 | Contents:
48 | 
49 | .. toctree::
50 |    :maxdepth: 2
51 | 
52 |    install
53 |    config
54 |    history
55 | 
56 | 
57 | 
58 | Indices and tables
59 | ==================
60 | 
61 | * :ref:`genindex`
62 | * :ref:`modindex`
63 | * :ref:`search`
-------------------------------------------------------------------------------- /docs/install.rst: --------------------------------------------------------------------------------
1 | Installation
2 | ============
3 | 
4 | collective.elasticsearch
5 | ------------------------
6 | 
7 | To install collective.elasticsearch into the global Python environment (or a workingenv),
8 | using a traditional Zope 2 instance, you can do this:
9 | 
10 | * When you're reading this you have probably already run
11 |   ``easy_install collective.elasticsearch``. Find out how to install setuptools
12 |   (and EasyInstall) here:
13 |   http://peak.telecommunity.com/DevCenter/EasyInstall
14 | 
15 | * If you are using Zope 2.9 (not 2.10), get `pythonproducts`_ and install it
16 |   via::
17 | 
18 |       python setup.py install --home /path/to/instance
19 | 
20 |   into your Zope instance.
21 | 
22 | * Create a file called ``collective.elasticsearch-configure.zcml`` in the
23 |   ``/path/to/instance/etc/package-includes`` directory. The file
24 |   should only contain this::
25 | 
26 |       <include package="collective.elasticsearch" />
27 | 
28 | .. _pythonproducts: http://plone.org/products/pythonproducts
29 | 
30 | 
31 | Alternatively, if you are using zc.buildout and the plone.recipe.zope2instance
32 | recipe to manage your project, you can do this:
33 | 
34 | * Add ``collective.elasticsearch`` to the list of eggs to install, e.g.::
35 | 
36 |     [buildout]
37 |     ...
38 |     eggs =
39 |         ...
40 |         collective.elasticsearch
41 | 
42 | * Tell the plone.recipe.zope2instance recipe to install a ZCML slug::
43 | 
44 |     [instance]
45 |     recipe = plone.recipe.zope2instance
46 |     ...
47 |     zcml =
48 |         collective.elasticsearch
49 | 
50 | * Re-run buildout, e.g. with::
51 | 
52 |     $ ./bin/buildout
53 | 
54 | You can skip the ZCML slug if you are going to explicitly include the package
55 | from another package's configure.zcml file.
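
Alternatively, on a modern pip-based setup you can install the package directly. A minimal
sketch (the optional ``redis`` extra mirrors the ``extras_require`` section of ``setup.py``)::

    $ pip install "collective.elasticsearch[redis]"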
56 | 
57 | elasticsearch
58 | -------------
59 | 
60 | Less than 5 minutes:
61 | - Download & install Java (optional: Elasticsearch 7.x ships with a bundled JDK)
62 | - Download & install Elasticsearch
63 | - bin/elasticsearch
64 | 
65 | Step by Step for Ubuntu:
66 | - apt-get update
67 | - apt-get install git curl (a separate Java install is no longer needed; Elasticsearch 7.x bundles a JDK)
68 | - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.0-linux-x86_64.tar.gz
69 | - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.0-linux-x86_64.tar.gz.sha512
70 | - shasum -a 512 -c elasticsearch-7.6.0-linux-x86_64.tar.gz.sha512
71 | - tar -xzf elasticsearch-7.6.0-linux-x86_64.tar.gz
72 | - cd elasticsearch-7.6.0
73 | - bin/elasticsearch
74 | 
75 | Step by Step for CentOS/RedHat:
76 | - yum -y install java-1.8.0-openjdk.x86_64 (optional for Elasticsearch 7.x, which bundles a JDK)
77 | - alternatives --auto java
78 | - curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.0-linux-x86_64.tar.gz
79 | - tar xfvz elasticsearch-7.6.0-linux-x86_64.tar.gz
80 | - cd elasticsearch-7.6.0
81 | - bin/elasticsearch
82 | 
83 | Does it work?
84 | - curl http://localhost:9200/
85 | - Do you see the Hudsucker Proxy reference? "You Know, for Search"
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | REM Command file for Sphinx documentation
4 | 
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 | 
16 | if "%1" == "" goto help
17 | 
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | goto end
41 | )
42 | 
43 | if "%1" == "clean" (
44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
45 | del /q /s %BUILDDIR%\*
46 | goto end
47 | )
48 | 
49 | 
50 | %SPHINXBUILD% 2> nul
51 | if errorlevel 9009 (
52 | echo.
53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
54 | echo.installed, then set the SPHINXBUILD environment variable to point
55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
56 | echo.may add the Sphinx directory to PATH.
57 | echo.
58 | echo.If you don't have Sphinx installed, grab it from
59 | echo.http://sphinx-doc.org/
60 | exit /b 1
61 | )
62 | 
63 | if "%1" == "html" (
64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
65 | if errorlevel 1 exit /b 1
66 | echo.
67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
68 | goto end
69 | )
70 | 
71 | if "%1" == "dirhtml" (
72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
73 | if errorlevel 1 exit /b 1
74 | echo.
75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
76 | goto end
77 | )
78 | 
79 | if "%1" == "singlehtml" (
80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
81 | if errorlevel 1 exit /b 1
82 | echo.
83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
84 | goto end
85 | )
86 | 
87 | if "%1" == "pickle" (
88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
89 | if errorlevel 1 exit /b 1
90 | echo.
91 | echo.Build finished; now you can process the pickle files.
92 | goto end
93 | )
94 | 
95 | if "%1" == "json" (
96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
97 | if errorlevel 1 exit /b 1
98 | echo.
99 | echo.Build finished; now you can process the JSON files.
100 | goto end
101 | )
102 | 
103 | if "%1" == "htmlhelp" (
104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | if errorlevel 1 exit /b 1
106 | echo.
107 | echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | goto end
110 | )
111 | 
112 | if "%1" == "qthelp" (
113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | if errorlevel 1 exit /b 1
115 | echo.
116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\collectiveelasticsearch.qhcp
119 | echo.To view the help file:
120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\collectiveelasticsearch.qhc
121 | goto end
122 | )
123 | 
124 | if "%1" == "devhelp" (
125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished.
129 | goto end
130 | )
131 | 
132 | if "%1" == "epub" (
133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | if errorlevel 1 exit /b 1
135 | echo.
136 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | goto end
138 | )
139 | 
140 | if "%1" == "latex" (
141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | if errorlevel 1 exit /b 1
143 | echo.
144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | goto end
146 | )
147 | 
148 | if "%1" == "latexpdf" (
149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | cd %BUILDDIR%/latex
151 | make all-pdf
152 | cd %BUILDDIR%/..
153 | echo.
154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | goto end
156 | )
157 | 
158 | if "%1" == "latexpdfja" (
159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | cd %BUILDDIR%/latex
161 | make all-pdf-ja
162 | cd %BUILDDIR%/..
163 | echo.
164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | goto end
166 | )
167 | 
168 | if "%1" == "text" (
169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | if errorlevel 1 exit /b 1
171 | echo.
172 | echo.Build finished. The text files are in %BUILDDIR%/text.
173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /instance.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # This is a cookiecutter configuration context file for 3 | # 4 | # cookiecutter-zope-instance 5 | # 6 | # available options are documented at 7 | # https://github.com/bluedynamics/cookiecutter-zope-instance/ 8 | 9 | default_context: 10 | debug_mode: true 11 | verbose_security: true 12 | wsgi_listen: 0.0.0.0:8080 13 | initial_user_name: admin 14 | initial_user_password: admin 15 | load_zcml: 16 | package_includes: ['collective.elasticsearch'] 17 | db_storage: direct 18 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py38'] 4 | include = '\.pyi?$' 5 | 6 | [tool.isort] 7 | profile = "black" 8 | force_alphabetical_sort = true 9 | force_single_line = true 10 | lines_after_imports = 2 11 | line_length = 88 12 | 13 | [tool.flakeheaven.plugins] 14 | # Disable some checks. 15 | # - E501 line too long 16 | # flake8 is already testing this, with max-line-length=100000 in .flake8, 17 | # so pycodestyle should not test it. 
18 | # - W503 line break before binary operator 19 | # Outdated recommendation, see https://www.flake8rules.com/rules/W503.html 20 | mccabe = ["+*"] 21 | pycodestyle = ["+*", "-E501", "-W503"] 22 | pyflakes = ["+*"] 23 | pylint = ["+*"] 24 | 25 | [tool.plone-code-analysis] 26 | checkers = ["black", "flake8", "isort", "pyroma", "zpretty"] 27 | formatters = ["black", "isort", "zpretty"] 28 | paths = "setup.py src/ scripts/" 29 | -------------------------------------------------------------------------------- /scripts/populate.py: -------------------------------------------------------------------------------- 1 | from AccessControl.SecurityManagement import newSecurityManager 2 | from AccessControl.SecurityManager import setSecurityPolicy 3 | from lxml.html import fromstring 4 | from lxml.html import tostring 5 | from multiprocessing.pool import ThreadPool as Pool 6 | from plone import api 7 | from plone.app.textfield.value import RichTextValue 8 | from Products.CMFCore.tests.base.security import OmnipotentUser 9 | from Products.CMFCore.tests.base.security import PermissiveSecurityPolicy 10 | from Testing.makerequest import makerequest 11 | from unidecode import unidecode 12 | from zope.component.hooks import setSite 13 | 14 | import os 15 | import random 16 | import requests 17 | import transaction 18 | 19 | 20 | SITE_ID = "Plone" 21 | 22 | 23 | def parse_url(url): 24 | resp = requests.get(url) 25 | return resp.content 26 | 27 | 28 | def spoofRequest(app): # NOQA W0621 29 | """ 30 | Make REQUEST variable to be available on the Zope application server. 31 | 32 | This allows acquisition to work properly 33 | """ 34 | _policy = PermissiveSecurityPolicy() 35 | setSecurityPolicy(_policy) 36 | newSecurityManager(None, OmnipotentUser().__of__(app.acl_users)) 37 | return makerequest(app) 38 | 39 | 40 | # Enable Faux HTTP request object 41 | app = spoofRequest(app) # noqa 42 | 43 | _dir = os.path.join(os.getcwd(), "src") 44 | 45 | _links = [] # type: list 46 | _toparse = [] # type: list 47 | 48 | 49 | def parse_urls(urls): 50 | with Pool(8) as pool: 51 | return pool.map(parse_url, urls) 52 | 53 | 54 | class DataReader: 55 | base_url = "https://en.wikipedia.org" 56 | base_content_url = base_url + "/wiki/" 57 | start_page = base_content_url + "Main_Page" 58 | title_selector = "#firstHeading" 59 | content_selector = "#bodyContent" 60 | 61 | def __init__(self): 62 | self.parsed = [] 63 | self.toparse = [self.start_page] 64 | self.toprocess = [] 65 | 66 | def get_content(self, html, selector, text=False): # NOQA R0201 67 | els = html.cssselect(selector) 68 | if len(els) > 0: 69 | if text: 70 | return unidecode(els[0].text_content()) 71 | return tostring(els[0]) 72 | return None 73 | 74 | def __iter__(self): 75 | while len(self.toparse) > 0: 76 | if len(self.toprocess) == 0: 77 | toparse = [ 78 | self.toparse.pop(0) for _ in range(min(20, len(self.toparse))) 79 | ] 80 | self.toprocess = parse_urls(toparse) 81 | self.parsed.extend(toparse) 82 | html = fromstring(self.toprocess.pop(0)) 83 | 84 | # get more links! 
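# Queue further wiki articles to crawl: keep only links under
# base_content_url and skip pages that were already fetched.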
85 |             for el in html.cssselect("a"):
86 |                 url = el.attrib.get("href", "")
87 |                 if url.startswith("/"):
88 |                     url = self.base_url + url
89 |                 if url.startswith(self.base_content_url) and url not in self.parsed:
90 |                     self.toparse.append(url)
91 | 
92 |             title = self.get_content(html, self.title_selector, text=True)
93 |             body = self.get_content(html, self.content_selector)
94 |             if not title or not body:
95 |                 continue
96 | 
97 |             yield {
98 |                 "title": f"{title}",
99 |                 "text": RichTextValue(
100 |                     body.decode("utf-8"),
101 |                     mimeType="text/html",
102 |                     outputMimeType="text/x-html-safe",
103 |                 ),
104 |             }
105 | 
106 | 
107 | def importit(app):  # NOQA W0621
108 |     site = app[SITE_ID]
109 |     setSite(site)
110 |     per_folder = 50
111 |     num_folders = 6
112 |     max_depth = 4
113 |     portal_types = ["Document", "News Item", "Event"]
114 |     data = iter(DataReader())
115 | 
116 |     def populate(parent, count=0, depth=0):
117 |         if depth >= max_depth:
118 |             return count
119 |         for fidx in range(num_folders):
120 |             count += 1
121 |             fid = f"folder{fidx}"
122 |             if fid in parent.objectIds():
123 |                 folder = parent[fid]
124 |             else:
125 |                 folder = api.content.create(
126 |                     type="Folder",
127 |                     title=f"Folder {fidx}",
128 |                     id=fid,
129 |                     exclude_from_nav=True,
130 |                     container=parent,
131 |                 )
132 |             for didx in range(per_folder):
133 |                 count += 1
134 |                 pid = f"page{didx}"
135 |                 if pid not in folder.objectIds():
136 |                     payload = next(data)
137 |                     try:
138 |                         api.content.create(
139 |                             type=random.choice(portal_types),
140 |                             id=pid,
141 |                             container=folder,
142 |                             exclude_from_nav=True,
143 |                             **payload,
144 |                         )
145 |                         print("created ", count)
146 |                     except Exception:  # NOQA W0703
147 |                         print("skipping", count)
148 |             print("committing")
149 |             transaction.commit()
150 |             count = populate(folder, count, depth + 1)
151 |             app._p_jar.cacheMinimize()
152 |         return count
153 | 
154 |     populate(site)
155 | 
156 | 
157 | importit(app)
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | """Installer for the collective.elasticsearch package."""
2 | from pathlib import Path
3 | from setuptools import find_packages
4 | from setuptools import setup
5 | 
6 | 
7 | long_description = f"""
8 | {Path("README.md").read_text()}\n
9 | {Path("CHANGELOG.md").read_text()}\n
10 | """
11 | 
12 | 
13 | setup(
14 |     name="collective.elasticsearch",
15 |     version="5.0.1.dev0",
16 |     description="elasticsearch integration with plone",
17 |     long_description=long_description,
18 |     long_description_content_type="text/markdown",
19 |     # Get more from https://pypi.org/classifiers/
20 |     classifiers=[
21 |         "Development Status :: 5 - Production/Stable",
22 |         "Environment :: Web Environment",
23 |         "Framework :: Plone :: 5.2",
24 |         "Framework :: Plone :: 6.0",
25 |         "Framework :: Plone :: Addon",
26 |         "Framework :: Plone",
27 |         "Framework :: Zope :: 4",
28 |         "Framework :: Zope :: 5",
29 |         "Intended Audience :: System Administrators",
30 |         "License :: OSI Approved :: GNU General Public License (GPL)",
31 |         "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
32 |         "Operating System :: OS Independent",
33 |         "Programming Language :: Python :: 3 :: Only",
34 |         "Programming Language :: Python :: 3.7",
35 |         "Programming Language :: Python :: 3.8",
36 |         "Programming Language :: Python :: 3.9",
37 |         "Programming Language :: Python :: 3.10",
38 |         "Programming Language :: Python",
39 |         "Topic :: Software Development :: Libraries :: Python Modules",
40 |     ],
41 | 
keywords="plone elasticsearch search indexing", 42 | author="Nathan Van Gheem", 43 | author_email="vangheem@gmail.com", 44 | url="https://github.com/collective/collective.elasticsearch", 45 | project_urls={ 46 | "PyPI": "https://pypi.python.org/pypi/collective.elasticsearch", 47 | "Source": "https://github.com/collective/collective.elasticsearch", 48 | "Tracker": "https://github.com/collective/collective.elasticsearch/issues", 49 | }, 50 | license="GPL version 2", 51 | packages=find_packages("src", exclude=["ez_setup"]), 52 | namespace_packages=["collective"], 53 | package_dir={"": "src"}, 54 | include_package_data=True, 55 | zip_safe=False, 56 | python_requires=">=3.7", 57 | install_requires=[ 58 | "setuptools", 59 | "elasticsearch==7.17.7", 60 | "plone.app.registry", 61 | "plone.api", 62 | "setuptools", 63 | ], 64 | extras_require={ 65 | "test": [ 66 | "plone.app.contentrules", 67 | "plone.app.contenttypes", 68 | "plone.restapi[test]", 69 | "plone.app.testing[robot]>=7.0.0a3", 70 | "plone.app.robotframework[test]>=2.0.0a5", 71 | "parameterized", 72 | ], 73 | "redis": [ 74 | "redis", 75 | "rq", 76 | "requests", 77 | "cbor2", 78 | ], 79 | }, 80 | entry_points=""" 81 | [z3c.autoinclude.plugin] 82 | target = plone 83 | [plone.autoinclude.plugin] 84 | target = plone 85 | """, 86 | ) 87 | -------------------------------------------------------------------------------- /src/collective/__init__.py: -------------------------------------------------------------------------------- 1 | __import__("pkg_resources").declare_namespace(__name__) 2 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | logger = logging.getLogger("collective.elasticsearch") 5 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/browser/__init__.py -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/configure.zcml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 10 | 17 | 18 | 26 | 27 | 35 | 36 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/controlpanel.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.interfaces import IElasticSettings 2 | from collective.elasticsearch.manager import ElasticSearchManager 3 | from collective.elasticsearch.utils import is_redis_available 4 | from elasticsearch.exceptions import ConnectionError as conerror 5 | from plone import api 6 | from plone.app.registry.browser.controlpanel import ControlPanelFormWrapper 7 | from plone.app.registry.browser.controlpanel import RegistryEditForm 8 | from plone.z3cform import layout 9 | from Products.Five.browser.pagetemplatefile import ViewPageTemplateFile 10 | from urllib3.exceptions import NewConnectionError 11 | from z3c.form import form 12 | 13 | 14 | class ElasticControlPanelForm(RegistryEditForm): 15 | form.extends(RegistryEditForm) 16 | schema = IElasticSettings 17 | 18 | label = "Elastic Search Settings" 
19 | 
20 |     control_panel_view = "@@elastic-controlpanel"
21 | 
22 |     def updateWidgets(self):
23 |         super().updateWidgets()
24 |         if not is_redis_available():
25 |             self.widgets["use_redis"].disabled = "disabled"
26 | 
27 | 
28 | class ElasticControlPanelFormWrapper(ControlPanelFormWrapper):
29 |     index = ViewPageTemplateFile("controlpanel_layout.pt")
30 | 
31 |     def __init__(self, *args, **kwargs):
32 |         super().__init__(*args, **kwargs)
33 |         self.portal_catalog = api.portal.get_tool("portal_catalog")
34 |         self.es = ElasticSearchManager()
35 | 
36 |     @property
37 |     def connection_status(self):
38 |         try:
39 |             # Older clients exposed .status(); current clients raise
40 |             # AttributeError here, which falls through to the health check.
41 |             return self.es.connection.status()["ok"]
42 |         except (
43 |             conerror,
44 |             ConnectionError,
45 |             NewConnectionError,
46 |             ConnectionRefusedError,
47 |             AttributeError,
48 |         ):
49 |             try:
50 |                 health_status = self.es.connection.cluster.health()["status"]
51 |                 return health_status in ("green", "yellow")
52 |             except (
53 |                 conerror,
54 |                 ConnectionError,
55 |                 NewConnectionError,
56 |                 ConnectionRefusedError,
57 |                 AttributeError,
58 |             ):
59 |                 return False
60 | 
61 |     @property
62 |     def es_info(self):
63 |         return self.es.info
64 | 
65 |     @property
66 |     def enabled(self):
67 |         return self.es.enabled
68 | 
69 |     @property
70 |     def active(self):
71 |         return self.es.active
72 | 
73 |     @property
74 |     def enable_data_sync(self):
75 |         if self.es_info:
76 |             info = dict((key, value) for key, value in self.es_info)
77 |             elastic_docs = info["Number of docs"]
78 |             catalog_objs = info["Number of docs (Catalog)"]
79 |             if elastic_docs != catalog_objs:
80 |                 return dict(elastic_docs=elastic_docs, catalog_objs=catalog_objs)
81 |         return False
82 | 
83 | 
84 | ElasticControlPanelView = layout.wrap_form(
85 |     ElasticControlPanelForm, ElasticControlPanelFormWrapper
86 | )
-------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/controlpanel_layout.pt: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 |
7 | 42 | 43 | Site Setup 44 | › 45 |

${view/label}

46 |
47 | Warning 48 | Could not connect to specified elastic search server. 49 |
50 |
51 | 52 |
53 | 54 |
55 |
56 | Actions 57 |
58 |
59 |
60 |
61 | 62 |

63 | The "Convert Catalog" action will create an index on elasticsearch, if it does not exist, and will map all indexes in the catalog. 64 |

65 | 66 | 67 |
68 |
71 |
72 | 73 |

74 | The "Synchronize" action synchronizes the elasticsearch data with the catalog data. Currently there are ${enable_data_sync/elastic_docs} documents indexed in elasticsearch and ${enable_data_sync/catalog_objs} objects indexed in catalog. 75 |

76 |
77 | Warning Could take a very long time. Use carefully. 78 |
79 | 80 | 81 |
82 |
83 |
84 | 85 |

86 | The "Rebuild Catalog" action clears the catalog and indexes all objects with an 'indexObject' method. This may take a long time. 87 |

88 |
89 | Warning 90 | Could take a very long time. Use carefully. 91 |
92 | 93 | 94 |
95 |
96 |
97 |
98 | 99 | 100 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 |
101 | Server status 102 |
${python: data[0]}${python: data[1]}
111 |
112 |
113 |
114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/search.py: -------------------------------------------------------------------------------- 1 | from Products.CMFPlone.browser import search 2 | 3 | 4 | class Search(search.Search): 5 | def munge_search_term(self, q): # NOQA R0201 6 | # We don't want to munge search terms for 7 | # EL 8 | return q 9 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/utilviews.py: -------------------------------------------------------------------------------- 1 | from AccessControl import Unauthorized 2 | from Acquisition import aq_parent 3 | from collective.elasticsearch.manager import ElasticSearchManager 4 | from elasticsearch.exceptions import NotFoundError 5 | from elasticsearch.helpers import scan 6 | from plone import api 7 | from Products.CMFCore.indexing import processQueue 8 | from Products.Five.browser import BrowserView 9 | from zope.component import getMultiAdapter 10 | 11 | import logging 12 | import time 13 | import transaction 14 | 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class Utils(BrowserView): 20 | def __init__(self, context, request): 21 | self.context = context 22 | self.request = request 23 | self._count_index_object = 0 24 | self._count_del_doc_elasticsearch = 0 25 | 26 | def convert(self): 27 | if self.request.method == "POST": 28 | authenticator = getMultiAdapter( 29 | (self.context, self.request), name="authenticator" 30 | ) 31 | if not authenticator.verify(): 32 | raise Unauthorized 33 | 34 | self._es._convert_catalog_to_elastic() 35 | site = aq_parent(self.context) 36 | self.request.response.redirect(f"{site.absolute_url()}/@@elastic-controlpanel") 37 | 38 | def rebuild(self): 39 | if self.request.method == "POST": 40 | authenticator = getMultiAdapter( 41 | (self.context, self.request), name="authenticator" 42 | ) 43 | if not authenticator.verify(): 44 | raise Unauthorized 45 | 46 | self.context.manage_catalogRebuild() 47 | 48 | site = aq_parent(self.context) 49 | self.request.response.redirect(f"{site.absolute_url()}/@@elastic-controlpanel") 50 | 51 | def synchronize(self): 52 | if self.request.method == "POST": 53 | authenticator = getMultiAdapter( 54 | (self.context, self.request), name="authenticator" 55 | ) 56 | if not authenticator.verify(): 57 | raise Unauthorized 58 | uids_catalog = set(self._uids_catalog) 59 | uids_elasticsearch = set(self._uids_elasticsearch) 60 | uids_not_in_elasticsearch = uids_catalog.difference(uids_elasticsearch) 61 | logger.info( 62 | ( 63 | f"{len(uids_not_in_elasticsearch)} " 64 | f"non-indexed objects in elasticsearch" 65 | ) 66 | ) 67 | uids_not_in_catalog = uids_elasticsearch.difference(uids_catalog) 68 | logger.info( 69 | (f"{len(uids_not_in_catalog)} documents " f"not found in the catalog.") 70 | ) 71 | self._index_object_in_elasticsearch(uids_not_in_elasticsearch) 72 | self._delete_document_elasticsearch(uids_not_in_catalog) 73 | message = ( 74 | f"Indexed objects: {self._count_index_object} " 75 | f"Documents deleted: {self._count_del_doc_elasticsearch}" 76 | ) 77 | logger.info(message) 78 | site = aq_parent(self.context) 79 | self.request.response.redirect(f"{site.absolute_url()}/@@elastic-controlpanel") 80 | 81 | @property 82 | def _es(self): 83 | return ElasticSearchManager() 84 | 85 | @property 86 | def _es_conn(self): 87 | return self._es.connection 88 | 89 | @property 90 | def _uids_catalog(self): 91 
| logger.info("Fetching all uids indexed in the catalog...") 92 | uids = self.context.portal_catalog.uniqueValuesFor("UID") 93 | logger.info(f"Found {len(uids)} uids") 94 | return uids 95 | 96 | @property 97 | def _uids_elasticsearch(self): 98 | query = {"query": {"match_all": {}}, "_source": ["UID"]} 99 | items = scan( 100 | self._es_conn, 101 | index=self._es.index_name, 102 | query=query, 103 | preserve_order=True, 104 | size=10000, 105 | ) 106 | logger.info("Fetching all indexed uids in elasticsearch...") 107 | uids = [item["_id"] for item in items] 108 | logger.info(f"Found {len(uids)} uids") 109 | return uids 110 | 111 | def _index_object_in_elasticsearch(self, uids): 112 | amount = len(uids) 113 | for index, uid in enumerate(uids): 114 | obj = api.content.get(UID=uid) 115 | obj.indexObject() 116 | self._count_index_object += 1 117 | logging.info("indexObject: %s", "/".join(obj.getPhysicalPath())) 118 | if index % self._es.bulk_size == 0: 119 | # Force indexing in ES 120 | self.commit(wait=1) 121 | logger.info("COMMIT: %s/%s", index, amount - 1) 122 | self.commit(wait=1) 123 | 124 | def _delete_document_elasticsearch(self, uids): 125 | conn = self._es_conn 126 | amount = len(uids) 127 | for index, uid in enumerate(uids): 128 | try: 129 | conn.delete(index=self._es.index_name, id=uid) 130 | self._count_del_doc_elasticsearch += 1 131 | logging.info("delete doc: %s", uid) 132 | except NotFoundError: 133 | continue 134 | if index % self._es.bulk_size == 0: 135 | # Force indexing in ES 136 | self.commit(wait=1) 137 | logger.info("COMMIT: %s/%s", index, amount - 1) 138 | self.commit(wait=1) 139 | 140 | def commit(self, wait: int = 0): 141 | processQueue() 142 | transaction.commit() 143 | self._es.flush_indices() 144 | if wait: 145 | time.sleep(wait) 146 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/configure.zcml: -------------------------------------------------------------------------------- 1 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 31 | 37 | 38 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/indexes.py: -------------------------------------------------------------------------------- 1 | from Acquisition import aq_base 2 | from Acquisition import aq_parent 3 | from collective.elasticsearch import logger 4 | from datetime import date 5 | from datetime import datetime 6 | from DateTime import DateTime 7 | from Missing import MV 8 | from plone.folder.nogopip import GopipIndex 9 | from Products.ExtendedPathIndex.ExtendedPathIndex import ExtendedPathIndex 10 | from Products.PluginIndexes.BooleanIndex.BooleanIndex import BooleanIndex 11 | from Products.PluginIndexes.DateIndex.DateIndex import DateIndex 12 | from Products.PluginIndexes.DateRangeIndex.DateRangeIndex import DateRangeIndex 13 | from Products.PluginIndexes.FieldIndex.FieldIndex import FieldIndex 14 | from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex 15 | from Products.PluginIndexes.util import safe_callable 16 | from Products.PluginIndexes.UUIDIndex.UUIDIndex import UUIDIndex 17 | from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex 18 | 19 | 20 | def _one(val): 21 | """ 22 | if list, return first 23 | otherwise, return value 24 | """ 25 | if isinstance(val, (list, set, tuple)): 26 | return val[0] 27 | return val 28 | 29 | 30 | def _zdt(val): 31 | if isinstance(val, datetime): 32 | val = DateTime(val) 33 | elif 
isinstance(val, date): 34 | val = DateTime(datetime.fromordinal(val.toordinal())) 35 | elif isinstance(val, str): 36 | val = DateTime(val) 37 | return val 38 | 39 | 40 | keyword_fields = ( 41 | "allowedRolesAndUsers", 42 | "portal_type", 43 | "object_provides", 44 | "Type", 45 | "id", 46 | "cmf_uid", 47 | "sync_uid", 48 | "getId", 49 | "meta_type", 50 | "review_state", 51 | "in_reply_to", 52 | "UID", 53 | "getRawRelatedItems", 54 | "Subject", 55 | "sortable_title", 56 | ) 57 | 58 | 59 | class BaseIndex: 60 | filter_query = True 61 | 62 | def __init__(self, catalog, index): 63 | self.catalog = catalog 64 | self.index = index 65 | 66 | def create_mapping(self, name): # NOQA R0201 67 | if name in keyword_fields: 68 | return {"type": "keyword", "index": True, "store": True} 69 | return {"type": "text", "index": True, "store": False} 70 | 71 | def get_value(self, obj): 72 | value = None 73 | attrs = self.index.getIndexSourceNames() 74 | if len(attrs) > 0: 75 | attr = attrs[0] 76 | else: 77 | attr = "" 78 | if hasattr(self.index, "index_object"): 79 | value = self.index._get_object_datum(obj, attr) 80 | else: 81 | logger.info(f"catalogObject was passed bad index object {self.index}.") 82 | if value == MV: 83 | return None 84 | return value 85 | 86 | def extract(self, name, data): # NOQA R0201 87 | return data[name] or "" 88 | 89 | def _normalize_query(self, query): # NOQA R0201 90 | if isinstance(query, dict) and "query" in query: 91 | return query["query"] 92 | return query 93 | 94 | def get_query(self, name, value): 95 | value = self._normalize_query(value) 96 | if value in (None, ""): 97 | return None 98 | if isinstance(value, (list, tuple, set)): 99 | if len(value) == 0: 100 | return None 101 | return {"terms": {name: value}} 102 | if isinstance(value, dict) and "not" in value: 103 | if isinstance(value["not"], (list, tuple, set)): 104 | return { 105 | "bool": {"must_not": [{"term": {name: i}} for i in value["not"]]} 106 | } 107 | return {"bool": {"must_not": [{"term": {name: value["not"]}}]}} 108 | return {"term": {name: value}} 109 | 110 | 111 | class EKeywordIndex(BaseIndex): 112 | def extract(self, name, data): 113 | return data[name] or [] 114 | 115 | 116 | class EFieldIndex(BaseIndex): 117 | pass 118 | 119 | 120 | class EDateIndex(BaseIndex): 121 | """ 122 | XXX elastic search requires default 123 | value for searching. This could be a problem... 
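    In practice, get_value() falls back to missing_date (1900/01/01) for
    empty values, so documents without a date are still indexed and sortable.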
124 | """ 125 | 126 | missing_date = DateTime("1900/01/01") 127 | 128 | def create_mapping(self, name): 129 | return {"type": "date", "store": True} 130 | 131 | def get_value(self, obj): 132 | value = super().get_value(obj) 133 | if isinstance(value, list): 134 | if len(value) == 0: 135 | value = None 136 | else: 137 | value = value[0] 138 | if value in ("None", MV, None, ""): 139 | value = self.missing_date 140 | if isinstance(value, str): 141 | return DateTime(value).ISO8601() 142 | if isinstance(value, DateTime): 143 | return value.ISO8601() 144 | return value 145 | 146 | def get_query(self, name, value): 147 | range_ = value.get("range") 148 | query = value.get("query") 149 | if query is None: 150 | return None 151 | if range_ is None: 152 | if type(query) in (list, tuple): 153 | range_ = "min" 154 | 155 | first = _zdt(_one(query)).ISO8601() 156 | if range_ == "min": 157 | return {"range": {name: {"gte": first}}} 158 | if range_ == "max": 159 | return {"range": {name: {"lte": first}}} 160 | if ( 161 | range_ in ("min:max", "minmax") 162 | and (type(query) in (list, tuple)) 163 | and len(query) == 2 164 | ): 165 | return {"range": {name: {"gte": first, "lte": _zdt(query[1]).ISO8601()}}} 166 | return None 167 | 168 | def extract(self, name, data): 169 | try: 170 | return DateTime(super().extract(name, data)) 171 | except Exception: # NOQA W0703 172 | return None 173 | 174 | 175 | class EZCTextIndex(BaseIndex): 176 | filter_query = False 177 | 178 | def create_mapping(self, name): 179 | return {"type": "text", "index": True, "store": False} 180 | 181 | def get_value(self, obj): 182 | try: 183 | fields = self.index._indexed_attrs 184 | except Exception: # NOQA W0703 185 | fields = [self.index._fieldname] 186 | all_texts = [] 187 | for attr in fields: 188 | text = getattr(obj, attr, None) 189 | if text is None: 190 | continue 191 | if safe_callable(text): 192 | text = text() 193 | if text is None: 194 | continue 195 | if text: 196 | if isinstance( 197 | text, 198 | ( 199 | list, 200 | tuple, 201 | ), 202 | ): 203 | all_texts.extend(text) 204 | else: 205 | all_texts.append(text) 206 | # Check that we're sending only strings 207 | all_texts = filter(lambda text: isinstance(text, str), all_texts) 208 | if all_texts: 209 | return "\n".join(all_texts) 210 | return None 211 | 212 | def get_query(self, name, value): 213 | value = self._normalize_query(value) 214 | # ES doesn't care about * like zope catalog does 215 | clean_value = value.strip("*") if value else "" 216 | queries = [{"match_phrase": {name: {"query": clean_value, "slop": 2}}}] 217 | if name in ("Title", "SearchableText"): 218 | # titles have most importance... we override here... 
219 | queries.append( 220 | {"match_phrase_prefix": {"Title": {"query": clean_value, "boost": 2}}} 221 | ) 222 | if name != "Title": 223 | queries.append({"match": {name: {"query": clean_value}}}) 224 | 225 | return queries 226 | 227 | 228 | class EBooleanIndex(BaseIndex): 229 | def create_mapping(self, name): 230 | return {"type": "boolean"} 231 | 232 | 233 | class EUUIDIndex(BaseIndex): 234 | pass 235 | 236 | 237 | class EExtendedPathIndex(BaseIndex): 238 | filter_query = True 239 | 240 | def create_mapping(self, name): 241 | return { 242 | "properties": { 243 | "path": {"type": "keyword", "index": True, "store": True}, 244 | "depth": {"type": "integer", "store": True}, 245 | } 246 | } 247 | 248 | def get_value(self, obj): 249 | attrs = self.index.indexed_attrs 250 | index = self.index.id if attrs is None else attrs[0] 251 | path = getattr(obj, index, None) 252 | if path is not None: 253 | if safe_callable(path): 254 | path = path() 255 | if not isinstance(path, (str, tuple)): 256 | raise TypeError( 257 | f"path value must be string or tuple of " 258 | f"strings: ({index}, {repr(path)})" 259 | ) 260 | else: 261 | try: 262 | path = obj.getPhysicalPath() 263 | except AttributeError: 264 | return None 265 | return {"path": "/".join(path), "depth": len(path) - 1} 266 | 267 | def extract(self, name, data): 268 | return data[name]["path"] 269 | 270 | def get_query(self, name, value): 271 | if isinstance(value, str): 272 | paths = value 273 | depth = -1 274 | navtree = False 275 | navtree_start = 0 276 | else: 277 | depth = value.get("depth", -1) 278 | paths = value.get("query") 279 | navtree = value.get("navtree", False) 280 | navtree_start = value.get("navtree_start", 0) 281 | if not paths: 282 | return None 283 | if isinstance(paths, str): 284 | paths = [paths] 285 | andfilters = [] 286 | for path in paths: 287 | spath = path.split("/") 288 | gtcompare = "gt" 289 | start = len(spath) - 1 290 | if navtree: 291 | start = start + navtree_start 292 | end = navtree_start + depth 293 | else: 294 | end = start + depth 295 | if navtree or depth == -1: 296 | gtcompare = "gte" 297 | filters = [] 298 | if depth == 0: 299 | andfilters.append( 300 | {"bool": {"filter": {"term": {f"{name}.path": path}}}} 301 | ) 302 | continue 303 | filters = [ 304 | {"prefix": {f"{name}.path": path}}, 305 | {"range": {f"{name}.depth": {gtcompare: start}}}, 306 | ] 307 | if depth != -1: 308 | filters.append({"range": {f"{name}.depth": {"lte": end}}}) 309 | andfilters.append({"bool": {"must": filters}}) 310 | if len(andfilters) > 1: 311 | return {"bool": {"should": andfilters}} 312 | return andfilters[0] 313 | 314 | 315 | class EGopipIndex(BaseIndex): 316 | def create_mapping(self, name): 317 | return {"type": "integer", "store": True} 318 | 319 | def get_value(self, obj): 320 | parent = aq_parent(obj) 321 | if hasattr(parent, "getObjectPosition"): 322 | return parent.getObjectPosition(obj.getId()) 323 | return None 324 | 325 | 326 | class EDateRangeIndex(BaseIndex): 327 | def create_mapping(self, name): 328 | return { 329 | "properties": { 330 | f"{name}1": {"type": "date", "store": True}, 331 | f"{name}2": {"type": "date", "store": True}, 332 | } 333 | } 334 | 335 | def get_value(self, obj): 336 | if self.index._since_field is None: 337 | return None 338 | since = getattr(obj, self.index._since_field, None) 339 | if safe_callable(since): 340 | since = since() 341 | until = getattr(obj, self.index._until_field, None) 342 | if safe_callable(until): 343 | until = until() 344 | if not since or not until: 345 | return None 
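        # Index both range boundaries; get_query() below then matches documents
        # where <name>1 (start) <= query date <= <name>2 (end).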
346 | return { 347 | f"{self.index.id}1": since.ISO8601(), 348 | f"{self.index.id}2": until.ISO8601(), 349 | } 350 | 351 | def get_query(self, name, value): 352 | value = self._normalize_query(value) 353 | date_iso = value.ISO8601() 354 | return [ 355 | {"range": {f"{name}.{name}1": {"lte": date_iso}}}, 356 | {"range": {f"{name}.{name}2": {"gte": date_iso}}}, 357 | ] 358 | 359 | 360 | class ERecurringIndex(EDateIndex): 361 | pass 362 | 363 | 364 | INDEX_MAPPING = { 365 | KeywordIndex: EKeywordIndex, 366 | FieldIndex: EFieldIndex, 367 | DateIndex: EDateIndex, 368 | ZCTextIndex: EZCTextIndex, 369 | BooleanIndex: EBooleanIndex, 370 | UUIDIndex: EUUIDIndex, 371 | ExtendedPathIndex: EExtendedPathIndex, 372 | GopipIndex: EGopipIndex, 373 | DateRangeIndex: EDateRangeIndex, 374 | } 375 | 376 | try: 377 | from Products.DateRecurringIndex.index import DateRecurringIndex # NOQA C0412 378 | 379 | INDEX_MAPPING[DateRecurringIndex] = ERecurringIndex 380 | except ImportError: 381 | pass 382 | 383 | 384 | def getIndex(catalog, name): 385 | catalog = getattr(catalog, "_catalog", catalog) 386 | try: 387 | index = aq_base(catalog.getIndex(name)) 388 | except KeyError: 389 | return None 390 | index_type = type(index) 391 | if index_type in INDEX_MAPPING: 392 | return INDEX_MAPPING[index_type](catalog, index) 393 | return None 394 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/interfaces.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from Products.CMFCore.interfaces import IIndexQueueProcessor 3 | from typing import Dict 4 | from typing import List 5 | from typing import Tuple 6 | from zope import schema 7 | from zope.interface import Interface 8 | 9 | 10 | class IElasticSearchLayer(Interface): 11 | pass 12 | 13 | 14 | class IElasticSearchManager(Interface): 15 | pass 16 | 17 | 18 | class IMappingProvider(Interface): 19 | def get_index_creation_body(): # NOQA E0211 20 | pass 21 | 22 | def __call__(): # NOQA E0211 23 | pass 24 | 25 | 26 | class IAdditionalIndexDataProvider(Interface): 27 | def __call__(): # NOQA E0211 28 | pass 29 | 30 | 31 | class IReindexActive(Interface): 32 | pass 33 | 34 | 35 | class IQueryAssembler(Interface): 36 | def normalize(query): # NOQA E0213 37 | pass 38 | 39 | def __call__(query): # NOQA E0213 40 | pass 41 | 42 | 43 | class IElasticSettings(Interface): 44 | 45 | enabled = schema.Bool(title="Enabled", default=False, required=False) 46 | 47 | use_redis = schema.Bool( 48 | title="Use redis as queue", 49 | description=( 50 | "You can enable this option if you have installed redis, " 51 | "set the necessary env variables and started a worker." 
52 | "Please check the README for more informations" 53 | ), 54 | default=False, 55 | required=False, 56 | ) 57 | 58 | hosts = schema.List( 59 | title="Hosts", 60 | default=["127.0.0.1"], 61 | unique=True, 62 | value_type=schema.TextLine(title="Host"), 63 | ) 64 | 65 | es_only_indexes = schema.Set( 66 | title="Indexes for which all searches are done through ElasticSearch", 67 | default={"Title", "Description", "SearchableText"}, 68 | value_type=schema.TextLine(title="Index"), 69 | ) 70 | 71 | sniff_on_start = schema.Bool(title="Sniff on start", default=False, required=False) 72 | 73 | sniff_on_connection_fail = schema.Bool( 74 | title="Sniff on connection fail", default=False, required=False 75 | ) 76 | 77 | sniffer_timeout = schema.Float( 78 | title="Sniffer timeout", required=False, default=None 79 | ) 80 | 81 | retry_on_timeout = schema.Bool( 82 | title="Retry on timeout", default=True, required=False 83 | ) 84 | 85 | timeout = schema.Float( 86 | title="Read timeout", 87 | description="how long before timeout connecting to elastic search", 88 | default=2.0, 89 | ) 90 | 91 | bulk_size = schema.Int( 92 | title="Bulk Size", description="bulk size for elastic queries", default=50 93 | ) 94 | 95 | highlight = schema.Bool( 96 | title="Enable Search Highlight", 97 | description="Use elasticsearch highlight feature instead of descriptions in search results", 98 | default=False, 99 | required=False, 100 | ) 101 | 102 | highlight_threshold = schema.Int( 103 | title="Highlight Threshold", 104 | description="Number of highlighted characters to display in search results descriptions", 105 | default=600, 106 | required=False, 107 | ) 108 | 109 | highlight_pre_tags = schema.Text( 110 | title="Highlight pre tags", 111 | description='Used with highlight post tags to wrap matching words. e.g. <pre class="highlight">. One tag per line', 112 | default="", 113 | required=False, 114 | ) 115 | 116 | highlight_post_tags = schema.Text( 117 | title="Higlight post tags", 118 | description="Used with highlight pre tags to wrap matching words. e.g. </pre> One tag per line", 119 | default="", 120 | required=False, 121 | ) 122 | 123 | raise_search_exception = schema.Bool( 124 | title="Raise Search Exceptions", 125 | description="If there is an error with elastic search Plone will default to trying the old catalog search. 
Set this to true to raise the error instead.", 126 | default=False, 127 | required=False, 128 | ) 129 | 130 | 131 | class IElasticSearchIndexQueueProcessor(IIndexQueueProcessor): 132 | """Index queue processor for elasticsearch.""" 133 | 134 | 135 | @dataclass 136 | class IndexingActions: 137 | 138 | index: Dict[str, dict] 139 | reindex: Dict[str, dict] 140 | unindex: Dict[str, dict] 141 | index_blobs: Dict[str, dict] 142 | uuid_path: Dict[str, str] 143 | 144 | def __len__(self): 145 | size = 0 146 | size += len(self.index) 147 | size += len(self.reindex) 148 | size += len(self.unindex) 149 | return size 150 | 151 | def all(self) -> List[Tuple[str, str, Dict]]: 152 | all_data = [] 153 | for attr, action in ( 154 | ("index", "index"), 155 | ("reindex", "update"), 156 | ("unindex", "delete"), 157 | ): 158 | action_data = [ 159 | (uuid, data) for uuid, data in getattr(self, attr, {}).items() 160 | ] 161 | if action_data: 162 | all_data.extend([(action, uuid, data) for uuid, data in action_data]) 163 | return all_data 164 | 165 | def all_blob_actions(self): 166 | return [(uuid, data) for uuid, data in getattr(self, "index_blobs", {}).items()] 167 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/local.py: -------------------------------------------------------------------------------- 1 | from threading import local 2 | 3 | 4 | # a thread-local object holding data for the queue 5 | localData = local() 6 | marker = [] 7 | 8 | 9 | def get_local(name, factory=lambda: None): 10 | """get named thread-local value and optionally initialize it""" 11 | value = getattr(localData, name, marker) 12 | if value is marker: 13 | value = factory() 14 | set_local(name, value) 15 | return value 16 | 17 | 18 | def set_local(name, value): 19 | """set a value for the named thread-local variable""" 20 | setattr(localData, name, value) 21 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/mapping.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.indexes import getIndex 2 | from collective.elasticsearch.interfaces import IMappingProvider 3 | from zope.interface import implementer 4 | 5 | 6 | @implementer(IMappingProvider) 7 | class MappingAdapter: 8 | _default_mapping = { 9 | "SearchableText": {"store": False, "type": "text", "index": True}, 10 | "Title": {"store": True, "type": "text", "index": True}, 11 | "Description": {"store": True, "type": "text", "index": True}, 12 | "allowedRolesAndUsers": {"store": True, "type": "keyword", "index": True}, 13 | "portal_type": {"store": True, "type": "keyword", "index": True}, 14 | } 15 | 16 | _search_attributes = [ 17 | "Title", 18 | "Description", 19 | "Subject", 20 | "contentType", 21 | "created", 22 | "modified", 23 | "effective", 24 | "hasImage", 25 | "is_folderish", 26 | "portal_type", 27 | "review_state", 28 | "path.path", 29 | ] 30 | 31 | def __init__(self, request, manager): 32 | self.request = request 33 | self.manager = manager 34 | self.catalog = manager.catalog._catalog 35 | 36 | def get_index_creation_body(self): # NOQA E0211 37 | return {} 38 | 39 | def __call__(self): 40 | manager = self.manager 41 | properties = self._default_mapping.copy() 42 | for name in self.catalog.indexes.keys(): 43 | index = getIndex(self.catalog, name) 44 | if index is not None: 45 | properties[name] = index.create_mapping(name) 46 | else: 47 | raise Exception(f"Can not locate index for {name}") 
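        # Note: the physical index is created as "<index_name>_<version>" and
        # exposed through an alias named "<index_name>", so a rebuild can
        # target a fresh versioned index while the public name stays stable.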
48 | 49 | conn = manager.connection 50 | index_name = manager.index_name 51 | if conn.indices.exists(index_name): 52 | # created BEFORE we started creating this as aliases to versions, 53 | # we can't go anywhere from here beside try updating... 54 | pass 55 | else: 56 | if not manager.index_version: 57 | # need to initialize version value 58 | manager._bump_index_version() 59 | index_name_v = f"{index_name}_{manager.index_version}" 60 | if not conn.indices.exists(index_name_v): 61 | conn.indices.create(index_name_v, body=self.get_index_creation_body()) 62 | if not conn.indices.exists_alias(name=index_name): 63 | conn.indices.put_alias(index=index_name_v, name=index_name) 64 | 65 | for key in properties: 66 | if key in self._search_attributes: 67 | properties[key]["store"] = True 68 | 69 | properties["attachments"] = { 70 | "properties": { 71 | "data": {"type": "binary"}, 72 | "filename": {"type": "text"}, 73 | "fieldname": {"type": "text"}, 74 | } 75 | } 76 | 77 | return {"properties": properties} 78 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/patches/__init__.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import interfaces 2 | from collective.elasticsearch.manager import ElasticSearchManager 3 | from collective.elasticsearch.utils import get_brain_from_path 4 | from plone.folder.interfaces import IOrdering 5 | from Products.CMFCore.indexing import processQueue 6 | from Products.CMFCore.interfaces import IContentish 7 | from time import process_time 8 | from zope.globalrequest import getRequest 9 | from zope.interface import alsoProvides 10 | from zope.interface import noLongerProvides 11 | 12 | import time 13 | import urllib 14 | 15 | 16 | def unrestrictedSearchResults(self, REQUEST=None, **kw): 17 | manager = ElasticSearchManager() 18 | active = manager.active 19 | method = manager.search_results if active else self._old_unrestrictedSearchResults 20 | return method(REQUEST, check_perms=False, **kw) 21 | 22 | 23 | def safeSearchResults(self, REQUEST=None, **kw): 24 | manager = ElasticSearchManager() 25 | active = manager.active 26 | method = manager.search_results if active else self._old_searchResults 27 | return method(REQUEST, check_perms=True, **kw) 28 | 29 | 30 | def manage_catalogRebuild(self, RESPONSE=None, URL1=None): # NOQA W0613 31 | """need to be publishable""" 32 | manager = ElasticSearchManager() 33 | if manager.enabled: 34 | manager._recreate_catalog() 35 | alsoProvides(getRequest(), interfaces.IReindexActive) 36 | 37 | elapse = time.time() 38 | c_elapse = process_time() 39 | 40 | self.clearFindAndRebuild() 41 | 42 | elapse = time.time() - elapse 43 | c_elapse = process_time() - c_elapse 44 | 45 | msg = f"Catalog Rebuilt\nTotal time: {elapse}\nTotal CPU time: {c_elapse}" 46 | 47 | if manager.enabled: 48 | processQueue() 49 | manager.flush_indices() 50 | noLongerProvides(getRequest(), interfaces.IReindexActive) 51 | if RESPONSE is not None: 52 | RESPONSE.redirect( 53 | URL1 54 | + "/manage_catalogAdvanced?manage_tabs_message=" 55 | + urllib.parse.quote(msg) 56 | ) 57 | 58 | 59 | def manage_catalogClear(self, *args, **kwargs): 60 | """need to be publishable""" 61 | manager = ElasticSearchManager() 62 | if manager.enabled and not manager.active: 63 | manager._recreate_catalog() 64 | return self._old_manage_catalogClear(*args, **kwargs) 65 | 66 | 67 | def uncatalog_object(self, *args, **kwargs): 68 | manager = ElasticSearchManager() 69 | 
if manager.active: 70 | # If ES is active, we also remove the record from there 71 | zcatalog = self._catalog 72 | data = [] 73 | for path in args: 74 | brain = get_brain_from_path(zcatalog, path) 75 | if not brain: 76 | # Path not in the catalog 77 | continue 78 | data.append(("delete", brain.UID, {})) 79 | manager.bulk(data=data) 80 | return self._old_uncatalog_object(*args, **kwargs) 81 | 82 | 83 | def get_ordered_ids(context) -> dict: 84 | """Return all object ids in a context, ordered.""" 85 | if IOrdering.providedBy(context): 86 | return {oid: idx for idx, oid in enumerate(context.idsInOrder())} 87 | else: 88 | # For Plone 5.2, we care only about Dexterity content 89 | objects = [ 90 | obj 91 | for obj in list(context._objects) 92 | if obj.get("meta_type").startswith("Dexterity") 93 | ] 94 | return {oid: idx for idx, oid in enumerate(context.getIdsSubset(objects))} 95 | 96 | 97 | def moveObjectsByDelta(self, ids, delta, subset_ids=None, suppress_events=False): 98 | manager = ElasticSearchManager() 99 | ordered = self if IOrdering.providedBy(self) else None 100 | before = get_ordered_ids(self) 101 | res = self._old_moveObjectsByDelta( 102 | ids, delta, subset_ids=subset_ids, suppress_events=suppress_events 103 | ) 104 | if manager.active: 105 | after = get_ordered_ids(self) 106 | diff = [oid for oid, idx in after.items() if idx != before[oid]] 107 | context = self.context if ordered else self 108 | for oid in diff: 109 | obj = context[oid] 110 | # We only reindex content objects 111 | if not IContentish.providedBy(obj): 112 | continue 113 | obj.reindexObject(idxs=["getObjPositionInParent"]) 114 | return res 115 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/patches/configure.zcml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 16 | 23 | 30 | 37 | 44 | 45 | 52 | 53 | 54 | 61 | 62 | 63 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles.zcml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 15 | 16 | 23 | 24 | 32 | 33 | 38 | 39 | 40 | 45 | 46 | 51 | 52 | 53 | 54 | 59 | 60 | 65 | 66 | 67 | 68 | 73 | 74 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/browserlayer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/controlpanel.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 15 | Manage portal 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/registry.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/docker-dev/registry.xml: -------------------------------------------------------------------------------- 
1 | 2 | 3 | 6 | 7 | elasticsearch 8 | 9 | True 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/uninstall/browserlayer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/query.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.indexes import EZCTextIndex 2 | from collective.elasticsearch.indexes import getIndex 3 | from collective.elasticsearch.interfaces import IQueryAssembler 4 | from collective.elasticsearch.utils import getESOnlyIndexes 5 | from zope.interface import implementer 6 | 7 | 8 | @implementer(IQueryAssembler) 9 | class QueryAssembler: 10 | def __init__(self, request, es): 11 | self.es = es 12 | self.catalog = es.catalog 13 | self.request = request 14 | 15 | def normalize(self, query): # NOQA R0201 16 | sort_on = [] 17 | sort = query.pop("sort_on", None) 18 | # default plone is ascending 19 | sort_order = query.pop("sort_order", "asc") 20 | if sort_order in ("descending", "reverse", "desc"): 21 | sort_order = "desc" 22 | else: 23 | sort_order = "asc" 24 | 25 | if sort: 26 | for sort_str in sort.split(","): 27 | sort_on.append({sort_str: {"order": sort_order}}) 28 | sort_on.append("_score") 29 | if "b_size" in query: 30 | del query["b_size"] 31 | if "b_start" in query: 32 | del query["b_start"] 33 | if "sort_limit" in query: 34 | del query["sort_limit"] 35 | return query, sort_on 36 | 37 | def __call__(self, dquery): 38 | filters = [] 39 | matches = [] 40 | catalog = self.catalog._catalog 41 | idxs = catalog.indexes.keys() 42 | query = {"match_all": {}} 43 | es_only_indexes = getESOnlyIndexes() 44 | for key, value in dquery.items(): 45 | if key not in idxs and key not in es_only_indexes: 46 | continue 47 | index = getIndex(catalog, key) 48 | if index is None and key in es_only_indexes: 49 | # deleted index for plone performance but still need on ES 50 | index = EZCTextIndex(catalog, key) 51 | qq = index.get_query(key, value) 52 | if qq is None: 53 | continue 54 | if index is not None and index.filter_query: 55 | if isinstance(qq, list): 56 | filters.extend(qq) 57 | else: 58 | filters.append(qq) 59 | else: 60 | if isinstance(qq, list): 61 | matches.extend(qq) 62 | else: 63 | matches.append(qq) 64 | if len(filters) == 0 and len(matches) == 0: 65 | return query 66 | query = {"bool": {}} 67 | if len(filters) > 0: 68 | query["bool"]["filter"] = filters 69 | 70 | if len(matches) > 0: 71 | query["bool"]["should"] = matches 72 | query["bool"]["minimum_should_match"] = 1 73 | return query 74 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/queueprocessor.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import logger 2 | from collective.elasticsearch.indexes import getIndex 3 | from collective.elasticsearch.interfaces import IAdditionalIndexDataProvider 4 | from collective.elasticsearch.interfaces import IElasticSearchIndexQueueProcessor 5 | from collective.elasticsearch.interfaces import IndexingActions 6 | from collective.elasticsearch.interfaces import IReindexActive 7 | from collective.elasticsearch.manager import ElasticSearchManager 8 | from collective.elasticsearch.utils import getESOnlyIndexes 9 | from collective.elasticsearch.utils 
import use_redis 10 | from pkg_resources import parse_version 11 | from plone import api 12 | from plone.app.uuid.utils import uuidToCatalogBrain 13 | from plone.dexterity.utils import iterSchemata 14 | from plone.indexer.interfaces import IIndexableObject 15 | from plone.indexer.interfaces import IIndexer 16 | from plone.namedfile.interfaces import INamedBlobFileField 17 | from zope.component import getAdapters 18 | from zope.component import queryMultiAdapter 19 | from zope.component.hooks import getSite 20 | from zope.globalrequest import getRequest 21 | from zope.interface import implementer 22 | from zope.schema import getFields 23 | 24 | import transaction 25 | 26 | 27 | if parse_version(api.env.plone_version()) < parse_version("6"): 28 | 29 | def uuidToObject(uuid, unrestricted=False): 30 | """Variation of this method, which support the parameter 31 | 'unrestricted', like the one from plone 6. 32 | """ 33 | 34 | brain = uuidToCatalogBrain(uuid) 35 | if brain is None: 36 | return None 37 | 38 | path = brain.getPath() 39 | 40 | if not path: 41 | return 42 | site = getSite() 43 | if site is None: 44 | return 45 | # Go to the parent of the item without restrictions. 46 | parent_path, final_path = path.rpartition("/")[::2] 47 | parent = site.unrestrictedTraverse(parent_path) 48 | # Do check restrictions for the final object. 49 | # Check if the object has restrictions 50 | if unrestricted: 51 | return parent.unrestrictedTraverse(final_path) 52 | return parent.restrictedTraverse(final_path) 53 | 54 | else: 55 | from plone.app.uuid.utils import uuidToObject 56 | 57 | 58 | @implementer(IElasticSearchIndexQueueProcessor) 59 | class IndexProcessor: 60 | """A queue processor for elasticsearch""" 61 | 62 | _manager: ElasticSearchManager = None 63 | _es_attributes = None 64 | _all_attributes = None 65 | rebuild: bool = False 66 | _actions: IndexingActions = None 67 | 68 | @property 69 | def manager(self): 70 | """Return the portal catalog.""" 71 | if not self._manager: 72 | self._manager = ElasticSearchManager() 73 | return self._manager 74 | 75 | @property 76 | def catalog(self): 77 | """Return the portal catalog.""" 78 | return api.portal.get_tool("portal_catalog") 79 | 80 | @property 81 | def es_attributes(self): 82 | """Return all attributes defined in portal catalog.""" 83 | if not self._es_attributes: 84 | self._es_attributes = getESOnlyIndexes() 85 | return self._es_attributes 86 | 87 | @property 88 | def all_attributes(self): 89 | """Return all attributes defined in portal catalog.""" 90 | if not self._all_attributes: 91 | catalog = self.catalog 92 | es_indexes = self.es_attributes 93 | catalog_indexes = set(catalog.indexes()) 94 | self._all_attributes = es_indexes.union(catalog_indexes) 95 | return self._all_attributes 96 | 97 | @property 98 | def rebuild(self): 99 | return IReindexActive.providedBy(getRequest()) 100 | 101 | @property 102 | def actions(self) -> IndexingActions: 103 | if not self._actions: 104 | self._actions = IndexingActions( 105 | index={}, 106 | reindex={}, 107 | unindex={}, 108 | index_blobs={}, 109 | uuid_path={}, 110 | ) 111 | return self._actions 112 | 113 | def _clean_up(self): 114 | self._manager = None 115 | self._es_attributes = None 116 | self._all_attributes = None 117 | self._actions = None 118 | 119 | def _uuid_path(self, obj): 120 | uuid = api.content.get_uuid(obj) if obj.portal_type != "Plone Site" else "/" 121 | path = "/".join(obj.getPhysicalPath()) 122 | return uuid, path 123 | 124 | def index(self, obj, attributes=None): 125 | """Index the specified 
attributes for an obj."""
126 | if not self.manager.active:
127 | return
128 | actions = self.actions
129 | uuid, path = self._uuid_path(obj)
130 | actions.uuid_path[uuid] = path
131 | if self.rebuild:
132 | # During rebuild we index everything
133 | attributes = self.all_attributes
134 | is_reindex = False
135 | else:
136 | attributes = {att for att in attributes} if attributes else set()
137 | is_reindex = attributes and attributes != self.all_attributes
138 | data = self.get_data(uuid, attributes)
139 | blob_data = self.get_blob_data(uuid, obj)
140 | if is_reindex and uuid in actions.index:
141 | # Reindexing something that was not processed yet
142 | actions.index[uuid].update(data)
143 | return
144 | elif is_reindex:
145 | # Simple reindexing
146 | actions.reindex[uuid] = data
147 | actions.index_blobs[uuid] = blob_data
148 | return
149 | elif uuid in actions.reindex:
150 | # Remove from reindex
151 | actions.reindex.pop(uuid)
152 | 
153 | elif uuid in actions.unindex:
154 | # Remove from unindex
155 | actions.unindex.pop(uuid)
156 | actions.index[uuid] = data
157 | actions.index_blobs[uuid] = blob_data
158 | 
159 | def reindex(self, obj, attributes=None, update_metadata=False):
160 | """Reindex the specified attributes for an obj."""
161 | if not self.manager.active:
162 | return
163 | self.index(obj, attributes)
164 | 
165 | def unindex(self, obj):
166 | """Unindex the obj."""
167 | if not self.manager.active:
168 | return
169 | actions = self.actions
170 | uuid, path = self._uuid_path(obj)
171 | actions.uuid_path[uuid] = path
172 | if uuid in actions.index:
173 | actions.index.pop(uuid)
174 | elif uuid in actions.reindex:
175 | actions.reindex.pop(uuid)
176 | actions.unindex[uuid] = {}
177 | 
178 | def begin(self):
179 | """Transaction start."""
180 | pass
181 | 
182 | def commit(self, wait=None):
183 | """Transaction commit."""
184 | method = self.commit_es
185 | if use_redis():
186 | method = self.commit_redis
187 | return method(wait=wait)
188 | 
189 | def commit_redis(self, wait=None):
190 | """Since we defer indexing to an external queue, we need to make sure
191 | the transaction is committed and synced with all threads.
192 | Thus, for the redis integration, we run the 'commit' in an
193 | addAfterCommitHook of the transaction.
194 | """
195 | 
196 | transaction.get().addAfterCommitHook(self._commit_hook_redis)
197 | 
198 | def _commit_hook_redis(self, wait=None):
199 | """The after-commit hook for redis; it also takes care of
200 | updating blobs."""
201 | actions = self.actions
202 | items = len(actions) if actions else 0
203 | if self.manager.active and items:
204 | self.manager.bulk(data=actions.all())
205 | 
206 | # make sure the attachment plugin and the cbor-attachments pipeline are available
207 | pipeline = "cbor-attachments" in self.manager.connection.ingest.get_pipeline()
208 | plugin = "attachment" in self.manager.connection.cat.plugins()
209 | if pipeline and plugin:
210 | for item in self.actions.all_blob_actions():
211 | self.manager.update_blob(item)
212 | 
213 | self._clean_up()
214 | 
215 | def commit_es(self, wait=None):
216 | """Transaction commit."""
217 | actions = self.actions
218 | items = len(actions) if actions else 0
219 | if self.manager.active and items:
220 | self.manager.bulk(data=actions.all())
221 | self._clean_up()
222 | 
223 | def abort(self):
224 | """Transaction abort."""
225 | self._clean_up()
226 | 
227 | def wrap_object(self, obj):
228 | wrapped_object = None
229 | if not IIndexableObject.providedBy(obj):
230 | # This is the CMF 2.2 compatible approach, which should be used
231 | # going forward
232 | wrapper = queryMultiAdapter((obj, self.catalog), IIndexableObject)
233 | wrapped_object = wrapper if wrapper is not None else obj
234 | else:
235 | wrapped_object = obj
236 | return wrapped_object
237 | 
238 | def get_data(self, uuid, attributes=None):
239 | method = self.get_data_for_es
240 | if use_redis():
241 | method = self.get_data_for_redis
242 | return method(uuid, attributes=attributes)
243 | 
244 | def get_data_for_redis(self, uuid, attributes=None):
245 | attributes = attributes if attributes else self.all_attributes
246 | index_data = {}
247 | for index_name in attributes:
248 | index_data[index_name] = None
249 | return index_data
250 | 
251 | def get_data_for_es(self, uuid, attributes=None):
252 | """Data to be sent to elasticsearch."""
253 | obj = api.portal.get() if uuid == "/" else uuidToObject(uuid, unrestricted=True)
254 | wrapped_object = self.wrap_object(obj)
255 | index_data = {}
256 | attributes = attributes if attributes else self.all_attributes
257 | catalog = self.catalog
258 | for index_name in attributes:
259 | value = None
260 | index = getIndex(catalog, index_name)
261 | if index is not None:
262 | try:
263 | value = index.get_value(wrapped_object)
264 | except Exception as exc:  # NOQA W0703
265 | path = "/".join(obj.getPhysicalPath())
266 | logger.error(f"Error indexing value: {path}: {index_name}\n{exc}")
267 | value = None
268 | if value in (None, "None"):
269 | # yes, we'll index null data...
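# (an explicit null overwrites any stale value previously stored in ES)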
270 | value = None
271 | elif index_name in self.es_attributes:
272 | indexer = queryMultiAdapter(
273 | (wrapped_object, catalog), IIndexer, name=index_name
274 | )
275 | if indexer:
276 | value = indexer()
277 | else:
278 | attr = getattr(obj, index_name, None)
279 | value = attr() if callable(attr) else value
280 | # Use str if the value is bytes
281 | value = (
282 | value.decode("utf-8", "ignore") if isinstance(value, bytes) else value
283 | )
284 | index_data[index_name] = value
285 | additional_providers = [
286 | adapter for adapter in getAdapters((obj,), IAdditionalIndexDataProvider)
287 | ]
288 | if additional_providers:
289 | for _, adapter in additional_providers:
290 | index_data.update(adapter(catalog, index_data))
291 | 
292 | return index_data
293 | 
294 | def get_blob_data(self, uuid, obj):
295 | """Go through the schemata and extract info about blob fields."""
296 | index_data = {}
297 | portal_path_len = len(api.portal.get().getPhysicalPath())
298 | obj_segments = obj.getPhysicalPath()
299 | relative_path = "/".join(obj_segments[portal_path_len:])
300 | for schema in iterSchemata(obj):
301 | for name, field in getFields(schema).items():
302 | if INamedBlobFileField.providedBy(field) and field.get(obj):
303 | index_data[name] = {
304 | "path": relative_path,
305 | "filename": field.get(obj).filename,
306 | }
307 | return index_data
308 | 
-------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/redis/__init__.py -------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/configure.zcml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/fetch.py: -------------------------------------------------------------------------------- 1 | import io
2 | import os
3 | import requests
4 | 
5 | 
6 | session = requests.Session()
7 | session.headers.update({"Accept": "application/json"})
8 | session.auth = (
9 | str(os.environ.get("PLONE_USERNAME", None)),
10 | str(os.environ.get("PLONE_PASSWORD", None)),
11 | )
12 | 
13 | session_data = requests.Session()
14 | session_data.auth = (
15 | str(os.environ.get("PLONE_USERNAME", None)),
16 | str(os.environ.get("PLONE_PASSWORD", None)),
17 | )
18 | 
19 | 
20 | def fetch_data(uuid, attributes):
21 | backend = os.environ.get("PLONE_BACKEND", None)
22 | url = backend + "/@elasticsearch_extractdata"
23 | payload = {"uuid": uuid, "attributes:list": attributes}
24 | response = session.get(url, params=payload, verify=False, timeout=60)
25 | if response.status_code == 200:
26 | content = response.json()
27 | if "@id" in content and "data" in content:
28 | return content["data"]
29 | else:
30 | raise Exception("Bad response from Plone Backend")
31 | 
32 | 
33 | def fetch_blob_data(fieldname, data):
34 | backend = os.environ.get("PLONE_BACKEND", None)
35 | download_url = "/".join([backend, data[fieldname]["path"], "@@download", fieldname])
36 | file_ = session_data.get(download_url)
37 | return io.BytesIO(file_.content)
38 | 
--------------------------------------------------------------------------------
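The two module-level sessions above are what the external worker process uses to pull freshly extracted data back out of Plone. A minimal worker-side sketch follows; the values are illustrative only (real deployments set these variables in the worker environment, see docker/worker.Dockerfile), and they must be set before the module is imported, since the sessions capture the credentials at import time:

import os

# Hypothetical values for illustration; a real worker gets these from its
# environment. They must exist before the import below, because fetch.py
# builds its sessions at import time.
os.environ.setdefault("PLONE_BACKEND", "http://localhost:8080/Plone")
os.environ.setdefault("PLONE_USERNAME", "admin")
os.environ.setdefault("PLONE_PASSWORD", "secret")

from collective.elasticsearch.redis.fetch import fetch_data

# Performs a GET on <backend>/@elasticsearch_extractdata and returns the
# "data" part of the JSON response. The UID here is a placeholder.
data = fetch_data("<uuid-of-some-content>", ["Title", "SearchableText"])
print(sorted(data))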
/src/collective/elasticsearch/redis/restapi.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.interfaces import IElasticSearchIndexQueueProcessor 2 | from plone import api 3 | from plone.restapi.serializer.converters import json_compatible 4 | from plone.restapi.services import Service 5 | from zExceptions import NotFound 6 | from zope.component import getUtility 7 | 8 | 9 | class ExtractData(Service): 10 | def reply(self): 11 | queueprocessor = getUtility( 12 | IElasticSearchIndexQueueProcessor, name="elasticsearch" 13 | ) 14 | attributes = self.request.get("attributes", []) 15 | uuid = self.request.get("uuid", None) 16 | 17 | obj = api.portal.get() if uuid == "/" else api.content.get(UID=uuid) 18 | if obj is None: 19 | raise NotFound() 20 | 21 | response = {} 22 | data = queueprocessor.get_data_for_es(uuid, attributes=attributes) 23 | response["@id"] = f"{self.context.absolute_url()}/@elasticsearch_extractdata" 24 | response["data"] = json_compatible(data) 25 | return response 26 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/tasks.py: -------------------------------------------------------------------------------- 1 | from .fetch import fetch_blob_data 2 | from .fetch import fetch_data 3 | from collective.elasticsearch import local 4 | from collective.elasticsearch.manager import ElasticSearchManager 5 | from elasticsearch import Elasticsearch 6 | from rq import Queue 7 | from rq import Retry 8 | from rq.decorators import job 9 | 10 | import cbor2 11 | import os 12 | import redis 13 | 14 | 15 | REDIS_CONNECTION_KEY = "redis_connection" 16 | 17 | 18 | def redis_connection(): 19 | connection = local.get_local(REDIS_CONNECTION_KEY) 20 | if not connection: 21 | local.set_local( 22 | REDIS_CONNECTION_KEY, 23 | redis.Redis.from_url(os.environ.get("PLONE_REDIS_DSN", None)), 24 | ) 25 | connection = local.get_local(REDIS_CONNECTION_KEY) 26 | return connection 27 | 28 | 29 | def es_connection(hosts, **params): 30 | connection = local.get_local(ElasticSearchManager.connection_key) 31 | if not connection: 32 | local.set_local( 33 | ElasticSearchManager.connection_key, Elasticsearch(hosts, **params) 34 | ) 35 | connection = local.get_local(ElasticSearchManager.connection_key) 36 | return connection 37 | 38 | 39 | queue = Queue( 40 | "normal", 41 | connection=redis_connection(), 42 | is_async=os.environ.get("ZOPETESTCASE", "0") == "0", 43 | ) # Don't queue in tests 44 | 45 | queue_low = Queue( 46 | "low", 47 | connection=redis_connection(), 48 | is_async=os.environ.get("ZOPETESTCASE", "0") == "0", 49 | ) # Don't queue in tests 50 | 51 | 52 | @job(queue, connection=redis_connection(), retry=Retry(max=3, interval=30)) 53 | def bulk_update(hosts, params, index_name, body): 54 | """ 55 | Collects all the data and updates elasticsearch 56 | """ 57 | hosts = os.environ.get("PLONE_ELASTICSEARCH_HOST", hosts) 58 | connection = es_connection(hosts, **params) 59 | 60 | for item in body: 61 | if len(item) == 1 and "delete" in item[0]: 62 | continue 63 | 64 | catalog_info, payload = item 65 | action, index_info = list(catalog_info.items())[0] 66 | if action == "index": 67 | data = fetch_data(uuid=index_info["_id"], attributes=list(payload.keys())) 68 | item[1] = data 69 | elif action == "update": 70 | data = fetch_data( 71 | uuid=index_info["_id"], attributes=list(payload["doc"].keys()) 72 | ) 73 | item[1]["doc"] = data 74 | 75 | es_data = [item for sublist in body for 
item in sublist] 76 | connection.bulk(index=index_name, body=es_data) 77 | return "Done" 78 | 79 | 80 | @job(queue_low, connection=redis_connection()) 81 | def update_file_data(hosts, params, index_name, body): 82 | """ 83 | Get blob data from plone and index it via elasticsearch attachment pipeline 84 | """ 85 | hosts = os.environ.get("PLONE_ELASTICSEARCH_HOST", hosts) 86 | connection = es_connection(hosts, **params) 87 | uuid, data = body 88 | 89 | attachments = {"attachments": []} 90 | 91 | for fieldname, content in data.items(): 92 | file_ = fetch_blob_data(fieldname, data) 93 | attachments["attachments"].append( 94 | { 95 | "filename": content["filename"], 96 | "fieldname": fieldname, 97 | "data": file_.read(), 98 | } 99 | ) 100 | 101 | connection.update( 102 | index_name, 103 | uuid, 104 | cbor2.dumps({"doc": attachments}), 105 | headers={"content-type": "application/cbor"}, 106 | ) 107 | return "Done" 108 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/result.py: -------------------------------------------------------------------------------- 1 | from Acquisition import aq_base 2 | from Acquisition import aq_get 3 | from Acquisition import aq_parent 4 | from collective.elasticsearch import interfaces 5 | from collective.elasticsearch.utils import get_brain_from_path 6 | from Products.ZCatalog.CatalogBrains import AbstractCatalogBrain 7 | from Products.ZCatalog.interfaces import ICatalogBrain 8 | from typing import Union 9 | from zope.component import getMultiAdapter 10 | from zope.globalrequest import getRequest 11 | from zope.interface import implementer 12 | from ZPublisher.BaseRequest import RequestContainer 13 | 14 | 15 | @implementer(ICatalogBrain) 16 | class ElasticSearchBrain: 17 | """A Brain containing only information indexed in ElasticSearch.""" 18 | 19 | def __init__(self, record: dict, catalog): 20 | self._record = record 21 | self._catalog = catalog 22 | 23 | def has_key(self, key): 24 | return key in self._record 25 | 26 | def __contains__(self, name): 27 | return name in self._record 28 | 29 | def __getattr__(self, name): 30 | if not self.__contains__(name): 31 | raise AttributeError( 32 | f"'ElasticSearchBrain' object has no attribute '{name}'" 33 | ) 34 | return self._record[name] 35 | 36 | def getPath(self): 37 | """Get the physical path for this record""" 38 | return self._record["path"]["path"] 39 | 40 | def getURL(self, relative=0): 41 | """Generate a URL for this record""" 42 | request = getRequest() 43 | return request.physicalPathToURL(self.getPath(), relative) 44 | 45 | def getObject(self, REQUEST=None): 46 | path = self.getPath().split("/") 47 | if not path: 48 | return None 49 | parent = aq_parent(self._catalog) 50 | if aq_get(parent, "REQUEST", None) is None: 51 | request = getRequest() 52 | if request is not None: 53 | # path should be absolute, starting at the physical root 54 | parent = self.getPhysicalRoot() 55 | request_container = RequestContainer(REQUEST=request) 56 | parent = aq_base(parent).__of__(request_container) 57 | if len(path) > 1: 58 | parent = parent.unrestrictedTraverse(path[:-1]) 59 | 60 | return parent.restrictedTraverse(path[-1]) 61 | 62 | def getRID(self) -> int: 63 | """Return the record ID for this object.""" 64 | return -1 65 | 66 | 67 | def BrainFactory(manager): 68 | def factory(result: dict) -> Union[AbstractCatalogBrain, ElasticSearchBrain]: 69 | catalog = manager.catalog 70 | zcatalog = catalog._catalog 71 | path = result.get("fields", {}).get("path.path", 
None) 72 | if type(path) in (list, tuple, set) and len(path) > 0:
73 | path = path[0]
74 | if path:
75 | brain = get_brain_from_path(zcatalog, path)
76 | if not brain:
77 | result = manager.get_record_by_path(path)
78 | brain = ElasticSearchBrain(record=result, catalog=catalog)
79 | if manager.highlight and result.get("highlight"):
80 | fragments = []
81 | fraglen = 0
82 | for idx, i in enumerate(result["highlight"].get("SearchableText", [])):
83 | fraglen += len(i)
84 | if idx > 0 and fraglen > manager.highlight_threshold:
85 | break
86 | fragments.append(i)
87 | brain["Description"] = " ... ".join(fragments)
88 | return brain
89 | # We should handle cases where there is no path in the ES response
90 | return None
91 | 
92 | return factory
93 | 
94 | 
95 | class ElasticResult:
96 | def __init__(self, manager, query, **query_params):
97 | assert "sort" not in query_params
98 | assert "start" not in query_params
99 | self.manager = manager
100 | self.bulk_size = manager.bulk_size
101 | qassembler = getMultiAdapter(
102 | (getRequest(), manager), interfaces.IQueryAssembler
103 | )
104 | dquery, self.sort = qassembler.normalize(query)
105 | self.query = qassembler(dquery)
106 | 
107 | # results are stored in a dictionary, keyed
108 | # by the start index of the bulk-size bucket of
109 | # results it holds. This way we can skip around
110 | # for result data in a result object
111 | result = manager._search(self.query, sort=self.sort, **query_params)["hits"]
112 | self.results = {0: result["hits"]}
113 | self.count = result["total"]["value"]
114 | self.query_params = query_params
115 | 
116 | def __len__(self):
117 | return self.count
118 | 
119 | def __getitem__(self, key):
120 | """
121 | Lazy loading es results with negative index support.
122 | We store the results in buckets of what the bulk size is.
123 | This is so you can skip around in the indexes without needing
124 | to load all the data.
125 | Example (all zero-based indexing here, remember):
126 | (525 results with bulk size 50)
127 | - self[0]: 0 bucket, 0 item
128 | - self[10]: 0 bucket, 10 item
129 | - self[50]: 50 bucket: 0 item
130 | - self[55]: 50 bucket: 5 item
131 | - self[352]: 350 bucket: 2 item
132 | - self[-1]: 500 bucket: 24 item
133 | - self[-2]: 500 bucket: 23 item
134 | - self[-55]: 450 bucket: 19 item
135 | """
136 | bulk_size = self.bulk_size
137 | count = self.count
138 | if isinstance(key, slice):
139 | return [self[i] for i in range(key.start, key.stop)]
140 | if key + 1 > count:
141 | raise IndexError
142 | if key < 0 and abs(key) > count:
143 | raise IndexError
144 | if key >= 0:
145 | result_key = int(key / bulk_size) * bulk_size
146 | start = result_key
147 | result_index = key % bulk_size
148 | elif key < 0:
149 | last_key = int(count / bulk_size) * bulk_size
150 | last_key = last_key if last_key else count
151 | start = result_key = int(last_key - ((abs(key) / bulk_size) * bulk_size))
152 | if last_key == result_key:
153 | result_index = key
154 | else:
155 | result_index = (key % bulk_size) - (bulk_size - (count % last_key))
156 | if result_key not in self.results:
157 | self.results[result_key] = self.manager._search(
158 | self.query, sort=self.sort, start=start, **self.query_params
159 | )["hits"]["hits"]
160 | return self.results[result_key][result_index]
161 | 
-------------------------------------------------------------------------------- /src/collective/elasticsearch/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/services/__init__.py -------------------------------------------------------------------------------- /src/collective/elasticsearch/services/configure.zcml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 12 | 13 | 20 | 21 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/services/controlpanel.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.interfaces import IElasticSearchLayer
2 | from collective.elasticsearch.interfaces import IElasticSettings
3 | from plone.restapi.controlpanels import RegistryConfigletPanel
4 | from zope.component import adapter
5 | from zope.interface import Interface
6 | 
7 | 
8 | @adapter(Interface, IElasticSearchLayer)
9 | class ElasticSearchSettingsConfigletPanel(RegistryConfigletPanel):
10 | """Control Panel endpoint"""
11 | 
12 | schema = IElasticSettings
13 | configlet_id = "elasticsearch"
14 | configlet_category_id = "Products"
15 | title = "Elastic Search Settings"
16 | group = ""
17 | schema_prefix = "collective.elasticsearch.interfaces.IElasticSettings"
18 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/services/elasticsearch.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.manager import ElasticSearchManager
2 | from plone import api
3 | from plone.restapi.deserializer import json_body
4 | from plone.restapi.services import Service
5 | 
6 | 
7 | class ElasticSearchService(Service):
8 | """Base service for ElasticSearch management."""
9 | 
10 | def __init__(self, context, request):
11 | super().__init__(context, request)
12 | 
self.es = ElasticSearchManager() 13 | 14 | 15 | class Info(ElasticSearchService): 16 | """Elastic Search information.""" 17 | 18 | def reply(self): 19 | info = self.es.info 20 | response = dict(info) 21 | response["@id"] = f"{api.portal.get().absolute_url()}/@elasticsearch" 22 | return response 23 | 24 | 25 | class Maintenance(ElasticSearchService): 26 | """Elastic Search integration management.""" 27 | 28 | def reply(self): 29 | data = json_body(self.request) 30 | action = data.get("action") 31 | if action == "convert": 32 | self.es._convert_catalog_to_elastic() 33 | elif action == "rebuild": 34 | catalog = api.portal.get_tool("portal_catalog") 35 | catalog.manage_catalogRebuild() 36 | else: 37 | return self.reply_no_content(status=400) 38 | return self.reply_no_content() 39 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/setuphandlers.py: -------------------------------------------------------------------------------- 1 | from Products.CMFPlone.interfaces import INonInstallable 2 | from zope.interface import implementer 3 | 4 | 5 | @implementer(INonInstallable) 6 | class HiddenProfiles: 7 | @staticmethod 8 | def getNonInstallableProfiles(): # NOQA C0103 9 | """Hide uninstall profile from site-creation and quickinstaller.""" 10 | return [ 11 | "collective.elasticsearch:uninstall", 12 | ] 13 | 14 | 15 | def post_install(context): # NOQA W0613 16 | """Post install script""" 17 | # Do something at the end of the installation of this package. 18 | 19 | 20 | def post_content(context): # NOQA W0613 21 | """Post content script""" 22 | 23 | 24 | def uninstall(context): # NOQA W0613 25 | """Uninstall script""" 26 | # Do something at the end of the uninstallation of this package. 27 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/testing.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import utils 2 | from plone import api 3 | from plone.app.contenttypes.testing import PLONE_APP_CONTENTTYPES_FIXTURE 4 | from plone.app.testing import applyProfile 5 | from plone.app.testing import FunctionalTesting 6 | from plone.app.testing import IntegrationTesting 7 | from plone.app.testing import PloneSandboxLayer 8 | from plone.app.testing import setRoles 9 | from plone.app.testing import SITE_OWNER_NAME 10 | from plone.app.testing import SITE_OWNER_PASSWORD 11 | from plone.app.testing import TEST_USER_ID 12 | from plone.testing import zope 13 | 14 | import collective.elasticsearch 15 | import os 16 | import redis 17 | import time 18 | 19 | 20 | MAX_CONNECTION_RETRIES = 20 21 | 22 | 23 | class ElasticSearch(PloneSandboxLayer): 24 | 25 | defaultBases = (PLONE_APP_CONTENTTYPES_FIXTURE,) 26 | 27 | def setUpZope(self, app, configurationContext): 28 | super().setUpZope(app, configurationContext) 29 | self.loadZCML(package=collective.elasticsearch) 30 | 31 | def setUpPloneSite(self, portal): 32 | super().setUpPloneSite(portal) 33 | # install into the Plone site 34 | applyProfile(portal, "collective.elasticsearch:default") 35 | setRoles(portal, TEST_USER_ID, ("Member", "Manager")) 36 | workflowTool = api.portal.get_tool("portal_workflow") 37 | workflowTool.setDefaultChain("plone_workflow") 38 | 39 | 40 | ElasticSearch_FIXTURE = ElasticSearch() 41 | ElasticSearch_INTEGRATION_TESTING = IntegrationTesting( 42 | bases=(ElasticSearch_FIXTURE,), name="ElasticSearch:Integration" 43 | ) 44 | ElasticSearch_FUNCTIONAL_TESTING = 
FunctionalTesting( 45 | bases=(ElasticSearch_FIXTURE,), name="ElasticSearch:Functional" 46 | ) 47 | ElasticSearch_API_TESTING = FunctionalTesting( 48 | bases=(ElasticSearch_FIXTURE, zope.WSGI_SERVER_FIXTURE), 49 | name="ElasticSearch:API", 50 | ) 51 | 52 | 53 | class RedisElasticSearch(ElasticSearch): 54 | def setUpPloneSite(self, portal): 55 | super().setUpPloneSite(portal) 56 | 57 | # Setup environ for redis testing 58 | os.environ["PLONE_BACKEND"] = portal.absolute_url() 59 | os.environ["PLONE_USERNAME"] = SITE_OWNER_NAME 60 | os.environ["PLONE_PASSWORD"] = SITE_OWNER_PASSWORD 61 | os.environ["PLONE_REDIS_DSN"] = "redis://localhost:6379/0" 62 | 63 | # Make sure tasks are not handled async in tests 64 | # from collective.elasticsearch.redis.tasks import queue 65 | # queue._is_async = False 66 | 67 | utils.get_settings().use_redis = True 68 | self._wait_for_redis_service() 69 | 70 | def _wait_for_redis_service(self): 71 | from collective.elasticsearch.redis.tasks import redis_connection 72 | 73 | counter = 0 74 | while True: 75 | if counter == MAX_CONNECTION_RETRIES: 76 | raise Exception("Cannot connect to redis service") 77 | try: 78 | if redis_connection().ping(): 79 | break 80 | except redis.ConnectionError: 81 | time.sleep(1) 82 | counter += 1 83 | 84 | 85 | ElasticSearch_REDIS_FIXTURE = RedisElasticSearch() 86 | ElasticSearch_REDIS_TESTING = FunctionalTesting( 87 | bases=(zope.WSGI_SERVER_FIXTURE, ElasticSearch_REDIS_FIXTURE), 88 | name="ElasticSearch:Redis", 89 | ) 90 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/__init__.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import utils 2 | from collective.elasticsearch.browser.controlpanel import ElasticControlPanelView 3 | from collective.elasticsearch.interfaces import IElasticSearchIndexQueueProcessor 4 | from collective.elasticsearch.manager import ElasticSearchManager 5 | from collective.elasticsearch.testing import ElasticSearch_API_TESTING 6 | from collective.elasticsearch.testing import ElasticSearch_FUNCTIONAL_TESTING 7 | from collective.elasticsearch.testing import ElasticSearch_INTEGRATION_TESTING 8 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 9 | from plone import api 10 | from Products.CMFCore.indexing import processQueue 11 | from zope.component import getUtility 12 | 13 | import os 14 | import time 15 | import transaction 16 | import unittest 17 | 18 | 19 | MAX_CONNECTION_RETRIES = 20 20 | 21 | 22 | class BaseTest(unittest.TestCase): 23 | layer = ElasticSearch_INTEGRATION_TESTING 24 | 25 | def get_processor(self): 26 | return getUtility(IElasticSearchIndexQueueProcessor, name="elasticsearch") 27 | 28 | def setUp(self): 29 | super().setUp() 30 | self.portal = self.layer["portal"] 31 | self.request = self.layer["request"] 32 | self.request.environ["testing"] = True 33 | self.app = self.layer["app"] 34 | 35 | os.environ["PLONE_BACKEND"] = self.portal.absolute_url() 36 | 37 | settings = utils.get_settings() 38 | # disable sniffing hosts in tests because docker... 
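# (with sniffing enabled, the client would replace our configured host with
# the node addresses the container advertises, which are not reachable from
# the test runner)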
39 | settings.sniffer_timeout = None 40 | settings.enabled = True 41 | settings.sniffer_timeout = 0.0 42 | 43 | # Raise elastic search exceptions 44 | settings.raise_search_exception = True 45 | 46 | self._wait_for_es_service() 47 | 48 | self.catalog = api.portal.get_tool("portal_catalog") 49 | self.catalog._elasticcustomindex = "plone-test-index" 50 | self.es = ElasticSearchManager() 51 | 52 | self.catalog.manage_catalogRebuild() 53 | # need to commit here so all tests start with a baseline 54 | # of elastic enabled 55 | time.sleep(0.1) 56 | self.commit() 57 | 58 | def commit(self, wait: int = 0): 59 | processQueue() 60 | transaction.commit() 61 | self.es.flush_indices() 62 | if wait: 63 | time.sleep(wait) 64 | 65 | def tearDown(self): 66 | super().tearDown() 67 | real_index_name = f"{self.es.real_index_name}_1" 68 | index_name = self.es.index_name 69 | conn = self.es.connection 70 | conn.indices.delete_alias(index=real_index_name, name=index_name) 71 | conn.indices.delete(index=real_index_name) 72 | conn.indices.flush() 73 | # Wait ES remove the index 74 | time.sleep(0.1) 75 | 76 | def _wait_for_es_service(self): 77 | controlpanel = ElasticControlPanelView(self.portal, self.request) 78 | counter = 0 79 | while not controlpanel.connection_status: 80 | if counter == MAX_CONNECTION_RETRIES: 81 | raise Exception("Cannot connect to elasticsearch service") 82 | time.sleep(1) 83 | counter += 1 84 | 85 | 86 | class BaseFunctionalTest(BaseTest): 87 | layer = ElasticSearch_FUNCTIONAL_TESTING 88 | 89 | def search(self, query: dict): 90 | return self.catalog(**query) 91 | 92 | def total_results(self, query: dict): 93 | results = self.search(query) 94 | return len(results) 95 | 96 | 97 | class BaseAPITest(BaseTest): 98 | 99 | layer = ElasticSearch_API_TESTING 100 | 101 | 102 | class BaseRedisTest(BaseTest): 103 | 104 | layer = ElasticSearch_REDIS_TESTING 105 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/assets/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/tests/assets/image.png -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/assets/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/tests/assets/test.pdf -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/assets/test2.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/tests/assets/test2.docx -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_controlpanel.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.browser.controlpanel import ElasticControlPanelView 2 | from collective.elasticsearch.tests import BaseRedisTest 3 | from unittest import mock 4 | 5 | import os 6 | 7 | 8 | ENV_FOR_REDIS = { 9 | "PLONE_REDIS_DSN": "", 10 | "PLONE_BACKEND": "", 11 | "PLONE_USERNAME": "", 12 | 
"PLONE_PASSWORD": "", 13 | } 14 | 15 | 16 | class TestControlPanel(BaseRedisTest): 17 | def test_use_redis_checkbox_is_disabled_enabled(self): 18 | controlpanel = ElasticControlPanelView(self.portal, self.request) 19 | controlpanel.update() 20 | 21 | self.assertIsNone(controlpanel.form_instance.widgets["use_redis"].disabled) 22 | 23 | with mock.patch.dict(os.environ, ENV_FOR_REDIS): 24 | controlpanel.update() 25 | self.assertEqual( 26 | "disabled", controlpanel.form_instance.widgets["use_redis"].disabled 27 | ) 28 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_file_schema.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 11 | 12 | False 13 | Title 14 | 15 | 18 | 19 | False 20 | Description 21 | 22 | 26 | 27 | File 28 | 29 | 33 | 34 | File 2 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_processor.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.testing import ElasticSearch_FUNCTIONAL_TESTING 2 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 3 | from collective.elasticsearch.tests import BaseFunctionalTest 4 | from collective.elasticsearch.utils import getESOnlyIndexes 5 | from collective.elasticsearch.utils import getUID 6 | from parameterized import parameterized_class 7 | from plone import api 8 | from plone.app.contentrules.actions.move import MoveAction 9 | from plone.app.contentrules.tests.dummy import DummyEvent 10 | from plone.app.testing import login 11 | from plone.app.testing import TEST_USER_PASSWORD 12 | from plone.contentrules.rule.interfaces import IExecutable 13 | from Products.CMFCore.indexing import processQueue 14 | from zope.component import getMultiAdapter 15 | 16 | 17 | @parameterized_class( 18 | [ 19 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 20 | {"layer": ElasticSearch_REDIS_TESTING}, 21 | ] 22 | ) 23 | class TestQueueProcessor(BaseFunctionalTest): 24 | def test_has_right_brain_data(self): 25 | processor = self.get_processor() 26 | current_length = len(self.catalog._catalog.uids) 27 | obj = api.content.create(self.portal, "Event", "event", title="Some Event") 28 | uuid = getUID(obj) 29 | self.assertEqual(current_length + 1, len(self.catalog._catalog.uids)) 30 | processQueue() 31 | actions = processor.actions 32 | self.assertIn(uuid, actions.index) 33 | self.portal.manage_delObjects(["event"]) 34 | # uid not actually removed until this if catalog optimized 35 | processQueue() 36 | actions = processor.actions 37 | self.assertNotIn(uuid, actions.index) 38 | self.assertEqual(current_length, len(self.catalog._catalog.uids)) 39 | self.assertIn(uuid, actions.unindex) 40 | 41 | def test_rename_object(self): 42 | processor = self.get_processor() 43 | current_length = len(self.catalog._catalog.uids) 44 | obj = api.content.create(self.portal, "Event", "event1", title="Some Event") 45 | obj_uid = getUID(obj) 46 | self.assertEqual(current_length + 1, len(self.catalog._catalog.uids)) 47 | api.content.rename(self.portal.event1, new_id="event2") 48 | self.assertIn(obj_uid, processor.actions.index) 49 | self.assertNotIn(obj_uid, processor.actions.unindex) 50 | 51 | def test_delete_object(self): 52 | processor = self.get_processor() 53 | obj = api.content.create( 54 | self.portal, "Event", "event_to_delete", title="Some Event" 55 | ) 56 | obj_uid = getUID(obj) 57 | 
self.portal.manage_delObjects(["event_to_delete"]) 58 | processQueue() 59 | self.assertIn(obj_uid, processor.actions.unindex) 60 | 61 | def test_moved_content(self): 62 | """content moved by content rules should remove the original catalog 63 | entry 64 | """ 65 | processor = self.get_processor() 66 | target = api.content.create(container=self.portal, type="Folder", id="target") 67 | source = api.content.create(container=self.portal, type="Folder", id="source") 68 | e = MoveAction() 69 | e.target_folder = "/target" 70 | 71 | obj = api.content.create(container=source, type="Document", id="doc") 72 | obj_uid = getUID(obj) 73 | ex = getMultiAdapter((target, e, DummyEvent(obj)), IExecutable) 74 | self.assertEqual(True, ex()) 75 | self.assertIn(obj_uid, processor.actions.index) 76 | 77 | def test_index_even_if_access_to_obj_might_be_restricted(self): 78 | processor = self.get_processor() 79 | user = api.user.create( 80 | username="worker", 81 | email="ordinary_person@example.com", 82 | password=TEST_USER_PASSWORD, 83 | roles=("Member",), 84 | ) 85 | 86 | folder = api.content.create(self.portal, "Folder", "folder1", title="A folder") 87 | folder.manage_permission( 88 | "Access contents information", roles=["Manager"], acquire=False 89 | ) 90 | obj = api.content.create(folder, "Event", "event1", title="Some Event") 91 | 92 | login(self.portal, user.getId()) 93 | obj.reindexObject() 94 | processQueue() 95 | self.assertIn(obj.UID(), processor.actions.index) 96 | 97 | 98 | @parameterized_class( 99 | [ 100 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 101 | {"layer": ElasticSearch_REDIS_TESTING}, 102 | ] 103 | ) 104 | class TestMoveReindex(BaseFunctionalTest): 105 | def setUp(self): 106 | super().setUp() 107 | # Content on the Plone Site 108 | site_documents = [] 109 | for idx in range(10): 110 | content = api.content.create( 111 | self.portal, "Document", f"document-{idx}", title=f"Page {idx}" 112 | ) 113 | site_documents.append((content.id, getUID(content))) 114 | self.folder = api.content.create( 115 | container=self.portal, type="Folder", id="folder" 116 | ) 117 | folder_documents = [] 118 | for idx in range(10): 119 | content = api.content.create( 120 | self.folder, "Event", f"event-{idx}", title=f"Event {idx}" 121 | ) 122 | folder_documents.append((content.id, getUID(content))) 123 | 124 | self.site_docs = site_documents 125 | self.folder_docs = folder_documents 126 | self.commit(wait=1) 127 | 128 | def test_change_position_site(self): 129 | processor = self.get_processor() 130 | portal = self.portal 131 | # Move last object to top 132 | doc_id, doc_uuid = self.site_docs[-1] 133 | portal.moveObjectsToTop(doc_id) 134 | processQueue() 135 | self.assertIn(doc_uuid, processor.actions.reindex) 136 | # Only reindex getObjPositionInParent 137 | idxs = list(processor.actions.reindex[doc_uuid].keys()) 138 | self.assertEqual(len(idxs), 1) 139 | self.assertEqual(idxs[0], "getObjPositionInParent") 140 | 141 | def test_change_position_folder(self): 142 | processor = self.get_processor() 143 | folder = self.folder 144 | # Move last object to top 145 | doc_id, doc_uuid = self.folder_docs[-1] 146 | folder.moveObjectsToTop(doc_id) 147 | processQueue() 148 | self.assertIn(doc_uuid, processor.actions.reindex) 149 | # Only reindex getObjPositionInParent 150 | idxs = list(processor.actions.reindex[doc_uuid].keys()) 151 | self.assertEqual(len(idxs), 1) 152 | self.assertEqual(idxs[0], "getObjPositionInParent") 153 | 154 | 155 | @parameterized_class( 156 | [ 157 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 158 | 
{"layer": ElasticSearch_REDIS_TESTING}, 159 | ] 160 | ) 161 | class TestRemoveIndexFromCatalog(BaseFunctionalTest): 162 | def setUp(self): 163 | super().setUp() 164 | # Create a content with the word fancy 165 | self.document = api.content.create( 166 | container=self.portal, 167 | type="Document", 168 | id="a-document", 169 | title="A Fancy Title", 170 | ) 171 | # Force indexing in ES 172 | self.commit(wait=1) 173 | # Now delete the index from the catalog 174 | zcatalog = self.catalog._catalog 175 | # Delete indexes that should be only in ES 176 | idxs = getESOnlyIndexes() 177 | for idx in idxs: 178 | zcatalog.delIndex(idx) 179 | self.commit() 180 | 181 | def test_reindex_object(self): 182 | processor = self.get_processor() 183 | document = self.document 184 | document.title = "Common title" 185 | document.reindexObject(idxs=["SearchableText", "Title"]) 186 | processQueue() 187 | actions = processor.actions 188 | uid = getUID(document) 189 | self.assertIn(uid, actions.reindex) 190 | self.assertIn("SearchableText", actions.reindex[uid]) 191 | 192 | if self.layer == ElasticSearch_FUNCTIONAL_TESTING: 193 | self.assertIn("Common", actions.reindex[uid]["SearchableText"]) 194 | self.assertIn("Title", actions.reindex[uid]) 195 | self.assertIn("Common", actions.reindex[uid]["Title"]) 196 | if self.layer == ElasticSearch_REDIS_TESTING: 197 | # There is a slight change in the API for redis. We do no extract 198 | # any data at this time. 199 | self.assertIsNone(actions.reindex[uid]["Title"]) 200 | self.assertIsNone(actions.reindex[uid]["SearchableText"]) 201 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_redis.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import utils 2 | from collective.elasticsearch.tests import BaseFunctionalTest 3 | from collective.elasticsearch.tests import BaseRedisTest 4 | from plone import api 5 | from plone.app.testing import SITE_OWNER_NAME 6 | from plone.app.testing import SITE_OWNER_PASSWORD 7 | from plone.app.textfield import RichTextValue 8 | from plone.dexterity.fti import DexterityFTIModificationDescription 9 | from plone.dexterity.fti import ftiModified 10 | from plone.namedfile.file import NamedBlobFile 11 | from plone.namedfile.file import NamedBlobImage 12 | from plone.restapi.testing import RelativeSession 13 | from unittest import mock 14 | from zope.lifecycleevent import ObjectModifiedEvent 15 | 16 | import io 17 | import json 18 | import os 19 | import transaction 20 | 21 | 22 | ENV_FOR_REDIS = { 23 | "PLONE_REDIS_DSN": "redis://localhost:6379/0", 24 | "PLONE_BACKEND": "http://localhost", 25 | "PLONE_USERNAME": "admin", 26 | "PLONE_PASSWORD": "password", 27 | } 28 | 29 | 30 | class TestRedisUtils(BaseFunctionalTest): 31 | def test_redis_not_available_if_environ_vars_are_missing(self): 32 | 33 | self.assertFalse( 34 | utils.is_redis_available(), "Env vars are missing, this should be false" 35 | ) 36 | 37 | with mock.patch.dict(os.environ, ENV_FOR_REDIS): 38 | self.assertTrue( 39 | True, 40 | "All env vars ar available, this should be true", 41 | ) 42 | 43 | 44 | class TestUseRedis(BaseRedisTest): 45 | def test_use_redis_if_configured(self): 46 | utils.get_settings().use_redis = False 47 | self.assertFalse(utils.use_redis(), "Using redis should be disabled") 48 | 49 | utils.get_settings().use_redis = True 50 | self.assertTrue(utils.use_redis(), "Using redis should be enabled") 51 | 52 | 53 | class 
TestExtractRestApiEndpoint(BaseRedisTest): 54 | def setUp(self): 55 | super().setUp() 56 | self.portal_url = self.portal.absolute_url() 57 | self.endpoint = f"{self.portal_url}/@elasticsearch_extractdata" 58 | 59 | self.api_session = RelativeSession(self.portal_url) 60 | self.api_session.headers.update({"Accept": "application/json"}) 61 | self.api_session.auth = (SITE_OWNER_NAME, SITE_OWNER_PASSWORD) 62 | 63 | self.obj = api.content.create( 64 | self.portal, 65 | "Document", 66 | "page", 67 | title="New Content", 68 | text=RichTextValue("
<p>abc</p>
"), 69 | ) 70 | transaction.commit() 71 | 72 | def tearDown(self): 73 | self.api_session.close() 74 | 75 | def test_extract_all_data_via_endpoint(self): 76 | params = {"uuid": self.obj.UID()} 77 | response = self.api_session.get(self.endpoint, params=params) 78 | self.assertEqual(response.status_code, 200) 79 | self.assertEqual(response.headers.get("Content-Type"), "application/json") 80 | self.assertEqual(self.endpoint, response.json()["@id"]) 81 | content = response.json()["data"] 82 | processor = self.get_processor() 83 | 84 | self.maxDiff = None 85 | self.assertDictEqual( 86 | json.loads(json.dumps(processor.get_data_for_es(self.obj.UID()))), content 87 | ) 88 | 89 | def test_extract_certain_attributes_via_endpoint(self): 90 | params = { 91 | "uuid": self.obj.UID(), 92 | "attributes:list": ["SearchableText", "Title", "id"], 93 | } 94 | response = self.api_session.get(self.endpoint, params=params) 95 | self.assertEqual(response.status_code, 200) 96 | self.assertEqual(response.headers.get("Content-Type"), "application/json") 97 | self.assertEqual(self.endpoint, response.json()["@id"]) 98 | content = response.json()["data"] 99 | processor = self.get_processor() 100 | 101 | self.maxDiff = None 102 | self.assertDictEqual( 103 | json.loads( 104 | json.dumps( 105 | processor.get_data_for_es( 106 | self.obj.UID(), 107 | attributes=params["attributes:list"], 108 | ) 109 | ) 110 | ), 111 | content, 112 | ) 113 | 114 | def test_404_if_obj_not_found(self): 115 | response = self.api_session.get(self.endpoint, params={"uuid": "dummy-uid"}) 116 | self.assertEqual(response.status_code, 404) 117 | 118 | def test_extract_endoint_respects_view_permission(self): 119 | 120 | api_session = RelativeSession(self.portal_url) 121 | api_session.headers.update({"Accept": "application/json"}) 122 | 123 | self.obj.manage_permission("View", roles=[]) 124 | transaction.commit() 125 | 126 | params = {"uuid": self.obj.UID()} 127 | response = self.api_session.get(self.endpoint, params=params) 128 | self.assertEqual(response.status_code, 401) 129 | 130 | 131 | class TestIndexBlobs(BaseRedisTest): 132 | def setUp(self): 133 | super().setUp() 134 | 135 | def _setup_sample_file(self): 136 | file_path = os.path.join(os.path.dirname(__file__), "assets/test.pdf") 137 | with io.FileIO(file_path, "rb") as pdf: 138 | _file = api.content.create( 139 | container=api.portal.get(), 140 | type="File", 141 | id="test-file", 142 | file=NamedBlobFile(data=pdf.read(), filename="test.pdf"), 143 | ) 144 | self.commit(wait=1) 145 | return _file 146 | 147 | def _set_model_file(self, fti, path_to_xml): 148 | fti.model_file = path_to_xml 149 | ftiModified( 150 | fti, 151 | ObjectModifiedEvent( 152 | fti, DexterityFTIModificationDescription("model_file", "") 153 | ), 154 | ) 155 | 156 | def test_index_data_from_file(self): 157 | self._setup_sample_file() 158 | query = {"SearchableText": "text"} 159 | cat_results = self.catalog._old_searchResults(**query) 160 | self.assertEqual(0, len(cat_results), "Expect no result") 161 | es_results = self.catalog(**query) 162 | self.assertEqual(1, len(es_results), "Expect 1 item") 163 | 164 | def test_update_and_delete_file(self): 165 | file_ = self._setup_sample_file() 166 | file_path = os.path.join(os.path.dirname(__file__), "assets/test2.docx") 167 | with io.FileIO(file_path, "rb") as word: 168 | file_.file = NamedBlobFile(data=word.read(), filename="test2.docx") 169 | file_.reindexObject() 170 | self.commit(wait=1) 171 | 172 | query = {"SearchableText": "Lorem"} 173 | es_results = self.catalog(**query) 174 
| self.assertEqual(1, len(es_results), "Expect 1 item") 175 | 176 | self.portal.manage_delObjects(ids=[file_.getId()]) 177 | self.commit(wait=1) 178 | 179 | query = {"SearchableText": "lorem"} 180 | es_results = self.catalog(**query) 181 | self.assertEqual(0, len(es_results), "Expect no item") 182 | 183 | def test_make_sure_binary_data_are_removed_from_es(self): 184 | file_ = self._setup_sample_file() 185 | es_data = self.es.connection.get(self.es.index_name, file_.UID()) 186 | self.assertIsNone(es_data["_source"]["attachments"][0]["data"]) 187 | 188 | def test_multiple_file_fields(self): 189 | fti = self.portal.portal_types.File 190 | self._set_model_file(fti, "collective.elasticsearch.tests:test_file_schema.xml") 191 | file_path_1 = os.path.join(os.path.dirname(__file__), "assets/test.pdf") 192 | file_path_2 = os.path.join(os.path.dirname(__file__), "assets/test2.docx") 193 | with io.FileIO(file_path_1, "rb") as pdf, io.FileIO(file_path_2, "rb") as word: 194 | file_ = api.content.create( 195 | container=api.portal.get(), 196 | type="File", 197 | id="test-file-multiple-file-fields", 198 | file=NamedBlobFile(data=pdf.read(), filename="test.pdf"), 199 | file2=NamedBlobFile(data=word.read(), filename="test2.docx"), 200 | ) 201 | self.commit(wait=1) 202 | 203 | query = {"SearchableText": "lorem"} 204 | es_results = self.catalog(**query) 205 | self.assertEqual(1, len(es_results), "Expect 1 item") 206 | 207 | query = {"SearchableText": "text"} 208 | es_results = self.catalog(**query) 209 | self.assertEqual(1, len(es_results), "Expect 1 item") 210 | 211 | es_data = self.es.connection.get(self.es.index_name, file_.UID()) 212 | self.assertIsNone(es_data["_source"]["attachments"][0]["data"]) 213 | self.assertIsNone(es_data["_source"]["attachments"][1]["data"]) 214 | 215 | file_.file2 = None 216 | file_.reindexObject() 217 | self.commit(wait=1) 218 | 219 | query = {"SearchableText": "lorem"} 220 | es_results = self.catalog(**query) 221 | self.assertEqual(0, len(es_results), "Expect 0 item") 222 | 223 | self._set_model_file(fti, "plone.app.contenttypes.schema:file.xml") 224 | 225 | def test_dont_queue_blob_extraction_jobs_if_not_possible(self): 226 | settings = {"index": {"default_pipeline": None}} 227 | self.es.connection.indices.put_settings(body=settings, index=self.es.index_name) 228 | self.es.connection.ingest.delete_pipeline("cbor-attachments") 229 | file_path = os.path.join(os.path.dirname(__file__), "assets/test2.docx") 230 | with io.FileIO(file_path, "rb") as pdf: 231 | self._file = api.content.create( 232 | container=api.portal.get(), 233 | type="File", 234 | id="test-file2", 235 | file=NamedBlobFile(data=pdf.read(), filename="test2.docx"), 236 | ) 237 | self.commit(wait=1) 238 | 239 | query = {"SearchableText": "lorem"} 240 | es_results = self.catalog(**query) 241 | self.assertEqual(0, len(es_results), "Expect 0 item") 242 | 243 | def test_do_not_index_data_from_images(self): 244 | file_path = os.path.join(os.path.dirname(__file__), "assets/image.png") 245 | with io.FileIO(file_path, "rb") as image: 246 | _image = api.content.create( 247 | container=api.portal.get(), 248 | type="Image", 249 | id="test-file", 250 | image=NamedBlobImage(data=image.read(), filename="image.png"), 251 | ) 252 | self.commit(wait=1) 253 | 254 | es_data = self.es.connection.get(self.es.index_name, _image.UID()) 255 | self.assertNotIn( 256 | "attachments", 257 | es_data["_source"], 258 | "Expect not attachments on es data for a image", 259 | ) 260 | 
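# Note on the assertion pattern used above: `self.catalog(**query)` goes
# through the patched, Elasticsearch-backed search, while
# `self.catalog._old_searchResults(**query)` is the original ZCatalog search
# preserved by the monkey patch. Comparing the two is also a handy
# interactive sanity check; a minimal sketch (assuming a running site with
# this package installed and some blob content indexed):
#
#   from plone import api
#   catalog = api.portal.get_tool("portal_catalog")
#   query = {"SearchableText": "lorem"}
#   es_brains = catalog(**query)  # served by Elasticsearch
#   zc_brains = catalog._old_searchResults(**query)  # plain ZCatalog
#   # Blob text only surfaces via the ES path: the attachment pipeline
#   # indexes it outside the ZCatalog.
#   print(len(es_brains), len(zc_brains))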
-------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_search.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.testing import ElasticSearch_FUNCTIONAL_TESTING 2 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 3 | from collective.elasticsearch.tests import BaseFunctionalTest 4 | from collective.elasticsearch.utils import get_settings 5 | from collective.elasticsearch.utils import getESOnlyIndexes 6 | from DateTime import DateTime 7 | from parameterized import parameterized 8 | from parameterized import parameterized_class 9 | from plone import api 10 | from Products.ZCatalog.interfaces import ICatalogBrain 11 | 12 | 13 | EVENT_KLASS = "plone.app.event.dx.interfaces.IDXEvent" 14 | DOCUMENT_KLASS = "plone.app.contenttypes.interfaces.IDocument" 15 | 16 | 17 | @parameterized_class( 18 | [ 19 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 20 | {"layer": ElasticSearch_REDIS_TESTING}, 21 | ] 22 | ) 23 | class TestSearch(BaseFunctionalTest): 24 | 25 | event_klass = EVENT_KLASS 26 | document_klass = DOCUMENT_KLASS 27 | 28 | def test_field_index_query(self): 29 | api.content.create(self.portal, "Event", "event", title="Some Event") 30 | self.commit(wait=1) 31 | query = {"portal_type": "Event", "Title:": "some event"} 32 | self.assertEqual(self.total_results(query), 1) 33 | 34 | def test_keyword_index_query(self): 35 | api.content.create(self.portal, "Event", "event", title="Some Event") 36 | self.commit(wait=1) 37 | query = {"object_provides": [self.event_klass], "SearchableText": "Event"} 38 | self.assertEqual(self.total_results(query), 1) 39 | 40 | def test_multi_keyword_index_query(self): 41 | api.content.create(self.portal, "Event", "event", title="New Content") 42 | api.content.create(self.portal, "Document", "page", title="New Content") 43 | self.commit(wait=1) 44 | query = { 45 | "object_provides": [self.event_klass, self.document_klass], 46 | "SearchableText": "new content", 47 | } 48 | self.assertEqual(self.total_results(query), 2) 49 | 50 | def test_date_index_query(self): 51 | start = DateTime() 52 | events = [] 53 | for idx in range(5): 54 | event = api.content.create( 55 | self.portal, 56 | "Event", 57 | f"event{idx}", 58 | title=f"Some Event {idx}", 59 | effective=DateTime("2015/09/25 20:00"), 60 | ) 61 | events.append(event) 62 | self.commit(wait=1) 63 | end = DateTime() 64 | query = { 65 | "created": { 66 | "query": (start, end), 67 | "range": "minmax", 68 | }, 69 | "portal_type": "Event", 70 | } 71 | cat_results = self.catalog._old_searchResults(**query) 72 | self.assertEqual(len(cat_results), self.total_results(query)) 73 | self.assertEqual(len(cat_results), len(events)) 74 | 75 | query = { 76 | "effective": {"query": DateTime().latestTime(), "range": "min"}, 77 | "portal_type": "Event", 78 | } 79 | cat_results = self.catalog._old_searchResults(**query) 80 | self.assertEqual(len(cat_results), self.total_results(query)) 81 | self.assertEqual(len(cat_results), 0) 82 | 83 | query = { 84 | "effective": {"query": DateTime().latestTime(), "range": "max"}, 85 | "portal_type": "Event", 86 | } 87 | cat_results = self.catalog._old_searchResults(**query) 88 | self.assertEqual(len(cat_results), self.total_results(query)) 89 | self.assertEqual(len(cat_results), 5) 90 | 91 | def test_text_index_query(self): 92 | for idx in range(5): 93 | api.content.create( 94 | self.portal, "Document", f"page{idx}", title=f"Page {idx}" 95 | ) 96 | # 
the pages above should not show up in the Title results below 97 | events = [] 98 | for idx in range(5): 99 | event = api.content.create( 100 | self.portal, "Event", f"event{idx}", title=f"Some Event {idx}" 101 | ) 102 | events.append(event) 103 | 104 | self.commit(wait=1) 105 | 106 | query = {"Title": "Some Event"} 107 | self.assertEqual(self.total_results(query), len(events)) 108 | 109 | query = {"Title": "Some Event 1", "sort_on": "getObjPositionInParent"} 110 | el_results = self.search(query) 111 | self.assertIn("Some Event 1", [b.Title for b in el_results]) 112 | self.assertEqual(el_results[0].Title, "Some Event 1") 113 | 114 | def test_path_index_query(self): 115 | folder1 = api.content.create( 116 | self.portal, "Folder", "folder0", title="New Content 0" 117 | ) 118 | for idx in range(1, 4): 119 | api.content.create( 120 | folder1, "Document", f"page{idx}", title=f"New Content {idx}" 121 | ) 122 | folder2 = api.content.create( 123 | folder1, "Folder", "folder4", title="New Content 4" 124 | ) 125 | folder3 = api.content.create( 126 | folder2, "Folder", "folder5", title="New Content 5" 127 | ) 128 | for idx in range(6, 9): 129 | api.content.create( 130 | folder3, "Document", f"page{idx}", title=f"New Content {idx}" 131 | ) 132 | 133 | self.commit(wait=1) 134 | query = { 135 | "path": {"depth": 0, "query": "/plone/folder0"}, 136 | "SearchableText": "new content", 137 | } 138 | self.assertEqual(self.total_results(query), 1) 139 | query = { 140 | "path": {"depth": 1, "query": "/plone/folder0"}, 141 | "SearchableText": "new content", 142 | } 143 | self.assertEqual(self.total_results(query), 4) 144 | query = { 145 | "path": {"depth": -1, "query": "/plone/folder0"}, 146 | "SearchableText": "new content", 147 | } 148 | self.assertEqual(self.total_results(query), 9) 149 | query = { 150 | "path": {"depth": 1, "query": "/plone"}, 151 | "SearchableText": "new content", 152 | } 153 | self.assertEqual(self.total_results(query), 1) 154 | # navtree queries are not properly supported: getting the whole subtree here proves the handling is wrong 155 | query = { 156 | "path": {"query": "/plone/folder0", "navtree_start": 0, "navtree": 1}, 157 | "is_default_page": False, 158 | "SearchableText": "new content", 159 | } 160 | self.assertEqual(self.total_results(query), 9) 161 | 162 | def test_combined_query(self): 163 | api.content.create(self.portal, "Folder", "folder1", title="Folder 1") 164 | self.commit(wait=1) 165 | query = { 166 | "path": {"depth": 1, "query": "/plone"}, 167 | "portal_type": "Folder", 168 | "is_default_page": False, 169 | "SearchableText": "folder", 170 | } 171 | self.assertEqual(self.total_results(query), 1) 172 | 173 | def test_highlight_query(self): 174 | settings = get_settings() 175 | settings.highlight = True 176 | settings.highlight_pre_tags = "" 177 | settings.highlight_post_tags = "" 178 | api.content.create(self.portal, "Document", "page", title="Some Page") 179 | self.commit(wait=1) 180 | query = {"SearchableText": "some"} 181 | results = self.search(query) 182 | self.assertEqual(len(results), 1) 183 | self.assertEqual(results[0].Description, "page Some Page") 184 | 185 | def test_not_query(self): 186 | api.content.create(self.portal, "Document", "page", title="New Content") 187 | api.content.create(self.portal, "Event", "event", title="New Event") 188 | self.commit(wait=1) 189 | query = { 190 | "portal_type": {"not": ["Event", "News Item"]}, 191 | "SearchableText": "New", 192 | } 193 | self.assertEqual(self.total_results(query), 1) 194 | 195 | 196 | @parameterized_class( 197 | [ 198 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 199 | {"layer": ElasticSearch_REDIS_TESTING}, 200 | ]
201 | ) 202 | class TestBrains(BaseFunctionalTest): 203 | def setUp(self): 204 | super().setUp() 205 | self.event = api.content.create( 206 | self.portal, "Event", "event", title="Some Event" 207 | ) 208 | self.commit(wait=1) 209 | 210 | def test_one_result_index_0(self): 211 | el_results = self.search({"portal_type": "Event", "Title": "Some Event"}) 212 | self.assertEqual(len(el_results), 1) 213 | brain = el_results[0] 214 | self.assertEqual(brain.getObject(), self.event) 215 | self.assertEqual(brain.portal_type, "Event") 216 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 217 | self.assertEqual(brain.getPath(), "/plone/event") 218 | 219 | def test_one_result_index_last(self): 220 | el_results = self.search({"portal_type": "Event", "Title": "Some Event"}) 221 | self.assertEqual(len(el_results), 1) 222 | brain = el_results[-1] 223 | self.assertEqual(brain.getObject(), self.event) 224 | self.assertEqual(brain.portal_type, "Event") 225 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 226 | self.assertEqual(brain.getPath(), "/plone/event") 227 | 228 | def test_two_results(self): 229 | api.content.create(self.portal, "Event", "event2", title="Some Event") 230 | self.commit(wait=1) 231 | 232 | el_results = self.search( 233 | { 234 | "portal_type": "Event", 235 | "Title": "Some Event", 236 | "sort_on": "getId", 237 | "sort_order": "descending", 238 | } 239 | ) 240 | self.assertEqual(len(el_results), 2) 241 | brain = el_results[0] 242 | self.assertEqual(brain.getId, "event2") 243 | brain = el_results[1] 244 | self.assertEqual(brain.getId, "event") 245 | 246 | brain = el_results[-1] 247 | self.assertEqual(brain.getId, "event") 248 | brain = el_results[-2] 249 | self.assertEqual(brain.getId, "event2") 250 | 251 | 252 | @parameterized_class( 253 | [ 254 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 255 | {"layer": ElasticSearch_REDIS_TESTING}, 256 | ] 257 | ) 258 | class TestBrainsIndexing(BaseFunctionalTest): 259 | def setUp(self): 260 | super().setUp() 261 | for idx in range(120): 262 | api.content.create( 263 | self.portal, "Document", f"{idx:04d}page", title=f"Page {idx}" 264 | ) 265 | self.commit(wait=1) 266 | self.el_results = self.search( 267 | { 268 | "portal_type": "Document", 269 | "sort_on": "getId", 270 | "sort_order": "asc", 271 | } 272 | ) 273 | 274 | def test_all_indexed(self): 275 | self.assertEqual(len(self.el_results), 120) 276 | 277 | @parameterized.expand( 278 | [ 279 | (0, "0000page"), 280 | (-1, "0119page"), 281 | (-50, "0070page"), 282 | (-55, "0065page"), 283 | (-100, "0020page"), 284 | ] 285 | ) 286 | def test_ordering(self, result_idx, expected): 287 | self.assertEqual(self.el_results[result_idx].getId, expected) 288 | 289 | 290 | @parameterized_class( 291 | [ 292 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 293 | {"layer": ElasticSearch_REDIS_TESTING}, 294 | ] 295 | ) 296 | class TestCatalogRecordDeleted(BaseFunctionalTest): 297 | def setUp(self): 298 | super().setUp() 299 | zcatalog = self.catalog._catalog 300 | self.event = api.content.create( 301 | self.portal, "Event", "event-test", title="Gone Event" 302 | ) 303 | self.commit(wait=1) 304 | path = "/".join(self.event.getPhysicalPath()) 305 | zcatalog.uncatalogObject(path) 306 | self.commit() 307 | 308 | def test_search_results(self): 309 | el_results = self.search({"portal_type": "Event", "Title": "Gone Event"}) 310 | self.assertEqual(len(el_results), 1) 311 | brain = el_results[0] 312 | self.assertTrue(ICatalogBrain.providedBy(brain)) 313 | self.assertEqual(brain.getRID(), -1) 314 | # 
Test data from elastic will populate the brain 315 | self.assertEqual(brain.portal_type, "Event") 316 | self.assertEqual(brain.Title, "Gone Event") 317 | # Test that path, URL and object resolution still work 318 | self.assertEqual(brain.getPath(), "/plone/event-test") 319 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 320 | self.assertEqual(brain.getObject(), self.event) 321 | 322 | 323 | @parameterized_class( 324 | [ 325 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 326 | {"layer": ElasticSearch_REDIS_TESTING}, 327 | ] 328 | ) 329 | class TestDeleteObjectNotReflectedOnES(BaseFunctionalTest): 330 | def setUp(self): 331 | super().setUp() 332 | zcatalog = self.catalog._catalog 333 | self.event = api.content.create( 334 | self.portal, "Event", "event-test", title="Gone Event" 335 | ) 336 | self.commit(wait=1) 337 | path = "/".join(self.event.getPhysicalPath()) 338 | zcatalog.uncatalogObject(path) 339 | self.portal._delObject("event-test", suppress_events=True) 340 | self.commit() 341 | 342 | def test_search_results(self): 343 | el_results = self.search({"portal_type": "Event", "Title": "Gone Event"}) 344 | self.assertEqual(len(el_results), 1) 345 | brain = el_results[0] 346 | self.assertTrue(ICatalogBrain.providedBy(brain)) 347 | self.assertEqual(brain.getRID(), -1) 348 | # Test data from elastic will populate the brain 349 | self.assertEqual(brain.portal_type, "Event") 350 | self.assertEqual(brain.Title, "Gone Event") 351 | # Test that path and URL still resolve; the object itself is gone 352 | self.assertEqual(brain.getPath(), "/plone/event-test") 353 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 354 | with self.assertRaises(KeyError): 355 | brain.getObject() 356 | 357 | 358 | @parameterized_class( 359 | [ 360 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 361 | {"layer": ElasticSearch_REDIS_TESTING}, 362 | ] 363 | ) 364 | class TestUncatalogRemoveOnES(BaseFunctionalTest): 365 | def setUp(self): 366 | super().setUp() 367 | self.event = api.content.create( 368 | self.portal, "Event", "event-test", title="Gone Event" 369 | ) 370 | self.commit(wait=1) 371 | path = "/".join(self.event.getPhysicalPath()) 372 | catalog = self.catalog 373 | catalog.uncatalog_object(path) 374 | self.commit(wait=1) 375 | 376 | def test_search_results(self): 377 | el_results = self.search({"portal_type": "Event", "Title": "Gone Event"}) 378 | self.assertEqual(len(el_results), 0) 379 | 380 | 381 | @parameterized_class( 382 | [ 383 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 384 | {"layer": ElasticSearch_REDIS_TESTING}, 385 | ] 386 | ) 387 | class TestSearchOnRemovedIndex(BaseFunctionalTest): 388 | def setUp(self): 389 | super().setUp() 390 | # Create a content item containing the word "fancy" 391 | self.document = api.content.create( 392 | container=self.portal, 393 | type="Document", 394 | id="a-document", 395 | title="A Fancy Title", 396 | ) 397 | # Force indexing in ES 398 | self.commit(wait=1) 399 | # Now delete the index from the catalog 400 | zcatalog = self.catalog._catalog 401 | # Delete the indexes that should exist only in ES 402 | idxs = getESOnlyIndexes() 403 | for idx in idxs: 404 | zcatalog.delIndex(idx) 405 | self.commit() 406 | 407 | def test_search_results(self): 408 | el_results = self.search({"portal_type": "Document", "SearchableText": "Fancy"}) 409 | self.assertEqual(len(el_results), 1) 410 | self.assertEqual(el_results[0].getId, self.document.id) 411 | 412 | def test_search_results_after_reindex(self): 413 | # Update title 414 | document = self.document 415 | document.title = "Common title" 416 | document.reindexObject(idxs=["SearchableText", "Title"]) 417 | self.commit(wait=1) 418 | # 
Search for the old title 419 | el_results = self.search({"portal_type": "Document", "SearchableText": "Fancy"}) 420 | self.assertEqual(len(el_results), 0) 421 | # Search for the new title 422 | el_results = self.search( 423 | {"portal_type": "Document", "SearchableText": "Common"} 424 | ) 425 | self.assertEqual(len(el_results), 1) 426 | self.assertEqual(el_results[0].getId, self.document.id) 427 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_services.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.testing import ElasticSearch_API_TESTING 2 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 3 | from collective.elasticsearch.tests import BaseAPITest 4 | from parameterized import parameterized_class 5 | from plone.app.testing import SITE_OWNER_NAME 6 | from plone.app.testing import SITE_OWNER_PASSWORD 7 | from plone.restapi.testing import RelativeSession 8 | 9 | 10 | @parameterized_class( 11 | [{"layer": ElasticSearch_API_TESTING}, {"layer": ElasticSearch_REDIS_TESTING}] 12 | ) 13 | class TestService(BaseAPITest): 14 | def setUp(self): 15 | super().setUp() 16 | self.portal = self.layer["portal"] 17 | self.portal_url = self.portal.absolute_url() 18 | self.request = self.portal.REQUEST 19 | self.api_session = RelativeSession(self.portal_url) 20 | self.api_session.headers.update({"Accept": "application/json"}) 21 | self.api_session.auth = (SITE_OWNER_NAME, SITE_OWNER_PASSWORD) 22 | 23 | def tearDown(self): 24 | self.api_session.close() 25 | 26 | def test_get(self): 27 | response = self.api_session.get("/@elasticsearch") 28 | 29 | self.assertEqual(response.status_code, 200) 30 | self.assertEqual(response.headers.get("Content-Type"), "application/json") 31 | 32 | results = response.json() 33 | self.assertEqual(results["@id"], f"{self.portal.absolute_url()}/@elasticsearch") 34 | self.assertIn("Cluster Name", results.keys()) 35 | self.assertIn("Elastic Search Version", results.keys()) 36 | self.assertIn("Number of docs (Catalog)", results.keys()) 37 | self.assertIn("Index Name", results.keys()) 38 | self.assertIn("Number of docs", results.keys()) 39 | self.assertIn("Deleted docs", results.keys()) 40 | self.assertIn("Size", results.keys()) 41 | self.assertIn("Query Count", results.keys()) 42 | 43 | def test_post_convert(self): 44 | response = self.api_session.post("/@elasticsearch", json={"action": "convert"}) 45 | 46 | self.assertEqual(response.status_code, 204) 47 | 48 | def test_post_rebuild(self): 49 | response = self.api_session.post("/@elasticsearch", json={"action": "rebuild"}) 50 | 51 | self.assertEqual(response.status_code, 204) 52 | 53 | def test_post_invalid(self): 54 | response = self.api_session.post( 55 | "/@elasticsearch", json={"action": "bad_action"} 56 | ) 57 | 58 | self.assertEqual(response.status_code, 400) 59 | 60 | def test_control_panel_registered(self): 61 | response = self.api_session.get("/@controlpanels") 62 | data = response.json() 63 | titles = [panel["title"] for panel in data] 64 | self.assertIn("Elastic search", titles) 65 | 66 | def test_control_panel_schema(self): 67 | response = self.api_session.get("/@controlpanels/elasticsearch") 68 | data = response.json() 69 | self.assertEqual(data["title"], "Elastic search") 70 | self.assertEqual(data["group"], "Add-on Configuration") 71 | self.assertTrue(data["data"]["enabled"]) 72 | self.assertIn("enabled", data["schema"]["fieldsets"][0]["fields"]) 73 | 
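For manual verification outside the test layer, the @elasticsearch service exercised above can be driven with plain requests. A sketch assuming a local site at http://localhost:8080/Plone and admin:admin credentials (both placeholders):

    import requests

    BASE = "http://localhost:8080/Plone"  # assumed local site URL
    AUTH = ("admin", "admin")  # assumed credentials
    HEADERS = {"Accept": "application/json"}

    # GET mirrors test_get: returns cluster and index statistics.
    resp = requests.get(f"{BASE}/@elasticsearch", auth=AUTH, headers=HEADERS)
    resp.raise_for_status()
    print(resp.json()["Cluster Name"], resp.json()["Number of docs"])

    # POST triggers maintenance actions; per the tests above, "convert" and
    # "rebuild" answer 204 No Content, and unknown actions answer 400.
    resp = requests.post(
        f"{BASE}/@elasticsearch",
        json={"action": "convert"},
        auth=AUTH,
        headers=HEADERS,
    )
    assert resp.status_code == 204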
-------------------------------------------------------------------------------- /src/collective/elasticsearch/upgrades.py: -------------------------------------------------------------------------------- 1 | from Products.CMFCore.utils import getToolByName 2 | 3 | 4 | def update_registry(context): 5 | portal_setup = getToolByName(context, "portal_setup") 6 | portal_setup.runImportStepFromProfile( 7 | "profile-collective.elasticsearch:default", 8 | "plone.app.registry", 9 | run_dependencies=False, 10 | ) 11 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/utils.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import logger 2 | from collective.elasticsearch.interfaces import IElasticSettings 3 | from plone.registry.interfaces import IRegistry 4 | from plone.uuid.interfaces import IUUID 5 | from Products.ZCatalog import ZCatalog 6 | from Products.ZCatalog.CatalogBrains import AbstractCatalogBrain 7 | from typing import List 8 | from zope.component import getUtility 9 | 10 | import math 11 | import os 12 | import pkg_resources 13 | 14 | 15 | HAS_REDIS_MODULE = False 16 | try: 17 | pkg_resources.get_distribution("redis") 18 | HAS_REDIS_MODULE = True 19 | except pkg_resources.DistributionNotFound: 20 | HAS_REDIS_MODULE = False 21 | 22 | 23 | def getUID(obj): 24 | value = IUUID(obj, None) 25 | if not value and hasattr(obj, "UID"): 26 | value = obj.UID() 27 | return value 28 | 29 | 30 | def get_brain_from_path(zcatalog: ZCatalog, path: str) -> AbstractCatalogBrain: 31 | rid = zcatalog.uids.get(path) 32 | if isinstance(rid, int): 33 | try: 34 | return zcatalog[rid] 35 | except KeyError: 36 | logger.error(f"Couldn't get catalog entry for path: {path}") 37 | else: 38 | logger.error(f"Got a key for path that is not an integer: {path}") 39 | return None 40 | 41 | 42 | def get_settings(): 43 | """Return IElasticSettings values.""" 44 | try: 45 | registry = getUtility(IRegistry) 46 | settings = registry.forInterface(IElasticSettings, check=False) 47 | except Exception: # noQA 48 | settings = None 49 | return settings 50 | 51 | 52 | def get_connection_settings(): 53 | settings = get_settings() 54 | return settings.hosts, { 55 | "retry_on_timeout": settings.retry_on_timeout, 56 | "sniff_on_connection_fail": settings.sniff_on_connection_fail, 57 | "sniff_on_start": settings.sniff_on_start, 58 | "sniffer_timeout": settings.sniffer_timeout, 59 | "timeout": settings.timeout, 60 | } 61 | 62 | 63 | def getESOnlyIndexes(): 64 | settings = get_settings() 65 | try: 66 | indexes = settings.es_only_indexes 67 | return set(indexes) if indexes else set() 68 | except (KeyError, AttributeError): 69 | return {"Title", "Description", "SearchableText"} 70 | 71 | 72 | def batches(data: list, size: int) -> List[List]: 73 | """Split a list into batches of the given size.""" 74 | return [data[i : i + size] for i in range(0, len(data), size)] # noQA 75 | 76 | 77 | def format_size_mb(value: int) -> str: 78 | """Format a size, in bytes, as MB.""" 79 | value = value / 1024.0 / 1024.0 80 | return f"{int(math.ceil(value))} MB" 81 | 82 | 83 | def is_redis_available(): 84 | """Determine whether redis could be available.""" 85 | requirements = [ 86 | HAS_REDIS_MODULE, 87 | os.environ.get("PLONE_REDIS_DSN", None), 88 | os.environ.get("PLONE_USERNAME", None), 89 | os.environ.get("PLONE_PASSWORD", None), 90 | os.environ.get("PLONE_BACKEND", None), 91 | ] 92 | return all(requirements) 93 | 94 | 95 | def use_redis():
96 | """ 97 | Determens if redis queueing should be used or not. 98 | """ 99 | return is_redis_available() and get_settings().use_redis 100 | --------------------------------------------------------------------------------