├── .editorconfig ├── .github └── workflows │ ├── code-analysis.yml │ └── tests.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTORS.md ├── LICENSE.GPL ├── MANIFEST.in ├── Makefile ├── README.md ├── docker-compose.dev.yaml ├── docker ├── elasticsearch.Dockerfile ├── plone.Dockerfile └── worker.Dockerfile ├── docs ├── Makefile ├── conf.py ├── config.rst ├── index.rst ├── install.rst └── make.bat ├── instance.yaml ├── pyproject.toml ├── scripts └── populate.py ├── setup.py └── src └── collective ├── __init__.py └── elasticsearch ├── __init__.py ├── browser ├── __init__.py ├── configure.zcml ├── controlpanel.py ├── controlpanel_layout.pt ├── search.py └── utilviews.py ├── configure.zcml ├── indexes.py ├── interfaces.py ├── local.py ├── manager.py ├── mapping.py ├── patches ├── __init__.py └── configure.zcml ├── profiles.zcml ├── profiles ├── default │ ├── browserlayer.xml │ ├── controlpanel.xml │ ├── metadata.xml │ └── registry.xml ├── docker-dev │ └── registry.xml └── uninstall │ └── browserlayer.xml ├── query.py ├── queueprocessor.py ├── redis ├── __init__.py ├── configure.zcml ├── fetch.py ├── restapi.py └── tasks.py ├── result.py ├── services ├── __init__.py ├── configure.zcml ├── controlpanel.py └── elasticsearch.py ├── setuphandlers.py ├── testing.py ├── tests ├── __init__.py ├── assets │ ├── image.png │ ├── test.pdf │ └── test2.docx ├── test_controlpanel.py ├── test_file_schema.xml ├── test_processor.py ├── test_redis.py ├── test_search.py └── test_services.py ├── upgrades.py └── utils.py /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_style = space 3 | end_of_line = lf 4 | insert_final_newline = true 5 | trim_trailing_whitespace = true 6 | charset = utf-8 7 | 8 | [{*.py,*.cfg}] 9 | indent_size = 4 10 | 11 | [{*.html,*.dtml,*.pt,*.zpt,*.xml,*.zcml,*.js}] 12 | indent_size = 2 13 | 14 | [Makefile] 15 | indent_style = tab 16 | -------------------------------------------------------------------------------- /.github/workflows/code-analysis.yml: -------------------------------------------------------------------------------- 1 | name: Code Analysis 2 | on: 3 | push: 4 | 5 | jobs: 6 | black: 7 | name: Black 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout codebase 12 | uses: actions/checkout@v2 13 | 14 | - name: Run check 15 | uses: plone/code-analysis-action@v2 16 | with: 17 | check: 'black' 18 | 19 | flake8: 20 | name: flake8 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Checkout codebase 25 | uses: actions/checkout@v2 26 | 27 | - name: Run check 28 | uses: plone/code-analysis-action@v2 29 | with: 30 | check: 'flake8' 31 | 32 | isort: 33 | runs-on: ubuntu-latest 34 | steps: 35 | - name: Checkout codebase 36 | uses: actions/checkout@v2 37 | 38 | - name: Run check 39 | uses: plone/code-analysis-action@v2 40 | with: 41 | check: 'isort' 42 | 43 | pyroma: 44 | name: pyroma 45 | runs-on: ubuntu-latest 46 | 47 | steps: 48 | - name: Checkout codebase 49 | uses: actions/checkout@v2 50 | 51 | - name: Run check 52 | uses: plone/code-analysis-action@v2 53 | with: 54 | check: 'pyroma' 55 | 56 | zpretty: 57 | name: zpretty 58 | runs-on: ubuntu-latest 59 | 60 | steps: 61 | - name: Checkout codebase 62 | uses: actions/checkout@v2 63 | 64 | - name: Run check 65 | uses: plone/code-analysis-action@v2 66 | with: 67 | check: 'zpretty' 68 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: 
-------------------------------------------------------------------------------- 1 | name: Tests 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | services: 7 | redis: 8 | image: redis:7.0.5 9 | # Set health checks to wait until redis has started 10 | options: >- 11 | --health-cmd "redis-cli ping" 12 | --health-interval 10s 13 | --health-timeout 5s 14 | --health-retries 5 15 | ports: 16 | - 6379:6379 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python: ["3.8", "3.9", "3.10"] 21 | plone: ["6.0-latest", "5.2-latest"] 22 | exclude: 23 | - plone: "5.2-latest" 24 | python: "3.9" 25 | - plone: "5.2-latest" 26 | python: "3.10" 27 | - plone: "6.0-latest" 28 | python: "3.8" 29 | - plone: "6.0-latest" 30 | python: "3.9" 31 | 32 | steps: 33 | # git checkout 34 | - uses: actions/checkout@v2 35 | 36 | - name: Setup elasticsearch docker container with ingest attachment plugin 37 | run: | 38 | docker container create --name elastictest \ 39 | -e "discovery.type=single-node" \ 40 | -e "cluster.name=docker-cluster" \ 41 | -e "http.cors.enabled=true" \ 42 | -e "http.cors.allow-origin=*" \ 43 | -e "http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization" \ 44 | -e "http.cors.allow-credentials=true" \ 45 | -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ 46 | -p 9200:9200 \ 47 | -p 9300:9300 \ 48 | elasticsearch:7.17.7; \ 49 | docker start elastictest; \ 50 | docker exec elastictest /bin/sh -c "bin/elasticsearch-plugin install ingest-attachment -b"; \ 51 | docker restart elastictest 52 | 53 | - name: Setup Plone ${{ matrix.plone }} with Python ${{ matrix.python }} 54 | id: setup 55 | uses: plone/setup-plone@v1.0.0 56 | with: 57 | python-version: ${{ matrix.python }} 58 | plone-version: ${{ matrix.plone }} 59 | 60 | - name: Install package 61 | run: | 62 | pip install -e ".[test, redis]" 63 | 64 | # test 65 | - name: test 66 | run: | 67 | zope-testrunner --auto-color --auto-progress --test-path src 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*project 2 | .coverage 3 | .coverage.* 4 | .installed.cfg 5 | .mr.developer.cfg 6 | .tox/ 7 | .vscode/ 8 | *.egg-info 9 | *.log 10 | *.mo 11 | *.py? 
12 | *.swp
13 | /.settings
14 | /compiled-doc.rst
15 | /local.cfg
16 | /pyvenv.cfg
17 | bin/
18 | buildout-cache/
19 | develop-eggs/
20 | dist/*
21 | eggs/
22 | etc
23 | htmlcov/
24 | include/
25 | inituser
26 | lib/
27 | lib64
28 | local/
29 | log.html
30 | node_modules/
31 | output.xml
32 | parts/
33 | pip-selfcheck.json
34 | report.html
35 | reports/
36 | test.plone_addon/
37 | var/
38 | venv/
39 |
-------------------------------------------------------------------------------- /CHANGELOG.md: --------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## 5.0.1 (unreleased)
4 |
5 | - Update elasticsearch to 7.17.7 (ready for 8.x; Apple Silicon images are available) @maethu
6 |
7 | - Control-Panel: Fix potential issue with bool fields @maethu
8 |
9 | - Tests: Wait for elasticsearch service @maethu
10 |
11 | - Fix restricted object lookup @maethu
12 |
13 | - Add support for the highlight feature of elasticsearch @instification
14 |
15 | - Use _old_searchResults when patching safeSearchResults @instification
16 |
17 | - Handle negative term filters (fixes #101) @instification
18 |
19 | - Check the add-on is installed before processing the queue (fixes #108) @instification
20 |
21 | - Add support for an optional ES host in the worker via the PLONE_ELASTICSEARCH_HOST env variable @maethu
22 |
23 | - [Issue #118](https://github.com/collective/collective.elasticsearch/issues/118) Fix **ComponentLookupError** when adding a Plone Site (6.1) (@andreclimaco)
24 |
25 | ## 5.0.0 (2022-10-11)
26 |
27 | - Rename `master` branch to `main` @ericof
28 |
29 | - Drop support for Python 3.7 when using Plone 6.0 @ericof
30 |
31 | - Add support for plone.restapi and Volto @ericof
32 |
33 |
34 | ## 5.0.0a2 (2022-09-23)
35 |
36 | - Implement IIndexQueueProcessor support @ericof
37 |
38 | - Refactor ElasticSearchCatalog methods into ElasticSearchManager object @ericof
39 |
40 | - Breaking: Remove collective.elasticsearch.es @ericof
41 |
42 | - Breaking: Remove collective.elasticsearch.hooks @ericof
43 |
44 | - Refactor moveObjectsByDelta to reduce the number of calls to ElasticSearch @ericof
45 |
46 | - Reindex operations on the catalog send only the updated indexes to ElasticSearch @ericof
47 |
48 | - Remove collective.celery support (as it is not Python-3 compatible yet) @ericof
49 |
50 | ## 5.0.0a1 (2022-09-14)
51 |
52 | - Refactor hook.index_batch to reduce the number of calls to Elastic Search @ericof
53 |
54 | - Implement plone/code-analysis-action @ericof
55 |
56 | - Add support for Plone 6.0 @ericof
57 |
58 | - Support Python 3.7, 3.8, 3.9 and 3.10 @ericof
59 |
60 | - Drop support for Plone versions 4.3, 5.0 and 5.1 @ericof, @andreclimaco
61 |
62 | - Drop support for Python 2.7 @ericof, @andreclimaco
63 |
64 | ## 4.0.0 (2021-04-28)
65 |
66 | - BREAKING: Make changes for ES 7.x @bduncan137
67 |
68 | - Slow down tests to allow them to complete correctly @bduncan137
69 |
70 |
71 | ## 3.0.5 (2021-04-28)
72 |
73 | - [Issue #76](https://github.com/collective/collective.elasticsearch/issues/76) In 5.1+ we want to patch _unindexObject not unindexObject @ewohnlich
74 |
75 | - Add explicit error logging if the ES bulk indexing action fails. @nazrulworld
76 |
77 | - Fix commit hook bug when content has been moved @instification
78 |
79 |
80 | ## 3.0.4 (2019-08-21)
81 |
82 | - [Issue #63](https://github.com/collective/collective.elasticsearch/issues/63) Now ensuring unicode values work for both the Python 2 and Python 3 cases.
@nazrulworld
83 |
84 | - Now possible to search by indexes other than `Title`, `Description` and `SearchableText`. @nazrulworld
85 |
86 |
87 | ## 3.0.3 (2019-03-12)
88 |
89 | - Add missing logger import @nazrulworld
90 |
91 |
92 | ## 3.0.2 (2019-01-31)
93 |
94 | - Fix Zope DateTime conversion to also handle the datetime.date type @ewohnlich
95 |
96 |
97 | ## 3.0.1 (2019-01-28)
98 |
99 | - Fix sortable_title search issue @ewohnlich
100 |
101 |
102 | ## 3.0.0 (2019-01-28)
103 |
104 | - Fix date queries to work with `min:max` as well as `minmax` @vangheem
105 |
106 | - Fix sort order parsing and implementation @vangheem
107 |
108 | - Handle upgrades with missing `es_only_indexes` properly @vangheem
109 |
110 | - Add IReindexActive to the request as a flag for other code @lucid-0
111 |
112 |
113 | ## 2.0.2 (2018-11-27)
114 |
115 |
116 | - Python 3 support @vangheem
117 |
118 | - Support ES 6 @lucid-0
119 |
120 | - Fix error causing "Server Status" on @@elastic-controlpanel to be empty. @fulv
121 |
122 |
123 | ## 2.0.1 (2018-01-05)
124 |
125 | - Prevent a critical error when the query value happens to be None. @thomasdesvenain
126 |
127 | - Minor code cleanup: readability, pep8, 80 cols, zca decorators. @jensens
128 |
129 | - Fix date criteria: 'minmax' instead of 'min:max' + string-to-date conversion @ebrehault
130 |
131 |
132 | ## 2.0.0a6 (2017-03-29)
133 |
134 | - Gracefully handle upgrades in the settings interface so it doesn't break for people upgrading. @vangheem
135 |
136 |
137 | ## 2.0.0a5 (2017-03-29)
138 |
139 | - Run indexing as admin, as it is possible to initiate a reindex or index on an object that you do not have permissions for @vangheem
140 |
141 |
142 | ## 2.0.0a4 (2017-03-27)
143 |
144 | - released
145 |
146 |
147 | ## 2.0.0a3 (2017-03-27)
148 |
149 | - Add a method to set the body of the request during index creation. @Gagaro
150 |
151 | - Fixed getting a brain from a lazy list with negative indexes. @thomasdesvenain
152 |
153 | - The list of indexes that force an ES search is configurable. @thomasdesvenain
154 |
155 | - Works under Plone 4.3. @thomasdesvenain
156 |
157 | - Works with Archetypes content. @thomasdesvenain
158 |
159 | ## 2.0.0a2 (2016-07-19)
160 |
161 | - We can pass a custom results factory and custom query parameters to the IElasticSearchCatalog.search() method, so it can be used as a public interface for custom needs. @thomasdesvenain
162 |
163 | - Prevent unindexing before reindexing when the uid is unchanged, for instance on rename. Use a set for the to-remove list. @thomasdesvenain
164 |
165 | - Fix indexing when removing the Title and Description indexes from Plone @vangheem
166 |
167 | ## 2.0.0a1 (2016-06-06)
168 |
169 | - upgrade to elasticsearch 2.x @vangheem
170 |
171 | ## 1.0.1a4 (2016-05-22)
172 |
173 | - provide a better search query @vangheem
174 |
175 | ## 1.0.1a3 (2016-03-22)
176 |
177 | - make sure to get the alias definition right @vangheem
178 |
179 | ## 1.0.1a2 (2016-03-18)
180 |
181 | - create the index as an alias so you can potentially work on an existing index without needing downtime @vangheem
182 |
183 | ## 1.0.1a1 (2016-02-25)
184 |
185 | - change default sorting to descending.
[Issue #12](https://github.com/collective/collective.elasticsearch/issues/12) @neilferreira 186 | 187 | ## 1.0.0a1 (2016-02-25) 188 | 189 | - Initial release 190 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | ## Contributors 2 | 3 | - Nathan Van Gheem, vangheem@gmail.com 4 | - Wesley Barroso, wesleybl@gmail.com 5 | - André Climaco, andre.climaco@gmail.com 6 | - Érico Andrei, ericof@plone.org 7 | - Jon Pentland, jon.pentland@pretagov.co.uk 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src/collective 2 | graft docs 3 | include *.md 4 | global-exclude *.pyc 5 | # added by check_manifest.py 6 | include *.GPL 7 | include *.txt 8 | include tox.ini 9 | recursive-include scripts *.py 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ### Defensive settings for make: 2 | # https://tech.davis-hansson.com/p/make/ 3 | SHELL:=bash 4 | .ONESHELL: 5 | .SHELLFLAGS:=-xeu -o pipefail -O inherit_errexit -c 6 | .SILENT: 7 | .DELETE_ON_ERROR: 8 | MAKEFLAGS+=--warn-undefined-variables 9 | MAKEFLAGS+=--no-builtin-rules 10 | 11 | # We like colors 12 | # From: https://coderwall.com/p/izxssa/colored-makefile-for-golang-projects 13 | RED=`tput setaf 1` 14 | GREEN=`tput setaf 2` 15 | RESET=`tput sgr0` 16 | YELLOW=`tput setaf 3` 17 | 18 | PLONE5=5.2-latest 19 | PLONE6=6.0-latest 20 | 21 | INSTANCE_YAML=instance.yaml 22 | 23 | ELASTIC_SEARCH_IMAGE=elasticsearch:7.17.7 24 | ELASTIC_SEARCH_CONTAINER=elastictest 25 | 26 | REDIS_IMAGE=redis:7.0.5 27 | REDIS_CONTAINER=redistest 28 | 29 | ELASTIC_SEARCH_CONTAINERS=$$(docker ps -q -a -f "name=${ELASTIC_SEARCH_CONTAINER}" | wc -l) 30 | REDIS_CONTAINERS=$$(docker ps -q -a -f "name=${REDIS_CONTAINER}" | wc -l) 31 | 32 | # Default env for elasticsearch with redis queue 33 | DEFAULT_ENV_ES_REDIS=PLONE_REDIS_DSN=redis://localhost:6379/0 \ 34 | PLONE_BACKEND=http://localhost:8080/Plone \ 35 | PLONE_USERNAME=admin \ 36 | PLONE_PASSWORD=admin 37 | 38 | ifndef LOG_LEVEL 39 | LOG_LEVEL=INFO 40 | endif 41 | 42 | CODE_QUALITY_VERSION=2.0.0 43 | CURRENT_USER=$$(whoami) 44 | USER_INFO=$$(id -u ${CURRENT_USER}):$$(getent group ${CURRENT_USER}|cut -d: -f3) 45 | BASE_FOLDER=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) 46 | LINT=docker run -e LOG_LEVEL="${LOG_LEVEL}" --rm -v "${BASE_FOLDER}":/github/workspace plone/code-quality:${CODE_QUALITY_VERSION} check 47 | FORMAT=docker run --user="${USER_INFO}" -e LOG_LEVEL="${LOG_LEVEL}" --rm -v "${BASE_FOLDER}":/github/workspace plone/code-quality:${CODE_QUALITY_VERSION} format 48 | 49 | all: build 50 | 51 | # Add the following 'help' target to your Makefile 52 | # And add help text after each target name starting with '\#\#' 53 | .PHONY: help 54 | help: ## This help message 55 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 56 | 57 | bin/pip: 58 | @echo "$(GREEN)==> Setup Virtual Env$(RESET)" 59 | python3 -m venv . 
60 | 	bin/pip install -U pip wheel
61 |
62 | .PHONY: cookiecutter
63 | cookiecutter: bin/pip
64 | 	@echo "$(GREEN)Install cookiecutter$(RESET)"
65 | 	bin/pip install git+https://github.com/cookiecutter/cookiecutter.git#egg=cookiecutter
66 |
67 | .PHONY: instance
68 | instance: cookiecutter ## create configuration for a Zope (Plone) instance
69 | 	@echo "$(GREEN)Create Plone/Zope configuration$(RESET)"
70 | 	rm -fr ./etc
71 | 	bin/cookiecutter -f --no-input --config-file ${INSTANCE_YAML} https://github.com/bluedynamics/cookiecutter-zope-instance
72 |
73 | .PHONY: build-plone-5
74 | build-plone-5: bin/pip ## Build Plone 5.2
75 | 	@echo "$(GREEN)==> Build with Plone 5.2$(RESET)"
76 | 	bin/pip install Paste Plone -c https://dist.plone.org/release/$(PLONE5)/constraints.txt
77 | 	bin/pip install "zest.releaser[recommended]"
78 | 	bin/pip install -e ".[test, redis]"
79 | 	make instance
80 |
81 | .PHONY: build-plone-6
82 | build-plone-6: bin/pip ## Build Plone 6.0
83 | 	@echo "$(GREEN)==> Build with Plone 6.0$(RESET)"
84 | 	bin/pip install Plone -c https://dist.plone.org/release/$(PLONE6)/constraints.txt
85 | 	bin/pip install "zest.releaser[recommended]"
86 | 	bin/pip install -e ".[test, redis]"
87 | 	make instance
88 |
89 | .PHONY: build
90 | build: build-plone-6 ## Build Plone 6.0
91 |
92 | .PHONY: clean
93 | clean: ## Remove the old virtualenv and create a new one
94 | 	@echo "$(RED)==> Cleaning environment and build$(RESET)"
95 | 	rm -rf bin lib lib64 include share etc var inituser pyvenv.cfg .installed.cfg
96 |
97 | .PHONY: format
98 | format: ## Format the codebase according to our standards
99 | 	@echo "$(GREEN)==> Format codebase$(RESET)"
100 | 	$(FORMAT)
101 |
102 | .PHONY: format-black
103 | format-black: ## Format the codebase with black
104 | 	@echo "$(GREEN)==> Format codebase with black$(RESET)"
105 | 	$(FORMAT) black ${CODEPATH}
106 |
107 | .PHONY: format-isort
108 | format-isort: ## Format the codebase with isort
109 | 	@echo "$(GREEN)==> Format codebase with isort$(RESET)"
110 | 	$(FORMAT) isort ${CODEPATH}
111 |
112 | .PHONY: format-zpretty
113 | format-zpretty: ## Format the codebase with zpretty
114 | 	@echo "$(GREEN)==> Format codebase with zpretty$(RESET)"
115 | 	$(FORMAT) zpretty ${CODEPATH}
116 |
117 | .PHONY: lint
118 | lint: ## check code style
119 | 	$(LINT)
120 |
121 | .PHONY: lint-black
122 | lint-black: ## validate black formatting
123 | 	$(LINT) black ${CODEPATH}
124 |
125 | .PHONY: lint-flake8
126 | lint-flake8: ## validate with flake8
127 | 	$(LINT) flake8 ${CODEPATH}
128 |
129 | .PHONY: lint-isort
130 | lint-isort: ## validate using isort
131 | 	$(LINT) isort ${CODEPATH}
132 |
133 | .PHONY: lint-pyroma
134 | lint-pyroma: ## validate using pyroma
135 | 	$(LINT) pyroma ${CODEPATH}
136 |
137 | .PHONY: lint-zpretty
138 | lint-zpretty: ## validate ZCML/XML using zpretty
139 | 	$(LINT) zpretty ${CODEPATH}
140 |
141 | .PHONY: elastic
142 | elastic: ## Create Elastic Search container
143 | 	@if [ $(ELASTIC_SEARCH_CONTAINERS) -eq 0 ]; then \
144 | 		docker container create --name $(ELASTIC_SEARCH_CONTAINER) \
145 | 			-e "discovery.type=single-node" \
146 | 			-e "cluster.name=docker-cluster" \
147 | 			-e "http.cors.enabled=true" \
148 | 			-e "http.cors.allow-origin=*" \
149 | 			-e "http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization" \
150 | 			-e "http.cors.allow-credentials=true" \
151 | 			-e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
152 | 			-p 9200:9200 \
153 | 			-p 9300:9300 \
154 | 			$(ELASTIC_SEARCH_IMAGE); \
155 | 		docker start $(ELASTIC_SEARCH_CONTAINER); \
156 | 		docker exec
$(ELASTIC_SEARCH_CONTAINER) /bin/sh -c "bin/elasticsearch-plugin install ingest-attachment -b"; \
157 | 		docker stop $(ELASTIC_SEARCH_CONTAINER);fi
158 |
159 | .PHONY: start-elastic
160 | start-elastic: elastic ## Start Elastic Search
161 | 	@echo "$(GREEN)==> Start Elastic Search$(RESET)"
162 | 	@docker start $(ELASTIC_SEARCH_CONTAINER)
163 |
164 | .PHONY: stop-elastic
165 | stop-elastic: ## Stop Elastic Search
166 | 	@echo "$(GREEN)==> Stop Elastic Search$(RESET)"
167 | 	@docker stop $(ELASTIC_SEARCH_CONTAINER)
168 |
169 | .PHONY: redis
170 | redis: ## Create Redis container
171 | 	@if [ $(REDIS_CONTAINERS) -eq 0 ]; then \
172 | 		docker container create --name $(REDIS_CONTAINER) \
173 | 			-p 6379:6379 \
174 | 			$(REDIS_IMAGE);fi
175 |
176 |
177 | .PHONY: start-redis
178 | start-redis: redis ## Start redis
179 | 	@echo "$(GREEN)==> Start redis$(RESET)"
180 | 	@docker start $(REDIS_CONTAINER)
181 |
182 | .PHONY: stop-redis
183 | stop-redis: ## Stop redis
184 | 	@echo "$(GREEN)==> Stop redis$(RESET)"
185 | 	@docker stop $(REDIS_CONTAINER)
186 |
187 |
188 | .PHONY: test
189 | test: ## run tests
190 | 	make start-elastic
191 | 	make start-redis
192 | 	PYTHONWARNINGS=ignore ./bin/zope-testrunner --auto-color --auto-progress --test-path src/
193 | 	make stop-elastic
194 | 	make stop-redis
195 |
196 | .PHONY: start
197 | start: ## Start a Plone instance on localhost:8080
198 | 	PYTHONWARNINGS=ignore ./bin/runwsgi instance/etc/zope.ini
199 |
200 | .PHONY: populate
201 | populate: ## Populate site with Wikipedia content
202 | 	PYTHONWARNINGS=ignore ./bin/zconsole run etc/zope.conf scripts/populate.py
203 |
204 | .PHONY: start-redis-support
205 | start-redis-support: ## Start a Plone instance on localhost:8080 with redis queue support
206 | 	@echo "$(GREEN)==> Set the env variables PLONE_REDIS_DSN, PLONE_BACKEND, PLONE_USERNAME and PLONE_PASSWORD before starting the instance$(RESET)"
207 | 	PYTHONWARNINGS=ignore \
208 | 	$(DEFAULT_ENV_ES_REDIS) \
209 | 	./bin/runwsgi instance/etc/zope.ini
210 |
211 |
212 | .PHONY: worker
213 | worker: ## Start a worker for the redis queue
214 | 	$(DEFAULT_ENV_ES_REDIS) ./bin/rq worker normal low --with-scheduler
215 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

collective.elasticsearch

2 | 3 |
4 | 5 | [![PyPI](https://img.shields.io/pypi/v/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 6 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 7 | [![PyPI - Wheel](https://img.shields.io/pypi/wheel/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 8 | [![PyPI - License](https://img.shields.io/pypi/l/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 9 | [![PyPI - Status](https://img.shields.io/pypi/status/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 10 | 11 | 12 | [![PyPI - Plone Versions](https://img.shields.io/pypi/frameworkversions/plone/collective.elasticsearch)](https://pypi.org/project/collective.elasticsearch/) 13 | 14 | [![Code analysis checks](https://github.com/collective/collective.elasticsearch/actions/workflows/code-analysis.yml/badge.svg)](https://github.com/collective/collective.elasticsearch/actions/workflows/code-analysis.yml) 15 | [![Tests](https://github.com/collective/collective.elasticsearch/actions/workflows/tests.yml/badge.svg)](https://github.com/collective/collective.elasticsearch/actions/workflows/tests.yml) 16 | ![Code Style](https://img.shields.io/badge/Code%20Style-Black-000000) 17 | 18 | [![GitHub contributors](https://img.shields.io/github/contributors/collective/collective.elasticsearch)](https://github.com/collective/collective.elasticsearch) 19 | [![GitHub Repo stars](https://img.shields.io/github/stars/collective/collective.elasticsearch?style=social)](https://github.com/collective/collective.elasticsearch) 20 | 21 |
22 |
23 | ## Introduction
24 |
25 | This package aims to index all fields the portal_catalog indexes and allows you to delete the `Title`, `Description` and `SearchableText` indexes, which can provide a significant improvement in performance and RAM usage.
26 |
27 | ElasticSearch queries are then ONLY used when `Title`, `Description` or `SearchableText` are part of the query. Otherwise, Plone's default catalog is used, because it is faster on normal queries than ElasticSearch.
28 |
29 |
30 | ## Install Elastic Search
31 |
32 | For comprehensive documentation about the different ways of installing Elastic Search, please read [their documentation](https://www.elastic.co/guide/en/elasticsearch/reference/7.7/install-elasticsearch.html).
33 |
34 | A quick start using Docker would be:
35 |
36 | ```shell
37 | docker run \
38 | -e "discovery.type=single-node" \
39 | -e "cluster.name=docker-cluster" \
40 | -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
41 | -p 9200:9200 \
42 | elasticsearch:7.7.0
43 | ```
44 |
45 | ### Test the installation
46 |
47 | Run, on your shell:
48 |
49 | ```shell
50 | curl http://localhost:9200/
51 | ```
52 | You should see the Hudsucker Proxy reference: "You Know, for Search".
53 |
54 | ## Install collective.elasticsearch
55 |
56 | First, add `collective.elasticsearch` to your package dependencies, or install it with `pip` (the same one used by your Plone installation):
57 |
58 | ```shell
59 | pip install collective.elasticsearch
60 | ```
61 |
62 | Restart Plone, go to the `Control Panel`, click `Add-ons`, and select `Elastic Search`.
63 |
64 | Now, go to `Add-on Configuration` and:
65 |
66 | - Check "Enable"
67 | - Click "Convert Catalog"
68 | - Click "Rebuild Catalog"
69 |
70 | You now have an insanely scalable modern search engine. Now live the life of the mind!
71 |
72 |
73 | ## Redis queue integration with blob indexing support
74 |
75 | ### TL;DR
76 |
77 | ```shell
78 | docker-compose -f docker-compose.dev.yaml up -d
79 | ```
80 |
81 | Your Plone site should be up and running at http://localhost:8080/Plone
82 |
83 | - Go to `Add-on Configuration`
84 | - Check "Enable"
85 | - Click "Convert Catalog"
86 | - Click "Rebuild Catalog"
87 |
88 | ### Why
89 |
90 | A queue that runs heavy, time-consuming jobs asynchronously improves the responsiveness of the website and lowers
91 | the risk of database conflicts. This implementation aims to have almost zero performance impact on any given Plone
92 | installation, including installations that already use collective.elasticsearch.
93 |
94 | ### How does it work
95 |
96 | - Instead of indexing/reindexing/unindexing data while committing to the DB, jobs are added to a queue in an after-commit hook.
97 | - No data is extracted from any object at this point; this all happens later.
98 | - One or more workers execute the jobs, which gather the necessary data via the REST API.
99 | - The extraction of the data and the indexing in elasticsearch happen via the queue.
100 |
101 | Workflow:
102 |
103 | 1. Content gets created/updated
104 | 2. Data is committed to the DB and the Plone catalog is updated
105 | 3. Jobs are created via after-commit hooks
106 | 4. The website is ready to use again - the request is done
107 | 5. A worker picks up the jobs
108 | 6. Each job collects the values to index via the Plone REST API and indexes them in elasticsearch
109 |
110 | There are two queues: one for normal indexing jobs and one for the heavy lifting of indexing binaries.
111 | Jobs from the second queue only get pulled when the normal indexing queue is empty.
112 |
113 | Trade-off: instead of waiting for a fully indexed document in elasticsearch, at least a partial document is available there quickly.
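The following is a minimal, illustrative python-rq sketch of that two-queue idea. The job functions (`index_content`, `index_blobs`) and the wiring are assumptions made for this example only, not the package's actual hook implementation:

```python
# Illustrative sketch only: two rq queues with different priorities.
from redis import Redis
from rq import Queue

connection = Redis.from_url("redis://localhost:6379/0")
normal = Queue("normal", connection=connection)  # index/reindex/unindex jobs
low = Queue("low", connection=connection)        # expensive blob extraction


def index_content(uid):
    """Hypothetical job: fetch the index data for ``uid`` via the REST API
    and send it to elasticsearch."""


def index_blobs(uid):
    """Hypothetical job: push the blobs of ``uid`` through the ingest pipeline."""


# Enqueued from the after-commit hook: the cheap job goes to ``normal``, the
# heavy one to ``low``. A worker started with ``rq worker normal low`` drains
# ``normal`` first and only pulls from ``low`` when ``normal`` is empty.
normal.enqueue(index_content, "<content-uid>")
low.enqueue(index_blobs, "<content-uid>")
```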
114 |
115 | ### Requirements
116 |
117 | There are a couple of things that need to be done manually if you want redis queue support.
118 |
119 |
120 | 1. Install the redis extra of collective.elasticsearch
121 | ```shell
122 | pip install collective.elasticsearch[redis]
123 | ```
124 |
125 |
126 | 2. Install the ingest-attachment plugin for elasticsearch - by default the elasticsearch image does not have any plugins installed.
127 |
128 | ```shell
129 | docker exec CONTAINER_NAME /bin/sh -c "bin/elasticsearch-plugin install ingest-attachment -b"; \
130 | docker restart CONTAINER_NAME
131 | ```
132 |
133 | The container needs to be restarted, otherwise the plugin is not available.
134 |
135 | 3. Communication between the Redis server, Plone and the Redis worker is configured via environment variables.
136 |
137 | ```shell
138 | export PLONE_REDIS_DSN=redis://localhost:6379/0
139 | export PLONE_BACKEND=http://localhost:8080/Plone
140 | export PLONE_USERNAME=admin
141 | export PLONE_PASSWORD=admin
142 | ```
143 | This is an example configuration for local development only.
144 | You can use the `start-redis-support` command to spin up a Plone instance with the environment variables already set:
145 |
146 | ```shell
147 | make start-redis-support
148 | ```
149 |
150 | 4. Start a Redis server
151 |
152 | Start your own or use the `start-redis` command:
153 | ```shell
154 | make start-redis
155 | ```
156 |
157 | 5. Start a redis worker
158 |
159 | The redis worker does the "job" and indexes everything via two queues:
160 |
161 | - normal: normal indexing/reindexing/unindexing jobs - does basically the same thing as without redis support, just via a queue.
162 | - low: holds jobs for expensive blob indexing
163 |
164 | The priority is handled by the python-rq worker.
165 |
166 | The rq worker needs to be started with the same environment variables present as described in step 3.
167 |
168 | ```shell
169 | ./bin/rq worker normal low --with-scheduler
170 | ```
171 |
172 | `--with-scheduler` is needed in order to retry failed jobs after a certain time period.
173 |
174 | Or use the `worker` command:
175 | ```shell
176 | make worker
177 | ```
178 |
179 | 6. Go to the control panel and repeat the following steps:
180 |
181 | - Check "Enable"
182 | - Click "Convert Catalog"
183 | - Click "Rebuild Catalog"
184 |
185 | ### Technical documentation for elasticsearch
186 |
187 | #### Pipeline
188 |
189 | If you hit "Convert" in the control panel and meet all the requirements to index blobs as well,
190 | collective.elasticsearch installs a default pipeline for the plone index.
191 | This pipeline converts the binary data to text (if possible) and extends the SearchableText index with the extracted data.
192 | The setup uses multiple nested processors in order to extract the binary data from all blob fields.
193 |
194 | The binary data is not stored in the index permanently; as a last step, the pipeline removes the binary itself.
195 |
196 | #### ingest-attachment plugin
197 |
198 | The ingest-attachment plugin is used to extract text data from any binary with Tika.
199 |
200 |
201 | ### Note on Performance
202 |
203 | Putting all the jobs into a queue is much faster than actually calculating all index values and sending them to elasticsearch.
204 | This feature aims to have a minimal impact on the responsiveness of the Plone site.
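To check that the plugin and the pipeline are in place, you can ask Elasticsearch directly. This assumes Elasticsearch listens on localhost:9200, as in the examples above:

```shell
# the ingest-attachment plugin should show up in this list
curl http://localhost:9200/_cat/plugins

# list the ingest pipelines, including the one created by "Convert Catalog"
curl "http://localhost:9200/_ingest/pipeline?pretty"
```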
205 |
206 |
207 | ## Compatibility
208 |
209 | - Python 3
210 | - Plone 5.2 and above
211 | - Tested with Elastic Search 7.17.7
212 |
213 | ## State
214 |
215 | Support for all index column types is done EXCEPT for the DateRecurringIndex index column type. If you are doing a full text search along with a query that contains a DateRecurringIndex column, it will not work.
216 |
217 |
218 | ## Search Highlighting
219 |
220 | If you want to make use of the [Elasticsearch highlight](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html) feature, you can enable it in the control panel.
221 |
222 | When enabled, it will replace the description of search results with the highlighted fragments from elastic search.
223 |
224 | ### Highlight Threshold
225 |
226 | This is the number of characters to show in the description. Fragments will be added until this threshold is met.
227 |
228 | ### Pre/Post Tags
229 |
230 | Highlighted terms can be wrapped in HTML, which can be used to enhance the results further, such as by adding a custom background color. Note that the default Plone search results will not render HTML, so to use this feature you will need to create a custom search result view.
231 |
232 | ## Developing this package
233 |
234 | Create the virtual environment and install all dependencies:
235 |
236 | ```shell
237 | make build
238 | ```
239 |
240 | Start Plone in the foreground:
241 |
242 | ```shell
243 | make start
244 | ```
245 |
246 |
247 | ### Running tests
248 |
249 | ```shell
250 | make test
251 | ```
252 |
253 |
254 | ### Formatting the codebase
255 |
256 | ```shell
257 | make format
258 | ```
259 |
260 | ### Linting the codebase
261 |
262 | ```shell
263 | make lint
264 | ```
265 |
266 | ## License
267 |
268 | The project is licensed under the GPLv2.
269 |
-------------------------------------------------------------------------------- /docker-compose.dev.yaml: --------------------------------------------------------------------------------
1 | version: "3.8"
2 |
3 | services:
4 |   redis:
5 |     image: redis:7.0.5
6 |     command: redis-server --appendonly yes
7 |     ports:
8 |       - 6379:6379
9 |     volumes:
10 |       - redis_data:/data
11 |
12 |   elasticsearch:
13 |     build:
14 |       context: .
15 |       dockerfile: docker/elasticsearch.Dockerfile
16 |     ports:
17 |       - 9200:9200
18 |       - 9300:9300
19 |     environment:
20 |       - discovery.type=single-node
21 |       - cluster.name=docker-cluster
22 |       - http.cors.enabled=true
23 |       - http.cors.allow-origin=*
24 |       - http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization
25 |       - http.cors.allow-credentials=true
26 |       - ES_JAVA_OPTS=-Xms512m -Xmx512m
27 |     volumes:
28 |       - elasticsearch_data:/usr/share/elasticsearch/data
29 |
30 |   worker:
31 |     build:
32 |       context: .
33 |       dockerfile: docker/worker.Dockerfile
34 |     environment:
35 |       - PLONE_REDIS_DSN=redis://redis:6379/0
36 |       - PLONE_BACKEND=http://plone:8080/Plone
37 |       - PLONE_USERNAME=admin
38 |       - PLONE_PASSWORD=admin
39 |
40 |   plone:
41 |     build:
42 |       context: .
43 | dockerfile: docker/plone.Dockerfile 44 | environment: 45 | - PLONE_REDIS_DSN=redis://redis:6379/0 46 | - PLONE_BACKEND=http://127.0.0.1:8080/Plone 47 | - PLONE_USERNAME=admin 48 | - PLONE_PASSWORD=admin 49 | ports: 50 | - "8080:8080" 51 | depends_on: 52 | - redis 53 | - elasticsearch 54 | - worker 55 | volumes: 56 | - plone_data:/data 57 | 58 | volumes: 59 | redis_data: 60 | elasticsearch_data: 61 | plone_data: 62 | -------------------------------------------------------------------------------- /docker/elasticsearch.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM elasticsearch:7.17.7 2 | 3 | RUN bin/elasticsearch-plugin install ingest-attachment -b 4 | -------------------------------------------------------------------------------- /docker/plone.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM plone/plone-backend:6.0.0b3 2 | 3 | WORKDIR /app 4 | 5 | RUN /app/bin/pip install git+https://github.com/collective/collective.elasticsearch.git@mle-redis-rq#egg=collective.elasticsearch[redis] 6 | 7 | ENV PROFILES="collective.elasticsearch:default collective.elasticsearch:docker-dev" 8 | ENV TYPE="classic" 9 | ENV SITE="Plone" 10 | -------------------------------------------------------------------------------- /docker/worker.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM plone/plone-backend:6.0.0b3 2 | 3 | WORKDIR /app 4 | 5 | RUN /app/bin/pip install git+https://github.com/collective/collective.elasticsearch.git@mle-redis-rq#egg=collective.elasticsearch[redis] 6 | 7 | CMD /app/bin/rq worker normal low --with-scheduler --url=$PLONE_REDIS_DSN 8 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/collectiveelasticsearch.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/collectiveelasticsearch.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/collectiveelasticsearch" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/collectiveelasticsearch" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # collective.elasticsearch documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Mar 13 15:04:25 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 
11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # The suffix(es) of source filenames. 37 | # You can specify multiple suffix as a list of string: 38 | # source_suffix = ['.rst', '.md'] 39 | source_suffix = '.rst' 40 | 41 | # The encoding of source files. 42 | #source_encoding = 'utf-8-sig' 43 | 44 | # The master toctree document. 45 | master_doc = 'index' 46 | 47 | # General information about the project. 48 | project = u'collective.elasticsearch' 49 | copyright = u'Nathan Van Gheem (vangheem)' 50 | author = u'Nathan Van Gheem (vangheem)' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = u'3.0' 58 | # The full version, including alpha/beta/rc tags. 59 | release = u'3.0' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | #today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | #today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This patterns also effect to html_static_path and html_extra_path 77 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 78 | 79 | # The reST default role (used for this markup: `text`) to use for all 80 | # documents. 81 | #default_role = None 82 | 83 | # If true, '()' will be appended to :func: etc. cross-reference text. 84 | #add_function_parentheses = True 85 | 86 | # If true, the current module name will be prepended to all description 87 | # unit titles (such as .. function::). 88 | #add_module_names = True 89 | 90 | # If true, sectionauthor and moduleauthor directives will be shown in the 91 | # output. They are ignored by default. 92 | #show_authors = False 93 | 94 | # The name of the Pygments (syntax highlighting) style to use. 95 | pygments_style = 'sphinx' 96 | 97 | # A list of ignored prefixes for module index sorting. 98 | #modindex_common_prefix = [] 99 | 100 | # If true, keep warnings as "system message" paragraphs in the built documents. 
101 | #keep_warnings = False 102 | 103 | # If true, `todo` and `todoList` produce output, else they produce nothing. 104 | todo_include_todos = False 105 | 106 | 107 | # -- Options for HTML output ---------------------------------------------- 108 | 109 | # The theme to use for HTML and HTML Help pages. See the documentation for 110 | # a list of builtin themes. 111 | html_theme = 'alabaster' 112 | 113 | # Theme options are theme-specific and customize the look and feel of a theme 114 | # further. For a list of options available for each theme, see the 115 | # documentation. 116 | #html_theme_options = {} 117 | 118 | # Add any paths that contain custom themes here, relative to this directory. 119 | #html_theme_path = [] 120 | 121 | # The name for this set of Sphinx documents. 122 | # " v documentation" by default. 123 | #html_title = u'bobtemplates.plone v3.0' 124 | 125 | # A shorter title for the navigation bar. Default is the same as html_title. 126 | #html_short_title = None 127 | 128 | # The name of an image file (relative to this directory) to place at the top 129 | # of the sidebar. 130 | #html_logo = None 131 | 132 | # The name of an image file (relative to this directory) to use as a favicon of 133 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 134 | # pixels large. 135 | #html_favicon = None 136 | 137 | # Add any paths that contain custom static files (such as style sheets) here, 138 | # relative to this directory. They are copied after the builtin static files, 139 | # so a file named "default.css" will overwrite the builtin "default.css". 140 | html_static_path = ['_static'] 141 | 142 | # Add any extra paths that contain custom files (such as robots.txt or 143 | # .htaccess) here, relative to this directory. These files are copied 144 | # directly to the root of the documentation. 145 | #html_extra_path = [] 146 | 147 | # If not None, a 'Last updated on:' timestamp is inserted at every page 148 | # bottom, using the given strftime format. 149 | # The empty string is equivalent to '%b %d, %Y'. 150 | #html_last_updated_fmt = None 151 | 152 | # If true, SmartyPants will be used to convert quotes and dashes to 153 | # typographically correct entities. 154 | #html_use_smartypants = True 155 | 156 | # Custom sidebar templates, maps document names to template names. 157 | #html_sidebars = {} 158 | 159 | # Additional templates that should be rendered to pages, maps page names to 160 | # template names. 161 | #html_additional_pages = {} 162 | 163 | # If false, no module index is generated. 164 | #html_domain_indices = True 165 | 166 | # If false, no index is generated. 167 | #html_use_index = True 168 | 169 | # If true, the index is split into individual pages for each letter. 170 | #html_split_index = False 171 | 172 | # If true, links to the reST sources are added to the pages. 173 | #html_show_sourcelink = True 174 | 175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 176 | #html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 179 | #html_show_copyright = True 180 | 181 | # If true, an OpenSearch description file will be output, and all pages will 182 | # contain a tag referring to it. The value of this option must be the 183 | # base URL from which the finished HTML is served. 184 | #html_use_opensearch = '' 185 | 186 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 
187 | #html_file_suffix = None 188 | 189 | # Language to be used for generating the HTML full-text search index. 190 | # Sphinx supports the following languages: 191 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 192 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 193 | #html_search_language = 'en' 194 | 195 | # A dictionary with options for the search language support, empty by default. 196 | # 'ja' uses this config value. 197 | # 'zh' user can custom change `jieba` dictionary path. 198 | #html_search_options = {'type': 'default'} 199 | 200 | # The name of a javascript file (relative to the configuration directory) that 201 | # implements a search results scorer. If empty, the default will be used. 202 | #html_search_scorer = 'scorer.js' 203 | 204 | # Output file base name for HTML help builder. 205 | htmlhelp_basename = 'collective.elasticsearchdoc' 206 | 207 | # -- Options for LaTeX output --------------------------------------------- 208 | 209 | latex_elements = { 210 | # The paper size ('letterpaper' or 'a4paper'). 211 | #'papersize': 'letterpaper', 212 | 213 | # The font size ('10pt', '11pt' or '12pt'). 214 | #'pointsize': '10pt', 215 | 216 | # Additional stuff for the LaTeX preamble. 217 | #'preamble': '', 218 | 219 | # Latex figure (float) alignment 220 | #'figure_align': 'htbp', 221 | } 222 | 223 | # Grouping the document tree into LaTeX files. List of tuples 224 | # (source start file, target name, title, 225 | # author, documentclass [howto, manual, or own class]). 226 | latex_documents = [ 227 | ('index', 'collectiveelasticsearch.tex', u'collective.elasticsearch Documentation', 228 | u'Nathan Van Gheem', 'manual'), 229 | ] 230 | 231 | # The name of an image file (relative to this directory) to place at the top of 232 | # the title page. 233 | #latex_logo = None 234 | 235 | # For "manual" documents, if this is true, then toplevel headings are parts, 236 | # not chapters. 237 | #latex_use_parts = False 238 | 239 | # If true, show page references after internal links. 240 | #latex_show_pagerefs = False 241 | 242 | # If true, show URL addresses after external links. 243 | #latex_show_urls = False 244 | 245 | # Documents to append as an appendix to all manuals. 246 | #latex_appendices = [] 247 | 248 | # If false, no module index is generated. 249 | #latex_domain_indices = True 250 | 251 | 252 | # -- Options for manual page output --------------------------------------- 253 | 254 | # One entry per manual page. List of tuples 255 | # (source start file, name, description, authors, manual section). 256 | man_pages = [ 257 | ('index', 'collectiveelasticsearch', u'collective.elasticsearch Documentation', 258 | [u'Nathan Van Gheem'], 1) 259 | ] 260 | 261 | # If true, show URL addresses after external links. 262 | #man_show_urls = False 263 | 264 | 265 | # -- Options for Texinfo output ------------------------------------------- 266 | 267 | # Grouping the document tree into Texinfo files. List of tuples 268 | # (source start file, target name, title, author, 269 | # dir menu entry, description, category) 270 | texinfo_documents = [ 271 | ('index', 'collectiveelasticsearch', u'collective.elasticsearch Documentation', 272 | u'Nathan Van Gheem', 'collectiveelasticsearch', 'One line description of project.', 273 | 'Miscellaneous'), 274 | ] 275 | 276 | # Documents to append as an appendix to all manuals. 277 | #texinfo_appendices = [] 278 | 279 | # If false, no module index is generated. 
280 | #texinfo_domain_indices = True
281 |
282 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
283 | #texinfo_show_urls = 'footnote'
284 |
285 | # If true, do not generate a @detailmenu in the "Top" node's menu.
286 | #texinfo_no_detailmenu = False
287 |
-------------------------------------------------------------------------------- /docs/config.rst: --------------------------------------------------------------------------------
1 | Configuration
2 | =============
3 |
4 | Basic configuration
5 | -------------------
6 |
7 | - Go to the Control Panel
8 | - Add "Elastic Search" in Add-on Products
9 | - Click "Elastic Search" in "Add-on Configuration"
10 | - Enable
11 | - Click "Convert Catalog"
12 | - Click "Rebuild Catalog"
13 |
14 |
15 | Changing the index used for elasticsearch
16 | -----------------------------------------
17 |
18 | The index used for elasticsearch is the path to the portal_catalog by default, so you don't have anything to do if
19 | you have several Plone sites on the same instance (the Plone site ids would be different).
20 |
21 | However, if you want to use the same elasticsearch instance with several Plone instances, you may
22 | end up having conflicts. In that case, you may want to manually set the index used by adding the following code
23 | to the ``__init__.py`` file of your module::
24 |
25 |     from Products.CMFPlone.CatalogTool import CatalogTool
26 |     from collective.elasticsearch.es import CUSTOM_INDEX_NAME_ATTR
27 |
28 |     setattr(CatalogTool, CUSTOM_INDEX_NAME_ATTR, "my_elasticsearch_custom_index")
29 |
30 |
31 | Adding custom indexes which are not in the catalog
32 | --------------------------------------------------
33 |
34 | An adapter is used to define the mapping between the indexes and the elasticsearch properties. You can override
35 | the ``_default_mapping`` attribute to add your own indexes::
36 |
46 |     @implementer(IMappingProvider)
47 |     class MyMappingAdapter(object):
48 |
49 |         _default_mapping = {
50 |             'SearchableText': {'store': False, 'type': 'text', 'index': True},
51 |             'Title': {'store': False, 'type': 'text', 'index': True},
52 |             'Description': {'store': False, 'type': 'text', 'index': True},
53 |             'MyOwnIndex': {'store': False, 'type': 'text', 'index': True},
54 |         }
55 |
56 |
57 | Changing the settings of the index
58 | ----------------------------------
59 |
60 | If you want to customize your elasticsearch index, you can override the ``get_index_creation_body`` method on the ``MappingAdapter``::
61 |
62 |     @implementer(IMappingProvider)
63 |     class MyMappingAdapter(object):
64 |
65 |         def get_index_creation_body(self):
66 |             return {
67 |                 "settings": {
68 |                     "number_of_shards": 1,
69 |                     "number_of_replicas": 0
70 |                 }
71 |             }
72 |
73 |
74 | Changing the query made to elasticsearch
75 | ----------------------------------------
76 |
77 | The query generation is handled by another adapter.
78 |
85 | You will have to override the ``__call__`` method to change the query. Look at the original adapter to get a better
86 | idea of what you need to change.
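As a rough sketch only - the interface name (``IQueryAssembler``), the constructor signature and the helper
``build_base_query`` below are assumptions made for illustration; check ``interfaces.py`` and the original adapter
in ``query.py`` for the real names::

    from zope.interface import implementer
    from collective.elasticsearch.interfaces import IQueryAssembler  # assumed name


    @implementer(IQueryAssembler)
    class MyQueryAdapter(object):

        def __init__(self, request, es):  # assumed signature
            self.request = request
            self.es = es

        def __call__(self, dquery):
            # reuse or copy the original adapter's logic to build the base query
            query = build_base_query(dquery)  # hypothetical helper
            # example tweak: only ever return published content
            return {
                "bool": {
                    "must": [query],
                    "filter": [{"term": {"review_state": "published"}}],
                }
            }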
5 | 
6 | Welcome to collective.elasticsearch's documentation!
7 | ====================================================
8 | 
9 | Overview
10 | --------
11 | 
12 | This package aims to index all fields the portal_catalog indexes
13 | and allows you to delete the `Title`, `Description` and `SearchableText`
14 | indexes, which can significantly improve performance and reduce RAM usage.
15 | 
16 | ElasticSearch queries are then ONLY used when `Title`, `Description` or `SearchableText`
17 | are part of the query. Otherwise, Plone's default catalog is used,
18 | because it is faster than ElasticSearch for normal
19 | queries.
20 | 
21 | 
22 | Compatibility
23 | -------------
24 | 
25 | Only unit tested with Plone 5 with Dexterity types and Archetypes.
26 | 
27 | It should also work with Plone 4.3 and Plone 5.1.
28 | 
29 | Deployed with Elasticsearch 7.6.0.
30 | 
31 | State
32 | -----
33 | 
34 | All index column types are supported EXCEPT for the DateRecurringIndex
35 | column type. A full text search combined with a query that contains
36 | a DateRecurringIndex column will not work.
37 | 
38 | 
39 | Celery support
40 | --------------
41 | 
42 | This package comes with Celery support: all indexing operations are pushed
43 | to Celery and run asynchronously.
44 | 
45 | Please see the instructions for collective.celery to see how this works.
46 | 
47 | Contents:
48 | 
49 | .. toctree::
50 |    :maxdepth: 2
51 | 
52 |    install
53 |    config
54 |    history
55 | 
56 | 
57 | 
58 | Indices and tables
59 | ==================
60 | 
61 | * :ref:`genindex`
62 | * :ref:`modindex`
63 | * :ref:`search`
-------------------------------------------------------------------------------- /docs/install.rst: --------------------------------------------------------------------------------
1 | Installation
2 | ============
3 | 
4 | collective.elasticsearch
5 | ------------------------
6 | 
7 | To install collective.elasticsearch into the global Python environment (or a workingenv),
8 | using a traditional Zope 2 instance, you can do this:
9 | 
10 | * When you're reading this you have probably already run
11 |   ``easy_install collective.elasticsearch``. Find out how to install setuptools
12 |   (and EasyInstall) here:
13 |   http://peak.telecommunity.com/DevCenter/EasyInstall
14 | 
15 | * If you are using Zope 2.9 (not 2.10), get `pythonproducts`_ and install it
16 |   via::
17 | 
18 |       python setup.py install --home /path/to/instance
19 | 
20 |   into your Zope instance.
21 | 
22 | * Create a file called ``collective.elasticsearch-configure.zcml`` in the
23 |   ``/path/to/instance/etc/package-includes`` directory. The file
24 |   should only contain this::
25 | 
26 |       <include package="collective.elasticsearch" />
27 | 
28 | .. _pythonproducts: http://plone.org/products/pythonproducts
29 | 
30 | 
31 | Alternatively, if you are using zc.buildout and the plone.recipe.zope2instance
32 | recipe to manage your project, you can do this:
33 | 
34 | * Add ``collective.elasticsearch`` to the list of eggs to install, e.g.::
35 | 
36 |     [buildout]
37 |     ...
38 |     eggs =
39 |         ...
40 |         collective.elasticsearch
41 | 
42 | * Tell the plone.recipe.zope2instance recipe to install a ZCML slug::
43 | 
44 |     [instance]
45 |     recipe = plone.recipe.zope2instance
46 |     ...
47 |     zcml =
48 |         collective.elasticsearch
49 | 
50 | * Re-run buildout, e.g. with::
51 | 
52 |     $ ./bin/buildout
53 | 
54 | You can skip the ZCML slug if you are going to explicitly include the package
55 | from another package's configure.zcml file.
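
Alternatively, on a modern pip-based setup you can install the package directly. A minimal
sketch (the optional ``redis`` extra mirrors the ``extras_require`` section of ``setup.py``)::

    $ pip install "collective.elasticsearch[redis]"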
56 | 
57 | elasticsearch
58 | -------------
59 | 
60 | Less than 5 minutes:
61 | - Download & install Java (optional: Elasticsearch 7.x ships with a bundled JDK)
62 | - Download & install Elasticsearch
63 | - bin/elasticsearch
64 | 
65 | Step by Step for Ubuntu:
66 | - apt-get update
67 | - apt-get install git curl (a separate Java install is no longer needed; Elasticsearch 7.x bundles a JDK)
68 | - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.0-linux-x86_64.tar.gz
69 | - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.0-linux-x86_64.tar.gz.sha512
70 | - shasum -a 512 -c elasticsearch-7.6.0-linux-x86_64.tar.gz.sha512
71 | - tar -xzf elasticsearch-7.6.0-linux-x86_64.tar.gz
72 | - cd elasticsearch-7.6.0
73 | - bin/elasticsearch
74 | 
75 | Step by Step for CentOS/RedHat:
76 | - yum -y install java-1.8.0-openjdk.x86_64 (optional for Elasticsearch 7.x, which bundles a JDK)
77 | - alternatives --auto java
78 | - curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.0-linux-x86_64.tar.gz
79 | - tar xfvz elasticsearch-7.6.0-linux-x86_64.tar.gz
80 | - cd elasticsearch-7.6.0
81 | - bin/elasticsearch
82 | 
83 | Does it work?
84 | - curl http://localhost:9200/
85 | - Do you see the Hudsucker Proxy reference? "You Know, for Search"
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | REM Command file for Sphinx documentation
4 | 
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 | 
16 | if "%1" == "" goto help
17 | 
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | goto end
41 | )
42 | 
43 | if "%1" == "clean" (
44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
45 | del /q /s %BUILDDIR%\*
46 | goto end
47 | )
48 | 
49 | 
50 | %SPHINXBUILD% 2> nul
51 | if errorlevel 9009 (
52 | echo.
53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
54 | echo.installed, then set the SPHINXBUILD environment variable to point
55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
56 | echo.may add the Sphinx directory to PATH.
57 | echo.
58 | echo.If you don't have Sphinx installed, grab it from
59 | echo.http://sphinx-doc.org/
60 | exit /b 1
61 | )
62 | 
63 | if "%1" == "html" (
64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
65 | if errorlevel 1 exit /b 1
66 | echo.
67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
68 | goto end
69 | )
70 | 
71 | if "%1" == "dirhtml" (
72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
73 | if errorlevel 1 exit /b 1
74 | echo.
75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
76 | goto end
77 | )
78 | 
79 | if "%1" == "singlehtml" (
80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
81 | if errorlevel 1 exit /b 1
82 | echo.
83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
84 | goto end
85 | )
86 | 
87 | if "%1" == "pickle" (
88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
89 | if errorlevel 1 exit /b 1
90 | echo.
91 | echo.Build finished; now you can process the pickle files.
92 | goto end
93 | )
94 | 
95 | if "%1" == "json" (
96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
97 | if errorlevel 1 exit /b 1
98 | echo.
99 | echo.Build finished; now you can process the JSON files.
100 | goto end
101 | )
102 | 
103 | if "%1" == "htmlhelp" (
104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | if errorlevel 1 exit /b 1
106 | echo.
107 | echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | goto end
110 | )
111 | 
112 | if "%1" == "qthelp" (
113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | if errorlevel 1 exit /b 1
115 | echo.
116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\collectiveelasticsearch.qhcp
119 | echo.To view the help file:
120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\collectiveelasticsearch.qhc
121 | goto end
122 | )
123 | 
124 | if "%1" == "devhelp" (
125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished.
129 | goto end
130 | )
131 | 
132 | if "%1" == "epub" (
133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | if errorlevel 1 exit /b 1
135 | echo.
136 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | goto end
138 | )
139 | 
140 | if "%1" == "latex" (
141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | if errorlevel 1 exit /b 1
143 | echo.
144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | goto end
146 | )
147 | 
148 | if "%1" == "latexpdf" (
149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | cd %BUILDDIR%/latex
151 | make all-pdf
152 | cd %BUILDDIR%/..
153 | echo.
154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | goto end
156 | )
157 | 
158 | if "%1" == "latexpdfja" (
159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | cd %BUILDDIR%/latex
161 | make all-pdf-ja
162 | cd %BUILDDIR%/..
163 | echo.
164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | goto end
166 | )
167 | 
168 | if "%1" == "text" (
169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | if errorlevel 1 exit /b 1
171 | echo.
172 | echo.Build finished. The text files are in %BUILDDIR%/text.
173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /instance.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # This is a cookiecutter configuration context file for 3 | # 4 | # cookiecutter-zope-instance 5 | # 6 | # available options are documented at 7 | # https://github.com/bluedynamics/cookiecutter-zope-instance/ 8 | 9 | default_context: 10 | debug_mode: true 11 | verbose_security: true 12 | wsgi_listen: 0.0.0.0:8080 13 | initial_user_name: admin 14 | initial_user_password: admin 15 | load_zcml: 16 | package_includes: ['collective.elasticsearch'] 17 | db_storage: direct 18 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py38'] 4 | include = '\.pyi?$' 5 | 6 | [tool.isort] 7 | profile = "black" 8 | force_alphabetical_sort = true 9 | force_single_line = true 10 | lines_after_imports = 2 11 | line_length = 88 12 | 13 | [tool.flakeheaven.plugins] 14 | # Disable some checks. 15 | # - E501 line too long 16 | # flake8 is already testing this, with max-line-length=100000 in .flake8, 17 | # so pycodestyle should not test it. 
18 | # - W503 line break before binary operator 19 | # Outdated recommendation, see https://www.flake8rules.com/rules/W503.html 20 | mccabe = ["+*"] 21 | pycodestyle = ["+*", "-E501", "-W503"] 22 | pyflakes = ["+*"] 23 | pylint = ["+*"] 24 | 25 | [tool.plone-code-analysis] 26 | checkers = ["black", "flake8", "isort", "pyroma", "zpretty"] 27 | formatters = ["black", "isort", "zpretty"] 28 | paths = "setup.py src/ scripts/" 29 | -------------------------------------------------------------------------------- /scripts/populate.py: -------------------------------------------------------------------------------- 1 | from AccessControl.SecurityManagement import newSecurityManager 2 | from AccessControl.SecurityManager import setSecurityPolicy 3 | from lxml.html import fromstring 4 | from lxml.html import tostring 5 | from multiprocessing.pool import ThreadPool as Pool 6 | from plone import api 7 | from plone.app.textfield.value import RichTextValue 8 | from Products.CMFCore.tests.base.security import OmnipotentUser 9 | from Products.CMFCore.tests.base.security import PermissiveSecurityPolicy 10 | from Testing.makerequest import makerequest 11 | from unidecode import unidecode 12 | from zope.component.hooks import setSite 13 | 14 | import os 15 | import random 16 | import requests 17 | import transaction 18 | 19 | 20 | SITE_ID = "Plone" 21 | 22 | 23 | def parse_url(url): 24 | resp = requests.get(url) 25 | return resp.content 26 | 27 | 28 | def spoofRequest(app): # NOQA W0621 29 | """ 30 | Make REQUEST variable to be available on the Zope application server. 31 | 32 | This allows acquisition to work properly 33 | """ 34 | _policy = PermissiveSecurityPolicy() 35 | setSecurityPolicy(_policy) 36 | newSecurityManager(None, OmnipotentUser().__of__(app.acl_users)) 37 | return makerequest(app) 38 | 39 | 40 | # Enable Faux HTTP request object 41 | app = spoofRequest(app) # noqa 42 | 43 | _dir = os.path.join(os.getcwd(), "src") 44 | 45 | _links = [] # type: list 46 | _toparse = [] # type: list 47 | 48 | 49 | def parse_urls(urls): 50 | with Pool(8) as pool: 51 | return pool.map(parse_url, urls) 52 | 53 | 54 | class DataReader: 55 | base_url = "https://en.wikipedia.org" 56 | base_content_url = base_url + "/wiki/" 57 | start_page = base_content_url + "Main_Page" 58 | title_selector = "#firstHeading" 59 | content_selector = "#bodyContent" 60 | 61 | def __init__(self): 62 | self.parsed = [] 63 | self.toparse = [self.start_page] 64 | self.toprocess = [] 65 | 66 | def get_content(self, html, selector, text=False): # NOQA R0201 67 | els = html.cssselect(selector) 68 | if len(els) > 0: 69 | if text: 70 | return unidecode(els[0].text_content()) 71 | return tostring(els[0]) 72 | return None 73 | 74 | def __iter__(self): 75 | while len(self.toparse) > 0: 76 | if len(self.toprocess) == 0: 77 | toparse = [ 78 | self.toparse.pop(0) for _ in range(min(20, len(self.toparse))) 79 | ] 80 | self.toprocess = parse_urls(toparse) 81 | self.parsed.extend(toparse) 82 | html = fromstring(self.toprocess.pop(0)) 83 | 84 | # get more links! 
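# Queue further wiki articles to crawl: keep only links under
# base_content_url and skip pages that were already fetched.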
85 |             for el in html.cssselect("a"):
86 |                 url = el.attrib.get("href", "")
87 |                 if url.startswith("/"):
88 |                     url = self.base_url + url
89 |                 if url.startswith(self.base_content_url) and url not in self.parsed:
90 |                     self.toparse.append(url)
91 | 
92 |             title = self.get_content(html, self.title_selector, text=True)
93 |             body = self.get_content(html, self.content_selector)
94 |             if not title or not body:
95 |                 continue
96 | 
97 |             yield {
98 |                 "title": f"{title}",
99 |                 "text": RichTextValue(
100 |                     body.decode("utf-8"),
101 |                     mimeType="text/html",
102 |                     outputMimeType="text/x-html-safe",
103 |                 ),
104 |             }
105 | 
106 | 
107 | def importit(app):  # NOQA W0621
108 |     site = app[SITE_ID]
109 |     setSite(site)
110 |     per_folder = 50
111 |     num_folders = 6
112 |     max_depth = 4
113 |     portal_types = ["Document", "News Item", "Event"]
114 |     data = iter(DataReader())
115 | 
116 |     def populate(parent, count=0, depth=0):
117 |         if depth >= max_depth:
118 |             return count
119 |         for fidx in range(num_folders):
120 |             count += 1
121 |             fid = f"folder{fidx}"
122 |             if fid in parent.objectIds():
123 |                 folder = parent[fid]
124 |             else:
125 |                 folder = api.content.create(
126 |                     type="Folder",
127 |                     title=f"Folder {fidx}",
128 |                     id=fid,
129 |                     exclude_from_nav=True,
130 |                     container=parent,
131 |                 )
132 |             for didx in range(per_folder):
133 |                 count += 1
134 |                 pid = f"page{didx}"
135 |                 if pid not in folder.objectIds():
136 |                     payload = next(data)
137 |                     try:
138 |                         api.content.create(
139 |                             type=random.choice(portal_types),
140 |                             id=pid,
141 |                             container=folder,
142 |                             exclude_from_nav=True,
143 |                             **payload,
144 |                         )
145 |                         print("created ", count)
146 |                     except Exception:  # NOQA W0703
147 |                         print("skipping", count)
148 |             print("committing")
149 |             transaction.commit()
150 |             count = populate(folder, count, depth + 1)
151 |             app._p_jar.cacheMinimize()
152 |         return count
153 | 
154 |     populate(site)
155 | 
156 | 
157 | importit(app)
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | """Installer for the collective.elasticsearch package."""
2 | from pathlib import Path
3 | from setuptools import find_packages
4 | from setuptools import setup
5 | 
6 | 
7 | long_description = f"""
8 | {Path("README.md").read_text()}\n
9 | {Path("CHANGELOG.md").read_text()}\n
10 | """
11 | 
12 | 
13 | setup(
14 |     name="collective.elasticsearch",
15 |     version="5.0.1.dev0",
16 |     description="elasticsearch integration with plone",
17 |     long_description=long_description,
18 |     long_description_content_type="text/markdown",
19 |     # Get more from https://pypi.org/classifiers/
20 |     classifiers=[
21 |         "Development Status :: 5 - Production/Stable",
22 |         "Environment :: Web Environment",
23 |         "Framework :: Plone :: 5.2",
24 |         "Framework :: Plone :: 6.0",
25 |         "Framework :: Plone :: Addon",
26 |         "Framework :: Plone",
27 |         "Framework :: Zope :: 4",
28 |         "Framework :: Zope :: 5",
29 |         "Intended Audience :: System Administrators",
30 |         "License :: OSI Approved :: GNU General Public License (GPL)",
31 |         "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
32 |         "Operating System :: OS Independent",
33 |         "Programming Language :: Python :: 3 :: Only",
34 |         "Programming Language :: Python :: 3.7",
35 |         "Programming Language :: Python :: 3.8",
36 |         "Programming Language :: Python :: 3.9",
37 |         "Programming Language :: Python :: 3.10",
38 |         "Programming Language :: Python",
39 |         "Topic :: Software Development :: Libraries :: Python Modules",
40 |     ],
41 | 
keywords="plone elasticsearch search indexing", 42 | author="Nathan Van Gheem", 43 | author_email="vangheem@gmail.com", 44 | url="https://github.com/collective/collective.elasticsearch", 45 | project_urls={ 46 | "PyPI": "https://pypi.python.org/pypi/collective.elasticsearch", 47 | "Source": "https://github.com/collective/collective.elasticsearch", 48 | "Tracker": "https://github.com/collective/collective.elasticsearch/issues", 49 | }, 50 | license="GPL version 2", 51 | packages=find_packages("src", exclude=["ez_setup"]), 52 | namespace_packages=["collective"], 53 | package_dir={"": "src"}, 54 | include_package_data=True, 55 | zip_safe=False, 56 | python_requires=">=3.7", 57 | install_requires=[ 58 | "setuptools", 59 | "elasticsearch==7.17.7", 60 | "plone.app.registry", 61 | "plone.api", 62 | "setuptools", 63 | ], 64 | extras_require={ 65 | "test": [ 66 | "plone.app.contentrules", 67 | "plone.app.contenttypes", 68 | "plone.restapi[test]", 69 | "plone.app.testing[robot]>=7.0.0a3", 70 | "plone.app.robotframework[test]>=2.0.0a5", 71 | "parameterized", 72 | ], 73 | "redis": [ 74 | "redis", 75 | "rq", 76 | "requests", 77 | "cbor2", 78 | ], 79 | }, 80 | entry_points=""" 81 | [z3c.autoinclude.plugin] 82 | target = plone 83 | [plone.autoinclude.plugin] 84 | target = plone 85 | """, 86 | ) 87 | -------------------------------------------------------------------------------- /src/collective/__init__.py: -------------------------------------------------------------------------------- 1 | __import__("pkg_resources").declare_namespace(__name__) 2 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | logger = logging.getLogger("collective.elasticsearch") 5 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/browser/__init__.py -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/configure.zcml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 10 | 17 | 18 | 26 | 27 | 35 | 36 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/controlpanel.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.interfaces import IElasticSettings 2 | from collective.elasticsearch.manager import ElasticSearchManager 3 | from collective.elasticsearch.utils import is_redis_available 4 | from elasticsearch.exceptions import ConnectionError as conerror 5 | from plone import api 6 | from plone.app.registry.browser.controlpanel import ControlPanelFormWrapper 7 | from plone.app.registry.browser.controlpanel import RegistryEditForm 8 | from plone.z3cform import layout 9 | from Products.Five.browser.pagetemplatefile import ViewPageTemplateFile 10 | from urllib3.exceptions import NewConnectionError 11 | from z3c.form import form 12 | 13 | 14 | class ElasticControlPanelForm(RegistryEditForm): 15 | form.extends(RegistryEditForm) 16 | schema = IElasticSettings 17 | 18 | label = "Elastic Search Settings" 
19 | 
20 |     control_panel_view = "@@elastic-controlpanel"
21 | 
22 |     def updateWidgets(self):
23 |         super().updateWidgets()
24 |         if not is_redis_available():
25 |             self.widgets["use_redis"].disabled = "disabled"
26 | 
27 | 
28 | class ElasticControlPanelFormWrapper(ControlPanelFormWrapper):
29 |     index = ViewPageTemplateFile("controlpanel_layout.pt")
30 | 
31 |     def __init__(self, *args, **kwargs):
32 |         super().__init__(*args, **kwargs)
33 |         self.portal_catalog = api.portal.get_tool("portal_catalog")
34 |         self.es = ElasticSearchManager()
35 | 
36 |     @property
37 |     def connection_status(self):
38 |         try:
39 |             # Older clients exposed .status(); current clients raise
40 |             # AttributeError here, which falls through to the health check.
41 |             return self.es.connection.status()["ok"]
42 |         except (
43 |             conerror,
44 |             ConnectionError,
45 |             NewConnectionError,
46 |             ConnectionRefusedError,
47 |             AttributeError,
48 |         ):
49 |             try:
50 |                 health_status = self.es.connection.cluster.health()["status"]
51 |                 return health_status in ("green", "yellow")
52 |             except (
53 |                 conerror,
54 |                 ConnectionError,
55 |                 NewConnectionError,
56 |                 ConnectionRefusedError,
57 |                 AttributeError,
58 |             ):
59 |                 return False
60 | 
61 |     @property
62 |     def es_info(self):
63 |         return self.es.info
64 | 
65 |     @property
66 |     def enabled(self):
67 |         return self.es.enabled
68 | 
69 |     @property
70 |     def active(self):
71 |         return self.es.active
72 | 
73 |     @property
74 |     def enable_data_sync(self):
75 |         if self.es_info:
76 |             info = dict((key, value) for key, value in self.es_info)
77 |             elastic_docs = info["Number of docs"]
78 |             catalog_objs = info["Number of docs (Catalog)"]
79 |             if elastic_docs != catalog_objs:
80 |                 return dict(elastic_docs=elastic_docs, catalog_objs=catalog_objs)
81 |         return False
82 | 
83 | 
84 | ElasticControlPanelView = layout.wrap_form(
85 |     ElasticControlPanelForm, ElasticControlPanelFormWrapper
86 | )
-------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/controlpanel_layout.pt: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 |
7 | 42 | 43 | Site Setup 44 | › 45 |

${view/label}

46 |
47 | Warning 48 | Could not connect to specified elastic search server. 49 |
50 |
51 | 52 |
53 | 54 |
55 |
56 | Actions 57 |
58 |
59 |
60 |
61 | 62 |

63 | The "Convert Catalog" action will create an index on elasticsearch, if it does not exist, and will map all indexes in the catalog. 64 |

65 | 66 | 67 |
68 |
71 |
72 | 73 |

74 | The "Synchronize" action synchronizes the elasticsearch data with the catalog data. Currently there are ${enable_data_sync/elastic_docs} documents indexed in elasticsearch and ${enable_data_sync/catalog_objs} objects indexed in catalog. 75 |

76 |
77 | Warning Could take a very long time. Use carefully. 78 |
79 | 80 | 81 |
82 |
83 |
84 | 85 |

86 | The "Rebuild Catalog" action clears the catalog and indexes all objects with an 'indexObject' method. This may take a long time. 87 |

88 |
89 | Warning 90 | Could take a very long time. Use carefully. 91 |
92 | 93 | 94 |
95 |
96 |
97 |
98 | 99 | 100 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 |
101 | Server status 102 |
${python: data[0]}${python: data[1]}
111 |
112 |
113 |
114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/search.py: -------------------------------------------------------------------------------- 1 | from Products.CMFPlone.browser import search 2 | 3 | 4 | class Search(search.Search): 5 | def munge_search_term(self, q): # NOQA R0201 6 | # We don't want to munge search terms for 7 | # EL 8 | return q 9 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/browser/utilviews.py: -------------------------------------------------------------------------------- 1 | from AccessControl import Unauthorized 2 | from Acquisition import aq_parent 3 | from collective.elasticsearch.manager import ElasticSearchManager 4 | from elasticsearch.exceptions import NotFoundError 5 | from elasticsearch.helpers import scan 6 | from plone import api 7 | from Products.CMFCore.indexing import processQueue 8 | from Products.Five.browser import BrowserView 9 | from zope.component import getMultiAdapter 10 | 11 | import logging 12 | import time 13 | import transaction 14 | 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class Utils(BrowserView): 20 | def __init__(self, context, request): 21 | self.context = context 22 | self.request = request 23 | self._count_index_object = 0 24 | self._count_del_doc_elasticsearch = 0 25 | 26 | def convert(self): 27 | if self.request.method == "POST": 28 | authenticator = getMultiAdapter( 29 | (self.context, self.request), name="authenticator" 30 | ) 31 | if not authenticator.verify(): 32 | raise Unauthorized 33 | 34 | self._es._convert_catalog_to_elastic() 35 | site = aq_parent(self.context) 36 | self.request.response.redirect(f"{site.absolute_url()}/@@elastic-controlpanel") 37 | 38 | def rebuild(self): 39 | if self.request.method == "POST": 40 | authenticator = getMultiAdapter( 41 | (self.context, self.request), name="authenticator" 42 | ) 43 | if not authenticator.verify(): 44 | raise Unauthorized 45 | 46 | self.context.manage_catalogRebuild() 47 | 48 | site = aq_parent(self.context) 49 | self.request.response.redirect(f"{site.absolute_url()}/@@elastic-controlpanel") 50 | 51 | def synchronize(self): 52 | if self.request.method == "POST": 53 | authenticator = getMultiAdapter( 54 | (self.context, self.request), name="authenticator" 55 | ) 56 | if not authenticator.verify(): 57 | raise Unauthorized 58 | uids_catalog = set(self._uids_catalog) 59 | uids_elasticsearch = set(self._uids_elasticsearch) 60 | uids_not_in_elasticsearch = uids_catalog.difference(uids_elasticsearch) 61 | logger.info( 62 | ( 63 | f"{len(uids_not_in_elasticsearch)} " 64 | f"non-indexed objects in elasticsearch" 65 | ) 66 | ) 67 | uids_not_in_catalog = uids_elasticsearch.difference(uids_catalog) 68 | logger.info( 69 | (f"{len(uids_not_in_catalog)} documents " f"not found in the catalog.") 70 | ) 71 | self._index_object_in_elasticsearch(uids_not_in_elasticsearch) 72 | self._delete_document_elasticsearch(uids_not_in_catalog) 73 | message = ( 74 | f"Indexed objects: {self._count_index_object} " 75 | f"Documents deleted: {self._count_del_doc_elasticsearch}" 76 | ) 77 | logger.info(message) 78 | site = aq_parent(self.context) 79 | self.request.response.redirect(f"{site.absolute_url()}/@@elastic-controlpanel") 80 | 81 | @property 82 | def _es(self): 83 | return ElasticSearchManager() 84 | 85 | @property 86 | def _es_conn(self): 87 | return self._es.connection 88 | 89 | @property 90 | def _uids_catalog(self): 91 
| logger.info("Fetching all uids indexed in the catalog...") 92 | uids = self.context.portal_catalog.uniqueValuesFor("UID") 93 | logger.info(f"Found {len(uids)} uids") 94 | return uids 95 | 96 | @property 97 | def _uids_elasticsearch(self): 98 | query = {"query": {"match_all": {}}, "_source": ["UID"]} 99 | items = scan( 100 | self._es_conn, 101 | index=self._es.index_name, 102 | query=query, 103 | preserve_order=True, 104 | size=10000, 105 | ) 106 | logger.info("Fetching all indexed uids in elasticsearch...") 107 | uids = [item["_id"] for item in items] 108 | logger.info(f"Found {len(uids)} uids") 109 | return uids 110 | 111 | def _index_object_in_elasticsearch(self, uids): 112 | amount = len(uids) 113 | for index, uid in enumerate(uids): 114 | obj = api.content.get(UID=uid) 115 | obj.indexObject() 116 | self._count_index_object += 1 117 | logging.info("indexObject: %s", "/".join(obj.getPhysicalPath())) 118 | if index % self._es.bulk_size == 0: 119 | # Force indexing in ES 120 | self.commit(wait=1) 121 | logger.info("COMMIT: %s/%s", index, amount - 1) 122 | self.commit(wait=1) 123 | 124 | def _delete_document_elasticsearch(self, uids): 125 | conn = self._es_conn 126 | amount = len(uids) 127 | for index, uid in enumerate(uids): 128 | try: 129 | conn.delete(index=self._es.index_name, id=uid) 130 | self._count_del_doc_elasticsearch += 1 131 | logging.info("delete doc: %s", uid) 132 | except NotFoundError: 133 | continue 134 | if index % self._es.bulk_size == 0: 135 | # Force indexing in ES 136 | self.commit(wait=1) 137 | logger.info("COMMIT: %s/%s", index, amount - 1) 138 | self.commit(wait=1) 139 | 140 | def commit(self, wait: int = 0): 141 | processQueue() 142 | transaction.commit() 143 | self._es.flush_indices() 144 | if wait: 145 | time.sleep(wait) 146 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/configure.zcml: -------------------------------------------------------------------------------- 1 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 31 | 37 | 38 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/indexes.py: -------------------------------------------------------------------------------- 1 | from Acquisition import aq_base 2 | from Acquisition import aq_parent 3 | from collective.elasticsearch import logger 4 | from datetime import date 5 | from datetime import datetime 6 | from DateTime import DateTime 7 | from Missing import MV 8 | from plone.folder.nogopip import GopipIndex 9 | from Products.ExtendedPathIndex.ExtendedPathIndex import ExtendedPathIndex 10 | from Products.PluginIndexes.BooleanIndex.BooleanIndex import BooleanIndex 11 | from Products.PluginIndexes.DateIndex.DateIndex import DateIndex 12 | from Products.PluginIndexes.DateRangeIndex.DateRangeIndex import DateRangeIndex 13 | from Products.PluginIndexes.FieldIndex.FieldIndex import FieldIndex 14 | from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex 15 | from Products.PluginIndexes.util import safe_callable 16 | from Products.PluginIndexes.UUIDIndex.UUIDIndex import UUIDIndex 17 | from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex 18 | 19 | 20 | def _one(val): 21 | """ 22 | if list, return first 23 | otherwise, return value 24 | """ 25 | if isinstance(val, (list, set, tuple)): 26 | return val[0] 27 | return val 28 | 29 | 30 | def _zdt(val): 31 | if isinstance(val, datetime): 32 | val = DateTime(val) 33 | elif 
isinstance(val, date): 34 | val = DateTime(datetime.fromordinal(val.toordinal())) 35 | elif isinstance(val, str): 36 | val = DateTime(val) 37 | return val 38 | 39 | 40 | keyword_fields = ( 41 | "allowedRolesAndUsers", 42 | "portal_type", 43 | "object_provides", 44 | "Type", 45 | "id", 46 | "cmf_uid", 47 | "sync_uid", 48 | "getId", 49 | "meta_type", 50 | "review_state", 51 | "in_reply_to", 52 | "UID", 53 | "getRawRelatedItems", 54 | "Subject", 55 | "sortable_title", 56 | ) 57 | 58 | 59 | class BaseIndex: 60 | filter_query = True 61 | 62 | def __init__(self, catalog, index): 63 | self.catalog = catalog 64 | self.index = index 65 | 66 | def create_mapping(self, name): # NOQA R0201 67 | if name in keyword_fields: 68 | return {"type": "keyword", "index": True, "store": True} 69 | return {"type": "text", "index": True, "store": False} 70 | 71 | def get_value(self, obj): 72 | value = None 73 | attrs = self.index.getIndexSourceNames() 74 | if len(attrs) > 0: 75 | attr = attrs[0] 76 | else: 77 | attr = "" 78 | if hasattr(self.index, "index_object"): 79 | value = self.index._get_object_datum(obj, attr) 80 | else: 81 | logger.info(f"catalogObject was passed bad index object {self.index}.") 82 | if value == MV: 83 | return None 84 | return value 85 | 86 | def extract(self, name, data): # NOQA R0201 87 | return data[name] or "" 88 | 89 | def _normalize_query(self, query): # NOQA R0201 90 | if isinstance(query, dict) and "query" in query: 91 | return query["query"] 92 | return query 93 | 94 | def get_query(self, name, value): 95 | value = self._normalize_query(value) 96 | if value in (None, ""): 97 | return None 98 | if isinstance(value, (list, tuple, set)): 99 | if len(value) == 0: 100 | return None 101 | return {"terms": {name: value}} 102 | if isinstance(value, dict) and "not" in value: 103 | if isinstance(value["not"], (list, tuple, set)): 104 | return { 105 | "bool": {"must_not": [{"term": {name: i}} for i in value["not"]]} 106 | } 107 | return {"bool": {"must_not": [{"term": {name: value["not"]}}]}} 108 | return {"term": {name: value}} 109 | 110 | 111 | class EKeywordIndex(BaseIndex): 112 | def extract(self, name, data): 113 | return data[name] or [] 114 | 115 | 116 | class EFieldIndex(BaseIndex): 117 | pass 118 | 119 | 120 | class EDateIndex(BaseIndex): 121 | """ 122 | XXX elastic search requires default 123 | value for searching. This could be a problem... 
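    In practice, get_value() falls back to missing_date (1900/01/01) for
    empty values, so documents without a date are still indexed and sortable.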
124 | """ 125 | 126 | missing_date = DateTime("1900/01/01") 127 | 128 | def create_mapping(self, name): 129 | return {"type": "date", "store": True} 130 | 131 | def get_value(self, obj): 132 | value = super().get_value(obj) 133 | if isinstance(value, list): 134 | if len(value) == 0: 135 | value = None 136 | else: 137 | value = value[0] 138 | if value in ("None", MV, None, ""): 139 | value = self.missing_date 140 | if isinstance(value, str): 141 | return DateTime(value).ISO8601() 142 | if isinstance(value, DateTime): 143 | return value.ISO8601() 144 | return value 145 | 146 | def get_query(self, name, value): 147 | range_ = value.get("range") 148 | query = value.get("query") 149 | if query is None: 150 | return None 151 | if range_ is None: 152 | if type(query) in (list, tuple): 153 | range_ = "min" 154 | 155 | first = _zdt(_one(query)).ISO8601() 156 | if range_ == "min": 157 | return {"range": {name: {"gte": first}}} 158 | if range_ == "max": 159 | return {"range": {name: {"lte": first}}} 160 | if ( 161 | range_ in ("min:max", "minmax") 162 | and (type(query) in (list, tuple)) 163 | and len(query) == 2 164 | ): 165 | return {"range": {name: {"gte": first, "lte": _zdt(query[1]).ISO8601()}}} 166 | return None 167 | 168 | def extract(self, name, data): 169 | try: 170 | return DateTime(super().extract(name, data)) 171 | except Exception: # NOQA W0703 172 | return None 173 | 174 | 175 | class EZCTextIndex(BaseIndex): 176 | filter_query = False 177 | 178 | def create_mapping(self, name): 179 | return {"type": "text", "index": True, "store": False} 180 | 181 | def get_value(self, obj): 182 | try: 183 | fields = self.index._indexed_attrs 184 | except Exception: # NOQA W0703 185 | fields = [self.index._fieldname] 186 | all_texts = [] 187 | for attr in fields: 188 | text = getattr(obj, attr, None) 189 | if text is None: 190 | continue 191 | if safe_callable(text): 192 | text = text() 193 | if text is None: 194 | continue 195 | if text: 196 | if isinstance( 197 | text, 198 | ( 199 | list, 200 | tuple, 201 | ), 202 | ): 203 | all_texts.extend(text) 204 | else: 205 | all_texts.append(text) 206 | # Check that we're sending only strings 207 | all_texts = filter(lambda text: isinstance(text, str), all_texts) 208 | if all_texts: 209 | return "\n".join(all_texts) 210 | return None 211 | 212 | def get_query(self, name, value): 213 | value = self._normalize_query(value) 214 | # ES doesn't care about * like zope catalog does 215 | clean_value = value.strip("*") if value else "" 216 | queries = [{"match_phrase": {name: {"query": clean_value, "slop": 2}}}] 217 | if name in ("Title", "SearchableText"): 218 | # titles have most importance... we override here... 
219 | queries.append( 220 | {"match_phrase_prefix": {"Title": {"query": clean_value, "boost": 2}}} 221 | ) 222 | if name != "Title": 223 | queries.append({"match": {name: {"query": clean_value}}}) 224 | 225 | return queries 226 | 227 | 228 | class EBooleanIndex(BaseIndex): 229 | def create_mapping(self, name): 230 | return {"type": "boolean"} 231 | 232 | 233 | class EUUIDIndex(BaseIndex): 234 | pass 235 | 236 | 237 | class EExtendedPathIndex(BaseIndex): 238 | filter_query = True 239 | 240 | def create_mapping(self, name): 241 | return { 242 | "properties": { 243 | "path": {"type": "keyword", "index": True, "store": True}, 244 | "depth": {"type": "integer", "store": True}, 245 | } 246 | } 247 | 248 | def get_value(self, obj): 249 | attrs = self.index.indexed_attrs 250 | index = self.index.id if attrs is None else attrs[0] 251 | path = getattr(obj, index, None) 252 | if path is not None: 253 | if safe_callable(path): 254 | path = path() 255 | if not isinstance(path, (str, tuple)): 256 | raise TypeError( 257 | f"path value must be string or tuple of " 258 | f"strings: ({index}, {repr(path)})" 259 | ) 260 | else: 261 | try: 262 | path = obj.getPhysicalPath() 263 | except AttributeError: 264 | return None 265 | return {"path": "/".join(path), "depth": len(path) - 1} 266 | 267 | def extract(self, name, data): 268 | return data[name]["path"] 269 | 270 | def get_query(self, name, value): 271 | if isinstance(value, str): 272 | paths = value 273 | depth = -1 274 | navtree = False 275 | navtree_start = 0 276 | else: 277 | depth = value.get("depth", -1) 278 | paths = value.get("query") 279 | navtree = value.get("navtree", False) 280 | navtree_start = value.get("navtree_start", 0) 281 | if not paths: 282 | return None 283 | if isinstance(paths, str): 284 | paths = [paths] 285 | andfilters = [] 286 | for path in paths: 287 | spath = path.split("/") 288 | gtcompare = "gt" 289 | start = len(spath) - 1 290 | if navtree: 291 | start = start + navtree_start 292 | end = navtree_start + depth 293 | else: 294 | end = start + depth 295 | if navtree or depth == -1: 296 | gtcompare = "gte" 297 | filters = [] 298 | if depth == 0: 299 | andfilters.append( 300 | {"bool": {"filter": {"term": {f"{name}.path": path}}}} 301 | ) 302 | continue 303 | filters = [ 304 | {"prefix": {f"{name}.path": path}}, 305 | {"range": {f"{name}.depth": {gtcompare: start}}}, 306 | ] 307 | if depth != -1: 308 | filters.append({"range": {f"{name}.depth": {"lte": end}}}) 309 | andfilters.append({"bool": {"must": filters}}) 310 | if len(andfilters) > 1: 311 | return {"bool": {"should": andfilters}} 312 | return andfilters[0] 313 | 314 | 315 | class EGopipIndex(BaseIndex): 316 | def create_mapping(self, name): 317 | return {"type": "integer", "store": True} 318 | 319 | def get_value(self, obj): 320 | parent = aq_parent(obj) 321 | if hasattr(parent, "getObjectPosition"): 322 | return parent.getObjectPosition(obj.getId()) 323 | return None 324 | 325 | 326 | class EDateRangeIndex(BaseIndex): 327 | def create_mapping(self, name): 328 | return { 329 | "properties": { 330 | f"{name}1": {"type": "date", "store": True}, 331 | f"{name}2": {"type": "date", "store": True}, 332 | } 333 | } 334 | 335 | def get_value(self, obj): 336 | if self.index._since_field is None: 337 | return None 338 | since = getattr(obj, self.index._since_field, None) 339 | if safe_callable(since): 340 | since = since() 341 | until = getattr(obj, self.index._until_field, None) 342 | if safe_callable(until): 343 | until = until() 344 | if not since or not until: 345 | return None 
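        # Index both range boundaries; get_query() below then matches documents
        # where <name>1 (start) <= query date <= <name>2 (end).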
346 | return { 347 | f"{self.index.id}1": since.ISO8601(), 348 | f"{self.index.id}2": until.ISO8601(), 349 | } 350 | 351 | def get_query(self, name, value): 352 | value = self._normalize_query(value) 353 | date_iso = value.ISO8601() 354 | return [ 355 | {"range": {f"{name}.{name}1": {"lte": date_iso}}}, 356 | {"range": {f"{name}.{name}2": {"gte": date_iso}}}, 357 | ] 358 | 359 | 360 | class ERecurringIndex(EDateIndex): 361 | pass 362 | 363 | 364 | INDEX_MAPPING = { 365 | KeywordIndex: EKeywordIndex, 366 | FieldIndex: EFieldIndex, 367 | DateIndex: EDateIndex, 368 | ZCTextIndex: EZCTextIndex, 369 | BooleanIndex: EBooleanIndex, 370 | UUIDIndex: EUUIDIndex, 371 | ExtendedPathIndex: EExtendedPathIndex, 372 | GopipIndex: EGopipIndex, 373 | DateRangeIndex: EDateRangeIndex, 374 | } 375 | 376 | try: 377 | from Products.DateRecurringIndex.index import DateRecurringIndex # NOQA C0412 378 | 379 | INDEX_MAPPING[DateRecurringIndex] = ERecurringIndex 380 | except ImportError: 381 | pass 382 | 383 | 384 | def getIndex(catalog, name): 385 | catalog = getattr(catalog, "_catalog", catalog) 386 | try: 387 | index = aq_base(catalog.getIndex(name)) 388 | except KeyError: 389 | return None 390 | index_type = type(index) 391 | if index_type in INDEX_MAPPING: 392 | return INDEX_MAPPING[index_type](catalog, index) 393 | return None 394 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/interfaces.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from Products.CMFCore.interfaces import IIndexQueueProcessor 3 | from typing import Dict 4 | from typing import List 5 | from typing import Tuple 6 | from zope import schema 7 | from zope.interface import Interface 8 | 9 | 10 | class IElasticSearchLayer(Interface): 11 | pass 12 | 13 | 14 | class IElasticSearchManager(Interface): 15 | pass 16 | 17 | 18 | class IMappingProvider(Interface): 19 | def get_index_creation_body(): # NOQA E0211 20 | pass 21 | 22 | def __call__(): # NOQA E0211 23 | pass 24 | 25 | 26 | class IAdditionalIndexDataProvider(Interface): 27 | def __call__(): # NOQA E0211 28 | pass 29 | 30 | 31 | class IReindexActive(Interface): 32 | pass 33 | 34 | 35 | class IQueryAssembler(Interface): 36 | def normalize(query): # NOQA E0213 37 | pass 38 | 39 | def __call__(query): # NOQA E0213 40 | pass 41 | 42 | 43 | class IElasticSettings(Interface): 44 | 45 | enabled = schema.Bool(title="Enabled", default=False, required=False) 46 | 47 | use_redis = schema.Bool( 48 | title="Use redis as queue", 49 | description=( 50 | "You can enable this option if you have installed redis, " 51 | "set the necessary env variables and started a worker." 
52 | "Please check the README for more informations" 53 | ), 54 | default=False, 55 | required=False, 56 | ) 57 | 58 | hosts = schema.List( 59 | title="Hosts", 60 | default=["127.0.0.1"], 61 | unique=True, 62 | value_type=schema.TextLine(title="Host"), 63 | ) 64 | 65 | es_only_indexes = schema.Set( 66 | title="Indexes for which all searches are done through ElasticSearch", 67 | default={"Title", "Description", "SearchableText"}, 68 | value_type=schema.TextLine(title="Index"), 69 | ) 70 | 71 | sniff_on_start = schema.Bool(title="Sniff on start", default=False, required=False) 72 | 73 | sniff_on_connection_fail = schema.Bool( 74 | title="Sniff on connection fail", default=False, required=False 75 | ) 76 | 77 | sniffer_timeout = schema.Float( 78 | title="Sniffer timeout", required=False, default=None 79 | ) 80 | 81 | retry_on_timeout = schema.Bool( 82 | title="Retry on timeout", default=True, required=False 83 | ) 84 | 85 | timeout = schema.Float( 86 | title="Read timeout", 87 | description="how long before timeout connecting to elastic search", 88 | default=2.0, 89 | ) 90 | 91 | bulk_size = schema.Int( 92 | title="Bulk Size", description="bulk size for elastic queries", default=50 93 | ) 94 | 95 | highlight = schema.Bool( 96 | title="Enable Search Highlight", 97 | description="Use elasticsearch highlight feature instead of descriptions in search results", 98 | default=False, 99 | required=False, 100 | ) 101 | 102 | highlight_threshold = schema.Int( 103 | title="Highlight Threshold", 104 | description="Number of highlighted characters to display in search results descriptions", 105 | default=600, 106 | required=False, 107 | ) 108 | 109 | highlight_pre_tags = schema.Text( 110 | title="Highlight pre tags", 111 | description='Used with highlight post tags to wrap matching words. e.g. <pre class="highlight">. One tag per line', 112 | default="", 113 | required=False, 114 | ) 115 | 116 | highlight_post_tags = schema.Text( 117 | title="Higlight post tags", 118 | description="Used with highlight pre tags to wrap matching words. e.g. </pre> One tag per line", 119 | default="", 120 | required=False, 121 | ) 122 | 123 | raise_search_exception = schema.Bool( 124 | title="Raise Search Exceptions", 125 | description="If there is an error with elastic search Plone will default to trying the old catalog search. 
Set this to true to raise the error instead.", 126 | default=False, 127 | required=False, 128 | ) 129 | 130 | 131 | class IElasticSearchIndexQueueProcessor(IIndexQueueProcessor): 132 | """Index queue processor for elasticsearch.""" 133 | 134 | 135 | @dataclass 136 | class IndexingActions: 137 | 138 | index: Dict[str, dict] 139 | reindex: Dict[str, dict] 140 | unindex: Dict[str, dict] 141 | index_blobs: Dict[str, dict] 142 | uuid_path: Dict[str, str] 143 | 144 | def __len__(self): 145 | size = 0 146 | size += len(self.index) 147 | size += len(self.reindex) 148 | size += len(self.unindex) 149 | return size 150 | 151 | def all(self) -> List[Tuple[str, str, Dict]]: 152 | all_data = [] 153 | for attr, action in ( 154 | ("index", "index"), 155 | ("reindex", "update"), 156 | ("unindex", "delete"), 157 | ): 158 | action_data = [ 159 | (uuid, data) for uuid, data in getattr(self, attr, {}).items() 160 | ] 161 | if action_data: 162 | all_data.extend([(action, uuid, data) for uuid, data in action_data]) 163 | return all_data 164 | 165 | def all_blob_actions(self): 166 | return [(uuid, data) for uuid, data in getattr(self, "index_blobs", {}).items()] 167 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/local.py: -------------------------------------------------------------------------------- 1 | from threading import local 2 | 3 | 4 | # a thread-local object holding data for the queue 5 | localData = local() 6 | marker = [] 7 | 8 | 9 | def get_local(name, factory=lambda: None): 10 | """get named thread-local value and optionally initialize it""" 11 | value = getattr(localData, name, marker) 12 | if value is marker: 13 | value = factory() 14 | set_local(name, value) 15 | return value 16 | 17 | 18 | def set_local(name, value): 19 | """set a value for the named thread-local variable""" 20 | setattr(localData, name, value) 21 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/mapping.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.indexes import getIndex 2 | from collective.elasticsearch.interfaces import IMappingProvider 3 | from zope.interface import implementer 4 | 5 | 6 | @implementer(IMappingProvider) 7 | class MappingAdapter: 8 | _default_mapping = { 9 | "SearchableText": {"store": False, "type": "text", "index": True}, 10 | "Title": {"store": True, "type": "text", "index": True}, 11 | "Description": {"store": True, "type": "text", "index": True}, 12 | "allowedRolesAndUsers": {"store": True, "type": "keyword", "index": True}, 13 | "portal_type": {"store": True, "type": "keyword", "index": True}, 14 | } 15 | 16 | _search_attributes = [ 17 | "Title", 18 | "Description", 19 | "Subject", 20 | "contentType", 21 | "created", 22 | "modified", 23 | "effective", 24 | "hasImage", 25 | "is_folderish", 26 | "portal_type", 27 | "review_state", 28 | "path.path", 29 | ] 30 | 31 | def __init__(self, request, manager): 32 | self.request = request 33 | self.manager = manager 34 | self.catalog = manager.catalog._catalog 35 | 36 | def get_index_creation_body(self): # NOQA E0211 37 | return {} 38 | 39 | def __call__(self): 40 | manager = self.manager 41 | properties = self._default_mapping.copy() 42 | for name in self.catalog.indexes.keys(): 43 | index = getIndex(self.catalog, name) 44 | if index is not None: 45 | properties[name] = index.create_mapping(name) 46 | else: 47 | raise Exception(f"Can not locate index for {name}") 
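        # Note: the physical index is created as "<index_name>_<version>" and
        # exposed through an alias named "<index_name>", so a rebuild can
        # target a fresh versioned index while the public name stays stable.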
48 | 49 | conn = manager.connection 50 | index_name = manager.index_name 51 | if conn.indices.exists(index_name): 52 | # created BEFORE we started creating this as aliases to versions, 53 | # we can't go anywhere from here beside try updating... 54 | pass 55 | else: 56 | if not manager.index_version: 57 | # need to initialize version value 58 | manager._bump_index_version() 59 | index_name_v = f"{index_name}_{manager.index_version}" 60 | if not conn.indices.exists(index_name_v): 61 | conn.indices.create(index_name_v, body=self.get_index_creation_body()) 62 | if not conn.indices.exists_alias(name=index_name): 63 | conn.indices.put_alias(index=index_name_v, name=index_name) 64 | 65 | for key in properties: 66 | if key in self._search_attributes: 67 | properties[key]["store"] = True 68 | 69 | properties["attachments"] = { 70 | "properties": { 71 | "data": {"type": "binary"}, 72 | "filename": {"type": "text"}, 73 | "fieldname": {"type": "text"}, 74 | } 75 | } 76 | 77 | return {"properties": properties} 78 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/patches/__init__.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import interfaces 2 | from collective.elasticsearch.manager import ElasticSearchManager 3 | from collective.elasticsearch.utils import get_brain_from_path 4 | from plone.folder.interfaces import IOrdering 5 | from Products.CMFCore.indexing import processQueue 6 | from Products.CMFCore.interfaces import IContentish 7 | from time import process_time 8 | from zope.globalrequest import getRequest 9 | from zope.interface import alsoProvides 10 | from zope.interface import noLongerProvides 11 | 12 | import time 13 | import urllib 14 | 15 | 16 | def unrestrictedSearchResults(self, REQUEST=None, **kw): 17 | manager = ElasticSearchManager() 18 | active = manager.active 19 | method = manager.search_results if active else self._old_unrestrictedSearchResults 20 | return method(REQUEST, check_perms=False, **kw) 21 | 22 | 23 | def safeSearchResults(self, REQUEST=None, **kw): 24 | manager = ElasticSearchManager() 25 | active = manager.active 26 | method = manager.search_results if active else self._old_searchResults 27 | return method(REQUEST, check_perms=True, **kw) 28 | 29 | 30 | def manage_catalogRebuild(self, RESPONSE=None, URL1=None): # NOQA W0613 31 | """need to be publishable""" 32 | manager = ElasticSearchManager() 33 | if manager.enabled: 34 | manager._recreate_catalog() 35 | alsoProvides(getRequest(), interfaces.IReindexActive) 36 | 37 | elapse = time.time() 38 | c_elapse = process_time() 39 | 40 | self.clearFindAndRebuild() 41 | 42 | elapse = time.time() - elapse 43 | c_elapse = process_time() - c_elapse 44 | 45 | msg = f"Catalog Rebuilt\nTotal time: {elapse}\nTotal CPU time: {c_elapse}" 46 | 47 | if manager.enabled: 48 | processQueue() 49 | manager.flush_indices() 50 | noLongerProvides(getRequest(), interfaces.IReindexActive) 51 | if RESPONSE is not None: 52 | RESPONSE.redirect( 53 | URL1 54 | + "/manage_catalogAdvanced?manage_tabs_message=" 55 | + urllib.parse.quote(msg) 56 | ) 57 | 58 | 59 | def manage_catalogClear(self, *args, **kwargs): 60 | """need to be publishable""" 61 | manager = ElasticSearchManager() 62 | if manager.enabled and not manager.active: 63 | manager._recreate_catalog() 64 | return self._old_manage_catalogClear(*args, **kwargs) 65 | 66 | 67 | def uncatalog_object(self, *args, **kwargs): 68 | manager = ElasticSearchManager() 69 | 
if manager.active: 70 | # If ES is active, we also remove the record from there 71 | zcatalog = self._catalog 72 | data = [] 73 | for path in args: 74 | brain = get_brain_from_path(zcatalog, path) 75 | if not brain: 76 | # Path not in the catalog 77 | continue 78 | data.append(("delete", brain.UID, {})) 79 | manager.bulk(data=data) 80 | return self._old_uncatalog_object(*args, **kwargs) 81 | 82 | 83 | def get_ordered_ids(context) -> dict: 84 | """Return all object ids in a context, ordered.""" 85 | if IOrdering.providedBy(context): 86 | return {oid: idx for idx, oid in enumerate(context.idsInOrder())} 87 | else: 88 | # For Plone 5.2, we care only about Dexterity content 89 | objects = [ 90 | obj 91 | for obj in list(context._objects) 92 | if obj.get("meta_type").startswith("Dexterity") 93 | ] 94 | return {oid: idx for idx, oid in enumerate(context.getIdsSubset(objects))} 95 | 96 | 97 | def moveObjectsByDelta(self, ids, delta, subset_ids=None, suppress_events=False): 98 | manager = ElasticSearchManager() 99 | ordered = self if IOrdering.providedBy(self) else None 100 | before = get_ordered_ids(self) 101 | res = self._old_moveObjectsByDelta( 102 | ids, delta, subset_ids=subset_ids, suppress_events=suppress_events 103 | ) 104 | if manager.active: 105 | after = get_ordered_ids(self) 106 | diff = [oid for oid, idx in after.items() if idx != before[oid]] 107 | context = self.context if ordered else self 108 | for oid in diff: 109 | obj = context[oid] 110 | # We only reindex content objects 111 | if not IContentish.providedBy(obj): 112 | continue 113 | obj.reindexObject(idxs=["getObjPositionInParent"]) 114 | return res 115 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/patches/configure.zcml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 16 | 23 | 30 | 37 | 44 | 45 | 52 | 53 | 54 | 61 | 62 | 63 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles.zcml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 15 | 16 | 23 | 24 | 32 | 33 | 38 | 39 | 40 | 45 | 46 | 51 | 52 | 53 | 54 | 59 | 60 | 65 | 66 | 67 | 68 | 73 | 74 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/browserlayer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/controlpanel.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 15 | Manage portal 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/default/registry.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/docker-dev/registry.xml: -------------------------------------------------------------------------------- 
1 | 2 | 3 | 6 | 7 | elasticsearch 8 | 9 | True 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/profiles/uninstall/browserlayer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/query.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.indexes import EZCTextIndex 2 | from collective.elasticsearch.indexes import getIndex 3 | from collective.elasticsearch.interfaces import IQueryAssembler 4 | from collective.elasticsearch.utils import getESOnlyIndexes 5 | from zope.interface import implementer 6 | 7 | 8 | @implementer(IQueryAssembler) 9 | class QueryAssembler: 10 | def __init__(self, request, es): 11 | self.es = es 12 | self.catalog = es.catalog 13 | self.request = request 14 | 15 | def normalize(self, query): # NOQA R0201 16 | sort_on = [] 17 | sort = query.pop("sort_on", None) 18 | # default plone is ascending 19 | sort_order = query.pop("sort_order", "asc") 20 | if sort_order in ("descending", "reverse", "desc"): 21 | sort_order = "desc" 22 | else: 23 | sort_order = "asc" 24 | 25 | if sort: 26 | for sort_str in sort.split(","): 27 | sort_on.append({sort_str: {"order": sort_order}}) 28 | sort_on.append("_score") 29 | if "b_size" in query: 30 | del query["b_size"] 31 | if "b_start" in query: 32 | del query["b_start"] 33 | if "sort_limit" in query: 34 | del query["sort_limit"] 35 | return query, sort_on 36 | 37 | def __call__(self, dquery): 38 | filters = [] 39 | matches = [] 40 | catalog = self.catalog._catalog 41 | idxs = catalog.indexes.keys() 42 | query = {"match_all": {}} 43 | es_only_indexes = getESOnlyIndexes() 44 | for key, value in dquery.items(): 45 | if key not in idxs and key not in es_only_indexes: 46 | continue 47 | index = getIndex(catalog, key) 48 | if index is None and key in es_only_indexes: 49 | # deleted index for plone performance but still need on ES 50 | index = EZCTextIndex(catalog, key) 51 | qq = index.get_query(key, value) 52 | if qq is None: 53 | continue 54 | if index is not None and index.filter_query: 55 | if isinstance(qq, list): 56 | filters.extend(qq) 57 | else: 58 | filters.append(qq) 59 | else: 60 | if isinstance(qq, list): 61 | matches.extend(qq) 62 | else: 63 | matches.append(qq) 64 | if len(filters) == 0 and len(matches) == 0: 65 | return query 66 | query = {"bool": {}} 67 | if len(filters) > 0: 68 | query["bool"]["filter"] = filters 69 | 70 | if len(matches) > 0: 71 | query["bool"]["should"] = matches 72 | query["bool"]["minimum_should_match"] = 1 73 | return query 74 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/queueprocessor.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import logger 2 | from collective.elasticsearch.indexes import getIndex 3 | from collective.elasticsearch.interfaces import IAdditionalIndexDataProvider 4 | from collective.elasticsearch.interfaces import IElasticSearchIndexQueueProcessor 5 | from collective.elasticsearch.interfaces import IndexingActions 6 | from collective.elasticsearch.interfaces import IReindexActive 7 | from collective.elasticsearch.manager import ElasticSearchManager 8 | from collective.elasticsearch.utils import getESOnlyIndexes 9 | from collective.elasticsearch.utils 
import use_redis 10 | from pkg_resources import parse_version 11 | from plone import api 12 | from plone.app.uuid.utils import uuidToCatalogBrain 13 | from plone.dexterity.utils import iterSchemata 14 | from plone.indexer.interfaces import IIndexableObject 15 | from plone.indexer.interfaces import IIndexer 16 | from plone.namedfile.interfaces import INamedBlobFileField 17 | from zope.component import getAdapters 18 | from zope.component import queryMultiAdapter 19 | from zope.component.hooks import getSite 20 | from zope.globalrequest import getRequest 21 | from zope.interface import implementer 22 | from zope.schema import getFields 23 | 24 | import transaction 25 | 26 | 27 | if parse_version(api.env.plone_version()) < parse_version("6"): 28 | 29 | def uuidToObject(uuid, unrestricted=False): 30 | """Variation of this method, which support the parameter 31 | 'unrestricted', like the one from plone 6. 32 | """ 33 | 34 | brain = uuidToCatalogBrain(uuid) 35 | if brain is None: 36 | return None 37 | 38 | path = brain.getPath() 39 | 40 | if not path: 41 | return 42 | site = getSite() 43 | if site is None: 44 | return 45 | # Go to the parent of the item without restrictions. 46 | parent_path, final_path = path.rpartition("/")[::2] 47 | parent = site.unrestrictedTraverse(parent_path) 48 | # Do check restrictions for the final object. 49 | # Check if the object has restrictions 50 | if unrestricted: 51 | return parent.unrestrictedTraverse(final_path) 52 | return parent.restrictedTraverse(final_path) 53 | 54 | else: 55 | from plone.app.uuid.utils import uuidToObject 56 | 57 | 58 | @implementer(IElasticSearchIndexQueueProcessor) 59 | class IndexProcessor: 60 | """A queue processor for elasticsearch""" 61 | 62 | _manager: ElasticSearchManager = None 63 | _es_attributes = None 64 | _all_attributes = None 65 | rebuild: bool = False 66 | _actions: IndexingActions = None 67 | 68 | @property 69 | def manager(self): 70 | """Return the portal catalog.""" 71 | if not self._manager: 72 | self._manager = ElasticSearchManager() 73 | return self._manager 74 | 75 | @property 76 | def catalog(self): 77 | """Return the portal catalog.""" 78 | return api.portal.get_tool("portal_catalog") 79 | 80 | @property 81 | def es_attributes(self): 82 | """Return all attributes defined in portal catalog.""" 83 | if not self._es_attributes: 84 | self._es_attributes = getESOnlyIndexes() 85 | return self._es_attributes 86 | 87 | @property 88 | def all_attributes(self): 89 | """Return all attributes defined in portal catalog.""" 90 | if not self._all_attributes: 91 | catalog = self.catalog 92 | es_indexes = self.es_attributes 93 | catalog_indexes = set(catalog.indexes()) 94 | self._all_attributes = es_indexes.union(catalog_indexes) 95 | return self._all_attributes 96 | 97 | @property 98 | def rebuild(self): 99 | return IReindexActive.providedBy(getRequest()) 100 | 101 | @property 102 | def actions(self) -> IndexingActions: 103 | if not self._actions: 104 | self._actions = IndexingActions( 105 | index={}, 106 | reindex={}, 107 | unindex={}, 108 | index_blobs={}, 109 | uuid_path={}, 110 | ) 111 | return self._actions 112 | 113 | def _clean_up(self): 114 | self._manager = None 115 | self._es_attributes = None 116 | self._all_attributes = None 117 | self._actions = None 118 | 119 | def _uuid_path(self, obj): 120 | uuid = api.content.get_uuid(obj) if obj.portal_type != "Plone Site" else "/" 121 | path = "/".join(obj.getPhysicalPath()) 122 | return uuid, path 123 | 124 | def index(self, obj, attributes=None): 125 | """Index the specified 
attributes for an obj."""
126 | if not self.manager.active:
127 | return
128 | actions = self.actions
129 | uuid, path = self._uuid_path(obj)
130 | actions.uuid_path[uuid] = path
131 | if self.rebuild:
132 | # During rebuild we index everything
133 | attributes = self.all_attributes
134 | is_reindex = False
135 | else:
136 | attributes = {att for att in attributes} if attributes else set()
137 | is_reindex = attributes and attributes != self.all_attributes
138 | data = self.get_data(uuid, attributes)
139 | blob_data = self.get_blob_data(uuid, obj)
140 | if is_reindex and uuid in actions.index:
141 | # Reindexing something that was not processed yet
142 | actions.index[uuid].update(data)
143 | return
144 | elif is_reindex:
145 | # Simple reindexing
146 | actions.reindex[uuid] = data
147 | actions.index_blobs[uuid] = blob_data
148 | return
149 | elif uuid in actions.reindex:
150 | # Remove from reindex
151 | actions.reindex.pop(uuid)
152 | 
153 | elif uuid in actions.unindex:
154 | # Remove from unindex
155 | actions.unindex.pop(uuid)
156 | actions.index[uuid] = data
157 | actions.index_blobs[uuid] = blob_data
158 | 
159 | def reindex(self, obj, attributes=None, update_metadata=False):
160 | """Reindex the specified attributes for an obj."""
161 | if not self.manager.active:
162 | return
163 | self.index(obj, attributes)
164 | 
165 | def unindex(self, obj):
166 | """Unindex the obj."""
167 | if not self.manager.active:
168 | return
169 | actions = self.actions
170 | uuid, path = self._uuid_path(obj)
171 | actions.uuid_path[uuid] = path
172 | if uuid in actions.index:
173 | actions.index.pop(uuid)
174 | elif uuid in actions.reindex:
175 | actions.reindex.pop(uuid)
176 | actions.unindex[uuid] = {}
177 | 
178 | def begin(self):
179 | """Transaction start."""
180 | pass
181 | 
182 | def commit(self, wait=None):
183 | """Transaction commit."""
184 | method = self.commit_es
185 | if use_redis():
186 | method = self.commit_redis
187 | return method(wait=wait)
188 | 
189 | def commit_redis(self, wait=None):
190 | """Since we defer indexing to an external queue, we need to make sure
191 | the transaction is committed and synced with all threads.
192 | Thus, for the redis integration, we run the 'commit' in an
193 | addAfterCommitHook of the transaction.
194 | """
195 | 
196 | transaction.get().addAfterCommitHook(self._commit_hook_redis)
197 | 
198 | def _commit_hook_redis(self, wait=None):
199 | """The after-commit hook for redis; it also takes care of
200 | updating blobs."""
201 | actions = self.actions
202 | items = len(actions) if actions else 0
203 | if self.manager.active and items:
204 | self.manager.bulk(data=actions.all())
205 | 
206 | # make sure the attachment plugin and the cbor-attachments pipeline are available
207 | pipeline = "cbor-attachments" in self.manager.connection.ingest.get_pipeline()
208 | plugin = "attachment" in self.manager.connection.cat.plugins()
209 | if pipeline and plugin:
210 | for item in self.actions.all_blob_actions():
211 | self.manager.update_blob(item)
212 | 
213 | self._clean_up()
214 | 
215 | def commit_es(self, wait=None):
216 | """Transaction commit."""
217 | actions = self.actions
218 | items = len(actions) if actions else 0
219 | if self.manager.active and items:
220 | self.manager.bulk(data=actions.all())
221 | self._clean_up()
222 | 
223 | def abort(self):
224 | """Transaction abort."""
225 | self._clean_up()
226 | 
227 | def wrap_object(self, obj):
228 | wrapped_object = None
229 | if not IIndexableObject.providedBy(obj):
230 | # This is the CMF 2.2 compatible approach, which should be used
231 | # going forward
232 | wrapper = queryMultiAdapter((obj, self.catalog), IIndexableObject)
233 | wrapped_object = wrapper if wrapper is not None else obj
234 | else:
235 | wrapped_object = obj
236 | return wrapped_object
237 | 
238 | def get_data(self, uuid, attributes=None):
239 | method = self.get_data_for_es
240 | if use_redis():
241 | method = self.get_data_for_redis
242 | return method(uuid, attributes=attributes)
243 | 
244 | def get_data_for_redis(self, uuid, attributes=None):
245 | attributes = attributes if attributes else self.all_attributes
246 | index_data = {}
247 | for index_name in attributes:
248 | index_data[index_name] = None
249 | return index_data
250 | 
251 | def get_data_for_es(self, uuid, attributes=None):
252 | """Data to be sent to elasticsearch."""
253 | obj = api.portal.get() if uuid == "/" else uuidToObject(uuid, unrestricted=True)
254 | wrapped_object = self.wrap_object(obj)
255 | index_data = {}
256 | attributes = attributes if attributes else self.all_attributes
257 | catalog = self.catalog
258 | for index_name in attributes:
259 | value = None
260 | index = getIndex(catalog, index_name)
261 | if index is not None:
262 | try:
263 | value = index.get_value(wrapped_object)
264 | except Exception as exc:  # NOQA W0703
265 | path = "/".join(obj.getPhysicalPath())
266 | logger.error(f"Error indexing value: {path}: {index_name}\n{exc}")
267 | value = None
268 | if value in (None, "None"):
269 | # yes, we'll index null data...
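# (an explicit null overwrites any stale value previously stored in ES)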
270 | value = None
271 | elif index_name in self.es_attributes:
272 | indexer = queryMultiAdapter(
273 | (wrapped_object, catalog), IIndexer, name=index_name
274 | )
275 | if indexer:
276 | value = indexer()
277 | else:
278 | attr = getattr(obj, index_name, None)
279 | value = attr() if callable(attr) else value
280 | # Use str if the value is bytes
281 | value = (
282 | value.decode("utf-8", "ignore") if isinstance(value, bytes) else value
283 | )
284 | index_data[index_name] = value
285 | additional_providers = [
286 | adapter for adapter in getAdapters((obj,), IAdditionalIndexDataProvider)
287 | ]
288 | if additional_providers:
289 | for _, adapter in additional_providers:
290 | index_data.update(adapter(catalog, index_data))
291 | 
292 | return index_data
293 | 
294 | def get_blob_data(self, uuid, obj):
295 | """Go through the schemata and extract info about blob fields."""
296 | index_data = {}
297 | portal_path_len = len(api.portal.get().getPhysicalPath())
298 | obj_segments = obj.getPhysicalPath()
299 | relative_path = "/".join(obj_segments[portal_path_len:])
300 | for schema in iterSchemata(obj):
301 | for name, field in getFields(schema).items():
302 | if INamedBlobFileField.providedBy(field) and field.get(obj):
303 | index_data[name] = {
304 | "path": relative_path,
305 | "filename": field.get(obj).filename,
306 | }
307 | return index_data
308 | 
-------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/redis/__init__.py -------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/configure.zcml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/fetch.py: -------------------------------------------------------------------------------- 1 | import io
2 | import os
3 | import requests
4 | 
5 | 
6 | session = requests.Session()
7 | session.headers.update({"Accept": "application/json"})
8 | session.auth = (
9 | str(os.environ.get("PLONE_USERNAME", None)),
10 | str(os.environ.get("PLONE_PASSWORD", None)),
11 | )
12 | 
13 | session_data = requests.Session()
14 | session_data.auth = (
15 | str(os.environ.get("PLONE_USERNAME", None)),
16 | str(os.environ.get("PLONE_PASSWORD", None)),
17 | )
18 | 
19 | 
20 | def fetch_data(uuid, attributes):
21 | backend = os.environ.get("PLONE_BACKEND", None)
22 | url = backend + "/@elasticsearch_extractdata"
23 | payload = {"uuid": uuid, "attributes:list": attributes}
24 | response = session.get(url, params=payload, verify=False, timeout=60)
25 | if response.status_code == 200:
26 | content = response.json()
27 | if "@id" in content and "data" in content:
28 | return content["data"]
29 | else:
30 | raise Exception("Bad response from Plone Backend")
31 | 
32 | 
33 | def fetch_blob_data(fieldname, data):
34 | backend = os.environ.get("PLONE_BACKEND", None)
35 | download_url = "/".join([backend, data[fieldname]["path"], "@@download", fieldname])
36 | file_ = session_data.get(download_url)
37 | return io.BytesIO(file_.content)
38 | 
--------------------------------------------------------------------------------
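The two module-level sessions above are what the external worker process uses to pull freshly extracted data back out of Plone. A minimal worker-side sketch follows; the values are illustrative only (real deployments set these variables in the worker environment, see docker/worker.Dockerfile), and they must be set before the module is imported, since the sessions capture the credentials at import time:

import os

# Hypothetical values for illustration; a real worker gets these from its
# environment. They must exist before the import below, because fetch.py
# builds its sessions at import time.
os.environ.setdefault("PLONE_BACKEND", "http://localhost:8080/Plone")
os.environ.setdefault("PLONE_USERNAME", "admin")
os.environ.setdefault("PLONE_PASSWORD", "secret")

from collective.elasticsearch.redis.fetch import fetch_data

# Performs a GET on <backend>/@elasticsearch_extractdata and returns the
# "data" part of the JSON response. The UID here is a placeholder.
data = fetch_data("<uuid-of-some-content>", ["Title", "SearchableText"])
print(sorted(data))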
/src/collective/elasticsearch/redis/restapi.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.interfaces import IElasticSearchIndexQueueProcessor 2 | from plone import api 3 | from plone.restapi.serializer.converters import json_compatible 4 | from plone.restapi.services import Service 5 | from zExceptions import NotFound 6 | from zope.component import getUtility 7 | 8 | 9 | class ExtractData(Service): 10 | def reply(self): 11 | queueprocessor = getUtility( 12 | IElasticSearchIndexQueueProcessor, name="elasticsearch" 13 | ) 14 | attributes = self.request.get("attributes", []) 15 | uuid = self.request.get("uuid", None) 16 | 17 | obj = api.portal.get() if uuid == "/" else api.content.get(UID=uuid) 18 | if obj is None: 19 | raise NotFound() 20 | 21 | response = {} 22 | data = queueprocessor.get_data_for_es(uuid, attributes=attributes) 23 | response["@id"] = f"{self.context.absolute_url()}/@elasticsearch_extractdata" 24 | response["data"] = json_compatible(data) 25 | return response 26 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/redis/tasks.py: -------------------------------------------------------------------------------- 1 | from .fetch import fetch_blob_data 2 | from .fetch import fetch_data 3 | from collective.elasticsearch import local 4 | from collective.elasticsearch.manager import ElasticSearchManager 5 | from elasticsearch import Elasticsearch 6 | from rq import Queue 7 | from rq import Retry 8 | from rq.decorators import job 9 | 10 | import cbor2 11 | import os 12 | import redis 13 | 14 | 15 | REDIS_CONNECTION_KEY = "redis_connection" 16 | 17 | 18 | def redis_connection(): 19 | connection = local.get_local(REDIS_CONNECTION_KEY) 20 | if not connection: 21 | local.set_local( 22 | REDIS_CONNECTION_KEY, 23 | redis.Redis.from_url(os.environ.get("PLONE_REDIS_DSN", None)), 24 | ) 25 | connection = local.get_local(REDIS_CONNECTION_KEY) 26 | return connection 27 | 28 | 29 | def es_connection(hosts, **params): 30 | connection = local.get_local(ElasticSearchManager.connection_key) 31 | if not connection: 32 | local.set_local( 33 | ElasticSearchManager.connection_key, Elasticsearch(hosts, **params) 34 | ) 35 | connection = local.get_local(ElasticSearchManager.connection_key) 36 | return connection 37 | 38 | 39 | queue = Queue( 40 | "normal", 41 | connection=redis_connection(), 42 | is_async=os.environ.get("ZOPETESTCASE", "0") == "0", 43 | ) # Don't queue in tests 44 | 45 | queue_low = Queue( 46 | "low", 47 | connection=redis_connection(), 48 | is_async=os.environ.get("ZOPETESTCASE", "0") == "0", 49 | ) # Don't queue in tests 50 | 51 | 52 | @job(queue, connection=redis_connection(), retry=Retry(max=3, interval=30)) 53 | def bulk_update(hosts, params, index_name, body): 54 | """ 55 | Collects all the data and updates elasticsearch 56 | """ 57 | hosts = os.environ.get("PLONE_ELASTICSEARCH_HOST", hosts) 58 | connection = es_connection(hosts, **params) 59 | 60 | for item in body: 61 | if len(item) == 1 and "delete" in item[0]: 62 | continue 63 | 64 | catalog_info, payload = item 65 | action, index_info = list(catalog_info.items())[0] 66 | if action == "index": 67 | data = fetch_data(uuid=index_info["_id"], attributes=list(payload.keys())) 68 | item[1] = data 69 | elif action == "update": 70 | data = fetch_data( 71 | uuid=index_info["_id"], attributes=list(payload["doc"].keys()) 72 | ) 73 | item[1]["doc"] = data 74 | 75 | es_data = [item for sublist in body for 
item in sublist] 76 | connection.bulk(index=index_name, body=es_data) 77 | return "Done" 78 | 79 | 80 | @job(queue_low, connection=redis_connection()) 81 | def update_file_data(hosts, params, index_name, body): 82 | """ 83 | Get blob data from plone and index it via elasticsearch attachment pipeline 84 | """ 85 | hosts = os.environ.get("PLONE_ELASTICSEARCH_HOST", hosts) 86 | connection = es_connection(hosts, **params) 87 | uuid, data = body 88 | 89 | attachments = {"attachments": []} 90 | 91 | for fieldname, content in data.items(): 92 | file_ = fetch_blob_data(fieldname, data) 93 | attachments["attachments"].append( 94 | { 95 | "filename": content["filename"], 96 | "fieldname": fieldname, 97 | "data": file_.read(), 98 | } 99 | ) 100 | 101 | connection.update( 102 | index_name, 103 | uuid, 104 | cbor2.dumps({"doc": attachments}), 105 | headers={"content-type": "application/cbor"}, 106 | ) 107 | return "Done" 108 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/result.py: -------------------------------------------------------------------------------- 1 | from Acquisition import aq_base 2 | from Acquisition import aq_get 3 | from Acquisition import aq_parent 4 | from collective.elasticsearch import interfaces 5 | from collective.elasticsearch.utils import get_brain_from_path 6 | from Products.ZCatalog.CatalogBrains import AbstractCatalogBrain 7 | from Products.ZCatalog.interfaces import ICatalogBrain 8 | from typing import Union 9 | from zope.component import getMultiAdapter 10 | from zope.globalrequest import getRequest 11 | from zope.interface import implementer 12 | from ZPublisher.BaseRequest import RequestContainer 13 | 14 | 15 | @implementer(ICatalogBrain) 16 | class ElasticSearchBrain: 17 | """A Brain containing only information indexed in ElasticSearch.""" 18 | 19 | def __init__(self, record: dict, catalog): 20 | self._record = record 21 | self._catalog = catalog 22 | 23 | def has_key(self, key): 24 | return key in self._record 25 | 26 | def __contains__(self, name): 27 | return name in self._record 28 | 29 | def __getattr__(self, name): 30 | if not self.__contains__(name): 31 | raise AttributeError( 32 | f"'ElasticSearchBrain' object has no attribute '{name}'" 33 | ) 34 | return self._record[name] 35 | 36 | def getPath(self): 37 | """Get the physical path for this record""" 38 | return self._record["path"]["path"] 39 | 40 | def getURL(self, relative=0): 41 | """Generate a URL for this record""" 42 | request = getRequest() 43 | return request.physicalPathToURL(self.getPath(), relative) 44 | 45 | def getObject(self, REQUEST=None): 46 | path = self.getPath().split("/") 47 | if not path: 48 | return None 49 | parent = aq_parent(self._catalog) 50 | if aq_get(parent, "REQUEST", None) is None: 51 | request = getRequest() 52 | if request is not None: 53 | # path should be absolute, starting at the physical root 54 | parent = self.getPhysicalRoot() 55 | request_container = RequestContainer(REQUEST=request) 56 | parent = aq_base(parent).__of__(request_container) 57 | if len(path) > 1: 58 | parent = parent.unrestrictedTraverse(path[:-1]) 59 | 60 | return parent.restrictedTraverse(path[-1]) 61 | 62 | def getRID(self) -> int: 63 | """Return the record ID for this object.""" 64 | return -1 65 | 66 | 67 | def BrainFactory(manager): 68 | def factory(result: dict) -> Union[AbstractCatalogBrain, ElasticSearchBrain]: 69 | catalog = manager.catalog 70 | zcatalog = catalog._catalog 71 | path = result.get("fields", {}).get("path.path", 
None) 72 | if type(path) in (list, tuple, set) and len(path) > 0:
73 | path = path[0]
74 | if path:
75 | brain = get_brain_from_path(zcatalog, path)
76 | if not brain:
77 | result = manager.get_record_by_path(path)
78 | brain = ElasticSearchBrain(record=result, catalog=catalog)
79 | if manager.highlight and result.get("highlight"):
80 | fragments = []
81 | fraglen = 0
82 | for idx, i in enumerate(result["highlight"].get("SearchableText", [])):
83 | fraglen += len(i)
84 | if idx > 0 and fraglen > manager.highlight_threshold:
85 | break
86 | fragments.append(i)
87 | brain["Description"] = " ... ".join(fragments)
88 | return brain
89 | # We should handle cases where there is no path in the ES response
90 | return None
91 | 
92 | return factory
93 | 
94 | 
95 | class ElasticResult:
96 | def __init__(self, manager, query, **query_params):
97 | assert "sort" not in query_params
98 | assert "start" not in query_params
99 | self.manager = manager
100 | self.bulk_size = manager.bulk_size
101 | qassembler = getMultiAdapter(
102 | (getRequest(), manager), interfaces.IQueryAssembler
103 | )
104 | dquery, self.sort = qassembler.normalize(query)
105 | self.query = qassembler(dquery)
106 | 
107 | # results are stored in a dictionary, keyed
108 | # by the start index of the bulk-size bucket of
109 | # results it holds. This way we can skip around
110 | # for result data in a result object
111 | result = manager._search(self.query, sort=self.sort, **query_params)["hits"]
112 | self.results = {0: result["hits"]}
113 | self.count = result["total"]["value"]
114 | self.query_params = query_params
115 | 
116 | def __len__(self):
117 | return self.count
118 | 
119 | def __getitem__(self, key):
120 | """
121 | Lazy loading es results with negative index support.
122 | We store the results in buckets of what the bulk size is.
123 | This is so you can skip around in the indexes without needing
124 | to load all the data.
125 | Example (all zero-based indexing here, remember):
126 | (525 results with bulk size 50)
127 | - self[0]: 0 bucket, 0 item
128 | - self[10]: 0 bucket, 10 item
129 | - self[50]: 50 bucket: 0 item
130 | - self[55]: 50 bucket: 5 item
131 | - self[352]: 350 bucket: 2 item
132 | - self[-1]: 500 bucket: 24 item
133 | - self[-2]: 500 bucket: 23 item
134 | - self[-55]: 450 bucket: 19 item
135 | """
136 | bulk_size = self.bulk_size
137 | count = self.count
138 | if isinstance(key, slice):
139 | return [self[i] for i in range(key.start, key.stop)]
140 | if key + 1 > count:
141 | raise IndexError
142 | if key < 0 and abs(key) > count:
143 | raise IndexError
144 | if key >= 0:
145 | result_key = int(key / bulk_size) * bulk_size
146 | start = result_key
147 | result_index = key % bulk_size
148 | elif key < 0:
149 | last_key = int(count / bulk_size) * bulk_size
150 | last_key = last_key if last_key else count
151 | start = result_key = int(last_key - ((abs(key) / bulk_size) * bulk_size))
152 | if last_key == result_key:
153 | result_index = key
154 | else:
155 | result_index = (key % bulk_size) - (bulk_size - (count % last_key))
156 | if result_key not in self.results:
157 | self.results[result_key] = self.manager._search(
158 | self.query, sort=self.sort, start=start, **self.query_params
159 | )["hits"]["hits"]
160 | return self.results[result_key][result_index]
161 | 
-------------------------------------------------------------------------------- /src/collective/elasticsearch/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/services/__init__.py -------------------------------------------------------------------------------- /src/collective/elasticsearch/services/configure.zcml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 12 | 13 | 20 | 21 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/services/controlpanel.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.interfaces import IElasticSearchLayer
2 | from collective.elasticsearch.interfaces import IElasticSettings
3 | from plone.restapi.controlpanels import RegistryConfigletPanel
4 | from zope.component import adapter
5 | from zope.interface import Interface
6 | 
7 | 
8 | @adapter(Interface, IElasticSearchLayer)
9 | class ElasticSearchSettingsConfigletPanel(RegistryConfigletPanel):
10 | """Control Panel endpoint"""
11 | 
12 | schema = IElasticSettings
13 | configlet_id = "elasticsearch"
14 | configlet_category_id = "Products"
15 | title = "Elastic Search Settings"
16 | group = ""
17 | schema_prefix = "collective.elasticsearch.interfaces.IElasticSettings"
18 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/services/elasticsearch.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.manager import ElasticSearchManager
2 | from plone import api
3 | from plone.restapi.deserializer import json_body
4 | from plone.restapi.services import Service
5 | 
6 | 
7 | class ElasticSearchService(Service):
8 | """Base service for ElasticSearch management."""
9 | 
10 | def __init__(self, context, request):
11 | super().__init__(context, request)
12 | 
self.es = ElasticSearchManager() 13 | 14 | 15 | class Info(ElasticSearchService): 16 | """Elastic Search information.""" 17 | 18 | def reply(self): 19 | info = self.es.info 20 | response = dict(info) 21 | response["@id"] = f"{api.portal.get().absolute_url()}/@elasticsearch" 22 | return response 23 | 24 | 25 | class Maintenance(ElasticSearchService): 26 | """Elastic Search integration management.""" 27 | 28 | def reply(self): 29 | data = json_body(self.request) 30 | action = data.get("action") 31 | if action == "convert": 32 | self.es._convert_catalog_to_elastic() 33 | elif action == "rebuild": 34 | catalog = api.portal.get_tool("portal_catalog") 35 | catalog.manage_catalogRebuild() 36 | else: 37 | return self.reply_no_content(status=400) 38 | return self.reply_no_content() 39 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/setuphandlers.py: -------------------------------------------------------------------------------- 1 | from Products.CMFPlone.interfaces import INonInstallable 2 | from zope.interface import implementer 3 | 4 | 5 | @implementer(INonInstallable) 6 | class HiddenProfiles: 7 | @staticmethod 8 | def getNonInstallableProfiles(): # NOQA C0103 9 | """Hide uninstall profile from site-creation and quickinstaller.""" 10 | return [ 11 | "collective.elasticsearch:uninstall", 12 | ] 13 | 14 | 15 | def post_install(context): # NOQA W0613 16 | """Post install script""" 17 | # Do something at the end of the installation of this package. 18 | 19 | 20 | def post_content(context): # NOQA W0613 21 | """Post content script""" 22 | 23 | 24 | def uninstall(context): # NOQA W0613 25 | """Uninstall script""" 26 | # Do something at the end of the uninstallation of this package. 27 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/testing.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import utils 2 | from plone import api 3 | from plone.app.contenttypes.testing import PLONE_APP_CONTENTTYPES_FIXTURE 4 | from plone.app.testing import applyProfile 5 | from plone.app.testing import FunctionalTesting 6 | from plone.app.testing import IntegrationTesting 7 | from plone.app.testing import PloneSandboxLayer 8 | from plone.app.testing import setRoles 9 | from plone.app.testing import SITE_OWNER_NAME 10 | from plone.app.testing import SITE_OWNER_PASSWORD 11 | from plone.app.testing import TEST_USER_ID 12 | from plone.testing import zope 13 | 14 | import collective.elasticsearch 15 | import os 16 | import redis 17 | import time 18 | 19 | 20 | MAX_CONNECTION_RETRIES = 20 21 | 22 | 23 | class ElasticSearch(PloneSandboxLayer): 24 | 25 | defaultBases = (PLONE_APP_CONTENTTYPES_FIXTURE,) 26 | 27 | def setUpZope(self, app, configurationContext): 28 | super().setUpZope(app, configurationContext) 29 | self.loadZCML(package=collective.elasticsearch) 30 | 31 | def setUpPloneSite(self, portal): 32 | super().setUpPloneSite(portal) 33 | # install into the Plone site 34 | applyProfile(portal, "collective.elasticsearch:default") 35 | setRoles(portal, TEST_USER_ID, ("Member", "Manager")) 36 | workflowTool = api.portal.get_tool("portal_workflow") 37 | workflowTool.setDefaultChain("plone_workflow") 38 | 39 | 40 | ElasticSearch_FIXTURE = ElasticSearch() 41 | ElasticSearch_INTEGRATION_TESTING = IntegrationTesting( 42 | bases=(ElasticSearch_FIXTURE,), name="ElasticSearch:Integration" 43 | ) 44 | ElasticSearch_FUNCTIONAL_TESTING = 
FunctionalTesting( 45 | bases=(ElasticSearch_FIXTURE,), name="ElasticSearch:Functional" 46 | ) 47 | ElasticSearch_API_TESTING = FunctionalTesting( 48 | bases=(ElasticSearch_FIXTURE, zope.WSGI_SERVER_FIXTURE), 49 | name="ElasticSearch:API", 50 | ) 51 | 52 | 53 | class RedisElasticSearch(ElasticSearch): 54 | def setUpPloneSite(self, portal): 55 | super().setUpPloneSite(portal) 56 | 57 | # Setup environ for redis testing 58 | os.environ["PLONE_BACKEND"] = portal.absolute_url() 59 | os.environ["PLONE_USERNAME"] = SITE_OWNER_NAME 60 | os.environ["PLONE_PASSWORD"] = SITE_OWNER_PASSWORD 61 | os.environ["PLONE_REDIS_DSN"] = "redis://localhost:6379/0" 62 | 63 | # Make sure tasks are not handled async in tests 64 | # from collective.elasticsearch.redis.tasks import queue 65 | # queue._is_async = False 66 | 67 | utils.get_settings().use_redis = True 68 | self._wait_for_redis_service() 69 | 70 | def _wait_for_redis_service(self): 71 | from collective.elasticsearch.redis.tasks import redis_connection 72 | 73 | counter = 0 74 | while True: 75 | if counter == MAX_CONNECTION_RETRIES: 76 | raise Exception("Cannot connect to redis service") 77 | try: 78 | if redis_connection().ping(): 79 | break 80 | except redis.ConnectionError: 81 | time.sleep(1) 82 | counter += 1 83 | 84 | 85 | ElasticSearch_REDIS_FIXTURE = RedisElasticSearch() 86 | ElasticSearch_REDIS_TESTING = FunctionalTesting( 87 | bases=(zope.WSGI_SERVER_FIXTURE, ElasticSearch_REDIS_FIXTURE), 88 | name="ElasticSearch:Redis", 89 | ) 90 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/__init__.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import utils 2 | from collective.elasticsearch.browser.controlpanel import ElasticControlPanelView 3 | from collective.elasticsearch.interfaces import IElasticSearchIndexQueueProcessor 4 | from collective.elasticsearch.manager import ElasticSearchManager 5 | from collective.elasticsearch.testing import ElasticSearch_API_TESTING 6 | from collective.elasticsearch.testing import ElasticSearch_FUNCTIONAL_TESTING 7 | from collective.elasticsearch.testing import ElasticSearch_INTEGRATION_TESTING 8 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 9 | from plone import api 10 | from Products.CMFCore.indexing import processQueue 11 | from zope.component import getUtility 12 | 13 | import os 14 | import time 15 | import transaction 16 | import unittest 17 | 18 | 19 | MAX_CONNECTION_RETRIES = 20 20 | 21 | 22 | class BaseTest(unittest.TestCase): 23 | layer = ElasticSearch_INTEGRATION_TESTING 24 | 25 | def get_processor(self): 26 | return getUtility(IElasticSearchIndexQueueProcessor, name="elasticsearch") 27 | 28 | def setUp(self): 29 | super().setUp() 30 | self.portal = self.layer["portal"] 31 | self.request = self.layer["request"] 32 | self.request.environ["testing"] = True 33 | self.app = self.layer["app"] 34 | 35 | os.environ["PLONE_BACKEND"] = self.portal.absolute_url() 36 | 37 | settings = utils.get_settings() 38 | # disable sniffing hosts in tests because docker... 
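# (with sniffing enabled, the client would replace our configured host with
# the node addresses the container advertises, which are not reachable from
# the test runner)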
39 | settings.sniffer_timeout = None 40 | settings.enabled = True 41 | settings.sniffer_timeout = 0.0 42 | 43 | # Raise elastic search exceptions 44 | settings.raise_search_exception = True 45 | 46 | self._wait_for_es_service() 47 | 48 | self.catalog = api.portal.get_tool("portal_catalog") 49 | self.catalog._elasticcustomindex = "plone-test-index" 50 | self.es = ElasticSearchManager() 51 | 52 | self.catalog.manage_catalogRebuild() 53 | # need to commit here so all tests start with a baseline 54 | # of elastic enabled 55 | time.sleep(0.1) 56 | self.commit() 57 | 58 | def commit(self, wait: int = 0): 59 | processQueue() 60 | transaction.commit() 61 | self.es.flush_indices() 62 | if wait: 63 | time.sleep(wait) 64 | 65 | def tearDown(self): 66 | super().tearDown() 67 | real_index_name = f"{self.es.real_index_name}_1" 68 | index_name = self.es.index_name 69 | conn = self.es.connection 70 | conn.indices.delete_alias(index=real_index_name, name=index_name) 71 | conn.indices.delete(index=real_index_name) 72 | conn.indices.flush() 73 | # Wait ES remove the index 74 | time.sleep(0.1) 75 | 76 | def _wait_for_es_service(self): 77 | controlpanel = ElasticControlPanelView(self.portal, self.request) 78 | counter = 0 79 | while not controlpanel.connection_status: 80 | if counter == MAX_CONNECTION_RETRIES: 81 | raise Exception("Cannot connect to elasticsearch service") 82 | time.sleep(1) 83 | counter += 1 84 | 85 | 86 | class BaseFunctionalTest(BaseTest): 87 | layer = ElasticSearch_FUNCTIONAL_TESTING 88 | 89 | def search(self, query: dict): 90 | return self.catalog(**query) 91 | 92 | def total_results(self, query: dict): 93 | results = self.search(query) 94 | return len(results) 95 | 96 | 97 | class BaseAPITest(BaseTest): 98 | 99 | layer = ElasticSearch_API_TESTING 100 | 101 | 102 | class BaseRedisTest(BaseTest): 103 | 104 | layer = ElasticSearch_REDIS_TESTING 105 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/assets/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/tests/assets/image.png -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/assets/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/tests/assets/test.pdf -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/assets/test2.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/collective/collective.elasticsearch/58f3f479cac40f33e79348016da42fb34149886f/src/collective/elasticsearch/tests/assets/test2.docx -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_controlpanel.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.browser.controlpanel import ElasticControlPanelView 2 | from collective.elasticsearch.tests import BaseRedisTest 3 | from unittest import mock 4 | 5 | import os 6 | 7 | 8 | ENV_FOR_REDIS = { 9 | "PLONE_REDIS_DSN": "", 10 | "PLONE_BACKEND": "", 11 | "PLONE_USERNAME": "", 12 | 
"PLONE_PASSWORD": "", 13 | } 14 | 15 | 16 | class TestControlPanel(BaseRedisTest): 17 | def test_use_redis_checkbox_is_disabled_enabled(self): 18 | controlpanel = ElasticControlPanelView(self.portal, self.request) 19 | controlpanel.update() 20 | 21 | self.assertIsNone(controlpanel.form_instance.widgets["use_redis"].disabled) 22 | 23 | with mock.patch.dict(os.environ, ENV_FOR_REDIS): 24 | controlpanel.update() 25 | self.assertEqual( 26 | "disabled", controlpanel.form_instance.widgets["use_redis"].disabled 27 | ) 28 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_file_schema.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 11 | 12 | False 13 | Title 14 | 15 | 18 | 19 | False 20 | Description 21 | 22 | 26 | 27 | File 28 | 29 | 33 | 34 | File 2 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_processor.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.testing import ElasticSearch_FUNCTIONAL_TESTING 2 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 3 | from collective.elasticsearch.tests import BaseFunctionalTest 4 | from collective.elasticsearch.utils import getESOnlyIndexes 5 | from collective.elasticsearch.utils import getUID 6 | from parameterized import parameterized_class 7 | from plone import api 8 | from plone.app.contentrules.actions.move import MoveAction 9 | from plone.app.contentrules.tests.dummy import DummyEvent 10 | from plone.app.testing import login 11 | from plone.app.testing import TEST_USER_PASSWORD 12 | from plone.contentrules.rule.interfaces import IExecutable 13 | from Products.CMFCore.indexing import processQueue 14 | from zope.component import getMultiAdapter 15 | 16 | 17 | @parameterized_class( 18 | [ 19 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 20 | {"layer": ElasticSearch_REDIS_TESTING}, 21 | ] 22 | ) 23 | class TestQueueProcessor(BaseFunctionalTest): 24 | def test_has_right_brain_data(self): 25 | processor = self.get_processor() 26 | current_length = len(self.catalog._catalog.uids) 27 | obj = api.content.create(self.portal, "Event", "event", title="Some Event") 28 | uuid = getUID(obj) 29 | self.assertEqual(current_length + 1, len(self.catalog._catalog.uids)) 30 | processQueue() 31 | actions = processor.actions 32 | self.assertIn(uuid, actions.index) 33 | self.portal.manage_delObjects(["event"]) 34 | # uid not actually removed until this if catalog optimized 35 | processQueue() 36 | actions = processor.actions 37 | self.assertNotIn(uuid, actions.index) 38 | self.assertEqual(current_length, len(self.catalog._catalog.uids)) 39 | self.assertIn(uuid, actions.unindex) 40 | 41 | def test_rename_object(self): 42 | processor = self.get_processor() 43 | current_length = len(self.catalog._catalog.uids) 44 | obj = api.content.create(self.portal, "Event", "event1", title="Some Event") 45 | obj_uid = getUID(obj) 46 | self.assertEqual(current_length + 1, len(self.catalog._catalog.uids)) 47 | api.content.rename(self.portal.event1, new_id="event2") 48 | self.assertIn(obj_uid, processor.actions.index) 49 | self.assertNotIn(obj_uid, processor.actions.unindex) 50 | 51 | def test_delete_object(self): 52 | processor = self.get_processor() 53 | obj = api.content.create( 54 | self.portal, "Event", "event_to_delete", title="Some Event" 55 | ) 56 | obj_uid = getUID(obj) 57 | 
self.portal.manage_delObjects(["event_to_delete"]) 58 | processQueue() 59 | self.assertIn(obj_uid, processor.actions.unindex) 60 | 61 | def test_moved_content(self): 62 | """content moved by content rules should remove the original catalog 63 | entry 64 | """ 65 | processor = self.get_processor() 66 | target = api.content.create(container=self.portal, type="Folder", id="target") 67 | source = api.content.create(container=self.portal, type="Folder", id="source") 68 | e = MoveAction() 69 | e.target_folder = "/target" 70 | 71 | obj = api.content.create(container=source, type="Document", id="doc") 72 | obj_uid = getUID(obj) 73 | ex = getMultiAdapter((target, e, DummyEvent(obj)), IExecutable) 74 | self.assertEqual(True, ex()) 75 | self.assertIn(obj_uid, processor.actions.index) 76 | 77 | def test_index_even_if_access_to_obj_might_be_restricted(self): 78 | processor = self.get_processor() 79 | user = api.user.create( 80 | username="worker", 81 | email="ordinary_person@example.com", 82 | password=TEST_USER_PASSWORD, 83 | roles=("Member",), 84 | ) 85 | 86 | folder = api.content.create(self.portal, "Folder", "folder1", title="A folder") 87 | folder.manage_permission( 88 | "Access contents information", roles=["Manager"], acquire=False 89 | ) 90 | obj = api.content.create(folder, "Event", "event1", title="Some Event") 91 | 92 | login(self.portal, user.getId()) 93 | obj.reindexObject() 94 | processQueue() 95 | self.assertIn(obj.UID(), processor.actions.index) 96 | 97 | 98 | @parameterized_class( 99 | [ 100 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 101 | {"layer": ElasticSearch_REDIS_TESTING}, 102 | ] 103 | ) 104 | class TestMoveReindex(BaseFunctionalTest): 105 | def setUp(self): 106 | super().setUp() 107 | # Content on the Plone Site 108 | site_documents = [] 109 | for idx in range(10): 110 | content = api.content.create( 111 | self.portal, "Document", f"document-{idx}", title=f"Page {idx}" 112 | ) 113 | site_documents.append((content.id, getUID(content))) 114 | self.folder = api.content.create( 115 | container=self.portal, type="Folder", id="folder" 116 | ) 117 | folder_documents = [] 118 | for idx in range(10): 119 | content = api.content.create( 120 | self.folder, "Event", f"event-{idx}", title=f"Event {idx}" 121 | ) 122 | folder_documents.append((content.id, getUID(content))) 123 | 124 | self.site_docs = site_documents 125 | self.folder_docs = folder_documents 126 | self.commit(wait=1) 127 | 128 | def test_change_position_site(self): 129 | processor = self.get_processor() 130 | portal = self.portal 131 | # Move last object to top 132 | doc_id, doc_uuid = self.site_docs[-1] 133 | portal.moveObjectsToTop(doc_id) 134 | processQueue() 135 | self.assertIn(doc_uuid, processor.actions.reindex) 136 | # Only reindex getObjPositionInParent 137 | idxs = list(processor.actions.reindex[doc_uuid].keys()) 138 | self.assertEqual(len(idxs), 1) 139 | self.assertEqual(idxs[0], "getObjPositionInParent") 140 | 141 | def test_change_position_folder(self): 142 | processor = self.get_processor() 143 | folder = self.folder 144 | # Move last object to top 145 | doc_id, doc_uuid = self.folder_docs[-1] 146 | folder.moveObjectsToTop(doc_id) 147 | processQueue() 148 | self.assertIn(doc_uuid, processor.actions.reindex) 149 | # Only reindex getObjPositionInParent 150 | idxs = list(processor.actions.reindex[doc_uuid].keys()) 151 | self.assertEqual(len(idxs), 1) 152 | self.assertEqual(idxs[0], "getObjPositionInParent") 153 | 154 | 155 | @parameterized_class( 156 | [ 157 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 158 | 
{"layer": ElasticSearch_REDIS_TESTING}, 159 | ] 160 | ) 161 | class TestRemoveIndexFromCatalog(BaseFunctionalTest): 162 | def setUp(self): 163 | super().setUp() 164 | # Create a content with the word fancy 165 | self.document = api.content.create( 166 | container=self.portal, 167 | type="Document", 168 | id="a-document", 169 | title="A Fancy Title", 170 | ) 171 | # Force indexing in ES 172 | self.commit(wait=1) 173 | # Now delete the index from the catalog 174 | zcatalog = self.catalog._catalog 175 | # Delete indexes that should be only in ES 176 | idxs = getESOnlyIndexes() 177 | for idx in idxs: 178 | zcatalog.delIndex(idx) 179 | self.commit() 180 | 181 | def test_reindex_object(self): 182 | processor = self.get_processor() 183 | document = self.document 184 | document.title = "Common title" 185 | document.reindexObject(idxs=["SearchableText", "Title"]) 186 | processQueue() 187 | actions = processor.actions 188 | uid = getUID(document) 189 | self.assertIn(uid, actions.reindex) 190 | self.assertIn("SearchableText", actions.reindex[uid]) 191 | 192 | if self.layer == ElasticSearch_FUNCTIONAL_TESTING: 193 | self.assertIn("Common", actions.reindex[uid]["SearchableText"]) 194 | self.assertIn("Title", actions.reindex[uid]) 195 | self.assertIn("Common", actions.reindex[uid]["Title"]) 196 | if self.layer == ElasticSearch_REDIS_TESTING: 197 | # There is a slight change in the API for redis. We do no extract 198 | # any data at this time. 199 | self.assertIsNone(actions.reindex[uid]["Title"]) 200 | self.assertIsNone(actions.reindex[uid]["SearchableText"]) 201 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_redis.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import utils 2 | from collective.elasticsearch.tests import BaseFunctionalTest 3 | from collective.elasticsearch.tests import BaseRedisTest 4 | from plone import api 5 | from plone.app.testing import SITE_OWNER_NAME 6 | from plone.app.testing import SITE_OWNER_PASSWORD 7 | from plone.app.textfield import RichTextValue 8 | from plone.dexterity.fti import DexterityFTIModificationDescription 9 | from plone.dexterity.fti import ftiModified 10 | from plone.namedfile.file import NamedBlobFile 11 | from plone.namedfile.file import NamedBlobImage 12 | from plone.restapi.testing import RelativeSession 13 | from unittest import mock 14 | from zope.lifecycleevent import ObjectModifiedEvent 15 | 16 | import io 17 | import json 18 | import os 19 | import transaction 20 | 21 | 22 | ENV_FOR_REDIS = { 23 | "PLONE_REDIS_DSN": "redis://localhost:6379/0", 24 | "PLONE_BACKEND": "http://localhost", 25 | "PLONE_USERNAME": "admin", 26 | "PLONE_PASSWORD": "password", 27 | } 28 | 29 | 30 | class TestRedisUtils(BaseFunctionalTest): 31 | def test_redis_not_available_if_environ_vars_are_missing(self): 32 | 33 | self.assertFalse( 34 | utils.is_redis_available(), "Env vars are missing, this should be false" 35 | ) 36 | 37 | with mock.patch.dict(os.environ, ENV_FOR_REDIS): 38 | self.assertTrue( 39 | True, 40 | "All env vars ar available, this should be true", 41 | ) 42 | 43 | 44 | class TestUseRedis(BaseRedisTest): 45 | def test_use_redis_if_configured(self): 46 | utils.get_settings().use_redis = False 47 | self.assertFalse(utils.use_redis(), "Using redis should be disabled") 48 | 49 | utils.get_settings().use_redis = True 50 | self.assertTrue(utils.use_redis(), "Using redis should be enabled") 51 | 52 | 53 | class 
TestExtractRestApiEndpoint(BaseRedisTest): 54 | def setUp(self): 55 | super().setUp() 56 | self.portal_url = self.portal.absolute_url() 57 | self.endpoint = f"{self.portal_url}/@elasticsearch_extractdata" 58 | 59 | self.api_session = RelativeSession(self.portal_url) 60 | self.api_session.headers.update({"Accept": "application/json"}) 61 | self.api_session.auth = (SITE_OWNER_NAME, SITE_OWNER_PASSWORD) 62 | 63 | self.obj = api.content.create( 64 | self.portal, 65 | "Document", 66 | "page", 67 | title="New Content", 68 | text=RichTextValue("
<p>abc</p>
"), 69 | ) 70 | transaction.commit() 71 | 72 | def tearDown(self): 73 | self.api_session.close() 74 | 75 | def test_extract_all_data_via_endpoint(self): 76 | params = {"uuid": self.obj.UID()} 77 | response = self.api_session.get(self.endpoint, params=params) 78 | self.assertEqual(response.status_code, 200) 79 | self.assertEqual(response.headers.get("Content-Type"), "application/json") 80 | self.assertEqual(self.endpoint, response.json()["@id"]) 81 | content = response.json()["data"] 82 | processor = self.get_processor() 83 | 84 | self.maxDiff = None 85 | self.assertDictEqual( 86 | json.loads(json.dumps(processor.get_data_for_es(self.obj.UID()))), content 87 | ) 88 | 89 | def test_extract_certain_attributes_via_endpoint(self): 90 | params = { 91 | "uuid": self.obj.UID(), 92 | "attributes:list": ["SearchableText", "Title", "id"], 93 | } 94 | response = self.api_session.get(self.endpoint, params=params) 95 | self.assertEqual(response.status_code, 200) 96 | self.assertEqual(response.headers.get("Content-Type"), "application/json") 97 | self.assertEqual(self.endpoint, response.json()["@id"]) 98 | content = response.json()["data"] 99 | processor = self.get_processor() 100 | 101 | self.maxDiff = None 102 | self.assertDictEqual( 103 | json.loads( 104 | json.dumps( 105 | processor.get_data_for_es( 106 | self.obj.UID(), 107 | attributes=params["attributes:list"], 108 | ) 109 | ) 110 | ), 111 | content, 112 | ) 113 | 114 | def test_404_if_obj_not_found(self): 115 | response = self.api_session.get(self.endpoint, params={"uuid": "dummy-uid"}) 116 | self.assertEqual(response.status_code, 404) 117 | 118 | def test_extract_endoint_respects_view_permission(self): 119 | 120 | api_session = RelativeSession(self.portal_url) 121 | api_session.headers.update({"Accept": "application/json"}) 122 | 123 | self.obj.manage_permission("View", roles=[]) 124 | transaction.commit() 125 | 126 | params = {"uuid": self.obj.UID()} 127 | response = self.api_session.get(self.endpoint, params=params) 128 | self.assertEqual(response.status_code, 401) 129 | 130 | 131 | class TestIndexBlobs(BaseRedisTest): 132 | def setUp(self): 133 | super().setUp() 134 | 135 | def _setup_sample_file(self): 136 | file_path = os.path.join(os.path.dirname(__file__), "assets/test.pdf") 137 | with io.FileIO(file_path, "rb") as pdf: 138 | _file = api.content.create( 139 | container=api.portal.get(), 140 | type="File", 141 | id="test-file", 142 | file=NamedBlobFile(data=pdf.read(), filename="test.pdf"), 143 | ) 144 | self.commit(wait=1) 145 | return _file 146 | 147 | def _set_model_file(self, fti, path_to_xml): 148 | fti.model_file = path_to_xml 149 | ftiModified( 150 | fti, 151 | ObjectModifiedEvent( 152 | fti, DexterityFTIModificationDescription("model_file", "") 153 | ), 154 | ) 155 | 156 | def test_index_data_from_file(self): 157 | self._setup_sample_file() 158 | query = {"SearchableText": "text"} 159 | cat_results = self.catalog._old_searchResults(**query) 160 | self.assertEqual(0, len(cat_results), "Expect no result") 161 | es_results = self.catalog(**query) 162 | self.assertEqual(1, len(es_results), "Expect 1 item") 163 | 164 | def test_update_and_delete_file(self): 165 | file_ = self._setup_sample_file() 166 | file_path = os.path.join(os.path.dirname(__file__), "assets/test2.docx") 167 | with io.FileIO(file_path, "rb") as word: 168 | file_.file = NamedBlobFile(data=word.read(), filename="test2.docx") 169 | file_.reindexObject() 170 | self.commit(wait=1) 171 | 172 | query = {"SearchableText": "Lorem"} 173 | es_results = self.catalog(**query) 174 
| self.assertEqual(1, len(es_results), "Expect 1 item") 175 | 176 | self.portal.manage_delObjects(ids=[file_.getId()]) 177 | self.commit(wait=1) 178 | 179 | query = {"SearchableText": "lorem"} 180 | es_results = self.catalog(**query) 181 | self.assertEqual(0, len(es_results), "Expect no item") 182 | 183 | def test_make_sure_binary_data_are_removed_from_es(self): 184 | file_ = self._setup_sample_file() 185 | es_data = self.es.connection.get(self.es.index_name, file_.UID()) 186 | self.assertIsNone(es_data["_source"]["attachments"][0]["data"]) 187 | 188 | def test_multiple_file_fields(self): 189 | fti = self.portal.portal_types.File 190 | self._set_model_file(fti, "collective.elasticsearch.tests:test_file_schema.xml") 191 | file_path_1 = os.path.join(os.path.dirname(__file__), "assets/test.pdf") 192 | file_path_2 = os.path.join(os.path.dirname(__file__), "assets/test2.docx") 193 | with io.FileIO(file_path_1, "rb") as pdf, io.FileIO(file_path_2, "rb") as word: 194 | file_ = api.content.create( 195 | container=api.portal.get(), 196 | type="File", 197 | id="test-file-multiple-file-fields", 198 | file=NamedBlobFile(data=pdf.read(), filename="test.pdf"), 199 | file2=NamedBlobFile(data=word.read(), filename="test2.docx"), 200 | ) 201 | self.commit(wait=1) 202 | 203 | query = {"SearchableText": "lorem"} 204 | es_results = self.catalog(**query) 205 | self.assertEqual(1, len(es_results), "Expect 1 item") 206 | 207 | query = {"SearchableText": "text"} 208 | es_results = self.catalog(**query) 209 | self.assertEqual(1, len(es_results), "Expect 1 item") 210 | 211 | es_data = self.es.connection.get(self.es.index_name, file_.UID()) 212 | self.assertIsNone(es_data["_source"]["attachments"][0]["data"]) 213 | self.assertIsNone(es_data["_source"]["attachments"][1]["data"]) 214 | 215 | file_.file2 = None 216 | file_.reindexObject() 217 | self.commit(wait=1) 218 | 219 | query = {"SearchableText": "lorem"} 220 | es_results = self.catalog(**query) 221 | self.assertEqual(0, len(es_results), "Expect 0 item") 222 | 223 | self._set_model_file(fti, "plone.app.contenttypes.schema:file.xml") 224 | 225 | def test_dont_queue_blob_extraction_jobs_if_not_possible(self): 226 | settings = {"index": {"default_pipeline": None}} 227 | self.es.connection.indices.put_settings(body=settings, index=self.es.index_name) 228 | self.es.connection.ingest.delete_pipeline("cbor-attachments") 229 | file_path = os.path.join(os.path.dirname(__file__), "assets/test2.docx") 230 | with io.FileIO(file_path, "rb") as pdf: 231 | self._file = api.content.create( 232 | container=api.portal.get(), 233 | type="File", 234 | id="test-file2", 235 | file=NamedBlobFile(data=pdf.read(), filename="test2.docx"), 236 | ) 237 | self.commit(wait=1) 238 | 239 | query = {"SearchableText": "lorem"} 240 | es_results = self.catalog(**query) 241 | self.assertEqual(0, len(es_results), "Expect 0 item") 242 | 243 | def test_do_not_index_data_from_images(self): 244 | file_path = os.path.join(os.path.dirname(__file__), "assets/image.png") 245 | with io.FileIO(file_path, "rb") as image: 246 | _image = api.content.create( 247 | container=api.portal.get(), 248 | type="Image", 249 | id="test-file", 250 | image=NamedBlobImage(data=image.read(), filename="image.png"), 251 | ) 252 | self.commit(wait=1) 253 | 254 | es_data = self.es.connection.get(self.es.index_name, _image.UID()) 255 | self.assertNotIn( 256 | "attachments", 257 | es_data["_source"], 258 | "Expect not attachments on es data for a image", 259 | ) 260 | 
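# Note on the assertion pattern used above: `self.catalog(**query)` goes
# through the patched, Elasticsearch-backed search, while
# `self.catalog._old_searchResults(**query)` is the original ZCatalog search
# preserved by the monkey patch. Comparing the two is also a handy
# interactive sanity check; a minimal sketch (assuming a running site with
# this package installed and some blob content indexed):
#
#   from plone import api
#   catalog = api.portal.get_tool("portal_catalog")
#   query = {"SearchableText": "lorem"}
#   es_brains = catalog(**query)  # served by Elasticsearch
#   zc_brains = catalog._old_searchResults(**query)  # plain ZCatalog
#   # Blob text only surfaces via the ES path: the attachment pipeline
#   # indexes it outside the ZCatalog.
#   print(len(es_brains), len(zc_brains))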
-------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_search.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.testing import ElasticSearch_FUNCTIONAL_TESTING 2 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 3 | from collective.elasticsearch.tests import BaseFunctionalTest 4 | from collective.elasticsearch.utils import get_settings 5 | from collective.elasticsearch.utils import getESOnlyIndexes 6 | from DateTime import DateTime 7 | from parameterized import parameterized 8 | from parameterized import parameterized_class 9 | from plone import api 10 | from Products.ZCatalog.interfaces import ICatalogBrain 11 | 12 | 13 | EVENT_KLASS = "plone.app.event.dx.interfaces.IDXEvent" 14 | DOCUMENT_KLASS = "plone.app.contenttypes.interfaces.IDocument" 15 | 16 | 17 | @parameterized_class( 18 | [ 19 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 20 | {"layer": ElasticSearch_REDIS_TESTING}, 21 | ] 22 | ) 23 | class TestSearch(BaseFunctionalTest): 24 | 25 | event_klass = EVENT_KLASS 26 | document_klass = DOCUMENT_KLASS 27 | 28 | def test_field_index_query(self): 29 | api.content.create(self.portal, "Event", "event", title="Some Event") 30 | self.commit(wait=1) 31 | query = {"portal_type": "Event", "Title:": "some event"} 32 | self.assertEqual(self.total_results(query), 1) 33 | 34 | def test_keyword_index_query(self): 35 | api.content.create(self.portal, "Event", "event", title="Some Event") 36 | self.commit(wait=1) 37 | query = {"object_provides": [self.event_klass], "SearchableText": "Event"} 38 | self.assertEqual(self.total_results(query), 1) 39 | 40 | def test_multi_keyword_index_query(self): 41 | api.content.create(self.portal, "Event", "event", title="New Content") 42 | api.content.create(self.portal, "Document", "page", title="New Content") 43 | self.commit(wait=1) 44 | query = { 45 | "object_provides": [self.event_klass, self.document_klass], 46 | "SearchableText": "new content", 47 | } 48 | self.assertEqual(self.total_results(query), 2) 49 | 50 | def test_date_index_query(self): 51 | start = DateTime() 52 | events = [] 53 | for idx in range(5): 54 | event = api.content.create( 55 | self.portal, 56 | "Event", 57 | f"event{idx}", 58 | title=f"Some Event {idx}", 59 | effective=DateTime("2015/09/25 20:00"), 60 | ) 61 | events.append(event) 62 | self.commit(wait=1) 63 | end = DateTime() 64 | query = { 65 | "created": { 66 | "query": (start, end), 67 | "range": "minmax", 68 | }, 69 | "portal_type": "Event", 70 | } 71 | cat_results = self.catalog._old_searchResults(**query) 72 | self.assertEqual(len(cat_results), self.total_results(query)) 73 | self.assertEqual(len(cat_results), len(events)) 74 | 75 | query = { 76 | "effective": {"query": DateTime().latestTime(), "range": "min"}, 77 | "portal_type": "Event", 78 | } 79 | cat_results = self.catalog._old_searchResults(**query) 80 | self.assertEqual(len(cat_results), self.total_results(query)) 81 | self.assertEqual(len(cat_results), 0) 82 | 83 | query = { 84 | "effective": {"query": DateTime().latestTime(), "range": "max"}, 85 | "portal_type": "Event", 86 | } 87 | cat_results = self.catalog._old_searchResults(**query) 88 | self.assertEqual(len(cat_results), self.total_results(query)) 89 | self.assertEqual(len(cat_results), 5) 90 | 91 | def test_text_index_query(self): 92 | for idx in range(5): 93 | api.content.create( 94 | self.portal, "Document", f"page{idx}", title=f"Page {idx}" 95 | ) 96 | # 
the pages above should not show up in the Title results below 97 | events = [] 98 | for idx in range(5): 99 | event = api.content.create( 100 | self.portal, "Event", f"event{idx}", title=f"Some Event {idx}" 101 | ) 102 | events.append(event) 103 | 104 | self.commit(wait=1) 105 | 106 | query = {"Title": "Some Event"} 107 | self.assertEqual(self.total_results(query), len(events)) 108 | 109 | query = {"Title": "Some Event 1", "sort_on": "getObjPositionInParent"} 110 | el_results = self.search(query) 111 | self.assertIn("Some Event 1", [b.Title for b in el_results]) 112 | self.assertEqual(el_results[0].Title, "Some Event 1") 113 | 114 | def test_path_index_query(self): 115 | folder1 = api.content.create( 116 | self.portal, "Folder", "folder0", title="New Content 0" 117 | ) 118 | for idx in range(1, 4): 119 | api.content.create( 120 | folder1, "Document", f"page{idx}", title=f"New Content {idx}" 121 | ) 122 | folder2 = api.content.create( 123 | folder1, "Folder", "folder4", title="New Content 4" 124 | ) 125 | folder3 = api.content.create( 126 | folder2, "Folder", "folder5", title="New Content 5" 127 | ) 128 | for idx in range(6, 9): 129 | api.content.create( 130 | folder3, "Document", f"page{idx}", title=f"New Content {idx}" 131 | ) 132 | 133 | self.commit(wait=1) 134 | query = { 135 | "path": {"depth": 0, "query": "/plone/folder0"}, 136 | "SearchableText": "new content", 137 | } 138 | self.assertEqual(self.total_results(query), 1) 139 | query = { 140 | "path": {"depth": 1, "query": "/plone/folder0"}, 141 | "SearchableText": "new content", 142 | } 143 | self.assertEqual(self.total_results(query), 4) 144 | query = { 145 | "path": {"depth": -1, "query": "/plone/folder0"}, 146 | "SearchableText": "new content", 147 | } 148 | self.assertEqual(self.total_results(query), 9) 149 | query = { 150 | "path": {"depth": 1, "query": "/plone"}, 151 | "SearchableText": "new content", 152 | } 153 | self.assertEqual(self.total_results(query), 1) 154 | # navtree queries are not properly supported: getting the whole subtree here proves the handling is wrong 155 | query = { 156 | "path": {"query": "/plone/folder0", "navtree_start": 0, "navtree": 1}, 157 | "is_default_page": False, 158 | "SearchableText": "new content", 159 | } 160 | self.assertEqual(self.total_results(query), 9) 161 | 162 | def test_combined_query(self): 163 | api.content.create(self.portal, "Folder", "folder1", title="Folder 1") 164 | self.commit(wait=1) 165 | query = { 166 | "path": {"depth": 1, "query": "/plone"}, 167 | "portal_type": "Folder", 168 | "is_default_page": False, 169 | "SearchableText": "folder", 170 | } 171 | self.assertEqual(self.total_results(query), 1) 172 | 173 | def test_highlight_query(self): 174 | settings = get_settings() 175 | settings.highlight = True 176 | settings.highlight_pre_tags = "" 177 | settings.highlight_post_tags = "" 178 | api.content.create(self.portal, "Document", "page", title="Some Page") 179 | self.commit(wait=1) 180 | query = {"SearchableText": "some"} 181 | results = self.search(query) 182 | self.assertEqual(len(results), 1) 183 | self.assertEqual(results[0].Description, "page Some Page") 184 | 185 | def test_not_query(self): 186 | api.content.create(self.portal, "Document", "page", title="New Content") 187 | api.content.create(self.portal, "Event", "event", title="New Event") 188 | self.commit(wait=1) 189 | query = { 190 | "portal_type": {"not": ["Event", "News Item"]}, 191 | "SearchableText": "New", 192 | } 193 | self.assertEqual(self.total_results(query), 1) 194 | 195 | 196 | @parameterized_class( 197 | [ 198 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 199 | {"layer": ElasticSearch_REDIS_TESTING}, 200 | ]
201 | ) 202 | class TestBrains(BaseFunctionalTest): 203 | def setUp(self): 204 | super().setUp() 205 | self.event = api.content.create( 206 | self.portal, "Event", "event", title="Some Event" 207 | ) 208 | self.commit(wait=1) 209 | 210 | def test_one_result_index_0(self): 211 | el_results = self.search({"portal_type": "Event", "Title": "Some Event"}) 212 | self.assertEqual(len(el_results), 1) 213 | brain = el_results[0] 214 | self.assertEqual(brain.getObject(), self.event) 215 | self.assertEqual(brain.portal_type, "Event") 216 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 217 | self.assertEqual(brain.getPath(), "/plone/event") 218 | 219 | def test_one_result_index_last(self): 220 | el_results = self.search({"portal_type": "Event", "Title": "Some Event"}) 221 | self.assertEqual(len(el_results), 1) 222 | brain = el_results[-1] 223 | self.assertEqual(brain.getObject(), self.event) 224 | self.assertEqual(brain.portal_type, "Event") 225 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 226 | self.assertEqual(brain.getPath(), "/plone/event") 227 | 228 | def test_two_results(self): 229 | api.content.create(self.portal, "Event", "event2", title="Some Event") 230 | self.commit(wait=1) 231 | 232 | el_results = self.search( 233 | { 234 | "portal_type": "Event", 235 | "Title": "Some Event", 236 | "sort_on": "getId", 237 | "sort_order": "descending", 238 | } 239 | ) 240 | self.assertEqual(len(el_results), 2) 241 | brain = el_results[0] 242 | self.assertEqual(brain.getId, "event2") 243 | brain = el_results[1] 244 | self.assertEqual(brain.getId, "event") 245 | 246 | brain = el_results[-1] 247 | self.assertEqual(brain.getId, "event") 248 | brain = el_results[-2] 249 | self.assertEqual(brain.getId, "event2") 250 | 251 | 252 | @parameterized_class( 253 | [ 254 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 255 | {"layer": ElasticSearch_REDIS_TESTING}, 256 | ] 257 | ) 258 | class TestBrainsIndexing(BaseFunctionalTest): 259 | def setUp(self): 260 | super().setUp() 261 | for idx in range(120): 262 | api.content.create( 263 | self.portal, "Document", f"{idx:04d}page", title=f"Page {idx}" 264 | ) 265 | self.commit(wait=1) 266 | self.el_results = self.search( 267 | { 268 | "portal_type": "Document", 269 | "sort_on": "getId", 270 | "sort_order": "asc", 271 | } 272 | ) 273 | 274 | def test_all_indexed(self): 275 | self.assertEqual(len(self.el_results), 120) 276 | 277 | @parameterized.expand( 278 | [ 279 | (0, "0000page"), 280 | (-1, "0119page"), 281 | (-50, "0070page"), 282 | (-55, "0065page"), 283 | (-100, "0020page"), 284 | ] 285 | ) 286 | def test_ordering(self, result_idx, expected): 287 | self.assertEqual(self.el_results[result_idx].getId, expected) 288 | 289 | 290 | @parameterized_class( 291 | [ 292 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 293 | {"layer": ElasticSearch_REDIS_TESTING}, 294 | ] 295 | ) 296 | class TestCatalogRecordDeleted(BaseFunctionalTest): 297 | def setUp(self): 298 | super().setUp() 299 | zcatalog = self.catalog._catalog 300 | self.event = api.content.create( 301 | self.portal, "Event", "event-test", title="Gone Event" 302 | ) 303 | self.commit(wait=1) 304 | path = "/".join(self.event.getPhysicalPath()) 305 | zcatalog.uncatalogObject(path) 306 | self.commit() 307 | 308 | def test_search_results(self): 309 | el_results = self.search({"portal_type": "Event", "Title": "Gone Event"}) 310 | self.assertEqual(len(el_results), 1) 311 | brain = el_results[0] 312 | self.assertTrue(ICatalogBrain.providedBy(brain)) 313 | self.assertEqual(brain.getRID(), -1) 314 | # 
Test data from elastic will populate the brain 315 | self.assertEqual(brain.portal_type, "Event") 316 | self.assertEqual(brain.Title, "Gone Event") 317 | # Test that path, URL and object resolution still work 318 | self.assertEqual(brain.getPath(), "/plone/event-test") 319 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 320 | self.assertEqual(brain.getObject(), self.event) 321 | 322 | 323 | @parameterized_class( 324 | [ 325 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 326 | {"layer": ElasticSearch_REDIS_TESTING}, 327 | ] 328 | ) 329 | class TestDeleteObjectNotReflectedOnES(BaseFunctionalTest): 330 | def setUp(self): 331 | super().setUp() 332 | zcatalog = self.catalog._catalog 333 | self.event = api.content.create( 334 | self.portal, "Event", "event-test", title="Gone Event" 335 | ) 336 | self.commit(wait=1) 337 | path = "/".join(self.event.getPhysicalPath()) 338 | zcatalog.uncatalogObject(path) 339 | self.portal._delObject("event-test", suppress_events=True) 340 | self.commit() 341 | 342 | def test_search_results(self): 343 | el_results = self.search({"portal_type": "Event", "Title": "Gone Event"}) 344 | self.assertEqual(len(el_results), 1) 345 | brain = el_results[0] 346 | self.assertTrue(ICatalogBrain.providedBy(brain)) 347 | self.assertEqual(brain.getRID(), -1) 348 | # Test data from elastic will populate the brain 349 | self.assertEqual(brain.portal_type, "Event") 350 | self.assertEqual(brain.Title, "Gone Event") 351 | # Test that path and URL still resolve; the object itself is gone 352 | self.assertEqual(brain.getPath(), "/plone/event-test") 353 | self.assertEqual(brain.getURL(), self.event.absolute_url()) 354 | with self.assertRaises(KeyError): 355 | brain.getObject() 356 | 357 | 358 | @parameterized_class( 359 | [ 360 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 361 | {"layer": ElasticSearch_REDIS_TESTING}, 362 | ] 363 | ) 364 | class TestUncatalogRemoveOnES(BaseFunctionalTest): 365 | def setUp(self): 366 | super().setUp() 367 | self.event = api.content.create( 368 | self.portal, "Event", "event-test", title="Gone Event" 369 | ) 370 | self.commit(wait=1) 371 | path = "/".join(self.event.getPhysicalPath()) 372 | catalog = self.catalog 373 | catalog.uncatalog_object(path) 374 | self.commit(wait=1) 375 | 376 | def test_search_results(self): 377 | el_results = self.search({"portal_type": "Event", "Title": "Gone Event"}) 378 | self.assertEqual(len(el_results), 0) 379 | 380 | 381 | @parameterized_class( 382 | [ 383 | {"layer": ElasticSearch_FUNCTIONAL_TESTING}, 384 | {"layer": ElasticSearch_REDIS_TESTING}, 385 | ] 386 | ) 387 | class TestSearchOnRemovedIndex(BaseFunctionalTest): 388 | def setUp(self): 389 | super().setUp() 390 | # Create a content item containing the word "fancy" 391 | self.document = api.content.create( 392 | container=self.portal, 393 | type="Document", 394 | id="a-document", 395 | title="A Fancy Title", 396 | ) 397 | # Force indexing in ES 398 | self.commit(wait=1) 399 | # Now delete the index from the catalog 400 | zcatalog = self.catalog._catalog 401 | # Delete the indexes that should exist only in ES 402 | idxs = getESOnlyIndexes() 403 | for idx in idxs: 404 | zcatalog.delIndex(idx) 405 | self.commit() 406 | 407 | def test_search_results(self): 408 | el_results = self.search({"portal_type": "Document", "SearchableText": "Fancy"}) 409 | self.assertEqual(len(el_results), 1) 410 | self.assertEqual(el_results[0].getId, self.document.id) 411 | 412 | def test_search_results_after_reindex(self): 413 | # Update title 414 | document = self.document 415 | document.title = "Common title" 416 | document.reindexObject(idxs=["SearchableText", "Title"]) 417 | self.commit(wait=1) 418 | # 
Search for the old title 419 | el_results = self.search({"portal_type": "Document", "SearchableText": "Fancy"}) 420 | self.assertEqual(len(el_results), 0) 421 | # Search for the new title 422 | el_results = self.search( 423 | {"portal_type": "Document", "SearchableText": "Common"} 424 | ) 425 | self.assertEqual(len(el_results), 1) 426 | self.assertEqual(el_results[0].getId, self.document.id) 427 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/tests/test_services.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch.testing import ElasticSearch_API_TESTING 2 | from collective.elasticsearch.testing import ElasticSearch_REDIS_TESTING 3 | from collective.elasticsearch.tests import BaseAPITest 4 | from parameterized import parameterized_class 5 | from plone.app.testing import SITE_OWNER_NAME 6 | from plone.app.testing import SITE_OWNER_PASSWORD 7 | from plone.restapi.testing import RelativeSession 8 | 9 | 10 | @parameterized_class( 11 | [{"layer": ElasticSearch_API_TESTING}, {"layer": ElasticSearch_REDIS_TESTING}] 12 | ) 13 | class TestService(BaseAPITest): 14 | def setUp(self): 15 | super().setUp() 16 | self.portal = self.layer["portal"] 17 | self.portal_url = self.portal.absolute_url() 18 | self.request = self.portal.REQUEST 19 | self.api_session = RelativeSession(self.portal_url) 20 | self.api_session.headers.update({"Accept": "application/json"}) 21 | self.api_session.auth = (SITE_OWNER_NAME, SITE_OWNER_PASSWORD) 22 | 23 | def tearDown(self): 24 | self.api_session.close() 25 | 26 | def test_get(self): 27 | response = self.api_session.get("/@elasticsearch") 28 | 29 | self.assertEqual(response.status_code, 200) 30 | self.assertEqual(response.headers.get("Content-Type"), "application/json") 31 | 32 | results = response.json() 33 | self.assertEqual(results["@id"], f"{self.portal.absolute_url()}/@elasticsearch") 34 | self.assertIn("Cluster Name", results.keys()) 35 | self.assertIn("Elastic Search Version", results.keys()) 36 | self.assertIn("Number of docs (Catalog)", results.keys()) 37 | self.assertIn("Index Name", results.keys()) 38 | self.assertIn("Number of docs", results.keys()) 39 | self.assertIn("Deleted docs", results.keys()) 40 | self.assertIn("Size", results.keys()) 41 | self.assertIn("Query Count", results.keys()) 42 | 43 | def test_post_convert(self): 44 | response = self.api_session.post("/@elasticsearch", json={"action": "convert"}) 45 | 46 | self.assertEqual(response.status_code, 204) 47 | 48 | def test_post_rebuild(self): 49 | response = self.api_session.post("/@elasticsearch", json={"action": "rebuild"}) 50 | 51 | self.assertEqual(response.status_code, 204) 52 | 53 | def test_post_invalid(self): 54 | response = self.api_session.post( 55 | "/@elasticsearch", json={"action": "bad_action"} 56 | ) 57 | 58 | self.assertEqual(response.status_code, 400) 59 | 60 | def test_control_panel_registered(self): 61 | response = self.api_session.get("/@controlpanels") 62 | data = response.json() 63 | titles = [panel["title"] for panel in data] 64 | self.assertIn("Elastic search", titles) 65 | 66 | def test_control_panel_schema(self): 67 | response = self.api_session.get("/@controlpanels/elasticsearch") 68 | data = response.json() 69 | self.assertEqual(data["title"], "Elastic search") 70 | self.assertEqual(data["group"], "Add-on Configuration") 71 | self.assertTrue(data["data"]["enabled"]) 72 | self.assertIn("enabled", data["schema"]["fieldsets"][0]["fields"]) 73 | 
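For manual verification outside the test layer, the @elasticsearch service exercised above can be driven with plain requests. A sketch assuming a local site at http://localhost:8080/Plone and admin:admin credentials (both placeholders):

    import requests

    BASE = "http://localhost:8080/Plone"  # assumed local site URL
    AUTH = ("admin", "admin")  # assumed credentials
    HEADERS = {"Accept": "application/json"}

    # GET mirrors test_get: returns cluster and index statistics.
    resp = requests.get(f"{BASE}/@elasticsearch", auth=AUTH, headers=HEADERS)
    resp.raise_for_status()
    print(resp.json()["Cluster Name"], resp.json()["Number of docs"])

    # POST triggers maintenance actions; per the tests above, "convert" and
    # "rebuild" answer 204 No Content, and unknown actions answer 400.
    resp = requests.post(
        f"{BASE}/@elasticsearch",
        json={"action": "convert"},
        auth=AUTH,
        headers=HEADERS,
    )
    assert resp.status_code == 204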
-------------------------------------------------------------------------------- /src/collective/elasticsearch/upgrades.py: -------------------------------------------------------------------------------- 1 | from Products.CMFCore.utils import getToolByName 2 | 3 | 4 | def update_registry(context): 5 | portal_setup = getToolByName(context, "portal_setup") 6 | portal_setup.runImportStepFromProfile( 7 | "profile-collective.elasticsearch:default", 8 | "plone.app.registry", 9 | run_dependencies=False, 10 | ) 11 | -------------------------------------------------------------------------------- /src/collective/elasticsearch/utils.py: -------------------------------------------------------------------------------- 1 | from collective.elasticsearch import logger 2 | from collective.elasticsearch.interfaces import IElasticSettings 3 | from plone.registry.interfaces import IRegistry 4 | from plone.uuid.interfaces import IUUID 5 | from Products.ZCatalog import ZCatalog 6 | from Products.ZCatalog.CatalogBrains import AbstractCatalogBrain 7 | from typing import List 8 | from zope.component import getUtility 9 | 10 | import math 11 | import os 12 | import pkg_resources 13 | 14 | 15 | HAS_REDIS_MODULE = False 16 | try: 17 | pkg_resources.get_distribution("redis") 18 | HAS_REDIS_MODULE = True 19 | except pkg_resources.DistributionNotFound: 20 | HAS_REDIS_MODULE = False 21 | 22 | 23 | def getUID(obj): 24 | value = IUUID(obj, None) 25 | if not value and hasattr(obj, "UID"): 26 | value = obj.UID() 27 | return value 28 | 29 | 30 | def get_brain_from_path(zcatalog: ZCatalog, path: str) -> AbstractCatalogBrain: 31 | rid = zcatalog.uids.get(path) 32 | if isinstance(rid, int): 33 | try: 34 | return zcatalog[rid] 35 | except KeyError: 36 | logger.error(f"Couldn't get catalog entry for path: {path}") 37 | else: 38 | logger.error(f"Got a key for path that is not an integer: {path}") 39 | return None 40 | 41 | 42 | def get_settings(): 43 | """Return IElasticSettings values.""" 44 | try: 45 | registry = getUtility(IRegistry) 46 | settings = registry.forInterface(IElasticSettings, check=False) 47 | except Exception: # noQA 48 | settings = None 49 | return settings 50 | 51 | 52 | def get_connection_settings(): 53 | settings = get_settings() 54 | return settings.hosts, { 55 | "retry_on_timeout": settings.retry_on_timeout, 56 | "sniff_on_connection_fail": settings.sniff_on_connection_fail, 57 | "sniff_on_start": settings.sniff_on_start, 58 | "sniffer_timeout": settings.sniffer_timeout, 59 | "timeout": settings.timeout, 60 | } 61 | 62 | 63 | def getESOnlyIndexes(): 64 | settings = get_settings() 65 | try: 66 | indexes = settings.es_only_indexes 67 | return set(indexes) if indexes else set() 68 | except (KeyError, AttributeError): 69 | return {"Title", "Description", "SearchableText"} 70 | 71 | 72 | def batches(data: list, size: int) -> List[List]: 73 | """Split a list into batches of the given size.""" 74 | return [data[i : i + size] for i in range(0, len(data), size)] # noQA 75 | 76 | 77 | def format_size_mb(value: int) -> str: 78 | """Format a size, in bytes, as MB.""" 79 | value = value / 1024.0 / 1024.0 80 | return f"{int(math.ceil(value))} MB" 81 | 82 | 83 | def is_redis_available(): 84 | """Determine whether redis could be available.""" 85 | requirements = [ 86 | HAS_REDIS_MODULE, 87 | os.environ.get("PLONE_REDIS_DSN", None), 88 | os.environ.get("PLONE_USERNAME", None), 89 | os.environ.get("PLONE_PASSWORD", None), 90 | os.environ.get("PLONE_BACKEND", None), 91 | ] 92 | return all(requirements) 93 | 94 | 95 | def use_redis():
96 | """ 97 | Determens if redis queueing should be used or not. 98 | """ 99 | return is_redis_available() and get_settings().use_redis 100 | --------------------------------------------------------------------------------