├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE.md ├── PULL_REQUEST_TEMPLATE.md ├── README.md ├── UPGRADE.md ├── composer.json ├── docs ├── 00_Configuration_Values.md ├── 20_Categories.md ├── 29_Custom_Request_Header.md ├── 30_Restrictions.md ├── 40_Meta.md ├── 50_Crawler_Events.md ├── 60_Document_Modification.md └── 90_Frontend_Implementation.md └── src └── LuceneSearchBundle ├── Command ├── CrawlCommand.php └── DocumentModifierCommand.php ├── Configuration ├── Categories │ └── CategoriesInterface.php └── Configuration.php ├── Controller ├── Admin │ └── SettingsController.php ├── AutoCompleteController.php ├── FrontendController.php └── ListController.php ├── DependencyInjection ├── Compiler │ ├── CategoriesPass.php │ └── TaskPass.php ├── Configuration.php └── LuceneSearchExtension.php ├── Doctrine └── DBAL │ └── ConnectionKeepAlive.php ├── Event ├── AssetResourceRestrictionEvent.php ├── CrawlerRequestHeaderEvent.php ├── DocumentModificationEvent.php ├── HtmlParserEvent.php ├── PdfParserEvent.php └── RestrictionContextEvent.php ├── EventListener ├── DocumentMetaDataListener.php ├── MaintenanceListener.php ├── MaintenanceQueueListener.php └── MaintenanceRunCrawlerListener.php ├── Helper ├── HighlighterHelper.php ├── LuceneHelper.php └── StringHelper.php ├── Logger ├── AbstractLogger.php ├── ConsoleLogger.php └── Logger.php ├── LuceneSearchBundle.php ├── LuceneSearchEvents.php ├── Modifier ├── DocumentModifier.php └── QueuedDocumentModifier.php ├── Organizer ├── Dispatcher │ └── HandlerDispatcher.php └── Handler │ ├── AbstractHandler.php │ ├── StateHandler.php │ └── StoreHandler.php ├── Resources ├── config │ ├── pimcore │ │ ├── config.yml │ │ └── routing.yml │ ├── services.yml │ └── services │ │ ├── commands.yml │ │ ├── controller.yml │ │ ├── event.yml │ │ ├── helper.yml │ │ ├── modifier.yml │ │ ├── organizer.yml │ │ ├── system.yml │ │ ├── tasks.yml │ │ └── twig.yml ├── install │ └── config.yml ├── public │ ├── css │ │ 
└── admin.css │ ├── img │ │ ├── ajax-loader.gif │ │ ├── lucene.png │ │ ├── lucene_white.png │ │ ├── plugin.png │ │ ├── search-logo.png │ │ └── services.svg │ └── js │ │ └── backend │ │ ├── settings.js │ │ └── startup.js ├── translations │ └── admin.en.yml └── views │ └── List │ ├── Partial │ ├── Pagination │ │ ├── default.html.twig │ │ └── single.html.twig │ └── suggestions.html.twig │ ├── error.html.twig │ └── result.html.twig ├── Task ├── AbstractTask.php ├── Crawler │ ├── CrawlerTask.php │ ├── Event │ │ ├── Logger.php │ │ └── Statistics.php │ ├── Filter │ │ ├── Discovery │ │ │ ├── NegativeUriFilter.php │ │ │ └── UriFilter.php │ │ ├── FilterPersistor.php │ │ ├── LogDispatcher.php │ │ └── PostFetch │ │ │ ├── MaxContentSizeFilter.php │ │ │ └── MimeTypeFilter.php │ ├── Listener │ │ └── Abort.php │ └── PersistenceHandler │ │ └── FileSerializedResourcePersistenceHandler.php ├── Parser │ └── ParserTask.php ├── System │ ├── ShutDownTask.php │ └── StartUpTask.php ├── TaskInterface.php └── TaskManager.php ├── Tool ├── CrawlerState.php └── Install.php └── Twig └── Extension ├── CategoriesExtension.php ├── CrawlerExtension.php └── PaginationExtension.php /.gitignore: -------------------------------------------------------------------------------- 1 | ###################### 2 | # Compiled source # 3 | ###################### 4 | *.com 5 | *.class 6 | *.dll 7 | *.exe 8 | *.o 9 | *.so 10 | 11 | ###################### 12 | # Packages # 13 | ###################### 14 | # it's better to unpack these files and commit the raw source 15 | # git has its own built in compression methods 16 | *.7z 17 | *.dmg 18 | *.gz 19 | *.iso 20 | *.jar 21 | 22 | ###################### 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | 27 | ###################### 28 | # Global # 29 | ###################### 30 | .DS_Store 31 | .DS_Store\? 32 | ._* 33 | .Spotlight-V100 34 | .Trashes 35 | Icon\? 
36 | *.sublime-workspace 37 | *.sublime-project 38 | atlassian-ide-plugin.xml 39 | .idea/ 40 | .project 41 | ehthumbs.db 42 | Thumbs.db 43 | Vagrantfile 44 | .vagrant 45 | php-cgi.core 46 | .sass-cache 47 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 5 | 6 | ## Our Standards 7 | Examples of behavior that contributes to creating a positive environment include: 8 | 9 | * Using welcoming and inclusive language 10 | * Being respectful of differing viewpoints and experiences 11 | * Gracefully accepting constructive criticism 12 | * Focusing on what is best for the community 13 | * Showing empathy towards other community members 14 | 15 | Examples of unacceptable behavior by participants include: 16 | 17 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 18 | * Trolling, insulting/derogatory comments, and personal or political attacks 19 | * Public or private harassment 20 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 21 | * Other conduct which could reasonably be considered inappropriate in a professional setting 22 | 23 | ## Our Responsibilities 24 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
25 | 26 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 27 | 28 | ## Scope 29 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 30 | 31 | ## Enforcement 32 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at support@dachcom.ch. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 33 | 34 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
35 | 36 | ## Attribution 37 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 38 | 39 | [homepage]: http://contributor-covenant.org 40 | [version]: http://contributor-covenant.org/version/1/4/ 41 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to a Members 2 | 3 | ## Bug Reports & Feature Requests 4 | The Members team heavily uses (and loves!) GitHub for all of our software management. 5 | We use GitHub issues exclusively to track all bugs and features. 6 | 7 | * [Open an issue](https://github.com/dachcom-digital/pimcore-members/issues) here on GitHub. 8 | If you can, **please provide a fix and create a pull request (PR) instead**; this will automatically create an issue for you. 9 | * Report security issues **only** to support@dachcom.ch 10 | * Please be patient as not all items will be tested immediately - remember, Members is open source and free of charge. 11 | * Occasionally we'll close issues if they appear stale or are too vague - please don't take this personally! 12 | Please feel free to re-open issues we've closed if there's something we've missed and they still need to be addressed. 13 | 14 | ## Contributing Pull Requests 15 | PR's are even better than issues. 16 | We gladly accept community pull requests. 17 | There are a few necessary steps before we can accept a pull request: 18 | 19 | * [Open an issue](https://github.com/dachcom-digital/pimcore-members/issues) describing the problem that you are looking to solve in 20 | your PR (if one is not already open), and your approach to solving it (not necessary for bug fixes - only feature contributions). 21 | * [Fork us!](https://help.github.com/articles/fork-a-repo/) Code! Follow the coding standards PSR-1, PSR-2 and PSR-4. 
22 | * [Send a pull request](https://help.github.com/articles/using-pull-requests/) from your fork’s branch to our `master` branch. 23 | 24 | ### Contributor License Agreement 25 | The following terms are used throughout this agreement: 26 | 27 | * **You** - the person or legal entity including its affiliates asked to accept this agreement. An affiliate is any 28 | entity that controls or is controlled by the legal entity, or is under common control with it. 29 | 30 | * **Project** - is an umbrella term that refers to any and all Members projects. 31 | 32 | * **Contribution** - any type of work that is submitted to a Project, including any modifications or additions to 33 | existing work. 34 | 35 | * **Submitted** - conveyed to a Project via a pull request, commit, issue, or any form of electronic, written, or 36 | verbal communication with Members, contributors or maintainers. 37 | 38 | #### 1. Grant of Copyright License. 39 | Subject to the terms and conditions of this agreement, You grant to the Projects’ maintainers, contributors, users and 40 | to Members a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, 41 | prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your contributions and such 42 | derivative works. Except for this license, You reserve all rights, title, and interest in your contributions. 43 | 44 | #### 2. Grant of Patent License. 
45 | Subject to the terms and conditions of this agreement, You grant to the Projects’ maintainers, contributors, users and 46 | to dachcom-digital/members a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) 47 | patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer your contributions, where 48 | such license applies only to those patent claims licensable by you that are necessarily infringed by your contribution 49 | or by combination of your contribution with the project to which this contribution was submitted. 50 | 51 | If any entity institutes patent litigation - including cross-claim or counterclaim in a lawsuit - against You alleging 52 | that your contribution or any project it was submitted to constitutes or is responsible for direct or contributory 53 | patent infringement, then any patent licenses granted to that entity under this agreement shall terminate as of the 54 | date such litigation is filed. 55 | 56 | #### 3. Source of Contribution. 57 | Your contribution is either your original creation, based upon previous work that, to the best of your knowledge, is 58 | covered under an appropriate open source license and you have the right under that license to submit that work with 59 | modifications, whether created in whole or in part by you, or you have clearly identified the source of the contribution 60 | and any license or other restriction (like related patents, trademarks, and license agreements) of which you are 61 | personally aware. 62 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | | Q | A 2 | | ---------------- | ----- 3 | | Bug report? | yes/no 4 | | Feature request? | yes/no 5 | | BC Break report? | yes/no 6 | | RFC? 
| yes/no 7 | 8 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | Copyright (C) 2017 DACHCOM.DIGITAL 3 | 4 | This software is available under the GNU General Public License version 3 (GPLv3). 5 | 6 | ### GNU General Public License version 3 (GPLv3) 7 | If you decide to choose the GPLv3 license, you must comply with the following terms: 8 | 9 | This program is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | This program is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with this program. If not, see <https://www.gnu.org/licenses/>. 21 | 22 | [GNU General Public License](https://www.gnu.org/licenses/gpl-3.0.en.html) -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | | Q | A 2 | | ------------- | --- 3 | | Bug fix? | yes/no 4 | | New feature? | yes/no 5 | | BC breaks? | no 6 | | Deprecations? | yes/no 7 | | Fixed tickets | #... 
8 | 9 | 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pimcore Lucene Search 2 | ![lucenesearch crawler](https://cloud.githubusercontent.com/assets/700119/25579028/7da66f40-2e74-11e7-8da5-988d61feb2e2.jpg) 3 | 4 | ## Note 5 | The Pimcore Lucene Search Bundle will be marked as abandoned as soon as the [Dynamic Search Bundle](https://github.com/dachcom-digital/pimcore-dynamic-search) has reached a stable state. 6 | After that, bugfixing will be supported in some cases. However, PRs are always welcome. 7 | 8 | ### Requirements 9 | - Pimcore >= 5.8 10 | - Pimcore >= 6.0 11 | 12 | #### Pimcore 4 13 | Get the Pimcore4 Version [here](https://github.com/dachcom-digital/pimcore-lucene-search/tree/pimcore4). 14 | 15 | ### Installation 16 | 1. Add code below to your `composer.json` 17 | 2. Activate & install it through the ExtensionManager 18 | 19 | ```json 20 | "require" : { 21 | "dachcom-digital/lucene-search" : "~2.3.0" 22 | } 23 | ``` 24 | 25 | ### Configuration 26 | To enable LuceneSearch, add those lines to your `AppBundle/Resources/config/pimcore/config.yml`: 27 | 28 | ```yaml 29 | lucene_search: 30 | enabled: true 31 | ``` 32 | 33 | A complete setup could look like this: 34 | 35 | ```yaml 36 | lucene_search: 37 | enabled: true 38 | fuzzy_search_results: false 39 | search_suggestion: true 40 | seeds: 41 | - 'http://your-domain.dev' 42 | filter: 43 | valid_links: 44 | - '@^http://your-domain.dev.*@i' 45 | view: 46 | max_per_page: 10 47 | crawler: 48 | content_max_size: 4 49 | content_start_indicator: '' 50 | content_end_indicator: '' 51 | ``` 52 | 53 | You need to add the config parameter to your config.yml to override the default values. 
54 | Execute this command to get some information about all the config elements of LuceneSearch: 55 | 56 | ```bash 57 | # configuration about all config parameters 58 | $ bin/console config:dump-reference LuceneSearchBundle 59 | 60 | # configuration info about the "fuzzy_search_results" parameter 61 | $ bin/console config:dump-reference LuceneSearchBundle fuzzy_search_results 62 | ``` 63 | 64 | We also added a [detailed documentation](docs/00_Configuration_Values.md) about all possible config values. 65 | 66 | ### Features 67 | * Maintenance driven indexing 68 | * Auto Complete 69 | * Restricted Documents & Usergroups ([member](https://github.com/dachcom-digital/pimcore-members) plugin recommended but not required) 70 | 71 | ### Usage 72 | 73 | **Default** 74 | The crawler Engine will start automatically every night by default. Please check that the pimcore default maintenance script is properly installed. 75 | 76 | **Command Line Command** 77 | If you want to start the crawler manually, use this command: 78 | 79 | ``` 80 | $ php bin/console lucenesearch:crawl -f -v 81 | ``` 82 | 83 | | command | short command | type | description | 84 | |:---|:---|:---|:---| 85 | | ```force``` | `-f` | force crawler start | Sometimes the crawler gets stuck because of a critical error, mostly triggered by a wrong configuration. Use this command to force a restart. | 86 | | ```verbose``` | `-v` | show some logs | Good for debugging. You'll get some additional information about filtered and forbidden links while crawling. | 87 | 88 | ## Logs 89 | You'll find some logs from the last crawl in your backend (at the bottom on the LuceneSearch settings page). Of course you'll also find some logs in your `var/logs` folder. 90 | **Note:** please enable the debug mode in pimcore settings to get all types of logs. 91 | 92 | ## Further Information 93 | 94 | - [Categories](docs/20_Categories.md): Learn more about category based crawling / searching. 
95 | - [Custom Header](docs/29_Custom_Request_Header.md): Learn how to add custom headers to the crawler request (like an auth token). 96 | - [Restrictions](docs/30_Restrictions.md): Learn more about restricted crawling / indexing. 97 | - [Custom Meta Content](docs/40_Meta.md): Learn more about crawling / searching custom meta. 98 | - [Crawler Events](docs/50_Crawler_Events.md): Hook into crawler process to add custom fields to index. 99 | - [Lucene Document Modification](docs/60_Document_Modification.md): Remove or change availability of lucene documents within a pimcore update/deletion event. 100 | - [Frontend Implementation](docs/90_Frontend_Implementation.md): Get a step by step walkthrough to implement lucene search into your website. 101 | 102 | ## Copyright and license 103 | Copyright: [DACHCOM.DIGITAL](http://dachcom-digital.ch) 104 | For licensing details please visit [LICENSE.md](LICENSE.md) 105 | 106 | ## Upgrade Info 107 | Before updating, please [check our upgrade notes!](UPGRADE.md) -------------------------------------------------------------------------------- /UPGRADE.md: -------------------------------------------------------------------------------- 1 | # Upgrade Notes 2 | 3 | #### Update from Version 2.1.x to Version 2.2.0 4 | - Pimcore 6 Compatibility 5 | - Fix encoding in lucene url field 6 | 7 | #### Update from Version 2.1.1 to Version 2.1.2 8 | - Availability Flag implemented. 9 | - DocumentModifier implemented. See [Docs](https://github.com/dachcom-digital/pimcore-lucene-search/blob/master/docs/60_Document_Modification.md). 10 | - Various Clean-Ups and try/catch improvements. 11 | - Do not index pages with other status codes than 200. 12 | - [Milestone](https://github.com/dachcom-digital/pimcore-lucene-search/milestone/7?closed=1) 13 | 14 | #### Update from Version 2.1.0 to Version 2.1.1 15 | - Implemented [PackageVersionTrait](https://github.com/pimcore/pimcore/blob/master/lib/Extension/Bundle/Traits/PackageVersionTrait.php). 
16 | - [Milestone](https://github.com/dachcom-digital/pimcore-lucene-search/milestone/5?closed=1) 17 | 18 | #### Update from Version 2.0.x to Version 2.1.0 19 | - **[REMOVED FEATURE]**: The SiteMap Feature has been removed. Please remove the `lucene_search.sitemap.render` config element **before** updating! 20 | - **[CRITICAL BUGFIX]**: There was a wrong path assignment for the tmp persistence manager. Please delete the `/var/tmp/ls-crawler-tmp` folder immediately. 21 | 22 | #### Update from Version 2.0.x to Version 2.0.2 23 | - **[NEW FEATURE]**: [Query/Hash Url Filter](docs/00_Configuration_Values.md) implemented. 24 | 25 | #### Update from Version 1.x to Version 2.0.0 26 | TBD -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dachcom-digital/lucene-search", 3 | "type": "pimcore-bundle", 4 | "license": "GPL-3.0+", 5 | "description": "Pimcore 5.x Website Indexer (powered by Zend Search Lucene)", 6 | "keywords": ["pimcore", "search", "lucene"], 7 | "homepage": "https://github.com/dachcom-digital/pimcore-lucene-search", 8 | "authors": [ 9 | { 10 | "name": "DACHCOM.DIGITAL Stefan Hagspiel", 11 | "email": "shagspiel@dachcom.ch", 12 | "homepage": "http://www.dachcom.com/", 13 | "role": "Developer" 14 | } 15 | ], 16 | "autoload": { 17 | "psr-4": { 18 | "LuceneSearchBundle\\": "src/LuceneSearchBundle" 19 | } 20 | }, 21 | 22 | "extra": { 23 | "pimcore": { 24 | "bundles": [ 25 | "LuceneSearchBundle\\LuceneSearchBundle" 26 | ] 27 | } 28 | }, 29 | "require": { 30 | "pimcore/pimcore": "^5.8.0 | ^6.0.0", 31 | "vdb/php-spider": "^0.3 | ^0.4", 32 | "zf1/zend-search-lucene": "~1.12" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /docs/00_Configuration_Values.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | Here you'll 
find all the configuration possibilities, default values and also some description. 4 | 5 | | Name | Type | Default | Description | 6 | |------------------------------------------|------|----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 7 | | enabled | bool | false | Enable and configure the search frontend if you want to include a full text search on your website. | 8 | | fuzzy_search_results | bool | false | Fuzzy search results: When enabled, a fuzzy search is performed. The search will automatically include related terms. | 9 | | search_suggestion | bool | true | Search suggestions: When enabled, a fuzzy search for similar search terms is performed. If no results could be found for the search term entered by the user, similar search terms are presented as suggestions. | 10 | | own_host_only | bool | true | Own Host only: Limit search (and crawling) results to results from the current seed (sub-)domain only. | 11 | | allow_subdomains | bool | false | Allow Subdomains: Limit search (and crawling) results to allow / disallow subomdains of current seed. | 12 | | seeds | array | [] | Start-Urls (Seeds): Specify start URLs for the crawler. Please enter with protocol! e.g. http://www.pimcore.org and enter a starting URL on your main domain first and any other domains next. | 13 | | categories | service | ~ | If search results should be displayed by categories, please enter all valid categories here. The crawler sorts a page into a category if it contains a html meta tag with the name "`lucene-search:categories`". | 14 | | filter:allow_query_in_url | bool | false | When true, LuceneSearch will crawl urls with query fragments. 
| 15 | | filter:allow_hash_in_url | bool | false | When true, LuceneSearch will crawl urls with hash fragments. | 16 | | filter:valid_links | array | [] | Regex for valid Uris: Specify PREG regexes with start and end delimiter to define allowed links. e.g. `@^http://www\.pimcore\.org*@i` | 17 | | filter:user_invalid_links | array | [] | Regex for forbidden Uris: Specify PREG regexes for links which should be ignored by the crawler. The crawler does not even follow these links e.g. `@^http://www\.pimcore\.org\/community*@i` | 18 | | allowed_mime_types | array | ['text/html', 'application/pdf'] | Supported: `text/html`, `application/pdf` | 19 | | allowed_schemes | array | ['http'] | Define which url Schemes are allowed. (eg. http and/or https). Default is http. | 20 | | crawler:max_link_depth | int | 15 | To avoid loops produced by relative links on a website, a maximum link depth needs to be set. Please choose the value suited to the website to crawl. | 21 | | crawler:max_download_limit | int | 0 | Maximum links to crawl: Constrain crawler to a specific limit of crawled links. Default is 0 which means no limit. | 22 | | crawler:content_max_size | int | 0 | Maximum content size (in MB): crawler ignores resources if its size exceeds limit (mostly useful for asset indexing). Default is 0 which means no limit. | 23 | | crawler:content_start_indicator | string | ~ | You can limit the page content relevant for searching by surrounding it with certain html comments. The crawler will still parse the entire page to find links, but only the specified area within your html comments is used when searching for a term. String specifying content start for search. | 24 | | crawler:content_end_indicator | string | ~ | String specifying content end for search. | 25 | | crawler:content_exclude_start_indicator: | string | ~ | String specifying exclude content start for search. 
| 26 | | crawler:content_exclude_end_indicator | string | ~ | String specifying exclude content end for search. | 27 | | locale:ignore_language | bool | false | Receive search results from all languages, set to false to limit search results to the current language only. The current language is retrieved from the registry, the language of any page in the search result index is extracted by the crawler (Content-Language Http header, html tag lang attribute or html meta tag content-language) | 28 | | locale:ignore_country | bool | true | Receive search results from all countries, set to false to limit search results to country only. The current country is retrieved from the search result index. it is extracted by the crawler (html meta tag country) | 29 | | restriction:enabled | bool | false | Document Restriction: Ignore Document restrictions. Set to true if you're using the [Pimcore/MembersBundle](https://github.com/dachcom-digital/pimcore-members) | 30 | | boost:documents | int | 1 | Document Boost Factor | 31 | | boost:assets | int | 1 | Asset Boost Factor | 32 | | view:max_per_page | int | 10 | Max Results per Page | 33 | | view:max_suggestions | int | 10 | Max Suggestions | -------------------------------------------------------------------------------- /docs/20_Categories.md: -------------------------------------------------------------------------------- 1 | # Categories 2 | It's possible to activate a category based indexing / searching. 3 | 4 | ### Configuration 5 | 6 | ```yaml 7 | lucene_search: 8 | enabled: true 9 | categories: AppBundle\LuceneSearch\Services\Categories 10 | ``` 11 | 12 | You need a custom service for that which implements the `LuceneSearchBundle\Configuration\Categories\CategoriesInterface` interface. 
13 | So your class may look like this: 14 | 15 | ```php 16 | 1, 'label' => 'Category 1'], 32 | [ 'id' => 2, 'label' => 'Category 2'], 33 | ]; 34 | } 35 | 36 | } 37 | ``` 38 | 39 | To inform the lucene search crawler about those categories we need to add another meta element. 40 | As you can see it's also possible to add multiple categories per document. 41 | 42 | ```html 43 | {% if lucene_search_crawler_active() %} 44 | 45 | {% endif %} 46 | ``` 47 | 48 | Congratulations, you're done. From now on the categories get stored into the lucene index. 49 | 50 | ### Twig Extension 51 | If you need the categories in your template, you could use the following snippet: 52 | 53 | ```html 54 | {% for category in lucene_search_get_categories() %} 55 | Id: {{ category.id }}, Label: {{ category.label}} 56 | {% endfor %} 57 | ``` 58 | 59 | ### Templating 60 | If you want to know how to implement the categories in frontend, check out our [frontend implementation advice](90_Frontend_Implementation.md). 61 | -------------------------------------------------------------------------------- /docs/29_Custom_Request_Header.md: -------------------------------------------------------------------------------- 1 | # Custom Request Header 2 | 3 | Add some header information to the crawler request. 4 | 5 | > The [Members](https://github.com/dachcom-digital/pimcore-members) Bundle adds an auth header element by default. 
6 | 7 | ## Event 8 | 9 | | Name | Class | Setter | 10 | |---------------------|-------------|-------------------------------| 11 | | `lucene_search.task.crawler.request_header` | Event\CrawlerRequestHeaderEvent | addHeader | 12 | 13 | ## Example: Auth 14 | 15 | ```yaml 16 | parameters: 17 | lucene_search_user_name: 'Crawler' 18 | lucene_search_password: 'crawler@universe.org' 19 | 20 | 21 | AppBundle\EventListener\CrawlerHeader: 22 | arguments: 23 | $userName: '%lucene_search_user_name%' 24 | $password: '%lucene_search_password%' 25 | tags: 26 | - { name: kernel.event_subscriber } 27 | ``` 28 | 29 | ```php 30 | 'addHeaderToLuceneCrawler' 44 | ]; 45 | } 46 | 47 | public function addHeaderToLuceneCrawler(CrawlerRequestHeaderEvent $event) 48 | { 49 | //example 1: token auth. 50 | $event->addHeader([ 51 | 'name' => 'x-auth-token', 52 | 'value' => 'your-special-token', 53 | 'identifier' => 'lucene-search-token-auth' 54 | ]); 55 | 56 | //example 2: basic auth. 57 | $event->addHeader([ 58 | 'name' => 'Authorization', 59 | 'value' => 'Basic ' . base64_encode('USERNAME:PASSWORD'), 60 | 'identifier' => 'lucene-search-basic-auth' 61 | ]); 62 | } 63 | } 64 | ``` -------------------------------------------------------------------------------- /docs/30_Restrictions.md: -------------------------------------------------------------------------------- 1 | # Restrictions 2 | If you want a seamless integration of protected document crawling, install our [Members](https://github.com/dachcom-digital/pimcore-members) bundle. 3 | 4 | 5 | ### Documents 6 | Each document needs a meta tag in the head section. The crawler extracts and stores the usergroup id(s) from that meta property. 7 | 8 | > If you're using the Members Bundle this meta property gets assigned automatically. 9 | 10 | **Meta Property Example** 11 | ```html 12 | 13 | ``` 14 | 15 | If the document is restricted to a specific user group, the meta `content` contains its id. 
Otherwise, the meta property needs to be filled with a `default` value. 16 | 17 | ### Assets 18 | Since assets do not have an HTML view, you need to catch an event (`lucene_search.task.parser.asset_restriction`). 19 | > If you're using the Members Bundle this event is already implemented. 20 | 21 | ## Asset Language restriction 22 | Because assets do not have any language hierarchy, you need to add a property called `assigned_language`. This property will be installed during the install process of LuceneSearch. 23 | If you add some additional language afterwards, you need to add this language to the property. If you do not set any information at all, the asset will be found in any language context. 24 | 25 | ## Asset Country restriction 26 | Because assets do not have any country hierarchy, you need to add a property called `assigned_country`. This property will be installed during the install process of LuceneSearch. 27 | If you add some additional countries afterwards, you need to add this country to the property. If you do not set any information at all, the asset will be found in any country context. 28 | 29 | ## Events 30 | 31 | | Name | Class | Setter | 32 | |---------------------|-------------|-------------------------------| 33 | | `lucene_search.task.parser.asset_restriction` | Event\AssetResourceRestrictionEvent | setRestrictions, setAsset | 34 | | `lucene_search.frontend.restriction_context` | Event\RestrictionContextEvent | setAllowedRestrictionGroups | -------------------------------------------------------------------------------- /docs/40_Meta.md: -------------------------------------------------------------------------------- 1 | # Custom Meta Content 2 | 3 | In some cases you need to add some content or keywords to improve the search accuracy. 4 | But it's not meant for the public crawlers like Google. LuceneSearch is using a custom meta property called `lucene-search:meta`. 5 | This Element should be visible while crawling only. 
6 | 7 | **Example:** 8 | 9 | ```html 10 | {% if lucene_search_crawler_active() %} 11 | 12 | {% endif %} 13 | ``` 14 | 15 | ## Custom Meta in Documents 16 | It's also possible to add the custom meta property in backend. 17 | 18 | Open *Document* => *Settings* go to *Meta Data* and add a new field: 19 | 20 | ```html 21 | 22 | ``` 23 | 24 | > **Note:** Currently it's not possible to hide this meta tag if you're adding it via backend since pimcore provides no way to add/remove/modify those elements programmatically. 25 | 26 | ## Custom Meta in Objects 27 | Because Object may have some front-end capability (a news detail page for example), you have to integrate the custom meta field by yourself (see example above). 28 | 29 | ## Custom Meta in Assets 30 | TBD -------------------------------------------------------------------------------- /docs/50_Crawler_Events.md: -------------------------------------------------------------------------------- 1 | # Crawler Events 2 | 3 | Hook into crawler process to add custom fields to current lucene document. 4 | 5 | ## HtmlParserEvent params 6 | 7 | ### Document Id 8 | The crawler will always add the ID of the current indexed pimcore document to the params array. 9 | You can access it using `$params['document_id']`. 10 | 11 | > **Note!** The document id is not available in the lucene index unless you're adding it via the parser event (see example below) 12 | 13 | ### Object Id 14 | The crawler will check for the presence of a meta tag called `lucene-search:objectId`. 15 | If the meta tag is present, the objectId will be passed to the event inside the params array. 16 | You can access it using `$params['object_id']`. 17 | 18 | Since it is not possible to automatically detect the current object id, you need to add it by yourself. 
19 | This is an example how you could implement the *lucene-search:objectId* meta tag: 20 | 21 | ```html 22 | {% if lucene_search_crawler_active() %} 23 | {% do pimcore_head_meta().appendName('lucene-search:objectId', product.id) %} 24 | {% endif %} 25 | ``` 26 | 27 | ## Configuration 28 | 29 | ```yaml 30 | AppBundle\EventListener\LuceneSearchParserListener: 31 | autowire: true 32 | tags: 33 | - { name: kernel.event_subscriber } 34 | ``` 35 | 36 | ## Services 37 | 38 | ```php 39 | 'parseHtml', 55 | LuceneSearchEvents::LUCENE_SEARCH_PARSER_PDF_DOCUMENT => 'parsePdf', 56 | ]; 57 | } 58 | 59 | public function parseHtml(HtmlParserEvent $event) 60 | { 61 | $luceneDoc = $event->getDocument(); 62 | $html = $event->getHtml(); 63 | $params = $event->getParams(); 64 | 65 | if (!empty($params['document_id'])) { 66 | $document = \Pimcore\Model\Document::getById($params['document_id']); 67 | $documentIdField = \Zend_Search_Lucene_Field::keyword('documentId', $document->getId()); 68 | $luceneDoc->addField($documentIdField); 69 | } 70 | 71 | if (!empty($params['object_id'])) { 72 | $object = DataObject::getById($params['object_id']); 73 | $objectIdField = \Zend_Search_Lucene_Field::keyword('objectId', $object->getId()); 74 | $luceneDoc->addField($objectIdField); 75 | } 76 | 77 | // additional fields 78 | $field = \Zend_Search_Lucene_Field::text('myCustomField', 'Custom field content', $params['encoding']); 79 | $field->boost = 5; 80 | 81 | $luceneDoc->addField($field); 82 | 83 | $event->setDocument($luceneDoc); 84 | } 85 | 86 | public function parsePdf(PdfParserEvent $event) 87 | { 88 | $luceneDoc = $event->getDocument(); 89 | $content = $event->getContent(); 90 | $assetMetaData = $event->getAssetMetaData(); 91 | $params = $event->getParams(); 92 | 93 | $field = \Zend_Search_Lucene_Field::text('myCustomField', 'Custom field content', $params['encoding']); 94 | $luceneDoc->addField($field); 95 | 96 | $event->setDocument($luceneDoc); 97 | } 98 | } 99 | ``` 100 | 
-------------------------------------------------------------------------------- /docs/60_Document_Modification.md: -------------------------------------------------------------------------------- 1 | # Lucene Document Modification 2 | 3 | It's possible to modify an indexed document. 4 | 5 | Use the `DocumentModifier` class to: 6 | 7 | - mark Lucene-Document as available 8 | - mark Lucene-Document as unavailable 9 | - mark Lucene-Document as deleted (remove from index until next crawl) 10 | 11 | **Note:** The availability check works within the maintenance cycle so there is a dispatch delay of up to 5 minutes depending on your maintenance cron settings! 12 | 13 | ## Warning! 14 | There are some limitations while changing lucene documents. 15 | If we change the availability of documents, we can't just update an existing document 16 | since Zend Lucene does not allow us to modify existing documents. Instead we need to add them as new documents. 17 | Read more about it [here](https://framework.zend.com/manual/1.12/en/zend.search.lucene.index-creation.html#zend.search.lucene.index-creation.document-updating). 18 | 19 | ### Boost 20 | Because of complex lucene indexing strategies, it's not possible to re-gather the boost factor of documents **and** fields. 21 | So you need to hook into the `lucene_search.modifier.document` event and add those boost values again (see example event below). 22 | 23 | ### UnStored Fields 24 | Currently it's not possible to re-add fields with type `\Zend_Search_Lucene_Field::unStored` since they are not available in the query document! 25 | If you're changing the availability of documents with `Unstored` fields, they're gone after updating! 26 | Read more about field types [here](https://framework.zend.com/manual/1.10/en/zend.search.lucene.overview.html#zend.search.lucene.index-creation.understanding-field-types). 27 | 28 | Solution: Hook into the `lucene_search.modifier.document` event and add them again (see example event below).
29 | 30 | ## Implementation 31 | 32 | ```yaml 33 | AppBundle\EventListener\IndexManipulator: 34 | autowire: true 35 | tags: 36 | - { name: kernel.event_subscriber } 37 | ``` 38 | 39 | ```php 40 | documentModifier = $documentModifier; 59 | } 60 | 61 | public static function getSubscribedEvents() 62 | { 63 | return [ 64 | DocumentEvents::PRE_UPDATE => 'onPreUpdate', 65 | DocumentEvents::PRE_DELETE => 'onPreDelete', 66 | LuceneSearchEvents::LUCENE_SEARCH_DOCUMENT_MODIFICATION => 'onModification', 67 | ]; 68 | } 69 | 70 | public function onPreUpdate(DocumentEvent $event) 71 | { 72 | $document = $event->getDocument(); 73 | 74 | try { 75 | // get current document from db (without changed values) 76 | $storedDocument = Document::getById($document->getId(), true); 77 | } catch (\Exception $e) { 78 | $storedDocument = null; 79 | } 80 | 81 | // check if untouched db entity has same status. if so = save resources and skip updating. 82 | if ($storedDocument instanceof Document) { 83 | if ($storedDocument->getPublished() === $document->getPublished()) { 84 | return; 85 | } 86 | } 87 | 88 | if ($document->isPublished() === true) { 89 | $marker = DocumentModifier::MARK_AVAILABLE; 90 | } else { 91 | $marker = DocumentModifier::MARK_UNAVAILABLE; 92 | } 93 | 94 | // way 1: use a custom lucene query (slower but could be a complex query) 95 | // yourCustomMetaIdentifier: you need to add custom Keyword via the lucene_search.task.parser.html_parser event 96 | $term = new \Zend_Search_Lucene_Index_Term($document->getProperty('yourCustomMetaIdentifierProperty'), 'yourIdentifier'); 97 | $query = new \Zend_Search_Lucene_Search_Query_Term($term); 98 | $this->documentModifier->markDocumentsViaQuery($query, $marker); 99 | 100 | // way 2: use simple term index (faster but only one term possible) 101 | // yourCustomMetaIdentifier: you need to add custom Keyword via the lucene_search.task.parser.html_parser event 102 | $term = new 
\Zend_Search_Lucene_Index_Term($document->getProperty('yourCustomMetaIdentifierProperty'), 'yourIdentifier'); 103 | $this->documentModifier->markDocumentsViaTerm($term, $marker); 104 | 105 | } 106 | 107 | public function onPreDelete(DocumentEvent $event) 108 | { 109 | $document = $event->getDocument(); 110 | 111 | // yourCustomMetaIdentifier: you need to add custom Keyword via the lucene_search.task.parser.html_parser event 112 | $term = new \Zend_Search_Lucene_Index_Term($document->getProperty('yourCustomMetaIdentifierProperty'), 'yourIdentifier'); 113 | $this->documentModifier->markDocumentsViaTerm($term, DocumentModifier::MARK_DELETED); 114 | } 115 | 116 | /** 117 | * You only need this method if you want to re-add boost values or unstored fields. 118 | * 119 | * @param DocumentModificationEvent $event 120 | */ 121 | public function onModification(DocumentModificationEvent $event) 122 | { 123 | $document = $event->getDocument(); 124 | 125 | $someConditionsAreTrue = false; 126 | 127 | // use this event to re-add boost values 128 | if ($someConditionsAreTrue === true) { 129 | $document->boost = 999; 130 | $event->setDocument($document); 131 | } 132 | } 133 | } 134 | ``` -------------------------------------------------------------------------------- /docs/90_Frontend_Implementation.md: -------------------------------------------------------------------------------- 1 | # Lucene Search FrontEnd 2 | This guide will help you to implement a search page into your website in seconds. 3 | 4 | ### Optional: Create a Layout/Controller 5 | > Note: This is only required if you're starting a project from scratch. 6 | 7 | - Create a layout in `app\Resources\views\layout.html.twig` 8 | - Add some markup to your layout: 9 | 10 | ```twig 11 | 12 | 13 | 14 | {# your head data #} 15 | 16 | 17 |
18 | {% block content %} 19 | {# your main content data #} 20 | {% endblock %} 21 |
22 | 23 | 24 | ``` 25 | - Create a controller, name it `DefaultController` 26 | - Create a method, name it `searchAction(Request $request);` 27 | 28 | ### Setup Search Page 29 | - Create a document, call it "search" 30 | - Optional: In document settings, set controller to `DefaultController` and Method to `searchAction`. 31 | - Create a view template (e.g. `app\Resources\views\Default\search.html.twig`) 32 | - Add some twig markup to the view: 33 | 34 | ```twig 35 | {% extends 'layout.html.twig' %} 36 | 37 | {# note: the "content" block must be available in your master layout, see optional config above. #} 38 | {% block content %} 39 | {{ render(controller('LuceneSearchBundle:List:getResult')) }} 40 | {% endblock %} 41 | ``` 42 | 43 | This will load the result template from `@LuceneSearch/Resources/views/List/result.html.twig`. 44 | If you want to use your own custom templates to display the search results, place them inside 45 | `app/Resources/LuceneSearchBundle/views/List/*.html.twig` (see symfony [documentation](https://symfony.com/doc/current/templating/overriding.html) for further details). 46 | 47 | ### Ajax AutoComplete 48 | Use this snippet to allow ajax driven auto-complete search. You may want to use this [plugin](https://github.com/devbridge/jQuery-Autocomplete) to do the job. 49 | 50 | 1. Add some JS files (in your layout for example): 51 | 52 | ```html 53 | 54 | 55 | ``` 56 | 57 | 2. Add auto-complete to your project: 58 | 59 | ```javascript 60 | $(function() { 61 | 62 | var $el = $('input.search-field'), 63 | language = $el.data('language'), //optional 64 | country = $el.data('country'), 65 | $categoryEl = $el.closest('form').find('select.categories'), 66 | categories = []; //optional 67 | 68 | $el.autocomplete({ 69 | minChars: 3, 70 | triggerSelectOnValidInput: false, 71 | lookup: function(term, done) { 72 | //update on every lookup because user may have changed the dropdown selection.
73 | categories = $categoryEl.val(); //optional 74 | $.getJSON( 75 | '/lucence-search/auto-complete', 76 | { 77 | q: term, 78 | language : language, 79 | country: country, 80 | categories: categories 81 | }, 82 | function(data) { 83 | var result = { suggestions : [] }; 84 | if(data.length > 0) { 85 | $.each(data, function(index, suggestion) { 86 | result.suggestions.push( {value : suggestion }); 87 | }); 88 | } 89 | done(result); 90 | }); 91 | }, 92 | onSelect: function(result) { 93 | $el.val(result.value); 94 | $el.parents('form').submit(); 95 | } 96 | }); 97 | }); 98 | ``` 99 | 100 | 3. Place this html snippet on top of your layout for example: 101 | 102 | ```html 103 | 127 | ``` 128 | 129 | 4. Done. Now try to search something without hitting return. 130 | 131 | > Don't forget to start your crawler before testing the auto-completer. 132 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Command/CrawlCommand.php: -------------------------------------------------------------------------------- 1 | taskManager = $taskManager; 28 | } 29 | 30 | /** 31 | * {@inheritdoc} 32 | */ 33 | protected function configure() 34 | { 35 | $this 36 | ->setName('lucenesearch:crawl') 37 | ->setDescription('LuceneSearch Website Crawler') 38 | ->addOption('force', 'f', 39 | InputOption::VALUE_NONE, 40 | 'Force Crawl Start'); 41 | } 42 | 43 | /** 44 | * @param InputInterface $input 45 | * @param OutputInterface $output 46 | * 47 | * @return void 48 | */ 49 | protected function execute(InputInterface $input, OutputInterface $output) 50 | { 51 | $consoleLogger = new ConsoleLogger(); 52 | $consoleLogger->setConsoleOutput($output); 53 | $this->taskManager->setLogger($consoleLogger); 54 | 55 | try { 56 | $this->taskManager->processTaskChain(['force' => $input->getOption('force')]); 57 | } catch (\Exception $e) { 58 | $output->writeln(sprintf('LuceneSearch: Error while crawling: %s.', $e->getMessage())); 59 | } 60 | 61 | 
$output->writeln('LuceneSearch: Finished crawl.'); 62 | 63 | } 64 | 65 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Command/DocumentModifierCommand.php: -------------------------------------------------------------------------------- 1 | queuedDocumentModifier = $queuedDocumentModifier; 37 | } 38 | 39 | /** 40 | * {@inheritdoc} 41 | */ 42 | protected function configure() 43 | { 44 | $this 45 | ->setHidden(true) 46 | ->setName('lucenesearch:modifier:resolve') 47 | ->setDescription('For internal use only'); 48 | } 49 | 50 | /** 51 | * @param InputInterface $input 52 | * @param OutputInterface $output 53 | * 54 | * @return int|null|void 55 | */ 56 | protected function execute(InputInterface $input, OutputInterface $output) 57 | { 58 | $this->queuedDocumentModifier->resolveQueue(); 59 | } 60 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Configuration/Categories/CategoriesInterface.php: -------------------------------------------------------------------------------- 1 | false, 13 | 'forceStop' => false, 14 | 'running' => false, 15 | 'started' => null, 16 | 'finished' => null 17 | ]; 18 | 19 | const SYSTEM_CONFIG_DIR_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle'; 20 | 21 | const SYSTEM_CONFIG_FILE_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/config.yml'; 22 | 23 | const STATE_FILE_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/state.cnf'; 24 | 25 | const CRAWLER_LOG_FILE_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/crawler.log'; 26 | 27 | const CRAWLER_PROCESS_FILE_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/processing.tmp'; 28 | 29 | const CRAWLER_URI_FILTER_FILE_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/uri-filter.tmp'; 30 | 31 | const CRAWLER_PERSISTENCE_STORE_DIR_PATH = PIMCORE_PRIVATE_VAR . 
'/bundles/LuceneSearchBundle/persistence-store'; 32 | 33 | const CRAWLER_TMP_ASSET_DIR_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/tmp-assets'; 34 | 35 | const INDEX_DIR_PATH = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/index'; 36 | 37 | const INDEX_DIR_PATH_GENESIS = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/index/genesis'; 38 | 39 | const INDEX_DIR_PATH_STABLE = PIMCORE_PRIVATE_VAR . '/bundles/LuceneSearchBundle/index/stable'; 40 | 41 | /** 42 | * @var PimcoreBundleManager 43 | */ 44 | protected $bundleManager; 45 | 46 | /** 47 | * @var Filesystem 48 | */ 49 | private $fileSystem; 50 | 51 | /** 52 | * @var array 53 | */ 54 | private $config; 55 | 56 | /** 57 | * @var array 58 | */ 59 | private $systemConfig; 60 | 61 | /** 62 | * @var CategoriesInterface 63 | */ 64 | private $categoryService; 65 | 66 | /** 67 | * Configuration constructor. 68 | * 69 | * @param PimcoreBundleManager $bundleManager 70 | */ 71 | public function __construct(PimcoreBundleManager $bundleManager) 72 | { 73 | $this->bundleManager = $bundleManager; 74 | $this->fileSystem = new FileSystem(); 75 | } 76 | 77 | /** 78 | * @param array $config 79 | */ 80 | public function setConfig($config = []) 81 | { 82 | $this->config = $config; 83 | } 84 | 85 | /** 86 | * @param $slot 87 | * 88 | * @return mixed 89 | */ 90 | public function getConfig($slot) 91 | { 92 | return $this->config[$slot]; 93 | } 94 | 95 | /** 96 | * @param array $config 97 | */ 98 | public function setSystemConfig($config = []) 99 | { 100 | $this->systemConfig = $config; 101 | } 102 | 103 | /** 104 | * @param null $slot 105 | * 106 | * @return mixed 107 | */ 108 | public function getSystemConfig($slot = null) 109 | { 110 | return $this->systemConfig[$slot]; 111 | } 112 | 113 | /** 114 | * @param null $slot 115 | * 116 | * @return mixed 117 | */ 118 | public function getStateConfig($slot = null) 119 | { 120 | if (!$this->fileSystem->exists(Configuration::STATE_FILE_PATH)) { 121 | $content = 
serialize(Configuration::STATE_DEFAULT_VALUES); 122 | $this->fileSystem->appendToFile(Configuration::STATE_FILE_PATH, $content); 123 | } 124 | 125 | $data = file_get_contents(self::STATE_FILE_PATH); 126 | $arrayData = unserialize($data); 127 | 128 | return $slot == null ? $arrayData : $arrayData[$slot]; 129 | } 130 | 131 | /** 132 | * @param $slot 133 | * @param $value 134 | * 135 | * @throws \Exception 136 | */ 137 | public function setStateConfig($slot, $value) 138 | { 139 | $content = $this->getStateConfig(); 140 | 141 | if (!in_array($slot, array_keys($content))) { 142 | throw new \Exception('invalid state config slot "' . $slot . '"'); 143 | } 144 | 145 | $content[$slot] = $value; 146 | 147 | $this->fileSystem->dumpFile(self::STATE_FILE_PATH, serialize($content)); 148 | } 149 | 150 | /** 151 | * @param CategoriesInterface $categoryService 152 | */ 153 | public function setCategoryService(CategoriesInterface $categoryService) 154 | { 155 | $this->categoryService = $categoryService; 156 | } 157 | 158 | /** 159 | * @return array 160 | */ 161 | public function getCategories() 162 | { 163 | if (!$this->categoryService instanceof CategoriesInterface) { 164 | return []; 165 | } 166 | 167 | return $this->categoryService->getCategories(); 168 | } 169 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Controller/Admin/SettingsController.php: -------------------------------------------------------------------------------- 1 | json(['logData' => $data]); 21 | } 22 | 23 | public function getStateAction(Configuration $configManager, StateHandler $stateHandler) 24 | { 25 | $canStart = true; 26 | 27 | $currentState = $stateHandler->getCrawlerState(); 28 | 29 | $configComplete = $stateHandler->getConfigCompletionState() === 'complete'; 30 | 31 | if ($configComplete === false || 32 | $currentState === StateHandler::CRAWLER_STATE_ACTIVE || 33 | $stateHandler->isCrawlerInForceStart() === true 34 | ) { 35 | $canStart = 
false; 36 | } 37 | 38 | $canStop = true; 39 | 40 | if ($configComplete === false || 41 | $currentState !== StateHandler::CRAWLER_STATE_ACTIVE || 42 | $stateHandler->isCrawlerInForceStop() === true 43 | ) { 44 | $canStop = false; 45 | } 46 | 47 | return $this->json( 48 | [ 49 | 'state' => $stateHandler->getCrawlerStateDescription(), 50 | 'enabled' => $configManager->getConfig('enabled'), 51 | 'canStart' => $canStart, 52 | 'canStop' => $canStop 53 | ] 54 | ); 55 | } 56 | 57 | public function startCrawlerAction(StateHandler $stateHandler) 58 | { 59 | $stateHandler->forceCrawlerStartOnNextMaintenance(); 60 | 61 | return $this->json(['success' => true]); 62 | } 63 | 64 | public function stopCrawlerAction(StateHandler $stateHandler) 65 | { 66 | $stateHandler->stopCrawler(true); 67 | 68 | return $this->json(['success' => true]); 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Controller/AutoCompleteController.php: -------------------------------------------------------------------------------- 1 | luceneHelper->wildcardFindTerms($this->query, $this->frontendIndex); 19 | 20 | // try to find fuzzy related terms if not wildcard terms has been found 21 | if (empty($terms)) { 22 | $terms = $this->luceneHelper->fuzzyFindTerms($this->query, $this->frontendIndex); 23 | } 24 | 25 | $suggestions = []; 26 | $counter = 1; 27 | 28 | foreach ($terms as $term) { 29 | $t = $term->text; 30 | 31 | //check if term can be found for current language 32 | $hits = null; 33 | 34 | $query = new \Zend_Search_Lucene_Search_Query_Boolean(); 35 | $userQuery = \Zend_Search_Lucene_Search_QueryParser::parse($t, 'utf-8'); 36 | $query->addSubquery($userQuery, true); 37 | 38 | $this->addAdditionalSubQueries($query); 39 | 40 | $validHits = $this->getValidHits($this->frontendIndex->find($query)); 41 | 42 | if (count($validHits) > 0 and !in_array($t, $suggestions)) { 43 | $suggestions[] = $t; 44 | 45 | if ($counter >= 
$this->maxSuggestions) { 46 | break; 47 | } 48 | 49 | $counter++; 50 | } 51 | } 52 | 53 | $data = []; 54 | foreach ($suggestions as $suggestion) { 55 | $data[] = $suggestion; 56 | } 57 | 58 | return new JsonResponse($data); 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Controller/ListController.php: -------------------------------------------------------------------------------- 1 | highlighterHelper = $highlighterHelper; 20 | } 21 | 22 | /** 23 | * @return \Symfony\Component\HttpFoundation\Response 24 | */ 25 | public function getResultAction() 26 | { 27 | $requestQuery = $this->requestStack->getMasterRequest()->query; 28 | 29 | try { 30 | $query = new \Zend_Search_Lucene_Search_Query_Boolean(); 31 | 32 | $field = $requestQuery->get('field'); 33 | 34 | if (!empty($field)) { 35 | \Zend_Search_Lucene::setDefaultSearchField($field); 36 | } 37 | 38 | $searchResults = []; 39 | $validHits = []; 40 | 41 | if (!empty($this->query)) { 42 | //fuzzy search term if enabled 43 | if ($this->fuzzySearchResults) { 44 | $this->query = str_replace(' ', '~ ', $this->query); 45 | $this->query .= '~'; 46 | \Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(3); 47 | } 48 | 49 | $userQuery = \Zend_Search_Lucene_Search_QueryParser::parse($this->query, 'utf-8'); 50 | $query->addSubquery($userQuery, true); 51 | 52 | $this->addAdditionalSubQueries($query); 53 | 54 | $validHits = $this->getValidHits($this->frontendIndex->find($query)); 55 | 56 | $start = $this->perPage * ($this->currentPage - 1); 57 | $end = $start + ($this->perPage - 1); 58 | 59 | if ($end > count($validHits) - 1) { 60 | $end = count($validHits) - 1; 61 | } 62 | 63 | for ($i = $start; $i <= $end; $i++) { 64 | 65 | /** @var \Zend_Search_Lucene_Search_QueryHit $hit */ 66 | $hit = $validHits[$i]; 67 | 68 | /** @var \Zend_Search_Lucene_Document $doc */ 69 | $doc = $hit->getDocument(); 70 | $availableFieldNames = $doc->getFieldNames(); 71 | 
72 | $url = in_array('url', $availableFieldNames) ? $doc->getField('url')->value : null; 73 | $title = in_array('title', $availableFieldNames) ? $doc->getField('title')->value : null; 74 | $content = in_array('content', $availableFieldNames) ? $doc->getField('content')->value : null; 75 | 76 | $searchResult['boost'] = $doc->boost; 77 | $searchResult['title'] = $title; 78 | 79 | $searchResult['url'] = $url; 80 | $searchResult['summary'] = $this->highlighterHelper->getSummaryForUrl($content, $this->untouchedQuery); 81 | 82 | //H1, description and imageTags are not available in pdf files. 83 | if (in_array('h1', $availableFieldNames)) { 84 | $searchResult['h1'] = $doc->getField('h1')->value; 85 | } 86 | 87 | if (in_array('description', $availableFieldNames)) { 88 | $searchResult['description'] = $this->highlighterHelper->getSummaryForUrl($doc->getField('description')->value, 89 | $this->untouchedQuery); 90 | } 91 | 92 | if (in_array('imageTags', $availableFieldNames)) { 93 | $searchResult['imageTags'] = $doc->getField('imageTags')->value; 94 | } 95 | 96 | $searchResult['categories'] = []; 97 | 98 | if (in_array('categories', $availableFieldNames)) { 99 | $categories = $doc->getField('categories')->value; 100 | if (!empty($categories)) { 101 | $searchResult['categories'] = $this->mapCategories($categories); 102 | } 103 | } 104 | 105 | $searchResults[] = $searchResult; 106 | unset($searchResult); 107 | } 108 | } 109 | 110 | $suggestions = false; 111 | if ($this->searchSuggestion && count($searchResults) === 0) { 112 | $suggestions = $this->getFuzzySuggestions(); 113 | } 114 | 115 | $currentPageResultStart = $this->perPage * ($this->currentPage - 1); 116 | $currentPageResultEnd = $currentPageResultStart + $this->perPage; 117 | 118 | if ($currentPageResultEnd > count($validHits)) { 119 | $currentPageResultEnd = count($validHits); 120 | } 121 | 122 | $pages = 0; 123 | 124 | if (count($validHits) > 0) { 125 | $pages = ceil(count($validHits) / $this->perPage); 126 | } 127 | 
128 | $viewParams = [ 129 | 'searchCurrentPage' => $this->currentPage, 130 | 'searchAllPages' => $pages, 131 | 'searchCategory' => $this->queryCategories, 132 | 'searchAvailableCategories' => $this->categories, 133 | 'searchSuggestions' => $suggestions, 134 | 'searchLanguage' => $this->searchLanguage, 135 | 'searchCountry' => $this->searchCountry, 136 | 'searchPerPage' => $this->perPage, 137 | 'searchTotalHits' => count($validHits), 138 | 'searchQuery' => $this->untouchedQuery, 139 | 'searchHasResults' => count($searchResults) > 0, 140 | 'searchResults' => $searchResults, 141 | 'searchCurrentPageResultStart' => $currentPageResultStart + 1, 142 | 'searchCurrentPageResultEnd' => $currentPageResultEnd 143 | ]; 144 | 145 | $viewName = 'result'; 146 | 147 | } catch (\Exception $e) { 148 | 149 | $viewParams = [ 150 | 'error' => true, 151 | 'errorMessage' => $e->getMessage() . ' (' . $e->getFile() . ' Line: ' . $e->getLine() . ')', 152 | ]; 153 | 154 | $viewName = 'error'; 155 | } 156 | 157 | return $this->renderTemplate('@LuceneSearch/List/' . $viewName . 
'.html.twig', $viewParams); 158 | } 159 | 160 | /** 161 | * look for similar search terms 162 | * 163 | * @return array 164 | */ 165 | protected function getFuzzySuggestions() 166 | { 167 | $suggestions = []; 168 | 169 | if (empty($this->untouchedQuery)) { 170 | return $suggestions; 171 | } 172 | 173 | $terms = $this->luceneHelper->fuzzyFindTerms($this->untouchedQuery, $this->frontendIndex, 3); 174 | 175 | // reduce fuzzy prefix to 0 and try again 176 | if (empty($terms) || count($terms) < 1) { 177 | $terms = $this->luceneHelper->fuzzyFindTerms($this->untouchedQuery, $this->frontendIndex, 0); 178 | } 179 | 180 | if (!is_array($terms)) { 181 | return $suggestions; 182 | } 183 | 184 | $counter = 0; 185 | 186 | foreach ($terms as $term) { 187 | 188 | $query = new \Zend_Search_Lucene_Search_Query_Boolean(); 189 | 190 | $termText = $term->text; 191 | 192 | try { 193 | $userQuery = \Zend_Search_Lucene_Search_QueryParser::parse($termText, 'utf-8'); 194 | } catch (\Zend_Search_Lucene_Exception $e) { 195 | continue; 196 | } 197 | 198 | $query->addSubquery($userQuery, true); 199 | 200 | $this->addAdditionalSubQueries($query); 201 | 202 | try { 203 | $validHits = $this->getValidHits($this->frontendIndex->find($query)); 204 | } catch (\Zend_Search_Lucene_Exception $e) { 205 | $validHits = []; 206 | } 207 | 208 | if (count($validHits) > 0 && !in_array($termText, $suggestions)) { 209 | $suggestions[] = $termText; 210 | 211 | if ($counter >= $this->maxSuggestions) { 212 | break; 213 | } 214 | 215 | $counter++; 216 | } 217 | } 218 | 219 | return $suggestions; 220 | } 221 | 222 | /** 223 | * @param string $documentCategories 224 | * 225 | * @return array 226 | */ 227 | protected function mapCategories($documentCategories = '') 228 | { 229 | $categoryStore = []; 230 | $validCategories = $this->configuration->getCategories(); 231 | 232 | if (empty($validCategories)) { 233 | return $categoryStore; 234 | } 235 | 236 | $categories = explode(',', $documentCategories); 237 | 238 | foreach 
($categories as $categoryId) { 239 | $key = array_search($categoryId, array_column($validCategories, 'id')); 240 | if ($key !== false) { 241 | $categoryStore[] = ['id' => $categoryId, 'label' => $validCategories[$key]['label']]; 242 | } 243 | } 244 | 245 | return $categoryStore; 246 | 247 | } 248 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/DependencyInjection/Compiler/CategoriesPass.php: -------------------------------------------------------------------------------- 1 | getParameter('lucene_search.categories'); 24 | 25 | if (!$categoryServiceName) { 26 | return; 27 | } 28 | 29 | if (!$container->hasDefinition($categoryServiceName)) { 30 | throw new \InvalidArgumentException(sprintf('Service "%s" not found', $categoryServiceName)); 31 | } 32 | 33 | $categoriesService = $container->get($categoryServiceName); 34 | if (!$categoriesService instanceof CategoriesInterface) { 35 | throw new \Exception(get_class($categoriesService) . 
' needs to implement the CategoriesInterface.'); 36 | } 37 | 38 | $container->getDefinition(Configuration::class)->addMethodCall('setCategoryService', 39 | [new Reference($categoryServiceName)]); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/DependencyInjection/Compiler/TaskPass.php: -------------------------------------------------------------------------------- 1 | has(TaskManager::class)) { 21 | return; 22 | } 23 | 24 | $definition = $container->findDefinition(TaskManager::class); 25 | $tasks = $this->findAndSortTaggedServices('lucene_search.task', $container); 26 | 27 | if (empty($tasks)) { 28 | throw new RuntimeException('You must tag at least one tak as "lucene_search.task".'); 29 | } 30 | 31 | foreach ($tasks as $id => $task) { 32 | $definition->addMethodCall('addTask', [$task, (string)$task]); 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/DependencyInjection/Configuration.php: -------------------------------------------------------------------------------- 1 | root('lucene_search'); 14 | 15 | $rootNode 16 | ->children() 17 | ->booleanNode('enabled') 18 | ->isRequired() 19 | ->info('Enable and configure the search frontend if you want to include a full text search on your website.') 20 | ->end() 21 | ->booleanNode('fuzzy_search_results') 22 | ->isRequired() 23 | ->info('Fuzzy search results: When enabled, a fuzzy search is performed. The search will automatically include related terms.') 24 | ->end() 25 | ->booleanNode('search_suggestion') 26 | ->isRequired() 27 | ->info('Search suggestions: When enabled, a fuzzy search for similar search terms is performed. 
If no results could be found for the search term entered by the user, similar search terms are presented as suggestions.') 28 | ->end() 29 | ->booleanNode('own_host_only') 30 | ->isRequired() 31 | ->info('Own Host only: Limit search (and crawling) results to results from the current seed (sub-)domain only.') 32 | ->end() 33 | ->booleanNode('allow_subdomains') 34 | ->isRequired() 35 | ->info('Allow Subdomains: Limit search (and crawling) results to allow / disallow subomdains of current seed.') 36 | ->end() 37 | ->arrayNode('seeds') 38 | ->isRequired() 39 | ->info('Start-Urls (Seeds): Specify start URLs for the crawler. Please enter with protocol! e.g. http://www.pimcore.org and enter a starting URL on your main domain first and any subdomains next, because the domain of the first URL will be used as the main domain for sitemap generation.') 40 | ->prototype('scalar')->end() 41 | ->end() 42 | ->scalarNode('categories') 43 | ->info('Categories: If search results should be displayed by categories, please enter all valid categories here. The crawler sorts a page into a category if it contains a html meta tag with the name cat.') 44 | ->end() 45 | ->arrayNode('filter') 46 | ->children() 47 | ->booleanNode('allow_query_in_url') 48 | ->defaultFalse() 49 | ->info('When true, LuceneSearch will crawl urls with query fragments.') 50 | ->end() 51 | ->booleanNode('allow_hash_in_url') 52 | ->defaultFalse() 53 | ->info('When true, LuceneSearch will crawl urls with hash fragments.') 54 | ->end() 55 | ->arrayNode('valid_links') 56 | ->info('Regex for valid Uris: Specify PREG regex with start and end delimiter to define allowed links. e.g. @^http://www\.pimcore\.org*@i') 57 | ->prototype('scalar')->end() 58 | ->end() 59 | ->arrayNode('user_invalid_links') 60 | ->info('Regex for forbidden Uris: Specify PREG regex for links which should be ignored by the crawler. The crawler does not even follow these links e.g. 
@^www\.pimcore\.org\/community*@i') 61 | ->prototype('scalar')->end() 62 | ->end() 63 | ->scalarNode('core_invalid_links') 64 | ->info('Invalid Links/Extensions defined by core. You can\'nt override this.') 65 | ->cannotBeOverwritten() 66 | ->end() 67 | ->end() 68 | ->end() 69 | ->arrayNode('allowed_mime_types') 70 | ->info('Allowed MIME-Types. (Supported: text/html, application/pdf') 71 | ->prototype('scalar')->end() 72 | ->end() 73 | ->arrayNode('allowed_schemes') 74 | ->prototype('scalar')->end() 75 | ->info('Allowed Schemes: Define which url Schemes are allowed. (eg. http and/or https). Default is http.') 76 | ->end() 77 | ->arrayNode('crawler') 78 | ->children() 79 | ->integerNode('max_link_depth') 80 | ->info('Maximum link depth: To avoid loops produced by relative links on a website, a maximum link depth needs to be set. Please choose the value suited to the website to crawl, the default value is 15.') 81 | ->end() 82 | ->integerNode('max_download_limit') 83 | ->info('Maximum links to crawl: Constrain crawler to a specific limit of crawled links. Defaults is 0 which means no limit.') 84 | ->end() 85 | ->floatNode('content_max_size') 86 | ->info('Maximum content size (in MB): crawler ignores resources if its size exceeds limit (mostly useful for asset indexing). Defaults is 0 which means no limit.') 87 | ->end() 88 | ->scalarNode('content_start_indicator') 89 | ->info('You can limit the page content relevant for searching by surrounding it with certain html comments. The crawler will still parse the entire page to find links, but only the specified area wihin your html comments is used when searching for a term. 
String specifying content start for search.') 90 | ->end() 91 | ->scalarNode('content_end_indicator') 92 | ->info('String specifying content end for search.') 93 | ->end() 94 | ->scalarNode('content_exclude_start_indicator') 95 | ->info('String specifying exclude content start for search.') 96 | ->end() 97 | ->scalarNode('content_exclude_end_indicator') 98 | ->info('String specifying exclude content end for search.') 99 | ->end() 100 | ->end() 101 | ->end() 102 | ->arrayNode('locale') 103 | ->children() 104 | ->booleanNode('ignore_language') 105 | ->info('Receive search results from all languages, set to false to limit search results to the current language only. The current language is retrieved from the registy, the language of any page in the search result index is extracted by the crawler (Content-Language Http header, html tag lang attribute or html meta tag content-language)') 106 | ->isRequired() 107 | ->end() 108 | ->booleanNode('ignore_country') 109 | ->isRequired() 110 | ->info('Receive search results from all countries, set to false to limit search results to country only. The current country is retrieved from the search result index. it is extracted by the crawler (html meta tag country)') 111 | ->end() 112 | ->end() 113 | ->end() 114 | ->arrayNode('restriction') 115 | ->children() 116 | ->booleanNode('enabled') 117 | ->isRequired() 118 | ->info('Document Restriction: Ignore Document restrictions. 
Set to true if you\'re using the Pimcore/MembersBundle') 119 | ->end() 120 | ->end() 121 | ->end() 122 | ->arrayNode('boost') 123 | ->children() 124 | ->integerNode('documents') 125 | ->info('Document Boost Factor') 126 | ->end() 127 | ->integerNode('assets') 128 | ->info('Asset Boost Factor') 129 | ->end() 130 | ->end() 131 | ->end() 132 | ->arrayNode('view') 133 | ->children() 134 | ->integerNode('max_per_page') 135 | ->info('Max Results per Page') 136 | ->end() 137 | ->integerNode('max_suggestions') 138 | ->info('Max Suggestions') 139 | ->end() 140 | ->end() 141 | ->end() 142 | ->end() 143 | ; 144 | 145 | return $treeBuilder; 146 | } 147 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/DependencyInjection/LuceneSearchExtension.php: -------------------------------------------------------------------------------- 1 | processConfiguration($configuration, $configs); 22 | 23 | $loader = new YamlFileLoader($container, new FileLocator([__DIR__ . 
'/../Resources/config'])); 24 | $loader->load('services.yml'); 25 | 26 | $configManagerDefinition = $container->getDefinition(BundleConfiguration::class); 27 | $configManagerDefinition->addMethodCall('setConfig', [$config]); 28 | 29 | if (file_exists(BundleConfiguration::SYSTEM_CONFIG_FILE_PATH)) { 30 | $bundleConfig = Yaml::parse(file_get_contents(BundleConfiguration::SYSTEM_CONFIG_FILE_PATH)); 31 | $configManagerDefinition->addMethodCall('setSystemConfig', [$bundleConfig]); 32 | } 33 | 34 | $container->setParameter('lucene_search.categories', $config['categories']); 35 | 36 | } 37 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Doctrine/DBAL/ConnectionKeepAlive.php: -------------------------------------------------------------------------------- 1 | connections = []; 27 | $this->isAttached = false; 28 | } 29 | 30 | /** 31 | * Detach Kick Event 32 | */ 33 | public function detach() 34 | { 35 | unregister_tick_function([$this, 'kick']); 36 | $this->isAttached = false; 37 | } 38 | 39 | /** 40 | * Attach Kick Event 41 | */ 42 | public function attach() 43 | { 44 | if ($this->isAttached || register_tick_function([$this, 'kick'])) { 45 | $this->isAttached = true; 46 | return; 47 | } 48 | throw new \RuntimeException('Unable to attach keep alive to the system'); 49 | } 50 | 51 | /** 52 | * @param Connection $logConnection 53 | */ 54 | public function addConnection(Connection $logConnection) 55 | { 56 | $this->connections[spl_object_hash($logConnection)] = $logConnection; 57 | } 58 | 59 | /** 60 | * @throws \Exception 61 | */ 62 | public function kick() 63 | { 64 | foreach ($this->connections as $conn) { 65 | try { 66 | $conn->executeQuery('SELECT 1')->closeCursor(); 67 | } catch (\Exception $e) { 68 | if ($conn === null || stripos($e->getMessage(), 'SQLSTATE[HY000]: General error: 2006 MySQL server has gone away') === false) { 69 | throw $e; 70 | } 71 | $conn->close(); 72 | $conn->connect(); 73 | } 74 | } 75 | 
} 76 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Event/AssetResourceRestrictionEvent.php: -------------------------------------------------------------------------------- 1 | resource = $resource; 34 | } 35 | 36 | /** 37 | * @return Resource 38 | */ 39 | public function getResource() 40 | { 41 | return $this->resource; 42 | } 43 | 44 | /** 45 | * @param $restrictions array|null 46 | */ 47 | public function setRestrictions($restrictions) 48 | { 49 | $this->restrictions = $restrictions; 50 | } 51 | 52 | /** 53 | * @return array|null 54 | */ 55 | public function getRestrictions() 56 | { 57 | return $this->restrictions; 58 | } 59 | 60 | /** 61 | * @param $asset Asset 62 | */ 63 | public function setAsset(Asset $asset) 64 | { 65 | $this->asset = $asset; 66 | } 67 | 68 | /** 69 | * @return Asset 70 | */ 71 | public function getAsset() 72 | { 73 | return $this->asset; 74 | } 75 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Event/CrawlerRequestHeaderEvent.php: -------------------------------------------------------------------------------- 1 | headers[] = $header; 30 | } 31 | 32 | /** 33 | * @return array the headers appended to this event so far 34 | */ 35 | public function getHeaders() 36 | { 37 | return $this->headers; 38 | } 39 | 40 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Event/DocumentModificationEvent.php: -------------------------------------------------------------------------------- 1 | document = $document; 28 | $this->marking = $marking; 29 | } 30 | 31 | /** 32 | * @param \Zend_Search_Lucene_Document $document 33 | * 34 | * @return \Zend_Search_Lucene_Document 35 | */ 36 | public function setDocument(\Zend_Search_Lucene_Document $document) 37 | { 38 | return $this->document = $document; 39 | } 40 | 41 | /** 42 | * @return \Zend_Search_Lucene_Document 43 | */ 44 | public function getDocument() 45 | { 46 | 
return $this->document; 47 | } 48 | 49 | /** 50 | * @return string 51 | */ 52 | public function getMarking() 53 | { 54 | return $this->marking; 55 | } 56 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Event/HtmlParserEvent.php: -------------------------------------------------------------------------------- 1 | document = $document; 40 | $this->parsedHtml = $parsedHtml; 41 | $this->fullHtml = $fullHtml; 42 | $this->params = $params; 43 | } 44 | 45 | /** 46 | * @param \Zend_Search_Lucene_Document $document 47 | * 48 | * @return \Zend_Search_Lucene_Document 49 | */ 50 | public function setDocument(\Zend_Search_Lucene_Document $document) 51 | { 52 | return $this->document = $document; 53 | } 54 | 55 | /** 56 | * @return \Zend_Search_Lucene_Document 57 | */ 58 | public function getDocument() 59 | { 60 | return $this->document; 61 | } 62 | 63 | /** 64 | * @deprecated Use getParsedHtml() instead. 65 | * 66 | * @return string 67 | */ 68 | public function getHtml() 69 | { 70 | return $this->getParsedHtml(); 71 | } 72 | 73 | /** 74 | * @return string 75 | */ 76 | public function getParsedHtml() 77 | { 78 | return $this->parsedHtml; 79 | } 80 | 81 | /** 82 | * @return string 83 | */ 84 | public function getFullHtml() 85 | { 86 | return $this->fullHtml; 87 | } 88 | 89 | /** 90 | * @return array 91 | */ 92 | public function getParams() 93 | { 94 | return $this->params; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Event/PdfParserEvent.php: -------------------------------------------------------------------------------- 1 | document = $document; 41 | $this->content = $content; 42 | $this->assetMetaData = $assetMetaData; 43 | $this->params = $params; 44 | } 45 | 46 | /** 47 | * @param \Zend_Search_Lucene_Document $document 48 | * 49 | * @return \Zend_Search_Lucene_Document 50 | */ 51 | public function setDocument(\Zend_Search_Lucene_Document 
$document) 52 | { 53 | return $this->document = $document; 54 | } 55 | 56 | /** 57 | * @return \Zend_Search_Lucene_Document 58 | */ 59 | public function getDocument() 60 | { 61 | return $this->document; 62 | } 63 | 64 | /** 65 | * @return string 66 | */ 67 | public function getContent() 68 | { 69 | return $this->content; 70 | } 71 | 72 | /** 73 | * @return array 74 | */ 75 | public function getAssetMetaData() 76 | { 77 | return $this->assetMetaData; 78 | } 79 | 80 | /** 81 | * @return array 82 | */ 83 | public function getParams() 84 | { 85 | return $this->params; 86 | } 87 | 88 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Event/RestrictionContextEvent.php: -------------------------------------------------------------------------------- 1 | restrictionGroups = $restrictionGroups; 26 | } 27 | 28 | /** 29 | * @return array|null 30 | */ 31 | public function getAllowedRestrictionGroups() 32 | { 33 | return $this->restrictionGroups; 34 | } 35 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/EventListener/DocumentMetaDataListener.php: -------------------------------------------------------------------------------- 1 | crawlerState = $crawlerState; 38 | $this->documentResolver = $documentResolver; 39 | $this->headMeta = $headMeta; 40 | } 41 | 42 | /** 43 | * @param GetResponseEvent $event 44 | */ 45 | public function onKernelRequest(GetResponseEvent $event) 46 | { 47 | if (!$this->crawlerState->isLuceneSearchCrawler()) { 48 | return; 49 | } 50 | 51 | $request = $event->getRequest(); 52 | 53 | if (!$event->isMasterRequest()) { 54 | return; 55 | } 56 | 57 | $document = $this->documentResolver->getDocument($request); 58 | 59 | if ($document instanceof Page) { 60 | $this->headMeta->addRaw(''); 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- 
/src/LuceneSearchBundle/EventListener/MaintenanceListener.php: -------------------------------------------------------------------------------- 1 | handlerDispatcher = $handlerDispatcher; 40 | $this->queuedDocumentModifier = $queuedDocumentModifier; 41 | $this->taskManager = $taskManager; 42 | } 43 | 44 | /** 45 | * @param MaintenanceEvent $event 46 | */ 47 | public function runQueuedDocumentModifier(MaintenanceEvent $event) 48 | { 49 | $mainCrawlerIsActive = $this->handlerDispatcher->getStateHandler()->getCrawlerState() === StateHandler::CRAWLER_STATE_ACTIVE; 50 | 51 | // new index is on its way. wait for new index arrival. 52 | if ($mainCrawlerIsActive === true) { 53 | return; 54 | } 55 | 56 | $event->getManager()->registerJob(new Job('lucene_search.maintenance.queued_modifier', [$this->queuedDocumentModifier, 'resolveQueue'])); 57 | } 58 | 59 | /** 60 | * @param MaintenanceEvent $event 61 | */ 62 | public function runCrawler(MaintenanceEvent $event) 63 | { 64 | $event->getManager()->registerJob(new Job('lucene_search.maintenance.crawler', [$this, 'checkCrawlerCycle'])); 65 | } 66 | 67 | /** 68 | * Run Crawler in given time range 69 | */ 70 | public function checkCrawlerCycle() 71 | { 72 | if ($this->handlerDispatcher->getStateHandler()->isCrawlerEnabled() === false) { 73 | return; 74 | } 75 | 76 | $currentHour = date('H', time()); 77 | 78 | $running = $this->handlerDispatcher->getStateHandler()->getCrawlerState() === StateHandler::CRAWLER_STATE_ACTIVE; 79 | 80 | $lastStarted = $this->handlerDispatcher->getStateHandler()->getCrawlerLastStarted(); 81 | $lastFinished = $this->handlerDispatcher->getStateHandler()->getCrawlerLastFinished(); 82 | $forceStart = $this->handlerDispatcher->getStateHandler()->isCrawlerInForceStart(); 83 | $aDayAgo = time() - (24 * 60 * 60); 84 | 85 | /** 86 | * + If Crawler is not running 87 | * + If last start of Crawler is initial or a day ago 88 | * + If it's between 1 + 3 o clock in the night 89 | * + OR if its force 90 | * => RUN 91 | 
*/ 92 | if ($running === false && 93 | (((is_bool($lastStarted) || $lastStarted <= $aDayAgo) && $currentHour > 1 && $currentHour < 3) || $forceStart) 94 | ) { 95 | \Pimcore\Logger::debug('LuceneSearch: crawling started from maintenance listener.'); 96 | 97 | $logger = new Logger(); 98 | $this->taskManager->setLogger($logger); 99 | 100 | try { 101 | $this->taskManager->processTaskChain(['force' => false]); 102 | } catch (\Exception $e) { 103 | \Pimcore\Logger::error('LuceneSearch: error while running crawler in maintenance.', $e->getTrace()); 104 | } 105 | 106 | /** 107 | * + If Crawler is Running 108 | * + If last stop of crawler is before last start 109 | * + If last start is older than one day 110 | * => We have some errors: EXIT CRAWLING! 111 | */ 112 | } elseif ($running === true && $lastFinished < $lastStarted && $lastStarted <= $aDayAgo) { 113 | \Pimcore\Logger::error('LuceneSearch: There seems to be a problem with the search crawler! Trying to stop it.'); 114 | $this->handlerDispatcher->getStateHandler()->stopCrawler(true); 115 | } 116 | } 117 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/EventListener/MaintenanceQueueListener.php: -------------------------------------------------------------------------------- 1 | handlerDispatcher = $handlerDispatcher; 29 | $this->queuedDocumentModifier = $queuedDocumentModifier; 30 | } 31 | 32 | /** 33 | * {@inheritdoc} 34 | */ 35 | public function execute() 36 | { 37 | $mainCrawlerIsActive = $this->handlerDispatcher->getStateHandler()->getCrawlerState() === StateHandler::CRAWLER_STATE_ACTIVE; 38 | 39 | // new index is on its way. wait for new index arrival. 
40 | if ($mainCrawlerIsActive === true) { 41 | return; 42 | } 43 | 44 | $this->queuedDocumentModifier->resolveQueue(); 45 | } 46 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/EventListener/MaintenanceRunCrawlerListener.php: -------------------------------------------------------------------------------- 1 | handlerDispatcher = $handlerDispatcher; 30 | $this->taskManager = $taskManager; 31 | } 32 | 33 | /** 34 | * {@inheritdoc} 35 | */ 36 | public function execute() 37 | { 38 | $this->checkCrawlerCycle(); 39 | } 40 | 41 | /** 42 | * Run Crawler in given time range 43 | */ 44 | public function checkCrawlerCycle() 45 | { 46 | if ($this->handlerDispatcher->getStateHandler()->isCrawlerEnabled() === false) { 47 | return; 48 | } 49 | 50 | $currentHour = date('H', time()); 51 | 52 | $running = $this->handlerDispatcher->getStateHandler()->getCrawlerState() === StateHandler::CRAWLER_STATE_ACTIVE; 53 | 54 | $lastStarted = $this->handlerDispatcher->getStateHandler()->getCrawlerLastStarted(); 55 | $lastFinished = $this->handlerDispatcher->getStateHandler()->getCrawlerLastFinished(); 56 | $forceStart = $this->handlerDispatcher->getStateHandler()->isCrawlerInForceStart(); 57 | $aDayAgo = time() - (24 * 60 * 60); 58 | 59 | /** 60 | * + If Crawler is not running 61 | * + If last start of Crawler is initial or a day ago 62 | * + If it's between 1 + 3 o clock in the night 63 | * + OR if its force 64 | * => RUN 65 | */ 66 | if ($running === false && 67 | (((is_bool($lastStarted) || $lastStarted <= $aDayAgo) && $currentHour > 1 && $currentHour < 3) || $forceStart) 68 | ) { 69 | \Pimcore\Logger::debug('LuceneSearch: crawling started from maintenance listener.'); 70 | 71 | $logger = new Logger(); 72 | $this->taskManager->setLogger($logger); 73 | 74 | try { 75 | $this->taskManager->processTaskChain(['force' => false]); 76 | } catch (\Exception $e) { 77 | \Pimcore\Logger::error('LuceneSearch: error while running crawler in 
maintenance.', $e->getTrace()); 78 | } 79 | 80 | /** 81 | * + If Crawler is Running 82 | * + If last stop of crawler is before last start 83 | * + If last start is older than one day 84 | * => We have some errors: EXIT CRAWLING! 85 | */ 86 | } elseif ($running === true && $lastFinished < $lastStarted && $lastStarted <= $aDayAgo) { 87 | \Pimcore\Logger::error('LuceneSearch: There seems to be a problem with the search crawler! Trying to stop it.'); 88 | $this->handlerDispatcher->getStateHandler()->stopCrawler(true); 89 | } 90 | } 91 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Helper/HighlighterHelper.php: -------------------------------------------------------------------------------- 1 | getHighlightedSummary($content, $queryElements); 26 | 27 | if ($summary === false) { 28 | return substr($content, 0, self::SUMMARY_LENGTH); 29 | } 30 | 31 | return $summary; 32 | } 33 | 34 | /** 35 | * finds the query strings position in the text 36 | * 37 | * @param string $text 38 | * @param string $queryStr 39 | * 40 | * @return int 41 | */ 42 | protected function findPosInSummary($text, $queryStr) 43 | { 44 | $pos = stripos($text, ' ' . $queryStr . ' '); 45 | if ($pos === false) { 46 | $pos = stripos($text, '"' . $queryStr . '"'); 47 | } 48 | if ($pos === false) { 49 | $pos = stripos($text, '"' . $queryStr . '"'); 50 | } 51 | if ($pos === false) { 52 | $pos = stripos($text, ' ' . $queryStr . '-'); 53 | } 54 | if ($pos === false) { 55 | $pos = stripos($text, '-' . $queryStr . ' '); 56 | } 57 | if ($pos === false) { 58 | $pos = stripos($text, $queryStr . ' '); 59 | } 60 | if ($pos === false) { 61 | $pos = stripos($text, ' ' . 
$queryStr); 62 | } 63 | if ($pos === false) { 64 | $pos = stripos($text, $queryStr); 65 | } 66 | 67 | return $pos; 68 | } 69 | 70 | /** 71 | * extracts summary with highlighted search word from source text; each query token is escaped with preg_quote() before being embedded in the highlight patterns 72 | * 73 | * @param string $text 74 | * @param string[] $queryTokens 75 | * 76 | * @return string|false highlighted excerpt, or false when no query token occurs in the text or the excerpt is empty 77 | */ 78 | protected function getHighlightedSummary($text, $queryTokens) 79 | { 80 | $pos = false; 81 | $tokenInUse = $queryTokens[0]; 82 | 83 | foreach ($queryTokens as $queryStr) { 84 | $tokenInUse = $queryStr; 85 | $pos = $this->findPosInSummary($text, $queryStr); 86 | 87 | if ($pos !== false) { 88 | break; 89 | } 90 | } 91 | 92 | if ($pos !== false) { 93 | $start = $pos - 100; 94 | 95 | if ($start < 0) { 96 | $start = 0; 97 | } 98 | 99 | $summary = substr($text, $start, self::SUMMARY_LENGTH + strlen($tokenInUse)); 100 | $summary = trim($summary); 101 | 102 | $tokens = explode(' ', $summary); 103 | 104 | if (strtolower($tokens[0]) != strtolower($tokenInUse)) { 105 | $tokens = array_slice($tokens, 1, -1); 106 | } else { 107 | $tokens = array_slice($tokens, 0, -1); 108 | } 109 | 110 | $trimmedSummary = implode(' ', $tokens); 111 | 112 | foreach ($queryTokens as $queryStr) { 113 | $trimmedSummary = preg_replace('@([ \'")(-:.,;])(' . preg_quote($queryStr, '@') . ')([ \'")(-:.,;])@si', 114 | " \\1\\2\\3", $trimmedSummary); 115 | $trimmedSummary = preg_replace('@^(' . preg_quote($queryStr, '@') . ')([ \'")(-:.,;])@si', 116 | " \\1\\2", $trimmedSummary); 117 | $trimmedSummary = preg_replace('@([ \'")(-:.,;])(' . preg_quote($queryStr, '@') . ')$@si', 118 | " \\1\\2", $trimmedSummary); 119 | } 120 | 121 | return empty($trimmedSummary) ? 
false : $trimmedSummary; 122 | } 123 | 124 | return false; 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Helper/LuceneHelper.php: -------------------------------------------------------------------------------- 1 | rewrite($index)->getQueryTerms(); 30 | } catch (\Zend_Search_Lucene_Exception $e) { 31 | return []; 32 | } 33 | 34 | return $terms; 35 | 36 | } 37 | 38 | /** 39 | * find matching terms beginning with query string 40 | * 41 | * @param string $queryStr 42 | * @param \Zend_Search_Lucene_Interface $index 43 | * 44 | * @return array $hits 45 | */ 46 | public function wildcardFindTerms($queryStr, \Zend_Search_Lucene_Interface $index) 47 | { 48 | $pattern = new \Zend_Search_Lucene_Index_Term($queryStr . '*'); 49 | $userQuery = new \Zend_Search_Lucene_Search_Query_Wildcard($pattern); 50 | \Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(2); 51 | 52 | try { 53 | $terms = $userQuery->rewrite($index)->getQueryTerms(); 54 | } catch (\Zend_Search_Lucene_Exception $e) { 55 | return []; 56 | } 57 | 58 | return $terms; 59 | 60 | } 61 | 62 | /** 63 | * @param $term raw term, may contain markup and line breaks 64 | * 65 | * @return string lower-cased term with tags stripped, every character that is not a letter, digit or space replaced by a space, and whitespace runs collapsed and trimmed 66 | */ 67 | public function cleanTerm($term) 68 | { 69 | return trim( 70 | preg_replace('|\s{2,}|', ' ', 71 | preg_replace('/[^\p{L}\p{N} ]/u', ' ', 72 | strtolower( 73 | strip_tags( 74 | str_replace(["\n", '<'], [' ', ' <'], $term) 75 | ) 76 | ) 77 | ) 78 | ) 79 | ); 80 | } 81 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Helper/StringHelper.php: -------------------------------------------------------------------------------- 1 | ', '"', "'", '&'], '', $queryFromRequest); 18 | 19 | return $queryFromRequest; 20 | } 21 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Logger/AbstractLogger.php: 
-------------------------------------------------------------------------------- 1 | prefix = '[' . rtrim($prefix) . '] '; 28 | } 29 | 30 | /** 31 | * @return mixed 32 | */ 33 | protected function getPrefix() 34 | { 35 | return $this->prefix; 36 | } 37 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Logger/ConsoleLogger.php: -------------------------------------------------------------------------------- 1 | consoleOutput = $output; 25 | $this->verbosity = $output->getVerbosity(); 26 | } 27 | 28 | /** 29 | * @param $message 30 | * @param $level 31 | * @param bool $logToBackend 32 | * @param bool $logToSystem 33 | * 34 | * @return void 35 | */ 36 | public function log($message, $level = 'debug', $logToBackend = true, $logToSystem = true) 37 | { 38 | parent::log($message, $level, $logToBackend, $logToSystem); 39 | $this->addToConsoleLog($message, $level); 40 | } 41 | 42 | /** 43 | * print some lines to console if available 44 | * 45 | * @param $message 46 | * @param $level 47 | * 48 | * @return bool 49 | */ 50 | protected function addToConsoleLog($message, $level = 'debug') 51 | { 52 | if (!$this->consoleOutput instanceof Output\OutputInterface) { 53 | return false; 54 | } 55 | 56 | if ($this->verbosity !== Output\OutputInterface::VERBOSITY_VERBOSE) { 57 | return false; 58 | } 59 | 60 | $message = $this->getPrefix() . $message; 61 | 62 | $debugLevel = 'fg=white'; 63 | if ($level === 'debug') { 64 | $debugLevel = 'fg=white'; 65 | } elseif ($level === 'debugHighlight') { 66 | $debugLevel = 'comment'; 67 | } elseif ($level === 'info') { 68 | $debugLevel = 'comment'; 69 | } elseif ($level === 'error') { 70 | $debugLevel = 'error'; 71 | } 72 | 73 | $string = sprintf('<%s>' . str_replace('%', '%%', $message) . 
'', $debugLevel, $debugLevel); 74 | $this->consoleOutput->writeln($string, $this->verbosity); 75 | } 76 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Logger/Logger.php: -------------------------------------------------------------------------------- 1 | getSystemPrefix() . $this->getPrefix() . $message, $this->getRealLevel($level)); 26 | } 27 | 28 | if ($logToBackend === true) { 29 | $file = Configuration::CRAWLER_LOG_FILE_PATH; 30 | $log = date('d.m.Y H:i') . '|' . $this->getRealLevel($level) . '|' . $message . "\n"; 31 | file_put_contents($file, $log, FILE_APPEND); 32 | } 33 | } 34 | 35 | /** 36 | * @param $level 37 | * 38 | * @return string 39 | */ 40 | private function getRealLevel($level) 41 | { 42 | if ($level === 'debugHighlight') { 43 | return 'debug'; 44 | } 45 | 46 | return $level; 47 | } 48 | 49 | /** 50 | * @return string 51 | */ 52 | private function getSystemPrefix() 53 | { 54 | return 'LuceneSearch: '; 55 | } 56 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/LuceneSearchBundle.php: -------------------------------------------------------------------------------- 1 | addCompilerPass(new TaskPass()); 24 | $container->addCompilerPass(new CategoriesPass()); 25 | } 26 | 27 | /** 28 | * {@inheritdoc} 29 | */ 30 | public function getInstaller() 31 | { 32 | return $this->container->get(Install::class); 33 | } 34 | 35 | /** 36 | * {@inheritdoc} 37 | */ 38 | public function getJsPaths() 39 | { 40 | return [ 41 | '/bundles/lucenesearch/js/backend/startup.js', 42 | '/bundles/lucenesearch/js/backend/settings.js' 43 | ]; 44 | } 45 | 46 | /** 47 | * {@inheritdoc} 48 | */ 49 | public function getCssPaths() 50 | { 51 | return [ 52 | '/bundles/lucenesearch/css/admin.css' 53 | ]; 54 | } 55 | 56 | /** 57 | * @inheritDoc 58 | */ 59 | protected function getComposerPackageName(): string 60 | { 61 | return self::PACKAGE_NAME; 62 | } 63 | 64 | } 65 | 
-------------------------------------------------------------------------------- /src/LuceneSearchBundle/LuceneSearchEvents.php: -------------------------------------------------------------------------------- 1 | addJob(['marking' => $marking, 'query' => $query, 'type' => 'query']); 26 | } 27 | 28 | /** 29 | * @param \Zend_Search_Lucene_Index_Term $term 30 | * @param string $marking 31 | */ 32 | public function markDocumentsViaTerm(\Zend_Search_Lucene_Index_Term $term, $marking = self::MARK_AVAILABLE) 33 | { 34 | // trigger command to run heavy processes in background 35 | $this->addJob(['marking' => $marking, 'term' => $term, 'type' => 'term']); 36 | } 37 | 38 | /** 39 | * @return \Zend_Search_Lucene_Interface 40 | */ 41 | public function getIndex() 42 | { 43 | return \Zend_Search_Lucene::open(Configuration::INDEX_DIR_PATH_STABLE); 44 | } 45 | 46 | /** 47 | * @return bool 48 | */ 49 | public function hasActiveJobs() 50 | { 51 | $activeJobs = $this->getActiveJobs(); 52 | return count($activeJobs) > 0; 53 | } 54 | 55 | /** 56 | * @param bool $populateWithData 57 | * 58 | * @return array 59 | */ 60 | public function getActiveJobs($populateWithData = false) 61 | { 62 | $activeJobs = TmpStore::getIdsByTag(DocumentModifier::TEMP_STORE_TAG); 63 | 64 | if ($populateWithData === false) { 65 | return is_array($activeJobs) ? $activeJobs : []; 66 | } 67 | 68 | if (!is_array($activeJobs)) { 69 | return []; 70 | } 71 | 72 | $jobs = []; 73 | foreach ($activeJobs as $processId) { 74 | 75 | $process = $this->getJob($processId); 76 | if (!$process instanceof TmpStore) { 77 | continue; 78 | } 79 | 80 | $jobs[] = $process; 81 | } 82 | 83 | return $jobs; 84 | } 85 | 86 | /** 87 | * Remove all existing Modifier Jobs in Queue. 88 | */ 89 | public function clearActiveJobs() 90 | { 91 | $activeJobs = $this->getActiveJobs(); 92 | foreach ($activeJobs as $activeJobId) { 93 | TmpStore::delete($activeJobId); 94 | } 95 | } 96 | 97 | /** 98 | * Add a modifier Job to the Queue. 
99 | * 100 | * @param array $options job payload stored in the TmpStore under one freshly generated job id; a failure to store is logged with that same id and not rethrown 101 | */ 102 | public function addJob(array $options) 103 | { 104 | $jobId = $this->getJobId(); 105 | 106 | try { 107 | TmpStore::add($jobId, $options, self::TEMP_STORE_TAG); 108 | } catch (\Exception $e) { 109 | \Pimcore\Logger::error(sprintf('LuceneSearch: Could not add job (%s) to queue.', $jobId), $e->getTrace()); 110 | } 111 | } 112 | 113 | /** 114 | * @param $processId 115 | * 116 | * @return null|TmpStore 117 | */ 118 | public function getJob($processId) 119 | { 120 | $job = null; 121 | try { 122 | $job = TmpStore::get($processId); 123 | } catch (\Exception $e) { 124 | return null; 125 | } 126 | 127 | return $job; 128 | } 129 | 130 | /** 131 | * @param $processId 132 | */ 133 | public function deleteJob($processId) 134 | { 135 | try { 136 | TmpStore::delete($processId); 137 | } catch (\Exception $e) { 138 | \Pimcore\Logger::error(sprintf('LuceneSearch: Could not delete queued job with id %s', $processId)); 139 | } 140 | } 141 | 142 | /** 143 | * @return string 144 | */ 145 | private function getJobId() 146 | { 147 | return uniqid('lucene_modifier-job-'); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Modifier/QueuedDocumentModifier.php: -------------------------------------------------------------------------------- 1 | documentModifier = $documentModifier; 43 | $this->eventDispatcher = $eventDispatcher; 44 | } 45 | 46 | /** 47 | * Load all queued jobs, trigger lucene query and process given documents. 48 | */ 49 | public function resolveQueue() 50 | { 51 | if ($this->documentModifier->hasActiveJobs() === false) { 52 | return; 53 | } 54 | 55 | // modifier is running, wait for next cycle. 
56 | if ($this->queueIsLocked()) { 57 | return; 58 | } 59 | 60 | $this->lockQueue(); 61 | 62 | /** @var TmpStore[] $sortedProcesses */ 63 | $sortedProcesses = $this->documentModifier->getActiveJobs(true); 64 | 65 | usort($sortedProcesses, function ($a, $b) { 66 | /** 67 | * @var $a TmpStore 68 | * @var $b TmpStore 69 | */ 70 | return strtotime($a->getDate()) - strtotime($b->getDate()); 71 | }); 72 | 73 | $this->index = $this->documentModifier->getIndex(); 74 | 75 | foreach ($sortedProcesses as $process) { 76 | 77 | /** @var array $data */ 78 | $data = $process->getData(); 79 | $type = $data['type']; 80 | $marking = $data['marking']; 81 | 82 | $documentIds = $type === 'query' ? $this->getDocumentIdsByQuery($data['query']) : $this->getDocumentIdsByTerm($data['term']); 83 | 84 | try { 85 | if ($marking === DocumentModifier::MARK_AVAILABLE || $marking === DocumentModifier::MARK_UNAVAILABLE) { 86 | $this->changeDocumentsAvailability($documentIds, $marking); 87 | } elseif ($marking === DocumentModifier::MARK_DELETED) { 88 | $this->deleteDocuments($documentIds); 89 | } 90 | } catch (\Exception $e) { 91 | \Pimcore\Logger::error('LuceneSearch: Document Modifier Error: ' . $e->getMessage(), $e->getTrace()); 92 | } 93 | 94 | $this->documentModifier->deleteJob($process->getId()); 95 | 96 | } 97 | 98 | if ($this->indexModified === true) { 99 | $this->index->optimize(); 100 | } 101 | 102 | $this->unlockQueue(); 103 | } 104 | 105 | /** 106 | * @param array $documentIds 107 | * @param $marking 108 | * 109 | * @throws \Zend_Search_Lucene_Exception 110 | */ 111 | protected function changeDocumentsAvailability(array $documentIds, $marking) 112 | { 113 | if (count($documentIds) === 0) { 114 | return; 115 | } 116 | 117 | foreach ($documentIds as $documentId) { 118 | 119 | $newDocument = new \Zend_Search_Lucene_Document(); 120 | $currentDocument = $this->index->getDocument($documentId); 121 | 122 | // document is already marked as deleted: skip check. 
123 | if ($this->index->isDeleted($documentId)) { 124 | continue; 125 | } 126 | 127 | //check if state is same. if so, skip modification. 128 | $currentInternalValue = null; 129 | if (in_array('internalAvailability', $currentDocument->getFieldNames())) { 130 | $currentInternalValue = $currentDocument->getField('internalAvailability')->value; 131 | } 132 | 133 | if ($currentInternalValue === $marking) { 134 | continue; 135 | } 136 | 137 | $this->indexModified = true; 138 | 139 | foreach ($currentDocument->getFieldNames() as $name) { 140 | 141 | if ($name === 'internalAvailability') { 142 | continue; 143 | } 144 | 145 | $newDocument->addField($currentDocument->getField($name)); 146 | } 147 | 148 | $newDocument->addField(\Zend_Search_Lucene_Field::keyword('internalAvailability', $marking)); 149 | 150 | $modificationEvent = new DocumentModificationEvent($newDocument, $marking); 151 | $this->eventDispatcher->dispatch( 152 | LuceneSearchEvents::LUCENE_SEARCH_DOCUMENT_MODIFICATION, 153 | $modificationEvent 154 | ); 155 | 156 | $this->index->delete($documentId); 157 | $this->index->addDocument($modificationEvent->getDocument()); 158 | $this->index->commit(); 159 | } 160 | } 161 | 162 | /** 163 | * @param array $documentIds 164 | * 165 | * @throws \Zend_Search_Lucene_Exception 166 | */ 167 | protected function deleteDocuments(array $documentIds) 168 | { 169 | if (count($documentIds) === 0) { 170 | return; 171 | } 172 | 173 | $this->indexModified = true; 174 | 175 | foreach ($documentIds as $documentId) { 176 | if (!$this->index->isDeleted($documentId)) { 177 | $this->index->delete($documentId); 178 | $this->index->commit(); 179 | } 180 | } 181 | } 182 | 183 | /** 184 | * @param \Zend_Search_Lucene_Index_Term $term 185 | * 186 | * @return array 187 | */ 188 | protected function getDocumentIdsByTerm(\Zend_Search_Lucene_Index_Term $term) 189 | { 190 | try { 191 | $documentIds = $this->index->termDocs($term); 192 | } catch (\Exception $e) { 193 | return []; 194 | } 195 | 196 | 
return $documentIds; 197 | } 198 | 199 | /** 200 | * @param \Zend_Search_Lucene_Search_Query_Term $query 201 | * 202 | * @return array 203 | */ 204 | protected function getDocumentIdsByQuery(\Zend_Search_Lucene_Search_Query_Term $query) 205 | { 206 | try { 207 | $hits = $this->index->find($query); 208 | } catch (\Exception $e) { 209 | return []; 210 | } 211 | 212 | if (!is_array($hits) || count($hits) === 0) { 213 | return []; 214 | } 215 | 216 | $documentIds = []; 217 | foreach ($hits as $hit) { 218 | 219 | if (!$hit instanceof \Zend_Search_Lucene_Search_QueryHit) { 220 | continue; 221 | } 222 | 223 | $documentIds[] = $hit->id; 224 | } 225 | 226 | return $documentIds; 227 | } 228 | 229 | /** 230 | * Lock Queue for cycle 231 | */ 232 | protected function lockQueue() 233 | { 234 | if ($this->queueIsLocked() === true) { 235 | return; 236 | } 237 | 238 | TmpStore::add(self::LOCK_ID, ['running' => true]); 239 | } 240 | 241 | /** 242 | * Unlock Queue for cycle 243 | */ 244 | protected function unlockQueue() 245 | { 246 | TmpStore::delete(self::LOCK_ID); 247 | } 248 | 249 | /** 250 | * @return bool 251 | */ 252 | protected function queueIsLocked() 253 | { 254 | return TmpStore::get(self::LOCK_ID) instanceof TmpStore; 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Organizer/Dispatcher/HandlerDispatcher.php: -------------------------------------------------------------------------------- 1 | stateHandler = $stateHandler; 29 | $this->storeHandler = $storeHandler; 30 | } 31 | 32 | /** 33 | * @return StateHandler 34 | */ 35 | public function getStateHandler() 36 | { 37 | return $this->stateHandler; 38 | } 39 | 40 | /** 41 | * @return StoreHandler 42 | */ 43 | public function getStoreHandler() 44 | { 45 | return $this->storeHandler; 46 | } 47 | 48 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Organizer/Handler/AbstractHandler.php: 
-------------------------------------------------------------------------------- 1 | translator = $translator; 35 | $this->configuration = $configuration; 36 | $this->fileSystem = new FileSystem(); 37 | } 38 | 39 | /** 40 | * @todo check locale 41 | * 42 | * @param $key 43 | * 44 | * @return mixed 45 | */ 46 | protected function getTranslation($key) 47 | { 48 | $translationCatalog = $this->translator->getCatalogue('en'); 49 | $translations = $translationCatalog->get($key, 'admin'); 50 | 51 | return $translations; 52 | } 53 | 54 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Organizer/Handler/StateHandler.php: -------------------------------------------------------------------------------- 1 | configuration->getConfig('enabled') === true; 19 | } 20 | 21 | /** 22 | * @return string 23 | */ 24 | public function getCrawlerState() 25 | { 26 | if ($this->fileSystem->exists(Configuration::CRAWLER_PROCESS_FILE_PATH)) { 27 | return self::CRAWLER_STATE_ACTIVE; 28 | } 29 | 30 | return self::CRAWLER_STATE_IDLE; 31 | } 32 | 33 | public function getCrawlerLastStarted() 34 | { 35 | return $this->configuration->getStateConfig('started'); 36 | } 37 | 38 | public function getCrawlerLastFinished() 39 | { 40 | return $this->configuration->getStateConfig('finished'); 41 | } 42 | 43 | public function isCrawlerInForceStart() 44 | { 45 | return $this->configuration->getStateConfig('forceStart'); 46 | } 47 | 48 | public function isCrawlerInForceStop() 49 | { 50 | return $this->configuration->getStateConfig('forceStop'); 51 | } 52 | 53 | /** 54 | * @return array|bool 55 | */ 56 | public function getCrawlerStateDescription() 57 | { 58 | $messages = []; 59 | 60 | if ($this->isCrawlerEnabled() === false) { 61 | return false; 62 | } 63 | 64 | if ($this->configuration->getStateConfig('running')) { 65 | $messages[] = $this->getTranslation('lucenesearch_frontend_crawler_running'); 66 | } else { 67 | $messages[] = 
$this->getTranslation('lucenesearch_frontend_crawler_not_running'); 68 | } 69 | 70 | $started = 'never'; 71 | $finished = 'never'; 72 | // only numeric timestamps are formatted: startCrawler() resets 'finished' to null, which is neither a bool nor a valid date. 73 | if (is_numeric($this->configuration->getStateConfig('started'))) { 74 | $started = date('d.m.Y H:i', (int)$this->configuration->getStateConfig('started')); 75 | } 76 | 77 | if (is_numeric($this->configuration->getStateConfig('finished'))) { 78 | $finished = date('d.m.Y H:i', (int)$this->configuration->getStateConfig('finished')); 79 | } 80 | 81 | $messages[] = $this->getTranslation('lucenesearch_frontend_crawler_last_started') . ': ' . $started . '. '; 82 | $messages[] = $this->getTranslation('lucenesearch_frontend_crawler_last_finished') . ': ' . $finished . '. '; 83 | 84 | if ($this->getConfigCompletionState() === 'incomplete') { 85 | $messages[] = 'ERROR: ' . $this->getTranslation('lucenesearch_frontend_config_incomplete'); 86 | } else { 87 | if ($this->configuration->getStateConfig('forceStart')) { 88 | $messages[] = $this->getTranslation('lucenesearch_frontend_crawler') . 
': '; 89 | $messages[] = $this->getTranslation('lucenesearch_frontend_crawler_start_on_next_maintenance'); 90 | } 91 | } 92 | 93 | return $messages; 94 | } 95 | 96 | /** 97 | * @param bool $forceStart 98 | * 99 | * @return bool 100 | */ 101 | public function startCrawler($forceStart = false) 102 | { 103 | $this->fileSystem->touch(Configuration::CRAWLER_PROCESS_FILE_PATH); 104 | 105 | $this->configuration->setStateConfig('started', time()); 106 | $this->configuration->setStateConfig('forceStart', $forceStart); 107 | $this->configuration->setStateConfig('forceStop', false); 108 | $this->configuration->setStateConfig('running', true); 109 | $this->configuration->setStateConfig('finished', null); 110 | 111 | \Pimcore\Logger::debug('LuceneSearch: Starting crawl'); 112 | 113 | return true; 114 | } 115 | 116 | /** 117 | * @return bool 118 | */ 119 | public function forceCrawlerStartOnNextMaintenance() 120 | { 121 | $this->configuration->setStateConfig('forceStart', true); 122 | 123 | \Pimcore\Logger::debug('LuceneSearch: forced to starting crawl'); 124 | 125 | return true; 126 | } 127 | 128 | /** 129 | * @param bool $forcedStop 130 | * 131 | * @return bool 132 | */ 133 | public function stopCrawler($forcedStop = false) 134 | { 135 | $this->fileSystem->remove(Configuration::CRAWLER_PROCESS_FILE_PATH); 136 | 137 | $this->configuration->setStateConfig('finished', time()); 138 | $this->configuration->setStateConfig('forceStart', false); 139 | $this->configuration->setStateConfig('running', false); 140 | $this->configuration->setStateConfig('forceStop', $forcedStop); 141 | 142 | \Pimcore\Logger::debug('LuceneSearch: Stopping crawl'); 143 | 144 | return true; 145 | } 146 | 147 | /** 148 | * @return string 149 | */ 150 | public function getConfigCompletionState() 151 | { 152 | $frontEndUrls = $this->configuration->getConfig('seeds'); 153 | $filterLinks = $this->configuration->getConfig('filter'); 154 | $validLinks = $filterLinks['valid_links']; 155 | 156 | if 
(empty($frontEndUrls) || empty($validLinks)) { 157 | return 'incomplete'; 158 | } else { 159 | return 'complete'; 160 | } 161 | } 162 | 163 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Organizer/Handler/StoreHandler.php: -------------------------------------------------------------------------------- 1 | documentModifier = $documentModifier; 21 | } 22 | // Empties the genesis index directory; does nothing when the directory does not exist yet. 23 | public function resetGenesisIndex() 24 | { 25 | \Pimcore\Logger::debug('LuceneSearch: Reset Genesis Index'); 26 | 27 | if ($this->fileSystem->exists(Configuration::INDEX_DIR_PATH_GENESIS)) { 28 | $this->removeFolder(Configuration::INDEX_DIR_PATH_GENESIS); 29 | $this->fileSystem->mkdir(Configuration::INDEX_DIR_PATH_GENESIS, 0755); 30 | } 31 | } 32 | // Replaces the stable index with a mirror of the genesis index; does nothing when no genesis index exists. 33 | public function riseGenesisToStable() 34 | { 35 | //first delete current stable 36 | if ($this->fileSystem->exists(Configuration::INDEX_DIR_PATH_GENESIS)) { 37 | 38 | if ($this->fileSystem->exists(Configuration::INDEX_DIR_PATH_STABLE)) { 39 | $this->removeFolder(Configuration::INDEX_DIR_PATH_STABLE); 40 | } 41 | 42 | //copy genesis to stable 43 | $this->copyFolder(Configuration::INDEX_DIR_PATH_GENESIS, Configuration::INDEX_DIR_PATH_STABLE); 44 | } 45 | } 46 | 47 | /** 48 | * Reset Resource Persistence Store 49 | */ 50 | public function resetPersistenceStore() 51 | { 52 | \Pimcore\Logger::debug('LuceneSearch: Reset Persistence Store'); 53 | 54 | if ($this->fileSystem->exists(Configuration::CRAWLER_PERSISTENCE_STORE_DIR_PATH)) { 55 | $this->removeFolder(Configuration::CRAWLER_PERSISTENCE_STORE_DIR_PATH); 56 | } 57 | 58 | $this->fileSystem->mkdir(Configuration::CRAWLER_PERSISTENCE_STORE_DIR_PATH, 0755); 59 | 60 | } 61 | 62 | /** 63 | * Reset Asset Tmp directory (emptied if present, then re-created) 64 | */ 65 | public function resetAssetTmp() 66 | { 67 | \Pimcore\Logger::debug('LuceneSearch: Reset Asset Tmp'); 68 | 69 | if ($this->fileSystem->exists(Configuration::CRAWLER_TMP_ASSET_DIR_PATH)) { 70 | 
$this->removeFolder(Configuration::CRAWLER_TMP_ASSET_DIR_PATH); 71 | } 72 | 73 | $this->fileSystem->mkdir(Configuration::CRAWLER_TMP_ASSET_DIR_PATH, 0755); 74 | 75 | } 76 | 77 | /** 78 | * Remove all queued document modifier jobs 79 | */ 80 | public function clearQueuedDocumentModifiers() 81 | { 82 | \Pimcore\Logger::debug('LuceneSearch: Remove Queued Document Modifiers'); 83 | 84 | $this->documentModifier->clearActiveJobs(); 85 | } 86 | 87 | /** 88 | * Reset Uri Filter Store (removes the persisted filter file if it exists) 89 | */ 90 | public function resetUriFilterPersistenceStore() 91 | { 92 | \Pimcore\Logger::debug('LuceneSearch: Reset Uri Filter Persistence Store'); 93 | 94 | if ($this->fileSystem->exists(Configuration::CRAWLER_URI_FILTER_FILE_PATH)) { 95 | $this->fileSystem->remove(Configuration::CRAWLER_URI_FILTER_FILE_PATH); 96 | } 97 | } 98 | 99 | /** 100 | * Reset Logs (overwrites the crawler log file with empty content) 101 | */ 102 | public function resetLogs() 103 | { 104 | \Pimcore\Logger::debug('LuceneSearch: Reset Logs'); 105 | $this->fileSystem->dumpFile(Configuration::CRAWLER_LOG_FILE_PATH, ''); 106 | } 107 | 108 | /** 109 | * @param $from source directory that gets mirrored 110 | * @param $to target directory, created when missing 111 | */ 112 | private function copyFolder($from, $to) 113 | { 114 | if (!$this->fileSystem->exists($to)) { 115 | $this->fileSystem->mkdir($to); 116 | } 117 | 118 | $this->fileSystem->mirror($from, $to, null, ['override' => true, 'delete' => true]); 119 | } 120 | 121 | /** 122 | * @param $path directory whose matching entries are removed; the directory itself is kept 123 | * @param string $pattern glob pattern appended to $path 124 | */ 125 | private function removeFolder($path, $pattern = '*') 126 | { 127 | $files = glob($path . 
"/$pattern"); 128 | 129 | foreach ($files as $file) { 130 | if (is_dir($file) and !in_array($file, ['..', '.'])) { 131 | $this->removeFolder($file, $pattern); 132 | rmdir($file); 133 | } elseif (is_file($file) and ($file != __FILE__)) { 134 | unlink($file); 135 | } 136 | } 137 | } 138 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/pimcore/config.yml: -------------------------------------------------------------------------------- 1 | lucene_search: 2 | 3 | enabled: false 4 | 5 | fuzzy_search_results: false 6 | search_suggestion: true 7 | 8 | own_host_only: true 9 | allow_subdomains: false 10 | 11 | seeds: [] 12 | categories: ~ 13 | 14 | filter: 15 | valid_links: [] 16 | user_invalid_links: [] 17 | core_invalid_links: '@.*\.(js|JS|gif|GIF|jpg|JPG|png|PNG|ico|ICO|eps|jpeg|JPEG|bmp|BMP|css|CSS|sit|wmf|zip|ppt|mpg|xls|gz|rpm|tgz|mov|MOV|exe|mp3|MP3|kmz|gpx|kml|swf|SWF)$@' 18 | 19 | allowed_mime_types: 20 | - 'text/html' 21 | - 'application/pdf' 22 | allowed_schemes: 23 | - 'http' 24 | 25 | crawler: 26 | max_link_depth: 15 27 | max_download_limit: 0 28 | content_max_size: 0 29 | content_start_indicator: ~ 30 | content_end_indicator: ~ 31 | content_exclude_start_indicator: ~ 32 | content_exclude_end_indicator: ~ 33 | 34 | locale: 35 | ignore_language: false 36 | ignore_country: true 37 | 38 | restriction: 39 | enabled: false 40 | 41 | boost: 42 | documents: 1 43 | assets: 1 44 | 45 | view: 46 | max_per_page: 10 47 | max_suggestions: 10 -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/pimcore/routing.yml: -------------------------------------------------------------------------------- 1 | # backend 2 | lucene_search.controller.admin.get_state: 3 | path: /admin/lucene-search/settings/get/state 4 | defaults: { _controller: LuceneSearchBundle\Controller\Admin\SettingsController::getStateAction } 5 | 
lucene_search.controller.admin.get_logs: 6 | path: /admin/lucene-search/settings/logs/get 7 | defaults: { _controller: LuceneSearchBundle\Controller\Admin\SettingsController::getLogAction } 8 | lucene_search.controller.admin.crawler.start: 9 | path: /admin/lucene-search/settings/crawler/start 10 | defaults: { _controller: LuceneSearchBundle\Controller\Admin\SettingsController::startCrawlerAction } 11 | lucene_search.controller.admin.crawler.stop: 12 | path: /admin/lucene-search/settings/crawler/stop 13 | defaults: { _controller: LuceneSearchBundle\Controller\Admin\SettingsController::stopCrawlerAction } 14 | 15 | # frontend 16 | lucene_search.controller.auto_complete.search: 17 | path: /lucence-search/auto-complete 18 | defaults: { _controller: LuceneSearchBundle\Controller\AutoCompleteController:searchAction } 19 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services.yml: -------------------------------------------------------------------------------- 1 | imports: 2 | - { resource: services/*.yml } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/commands.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | LuceneSearchBundle\Command\CrawlCommand: 4 | autowire: true 5 | autoconfigure: true 6 | 7 | LuceneSearchBundle\Command\DocumentModifierCommand: 8 | autowire: true 9 | autoconfigure: true 10 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/controller.yml: -------------------------------------------------------------------------------- 1 | services: 2 | LuceneSearchBundle\Controller\Admin\SettingsController: 3 | tags: 4 | - { name: controller.service_arguments } 5 | 6 | LuceneSearchBundle\Controller\FrontendController: 7 | public: true 8 | autowire: true 9 | 10 | 
LuceneSearchBundle\Controller\AutoCompleteController: 11 | parent: LuceneSearchBundle\Controller\FrontendController 12 | public: true 13 | autowire: true 14 | 15 | LuceneSearchBundle\Controller\ListController: 16 | parent: LuceneSearchBundle\Controller\FrontendController 17 | public: true 18 | autowire: true 19 | calls: 20 | - [setHighlighterHelper, ['@LuceneSearchBundle\Helper\HighlighterHelper']] 21 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/event.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | _defaults: 4 | autowire: true 5 | autoconfigure: true 6 | public: false 7 | 8 | # event listener 9 | LuceneSearchBundle\EventListener\MaintenanceQueueListener: 10 | tags: 11 | - { name: pimcore.maintenance.task, type: lucene_run_queued_document_modifier } 12 | 13 | LuceneSearchBundle\EventListener\MaintenanceRunCrawlerListener: 14 | tags: 15 | - { name: pimcore.maintenance.task, type: lucene_run_crawler } 16 | 17 | LuceneSearchBundle\EventListener\DocumentMetaDataListener: 18 | tags: 19 | - { name: kernel.event_listener, event: kernel.request, method: onKernelRequest } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/helper.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | # helper 4 | LuceneSearchBundle\Helper\LuceneHelper: ~ 5 | 6 | LuceneSearchBundle\Helper\StringHelper: ~ 7 | 8 | LuceneSearchBundle\Helper\HighlighterHelper: ~ -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/modifier.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | _defaults: 4 | autowire: true 5 | autoconfigure: true 6 | public: false 7 | 8 | 
LuceneSearchBundle\Modifier\QueuedDocumentModifier: ~ 9 | 10 | LuceneSearchBundle\Modifier\DocumentModifier: ~ 11 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/organizer.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | # organizer dispatcher 4 | LuceneSearchBundle\Organizer\Dispatcher\HandlerDispatcher: 5 | autowire: true 6 | public: false 7 | 8 | # organizer handler 9 | LuceneSearchBundle\Organizer\Handler\AbstractHandler: 10 | abstract: true 11 | autowire: true 12 | public: false 13 | 14 | # organizer state 15 | LuceneSearchBundle\Organizer\Handler\StateHandler: 16 | parent: LuceneSearchBundle\Organizer\Handler\AbstractHandler 17 | autowire: true 18 | public: true 19 | 20 | # organizer store 21 | LuceneSearchBundle\Organizer\Handler\StoreHandler: 22 | parent: LuceneSearchBundle\Organizer\Handler\AbstractHandler 23 | autowire: true 24 | public: true 25 | calls: 26 | - [setDocumentModifier, ['@LuceneSearchBundle\Modifier\DocumentModifier']] 27 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/system.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | _defaults: 4 | autowire: true 5 | autoconfigure: true 6 | 7 | # tool installer 8 | LuceneSearchBundle\Tool\Install: 9 | public: true 10 | 11 | # configuration 12 | LuceneSearchBundle\Configuration\Configuration: ~ 13 | 14 | # tool crawler state 15 | LuceneSearchBundle\Tool\CrawlerState: 16 | public: true 17 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/tasks.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | # task manager 4 | LuceneSearchBundle\Task\TaskManager: 5 | autowire: true 6 | public: true 
7 | calls: 8 | - ['setTaskIterators', ['@=service("LuceneSearchBundle\\Configuration\\Configuration").getConfig("seeds")']] 9 | 10 | # abstract task 11 | LuceneSearchBundle\Task\AbstractTask: 12 | abstract: true 13 | autowire: true 14 | public: false 15 | 16 | # tasks 17 | LuceneSearchBundle\Task\System\StartUpTask: 18 | parent: LuceneSearchBundle\Task\AbstractTask 19 | autowire: true 20 | public: false 21 | tags: 22 | - { name: lucene_search.task, priority: 80 } 23 | 24 | LuceneSearchBundle\Task\Crawler\CrawlerTask: 25 | parent: LuceneSearchBundle\Task\AbstractTask 26 | autowire: true 27 | public: false 28 | tags: 29 | - { name: lucene_search.task, priority: 60 } 30 | calls: 31 | - ['setEventListener', ['@event_dispatcher']] 32 | 33 | LuceneSearchBundle\Task\Parser\ParserTask: 34 | parent: LuceneSearchBundle\Task\AbstractTask 35 | autowire: true 36 | public: false 37 | tags: 38 | - { name: lucene_search.task, priority: 40 } 39 | calls: 40 | - ['setEventListener', ['@event_dispatcher']] 41 | 42 | LuceneSearchBundle\Task\System\ShutDownTask: 43 | parent: LuceneSearchBundle\Task\AbstractTask 44 | autowire: true 45 | public: false 46 | tags: 47 | - { name: lucene_search.task, priority: 0 } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/config/services/twig.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | _defaults: 4 | autowire: true 5 | autoconfigure: true 6 | public: false 7 | 8 | # twig extensions 9 | LuceneSearchBundle\Twig\Extension\PaginationExtension: 10 | tags: 11 | - { name: twig.extension } 12 | 13 | LuceneSearchBundle\Twig\Extension\CrawlerExtension: 14 | tags: 15 | - { name: twig.extension } 16 | 17 | LuceneSearchBundle\Twig\Extension\CategoriesExtension: 18 | tags: 19 | - { name: twig.extension } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/install/config.yml: 
-------------------------------------------------------------------------------- 1 | version: 2.0.0 -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/css/admin.css: -------------------------------------------------------------------------------- 1 | .lucenesearch_icon_settings { 2 | background: url(/bundles/lucenesearch/img/services.svg) center center no-repeat !important; 3 | } 4 | 5 | .lucenesearch_icon { 6 | background-image: url(/bundles/lucenesearch/img/lucene.png) !important; 7 | } 8 | 9 | body.pimcore_version_6 .lucenesearch_icon { 10 | background-image: url(/bundles/lucenesearch/img/lucene_white.png) !important; 11 | } 12 | 13 | .lucenesearch_icon_plugins { 14 | background: url(/bundles/lucenesearch/img/plugin.png) no-repeat scroll left center transparent !important; 15 | } 16 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/img/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dachcom-digital/pimcore-lucene-search/aa70b0857b8cf9350460d382199e737f3ccdab98/src/LuceneSearchBundle/Resources/public/img/ajax-loader.gif -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/img/lucene.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dachcom-digital/pimcore-lucene-search/aa70b0857b8cf9350460d382199e737f3ccdab98/src/LuceneSearchBundle/Resources/public/img/lucene.png -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/img/lucene_white.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dachcom-digital/pimcore-lucene-search/aa70b0857b8cf9350460d382199e737f3ccdab98/src/LuceneSearchBundle/Resources/public/img/lucene_white.png -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/img/plugin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dachcom-digital/pimcore-lucene-search/aa70b0857b8cf9350460d382199e737f3ccdab98/src/LuceneSearchBundle/Resources/public/img/plugin.png -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/img/search-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dachcom-digital/pimcore-lucene-search/aa70b0857b8cf9350460d382199e737f3ccdab98/src/LuceneSearchBundle/Resources/public/img/search-logo.png -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/img/services.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/public/js/backend/startup.js: -------------------------------------------------------------------------------- 1 | pimcore.registerNS('pimcore.layout.toolbar'); 2 | pimcore.registerNS('pimcore.plugin.luceneSearch'); 3 | 4 | pimcore.plugin.luceneSearch = Class.create(pimcore.plugin.admin, { 5 | 6 | isInitialized: false, 7 | 8 | getClassName: function () { 9 | return 'pimcore.plugin.luceneSearch'; 10 | }, 11 | 12 | initialize: function () { 13 | pimcore.plugin.broker.registerPlugin(this); 14 | }, 15 | 16 | uninstall: function () { 17 | }, 18 | 19 | pimcoreReady: function (params, broker) { 20 | 21 | var user = pimcore.globalmanager.get('user'); 22 | 23 | if 
(user.isAllowed('plugins')) { 24 | 25 | var luceneMenu = new Ext.Action({ 26 | id: 'lucenesearch', text: t('lucenesearch_settings'), iconCls: 'lucenesearch_icon', handler: this.openSettings 27 | }); 28 | 29 | layoutToolbar.settingsMenu.add(luceneMenu); 30 | 31 | } 32 | 33 | }, 34 | 35 | openSettings: function () { 36 | try { 37 | pimcore.globalmanager.get('lucenesearch_settings').activate(); 38 | } catch (e) { 39 | pimcore.globalmanager.add('lucenesearch_settings', new pimcore.plugin.luceneSearch.settings()); 40 | } 41 | } 42 | 43 | }); 44 | 45 | new pimcore.plugin.luceneSearch(); -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/translations/admin.en.yml: -------------------------------------------------------------------------------- 1 | lucenesearch_failed_to_setup_search_config: 'Failed to setup search config' 2 | lucenesearch_installed_already: 'LuceneSearch is installed already.' 3 | lucenesearch_could_not_create_dir: 'Could not install LuceneSearch, could not create directory ' 4 | lucenesearch_index_dir_not_configured: 'Could not install LuceneSearch, index directory is not configured.' 5 | lucenesearch_install_successfully: 'Lucene Search has been successfully installed' 6 | lucenesearch_uninstalled_successfully: 'Lucene Search has been successfully uninstalled' 7 | lucenesearch_uninstall_failed: 'Could not uninstall LuceneSearch' 8 | lucenesearch_installed_successfully: 'LuceneSearch installed successfully. Please reload UI to activate plugin.' 9 | lucenesearch_search_index_on_install: 'Index all resources during install' 10 | lucenesearch_frontend_settings_empty_text: 'Please enter your URL and hit enter' 11 | lucenesearch_frontend_allowed_empty_text: 'Please enter Regex incl. start and end delimiter and hit enter' 12 | lucenesearch_frontend_forbidden_empty_text: 'Please enter Regex incl. 
start and end delimiter and hit enter' 13 | lucenesearch_not_ready_for_install: 'Plugin is not ready for installation. Please check permissions of plugin and website directories and the /tmp directory. (rwx for php user!)' 14 | lucenesearch_search_index_optimization: 'Search Index Optimization' 15 | lucenesearch_search_index_optimization_success: 'The search index could be optimized successfully.' 16 | lucenesearch_search_index_optimization_failure: 'Search index optimization failed!' 17 | lucenesearch_frontend_crawler_running: 'Website crawler is running!' 18 | lucenesearch_frontend_crawler_not_running: 'Website crawler not running' 19 | lucenesearch_backend_crawler_running: 'Backend crawler is running!' 20 | lucenesearch_backend_crawler_not_running: 'Backend crawler not running' 21 | lucenesearch_frontend_crawler_last_started: 'last start' 22 | lucenesearch_frontend_crawler_last_finished: 'last end' 23 | lucenesearch_plugin_description: 'Description.' 24 | lucenesearch_settings: 'Lucene Search Settings' 25 | lucenesearch_frontend_settings: 'Frontend and Crawler Settings' 26 | lucenesearch_frontend_enabled: 'Frontend search active' 27 | lucenesearch_backend: 'Backend' 28 | lucenesearch_start_crawler: 'Start' 29 | lucenesearch_stop_crawler: 'Stop' 30 | lucenesearch_frontend_crawler: 'Frontend Crawler' 31 | lucenesearch_backend_crawler: 'Backend Crawler' 32 | lucenesearch_status: 'State' 33 | lucenesearch_frontend_crawler_start_on_next_maintenance: 'Forcing crawler start with next maintenance.' 34 | lucenesearch_frontend_mandatory_fields: 'These fields are mandatory for the crawler to work' 35 | lucenesearch_frontend_config_incomplete: 'Crawler is not ready to be started. Please complete crawler configuration below' 36 | lucenesearch_frontend_crawler_stop_failed_description: 'The crawler did not respond to the stop command within the last 10 seconds, do you want to force stop it? Forcing the stop does not guarantee that all child processes have finished. 
It is advised to wait a couple of minutes before starting the crawler again.' 37 | lucenesearch_frontend_crawler_stop_failed: 'Could not stop crawler' 38 | lucenesearch_please_wait: 'Please wait ...' 39 | lucenesearch_log: 'Logs' -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/views/List/Partial/Pagination/default.html.twig: -------------------------------------------------------------------------------- 1 |
2 | 3 | {% if currentSearchPage > 1 %} 4 | 5 | {% endif %} 6 | 7 | {% for i in searchPageStart..searchPageEnd %} 8 | {{ i }} 9 | {% endfor %} 10 | 11 | {% if searchAllPages > currentSearchPage %} 12 | 13 | {% endif %} 14 | 15 |
-------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/views/List/Partial/Pagination/single.html.twig: -------------------------------------------------------------------------------- 1 |
2 | 3 | {% if searchAllPages > 1 %} 4 | {% if searchAllPages > currentSearchPage %} 5 | {{ 'next page'|trans }} 6 | {% endif %} 7 | {% endif %} 8 | 9 |
-------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/views/List/Partial/suggestions.html.twig: -------------------------------------------------------------------------------- 1 | {% if searchSuggestions is not empty %} 2 | 3 | {{ 'Did you mean'|trans }}: 4 | {% for i,suggestion in searchSuggestions %} 5 | {{ suggestion }}{{ searchSuggestions|length -1 != i ? ',' : '' }} 6 | {% endfor %} 7 | 8 | {% endif %} -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/views/List/error.html.twig: -------------------------------------------------------------------------------- 1 | {% block lucene_search_content %} 2 | 3 |
4 | 5 |
6 | 7 |
8 | {{ errorMessage }} 9 |
10 | 11 |
12 | 13 |
14 | 15 | {% endblock lucene_search_content %} -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Resources/views/List/result.html.twig: -------------------------------------------------------------------------------- 1 | {% block lucene_search_content %} 2 |
3 |
4 |

{{ pimcore_input('searchTitle') }}

5 |
6 | {% if searchHasResults %} 7 | {{ 'Result from'|trans }} {{ searchCurrentPageResultStart }} - {{ searchCurrentPageResultEnd }} {{ 'of'|trans }} {{ searchTotalHits }} 8 | {% endif %} 9 |
10 |
11 |
12 | 13 |
14 |
15 |
16 | {% if searchHasResults %} 17 |

{{ 'We found %d entries for "%s".'|trans|format(searchTotalHits, searchQuery) }}

18 |
    19 | {% for i,searchResult in searchResults %} 20 |
  • 21 | {% if searchResult.title is not empty %} 22 |
    {{ searchResult.title }}
    23 | {% endif %} 24 | 25 | {% if searchResult.categories is iterable and searchResult.categories is not empty %} 26 | {{ 'categories'|trans }}: 27 | {% for category in searchResult.categories %} 28 | {{ category.label }}{{ loop.last == false ? ', ' : ''}} 29 | {% endfor %} 30 | 31 | {% endif %} 32 | {% if searchResult.description is defined and searchResult.description is not empty %} 33 |

    {{ searchResult.description|raw }}

    34 | {% elseif searchResult.summary is defined and searchResult.summary is not empty %} 35 |

    {{ searchResult.summary|raw }} ...

    36 | {% endif %} 37 |
    38 | {{ 'read more'|trans }} 39 |
  • 40 | {% endfor %} 41 |
42 | 43 | {% if searchAllPages > 1 %} 44 | {{ lucene_search_pagination({'viewTemplate' : 'default'}) }} 45 | {% endif %} 46 | 47 | {% else %} 48 | {% if searchQuery is not empty %} 49 |
50 |
{{ 'no search results found'|trans }}
51 | {% include '@LuceneSearch/List/Partial/suggestions.html.twig' %} 52 |
53 | {% endif %} 54 | {% endif %} 55 |
56 |
57 |
58 | {% endblock lucene_search_content %} -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/AbstractTask.php: -------------------------------------------------------------------------------- 1 | configuration = $configuration; 69 | $this->handlerDispatcher = $handlerDispatcher; 70 | } 71 | 72 | /** 73 | * @param AbstractLogger $logger 74 | * 75 | * @return $this 76 | */ 77 | public function setLogger(AbstractLogger $logger) 78 | { 79 | $this->logger = $logger; 80 | return $this; 81 | } 82 | 83 | /** 84 | * @param array $options 85 | * 86 | * @return $this 87 | */ 88 | public function setOptions(array $options = []) 89 | { 90 | $this->options = $options; 91 | return $this; 92 | } 93 | 94 | /** 95 | * @param $message 96 | * @param string $level 97 | * @param bool $logToBackend 98 | * @param bool $logToSystem 99 | * 100 | * @return void 101 | */ 102 | public function log($message, $level = 'debug', $logToBackend = true, $logToSystem = true) 103 | { 104 | $this->logger->log($message, $level, $logToBackend, $logToSystem); 105 | } 106 | 107 | /** 108 | * Add Signal Listener to allow task cancellation and clean up 109 | */ 110 | public function addSignalListener() 111 | { 112 | if (php_sapi_name() === 'cli') { 113 | if (function_exists('pcntl_signal')) { 114 | pcntl_signal(SIGTERM, [$this, 'handleCliSignal']); 115 | pcntl_signal(SIGINT, [$this, 'handleCliSignal']); 116 | pcntl_signal(SIGHUP, [$this, 'handleCliSignal']); 117 | pcntl_signal(SIGQUIT, [$this, 'handleCliSignal']); 118 | } 119 | } 120 | } 121 | 122 | /** 123 | * Simple kill process if no callback has been defined. 
124 | * 125 | * @param null $signo 126 | */ 127 | public function handleCliSignal($signo = null) 128 | { 129 | $this->log(sprintf('[task.%s] has been interrupted by signal (%s).', $this->prefix, $signo), 'debugHighlight'); 130 | exit; 131 | } 132 | 133 | /** 134 | * @param bool $isLastCycle 135 | */ 136 | public function setIsLastCycle($isLastCycle = false) 137 | { 138 | $this->isLastCycle = $isLastCycle; 139 | } 140 | 141 | /** 142 | * @return bool 143 | */ 144 | public function isLastCycle() 145 | { 146 | return $this->isLastCycle; 147 | } 148 | 149 | /** 150 | * @param bool $isLastTask 151 | */ 152 | public function setIsLastTask($isLastTask = false) 153 | { 154 | $this->isLastTask = $isLastTask; 155 | 156 | } 157 | 158 | /** 159 | * @return bool 160 | */ 161 | public function isLastTask() 162 | { 163 | return $this->isLastTask; 164 | } 165 | 166 | /** 167 | * @param bool $isFirstCycle 168 | */ 169 | public function setIsFirstCycle($isFirstCycle = false) 170 | { 171 | $this->isFirstCycle = $isFirstCycle; 172 | } 173 | 174 | /** 175 | * @return bool 176 | */ 177 | public function isFirstCycle() 178 | { 179 | return $this->isFirstCycle; 180 | } 181 | 182 | /** 183 | * @param bool $isFirstTask 184 | */ 185 | public function setIsFirstTask($isFirstTask = false) 186 | { 187 | $this->isFirstTask = $isFirstTask; 188 | 189 | } 190 | 191 | /** 192 | * @return bool the flag stored by setIsFirstTask() 193 | */ 194 | public function isFirstTask() 195 | { 196 | return $this->isFirstTask; 197 | } 198 | 199 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Event/Logger.php: -------------------------------------------------------------------------------- 1 | debug = $debug; 30 | $this->logger = $logger; 31 | } 32 | 33 | /** 34 | * @return array 35 | */ 36 | public static function getSubscribedEvents() 37 | { 38 | return [ 39 | SpiderEvents::SPIDER_CRAWL_FILTER_POSTFETCH => 'logFiltered', 40 | SpiderEvents::SPIDER_CRAWL_FILTER_PREFETCH => 
'logFiltered', 41 | SpiderEvents::SPIDER_CRAWL_POST_ENQUEUE => 'logQueued', 42 | SpiderEvents::SPIDER_CRAWL_RESOURCE_PERSISTED => 'logPersisted', 43 | SpiderEvents::SPIDER_CRAWL_ERROR_REQUEST => 'logFailed', 44 | SpiderEvents::SPIDER_CRAWL_POST_REQUEST => 'logCrawled', 45 | SpiderEvents::SPIDER_CRAWL_USER_STOPPED => 'logStoppedBySignal', 46 | LuceneSearchEvents::LUCENE_SEARCH_CRAWLER_INTERRUPTED => 'logStopped' 47 | ]; 48 | } 49 | 50 | /** 51 | * @param GenericEvent $event 52 | */ 53 | public function logQueued(GenericEvent $event) 54 | { 55 | $this->logEvent('queued', $event); 56 | } 57 | 58 | /** 59 | * @param GenericEvent $event 60 | */ 61 | public function logPersisted(GenericEvent $event) 62 | { 63 | $this->logEvent('persisted', $event); 64 | } 65 | 66 | /** 67 | * @param GenericEvent $event 68 | */ 69 | public function logFiltered(GenericEvent $event) 70 | { 71 | $filterType = $event->hasArgument('filterType') ? $event->getArgument('filterType') . '.' : ''; 72 | $name = $filterType . 
'filtered'; 73 | $this->logEvent($name, $event); 74 | } 75 | 76 | /** 77 | * @param GenericEvent $event 78 | */ 79 | public function logFailed(GenericEvent $event) 80 | { 81 | $message = preg_replace('/\s+/S', ' ', $event->getArgument('message')); 82 | $this->logEvent('failed', $event, 'error', $message); 83 | } 84 | 85 | /** 86 | * @param Event $event 87 | */ 88 | public function logStoppedBySignal(Event $event) 89 | { 90 | $logEvent = new GenericEvent($this, ['errorMessage' => 'crawling canceled (lost signal)']); 91 | $this->logEvent('stopped', $logEvent, 'debugHighlight', $logEvent->getArgument('errorMessage')); 92 | } 93 | 94 | /** 95 | * @param GenericEvent $event 96 | */ 97 | public function logStopped(GenericEvent $event) 98 | { 99 | $this->logEvent('stopped', $event, 'debugHighlight', $event->getArgument('errorMessage')); 100 | } 101 | 102 | /** 103 | * @param GenericEvent $event 104 | */ 105 | public function logCrawled(GenericEvent $event) 106 | { 107 | $this->logEvent('uri.crawled', $event, 'debugHighlight'); 108 | } 109 | 110 | /** 111 | * @param $name 112 | * @param GenericEvent $event 113 | * @param $debugLevel 114 | * @param string $additionalMessage 115 | */ 116 | protected function logEvent($name, GenericEvent $event, $debugLevel = 'debug', $additionalMessage = '') 117 | { 118 | $triggerLog = in_array($name, [ 119 | 'uri.crawled', 120 | 'uri.match.invalid.filtered', 121 | 'uri.match.forbidden.filtered', 122 | 'filtered', 123 | 'failed', 124 | 'stopped' 125 | ]); 126 | 127 | $logToBackend = in_array($name, ['filtered', 'failed']); 128 | $logToSystem = $this->debug === true; 129 | 130 | if ($triggerLog) { 131 | 132 | $prefix = '[spider.' . $name . '] '; 133 | 134 | $message = $prefix; 135 | if (!empty($additionalMessage)) { 136 | $message .= $additionalMessage . ' '; 137 | } 138 | 139 | $message .= $event->hasArgument('uri') ? 
$event->getArgument('uri')->toString() : '[uri not available]'; 140 | 141 | $this->logger->log($message, $debugLevel, $logToBackend, $logToSystem); 142 | } 143 | } 144 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Event/Statistics.php: -------------------------------------------------------------------------------- 1 | 'addToFiltered', 44 | SpiderEvents::SPIDER_CRAWL_FILTER_PREFETCH => 'addToFiltered', 45 | SpiderEvents::SPIDER_CRAWL_POST_ENQUEUE => 'addToQueued', 46 | SpiderEvents::SPIDER_CRAWL_RESOURCE_PERSISTED => 'addToPersisted', 47 | SpiderEvents::SPIDER_CRAWL_ERROR_REQUEST => 'addToFailed' 48 | ]; 49 | } 50 | 51 | /** 52 | * @param GenericEvent $event 53 | */ 54 | public function addToQueued(GenericEvent $event) 55 | { 56 | $this->queued++; 57 | } 58 | 59 | /** 60 | * @param GenericEvent $event 61 | */ 62 | public function addToPersisted(GenericEvent $event) 63 | { 64 | $this->persisted++; 65 | } 66 | 67 | /** 68 | * @param GenericEvent $event 69 | */ 70 | public function addToFiltered(GenericEvent $event) 71 | { 72 | $this->filtered++; 73 | } 74 | 75 | /** 76 | * @param GenericEvent $event 77 | */ 78 | public function addToFailed(GenericEvent $event) 79 | { 80 | $this->failed++; 81 | } 82 | 83 | /** 84 | * @return int 85 | */ 86 | public function getQueued() 87 | { 88 | return $this->queued; 89 | } 90 | 91 | /** 92 | * @return int 93 | */ 94 | public function getPersisted() 95 | { 96 | return $this->persisted; 97 | } 98 | 99 | /** 100 | * @return int 101 | */ 102 | public function getFiltered() 103 | { 104 | return $this->filtered; 105 | } 106 | 107 | /** 108 | * @return int 109 | */ 110 | public function getFailed() 111 | { 112 | return $this->failed; 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Filter/Discovery/NegativeUriFilter.php: 
-------------------------------------------------------------------------------- 1 | regexBag = $regexBag; 27 | $this->setDispatcher($dispatcher); 28 | } 29 | 30 | /** 31 | * @param UriInterface $uri 32 | * 33 | * @return bool 34 | */ 35 | public function match(UriInterface $uri) 36 | { 37 | foreach ($this->regexBag as $regex) { 38 | if (preg_match($regex, $uri->toString())) { 39 | return false; 40 | } 41 | } 42 | 43 | $this->notifyDispatcher($uri, 'uri.match.invalid'); 44 | 45 | return true; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Filter/Discovery/UriFilter.php: -------------------------------------------------------------------------------- 1 | regexBag = $regexBag; 27 | $this->setDispatcher($dispatcher); 28 | } 29 | 30 | /** 31 | * @param UriInterface $uri 32 | * 33 | * @return bool 34 | */ 35 | public function match(UriInterface $uri) 36 | { 37 | foreach ($this->regexBag as $regex) { 38 | if (preg_match($regex, $uri->toString())) { 39 | $this->notifyDispatcher($uri, 'uri.match.forbidden'); 40 | return true; 41 | } 42 | } 43 | 44 | return false; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Filter/LogDispatcher.php: -------------------------------------------------------------------------------- 1 | persistor = new FilterPersistor(); 32 | $this->dispatcher = $dispatcher; 33 | } 34 | 35 | /** 36 | * @param $uri 37 | * @param $filterType 38 | */ 39 | function notifyDispatcher($uri, $filterType) 40 | { 41 | 42 | $stringUri = $uri->toString(); 43 | $saveUri = md5($stringUri); 44 | 45 | if ($this->persistor->get($saveUri) === false) { 46 | $this->filtered[] = $saveUri; 47 | $this->persistor->set($saveUri, time()); 48 | $event = new GenericEvent($this, ['uri' => $uri, 'filterType' => $filterType]); 49 | $this->dispatcher->dispatch(SpiderEvents::SPIDER_CRAWL_FILTER_PREFETCH, 
$event); 50 | } 51 | } 52 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Filter/PostFetch/MaxContentSizeFilter.php: -------------------------------------------------------------------------------- 1 | maxFileSize = (float)$maxFileSize; 23 | } 24 | 25 | /** 26 | * @param Resource $resource 27 | * 28 | * @return bool 29 | */ 30 | public function match(Resource $resource) 31 | { 32 | $size = $resource->getResponse()->getBody()->getSize(); 33 | $sizeMb = $size / 1024 / 1024; 34 | 35 | if ($this->maxFileSize == 0 || $sizeMb <= $this->maxFileSize) { 36 | return false; 37 | } 38 | 39 | return true; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Filter/PostFetch/MimeTypeFilter.php: -------------------------------------------------------------------------------- 1 | allowedMimeType = $allowedMimeType; 23 | } 24 | 25 | /** 26 | * @param Resource $resource 27 | * 28 | * @return bool 29 | */ 30 | public function match(Resource $resource) 31 | { 32 | $hasContentType = count( 33 | array_intersect( 34 | array_map( 35 | function ($allowed) use ($resource) { 36 | $contentTypeInfo = $resource->getResponse()->getHeaderLine('Content-Type'); 37 | $contentType = explode(';', $contentTypeInfo); //only get content type, ignore charset. 
38 | return $allowed === $contentType[0]; 39 | }, 40 | $this->allowedMimeType 41 | ), 42 | [true] 43 | ) 44 | ) > 0; 45 | 46 | return !$hasContentType; 47 | } 48 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/Listener/Abort.php: -------------------------------------------------------------------------------- 1 | spider = $spider; 25 | } 26 | 27 | /** 28 | * @param Event $event 29 | */ 30 | public function checkCrawlerState(Event $event) 31 | { 32 | if (!file_exists(Configuration::CRAWLER_PROCESS_FILE_PATH)) { 33 | $this->spider->getDispatcher()->dispatch(LuceneSearchEvents::LUCENE_SEARCH_CRAWLER_INTERRUPTED, 34 | new GenericEvent($this, [ 35 | 'uri' => $event->getArgument('uri'), 36 | 'errorMessage' => 'crawling aborted by user (tmp file while crawling has suddenly gone.)' 37 | ])); 38 | } 39 | } 40 | 41 | /** 42 | * @param Event $event 43 | */ 44 | public function stopCrawler(Event $event) 45 | { 46 | exit; 47 | } 48 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/Crawler/PersistenceHandler/FileSerializedResourcePersistenceHandler.php: -------------------------------------------------------------------------------- 1 | defaultFilename; 25 | } else { 26 | $pathFragments = explode('/', $path); 27 | if (strpos(end($pathFragments), '.') === false) { 28 | $path .= '/' . $this->defaultFilename; 29 | } 30 | } 31 | 32 | return $path; 33 | } 34 | 35 | /** 36 | * @param Resource $resource 37 | */ 38 | public function persist(Resource $resource) 39 | { 40 | $path = rtrim($this->getResultPath() . $this->getFileSystemPath($resource), '/'); 41 | if (!is_dir($path)) { 42 | mkdir($path, 0777, true); 43 | } 44 | 45 | $file = new \SplFileObject($path . DIRECTORY_SEPARATOR . 
$this->getFileSystemFilename($resource), 'w'); 46 | $this->totalSizePersisted += $file->fwrite(serialize($resource)); 47 | } 48 | 49 | /** 50 | * @return Resource 51 | */ 52 | public function current() 53 | { 54 | return unserialize($this->getIterator()->current()->getContents()); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/System/ShutDownTask.php: -------------------------------------------------------------------------------- 1 | logger->setPrefix($this->prefix); 30 | 31 | if ($this->isLastCycle() === false) { 32 | return false; 33 | } 34 | 35 | $this->logger->log('Stopping crawling...', 'debug', false, false); 36 | 37 | $this->handlerDispatcher->getStoreHandler()->resetPersistenceStore(); 38 | $this->handlerDispatcher->getStoreHandler()->resetUriFilterPersistenceStore(); 39 | $this->handlerDispatcher->getStoreHandler()->riseGenesisToStable(); 40 | $this->handlerDispatcher->getStoreHandler()->resetAssetTmp(); 41 | $this->handlerDispatcher->getStoreHandler()->clearQueuedDocumentModifiers(); 42 | 43 | $this->handlerDispatcher->getStateHandler()->stopCrawler(); 44 | 45 | return true; 46 | } 47 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/System/StartUpTask.php: -------------------------------------------------------------------------------- 1 | isFirstCycle() === false) { 22 | return true; 23 | } 24 | 25 | if ($this->handlerDispatcher->getStateHandler()->getCrawlerState() == StateHandler::CRAWLER_STATE_ACTIVE) { 26 | if (isset($this->options['force']) && $this->options['force'] === true) { 27 | $this->handlerDispatcher->getStateHandler()->stopCrawler(true); 28 | } else { 29 | return false; 30 | } 31 | } 32 | 33 | return true; 34 | } 35 | 36 | /** 37 | * @param mixed $crawlData 38 | * 39 | * @return bool 40 | */ 41 | public function process($crawlData) 42 | { 43 | $this->logger->setPrefix($this->prefix); 44 | 45 | 
if ($this->isFirstCycle() === false) { 46 | return false; 47 | } 48 | 49 | $this->logger->log('start crawling...', 'debug', false, false); 50 | 51 | $this->handlerDispatcher->getStoreHandler()->resetGenesisIndex(); 52 | $this->handlerDispatcher->getStoreHandler()->resetPersistenceStore(); 53 | $this->handlerDispatcher->getStoreHandler()->resetAssetTmp(); 54 | $this->handlerDispatcher->getStoreHandler()->resetLogs(); 55 | 56 | $this->handlerDispatcher->getStateHandler()->startCrawler(); 57 | 58 | return true; 59 | } 60 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Task/TaskInterface.php: -------------------------------------------------------------------------------- 1 | tasks = []; 36 | } 37 | 38 | /** 39 | * @param $task 40 | * @param $id 41 | */ 42 | public function addTask($task, $id) 43 | { 44 | $this->tasks[] = ['id' => $id, 'task' => $task]; 45 | } 46 | 47 | /** 48 | * @param AbstractLogger $logger 49 | */ 50 | public function setLogger(AbstractLogger $logger) 51 | { 52 | $this->logger = $logger; 53 | } 54 | 55 | /** 56 | * @param array $taskIterators 57 | */ 58 | public function setTaskIterators(array $taskIterators) 59 | { 60 | $this->taskIterators = $taskIterators; 61 | } 62 | 63 | /** 64 | * @param array $options 65 | * 66 | * @throws \Exception 67 | */ 68 | public function processTaskChain($options = []) 69 | { 70 | $processData = []; 71 | 72 | if (empty($this->taskIterators)) { 73 | throw new \Exception('no valid task iterators defined!'); 74 | } 75 | 76 | $this->bootChain(); 77 | 78 | foreach ($this->taskIterators as $iteratorIndex => $iterator) { 79 | 80 | foreach ($this->tasks as $taskIndex => $task) { 81 | 82 | /** @var AbstractTask $taskClass */ 83 | $taskClass = $task['task']; 84 | 85 | $options['iterator'] = $iterator; 86 | 87 | $taskClass->setIsFirstCycle($iteratorIndex == 0); 88 | $taskClass->setIsFirstTask($taskIndex == 0); 89 | $taskClass->setIsLastCycle($iteratorIndex === 
count($this->taskIterators) - 1); 90 | $taskClass->setIsLastTask($taskIndex === count($this->tasks) - 1); 91 | $taskClass->setOptions($options); 92 | 93 | if ($taskClass->isValid()) { 94 | $taskClass->setLogger($this->logger); 95 | $processData = $taskClass->process($processData); 96 | } else { 97 | $this->shutDownChain(); 98 | $this->logger->log('There was an error while processing task (' . $task['id'] . '). please check your logs.'); 99 | exit; 100 | } 101 | } 102 | } 103 | 104 | $this->shutDownChain(); 105 | } 106 | 107 | private function bootChain() 108 | { 109 | \Pimcore::collectGarbage(); 110 | 111 | $this->keepAlive = new ConnectionKeepAlive(); 112 | $this->keepAlive->addConnection(\Pimcore\Db::getConnection()); 113 | $this->keepAlive->attach(); 114 | } 115 | 116 | private function shutDownChain() 117 | { 118 | $this->keepAlive->detach(); 119 | } 120 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Tool/CrawlerState.php: -------------------------------------------------------------------------------- 1 | getHeaders(); 15 | 16 | if (empty($headers)) { 17 | return $isLuceneSearch; 18 | } 19 | 20 | foreach ($headers as $name => $value) { 21 | if (strcasecmp($name, 'Lucene-Search') === 0) { /* header names are case-insensitive */ 22 | $pluginVersion = $value; 23 | $isLuceneSearch = true; 24 | break; 25 | } 26 | } 27 | 28 | return $isLuceneSearch; 29 | } 30 | 31 | /** 32 | * @return array|false 33 | */ 34 | private function getHeaders() 35 | { 36 | if (!function_exists('getallheaders')) { 37 | $headers = []; 38 | foreach ($_SERVER as $name => $value) { 39 | if (substr($name, 0, 5) == 'HTTP_') { 40 | $headers[str_replace(' ', '-', ucwords(strtolower(str_replace('_', ' ', substr($name, 5)))))] = $value; 41 | } 42 | } 43 | 44 | return $headers; 45 | } else { 46 | return getallheaders(); 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Tool/Install.php: 
-------------------------------------------------------------------------------- 1 | installSourcesPath = __DIR__ . '/../Resources/install'; 44 | $this->fileSystem = new Filesystem(); 45 | $this->currentVersion = Versions::getVersion(LuceneSearchBundle::PACKAGE_NAME); 46 | } 47 | 48 | /** 49 | * {@inheritdoc} 50 | */ 51 | public function install() 52 | { 53 | $this->installOrUpdateConfigFile(); 54 | $this->createDirectories(); 55 | $this->installProperties(); 56 | } 57 | 58 | /** 59 | * install or update config file 60 | */ 61 | private function installOrUpdateConfigFile() 62 | { 63 | if (!$this->fileSystem->exists(Configuration::SYSTEM_CONFIG_DIR_PATH)) { 64 | $this->fileSystem->mkdir(Configuration::SYSTEM_CONFIG_DIR_PATH); 65 | } 66 | 67 | $config = ['version' => $this->currentVersion]; 68 | $yml = Yaml::dump($config); 69 | file_put_contents(Configuration::SYSTEM_CONFIG_FILE_PATH, $yml); 70 | 71 | if (!$this->fileSystem->exists(Configuration::STATE_FILE_PATH)) { 72 | $content = serialize(Configuration::STATE_DEFAULT_VALUES); 73 | $this->fileSystem->appendToFile(Configuration::STATE_FILE_PATH, $content); 74 | } 75 | 76 | } 77 | 78 | /** 79 | * @return bool 80 | */ 81 | public function createDirectories() 82 | { 83 | if (!$this->fileSystem->exists(Configuration::CRAWLER_PERSISTENCE_STORE_DIR_PATH)) { 84 | $this->fileSystem->mkdir(Configuration::CRAWLER_PERSISTENCE_STORE_DIR_PATH, 0755); 85 | } 86 | 87 | if (!$this->fileSystem->exists(Configuration::INDEX_DIR_PATH)) { 88 | $this->fileSystem->mkdir(Configuration::INDEX_DIR_PATH, 0755); 89 | } 90 | 91 | if (!$this->fileSystem->exists(Configuration::INDEX_DIR_PATH_STABLE)) { 92 | $this->fileSystem->mkdir(Configuration::INDEX_DIR_PATH_STABLE, 0755); 93 | } 94 | 95 | if (!$this->fileSystem->exists(Configuration::INDEX_DIR_PATH_GENESIS)) { 96 | $this->fileSystem->mkdir(Configuration::INDEX_DIR_PATH_GENESIS, 0755); 97 | } 98 | 99 | return true; 100 | } 101 | 102 | /** 103 | * 104 | */ 105 | public function 
installProperties() 106 | { 107 | $propertiesToInstall = [ 108 | 'assigned_language' => [ 109 | 'name' => 'Assigned Language', 110 | 'description' => 'Set a specific language which lucene search should respect while crawling.' 111 | ], 112 | 'assigned_country' => [ 113 | 'name' => 'Assigned Country', 114 | 'description' => 'Set a specific country which lucene search should respect while crawling.' 115 | ] 116 | ]; 117 | 118 | foreach ($propertiesToInstall as $propertyKey => $propertyData) { 119 | $defProperty = Property\Predefined::getByKey($propertyKey); 120 | 121 | if (!$defProperty instanceof Property\Predefined) { 122 | $data = 'all,'; 123 | if ($propertyKey === 'assigned_language') { 124 | $languages = \Pimcore\Tool::getValidLanguages(); 125 | foreach ($languages as $language) { 126 | $data .= $language . ','; 127 | } 128 | } 129 | 130 | $data = rtrim($data, ','); 131 | 132 | $property = new Property\Predefined(); 133 | $property->setType('select'); 134 | $property->setName($propertyData['name']); 135 | $property->setKey($propertyKey); 136 | $property->setDescription($propertyData['description']); 137 | $property->setCtype('asset'); 138 | $property->setData('all'); 139 | $property->setConfig($data); 140 | $property->setInheritable(false); 141 | $property->save(); 142 | } 143 | } 144 | 145 | } 146 | 147 | /** 148 | * {@inheritdoc} 149 | */ 150 | public function update() 151 | { 152 | $this->installOrUpdateConfigFile(); 153 | } 154 | 155 | /** 156 | * {@inheritdoc} 157 | */ 158 | public function uninstall() 159 | { 160 | if ($this->fileSystem->exists(Configuration::SYSTEM_CONFIG_FILE_PATH)) { 161 | $this->fileSystem->remove(Configuration::SYSTEM_CONFIG_FILE_PATH); 162 | } 163 | } 164 | 165 | /** 166 | * {@inheritdoc} 167 | */ 168 | public function isInstalled() 169 | { 170 | return $this->fileSystem->exists(Configuration::SYSTEM_CONFIG_FILE_PATH); 171 | } 172 | 173 | /** 174 | * {@inheritdoc} 175 | */ 176 | public function canBeInstalled() 177 | { 178 | return 
!$this->fileSystem->exists(Configuration::SYSTEM_CONFIG_FILE_PATH); 179 | } 180 | 181 | /** 182 | * {@inheritdoc} 183 | */ 184 | public function canBeUninstalled() 185 | { 186 | return $this->fileSystem->exists(Configuration::SYSTEM_CONFIG_FILE_PATH); 187 | } 188 | 189 | /** 190 | * {@inheritdoc} 191 | */ 192 | public function needsReloadAfterInstall() 193 | { 194 | return false; 195 | } 196 | 197 | /** 198 | * {@inheritdoc} 199 | */ 200 | public function canBeUpdated() 201 | { 202 | $needUpdate = false; 203 | if ($this->fileSystem->exists(Configuration::SYSTEM_CONFIG_FILE_PATH)) { 204 | $config = Yaml::parse(file_get_contents(Configuration::SYSTEM_CONFIG_FILE_PATH)); 205 | if ($config['version'] !== $this->currentVersion) { 206 | $needUpdate = true; 207 | } 208 | } 209 | 210 | return $needUpdate; 211 | } 212 | 213 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Twig/Extension/CategoriesExtension.php: -------------------------------------------------------------------------------- 1 | configuration = $configuration; 22 | } 23 | 24 | /** 25 | * {@inheritdoc} 26 | */ 27 | public function getFunctions() 28 | { 29 | return [ 30 | new \Twig_Function('lucene_search_get_categories', [$this, 'getCategoriesList']) 31 | ]; 32 | } 33 | 34 | /** 35 | * @param null $options 36 | * 37 | * @return array 38 | */ 39 | public function getCategoriesList($options = null) 40 | { 41 | $categories = $this->configuration->getCategories(); 42 | return $categories; 43 | } 44 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Twig/Extension/CrawlerExtension.php: -------------------------------------------------------------------------------- 1 | crawlerState = $crawlerState; 17 | } 18 | 19 | /** 20 | * {@inheritdoc} 21 | */ 22 | public function getFunctions() 23 | { 24 | return [ 25 | new \Twig_Function('lucene_search_crawler_active', [$this, 'checkCrawlerState']) 26 | ]; 27 
| } 28 | 29 | public function checkCrawlerState() 30 | { 31 | return $this->crawlerState->isLuceneSearchCrawler(); 32 | } 33 | } -------------------------------------------------------------------------------- /src/LuceneSearchBundle/Twig/Extension/PaginationExtension.php: -------------------------------------------------------------------------------- 1 | true, 15 | 'needs_context' => true, 16 | 'is_safe' => ['html'] 17 | ]), 18 | new \Twig_Function('lucene_search_pagination_url', [$this, 'getPaginationUrl'], [ 19 | 'needs_context' => true, 20 | 'is_safe' => ['html'] 21 | ]), 22 | ]; 23 | } 24 | 25 | /** 26 | * @param \Twig_Environment $environment 27 | * @param array $context 28 | * @param array|null $options overrides for paginationUrl, paginationElements, viewTemplate, paginationClass 29 | * 30 | * @return string 31 | * @throws \Twig_Error_Loader 32 | * @throws \Twig_Error_Runtime 33 | * @throws \Twig_Error_Syntax 34 | */ 35 | public function getPagination(\Twig_Environment $environment, $context = [], $options = null) 36 | { 37 | $defaults = [ 38 | 'paginationUrl' => '', 39 | 'paginationElements' => 5, 40 | 'viewTemplate' => 'default', 41 | 'paginationClass' => 'paginator' 42 | ]; 43 | 44 | $params = array_merge($defaults, is_array($options) ? $options : []); /* $options defaults to null */ 45 | 46 | $pageStart = 1; 47 | $searchCurrentPage = (int)$context['searchCurrentPage']; 48 | $searchAllPages = (int)$context['searchAllPages']; 49 | 50 | if ($searchCurrentPage > ceil($params['paginationElements'] / 2)) { 51 | $pageStart = $searchCurrentPage - 2; 52 | } 53 | 54 | $pageEnd = $pageStart + $params['paginationElements']; 55 | 56 | if ($pageEnd > $searchAllPages) { 57 | $pageEnd = $searchAllPages; 58 | } 59 | 60 | $paginationUrlInfo = parse_url($params['paginationUrl']); 61 | 62 | $path = ''; 63 | $scheme = ''; 64 | $host = ''; 65 | 66 | if (isset($paginationUrlInfo['query']) && !empty($paginationUrlInfo['query'])) { 67 | $q = $paginationUrlInfo['query']; 68 | $paginationUrl = '?' . $q . (substr($q, -1) === '&' ? 
'' : '&'); 69 | } else { 70 | $paginationUrl = '?'; 71 | } 72 | 73 | if (isset($paginationUrlInfo['path']) && !empty($paginationUrlInfo['path'])) { 74 | $path = $paginationUrlInfo['path']; 75 | } 76 | 77 | if (isset($paginationUrlInfo['scheme']) && !empty($paginationUrlInfo['scheme'])) { 78 | $scheme = $paginationUrlInfo['scheme'] . '://'; 79 | } 80 | 81 | if (isset($paginationUrlInfo['host']) && !empty($paginationUrlInfo['host'])) { 82 | $host = $paginationUrlInfo['host']; 83 | } 84 | 85 | $viewParams = [ 86 | 'searchUrl' => $scheme . $host . $path . $paginationUrl, 87 | 'currentSearchPage' => $searchCurrentPage, 88 | 'searchAllPages' => $searchAllPages, 89 | 'searchPageStart' => $pageStart, 90 | 'searchPageEnd' => $pageEnd, 91 | 'searchUrlData' => $this->getPaginationUrl($context), 92 | 'class' => $params['paginationClass'] 93 | ]; 94 | 95 | return $environment->render( 96 | '@LuceneSearch/List/Partial/Pagination/' . $params['viewTemplate'] . '.html.twig', 97 | $viewParams 98 | ); 99 | } 100 | 101 | /** 102 | * @param array $context 103 | * @param null $query 104 | * 105 | * @return string 106 | */ 107 | public function getPaginationUrl($context = [], $query = null) 108 | { 109 | $params = [ 110 | 'language' => !empty($context['searchLanguage']) ? $context['searchLanguage'] : null, 111 | 'country' => !empty($context['searchCountry']) ? $context['searchCountry'] : null, 112 | 'category' => !empty($context['searchCategory']) ? $context['searchCategory'] : null, 113 | 'q' => !empty($query) ? $query : $context['searchQuery'] 114 | ]; 115 | 116 | return http_build_query($params); 117 | } 118 | } --------------------------------------------------------------------------------