├── CHANGELOG.md ├── LICENSE ├── README.md ├── composer.json ├── renovate.json └── src └── voku └── helper ├── StopWords.php ├── StopWordsLanguageNotExists.php └── stopwords ├── ar.php ├── bg.php ├── ca.php ├── cz.php ├── da.php ├── de.php ├── el.php ├── en.php ├── eo.php ├── es.php ├── et.php ├── fi.php ├── fr.php ├── hi.php ├── hr.php ├── hu.php ├── id.php ├── it.php ├── ka.php ├── lt.php ├── lv.php ├── nl.php ├── no.php ├── pl.php ├── pt.php ├── ro.php ├── ru.php ├── sk.php ├── sv.php ├── tr.php ├── uk.php └── vi.php /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change log 2 | All notable changes to this project will be documented in this file. 3 | This project adheres to [Semantic Versioning](http://semver.org/). 4 | 5 | ## [Unreleased] 6 | 7 | ## [2.0.1] - 2018-11-23 8 | ### Fix 9 | - switch czech and catalan stopwords | thx@retep007 10 | 11 | ## [2.0.0] - 2017-11-26 12 | ### Changed 13 | - "php": ">=7.0" 14 | 15 | ## [1.2.0] - 2017-05-22 16 | ### Changed 17 | - add more languages 18 | 19 | ## [1.1.0] - 2017-05-12 20 | ### Changed 21 | - add more languages 22 | 23 | ## [1.0.0] - 2017-05-05 24 | ### Changed 25 | - init 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Lars Moelleken 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/voku/stop-words.svg?branch=master)](https://travis-ci.org/voku/stop-words) 2 | [![Coverage Status](https://coveralls.io/repos/github/voku/stop-words/badge.svg?branch=master)](https://coveralls.io/github/voku/stop-words?branch=master) 3 | [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/voku/stop-words/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/voku/stop-words/?branch=master) 4 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/dabeb6d93ead41309e4bbf80c0ec984e)](https://www.codacy.com/app/voku/stop-words?utm_source=github.com&utm_medium=referral&utm_content=voku/stop-words&utm_campaign=Badge_Grade) 5 | [![SensioLabsInsight](https://insight.sensiolabs.com/projects/316837f1-afb0-4ea5-938e-340527eeb4e6/mini.png)](https://insight.sensiolabs.com/projects/316837f1-afb0-4ea5-938e-340527eeb4e6) 6 | [![Latest Stable Version](https://poser.pugx.org/voku/stop-words/v/stable)](https://packagist.org/packages/voku/stop-words) 7 | [![Total Downloads](https://poser.pugx.org/voku/stop-words/downloads)](https://packagist.org/packages/voku/stop-words) 8 | [![Latest Unstable Version](https://poser.pugx.org/voku/stop-words/v/unstable)](https://packagist.org/packages/voku/stop-words) 9 
| [![License](https://poser.pugx.org/voku/stop-words/license)](https://packagist.org/packages/voku/stop-words) 10 | 11 | # Stop-Words 12 | 13 | ## Description 14 | 15 | A collection of stop words in various languages, e.g. for search functions. 16 | 17 | * [Installation](#installation) 18 | * [Usage](#usage) 19 | * [History](#history) 20 | 21 | ## Installation 22 | 23 | 1. Install and use [composer](https://getcomposer.org/doc/00-intro.md) in your project. 24 | 2. Require this package via composer: 25 | 26 | ```sh 27 | composer require voku/stop-words 28 | ``` 29 | 30 | ## Usage 31 | 32 | ```php 33 | use voku\helper\StopWords; 34 | 35 | $stopWords = new StopWords(); 36 | $stopWords->getStopWordsFromLanguage('de'); 37 | ``` 38 | 39 | Available languages 40 | ------------------- 41 | * Arabic (ar) 42 | * Bulgarian (bg) 43 | * Catalan (ca) 44 | * Croatian (hr) 45 | * Czech (cz) 46 | * Danish (da) 47 | * Dutch (nl) 48 | * English (en) 49 | * Esperanto (eo) 50 | * Estonian (et) 51 | * Finnish (fi) 52 | * French (fr) 53 | * Georgian (ka) 54 | * German (de) 55 | * Greek (el) 56 | * Hindi (hi) 57 | * Hungarian (hu) 58 | * Indonesian (id) 59 | * Italian (it) 60 | * Latvian (lv) 61 | * Lithuanian (lt) 62 | * Norwegian (no) 63 | * Polish (pl) 64 | * Portuguese (pt) 65 | * Romanian (ro) 66 | * Russian (ru) 67 | * Slovak (sk) 68 | * Spanish (es) 69 | * Swedish (sv) 70 | * Turkish (tr) 71 | * Ukrainian (uk) 72 | * Vietnamese (vi) 73 | 74 | ## History 75 | See [CHANGELOG](CHANGELOG.md) for the full history of changes. 
76 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "voku/stop-words", 3 | "description": "Stop-Words via PHP", 4 | "keywords": [ 5 | "stop-words", 6 | "stop words" 7 | ], 8 | "type": "library", 9 | "license": "MIT", 10 | "authors": [ 11 | { 12 | "name": "Lars Moelleken", 13 | "homepage": "http://www.moelleken.org/" 14 | } 15 | ], 16 | "autoload": { 17 | "psr-4": { 18 | "voku\\": "src/voku/" 19 | } 20 | }, 21 | "require": { 22 | "php": ">=7.0.0" 23 | }, 24 | "require-dev": { 25 | "phpunit/phpunit": "~6.0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:base" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /src/voku/helper/StopWords.php: -------------------------------------------------------------------------------- 1 | stopWords[$language] = $this->getData($language); 71 | } 72 | 73 | /** 74 | * Get data from "/data/*.php". 75 | * 76 | * @param string $file 77 | * 78 | * @return array

Will return an empty array on error.

*/
    private function getData(string $file): array
    {
        // Function-static memo: it outlives a single call, so a language
        // file only has to be read from disk once. Keyed by the short
        // language code that callers pass in as $file.
        static $RESULT_STOP_WORDS_CACHE = [];

        if (isset($RESULT_STOP_WORDS_CACHE[$file])) {
            return $RESULT_STOP_WORDS_CACHE[$file];
        }

        // Keep the path in its own variable. Overwriting $file with the
        // path would make the store below use a different key than the
        // lookup above, and the cache would never be hit.
        $path = __DIR__ . '/stopwords/' . $file . '.php';
        if (file_exists($path)) {
            /** @noinspection PhpIncludeInspection */
            $RESULT_STOP_WORDS_CACHE[$file] = require $path;
        } else {
            // No data file for this code: cache an empty list so the
            // filesystem is not probed again for the same code.
            $RESULT_STOP_WORDS_CACHE[$file] = [];
        }

        return $RESULT_STOP_WORDS_CACHE[$file];
    }

    /**
     * Get the stop-words from one language.
     *
     * The language code is checked against self::$availableLanguages with a
     * strict comparison. The data for a language is loaded on first request
     * and afterwards served from $this->stopWords.
     *
     * @param string $language language code, e.g. "de" (the default)
     *
     * @return array the stop-words stored for $language
     *
     * @throws StopWordsLanguageNotExists if $language is not one of the
     *                                    available languages
     */
    public function getStopWordsFromLanguage(string $language = 'de'): array
    {
        if (\in_array($language, self::$availableLanguages, true) === false) {
            throw new StopWordsLanguageNotExists('language not supported: ' . $language);
        }

        // Lazy load: only touch the data file the first time a language is asked for.
        if (!isset($this->stopWords[$language])) {
            $this->loadLanguageData($language);
        }

        return $this->stopWords[$language];
    }

    /**
     * Load the data of every available language that is not loaded yet.
     *
     * Languages already present in $this->stopWords are skipped.
     *
     * @return void
     */
    private function loadLanguageDataAll()
    {
        foreach (self::$availableLanguages as $language) {
            if (!isset($this->stopWords[$language])) {
                $this->loadLanguageData($language);
            }
        }
    }

    /**
     * Get all stop-words from all languages.
     *
     * Loads every language that is still missing, then returns the complete
     * $this->stopWords array (indexed by language code).
     *
     * @return array
     *
     * @throws StopWordsLanguageNotExists
     */
    public function getStopWordsAll(): array
    {
        $this->loadLanguageDataAll();

        return $this->stopWords;
    }
}
-------------------------------------------------------------------------------- /src/voku/helper/StopWordsLanguageNotExists.php: -------------------------------------------------------------------------------- 1 |