├── tests ├── __init__.py └── italian_dictionary_test.py ├── requirements.txt ├── test_requirements.txt ├── italian_dictionary ├── __init__.py ├── exceptions.py ├── dictionary.py └── scraper.py ├── .travis.yml ├── setup.py ├── LICENSE ├── .github └── workflows │ └── main.yml ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | codecov 2 | pytest-cov -------------------------------------------------------------------------------- /italian_dictionary/__init__.py: -------------------------------------------------------------------------------- 1 | from .dictionary import get_definition 2 | -------------------------------------------------------------------------------- /italian_dictionary/exceptions.py: -------------------------------------------------------------------------------- 1 | class WordNotFoundError(Exception): 2 | pass 3 | 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | cache: pip 4 | 5 | python: 6 | - "3.6" 7 | - "3.7" 8 | - "3.8" 9 | install: 10 | - pip install codecov 11 | - pip install pytest-cov 12 | - pip install -r requirements.txt 13 | 14 | script: 15 | - python -m pytest --cov 16 | - codecov 17 | -------------------------------------------------------------------------------- /italian_dictionary/dictionary.py: -------------------------------------------------------------------------------- 1 | from italian_dictionary import 
def get_definition(word, all_data=True, limit=None):
    """Look up *word* and return either its full record or only its definitions.

    Args:
        word: The word to look up.
        all_data: When truthy (the default) the complete dictionary produced
            by ``scraper.get_data`` is returned and ``limit`` is not applied.
            When falsy only the list of definitions is returned.
        limit: Optional maximum number of definitions to keep in
            definitions-only mode; ``None`` keeps all of them.

    Returns:
        A dict with all the scraped data, or a list of definition strings.
    """
    if not all_data:
        definitions = scraper.get_data(word, all_data=False)
        if limit is not None and len(definitions) > limit:
            # Drop everything from position ``limit`` onwards, in place.
            definitions[limit:] = []
        return definitions
    return scraper.get_data(word)
Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. 
class Test_ItalianDictionary():
    """Lookups of existing words through ``get_definition``."""

    def test_getonlydef(self):
        """Definitions-only mode returns a non-empty list and honours ``limit``."""
        word = 'albero'
        defs = get_definition(word, all_data=False)
        assert isinstance(defs, list)
        assert len(defs) > 0
        limit_defs = get_definition(word, limit=1, all_data=False)
        assert len(limit_defs) == 1

    def test_alldata(self):
        """Full mode returns a dict with no ``None`` values and a space-free lemma."""
        word = 'albero'
        data = get_definition(word)
        assert isinstance(data, dict)
        for key, value in data.items():
            assert value is not None, key
        assert ' ' not in data['lemma']

    def test_specialcharacter(self):
        """A word containing an accented letter can be looked up."""
        word = 'perchè'
        data = get_definition(word)
        assert isinstance(data, dict)
        assert len(data) > 0

    def test_one_syllable(self):
        """A one-letter word yields exactly one syllable."""
        word = 'a'
        data = get_definition(word)
        assert isinstance(data, dict)
        assert len(data) > 0
        assert len(data['sillabe']) == 1

    def test_verb(self):
        """A verb can be looked up."""
        word = 'essere'
        data = get_definition(word)
        assert isinstance(data, dict)
        assert len(data) > 0


class TestErrors:
    """Lookups of words that do not exist."""

    def test_errors(self):
        """Unknown words raise ``WordNotFoundError``."""
        for unknown in ('nfdifneif', 'afefemmm'):
            with pytest.raises(exceptions.WordNotFoundError):
                get_definition(unknown)
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | \.idea/ 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/sphoneix22/italian_dictionary.svg?branch=master)](https://travis-ci.org/sphoneix22/italian_dictionary) 2 | [![codecov](https://codecov.io/gh/sphoneix22/italian_dictionary/branch/master/graph/badge.svg)](https://codecov.io/gh/sphoneix22/italian_dictionary) 3 | [![PyPI version](https://badge.fury.io/py/italian-dictionary.svg)](https://badge.fury.io/py/italian-dictionary) 4 | ![Python](https://img.shields.io/pypi/pyversions/Django.svg) 5 | ![PRS](https://img.shields.io/badge/PRs-Welcome-green.svg) 6 | 7 | 8 | # ItalianDictionary 9 | 10 | This package searches for word meanings on 
[dizionario-italiano](https://www.dizionario-italiano.it). 11 | ## Install 12 | ```bash 13 | pip install italian-dictionary 14 | ``` 15 | ## Usage 16 | ```python 17 | import italian_dictionary 18 | 19 | # Use this to get only the meaning 20 | definition = italian_dictionary.get_definition('cane', limit=3, all_data=False) 21 | 22 | #Use this to get all datas of a word (all_data default is True) 23 | datas = italian_dictionary.get_definition('albero') 24 | ``` 25 | #### Complete data response 26 | This function will return a dictionary like this: 27 | ```python 28 | { 29 | 'sillabe': ['al', 'be', 'ro'], 30 | 'lemma': 'àlbero', 31 | 'pronuncia': ' /ˈalbero/', 32 | 'grammatica': ['sostantivo maschile'], 33 | 'definizione': ['pianta con fusto alto, legnoso, provvisto di rami nella parte superiore', 34 | "MARINERIA -- palo che regge i pennoni con le vele e tutta l'attrezzatura", 35 | 'MECCANICA -- parte rotante, generalmente cilindrica, che, in una macchina, ha la funzione di trasmettere potenza meccanica da un organo a un altro'], 36 | 'locuzioni': ["linea d'asse o d'alberi di una nave", 37 | 'ad albero che cade dàgli dàgli', 38 | 'svasare un albero', 39 | 'albero portaelica', 40 | 'albero a calcese', 41 | 'albero castalio', 42 | 'albero matricino', 43 | 'alberi a mezzovento', 44 | 'albero optronico', 45 | 'albero pizzuto', 46 | 'andare agli alberi pizzuti', 47 | 'alberi rinterzati', 48 | 'albero del sego'] 49 | } 50 | ``` 51 | ## Tests 52 | To run tests you need ```pytest``` 53 | When in project folder: 54 | ```python -m pytest``` 55 | -------------------------------------------------------------------------------- /italian_dictionary/scraper.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | import urllib.request as request 3 | from urllib import parse 4 | 5 | from italian_dictionary import exceptions 6 | 7 | URL = "https://www.dizionario-italiano.it/dizionario-italiano.php?parola={}100" 8 | 9 | 10 | def 
def build_url(base_url):
    """Return *base_url* with its query string percent-encoded.

    Only the query component is rewritten. ``?``, ``=`` and ``/`` are kept
    as they are; other special characters (accented letters, for example)
    are escaped.
    """
    scheme, netloc, path, query, fragment = parse.urlsplit(base_url)
    quoted_query = parse.quote(query, safe="?=/")
    return parse.urlunsplit((scheme, netloc, path, quoted_query, fragment))


def get_soup(url):
    """Download *url* and parse the response body with ``html.parser``.

    The HTTP response is closed as soon as its body has been read.
    """
    with request.urlopen(url) as response:
        sauce = response.read()
    return bs4.BeautifulSoup(sauce, 'html.parser')


def get_lemma(soup):
    """Return the text of the first ``span.lemma``, or ``None`` if absent.

    Only the text sitting directly inside the span is used (nested tags are
    ignored) and trailing whitespace is removed.
    """
    lemma = soup.find('span', class_='lemma')
    if lemma is None:
        return None
    own_text = lemma.find(text=True, recursive=False)
    if own_text is None:
        return None
    return own_text.rstrip()


def _split_at_markers(word, marked):
    """Split *word* at the positions where "|" occurs in *marked*."""
    letters = list(word)
    for position, char in enumerate(marked):
        if char == "|":
            letters.insert(position, "|")
    return ''.join(letters).split("|")


def get_sillabe(soup, word):
    """Return *word* divided into syllables, e.g. ``['al', 'be', 'ro']``.

    The syllabified form is read from the first ``<small>`` element after the
    lemma whose parent is not one of the lemma's children; "|" marks the
    syllable boundaries in that text.

    Returns:
        The list of syllables, or ``[word]`` when the page carries no
        syllable division for the word.
    """
    lemma = soup.find(class_='lemma')
    if lemma is None:
        return [word]

    marked = None
    for el in lemma.find_all_next('small'):
        if el.parent not in lemma.children:
            try:
                marked = el.span.find(text=True, recursive=False)
            except AttributeError:  # Word has no syllable division
                return [word]
            break
    if marked is None:
        # No candidate element, or its span holds no direct text.
        return [word]

    # The boundaries are applied to the plain word rather than splitting the
    # scraped text, because the syllabified text contains pronunciation
    # accents.
    return _split_at_markers(word, marked)


def get_pronuncia(soup):
    """Return the pronunciation text, or ``None`` if the page has none."""
    pronuncia = soup.find('span', class_="paradigma")
    if pronuncia is None:
        return None
    # The first 10 characters are dropped -- presumably the "pronuncia:"
    # label; confirm against the page markup.
    return pronuncia.text[10:]


def get_grammatica(soup):
    """Return the text of every ``span.grammatica`` element."""
    return [tag.text for tag in soup.find_all('span', class_="grammatica")]


def get_locuzioni(soup):
    """Return the text of every ``span.cit_ita_1`` element."""
    return [tag.text for tag in soup.find_all('span', class_='cit_ita_1')]


def get_defs(soup):
    """Return the definitions found in the ``span.italiano`` elements.

    Inside each definition, child tags that hold a single string and whose
    first CSS class is neither ``esempi`` nor ``autore`` are treated as
    labels: their text is upper-cased and put in front of the definition,
    and the tag is removed from the tree so its text is not repeated.

    Raises:
        exceptions.WordNotFoundError: if the page contains no definition.
    """
    defs = []
    for definition in soup.find_all('span', class_='italiano'):
        labels = []
        for child in definition.find_all():
            if child.string is None:
                continue
            try:
                css_class = child.attrs['class'][0]
            except (KeyError, IndexError):  # no class attribute, or an empty one
                continue
            if css_class in ('esempi', 'autore'):
                continue
            labels.append(child.text)
            child.decompose()
        if labels:
            children_content = ''.join(label + ' ' for label in labels)
            defs.append(f"{children_content.upper()} -- {definition.text.replace('()', '')}")
        else:
            defs.append(definition.text)
    if not defs:
        raise exceptions.WordNotFoundError()
    return defs


def get_data(word, all_data=True):
    """Fetch the dictionary page for *word* and extract its data.

    Args:
        word: The word to look up.
        all_data: When falsy only the list of definitions is returned.

    Returns:
        A list of definitions, or a dict with the keys ``definizione``,
        ``sillabe``, ``lemma``, ``pronuncia``, ``grammatica``, ``locuzioni``
        and ``url``.

    Raises:
        exceptions.WordNotFoundError: if the page contains no definition.
    """
    url = build_url(URL.format(word))
    soup = get_soup(url)
    if not all_data:
        return get_defs(soup)

    # get_defs stays first: it raises WordNotFoundError for unknown words
    # before the other extractors run, and it removes label tags from the
    # tree that the later extractors then read.
    return {
        'definizione': get_defs(soup),
        'sillabe': get_sillabe(soup, word),
        'lemma': get_lemma(soup),
        'pronuncia': get_pronuncia(soup),
        'grammatica': get_grammatica(soup),
        'locuzioni': get_locuzioni(soup),
        'url': url,
    }