├── tests ├── __init__.py ├── test_search.py ├── test_conversion.py ├── test_replacement.py └── test_emoji_char.py ├── docs ├── requirements.txt ├── Makefile ├── index.rst └── conf.py ├── MANIFEST.in ├── .readthedocs.yml ├── .coveragerc ├── pyproject.toml ├── dev.requirements.txt ├── .editorconfig ├── emoji_data_python ├── conversion.py ├── __init__.py ├── search.py ├── replacement.py └── emoji_char.py ├── update.py ├── setup.py ├── LICENSE ├── .github └── workflows │ └── workflow.yml ├── README.md ├── .gitignore └── .pylintrc /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_autodoc_typehints 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include emoji_data_python/data/*.json 3 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | build: 2 | image: latest 3 | 4 | python: 5 | version: 3.6 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = 3 | */python?.?/* 4 | */site-packages/nose/* 5 | fail_under = 100 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 120 3 | include = '\.pyi?$' 4 | exclude = ''' 5 | /( 6 | \.eggs 7 | | \.git 8 | | \.hg 9 | | \.mypy_cache 10 | | \.tox 11 | | v?env 12 | | _build 13 | | buck-out 14 | | builds? 15 | | dist 16 | )/ 17 | ''' 18 | -------------------------------------------------------------------------------- /dev.requirements.txt: -------------------------------------------------------------------------------- 1 | # setup.py requirements 2 | -e . 3 | 4 | # Updating from source emoji data 5 | requests>=2.25.1,<3.0.0 6 | 7 | # Testing 8 | pytest>=8.2.2,<9.0.0 9 | pytest-cov>=5.0.0,<6.0.0 10 | 11 | # Linting 12 | pylint>=3.2.5,<4.0.0 13 | mypy>=1.10.1,<2.0.0 14 | black>=24.4.2,<=25.0.0 15 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | [*.md] 13 | trim_trailing_whitespace = false 14 | 15 | [*.json] 16 | indent_size = 2 17 | 18 | [*.{yml,yaml}] 19 | indent_size = 2 20 | -------------------------------------------------------------------------------- /emoji_data_python/conversion.py: -------------------------------------------------------------------------------- 1 | def unified_to_char(code_point: str) -> str: 2 | """Renders a character from its hexadecimal codepoint 3 | 4 | :param code_point: Character code point ex: `'261D'` 5 | 6 | >>> emoji_data_python.unified_to_char('1F603') 7 | '😃' 8 | """ 9 | return "".join([chr(int(code, 16)) for code in code_point.split("-")]) 10 | 11 | 12 | def char_to_unified(chars: str) -> str: 13 | """Returns a characters unified codepoint 14 | 15 | :param chars: Emoji character ex: `'🇿🇦'` 16 | 17 | >>> emoji_data_python.char_to_unified('🇿🇦') 18 | '1F1FF-1F1E6' 19 | """ 20 | return "-".join([f"{ord(char):04x}".upper() for char in chars]) 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = EmojiDataPython 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /update.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | import logging 4 | from requests import get 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | logger = logging.getLogger(__name__) 8 | 9 | JSON_LOCATION = 'https://raw.githubusercontent.com/iamcal/emoji-data/master/emoji.json' 10 | 11 | logger.info(f'📶 Downloading data from {JSON_LOCATION}') 12 | r = get(JSON_LOCATION) 13 | r.raise_for_status() 14 | 15 | output_path = path.join(path.dirname(__file__), 'emoji_data_python/data/emoji.json') 16 | logger.info(f'📝 Writing data to {output_path}') 17 | with open(output_path, 'w') as f: 18 | f.write(r.text) 19 | 20 | logger.info('✅ Done, successfully updated data, ' 21 | 'run "python -m unittest discover" to make sure the format is still supported') 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import setuptools 4 | 5 | module_path = os.path.join(os.path.dirname(__file__), "emoji_data_python.py") 6 | 7 | setuptools.setup( 8 | name="emoji_data_python", 9 | version="0.0.0", 10 | url="https://github.com/alexmick/emoji-data-python/", 11 | 12 | author="Alexander Micklewright", 13 | 14 | description="Python emoji toolkit", 15 | long_description="Full documentation available on https://emoji-data-python.readthedocs.io/en/latest/", 16 | 17 | zip_safe=False, 18 | platforms="any", 19 | 20 | python_requires=">=3.6.0", 21 | install_requires=[""], 22 | packages=setuptools.find_packages(), 23 | include_package_data=True, 24 | 25 | classifiers=[ 26 | "Intended Audience :: Developers", 27 | "Operating System :: OS Independent", 28 | "Programming Language :: Python", 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.6", 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /tests/test_search.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from emoji_data_python import all_doublebyte, find_by_shortname, find_by_name, emoji_data 4 | 5 | 6 | class SearchTestCase(unittest.TestCase): 7 | def test_all_emoji_length(self): 8 | self.assertEqual(1903, len(emoji_data)) 9 | 10 | def test_all_double_bytes(self): 11 | self.assertEqual('HASH KEY', all_doublebyte()[0].name) # HASH_KEY is the first double byte char 12 | 13 | def test_find_by_shortname(self): 14 | self.assertEqual(1, len(find_by_shortname('wave'))) 15 | self.assertEqual('WAVING HAND SIGN', find_by_shortname('wave')[0].name) 16 | 17 | def test_find_by_shortname_unique(self): 18 | self.assertEqual(34, len(find_by_shortname('heart'))) 19 | self.assertEqual(14, len(find_by_shortname('moon'))) 20 | 21 | def test_find_by_name(self): 22 | self.assertEqual('COUPLE WITH HEART: MAN, MAN', find_by_name('heart')[0].name) 23 | self.assertEqual('SUNRISE OVER MOUNTAINS', find_by_name('sun')[0].name) 24 | self.assertEqual(3, len(find_by_name('earth'))) 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Cal Henderson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /emoji_data_python/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os import path 3 | from typing import List, Dict 4 | 5 | from .conversion import unified_to_char, char_to_unified 6 | from .emoji_char import EmojiChar 7 | from .replacement import replace_colons, get_emoji_regex 8 | from .search import all_doublebyte, find_by_shortname, find_by_name 9 | 10 | # Read json data on module load to be cached 11 | with open(path.join(path.dirname(__file__), "data/emoji.json"), "r", encoding="utf-8") as full_data: 12 | # Load and parse emoji data from json into EmojiChar objects 13 | emoji_data = [EmojiChar(data_blob) for data_blob in json.loads(full_data.read())] # type: List[EmojiChar] 14 | 15 | # Build a cached dictionary of short names for quicker access, short code keys are normalized with underscores 16 | emoji_short_names = {emoji.short_name.replace("-", "_"): emoji for emoji in emoji_data} # type: Dict[str, EmojiChar] 17 | 18 | # Add other short names if they are not already used as a primary short name for an other emoji 19 | for emoji in emoji_data: 20 | for short_name in emoji.short_names: 21 | if short_name not in emoji_short_names: 22 | emoji_short_names[short_name] = emoji 23 | 24 | 25 | __all__ = [ 26 | "unified_to_char", 27 | "char_to_unified", 28 | "EmojiChar", 29 | "replace_colons", 30 | "get_emoji_regex", 31 | "all_doublebyte", 32 | "find_by_shortname", 33 | "find_by_name", 34 | "emoji_data", 35 | "emoji_short_names", 36 | ] 37 | -------------------------------------------------------------------------------- /tests/test_conversion.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from emoji_data_python import unified_to_char, char_to_unified 4 | 5 | 6 | class EmojiConversionTestCase(unittest.TestCase): 7 | def test_unified_to_char(self): 8 | self.assertEqual('\u261D', unified_to_char('261D')) 9 | 10 | def test_longer_unified(self): 11 | self.assertEqual('\U0001F1E6', unified_to_char('1F1E6')) 12 | 13 | def test_multiple_unified_to_char(self): 14 | self.assertEqual('👨‍🌾', unified_to_char('1F468-200D-1F33E')) 15 | self.assertEqual('👩🏼‍🌾', unified_to_char('1F469-1F3FC-200D-1F33E')) 16 | self.assertEqual('🇳🇬', unified_to_char('1F1F3-1F1EC')) 17 | self.assertEqual('\U0001F1F3\U0001F1EC', unified_to_char('1F1F3-1F1EC')) 18 | self.assertEqual('4⃣', unified_to_char('0034-20E3')) 19 | self.assertEqual('\u0034\u20E3', unified_to_char('0034-20E3')) 20 | 21 | def test_char_to_unified(self): 22 | self.assertEqual('261D', char_to_unified('\u261D')) 23 | 24 | def test_longer_char_to_unified(self): 25 | self.assertEqual('1F1E6', char_to_unified('\U0001F1E6')) 26 | 27 | def test_multiple_char_to_unified(self): 28 | self.assertEqual('1F469-1F3FC-200D-1F33E', char_to_unified('👩🏼‍🌾')) 29 | self.assertEqual('1F468-200D-1F33E', char_to_unified('👨‍🌾')) 30 | self.assertEqual('1F1F3-1F1EC', char_to_unified('🇳🇬')) 31 | self.assertEqual('1F1F3-1F1EC', char_to_unified('\U0001F1F3\U0001F1EC')) 32 | self.assertEqual('0034-20E3', char_to_unified('4⃣')) 33 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Emoji Data Python documentation 2 | =============================== 3 | 4 | This is the documentation for the `emoji_data_python` module 5 | 6 | Also available in :ref:`Aphabetical order ` 7 | 8 | Module documentation 9 | -------------------- 10 | 11 | .. automodule:: emoji_data_python 12 | :members: unified_to_char, char_to_unified, replace_colons, all_doublebyte, find_by_shortname, find_by_name, get_emoji_regex 13 | 14 | .. attribute:: emoji_data 15 | 16 | List of all emoji as :mod:`emoji_data_python.EmojiChar` objects. 17 | 18 | >>> len(emoji_data_python.emoji_data) 19 | 489 20 | 21 | .. attribute:: emoji_short_codes 22 | 23 | Dict of all emoji as :mod:`emoji_data_python.EmojiChar` objects indexed by short names. 24 | 25 | **Note** : All short names (even secondary) are indexed. If any conflicts are found, only the emoji who has the conflicitng shortname as primary name is indexed under that name 26 | ie. if an emoji has a secondary short name that is already taken as primary for an other emoji, this will not be referenced under that shortname 27 | 28 | >>> emoji_data_python.emoji_short_names['hearts'].__dict__ 29 | { 30 | 'name': 'BLACK HEART SUIT', 31 | 'unified': '2665', 32 | 'variations': ['2665-FE0F'], 33 | 'docomo': 'E68D', 34 | 'au': 'EAA5', 35 | 'softbank': 'E20C', 36 | 'google': 'FEB1A', 37 | 'short_name': 'hearts', 38 | 'short_names': ['hearts'], 39 | 'text': None, 40 | 'texts': None, 41 | 'category': 'Symbols', 42 | 'sort_order': 245, 43 | 'added_in': '1.1', 44 | 'skin_variations': {}, 45 | 'obsoletes': None, 46 | 'obsoleted_by': None 47 | } 48 | 49 | 50 | Classes 51 | ------- 52 | 53 | .. autoclass:: EmojiChar 54 | :members: 55 | -------------------------------------------------------------------------------- /emoji_data_python/search.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from .emoji_char import EmojiChar 4 | 5 | 6 | def find_by_shortname(name: str) -> List[EmojiChar]: 7 | """Finds all emoji with `name` in their short_names 8 | 9 | :param name: string to find in short names 10 | 11 | >>> emoji_data_python.find_by_shortname('moon') 12 | [ 13 | EmojiChar("NEW MOON SYMBOL"), 14 | EmojiChar("WAXING CRESCENT MOON SYMBOL"), 15 | EmojiChar("FIRST QUARTER MOON SYMBOL"), 16 | EmojiChar("WAXING GIBBOUS MOON SYMBOL"), 17 | EmojiChar("FULL MOON SYMBOL"), 18 | EmojiChar("WANING GIBBOUS MOON SYMBOL"), 19 | EmojiChar("LAST QUARTER MOON SYMBOL"), 20 | EmojiChar("WANING CRESCENT MOON SYMBOL"), 21 | EmojiChar("CRESCENT MOON"), 22 | EmojiChar("NEW MOON WITH FACE"), 23 | EmojiChar("FIRST QUARTER MOON WITH FACE"), 24 | EmojiChar("LAST QUARTER MOON WITH FACE"), 25 | EmojiChar("FULL MOON WITH FACE"), 26 | ] 27 | """ 28 | from emoji_data_python import emoji_short_names # pylint: disable=import-outside-toplevel 29 | 30 | name = name.replace("-", "_") 31 | res_list = [emoji_short_names[key] for key in emoji_short_names.keys() if name in key] 32 | return list(set(res_list)) # Keep only unique values 33 | 34 | 35 | def find_by_name(name: str) -> List[EmojiChar]: 36 | """Finds emoji with `name` in their full name 37 | 38 | :param name: string to find in full names 39 | """ 40 | from emoji_data_python import emoji_data # pylint: disable=import-outside-toplevel 41 | 42 | return [emoji for emoji in emoji_data if emoji.name and name.upper() in emoji.name] 43 | 44 | 45 | def all_doublebyte() -> List[EmojiChar]: 46 | """Returns all emoji coded on two or more bytes""" 47 | from emoji_data_python import emoji_data # pylint: disable=import-outside-toplevel 48 | 49 | return [emoji for emoji in emoji_data if emoji.is_doublebyte] 50 | -------------------------------------------------------------------------------- /emoji_data_python/replacement.py: -------------------------------------------------------------------------------- 1 | import re 2 | from functools import cache 3 | from typing import cast 4 | 5 | 6 | def replace_colons(text: str, strip: bool = False) -> str: 7 | """Parses a string with colon encoded emoji and renders found emoji. 8 | Unknown emoji are left as is unless `strip` is set to `True` 9 | 10 | :param text: String of text to parse and replace 11 | :param strip: Whether to strip unknown codes or to leave them as `:unknown:` 12 | 13 | >>> emoji_data_python.replace_colons('Hello world ! :wave::skin-tone-3: :earth_africa: :exclamation:') 14 | 'Hello world ! 👋🏼 🌍 ❗' 15 | """ 16 | # pylint: disable=import-outside-toplevel 17 | from emoji_data_python import emoji_short_names, EmojiChar 18 | 19 | def emoji_repl(matchobj) -> str: 20 | emoji_match = matchobj.group(1) 21 | base_emoji = emoji_short_names.get(emoji_match.strip(":").replace("-", "_")) 22 | 23 | if matchobj.lastindex == 2: 24 | skin_tone_match = matchobj.group(2) 25 | skin_tone = cast(EmojiChar, emoji_short_names.get(skin_tone_match.strip(":"))) 26 | 27 | if base_emoji is None: 28 | return f'{emoji_match if strip is False else ""}{skin_tone.char}' 29 | 30 | emoji_with_skin_tone = base_emoji.skin_variations.get(skin_tone.unified) 31 | if emoji_with_skin_tone is None: 32 | return f"{base_emoji.char}{skin_tone.char}" 33 | return emoji_with_skin_tone.char 34 | 35 | if base_emoji is None: 36 | return f'{emoji_match if strip is False else ""}' 37 | return base_emoji.char 38 | 39 | return re.sub(r"(\:[a-zA-Z0-9-_+]+\:)(\:skin-tone-[2-6]\:)?", emoji_repl, text) 40 | 41 | 42 | @cache 43 | def get_emoji_regex(): 44 | """Returns a regex to match any emoji 45 | 46 | >>> emoji_data_python.get_emoji_regex().findall('Hello world ! 👋🏼 🌍 ❗') 47 | ['👋', '🏼', '🌍', '❗'] 48 | """ 49 | from emoji_data_python import emoji_data # pylint: disable=import-outside-toplevel 50 | 51 | # Sort emojis by length to make sure mulit-character emojis are 52 | # matched first 53 | 54 | emojis = sorted([emoji.char for emoji in emoji_data], key=len, reverse=True) 55 | pattern = "(" + "|".join(re.escape(u) for u in emojis) + ")" 56 | return re.compile(pattern) 57 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: CI pipeline 5 | 6 | on: 7 | push: 8 | pull_request: 9 | 10 | jobs: 11 | lint: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python 16 | uses: actions/setup-python@v3 17 | with: 18 | python-version: 3.12 19 | - name: Install dev dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install -r dev.requirements.txt 23 | - name: Lint with black 24 | run: black --check emoji_data_python 25 | - name: Lint with pylint 26 | run: pylint emoji_data_python 27 | - name : mypy type checking (allowed to fail for now) 28 | run: mypy emoji_data_python || true 29 | 30 | test: 31 | runs-on: ubuntu-latest 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | python-version: ["3.9", "3.12"] 36 | 37 | steps: 38 | - uses: actions/checkout@v4 39 | - name: Set up Python ${{ matrix.python-version }} 40 | uses: actions/setup-python@v3 41 | with: 42 | python-version: ${{ matrix.python-version }} 43 | - name: Install dev dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | pip install -r dev.requirements.txt 47 | - name: Test with pytest 48 | run: | 49 | pytest --cov-config .coveragerc --cov=emoji_data_python tests/ --cov-report term 50 | 51 | deploy: 52 | if: github.ref_type == 'tag' 53 | needs: 54 | - test 55 | - lint 56 | runs-on: ubuntu-latest 57 | steps: 58 | - uses: actions/checkout@v4 59 | - name: Set up Python 60 | uses: actions/setup-python@v3 61 | with: 62 | python-version: 3.12 63 | - name: Install dependencies 64 | run: | 65 | python -m pip install --upgrade pip 66 | pip install wheel twine setuptools 67 | pip install . 68 | - name: Build and publish 69 | env: 70 | TWINE_USERNAME: __token__ 71 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 72 | run: | 73 | sed -i "s|version=\"0.0.0\",|version=\"${GITHUB_REF/refs\/tags\//}\",|g" setup.py 74 | python setup.py sdist bdist_wheel 75 | twine upload dist/* 76 | -------------------------------------------------------------------------------- /tests/test_replacement.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from emoji_data_python import replace_colons, get_emoji_regex 4 | 5 | 6 | class ReplaceColonsTestCase(unittest.TestCase): 7 | def test_replace_colons(self): 8 | self.assertEqual('😄', replace_colons(':smile:')) 9 | self.assertEqual('😗', replace_colons(':kissing:')) 10 | 11 | def test_skin_tone(self): 12 | self.assertEqual('👋🏼', replace_colons(':wave::skin-tone-3:')) 13 | 14 | def test_skin_tone_appended_to_emoji_with_no_skin_tone(self): 15 | self.assertEqual('💩🏼', replace_colons(':poop::skin-tone-3:')) 16 | 17 | def test_underscore_hyphenated_codes(self): 18 | self.assertEqual('😙', replace_colons(':kissing_smiling_eyes:')) 19 | self.assertEqual('😘', replace_colons(':kissing-heart:')) 20 | 21 | def test_main_shortname_precedence(self): 22 | """There are two emoji for the family shortcode, one as the main short_name and one in the short_names_list""" 23 | self.assertEqual('👪', replace_colons(':family:')) 24 | 25 | def test_zwj_emoji(self): 26 | """These emoji are joined by a Zero Width Joiner""" 27 | self.assertEqual('👨‍👩‍👦', replace_colons(':man-woman-boy:')) 28 | self.assertEqual('👨‍🌾', replace_colons(':male-farmer:')) 29 | 30 | def test_zwj_emoji_skin_tone(self): 31 | """This tests zwj emojis that also have a skin tone""" 32 | self.assertEqual('👨🏼‍🌾', replace_colons(':male-farmer::skin-tone-3:')) 33 | 34 | def test_unknown_code(self): 35 | self.assertEqual('💩💩 :poo:🏼', replace_colons(':hankey::poop: :poo::skin-tone-3:')) 36 | self.assertEqual('💩:poo: 🐶 :poo:', replace_colons(':poop::poo: :dog: :poo:')) 37 | 38 | def test_strip_unknown_code(self): 39 | self.assertEqual('💩💩 🏼', replace_colons(':hankey::poop: :poo::skin-tone-3:', strip=True)) 40 | self.assertEqual('💩 🐶 ', replace_colons(':poop::poo: :dog: :poo:', strip=True)) 41 | 42 | def test_multiline_sentence(self): 43 | self.assertEqual(""" 44 | Hello 👋 world 🌍 ! 45 | How are you ❓""", 46 | 47 | replace_colons(""" 48 | Hello :wave: world :earth_africa: ! 49 | How are you :question:""") 50 | ) 51 | 52 | def test_emoji_regex(self): 53 | regex = get_emoji_regex() 54 | self.assertRegex('😄', regex) 55 | self.assertRegex('👪', regex) 56 | self.assertNotRegex('hello :wave: l → ▶', regex) 57 | res = regex.findall('💩💩 🏼 bla 👋🏼') 58 | self.assertEqual(len(res), 5) # Wave + skin tone counts as two 59 | -------------------------------------------------------------------------------- /tests/test_emoji_char.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from emoji_data_python import EmojiChar 4 | 5 | 6 | class EmojiCharTestCase(unittest.TestCase): 7 | def setUp(self): 8 | self.emoji = EmojiChar({ 9 | "name": "WHITE UP POINTING INDEX", 10 | "unified": "261D", 11 | "variations": [ 12 | "261D-FE0F" 13 | ], 14 | "docomo": None, 15 | "au": "E4F6", 16 | "softbank": "E00F", 17 | "google": "FEB98", 18 | "image": "261d.png", 19 | "sheet_x": 1, 20 | "sheet_y": 2, 21 | "short_name": "point_up", 22 | "short_names": [ 23 | "point_up" 24 | ], 25 | "text": None, 26 | "texts": None, 27 | "category": "People", 28 | "sort_order": 116, 29 | "added_in": "1.4", 30 | "has_img_apple": True, 31 | "has_img_google": True, 32 | "has_img_twitter": True, 33 | "has_img_emojione": False, 34 | "has_img_facebook": False, 35 | "has_img_messenger": False, 36 | "skin_variations": { 37 | "1F3FB": { 38 | "unified": "261D-1F3FB", 39 | "image": "261d-1f3fb.png", 40 | "sheet_x": 1, 41 | "sheet_y": 3, 42 | "added_in": "6.0", 43 | "has_img_apple": True, 44 | "has_img_google": False, 45 | "has_img_twitter": False, 46 | "has_img_emojione": False, 47 | "has_img_facebook": False, 48 | "has_img_messenger": False 49 | }, 50 | }, 51 | "obsoletes": "ABCD-1234", 52 | "obsoleted_by": "5678-90EF" 53 | }) 54 | 55 | def test_init(self): 56 | self.assertEqual("261D", self.emoji.unified) 57 | self.assertEqual("point_up", self.emoji.short_names[0]) 58 | self.assertEqual("People", self.emoji.category) 59 | self.assertEqual("1.4", self.emoji.added_in) 60 | self.assertEqual("261d.png", self.emoji.image) 61 | self.assertEqual(1, self.emoji.sheet_x) 62 | self.assertEqual(True, self.emoji.has_img_apple) 63 | self.assertEqual(False, self.emoji.has_img_emojione) 64 | 65 | def test_skin_variations(self): 66 | self.assertEqual("261D-1F3FB", self.emoji.skin_variations['1F3FB'].unified) 67 | self.assertEqual(True, self.emoji.skin_variations['1F3FB'].has_img_apple) 68 | 69 | def test_char(self): 70 | self.assertEqual('☝', self.emoji.char) 71 | 72 | def test_all_variations(self): 73 | self.assertEqual(['261D', '261D-FE0F', '261D-1F3FB'], self.emoji.all_variations) 74 | 75 | def test_chars(self): 76 | self.assertEqual('☝', self.emoji.chars[0]) 77 | self.assertEqual('☝️', self.emoji.chars[1]) # Not the same char, this is \u261D\uFE0F 78 | 79 | def test_doublebyte(self): 80 | self.assertEqual(False, self.emoji.is_doublebyte) 81 | 82 | def test_str(self): 83 | self.assertEqual('WHITE UP POINTING INDEX', self.emoji.__str__()) 84 | 85 | def test_repr(self): 86 | self.assertEqual('EmojiChar("WHITE UP POINTING INDEX")', self.emoji.__repr__()) 87 | 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # emoji_data_python - A python emoji toolkit 2 | 3 | ![CI pipeline](https://github.com/alexmick/emoji-data-python/workflows/CI%20pipeline/badge.svg) 4 | [![PyPI](https://img.shields.io/pypi/v/emoji-data-python.svg)](https://pypi.python.org/pypi/emoji-data-python) 5 | [![Documentation Status](https://readthedocs.org/projects/emoji-data-python/badge/?version=latest)](http://emoji-data-python.readthedocs.io/en/latest/?badge=latest) 6 | 7 | 8 | Forget about handling obscure shortcodes from slack or user input, this lib knows all about them ! 9 | 10 | ### Features 11 | 12 | * Conversion from slack-style colon short codes : `:smiley:` ⇒ 😃 13 | * Lookup emoji by official name or any known short name 14 | * Render emoji chars from their codepoint 15 | 16 | ## Installing 17 | 18 | Installation should be as simple as : 19 | 20 | ```bash 21 | pip install emoji_data_python 22 | ``` 23 | 24 | ## Usage 25 | ```python 26 | >>> import emoji_data_python 27 | >>> len(emoji_data_python.emoji_data) 28 | 1489 29 | >>> emoji_data_python.emoji_short_names['hearts'].__dict__ 30 | { 31 | 'name': 'BLACK HEART SUIT', 32 | 'unified': '2665', 33 | 'variations': ['2665-FE0F'], 34 | 'docomo': 'E68D', 35 | 'au': 'EAA5', 36 | 'softbank': 'E20C', 37 | 'google': 'FEB1A', 38 | 'image': '2665.png', 39 | 'sheet_x': 1, 40 | 'sheet_y': 34, 41 | 'short_name': 'hearts', 42 | 'short_names': ['hearts'], 43 | 'text': None, 44 | 'texts': None, 45 | 'category': 'Symbols', 46 | 'sort_order': 245, 47 | 'added_in': '1.1', 48 | 'has_img_apple': True, 49 | 'has_img_google': True, 50 | 'has_img_twitter': True, 51 | 'has_img_emojione': True, 52 | 'has_img_facebook': True, 53 | 'has_img_messenger': True, 54 | 'skin_variations': {}, 55 | 'obsoletes': None, 56 | 'obsoleted_by': None 57 | } 58 | >>> emoji_data_python.find_by_shortname('moon') 59 | [ 60 | EmojiChar("NEW MOON SYMBOL"), 61 | EmojiChar("WAXING CRESCENT MOON SYMBOL"), 62 | EmojiChar("FIRST QUARTER MOON SYMBOL"), 63 | EmojiChar("WAXING GIBBOUS MOON SYMBOL"), 64 | EmojiChar("FULL MOON SYMBOL"), 65 | EmojiChar("WANING GIBBOUS MOON SYMBOL"), 66 | EmojiChar("LAST QUARTER MOON SYMBOL"), 67 | EmojiChar("WANING CRESCENT MOON SYMBOL"), 68 | EmojiChar("CRESCENT MOON"), 69 | EmojiChar("NEW MOON WITH FACE"), 70 | EmojiChar("FIRST QUARTER MOON WITH FACE"), 71 | EmojiChar("LAST QUARTER MOON WITH FACE"), 72 | EmojiChar("FULL MOON WITH FACE"), 73 | ] 74 | >>> [ (emoji.name, emoji.short_name, emoji.char) for emoji in emoji_data_python.find_by_name('tree')] 75 | [ 76 | ('EVERGREEN TREE', 'evergreen_tree', '🌲'), 77 | ('DECIDUOUS TREE', 'deciduous_tree', '🌳'), 78 | ('PALM TREE', 'palm_tree', '🌴'), 79 | ('CHRISTMAS TREE', 'christmas_tree', '🎄'), 80 | ('TANABATA TREE', 'tanabata_tree', '🎋') 81 | ] 82 | >>> emoji_data_python.replace_colons('Hello world ! :wave::skin-tone-3: :earth_africa: :exclamation:') 83 | 'Hello world ! 👋🏼 🌍 ❗' 84 | ``` 85 | 86 | ## Documentation 87 | 88 | Documentation is autogenerated by Sphinx. With the right dependencies you should be able to build the docs : 89 | 90 | ```bash 91 | cd docs/ 92 | pip install -r requirements.txt # Install sphinx and its plugins 93 | make html 94 | cd _build/html 95 | python -m SimpleHttpServer # Serve the docs on localhost:5000 96 | ``` 97 | 98 | A pre-built hosted version is available here : http://emoji-data-python.readthedocs.io/en/latest/ 99 | 100 | ## Testing 101 | 102 | ```bash 103 | python -m unittest discover 104 | ``` 105 | 106 | ###### Thanks to [iamcal](https://github.com/iamcal/emoji-data) for the complete emoji data. This project is merely a python wrapper for his work. 107 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### SublimeText template 3 | # cache files for sublime text 4 | *.tmlanguage.cache 5 | *.tmPreferences.cache 6 | *.stTheme.cache 7 | 8 | # workspace files are user-specific 9 | *.sublime-workspace 10 | 11 | # project files should be checked into the repository, unless a significant 12 | # proportion of contributors will probably not be using SublimeText 13 | # *.sublime-project 14 | 15 | # sftp configuration file 16 | sftp-config.json 17 | 18 | # Package control specific files 19 | Package Control.last-run 20 | Package Control.ca-list 21 | Package Control.ca-bundle 22 | Package Control.system-ca-bundle 23 | Package Control.cache/ 24 | Package Control.ca-certs/ 25 | Package Control.merged-ca-bundle 26 | Package Control.user-ca-bundle 27 | oscrypto-ca-bundle.crt 28 | bh_unicode_properties.cache 29 | 30 | # Sublime-github package stores a github token in this file 31 | # https://packagecontrol.io/packages/sublime-github 32 | GitHub.sublime-settings 33 | ### Example user template template 34 | ### Example user template 35 | 36 | # IntelliJ project files 37 | .idea 38 | *.iml 39 | out 40 | gen### Python template 41 | # Byte-compiled / optimized / DLL files 42 | __pycache__/ 43 | *.py[cod] 44 | *$py.class 45 | .mypy_cache 46 | 47 | # C extensions 48 | *.so 49 | 50 | # Distribution / packaging 51 | .Python 52 | env/ 53 | build/ 54 | develop-eggs/ 55 | dist/ 56 | downloads/ 57 | eggs/ 58 | .eggs/ 59 | lib/ 60 | lib64/ 61 | parts/ 62 | sdist/ 63 | var/ 64 | wheels/ 65 | *.egg-info/ 66 | .installed.cfg 67 | *.egg 68 | 69 | # PyInstaller 70 | # Usually these files are written by a python script from a template 71 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 72 | *.manifest 73 | *.spec 74 | 75 | # Installer logs 76 | pip-log.txt 77 | pip-delete-this-directory.txt 78 | 79 | # Unit test / coverage reports 80 | htmlcov/ 81 | .tox/ 82 | .coverage 83 | .coverage.* 84 | .cache 85 | nosetests.xml 86 | coverage.xml 87 | *,cover 88 | .hypothesis/ 89 | 90 | # Translations 91 | *.mo 92 | *.pot 93 | 94 | # Django stuff: 95 | *.log 96 | local_settings.py 97 | 98 | # Flask stuff: 99 | instance/ 100 | .webassets-cache 101 | 102 | # Scrapy stuff: 103 | .scrapy 104 | 105 | # Sphinx documentation 106 | docs/_build/ 107 | 108 | # PyBuilder 109 | target/ 110 | 111 | # Jupyter Notebook 112 | .ipynb_checkpoints 113 | 114 | # pyenv 115 | .python-version 116 | 117 | # celery beat schedule file 118 | celerybeat-schedule 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # dotenv 124 | .env 125 | 126 | # virtualenv 127 | .venv 128 | venv/ 129 | ENV/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | 134 | # Rope project settings 135 | .ropeproject 136 | ### JetBrains template 137 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 138 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 139 | 140 | # User-specific stuff: 141 | .idea/**/workspace.xml 142 | .idea/**/tasks.xml 143 | .idea/dictionaries 144 | 145 | # Sensitive or high-churn files: 146 | .idea/**/dataSources/ 147 | .idea/**/dataSources.ids 148 | .idea/**/dataSources.xml 149 | .idea/**/dataSources.local.xml 150 | .idea/**/sqlDataSources.xml 151 | .idea/**/dynamic.xml 152 | .idea/**/uiDesigner.xml 153 | 154 | # Gradle: 155 | .idea/**/gradle.xml 156 | .idea/**/libraries 157 | 158 | # Mongo Explorer plugin: 159 | .idea/**/mongoSettings.xml 160 | 161 | ## File-based project format: 162 | *.iws 163 | 164 | ## Plugin-specific files: 165 | 166 | # IntelliJ 167 | /out/ 168 | 169 | # mpeltonen/sbt-idea plugin 170 | .idea_modules/ 171 | 172 | # JIRA plugin 173 | atlassian-ide-plugin.xml 174 | 175 | # Crashlytics plugin (for Android Studio and IntelliJ) 176 | com_crashlytics_export_strings.xml 177 | crashlytics.properties 178 | crashlytics-build.properties 179 | fabric.properties 180 | ### VirtualEnv template 181 | # Virtualenv 182 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 183 | .Python 184 | [Bb]in 185 | [Ii]nclude 186 | [Ll]ib 187 | [Ll]ib64 188 | [Ll]ocal 189 | [Ss]cripts 190 | pyvenv.cfg 191 | .venv 192 | pip-selfcheck.json 193 | ### macOS template 194 | *.DS_Store 195 | .AppleDouble 196 | .LSOverride 197 | 198 | # Icon must end with two \r 199 | Icon 200 | 201 | 202 | # Thumbnails 203 | ._* 204 | 205 | # Files that might appear in the root of a volume 206 | .DocumentRevisions-V100 207 | .fseventsd 208 | .Spotlight-V100 209 | .TemporaryItems 210 | .Trashes 211 | .VolumeIcon.icns 212 | .com.apple.timemachine.donotpresent 213 | 214 | # Directories potentially created on remote AFP share 215 | .AppleDB 216 | .AppleDesktop 217 | Network Trash Folder 218 | Temporary Items 219 | .apdisk 220 | -------------------------------------------------------------------------------- /emoji_data_python/emoji_char.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict 2 | 3 | from .conversion import unified_to_char 4 | 5 | 6 | # pylint: disable=too-many-instance-attributes 7 | class EmojiChar: 8 | """Represents an emoji character as parsed from the json data""" 9 | 10 | def __init__(self, data_blob: dict) -> None: 11 | """Parse data into EmojiChar 12 | 13 | :param data_blob: Dictionary of values loaded from the json format in `emoji.json` 14 | 15 | >>> emoji.__dict__ 16 | { 17 | 'name': 'BLACK HEART SUIT', 18 | 'unified': '2665', 19 | 'variations': ['2665-FE0F'], 20 | 'docomo': 'E68D', 21 | 'au': 'EAA5', 22 | 'softbank': 'E20C', 23 | 'google': 'FEB1A', 24 | 'image': '2665.png', 25 | 'sheet_x': 1, 26 | 'sheet_y': 34, 27 | 'short_name': 'hearts', 28 | 'short_names': ['hearts'], 29 | 'text': None, 30 | 'texts': None, 31 | 'category': 'Symbols', 32 | 'sort_order': 245, 33 | 'added_in': '1.1', 34 | 'has_img_apple': True, 35 | 'has_img_google': True, 36 | 'has_img_twitter': True, 37 | 'has_img_emojione': True, 38 | 'has_img_facebook': True, 39 | 'has_img_messenger': True, 40 | 'skin_variations': {}, 41 | 'obsoletes': None, 42 | 'obsoleted_by': None 43 | } 44 | """ 45 | self.name = data_blob.get("name") # type: Optional[str] 46 | self.unified = data_blob.get("unified") # type: str 47 | self.variations = data_blob.get("variations", []) # type: List[str] 48 | 49 | self.docomo = data_blob.get("docomo") # type: Optional[str] 50 | # pylint: disable=invalid-name 51 | self.au = data_blob.get("au") # type: Optional[str] 52 | self.softbank = data_blob.get("softbank") # type: Optional[str] 53 | self.google = data_blob.get("google") # type: Optional[str] 54 | self.image = data_blob.get("image") # type: Optional[str] 55 | self.sheet_x = data_blob.get("sheet_x") # type: int 56 | self.sheet_y = data_blob.get("sheet_y") # type: int 57 | 58 | self.short_name = data_blob.get("short_name") # type: Optional[str] 59 | self.short_names = data_blob.get("short_names") # type: List[str] 60 | self.text = data_blob.get("text") # type: Optional[str] 61 | self.texts = data_blob.get("texts") # type: List[str] 62 | 63 | self.category = data_blob.get("category") # type: Optional[str] 64 | self.sort_order = data_blob.get("sort_order") # type: int 65 | self.added_in = data_blob.get("added_in") # type: str 66 | 67 | self.has_img_apple = data_blob.get("has_img_apple") # type: bool 68 | self.has_img_google = data_blob.get("has_img_google") # type: bool 69 | self.has_img_twitter = data_blob.get("has_img_twitter") # type: bool 70 | self.has_img_emojione = data_blob.get("has_img_emojione") # type: bool 71 | self.has_img_facebook = data_blob.get("has_img_facebook") # type: bool 72 | self.has_img_messenger = data_blob.get("has_img_messenger") # type: bool 73 | 74 | variations = data_blob.get("skin_variations", {}) 75 | self.skin_variations = { 76 | code: EmojiChar(variation) for code, variation in variations.items() 77 | } # type: Dict[str, EmojiChar] 78 | 79 | self.obsoletes = data_blob.get("obsoletes") # type: Optional[str] 80 | self.obsoleted_by = data_blob.get("obsoleted_by") # type: Optional[str] 81 | 82 | @property 83 | def all_variations(self) -> List[str]: 84 | """Lists all possible codepoint variations for given emoji. 85 | 86 | See :mod:`emoji_data_python.EmojiChar.chars` for a rendered version 87 | 88 | >>> emoji.all_variations 89 | ['261D', '261D-FE0F', '261D-1F3FB'] 90 | """ 91 | return ( 92 | [self.unified] 93 | + self.variations 94 | + [self.unified + "-" + variation for variation in self.skin_variations.keys()] 95 | ) 96 | 97 | @property 98 | def char(self) -> str: 99 | """Returns rendered char for emoji 100 | 101 | >>> emoji.char 102 | '👋' 103 | """ 104 | return unified_to_char(self.unified) 105 | 106 | @property 107 | def chars(self) -> List[str]: 108 | """Lists all possible *rendered* codepoint variations for given emoji. 109 | This is useful when trying to find this particular emoji in a string by looking for any variation. 110 | 111 | >>> emoji.chars 112 | ['👋', '👋🏻', '👋🏼', '👋🏽', '👋🏾', '👋🏿'] 113 | """ 114 | 115 | return list(map(unified_to_char, self.all_variations)) 116 | 117 | @property 118 | def is_doublebyte(self) -> bool: 119 | """`True` if emoji is coded on two or more bytes""" 120 | return "-" in self.unified 121 | 122 | def __str__(self): 123 | return self.name or self.short_name or self.unified 124 | 125 | def __repr__(self): 126 | return f'EmojiChar("{self!s}")' 127 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Emoji Data Python documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jun 9 12:07:07 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.abspath('..')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc', 35 | 'sphinx.ext.coverage', 36 | 'sphinx.ext.viewcode', 37 | 'sphinx.ext.githubpages', 38 | 'sphinx_autodoc_typehints'] 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ['_templates'] 42 | 43 | # The suffix(es) of source filenames. 44 | # You can specify multiple suffix as a list of string: 45 | # 46 | # source_suffix = ['.rst', '.md'] 47 | source_suffix = '.rst' 48 | 49 | # The master toctree document. 50 | master_doc = 'index' 51 | 52 | # General information about the project. 53 | project = 'Emoji Data Python' 54 | copyright = '2017, Alexander Micklewright' 55 | author = 'Alexander Micklewright' 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 62 | version = '1.0' 63 | # The full version, including alpha/beta/rc tags. 64 | release = '1.0.1' 65 | 66 | # The language for content autogenerated by Sphinx. Refer to documentation 67 | # for a list of supported languages. 68 | # 69 | # This is also used if you do content translation via gettext catalogs. 70 | # Usually you set "language" from the command line for these cases. 71 | language = None 72 | 73 | # List of patterns, relative to source directory, that match files and 74 | # directories to ignore when looking for source files. 75 | # This patterns also effect to html_static_path and html_extra_path 76 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 77 | 78 | # The name of the Pygments (syntax highlighting) style to use. 79 | pygments_style = 'sphinx' 80 | 81 | # If true, `todo` and `todoList` produce output, else they produce nothing. 82 | todo_include_todos = False 83 | 84 | 85 | # -- Options for HTML output ---------------------------------------------- 86 | 87 | # The theme to use for HTML and HTML Help pages. See the documentation for 88 | # a list of builtin themes. 89 | # 90 | html_theme = 'alabaster' 91 | 92 | # Theme options are theme-specific and customize the look and feel of a theme 93 | # further. For a list of options available for each theme, see the 94 | # documentation. 95 | # 96 | # html_theme_options = {} 97 | 98 | # Add any paths that contain custom static files (such as style sheets) here, 99 | # relative to this directory. They are copied after the builtin static files, 100 | # so a file named "default.css" will overwrite the builtin "default.css". 101 | html_static_path = ['_static'] 102 | 103 | 104 | # -- Options for HTMLHelp output ------------------------------------------ 105 | 106 | # Output file base name for HTML help builder. 107 | htmlhelp_basename = 'EmojiDataPythondoc' 108 | 109 | 110 | # -- Options for LaTeX output --------------------------------------------- 111 | 112 | latex_elements = { 113 | # The paper size ('letterpaper' or 'a4paper'). 114 | # 115 | # 'papersize': 'letterpaper', 116 | 117 | # The font size ('10pt', '11pt' or '12pt'). 118 | # 119 | # 'pointsize': '10pt', 120 | 121 | # Additional stuff for the LaTeX preamble. 122 | # 123 | # 'preamble': '', 124 | 125 | # Latex figure (float) alignment 126 | # 127 | # 'figure_align': 'htbp', 128 | } 129 | 130 | # Grouping the document tree into LaTeX files. List of tuples 131 | # (source start file, target name, title, 132 | # author, documentclass [howto, manual, or own class]). 133 | latex_documents = [ 134 | (master_doc, 'EmojiDataPython.tex', 'Emoji Data Python Documentation', 135 | 'Alexander Micklewright', 'manual'), 136 | ] 137 | 138 | 139 | # -- Options for manual page output --------------------------------------- 140 | 141 | # One entry per manual page. List of tuples 142 | # (source start file, name, description, authors, manual section). 143 | man_pages = [ 144 | (master_doc, 'emojidatapython', 'Emoji Data Python Documentation', 145 | [author], 1) 146 | ] 147 | 148 | 149 | # -- Options for Texinfo output ------------------------------------------- 150 | 151 | # Grouping the document tree into Texinfo files. List of tuples 152 | # (source start file, target name, title, author, 153 | # dir menu entry, description, category) 154 | texinfo_documents = [ 155 | (master_doc, 'EmojiDataPython', 'Emoji Data Python Documentation', 156 | author, 'EmojiDataPython', 'One line description of project.', 157 | 'Miscellaneous'), 158 | ] 159 | 160 | autoclass_content = 'both' 161 | 162 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist=marisa_trie 7 | 8 | # Add files or directories to the blacklist. They should be base names, not 9 | # paths. 10 | ignore=CVS 11 | 12 | # Add files or directories matching the regex patterns to the blacklist. The 13 | # regex matches against base names, not paths. 14 | ignore-patterns= 15 | 16 | # Python code to execute, usually for sys.path manipulation such as 17 | # pygtk.require(). 18 | #init-hook= 19 | 20 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 21 | # number of processors available to use. 22 | jobs=0 23 | 24 | # Control the amount of potential inferred values when inferring a single 25 | # object. This can help the performance when dealing with large functions or 26 | # complex, nested conditions. 27 | limit-inference-results=100 28 | 29 | # List of plugins (as comma separated values of python module names) to load, 30 | # usually to register additional checkers. 31 | load-plugins= 32 | 33 | # Pickle collected data for later comparisons. 34 | persistent=yes 35 | 36 | # Specify a configuration file. 37 | #rcfile= 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 63 | disable=missing-class-docstring, 64 | missing-module-docstring, 65 | missing-function-docstring, 66 | inherit-non-class, 67 | too-few-public-methods, 68 | unnecessary-pass, 69 | cyclic-import 70 | 71 | # Enable the message, report, category or checker with the given id(s). You can 72 | # either give multiple identifier separated by comma (,) or put this option 73 | # multiple time (only on the command line, not in the configuration file where 74 | # it should appear only once). See also the "--disable" option for examples. 75 | enable=c-extension-no-member, useless-suppression 76 | 77 | 78 | [REPORTS] 79 | 80 | # Python expression which should return a score less than or equal to 10. You 81 | # have access to the variables 'error', 'warning', 'refactor', and 'convention' 82 | # which contain the number of messages in each category, as well as 'statement' 83 | # which is the total number of statements analyzed. This score is used by the 84 | # global evaluation report (RP0004). 85 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 86 | 87 | # Template used to display messages. This is a python new-style format string 88 | # used to format the message information. See doc for all details. 89 | #msg-template= 90 | 91 | # Set the output format. Available formats are text, parseable, colorized, json 92 | # and msvs (visual studio). You can also give a reporter class, e.g. 93 | # mypackage.mymodule.MyReporterClass. 94 | output-format=colorized 95 | 96 | # Tells whether to display a full report or only the messages. 97 | reports=yes 98 | 99 | # Activate the evaluation score. 100 | score=yes 101 | 102 | 103 | [REFACTORING] 104 | 105 | # Maximum number of nested blocks for function / method body 106 | max-nested-blocks=5 107 | 108 | # Complete name of functions that never returns. When checking for 109 | # inconsistent-return-statements if a never returning function is called then 110 | # it will be considered as an explicit return statement and no message will be 111 | # printed. 112 | never-returning-functions=sys.exit 113 | 114 | 115 | [LOGGING] 116 | 117 | # Format style used to check logging format string. `old` means using % 118 | # formatting, `new` is for `{}` formatting,and `fstr` is for f-strings. 119 | logging-format-style=new 120 | 121 | # Logging modules to check that the string format arguments are in logging 122 | # function parameter format. 123 | logging-modules=logging 124 | 125 | 126 | [SPELLING] 127 | 128 | # Limits count of emitted suggestions for spelling mistakes. 129 | max-spelling-suggestions=4 130 | 131 | # Spelling dictionary name. Available dictionaries: none. To make it work, 132 | # install the python-enchant package. 133 | spelling-dict= 134 | 135 | # Listc of comma separated words that should not be checked. 136 | spelling-ignore-words= 137 | 138 | # A path to a file that contains the private dictionary; one word per line. 139 | spelling-private-dict-file= 140 | 141 | # Tells whether to store unknown words to the private dictionary (see the 142 | # --spelling-private-dict-file option) instead of raising a message. 143 | spelling-store-unknown-words=no 144 | 145 | 146 | [MISCELLANEOUS] 147 | 148 | # List of note tags to take in consideration, separated by a comma. 149 | notes=FIXME, 150 | XXX, 151 | TODO 152 | 153 | 154 | [TYPECHECK] 155 | 156 | # List of decorators that produce context managers, such as 157 | # contextlib.contextmanager. Add to this list to register other decorators that 158 | # produce valid context managers. 159 | contextmanager-decorators=contextlib.contextmanager 160 | 161 | # List of members which are set dynamically and missed by pylint inference 162 | # system, and so shouldn't trigger E1101 when accessed. Python regular 163 | # expressions are accepted. 164 | generated-members= 165 | 166 | # Tells whether missing members accessed in mixin class should be ignored. A 167 | # mixin class is detected if its name ends with "mixin" (case insensitive). 168 | ignore-mixin-members=yes 169 | 170 | # Tells whether to warn about missing members when the owner of the attribute 171 | # is inferred to be None. 172 | ignore-none=yes 173 | 174 | # This flag controls whether pylint should warn about no-member and similar 175 | # checks whenever an opaque object is returned when inferring. The inference 176 | # can return multiple potential results while evaluating a Python object, but 177 | # some branches might not be evaluated, which results in partial inference. In 178 | # that case, it might be useful to still emit no-member and other checks for 179 | # the rest of the inferred objects. 180 | ignore-on-opaque-inference=yes 181 | 182 | # List of class names for which member attributes should not be checked (useful 183 | # for classes with dynamically set attributes). This supports the use of 184 | # qualified names. 185 | ignored-classes=optparse.Values,thread._local,_thread._local 186 | 187 | # List of module names for which member attributes should not be checked 188 | # (useful for modules/projects where namespaces are manipulated during runtime 189 | # and thus existing member attributes cannot be deduced by static analysis). It 190 | # supports qualified module names, as well as Unix pattern matching. 191 | ignored-modules=alembic.context 192 | 193 | # Show a hint with possible names when a member name was not found. The aspect 194 | # of finding the hint is based on edit distance. 195 | missing-member-hint=yes 196 | 197 | # The minimum edit distance a name should have in order to be considered a 198 | # similar match for a missing member name. 199 | missing-member-hint-distance=1 200 | 201 | # The total number of similar names that should be taken in consideration when 202 | # showing a hint for a missing member. 203 | missing-member-max-choices=1 204 | 205 | # List of decorators that change the signature of a decorated function. 206 | signature-mutators= 207 | 208 | 209 | [VARIABLES] 210 | 211 | # List of additional names supposed to be defined in builtins. Remember that 212 | # you should avoid defining new builtins when possible. 213 | additional-builtins= 214 | 215 | # Tells whether unused global variables should be treated as a violation. 216 | allow-global-unused-variables=yes 217 | 218 | # List of strings which can identify a callback function by name. A callback 219 | # name must start or end with one of those strings. 220 | callbacks=cb_, 221 | _cb 222 | 223 | # A regular expression matching the name of dummy variables (i.e. expected to 224 | # not be used). 225 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 226 | 227 | # Argument names that match this expression will be ignored. Default to name 228 | # with leading underscore. 229 | ignored-argument-names=_.*|^ignored_|^unused_ 230 | 231 | # Tells whether we should check for unused import in __init__ files. 232 | init-import=no 233 | 234 | # List of qualified module names which can have objects that can redefine 235 | # builtins. 236 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 237 | 238 | 239 | [FORMAT] 240 | 241 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 242 | expected-line-ending-format=LF 243 | 244 | # Regexp for a line that is allowed to be longer than the limit. 245 | ignore-long-lines=^\s*(# )??$ 246 | 247 | # Number of spaces of indent required inside a hanging or continued line. 248 | indent-after-paren=4 249 | 250 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 251 | # tab). 252 | indent-string=' ' 253 | 254 | # Maximum number of characters on a single line. 255 | max-line-length=120 256 | 257 | # Maximum number of lines in a module. 258 | max-module-lines=1000 259 | 260 | # List of optional constructs for which whitespace checking is disabled. `dict- 261 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 262 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 263 | # `empty-line` allows space-only lines. 264 | #no-space-check= 265 | 266 | # Allow the body of a class to be on the same line as the declaration if body 267 | # contains single statement. 268 | single-line-class-stmt=no 269 | 270 | # Allow the body of an if to be on the same line as the test if there is no 271 | # else. 272 | single-line-if-stmt=no 273 | 274 | 275 | [SIMILARITIES] 276 | 277 | # Ignore comments when computing similarities. 278 | ignore-comments=yes 279 | 280 | # Ignore docstrings when computing similarities. 281 | ignore-docstrings=yes 282 | 283 | # Ignore imports when computing similarities. 284 | ignore-imports=yes 285 | 286 | # Minimum lines number of a similarity. 287 | min-similarity-lines=4 288 | 289 | 290 | [BASIC] 291 | 292 | # Naming style matching correct argument names. 293 | argument-naming-style=snake_case 294 | 295 | # Regular expression matching correct argument names. Overrides argument- 296 | # naming-style. 297 | #argument-rgx= 298 | 299 | # Naming style matching correct attribute names. 300 | # attr-naming-style=snake_case 301 | 302 | # Regular expression matching correct attribute names. Overrides attr-naming- 303 | # style. 304 | attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*)|(id))$ 305 | 306 | # Bad variable names which should always be refused, separated by a comma. 307 | bad-names=foo, 308 | bar, 309 | baz, 310 | toto, 311 | tutu, 312 | tata 313 | 314 | # Naming style matching correct class attribute names. 315 | # class-attribute-naming-style=snake_case 316 | 317 | # Regular expression matching correct class attribute names. Overrides class- 318 | # attribute-naming-style. 319 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__)|(id))$ 320 | 321 | # Naming style matching correct class names. 322 | class-naming-style=PascalCase 323 | 324 | # Regular expression matching correct class names. Overrides class-naming- 325 | # style. 326 | #class-rgx= 327 | 328 | # Naming style matching correct constant names. 329 | const-naming-style=UPPER_CASE 330 | 331 | # Regular expression matching correct constant names. Overrides const-naming- 332 | # style. 333 | #const-rgx= 334 | 335 | # Minimum line length for functions/classes that require docstrings, shorter 336 | # ones are exempt. 337 | docstring-min-length=-1 338 | 339 | # Naming style matching correct function names. 340 | function-naming-style=snake_case 341 | 342 | # Regular expression matching correct function names. Overrides function- 343 | # naming-style. 344 | #function-rgx= 345 | 346 | # Good variable names which should always be accepted, separated by a comma. 347 | good-names=i, 348 | j, 349 | k, 350 | ex, 351 | Run, 352 | _ 353 | 354 | # Include a hint for the correct naming format with invalid-name. 355 | include-naming-hint=no 356 | 357 | # Naming style matching correct inline iteration names. 358 | inlinevar-naming-style=snake_case 359 | 360 | # Regular expression matching correct inline iteration names. Overrides 361 | # inlinevar-naming-style. 362 | #inlinevar-rgx= 363 | 364 | # Naming style matching correct method names. 365 | method-naming-style=snake_case 366 | 367 | # Regular expression matching correct method names. Overrides method-naming- 368 | # style. 369 | #method-rgx= 370 | 371 | # Naming style matching correct module names. 372 | module-naming-style=snake_case 373 | 374 | # Regular expression matching correct module names. Overrides module-naming- 375 | # style. 376 | #module-rgx= 377 | 378 | # Colon-delimited sets of names that determine each other's naming style when 379 | # the name regexes allow several styles. 380 | name-group= 381 | 382 | # Regular expression which should only match function or class names that do 383 | # not require a docstring. 384 | no-docstring-rgx=^_ 385 | 386 | # List of decorators that produce properties, such as abc.abstractproperty. Add 387 | # to this list to register other decorators that produce valid properties. 388 | # These decorators are taken in consideration only for invalid-name. 389 | property-classes=abc.abstractproperty 390 | 391 | # Naming style matching correct variable names. 392 | variable-naming-style=snake_case 393 | 394 | # Regular expression matching correct variable names. Overrides variable- 395 | # naming-style. 396 | #variable-rgx= 397 | 398 | 399 | [STRING] 400 | 401 | # This flag controls whether the implicit-str-concat-in-sequence should 402 | # generate a warning on implicit string concatenation in sequences defined over 403 | # several lines. 404 | check-str-concat-over-line-jumps=yes 405 | 406 | 407 | [IMPORTS] 408 | 409 | # List of modules that can be imported at any level, not just the top level 410 | # one. 411 | allow-any-import-level=tensorflow, keras 412 | 413 | # Allow wildcard imports from modules that define __all__. 414 | allow-wildcard-with-all=no 415 | 416 | # Analyse import fallback blocks. This can be used to support both Python 2 and 417 | # 3 compatible code, which means that the block might have code that exists 418 | # only in one or another interpreter, leading to false positives when analysed. 419 | analyse-fallback-blocks=no 420 | 421 | # Deprecated modules which should not be used, separated by a comma. 422 | deprecated-modules=optparse,tkinter.tix 423 | 424 | # Create a graph of external dependencies in the given file (report RP0402 must 425 | # not be disabled). 426 | ext-import-graph= 427 | 428 | # Create a graph of every (i.e. internal and external) dependencies in the 429 | # given file (report RP0402 must not be disabled). 430 | import-graph= 431 | 432 | # Create a graph of internal dependencies in the given file (report RP0402 must 433 | # not be disabled). 434 | int-import-graph= 435 | 436 | # Force import order to recognize a module as part of the standard 437 | # compatibility libraries. 438 | known-standard-library= 439 | 440 | # Force import order to recognize a module as part of a third party library. 441 | known-third-party=enchant 442 | 443 | # Couples of modules and preferred modules, separated by a comma. 444 | preferred-modules= 445 | 446 | 447 | [CLASSES] 448 | 449 | # List of method names used to declare (i.e. assign) instance attributes. 450 | defining-attr-methods=__init__, 451 | __new__, 452 | setUp, 453 | __post_init__ 454 | 455 | # List of member names, which should be excluded from the protected access 456 | # warning. 457 | exclude-protected=_asdict, 458 | _fields, 459 | _replace, 460 | _source, 461 | _make 462 | 463 | # List of valid names for the first argument in a class method. 464 | valid-classmethod-first-arg=cls 465 | 466 | # List of valid names for the first argument in a metaclass class method. 467 | valid-metaclass-classmethod-first-arg=cls 468 | 469 | 470 | [DESIGN] 471 | 472 | # Maximum number of arguments for function / method. 473 | max-args=5 474 | 475 | # Maximum number of attributes for a class (see R0902). 476 | max-attributes=7 477 | 478 | # Maximum number of boolean expressions in an if statement (see R0916). 479 | max-bool-expr=5 480 | 481 | # Maximum number of branch for function / method body. 482 | max-branches=12 483 | 484 | # Maximum number of locals for function / method body. 485 | max-locals=15 486 | 487 | # Maximum number of parents for a class (see R0901). 488 | max-parents=7 489 | 490 | # Maximum number of public methods for a class (see R0904). 491 | max-public-methods=20 492 | 493 | # Maximum number of return / yield for function / method body. 494 | max-returns=6 495 | 496 | # Maximum number of statements in function / method body. 497 | max-statements=50 498 | 499 | # Minimum number of public methods for a class (see R0903). 500 | min-public-methods=1 501 | 502 | 503 | [EXCEPTIONS] 504 | 505 | # Exceptions that will emit a warning when being caught. Defaults to 506 | # "BaseException, Exception". 507 | overgeneral-exceptions=builtins.BaseException, 508 | builtins.Exception 509 | --------------------------------------------------------------------------------