├── tests
    ├── __init__.py
    ├── test_search.py
    ├── test_conversion.py
    ├── test_replacement.py
    └── test_emoji_char.py
├── docs
    ├── requirements.txt
    ├── Makefile
    ├── index.rst
    └── conf.py
├── MANIFEST.in
├── .readthedocs.yml
├── .coveragerc
├── pyproject.toml
├── dev.requirements.txt
├── .editorconfig
├── emoji_data_python
    ├── conversion.py
    ├── __init__.py
    ├── search.py
    ├── replacement.py
    └── emoji_char.py
├── update.py
├── setup.py
├── LICENSE
├── .github
    └── workflows
    │   └── workflow.yml
├── README.md
├── .gitignore
└── .pylintrc


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx_autodoc_typehints
3 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include emoji_data_python/data/*.json
3 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | build:
2 |     image: latest
3 | 
4 | python:
5 |     version: 3.6
6 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [report]
2 | omit =
3 |     */python?.?/*
4 |     */site-packages/nose/*
5 | fail_under = 100
6 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.black]
 2 | line-length = 120
 3 | include = '\.pyi?$'
 4 | exclude = '''
 5 | /(
 6 |     \.eggs
 7 |   | \.git
 8 |   | \.hg
 9 |   | \.mypy_cache
10 |   | \.tox
11 |   | v?env
12 |   | _build
13 |   | buck-out
14 |   | builds?
15 |   | dist
16 | )/
17 | '''
18 | 


--------------------------------------------------------------------------------
/dev.requirements.txt:
--------------------------------------------------------------------------------
 1 | # setup.py requirements
 2 | -e .
 3 | 
 4 | # Updating from source emoji data
 5 | requests>=2.25.1,<3.0.0
 6 | 
 7 | # Testing
 8 | pytest>=8.2.2,<9.0.0
 9 | pytest-cov>=5.0.0,<6.0.0
10 | 
11 | # Linting
12 | pylint>=3.2.5,<4.0.0
13 | mypy>=1.10.1,<2.0.0
14 | black>=24.4.2,<=25.0.0
15 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org
 2 | root = true
 3 | 
 4 | [*]
 5 | indent_style = space
 6 | indent_size = 4
 7 | end_of_line = lf
 8 | charset = utf-8
 9 | trim_trailing_whitespace = true
10 | insert_final_newline = true
11 | 
12 | [*.md]
13 | trim_trailing_whitespace = false
14 | 
15 | [*.json]
16 | indent_size = 2
17 | 
18 | [*.{yml,yaml}]
19 | indent_size = 2
20 | 


--------------------------------------------------------------------------------
/emoji_data_python/conversion.py:
--------------------------------------------------------------------------------
 1 | def unified_to_char(code_point: str) -> str:
 2 |     """Renders a character from its hexadecimal codepoint
 3 | 
 4 |     :param code_point: Character code point ex: `'261D'`
 5 | 
 6 |     >>> emoji_data_python.unified_to_char('1F603')
 7 |     '😃'
 8 |     """
 9 |     return "".join([chr(int(code, 16)) for code in code_point.split("-")])
10 | 
11 | 
def char_to_unified(chars: str) -> str:
    """Convert a string into its dash separated unified code point notation

    Every character becomes its uppercase hexadecimal code point, zero padded
    to at least four digits.

    :param chars: Emoji character(s) ex: `'🇿🇦'`
    :return: The code points of `chars` joined with dashes

    >>> emoji_data_python.char_to_unified('🇿🇦')
    '1F1FF-1F1E6'
    """
    return "-".join(format(ord(symbol), "04X") for symbol in chars)
21 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = python -msphinx
 7 | SPHINXPROJ    = EmojiDataPython
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/update.py:
--------------------------------------------------------------------------------
 1 | from os import path
 2 | 
 3 | import logging
 4 | from requests import get
 5 | 
 6 | logging.basicConfig(level=logging.INFO)
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | JSON_LOCATION = 'https://raw.githubusercontent.com/iamcal/emoji-data/master/emoji.json'
10 | 
11 | logger.info(f'📶  Downloading data from {JSON_LOCATION}')
12 | r = get(JSON_LOCATION)
13 | r.raise_for_status()
14 | 
15 | output_path = path.join(path.dirname(__file__), 'emoji_data_python/data/emoji.json')
16 | logger.info(f'📝  Writing data to {output_path}')
17 | with open(output_path, 'w') as f:
18 |     f.write(r.text)
19 | 
20 | logger.info('✅  Done, successfully updated data, '
21 |             'run "python -m unittest discover" to make sure the format is still supported')
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import setuptools
 4 | 
 5 | module_path = os.path.join(os.path.dirname(__file__), "emoji_data_python.py")
 6 | 
 7 | setuptools.setup(
 8 |     name="emoji_data_python",
 9 |     version="0.0.0",
10 |     url="https://github.com/alexmick/emoji-data-python/",
11 | 
12 |     author="Alexander Micklewright",
13 | 
14 |     description="Python emoji toolkit",
15 |     long_description="Full documentation available on https://emoji-data-python.readthedocs.io/en/latest/",
16 | 
17 |     zip_safe=False,
18 |     platforms="any",
19 | 
20 |     python_requires=">=3.6.0",
21 |     install_requires=[""],
22 |     packages=setuptools.find_packages(),
23 |     include_package_data=True,
24 | 
25 |     classifiers=[
26 |         "Intended Audience :: Developers",
27 |         "Operating System :: OS Independent",
28 |         "Programming Language :: Python",
29 |         "Programming Language :: Python :: 3",
30 |         "Programming Language :: Python :: 3.6",
31 |     ],
32 | )
33 | 


--------------------------------------------------------------------------------
/tests/test_search.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from emoji_data_python import all_doublebyte, find_by_shortname, find_by_name, emoji_data
 4 | 
 5 | 
 6 | class SearchTestCase(unittest.TestCase):
 7 |     def test_all_emoji_length(self):
 8 |         self.assertEqual(1903, len(emoji_data))
 9 | 
10 |     def test_all_double_bytes(self):
11 |         self.assertEqual('HASH KEY', all_doublebyte()[0].name)  # HASH_KEY is the first double byte char
12 | 
13 |     def test_find_by_shortname(self):
14 |         self.assertEqual(1, len(find_by_shortname('wave')))
15 |         self.assertEqual('WAVING HAND SIGN', find_by_shortname('wave')[0].name)
16 | 
17 |     def test_find_by_shortname_unique(self):
18 |         self.assertEqual(34, len(find_by_shortname('heart')))
19 |         self.assertEqual(14, len(find_by_shortname('moon')))
20 | 
21 |     def test_find_by_name(self):
22 |         self.assertEqual('COUPLE WITH HEART: MAN, MAN', find_by_name('heart')[0].name)
23 |         self.assertEqual('SUNRISE OVER MOUNTAINS', find_by_name('sun')[0].name)
24 |         self.assertEqual(3, len(find_by_name('earth')))
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2013 Cal Henderson
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/emoji_data_python/__init__.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from os import path
 3 | from typing import List, Dict
 4 | 
 5 | from .conversion import unified_to_char, char_to_unified
 6 | from .emoji_char import EmojiChar
 7 | from .replacement import replace_colons, get_emoji_regex
 8 | from .search import all_doublebyte, find_by_shortname, find_by_name
 9 | 
# The json data is parsed once, when the module is first imported, and kept for every later lookup
with open(path.join(path.dirname(__file__), "data/emoji.json"), "r", encoding="utf-8") as full_data:
    # Each json entry is wrapped in an EmojiChar object
    emoji_data = [EmojiChar(data_blob) for data_blob in json.load(full_data)]  # type: List[EmojiChar]

# Lookup table from short name to emoji. Primary short names go in first, with dashes
# normalized to underscores
emoji_short_names = {}  # type: Dict[str, EmojiChar]
for emoji in emoji_data:
    emoji_short_names[emoji.short_name.replace("-", "_")] = emoji

# Remaining short names are indexed as written in the data, and never replace an existing entry
for emoji in emoji_data:
    for short_name in emoji.short_names:
        emoji_short_names.setdefault(short_name, emoji)


# Public API of the package
__all__ = [
    "unified_to_char",
    "char_to_unified",
    "EmojiChar",
    "replace_colons",
    "get_emoji_regex",
    "all_doublebyte",
    "find_by_shortname",
    "find_by_name",
    "emoji_data",
    "emoji_short_names",
]
37 | 


--------------------------------------------------------------------------------
/tests/test_conversion.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from emoji_data_python import unified_to_char, char_to_unified
 4 | 
 5 | 
class EmojiConversionTestCase(unittest.TestCase):
    """Exercises `unified_to_char` and `char_to_unified` on single and multi code point emoji."""

    def test_unified_to_char(self):
        """A single four digit code point renders as one character."""
        self.assertEqual('\u261D', unified_to_char('261D'))

    def test_longer_unified(self):
        """A five digit code point renders as one character."""
        self.assertEqual('\U0001F1E6', unified_to_char('1F1E6'))

    def test_multiple_unified_to_char(self):
        """Dash separated sequences render as the concatenation of their code points."""
        # Zero width joiner sequences, with and without a skin tone modifier
        self.assertEqual('👨‍🌾', unified_to_char('1F468-200D-1F33E'))
        self.assertEqual('👩🏼‍🌾', unified_to_char('1F469-1F3FC-200D-1F33E'))
        # Flag made of two regional indicators, as a literal and as escapes
        self.assertEqual('🇳🇬', unified_to_char('1F1F3-1F1EC'))
        self.assertEqual('\U0001F1F3\U0001F1EC', unified_to_char('1F1F3-1F1EC'))
        # Keycap: digit followed by the combining enclosing keycap, as a literal and as escapes
        self.assertEqual('4⃣', unified_to_char('0034-20E3'))
        self.assertEqual('\u0034\u20E3', unified_to_char('0034-20E3'))

    def test_char_to_unified(self):
        """A single character converts to its four digit code point."""
        self.assertEqual('261D', char_to_unified('\u261D'))

    def test_longer_char_to_unified(self):
        """A single character converts to its five digit code point."""
        self.assertEqual('1F1E6', char_to_unified('\U0001F1E6'))

    def test_multiple_char_to_unified(self):
        """Multi code point emoji convert to dash separated code points."""
        self.assertEqual('1F469-1F3FC-200D-1F33E', char_to_unified('👩🏼‍🌾'))
        self.assertEqual('1F468-200D-1F33E', char_to_unified('👨‍🌾'))
        self.assertEqual('1F1F3-1F1EC', char_to_unified('🇳🇬'))
        self.assertEqual('1F1F3-1F1EC', char_to_unified('\U0001F1F3\U0001F1EC'))
        # The digit is zero padded to four hexadecimal digits
        self.assertEqual('0034-20E3', char_to_unified('4⃣'))
33 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Emoji Data Python documentation
 2 | ===============================
 3 | 
 4 | This is the documentation for the `emoji_data_python` module
 5 | 
 6 | Also available in :ref:`Alphabetical order <genindex>`
 7 | 
 8 | Module documentation
 9 | --------------------
10 | 
11 | .. automodule:: emoji_data_python
12 |     :members: unified_to_char, char_to_unified, replace_colons, all_doublebyte, find_by_shortname, find_by_name, get_emoji_regex
13 | 
14 |     .. attribute:: emoji_data
15 | 
16 |         List of all emoji as :mod:`emoji_data_python.EmojiChar` objects.
17 | 
18 |         >>> len(emoji_data_python.emoji_data)
19 |         1903
20 | 
21 |     .. attribute:: emoji_short_names
22 | 
23 |         Dict of all emoji as :mod:`emoji_data_python.EmojiChar` objects indexed by short names.
24 | 
25 |         **Note** : All short names (even secondary ones) are indexed. If a conflict is found, only the emoji that has the conflicting short name as its primary name is indexed under that name,
26 |         i.e. if an emoji has a secondary short name that is already taken as the primary short name of another emoji, it will not be referenced under that short name.
27 | 
28 |         >>> emoji_data_python.emoji_short_names['hearts'].__dict__
29 |         {
30 |             'name': 'BLACK HEART SUIT',
31 |             'unified': '2665',
32 |             'variations': ['2665-FE0F'],
33 |             'docomo': 'E68D',
34 |             'au': 'EAA5',
35 |             'softbank': 'E20C',
36 |             'google': 'FEB1A',
37 |             'short_name': 'hearts',
38 |             'short_names': ['hearts'],
39 |             'text': None,
40 |             'texts': None,
41 |             'category': 'Symbols',
42 |             'sort_order': 245,
43 |             'added_in': '1.1',
44 |             'skin_variations': {},
45 |             'obsoletes': None,
46 |             'obsoleted_by': None
47 |         }
48 | 
49 | 
50 | Classes
51 | -------
52 | 
53 | .. autoclass:: EmojiChar
54 |     :members:
55 | 


--------------------------------------------------------------------------------
/emoji_data_python/search.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | from .emoji_char import EmojiChar
 4 | 
 5 | 
 6 | def find_by_shortname(name: str) -> List[EmojiChar]:
 7 |     """Finds all emoji with `name` in their short_names
 8 | 
 9 |     :param name: string to find in short names
10 | 
11 |     >>> emoji_data_python.find_by_shortname('moon')
12 |     [
13 |         EmojiChar("NEW MOON SYMBOL"),
14 |         EmojiChar("WAXING CRESCENT MOON SYMBOL"),
15 |         EmojiChar("FIRST QUARTER MOON SYMBOL"),
16 |         EmojiChar("WAXING GIBBOUS MOON SYMBOL"),
17 |         EmojiChar("FULL MOON SYMBOL"),
18 |         EmojiChar("WANING GIBBOUS MOON SYMBOL"),
19 |         EmojiChar("LAST QUARTER MOON SYMBOL"),
20 |         EmojiChar("WANING CRESCENT MOON SYMBOL"),
21 |         EmojiChar("CRESCENT MOON"),
22 |         EmojiChar("NEW MOON WITH FACE"),
23 |         EmojiChar("FIRST QUARTER MOON WITH FACE"),
24 |         EmojiChar("LAST QUARTER MOON WITH FACE"),
25 |         EmojiChar("FULL MOON WITH FACE"),
26 |     ]
27 |     """
28 |     from emoji_data_python import emoji_short_names  # pylint: disable=import-outside-toplevel
29 | 
30 |     name = name.replace("-", "_")
31 |     res_list = [emoji_short_names[key] for key in emoji_short_names.keys() if name in key]
32 |     return list(set(res_list))  # Keep only unique values
33 | 
34 | 
def find_by_name(name: str) -> List[EmojiChar]:
    """Looks up the emoji whose full name contains `name`

    `name` is upper-cased before being searched for in each emoji name. Emoji without
    a name are skipped.

    :param name: string to find in full names
    :return: matching emoji, in the order they appear in the emoji data
    """
    from emoji_data_python import emoji_data  # pylint: disable=import-outside-toplevel

    found = []
    for candidate in emoji_data:
        if not candidate.name:
            # Nothing to search in
            continue
        if name.upper() in candidate.name:
            found.append(candidate)
    return found
43 | 
44 | 
def all_doublebyte() -> List[EmojiChar]:
    """Returns every emoji flagged as double byte, i.e. coded on two or more bytes

    :return: emoji whose `is_doublebyte` attribute is truthy, in emoji data order
    """
    from emoji_data_python import emoji_data  # pylint: disable=import-outside-toplevel

    return list(filter(lambda emoji: emoji.is_doublebyte, emoji_data))
50 | 


--------------------------------------------------------------------------------
/emoji_data_python/replacement.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from functools import cache
 3 | from typing import cast
 4 | 
 5 | 
 6 | def replace_colons(text: str, strip: bool = False) -> str:
 7 |     """Parses a string with colon encoded emoji and renders found emoji.
 8 |     Unknown emoji are left as is unless `strip` is set to `True`
 9 | 
10 |     :param text: String of text to parse and replace
11 |     :param strip: Whether to strip unknown codes or to leave them as `:unknown:`
12 | 
13 |     >>> emoji_data_python.replace_colons('Hello world ! :wave::skin-tone-3: :earth_africa: :exclamation:')
14 |     'Hello world ! 👋🏼 🌍 ❗'
15 |     """
16 |     # pylint: disable=import-outside-toplevel
17 |     from emoji_data_python import emoji_short_names, EmojiChar
18 | 
19 |     def emoji_repl(matchobj) -> str:
20 |         emoji_match = matchobj.group(1)
21 |         base_emoji = emoji_short_names.get(emoji_match.strip(":").replace("-", "_"))
22 | 
23 |         if matchobj.lastindex == 2:
24 |             skin_tone_match = matchobj.group(2)
25 |             skin_tone = cast(EmojiChar, emoji_short_names.get(skin_tone_match.strip(":")))
26 | 
27 |             if base_emoji is None:
28 |                 return f'{emoji_match if strip is False else ""}{skin_tone.char}'
29 | 
30 |             emoji_with_skin_tone = base_emoji.skin_variations.get(skin_tone.unified)
31 |             if emoji_with_skin_tone is None:
32 |                 return f"{base_emoji.char}{skin_tone.char}"
33 |             return emoji_with_skin_tone.char
34 | 
35 |         if base_emoji is None:
36 |             return f'{emoji_match if strip is False else ""}'
37 |         return base_emoji.char
38 | 
39 |     return re.sub(r"(\:[a-zA-Z0-9-_+]+\:)(\:skin-tone-[2-6]\:)?", emoji_repl, text)
40 | 
41 | 
@cache
def get_emoji_regex():
    """Returns a compiled regex matching any emoji of the emoji data

    The pattern is a single capturing group alternating between every emoji character
    sequence. The compiled regex is cached after the first call.

    >>> emoji_data_python.get_emoji_regex().findall('Hello world ! 👋🏼 🌍 ❗')
    ['👋', '🏼', '🌍', '❗']
    """
    from emoji_data_python import emoji_data  # pylint: disable=import-outside-toplevel

    chars = [emoji.char for emoji in emoji_data]
    # Longest sequences go first so that multi-character emojis are tried before
    # the shorter emojis they start with
    chars.sort(key=len, reverse=True)
    alternatives = "|".join(map(re.escape, chars))
    return re.compile("(" + alternatives + ")")
57 | 


--------------------------------------------------------------------------------
/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
 3 | 
 4 | name: CI pipeline
 5 | 
 6 | on:
 7 |   push:
 8 |   pull_request:
 9 | 
10 | jobs:
11 |   lint:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - name: Set up Python
16 |         uses: actions/setup-python@v3
17 |         with:
18 |           python-version: 3.12
19 |       - name: Install dev dependencies
20 |         run: |
21 |           python -m pip install --upgrade pip
22 |           pip install -r dev.requirements.txt
23 |       - name: Lint with black
24 |         run: black --check emoji_data_python
25 |       - name: Lint with pylint
26 |         run: pylint emoji_data_python
27 |       - name : mypy type checking (allowed to fail for now)
28 |         run: mypy emoji_data_python || true
29 | 
30 |   test:
31 |     runs-on: ubuntu-latest
32 |     strategy:
33 |       fail-fast: false
34 |       matrix:
35 |         python-version: ["3.9", "3.12"]
36 | 
37 |     steps:
38 |       - uses: actions/checkout@v4
39 |       - name: Set up Python ${{ matrix.python-version }}
40 |         uses: actions/setup-python@v3
41 |         with:
42 |           python-version: ${{ matrix.python-version }}
43 |       - name: Install dev dependencies
44 |         run: |
45 |           python -m pip install --upgrade pip
46 |           pip install -r dev.requirements.txt
47 |       - name: Test with pytest
48 |         run: |
49 |           pytest --cov-config .coveragerc --cov=emoji_data_python tests/ --cov-report term
50 | 
51 |   deploy:
52 |     if: github.ref_type == 'tag'
53 |     needs:
54 |       - test
55 |       - lint
56 |     runs-on: ubuntu-latest
57 |     steps:
58 |       - uses: actions/checkout@v4
59 |       - name: Set up Python
60 |         uses: actions/setup-python@v3
61 |         with:
62 |           python-version: 3.12
63 |       - name: Install dependencies
64 |         run: |
65 |           python -m pip install --upgrade pip
66 |           pip install wheel twine setuptools
67 |           pip install .
68 |       - name: Build and publish
69 |         env:
70 |           TWINE_USERNAME: __token__
71 |           TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
72 |         run: |
73 |           sed -i "s|version=\"0.0.0\",|version=\"${GITHUB_REF/refs\/tags\//}\",|g" setup.py
74 |           python setup.py sdist bdist_wheel
75 |           twine upload dist/*
76 | 


--------------------------------------------------------------------------------
/tests/test_replacement.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from emoji_data_python import replace_colons, get_emoji_regex
 4 | 
 5 | 
class ReplaceColonsTestCase(unittest.TestCase):
    """Exercises `replace_colons` and `get_emoji_regex` against the bundled emoji data."""

    def test_replace_colons(self):
        """A known colon code is replaced by its emoji."""
        self.assertEqual('😄', replace_colons(':smile:'))
        self.assertEqual('😗', replace_colons(':kissing:'))

    def test_skin_tone(self):
        """A skin tone code directly after an emoji code selects the skin variation."""
        self.assertEqual('👋🏼', replace_colons(':wave::skin-tone-3:'))

    def test_skin_tone_appended_to_emoji_with_no_skin_tone(self):
        """Without a matching skin variation, the tone modifier is appended after the emoji."""
        self.assertEqual('💩🏼', replace_colons(':poop::skin-tone-3:'))

    def test_underscore_hyphenated_codes(self):
        """Codes resolve whether they are written with underscores or with hyphens."""
        self.assertEqual('😙', replace_colons(':kissing_smiling_eyes:'))
        self.assertEqual('😘', replace_colons(':kissing-heart:'))

    def test_main_shortname_precedence(self):
        """There are two emoji for the family shortcode, one as the main short_name and one in the short_names_list"""
        self.assertEqual('👪', replace_colons(':family:'))

    def test_zwj_emoji(self):
        """These emoji are joined by a Zero Width Joiner"""
        self.assertEqual('👨‍👩‍👦', replace_colons(':man-woman-boy:'))
        self.assertEqual('👨‍🌾', replace_colons(':male-farmer:'))

    def test_zwj_emoji_skin_tone(self):
        """This tests zwj emojis that also have a skin tone"""
        self.assertEqual('👨🏼‍🌾', replace_colons(':male-farmer::skin-tone-3:'))

    def test_unknown_code(self):
        """Unknown codes are left untouched; a skin tone following one is still rendered."""
        self.assertEqual('💩💩 :poo:🏼', replace_colons(':hankey::poop: :poo::skin-tone-3:'))
        self.assertEqual('💩:poo: 🐶 :poo:', replace_colons(':poop::poo: :dog: :poo:'))

    def test_strip_unknown_code(self):
        """With `strip=True` unknown codes are removed, surrounding text is kept."""
        self.assertEqual('💩💩 🏼', replace_colons(':hankey::poop: :poo::skin-tone-3:', strip=True))
        self.assertEqual('💩 🐶 ', replace_colons(':poop::poo: :dog: :poo:', strip=True))

    def test_multiline_sentence(self):
        """Codes are replaced on every line of a multi-line string."""
        self.assertEqual("""
Hello 👋 world 🌍 !
How are you ❓""",

            replace_colons("""
Hello :wave: world :earth_africa: !
How are you :question:""")
        )

    def test_emoji_regex(self):
        """The emoji regex matches emoji, and nothing in text, colon codes or arrow symbols."""
        regex = get_emoji_regex()
        self.assertRegex('😄', regex)
        self.assertRegex('👪', regex)
        self.assertNotRegex('hello :wave: l → ▶', regex)
        res = regex.findall('💩💩 🏼 bla 👋🏼')
        self.assertEqual(len(res), 5)  # Wave + skin tone counts as two
59 | 


--------------------------------------------------------------------------------
/tests/test_emoji_char.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from emoji_data_python import EmojiChar
 4 | 
 5 | 
class EmojiCharTestCase(unittest.TestCase):
    """Exercises `EmojiChar` on a hand-written data blob instead of the bundled data."""

    def setUp(self):
        """Builds the emoji under test: one variation, one skin variation, obsolescence links."""
        self.emoji = EmojiChar({
            "name": "WHITE UP POINTING INDEX",
            "unified": "261D",
            "variations": [
                "261D-FE0F"
            ],
            "docomo": None,
            "au": "E4F6",
            "softbank": "E00F",
            "google": "FEB98",
            "image": "261d.png",
            "sheet_x": 1,
            "sheet_y": 2,
            "short_name": "point_up",
            "short_names": [
                "point_up"
            ],
            "text": None,
            "texts": None,
            "category": "People",
            "sort_order": 116,
            "added_in": "1.4",
            "has_img_apple": True,
            "has_img_google": True,
            "has_img_twitter": True,
            "has_img_emojione": False,
            "has_img_facebook": False,
            "has_img_messenger": False,
            "skin_variations": {
                "1F3FB": {
                    "unified": "261D-1F3FB",
                    "image": "261d-1f3fb.png",
                    "sheet_x": 1,
                    "sheet_y": 3,
                    "added_in": "6.0",
                    "has_img_apple": True,
                    "has_img_google": False,
                    "has_img_twitter": False,
                    "has_img_emojione": False,
                    "has_img_facebook": False,
                    "has_img_messenger": False
                },
            },
            "obsoletes": "ABCD-1234",
            "obsoleted_by": "5678-90EF"
        })

    def test_init(self):
        """Values of the data blob are exposed as attributes."""
        self.assertEqual("261D", self.emoji.unified)
        self.assertEqual("point_up", self.emoji.short_names[0])
        self.assertEqual("People", self.emoji.category)
        self.assertEqual("1.4", self.emoji.added_in)
        self.assertEqual("261d.png", self.emoji.image)
        self.assertEqual(1, self.emoji.sheet_x)
        self.assertEqual(True, self.emoji.has_img_apple)
        self.assertEqual(False, self.emoji.has_img_emojione)

    def test_skin_variations(self):
        """Skin variations are keyed by skin tone code point and expose their own attributes."""
        self.assertEqual("261D-1F3FB", self.emoji.skin_variations['1F3FB'].unified)
        self.assertEqual(True, self.emoji.skin_variations['1F3FB'].has_img_apple)

    def test_char(self):
        """`char` is the character rendered from the `unified` code point."""
        self.assertEqual('☝', self.emoji.char)

    def test_all_variations(self):
        """`all_variations` lists the base code, then the variations, then the skin variations."""
        self.assertEqual(['261D', '261D-FE0F', '261D-1F3FB'], self.emoji.all_variations)

    def test_chars(self):
        """`chars` holds the rendered characters, base character first, then the variation."""
        self.assertEqual('☝', self.emoji.chars[0])
        self.assertEqual('☝️', self.emoji.chars[1])  # Not the same char, this is \u261D\uFE0F

    def test_doublebyte(self):
        """The emoji under test is not reported as double byte."""
        self.assertEqual(False, self.emoji.is_doublebyte)

    def test_str(self):
        """`str()` gives the emoji name."""
        self.assertEqual('WHITE UP POINTING INDEX', self.emoji.__str__())

    def test_repr(self):
        """`repr()` wraps the emoji name in an `EmojiChar(...)` call."""
        self.assertEqual('EmojiChar("WHITE UP POINTING INDEX")', self.emoji.__repr__())
87 | 
88 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # emoji_data_python - A python emoji toolkit
  2 | 
  3 | ![CI pipeline](https://github.com/alexmick/emoji-data-python/workflows/CI%20pipeline/badge.svg)
  4 | [![PyPI](https://img.shields.io/pypi/v/emoji-data-python.svg)](https://pypi.python.org/pypi/emoji-data-python)
  5 | [![Documentation Status](https://readthedocs.org/projects/emoji-data-python/badge/?version=latest)](http://emoji-data-python.readthedocs.io/en/latest/?badge=latest)
  6 | 
  7 | 
  8 | Forget about handling obscure shortcodes from slack or user input, this lib knows all about them !
  9 | 
 10 | ### Features
 11 | 
 12 | * Conversion from slack-style colon short codes : `:smiley:` ⇒ 😃
 13 | * Lookup emoji by official name or any known short name
 14 | * Render emoji chars from their codepoint
 15 | 
 16 | ## Installing
 17 | 
 18 | Installation should be as simple as :
 19 | 
 20 | ```bash
 21 | pip install emoji_data_python
 22 | ```
 23 | 
 24 | ## Usage
 25 | ```python
 26 | >>> import emoji_data_python
 27 | >>> len(emoji_data_python.emoji_data)
 28 | 1903
 29 | >>> emoji_data_python.emoji_short_names['hearts'].__dict__
 30 | {
 31 |     'name': 'BLACK HEART SUIT',
 32 |     'unified': '2665',
 33 |     'variations': ['2665-FE0F'],
 34 |     'docomo': 'E68D',
 35 |     'au': 'EAA5',
 36 |     'softbank': 'E20C',
 37 |     'google': 'FEB1A',
 38 |     'image': '2665.png',
 39 |     'sheet_x': 1,
 40 |     'sheet_y': 34,
 41 |     'short_name': 'hearts',
 42 |     'short_names': ['hearts'],
 43 |     'text': None,
 44 |     'texts': None,
 45 |     'category': 'Symbols',
 46 |     'sort_order': 245,
 47 |     'added_in': '1.1',
 48 |     'has_img_apple': True,
 49 |     'has_img_google': True,
 50 |     'has_img_twitter': True,
 51 |     'has_img_emojione': True,
 52 |     'has_img_facebook': True,
 53 |     'has_img_messenger': True,
 54 |     'skin_variations': {},
 55 |     'obsoletes': None,
 56 |     'obsoleted_by': None
 57 | }
 58 | >>> emoji_data_python.find_by_shortname('moon')
 59 | [
 60 |     EmojiChar("NEW MOON SYMBOL"),
 61 |     EmojiChar("WAXING CRESCENT MOON SYMBOL"),
 62 |     EmojiChar("FIRST QUARTER MOON SYMBOL"),
 63 |     EmojiChar("WAXING GIBBOUS MOON SYMBOL"),
 64 |     EmojiChar("FULL MOON SYMBOL"),
 65 |     EmojiChar("WANING GIBBOUS MOON SYMBOL"),
 66 |     EmojiChar("LAST QUARTER MOON SYMBOL"),
 67 |     EmojiChar("WANING CRESCENT MOON SYMBOL"),
 68 |     EmojiChar("CRESCENT MOON"),
 69 |     EmojiChar("NEW MOON WITH FACE"),
 70 |     EmojiChar("FIRST QUARTER MOON WITH FACE"),
 71 |     EmojiChar("LAST QUARTER MOON WITH FACE"),
 72 |     EmojiChar("FULL MOON WITH FACE"),
 73 | ]
 74 | >>> [ (emoji.name, emoji.short_name, emoji.char) for emoji in emoji_data_python.find_by_name('tree')]
 75 | [
 76 |     ('EVERGREEN TREE', 'evergreen_tree', '🌲'),
 77 |     ('DECIDUOUS TREE', 'deciduous_tree', '🌳'),
 78 |     ('PALM TREE', 'palm_tree', '🌴'),
 79 |     ('CHRISTMAS TREE', 'christmas_tree', '🎄'),
 80 |     ('TANABATA TREE', 'tanabata_tree', '🎋')
 81 | ]
 82 | >>> emoji_data_python.replace_colons('Hello world ! :wave::skin-tone-3: :earth_africa: :exclamation:')
 83 | 'Hello world ! 👋🏼 🌍 ❗'
 84 | ```
 85 | 
 86 | ## Documentation
 87 | 
 88 | Documentation is autogenerated by Sphinx. With the right dependencies you should be able to build the docs :
 89 | 
 90 | ```bash
 91 | cd docs/
 92 | pip install -r requirements.txt # Install sphinx and its plugins
 93 | make html
 94 | cd _build/html
 95 | python -m http.server 5000 # Serve the docs on localhost:5000
 96 | ```
 97 | 
 98 | A pre-built hosted version is available here : http://emoji-data-python.readthedocs.io/en/latest/
 99 | 
100 | ## Testing
101 | 
102 | ```bash
103 | python -m unittest discover
104 | ```
105 | 
106 | ###### Thanks to [iamcal](https://github.com/iamcal/emoji-data) for the complete emoji data. This project is merely a python wrapper for his work.
107 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### SublimeText template
  3 | # cache files for sublime text
  4 | *.tmlanguage.cache
  5 | *.tmPreferences.cache
  6 | *.stTheme.cache
  7 | 
  8 | # workspace files are user-specific
  9 | *.sublime-workspace
 10 | 
 11 | # project files should be checked into the repository, unless a significant
 12 | # proportion of contributors will probably not be using SublimeText
 13 | # *.sublime-project
 14 | 
 15 | # sftp configuration file
 16 | sftp-config.json
 17 | 
 18 | # Package control specific files
 19 | Package Control.last-run
 20 | Package Control.ca-list
 21 | Package Control.ca-bundle
 22 | Package Control.system-ca-bundle
 23 | Package Control.cache/
 24 | Package Control.ca-certs/
 25 | Package Control.merged-ca-bundle
 26 | Package Control.user-ca-bundle
 27 | oscrypto-ca-bundle.crt
 28 | bh_unicode_properties.cache
 29 | 
 30 | # Sublime-github package stores a github token in this file
 31 | # https://packagecontrol.io/packages/sublime-github
 32 | GitHub.sublime-settings
 33 | ### Example user template
 34 | 
 35 | # IntelliJ project files
 36 | .idea
 37 | *.iml
 38 | out
 39 | gen
 40 | ### Python template
 41 | # Byte-compiled / optimized / DLL files
 42 | __pycache__/
 43 | *.py[cod]
 44 | *$py.class
 45 | .mypy_cache
 46 | 
 47 | # C extensions
 48 | *.so
 49 | 
 50 | # Distribution / packaging
 51 | .Python
 52 | env/
 53 | build/
 54 | develop-eggs/
 55 | dist/
 56 | downloads/
 57 | eggs/
 58 | .eggs/
 59 | lib/
 60 | lib64/
 61 | parts/
 62 | sdist/
 63 | var/
 64 | wheels/
 65 | *.egg-info/
 66 | .installed.cfg
 67 | *.egg
 68 | 
 69 | # PyInstaller
 70 | #  Usually these files are written by a python script from a template
 71 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 72 | *.manifest
 73 | *.spec
 74 | 
 75 | # Installer logs
 76 | pip-log.txt
 77 | pip-delete-this-directory.txt
 78 | 
 79 | # Unit test / coverage reports
 80 | htmlcov/
 81 | .tox/
 82 | .coverage
 83 | .coverage.*
 84 | .cache
 85 | nosetests.xml
 86 | coverage.xml
 87 | *,cover
 88 | .hypothesis/
 89 | 
 90 | # Translations
 91 | *.mo
 92 | *.pot
 93 | 
 94 | # Django stuff:
 95 | *.log
 96 | local_settings.py
 97 | 
 98 | # Flask stuff:
 99 | instance/
100 | .webassets-cache
101 | 
102 | # Scrapy stuff:
103 | .scrapy
104 | 
105 | # Sphinx documentation
106 | docs/_build/
107 | 
108 | # PyBuilder
109 | target/
110 | 
111 | # Jupyter Notebook
112 | .ipynb_checkpoints
113 | 
114 | # pyenv
115 | .python-version
116 | 
117 | # celery beat schedule file
118 | celerybeat-schedule
119 | 
120 | # SageMath parsed files
121 | *.sage.py
122 | 
123 | # dotenv
124 | .env
125 | 
126 | # virtualenv
127 | .venv
128 | venv/
129 | ENV/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | 
134 | # Rope project settings
135 | .ropeproject
136 | ### JetBrains template
137 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
138 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
139 | 
140 | # User-specific stuff:
141 | .idea/**/workspace.xml
142 | .idea/**/tasks.xml
143 | .idea/dictionaries
144 | 
145 | # Sensitive or high-churn files:
146 | .idea/**/dataSources/
147 | .idea/**/dataSources.ids
148 | .idea/**/dataSources.xml
149 | .idea/**/dataSources.local.xml
150 | .idea/**/sqlDataSources.xml
151 | .idea/**/dynamic.xml
152 | .idea/**/uiDesigner.xml
153 | 
154 | # Gradle:
155 | .idea/**/gradle.xml
156 | .idea/**/libraries
157 | 
158 | # Mongo Explorer plugin:
159 | .idea/**/mongoSettings.xml
160 | 
161 | ## File-based project format:
162 | *.iws
163 | 
164 | ## Plugin-specific files:
165 | 
166 | # IntelliJ
167 | /out/
168 | 
169 | # mpeltonen/sbt-idea plugin
170 | .idea_modules/
171 | 
172 | # JIRA plugin
173 | atlassian-ide-plugin.xml
174 | 
175 | # Crashlytics plugin (for Android Studio and IntelliJ)
176 | com_crashlytics_export_strings.xml
177 | crashlytics.properties
178 | crashlytics-build.properties
179 | fabric.properties
180 | ### VirtualEnv template
181 | # Virtualenv
182 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
183 | .Python
184 | [Bb]in
185 | [Ii]nclude
186 | [Ll]ib
187 | [Ll]ib64
188 | [Ll]ocal
189 | [Ss]cripts
190 | pyvenv.cfg
191 | .venv
192 | pip-selfcheck.json
193 | ### macOS template
194 | *.DS_Store
195 | .AppleDouble
196 | .LSOverride
197 | 
198 | # Icon must end with two \r
199 | Icon
200 | 
201 | 
202 | # Thumbnails
203 | ._*
204 | 
205 | # Files that might appear in the root of a volume
206 | .DocumentRevisions-V100
207 | .fseventsd
208 | .Spotlight-V100
209 | .TemporaryItems
210 | .Trashes
211 | .VolumeIcon.icns
212 | .com.apple.timemachine.donotpresent
213 | 
214 | # Directories potentially created on remote AFP share
215 | .AppleDB
216 | .AppleDesktop
217 | Network Trash Folder
218 | Temporary Items
219 | .apdisk
220 | 


--------------------------------------------------------------------------------
/emoji_data_python/emoji_char.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional, List, Dict
  2 | 
  3 | from .conversion import unified_to_char
  4 | 
  5 | 
  6 | # pylint: disable=too-many-instance-attributes
  7 | class EmojiChar:
  8 |     """Represents an emoji character as parsed from the json data"""
  9 | 
 10 |     def __init__(self, data_blob: dict) -> None:
 11 |         """Parse data into EmojiChar
 12 | 
 13 |         :param data_blob: Dictionary of values loaded from the json format in `emoji.json`
 14 | 
 15 |         >>> emoji.__dict__
 16 |         {
 17 |             'name': 'BLACK HEART SUIT',
 18 |             'unified': '2665',
 19 |             'variations': ['2665-FE0F'],
 20 |             'docomo': 'E68D',
 21 |             'au': 'EAA5',
 22 |             'softbank': 'E20C',
 23 |             'google': 'FEB1A',
 24 |             'image': '2665.png',
 25 |             'sheet_x': 1,
 26 |             'sheet_y': 34,
 27 |             'short_name': 'hearts',
 28 |             'short_names': ['hearts'],
 29 |             'text': None,
 30 |             'texts': None,
 31 |             'category': 'Symbols',
 32 |             'sort_order': 245,
 33 |             'added_in': '1.1',
 34 |             'has_img_apple': True,
 35 |             'has_img_google': True,
 36 |             'has_img_twitter': True,
 37 |             'has_img_emojione': True,
 38 |             'has_img_facebook': True,
 39 |             'has_img_messenger': True,
 40 |             'skin_variations': {},
 41 |             'obsoletes': None,
 42 |             'obsoleted_by': None
 43 |         }
 44 |         """
 45 |         self.name = data_blob.get("name")  # type: Optional[str]
 46 |         self.unified = data_blob.get("unified")  # type: str
 47 |         self.variations = data_blob.get("variations", [])  # type: List[str]
 48 | 
 49 |         self.docomo = data_blob.get("docomo")  # type: Optional[str]
 50 |         # pylint: disable=invalid-name
 51 |         self.au = data_blob.get("au")  # type: Optional[str]
 52 |         self.softbank = data_blob.get("softbank")  # type: Optional[str]
 53 |         self.google = data_blob.get("google")  # type: Optional[str]
 54 |         self.image = data_blob.get("image")  # type: Optional[str]
 55 |         self.sheet_x = data_blob.get("sheet_x")  # type: int
 56 |         self.sheet_y = data_blob.get("sheet_y")  # type: int
 57 | 
 58 |         self.short_name = data_blob.get("short_name")  # type: Optional[str]
 59 |         self.short_names = data_blob.get("short_names")  # type: List[str]
 60 |         self.text = data_blob.get("text")  # type: Optional[str]
 61 |         self.texts = data_blob.get("texts")  # type: List[str]
 62 | 
 63 |         self.category = data_blob.get("category")  # type: Optional[str]
 64 |         self.sort_order = data_blob.get("sort_order")  # type: int
 65 |         self.added_in = data_blob.get("added_in")  # type: str
 66 | 
 67 |         self.has_img_apple = data_blob.get("has_img_apple")  # type: bool
 68 |         self.has_img_google = data_blob.get("has_img_google")  # type: bool
 69 |         self.has_img_twitter = data_blob.get("has_img_twitter")  # type: bool
 70 |         self.has_img_emojione = data_blob.get("has_img_emojione")  # type: bool
 71 |         self.has_img_facebook = data_blob.get("has_img_facebook")  # type: bool
 72 |         self.has_img_messenger = data_blob.get("has_img_messenger")  # type: bool
 73 | 
 74 |         variations = data_blob.get("skin_variations", {})
 75 |         self.skin_variations = {
 76 |             code: EmojiChar(variation) for code, variation in variations.items()
 77 |         }  # type: Dict[str, EmojiChar]
 78 | 
 79 |         self.obsoletes = data_blob.get("obsoletes")  # type: Optional[str]
 80 |         self.obsoleted_by = data_blob.get("obsoleted_by")  # type: Optional[str]
 81 | 
 82 |     @property
 83 |     def all_variations(self) -> List[str]:
 84 |         """Lists all possible codepoint variations for given emoji.
 85 | 
 86 |         See :mod:`emoji_data_python.EmojiChar.chars` for a rendered version
 87 | 
 88 |         >>> emoji.all_variations
 89 |         ['261D', '261D-FE0F', '261D-1F3FB']
 90 |         """
 91 |         return (
 92 |             [self.unified]
 93 |             + self.variations
 94 |             + [self.unified + "-" + variation for variation in self.skin_variations.keys()]
 95 |         )
 96 | 
 97 |     @property
 98 |     def char(self) -> str:
 99 |         """Returns rendered char for emoji
100 | 
101 |         >>> emoji.char
102 |         '👋'
103 |         """
104 |         return unified_to_char(self.unified)
105 | 
106 |     @property
107 |     def chars(self) -> List[str]:
108 |         """Lists all possible *rendered* codepoint variations for given emoji.
109 |         This is useful when trying to find this particular emoji in a string by looking for any variation.
110 | 
111 |         >>> emoji.chars
112 |         ['👋', '👋🏻', '👋🏼', '👋🏽', '👋🏾', '👋🏿']
113 |         """
114 | 
115 |         return list(map(unified_to_char, self.all_variations))
116 | 
117 |     @property
118 |     def is_doublebyte(self) -> bool:
119 |         """`True` if emoji is coded on two or more bytes"""
120 |         return "-" in self.unified
121 | 
122 |     def __str__(self):
123 |         return self.name or self.short_name or self.unified
124 | 
125 |     def __repr__(self):
126 |         return f'EmojiChar("{self!s}")'
127 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Emoji Data Python documentation build configuration file, created by
  5 | # sphinx-quickstart on Fri Jun  9 12:07:07 2017.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another directory,
 17 | # add these directories to sys.path here. If the directory is relative to the
 18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 19 | #
 20 | import os
 21 | import sys
 22 | sys.path.insert(0, os.path.abspath('..'))
 23 | 
 24 | 
 25 | # -- General configuration ------------------------------------------------
 26 | 
 27 | # If your documentation needs a minimal Sphinx version, state it here.
 28 | #
 29 | # needs_sphinx = '1.0'
 30 | 
 31 | # Add any Sphinx extension module names here, as strings. They can be
 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 33 | # ones.
 34 | extensions = ['sphinx.ext.autodoc',
 35 |     'sphinx.ext.coverage',
 36 |     'sphinx.ext.viewcode',
 37 |     'sphinx.ext.githubpages',
 38 |     'sphinx_autodoc_typehints']
 39 | 
 40 | # Add any paths that contain templates here, relative to this directory.
 41 | templates_path = ['_templates']
 42 | 
 43 | # The suffix(es) of source filenames.
 44 | # You can specify multiple suffix as a list of string:
 45 | #
 46 | # source_suffix = ['.rst', '.md']
 47 | source_suffix = '.rst'
 48 | 
 49 | # The master toctree document.
 50 | master_doc = 'index'
 51 | 
 52 | # General information about the project.
 53 | project = 'Emoji Data Python'
 54 | copyright = '2017, Alexander Micklewright'
 55 | author = 'Alexander Micklewright'
 56 | 
 57 | # The version info for the project you're documenting, acts as replacement for
 58 | # |version| and |release|, also used in various other places throughout the
 59 | # built documents.
 60 | #
 61 | # The short X.Y version.
 62 | version = '1.0'
 63 | # The full version, including alpha/beta/rc tags.
 64 | release = '1.0.1'
 65 | 
 66 | # The language for content autogenerated by Sphinx. Refer to documentation
 67 | # for a list of supported languages.
 68 | #
 69 | # This is also used if you do content translation via gettext catalogs.
 70 | # Usually you set "language" from the command line for these cases.
 71 | language = None
 72 | 
 73 | # List of patterns, relative to source directory, that match files and
 74 | # directories to ignore when looking for source files.
 75 | # These patterns also affect html_static_path and html_extra_path
 76 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 77 | 
 78 | # The name of the Pygments (syntax highlighting) style to use.
 79 | pygments_style = 'sphinx'
 80 | 
 81 | # If true, `todo` and `todoList` produce output, else they produce nothing.
 82 | todo_include_todos = False
 83 | 
 84 | 
 85 | # -- Options for HTML output ----------------------------------------------
 86 | 
 87 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 88 | # a list of builtin themes.
 89 | #
 90 | html_theme = 'alabaster'
 91 | 
 92 | # Theme options are theme-specific and customize the look and feel of a theme
 93 | # further.  For a list of options available for each theme, see the
 94 | # documentation.
 95 | #
 96 | # html_theme_options = {}
 97 | 
 98 | # Add any paths that contain custom static files (such as style sheets) here,
 99 | # relative to this directory. They are copied after the builtin static files,
100 | # so a file named "default.css" will overwrite the builtin "default.css".
101 | html_static_path = ['_static']
102 | 
103 | 
104 | # -- Options for HTMLHelp output ------------------------------------------
105 | 
106 | # Output file base name for HTML help builder.
107 | htmlhelp_basename = 'EmojiDataPythondoc'
108 | 
109 | 
110 | # -- Options for LaTeX output ---------------------------------------------
111 | 
112 | latex_elements = {
113 |     # The paper size ('letterpaper' or 'a4paper').
114 |     #
115 |     # 'papersize': 'letterpaper',
116 | 
117 |     # The font size ('10pt', '11pt' or '12pt').
118 |     #
119 |     # 'pointsize': '10pt',
120 | 
121 |     # Additional stuff for the LaTeX preamble.
122 |     #
123 |     # 'preamble': '',
124 | 
125 |     # Latex figure (float) alignment
126 |     #
127 |     # 'figure_align': 'htbp',
128 | }
129 | 
130 | # Grouping the document tree into LaTeX files. List of tuples
131 | # (source start file, target name, title,
132 | #  author, documentclass [howto, manual, or own class]).
133 | latex_documents = [
134 |     (master_doc, 'EmojiDataPython.tex', 'Emoji Data Python Documentation',
135 |      'Alexander Micklewright', 'manual'),
136 | ]
137 | 
138 | 
139 | # -- Options for manual page output ---------------------------------------
140 | 
141 | # One entry per manual page. List of tuples
142 | # (source start file, name, description, authors, manual section).
143 | man_pages = [
144 |     (master_doc, 'emojidatapython', 'Emoji Data Python Documentation',
145 |      [author], 1)
146 | ]
147 | 
148 | 
149 | # -- Options for Texinfo output -------------------------------------------
150 | 
151 | # Grouping the document tree into Texinfo files. List of tuples
152 | # (source start file, target name, title, author,
153 | #  dir menu entry, description, category)
154 | texinfo_documents = [
155 |     (master_doc, 'EmojiDataPython', 'Emoji Data Python Documentation',
156 |      author, 'EmojiDataPython', 'One line description of project.',
157 |      'Miscellaneous'),
158 | ]
159 | 
160 | autoclass_content = 'both'
161 | 
162 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
  1 | [MASTER]
  2 | 
  3 | # A comma-separated list of package or module names from where C extensions may
  4 | # be loaded. Extensions are loading into the active Python interpreter and may
  5 | # run arbitrary code.
  6 | extension-pkg-whitelist=marisa_trie
  7 | 
  8 | # Add files or directories to the blacklist. They should be base names, not
  9 | # paths.
 10 | ignore=CVS
 11 | 
 12 | # Add files or directories matching the regex patterns to the blacklist. The
 13 | # regex matches against base names, not paths.
 14 | ignore-patterns=
 15 | 
 16 | # Python code to execute, usually for sys.path manipulation such as
 17 | # pygtk.require().
 18 | #init-hook=
 19 | 
 20 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
 21 | # number of processors available to use.
 22 | jobs=0
 23 | 
 24 | # Control the amount of potential inferred values when inferring a single
 25 | # object. This can help the performance when dealing with large functions or
 26 | # complex, nested conditions.
 27 | limit-inference-results=100
 28 | 
 29 | # List of plugins (as comma separated values of python module names) to load,
 30 | # usually to register additional checkers.
 31 | load-plugins=
 32 | 
 33 | # Pickle collected data for later comparisons.
 34 | persistent=yes
 35 | 
 36 | # Specify a configuration file.
 37 | #rcfile=
 38 | 
 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit
 40 | # user-friendly hints instead of false-positive error messages.
 41 | suggestion-mode=yes
 42 | 
 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the
 44 | # active Python interpreter and may run arbitrary code.
 45 | unsafe-load-any-extension=no
 46 | 
 47 | 
 48 | [MESSAGES CONTROL]
 49 | 
 50 | # Only show warnings with the listed confidence levels. Leave empty to show
 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
 52 | confidence=
 53 | 
 54 | # Disable the message, report, category or checker with the given id(s). You
 55 | # can either give multiple identifiers separated by comma (,) or put this
 56 | # option multiple times (only on the command line, not in the configuration
 57 | # file where it should appear only once). You can also use "--disable=all" to
 58 | # disable everything first and then reenable specific checks. For example, if
 59 | # you want to run only the similarities checker, you can use "--disable=all
 60 | # --enable=similarities". If you want to run only the classes checker, but have
 61 | # no Warning level messages displayed, use "--disable=all --enable=classes
 62 | # --disable=W".
 63 | disable=missing-class-docstring,
 64 |         missing-module-docstring,
 65 |         missing-function-docstring,
 66 |         inherit-non-class,
 67 |         too-few-public-methods,
 68 |         unnecessary-pass,
 69 |         cyclic-import
 70 | 
 71 | # Enable the message, report, category or checker with the given id(s). You can
 72 | # either give multiple identifier separated by comma (,) or put this option
 73 | # multiple time (only on the command line, not in the configuration file where
 74 | # it should appear only once). See also the "--disable" option for examples.
 75 | enable=c-extension-no-member, useless-suppression
 76 | 
 77 | 
 78 | [REPORTS]
 79 | 
 80 | # Python expression which should return a score less than or equal to 10. You
 81 | # have access to the variables 'error', 'warning', 'refactor', and 'convention'
 82 | # which contain the number of messages in each category, as well as 'statement'
 83 | # which is the total number of statements analyzed. This score is used by the
 84 | # global evaluation report (RP0004).
 85 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
 86 | 
 87 | # Template used to display messages. This is a python new-style format string
 88 | # used to format the message information. See doc for all details.
 89 | #msg-template=
 90 | 
 91 | # Set the output format. Available formats are text, parseable, colorized, json
 92 | # and msvs (visual studio). You can also give a reporter class, e.g.
 93 | # mypackage.mymodule.MyReporterClass.
 94 | output-format=colorized
 95 | 
 96 | # Tells whether to display a full report or only the messages.
 97 | reports=yes
 98 | 
 99 | # Activate the evaluation score.
100 | score=yes
101 | 
102 | 
103 | [REFACTORING]
104 | 
105 | # Maximum number of nested blocks for function / method body
106 | max-nested-blocks=5
107 | 
108 | # Complete name of functions that never returns. When checking for
109 | # inconsistent-return-statements if a never returning function is called then
110 | # it will be considered as an explicit return statement and no message will be
111 | # printed.
112 | never-returning-functions=sys.exit
113 | 
114 | 
115 | [LOGGING]
116 | 
117 | # Format style used to check logging format string. `old` means using %
118 | # formatting, `new` is for `{}` formatting, and `fstr` is for f-strings.
119 | logging-format-style=new
120 | 
121 | # Logging modules to check that the string format arguments are in logging
122 | # function parameter format.
123 | logging-modules=logging
124 | 
125 | 
126 | [SPELLING]
127 | 
128 | # Limits count of emitted suggestions for spelling mistakes.
129 | max-spelling-suggestions=4
130 | 
131 | # Spelling dictionary name. Available dictionaries: none. To make it work,
132 | # install the python-enchant package.
133 | spelling-dict=
134 | 
135 | # List of comma separated words that should not be checked.
136 | spelling-ignore-words=
137 | 
138 | # A path to a file that contains the private dictionary; one word per line.
139 | spelling-private-dict-file=
140 | 
141 | # Tells whether to store unknown words to the private dictionary (see the
142 | # --spelling-private-dict-file option) instead of raising a message.
143 | spelling-store-unknown-words=no
144 | 
145 | 
146 | [MISCELLANEOUS]
147 | 
148 | # List of note tags to take in consideration, separated by a comma.
149 | notes=FIXME,
150 |       XXX,
151 |       TODO
152 | 
153 | 
154 | [TYPECHECK]
155 | 
156 | # List of decorators that produce context managers, such as
157 | # contextlib.contextmanager. Add to this list to register other decorators that
158 | # produce valid context managers.
159 | contextmanager-decorators=contextlib.contextmanager
160 | 
161 | # List of members which are set dynamically and missed by pylint inference
162 | # system, and so shouldn't trigger E1101 when accessed. Python regular
163 | # expressions are accepted.
164 | generated-members=
165 | 
166 | # Tells whether missing members accessed in mixin class should be ignored. A
167 | # mixin class is detected if its name ends with "mixin" (case insensitive).
168 | ignore-mixin-members=yes
169 | 
170 | # Tells whether to warn about missing members when the owner of the attribute
171 | # is inferred to be None.
172 | ignore-none=yes
173 | 
174 | # This flag controls whether pylint should warn about no-member and similar
175 | # checks whenever an opaque object is returned when inferring. The inference
176 | # can return multiple potential results while evaluating a Python object, but
177 | # some branches might not be evaluated, which results in partial inference. In
178 | # that case, it might be useful to still emit no-member and other checks for
179 | # the rest of the inferred objects.
180 | ignore-on-opaque-inference=yes
181 | 
182 | # List of class names for which member attributes should not be checked (useful
183 | # for classes with dynamically set attributes). This supports the use of
184 | # qualified names.
185 | ignored-classes=optparse.Values,thread._local,_thread._local
186 | 
187 | # List of module names for which member attributes should not be checked
188 | # (useful for modules/projects where namespaces are manipulated during runtime
189 | # and thus existing member attributes cannot be deduced by static analysis). It
190 | # supports qualified module names, as well as Unix pattern matching.
191 | ignored-modules=alembic.context
192 | 
193 | # Show a hint with possible names when a member name was not found. The aspect
194 | # of finding the hint is based on edit distance.
195 | missing-member-hint=yes
196 | 
197 | # The minimum edit distance a name should have in order to be considered a
198 | # similar match for a missing member name.
199 | missing-member-hint-distance=1
200 | 
201 | # The total number of similar names that should be taken in consideration when
202 | # showing a hint for a missing member.
203 | missing-member-max-choices=1
204 | 
205 | # List of decorators that change the signature of a decorated function.
206 | signature-mutators=
207 | 
208 | 
209 | [VARIABLES]
210 | 
211 | # List of additional names supposed to be defined in builtins. Remember that
212 | # you should avoid defining new builtins when possible.
213 | additional-builtins=
214 | 
215 | # Tells whether unused global variables should be treated as a violation.
216 | allow-global-unused-variables=yes
217 | 
218 | # List of strings which can identify a callback function by name. A callback
219 | # name must start or end with one of those strings.
220 | callbacks=cb_,
221 |           _cb
222 | 
223 | # A regular expression matching the name of dummy variables (i.e. expected to
224 | # not be used).
225 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
226 | 
227 | # Argument names that match this expression will be ignored. Default to name
228 | # with leading underscore.
229 | ignored-argument-names=_.*|^ignored_|^unused_
230 | 
231 | # Tells whether we should check for unused import in __init__ files.
232 | init-import=no
233 | 
234 | # List of qualified module names which can have objects that can redefine
235 | # builtins.
236 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
237 | 
238 | 
239 | [FORMAT]
240 | 
241 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
242 | expected-line-ending-format=LF
243 | 
244 | # Regexp for a line that is allowed to be longer than the limit.
245 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
246 | 
247 | # Number of spaces of indent required inside a hanging or continued line.
248 | indent-after-paren=4
249 | 
250 | # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
251 | # tab).
252 | indent-string='    '
253 | 
254 | # Maximum number of characters on a single line.
255 | max-line-length=120
256 | 
257 | # Maximum number of lines in a module.
258 | max-module-lines=1000
259 | 
260 | # List of optional constructs for which whitespace checking is disabled. `dict-
261 | # separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
262 | # `trailing-comma` allows a space between comma and closing bracket: (a, ).
263 | # `empty-line` allows space-only lines.
264 | #no-space-check=
265 | 
266 | # Allow the body of a class to be on the same line as the declaration if body
267 | # contains single statement.
268 | single-line-class-stmt=no
269 | 
270 | # Allow the body of an if to be on the same line as the test if there is no
271 | # else.
272 | single-line-if-stmt=no
273 | 
274 | 
275 | [SIMILARITIES]
276 | 
277 | # Ignore comments when computing similarities.
278 | ignore-comments=yes
279 | 
280 | # Ignore docstrings when computing similarities.
281 | ignore-docstrings=yes
282 | 
283 | # Ignore imports when computing similarities.
284 | ignore-imports=yes
285 | 
286 | # Minimum lines number of a similarity.
287 | min-similarity-lines=4
288 | 
289 | 
290 | [BASIC]
291 | 
292 | # Naming style matching correct argument names.
293 | argument-naming-style=snake_case
294 | 
295 | # Regular expression matching correct argument names. Overrides argument-
296 | # naming-style.
297 | #argument-rgx=
298 | 
299 | # Naming style matching correct attribute names.
300 | # attr-naming-style=snake_case
301 | 
302 | # Regular expression matching correct attribute names. Overrides attr-naming-
303 | # style.
304 | attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*)|(id))$
305 | 
306 | # Bad variable names which should always be refused, separated by a comma.
307 | bad-names=foo,
308 |           bar,
309 |           baz,
310 |           toto,
311 |           tutu,
312 |           tata
313 | 
314 | # Naming style matching correct class attribute names.
315 | # class-attribute-naming-style=snake_case
316 | 
317 | # Regular expression matching correct class attribute names. Overrides class-
318 | # attribute-naming-style.
319 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__)|(id))$
320 | 
321 | # Naming style matching correct class names.
322 | class-naming-style=PascalCase
323 | 
324 | # Regular expression matching correct class names. Overrides class-naming-
325 | # style.
326 | #class-rgx=
327 | 
328 | # Naming style matching correct constant names.
329 | const-naming-style=UPPER_CASE
330 | 
331 | # Regular expression matching correct constant names. Overrides const-naming-
332 | # style.
333 | #const-rgx=
334 | 
335 | # Minimum line length for functions/classes that require docstrings, shorter
336 | # ones are exempt.
337 | docstring-min-length=-1
338 | 
339 | # Naming style matching correct function names.
340 | function-naming-style=snake_case
341 | 
342 | # Regular expression matching correct function names. Overrides function-
343 | # naming-style.
344 | #function-rgx=
345 | 
346 | # Good variable names which should always be accepted, separated by a comma.
347 | good-names=i,
348 |            j,
349 |            k,
350 |            ex,
351 |            Run,
352 |            _
353 | 
354 | # Include a hint for the correct naming format with invalid-name.
355 | include-naming-hint=no
356 | 
357 | # Naming style matching correct inline iteration names.
358 | inlinevar-naming-style=snake_case
359 | 
360 | # Regular expression matching correct inline iteration names. Overrides
361 | # inlinevar-naming-style.
362 | #inlinevar-rgx=
363 | 
364 | # Naming style matching correct method names.
365 | method-naming-style=snake_case
366 | 
367 | # Regular expression matching correct method names. Overrides method-naming-
368 | # style.
369 | #method-rgx=
370 | 
371 | # Naming style matching correct module names.
372 | module-naming-style=snake_case
373 | 
374 | # Regular expression matching correct module names. Overrides module-naming-
375 | # style.
376 | #module-rgx=
377 | 
378 | # Colon-delimited sets of names that determine each other's naming style when
379 | # the name regexes allow several styles.
380 | name-group=
381 | 
382 | # Regular expression which should only match function or class names that do
383 | # not require a docstring.
384 | no-docstring-rgx=^_
385 | 
386 | # List of decorators that produce properties, such as abc.abstractproperty. Add
387 | # to this list to register other decorators that produce valid properties.
388 | # These decorators are taken in consideration only for invalid-name.
389 | property-classes=abc.abstractproperty
390 | 
391 | # Naming style matching correct variable names.
392 | variable-naming-style=snake_case
393 | 
394 | # Regular expression matching correct variable names. Overrides variable-
395 | # naming-style.
396 | #variable-rgx=
397 | 
398 | 
399 | [STRING]
400 | 
401 | # This flag controls whether the implicit-str-concat-in-sequence should
402 | # generate a warning on implicit string concatenation in sequences defined over
403 | # several lines.
404 | check-str-concat-over-line-jumps=yes
405 | 
406 | 
407 | [IMPORTS]
408 | 
409 | # List of modules that can be imported at any level, not just the top level
410 | # one.
411 | allow-any-import-level=tensorflow, keras
412 | 
413 | # Allow wildcard imports from modules that define __all__.
414 | allow-wildcard-with-all=no
415 | 
416 | # Analyse import fallback blocks. This can be used to support both Python 2 and
417 | # 3 compatible code, which means that the block might have code that exists
418 | # only in one or another interpreter, leading to false positives when analysed.
419 | analyse-fallback-blocks=no
420 | 
421 | # Deprecated modules which should not be used, separated by a comma.
422 | deprecated-modules=optparse,tkinter.tix
423 | 
424 | # Create a graph of external dependencies in the given file (report RP0402 must
425 | # not be disabled).
426 | ext-import-graph=
427 | 
428 | # Create a graph of all (i.e. internal and external) dependencies in the
429 | # given file (report RP0402 must not be disabled).
430 | import-graph=
431 | 
432 | # Create a graph of internal dependencies in the given file (report RP0402 must
433 | # not be disabled).
434 | int-import-graph=
435 | 
436 | # Force import order to recognize a module as part of the standard
437 | # compatibility libraries.
438 | known-standard-library=
439 | 
440 | # Force import order to recognize a module as part of a third party library.
441 | known-third-party=enchant
442 | 
443 | # Couples of modules and preferred modules, separated by a comma.
444 | preferred-modules=
445 | 
446 | 
447 | [CLASSES]
448 | 
449 | # List of method names used to declare (i.e. assign) instance attributes.
450 | defining-attr-methods=__init__,
451 |                       __new__,
452 |                       setUp,
453 |                       __post_init__
454 | 
455 | # List of member names, which should be excluded from the protected access
456 | # warning.
457 | exclude-protected=_asdict,
458 |                   _fields,
459 |                   _replace,
460 |                   _source,
461 |                   _make
462 | 
463 | # List of valid names for the first argument in a class method.
464 | valid-classmethod-first-arg=cls
465 | 
466 | # List of valid names for the first argument in a metaclass class method.
467 | valid-metaclass-classmethod-first-arg=cls
468 | 
469 | 
470 | [DESIGN]
471 | 
472 | # Maximum number of arguments for function / method.
473 | max-args=5
474 | 
475 | # Maximum number of attributes for a class (see R0902).
476 | max-attributes=7
477 | 
478 | # Maximum number of boolean expressions in an if statement (see R0916).
479 | max-bool-expr=5
480 | 
481 | # Maximum number of branches for a function / method body.
482 | max-branches=12
483 | 
484 | # Maximum number of locals for function / method body.
485 | max-locals=15
486 | 
487 | # Maximum number of parents for a class (see R0901).
488 | max-parents=7
489 | 
490 | # Maximum number of public methods for a class (see R0904).
491 | max-public-methods=20
492 | 
493 | # Maximum number of return / yield for function / method body.
494 | max-returns=6
495 | 
496 | # Maximum number of statements in function / method body.
497 | max-statements=50
498 | 
499 | # Minimum number of public methods for a class (see R0903).
500 | min-public-methods=1
501 | 
502 | 
503 | [EXCEPTIONS]
504 | 
505 | # Exceptions that will emit a warning when being caught. Defaults to
506 | # "BaseException, Exception".
507 | overgeneral-exceptions=builtins.BaseException,
508 |                        builtins.Exception
509 | 


--------------------------------------------------------------------------------