├── .gitignore
├── sign_language_recognition
    ├── __init__.py
    ├── kaggle_asl_signs
    │   ├── README.md
    │   ├── kaggle_test.py
    │   └── __init__.py
    └── bin.py
├── .pre-commit-config.yaml
├── .github
    └── workflows
    │   ├── lint.yaml
    │   └── test.yaml
├── README.md
├── LICENSE
└── pyproject.toml


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/


--------------------------------------------------------------------------------
/sign_language_recognition/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sign_language_recognition/kaggle_asl_signs/README.md:
--------------------------------------------------------------------------------
1 | # Kaggle ASL Signs
2 | 
3 | This directory includes the winning entry for the
4 | [ASL Signs](https://www.kaggle.com/competitions/asl-signs) Kaggle competition.
5 | 
6 | The files are hosted on HuggingFace under
7 | [sign/kaggle-asl-signs-1st-place](https://huggingface.co/sign/kaggle-asl-signs-1st-place).


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: local
 3 |     files: ^sign_language_recognition/
 4 |     hooks:
 5 |       - id: pylint
 6 |         name: pylint
 7 |         entry: pylint
 8 |         language: system
 9 |         types: [python]
10 |       - id: pytest
11 |         name: pytest
12 |         entry: pytest sign_language_recognition
13 |         language: system
14 |         types: [python]
15 | 
16 | 


--------------------------------------------------------------------------------
/sign_language_recognition/kaggle_asl_signs/kaggle_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from sign_language_recognition.kaggle_asl_signs import prob_to_label
 4 | 
 5 | 
 6 | class KaggleASLSignsCase(unittest.TestCase):
 7 |     def test_label_mapping(self):
 8 |         label = prob_to_label([0.1, 0.2, 0.7])
 9 |         self.assertEqual(label, "airplane")
10 | 
11 | 
# Run the test case with unittest when this module is executed directly.
if __name__ == '__main__':
    unittest.main()
14 | 


--------------------------------------------------------------------------------
/.github/workflows/lint.yaml:
--------------------------------------------------------------------------------
 1 | name: Lint
 2 | 
 3 | 
 4 | on:
 5 |   push:
 6 |     branches: [ master, main ]
 7 |   pull_request:
 8 |     branches: [ master, main ]
 9 | 
10 | 
11 | jobs:
12 |   test:
13 |     name: Lint
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |       - uses: actions/checkout@v3
18 |       - uses: actions/setup-python@v4
19 |         with:
20 |           python-version: '3.10'
21 | 
22 |       - name: Install Requirements
23 |         run: pip install .[dev]
24 | 
25 |       - name: Lint Code
26 |         run: pylint sign_language_recognition
27 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | 
 4 | on:
 5 |   push:
 6 |     branches: [ master, main ]
 7 |   pull_request:
 8 |     branches: [ master, main ]
 9 | 
10 | 
11 | jobs:
12 |   test:
13 |     name: Test
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |       - uses: actions/checkout@v3
18 |       - uses: actions/setup-python@v4
19 |         with:
20 |           python-version: '3.10'
21 | 
22 |       - name: Install Requirements
23 |         run: pip install .[dev]
24 | 
25 |       - name: Test Code
26 |         run: pytest sign_language_recognition
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Sign Language Recognition Models
 2 | 
 3 | Sign language recognition labels lexical signs from an isolated sign video.
 4 | 
 5 | ## Usage
 6 | 
 7 | ```bash
 8 | pip install git+https://github.com/sign-language-processing/recognition
 9 | ```
10 | 
11 | ### [Kaggle ASL Signs](sign_language_recognition/kaggle_asl_signs)
12 | 
13 | The winning entry for the ASL Signs Kaggle competition.
14 | 
15 | ```py
16 | from sign_language_recognition.kaggle_asl_signs import predict
17 | 
18 | pose = ... # Load pose from a file
19 | class_probabilities = predict(pose)
20 | gloss = predict(pose, label=True)
21 | ```
22 | 
23 | Or from the command line, given a `.pose` file and an ELAN file with a `SIGN` tier (each annotation on that tier is overwritten with the predicted gloss):
24 | 
25 | ```bash
26 | sign_language_recognition --model="kaggle_asl_signs" --pose="sign.pose" --elan="sign.eaf"
27 | ```
28 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Sign Language Processing
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "sign-language-recognition"
 3 | description = "Sign Language Recognition Models For General Use"
 4 | version = "0.0.1"
 5 | authors = [
 6 |     { name = "Amit Moryossef", email = "amitmoryossef@gmail.com" }
 7 | ]
 8 | readme = "README.md"
 9 | dependencies = [
10 |     "pose-format>=0.3.2",
11 |     "numpy",
12 |     "tflite-runtime",
13 |     "pympi-ling", # Working with ELAN files in CLI
14 |     "huggingface-hub", # for inference using a huggingface hosted model
15 | ]
16 | 
17 | [project.optional-dependencies]
18 | dev = [
19 |     "pytest",
20 |     "pylint",
21 | ]
22 | 
23 | [tool.yapf]
24 | based_on_style = "google"
25 | column_limit = 120
26 | 
27 | [tool.pylint]
28 | max-line-length = 120
29 | disable = [
30 |     "C0114", # Missing module docstring
31 |     "C0115", # Missing class docstring
32 |     "C0116", # Missing function or method docstring
33 |     "C0415", # Import outside toplevel
34 | ]
35 | good-names = ["i", "f", "x", "y"]
36 | 
37 | [tool.pylint.typecheck]
38 | generated-members = ["torch.*", "numpy.*", "cv2.*"]
39 | 
40 | [tool.setuptools]
41 | packages = [
42 |     "sign_language_recognition",
43 |     "sign_language_recognition.kaggle_asl_signs"
44 | ]
45 | 
46 | [tool.setuptools.package-data]
47 | sign_language_recognition = ["**/*.json"]
48 | 
49 | [tool.pytest.ini_options]
50 | addopts = "-v"
51 | testpaths = ["sign_language_recognition"]
52 | 
53 | [project.scripts]
54 | sign_language_recognition = "sign_language_recognition.bin:main"


--------------------------------------------------------------------------------
/sign_language_recognition/bin.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import argparse
 4 | import importlib
 5 | import math
 6 | 
 7 | from pose_format.pose import Pose
 8 | from tqdm import tqdm
 9 | import pympi
10 | 
11 | 
def get_args():
    """Parse the command-line arguments of the recognition CLI.

    Returns:
        The parsed namespace with the required string options ``model``, ``pose`` and ``elan``.
    """
    arg_parser = argparse.ArgumentParser()

    # Every option is a required string; ``--model`` is additionally limited to the supported models.
    option_specs = (
        ('--model', {'choices': ['kaggle_asl_signs'], 'help': 'model to use'}),
        ('--pose', {'help': 'path to input pose file'}),
        ('--elan', {'help': 'path to elan file'}),
    )
    for flag, extra in option_specs:
        arg_parser.add_argument(flag, required=True, type=str, **extra)

    return arg_parser.parse_args()
21 | 
22 | 
def main():
    """Re-label every annotation on the ``SIGN`` tier of an ELAN file with the model's prediction.

    Reads the pose file and the ELAN file named on the command line, crops the pose to the
    time span of each ``SIGN`` annotation, predicts a gloss for the crop with the selected
    model, and writes the result back to the same ELAN file.
    """
    args = get_args()

    module = importlib.import_module(f"sign_language_recognition.{args.model}")

    print('Loading input pose...')
    with open(args.pose, 'rb') as pose_file:
        pose = Pose.read(pose_file.read())

    print('Loading ELAN file...')
    eaf = pympi.Elan.Eaf(file_path=args.elan, author="sign-language-processing/recognition")
    sign_annotations = eaf.get_annotation_data_for_tier('SIGN')

    print('Predicting signs...')
    fps = pose.body.fps
    for segment in tqdm(sign_annotations):
        start_ms, end_ms = segment[0], segment[1]

        # Annotation times are divided by 1000 to get seconds; the frame range is
        # rounded outwards so the crop covers the whole annotation.
        start_frame = int((start_ms / 1000) * fps)
        end_frame = math.ceil((end_ms / 1000) * fps)

        cropped_pose = Pose(header=pose.header, body=pose.body[start_frame:end_frame])
        gloss = module.predict(cropped_pose, label=True)

        # pympi removes every annotation whose span contains the given time point,
        # boundaries included. Removing at the start time would therefore also delete
        # a preceding annotation that ends exactly where this one starts, so a point
        # inside the annotation is used instead.
        eaf.remove_annotation('SIGN', (start_ms + end_ms) // 2)
        eaf.add_annotation('SIGN', start_ms, end_ms, gloss)

    print('Saving ELAN file...')
    eaf.to_file(args.elan)
51 | 
52 | 
if __name__ == '__main__':
    # Example invocation:
    # python -m sign_language_recognition.bin --model="kaggle_asl_signs" --pose="sign.pose" --elan="sign.eaf"
    main()
56 | 


--------------------------------------------------------------------------------
/sign_language_recognition/kaggle_asl_signs/__init__.py:
--------------------------------------------------------------------------------
 1 | from functools import lru_cache
 2 | 
 3 | import json
 4 | import numpy as np
 5 | from pose_format import Pose
 6 | from huggingface_hub import hf_hub_download
 7 | 
 8 | try:
 9 |     import tflite_runtime.interpreter as tflite
10 | except (ImportError, ModuleNotFoundError):
11 |     import tensorflow as tf
12 | 
13 |     tflite = tf.lite
14 | 
15 | HUGGINGFACE_REPO_ID = "sign/kaggle-asl-signs-1st-place"
16 | 
17 | 
@lru_cache(maxsize=1)
def get_paths():
    """Fetch the model files from the HuggingFace repository.

    The result is cached, so the files are only requested once per process.

    Returns:
        A dict with the keys ``"model"`` and ``"index_map"`` holding the values
        returned by ``hf_hub_download`` for the respective files.
    """
    remote_files = {
        "model": "model.tflite",
        "index_map": "sign_to_prediction_index_map.json",
    }
    return {
        key: hf_hub_download(repo_id=HUGGINGFACE_REPO_ID, filename=filename)
        for key, filename in remote_files.items()
    }
26 | 
27 | 
@lru_cache(maxsize=1)
def get_model_runner():
    """Create the TFLite interpreter for the model file and return its ``serving_default`` runner.

    The runner is cached, so the interpreter is only built once per process.
    """
    model_file = get_paths()["model"]
    interpreter = tflite.Interpreter(model_file)
    return interpreter.get_signature_runner('serving_default')
33 | 
34 | 
def prepare_pose(pose: Pose):
    """Bring a pose into the layout used by the Kaggle model.

    Selects the components in the order of the Kaggle implementation, divides the
    point values by the header's width and height, and normalizes the pose by the
    two shoulder landmarks.

    Args:
        pose: The pose to prepare.

    Returns:
        The normalized pose.
    """
    # Component order follows the Kaggle implementation
    component_order = [
        "FACE_LANDMARKS",
        "LEFT_HAND_LANDMARKS",
        "POSE_LANDMARKS",
        "RIGHT_HAND_LANDMARKS"
    ]
    pose = pose.get_components(component_order)

    # Divide x by the width and y by the height; the third dimension is left unscaled
    dimensions = pose.header.dimensions
    scale = np.array([dimensions.width, dimensions.height, 1])
    pose.body.data = np.float32(pose.body.data / scale)

    # Normalize relative to the shoulders
    shoulders = pose.header.normalization_info(p1=("POSE_LANDMARKS", "RIGHT_SHOULDER"),
                                               p2=("POSE_LANDMARKS", "LEFT_SHOULDER"))
    return pose.normalize(shoulders)
54 | 
55 | 
@lru_cache(maxsize=1)
def get_labels():
    """Load the index map file and return it inverted, so that its values map to its keys.

    The mapping is cached, so the file is only read once per process.
    """
    index_map_file = get_paths()["index_map"]
    with open(index_map_file, 'r', encoding="utf-8") as f:
        sign_to_index = json.load(f)

    # Swap keys and values so a label can be looked up by its prediction index
    return dict(zip(sign_to_index.values(), sign_to_index.keys()))
64 | 
65 | 
def prob_to_label(prob):
    """Return the label stored for the index of the largest value in ``prob``."""
    index_to_label = get_labels()
    best_index = np.argmax(prob)
    return index_to_label[best_index]
69 | 
70 | 
def predict(pose: Pose, label=False):
    """Run the model on a pose.

    Args:
        pose: The pose to classify; it is passed through ``prepare_pose`` first.
        label: When true, return the label of the most probable class instead of
            the class probabilities.

    Returns:
        The flattened model output, or the label looked up via ``prob_to_label``
        when ``label`` is true.
    """
    # Masked values are replaced by 0 before the data is handed to the model
    model_input = prepare_pose(pose).body.data.filled(0)
    run_model = get_model_runner()

    class_prob = run_model(inputs=model_input)['outputs'].reshape(-1)

    return prob_to_label(class_prob) if label else class_prob
83 | 


--------------------------------------------------------------------------------