├── .gitignore ├── sign_language_recognition ├── __init__.py ├── kaggle_asl_signs │ ├── README.md │ ├── kaggle_test.py │ └── __init__.py └── bin.py ├── .pre-commit-config.yaml ├── .github └── workflows │ ├── lint.yaml │ └── test.yaml ├── README.md ├── LICENSE └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ -------------------------------------------------------------------------------- /sign_language_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sign_language_recognition/kaggle_asl_signs/README.md: -------------------------------------------------------------------------------- 1 | # Kaggle ASL Signs 2 | 3 | This directory includes the winning entry for the 4 | [ASL Signs](https://www.kaggle.com/competitions/asl-signs) Kaggle competition. 5 | 6 | The files are hosted on HuggingFace under 7 | [sign/kaggle-asl-signs-1st-place](https://huggingface.co/sign/kaggle-asl-signs-1st-place). -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | files: ^sign_language_recognition/ 4 | hooks: 5 | - id: pylint 6 | name: pylint 7 | entry: pylint 8 | language: system 9 | types: [python] 10 | - id: pytest 11 | name: pytest 12 | entry: pytest sign_language_recognition 13 | language: system 14 | types: [python] 15 | 16 | -------------------------------------------------------------------------------- /sign_language_recognition/kaggle_asl_signs/kaggle_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from sign_language_recognition.kaggle_asl_signs import prob_to_label 4 | 5 | 6 | class KaggleASLSignsCase(unittest.TestCase): 7 | def test_label_mapping(self): 8 | label = prob_to_label([0.1, 0.2, 0.7]) 9 | self.assertEqual(label, "airplane") 10 | 11 | 12 | if __name__ == '__main__': 13 | unittest.main() 14 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | 4 | on: 5 | push: 6 | branches: [ master, main ] 7 | pull_request: 8 | branches: [ master, main ] 9 | 10 | 11 | jobs: 12 | test: 13 | name: Lint 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: actions/setup-python@v4 19 | with: 20 | python-version: '3.10' 21 | 22 | - name: Install Requirements 23 | run: pip install .[dev] 24 | 25 | - name: Lint Code 26 | run: pylint sign_language_recognition 27 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | 4 | on: 5 | push: 6 | branches: [ master, main ] 7 | pull_request: 8 | branches: [ master, main ] 9 | 10 | 11 | jobs: 12 | test: 13 | name: Test 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: actions/setup-python@v4 19 | with: 20 | python-version: '3.10' 21 | 22 | - name: Install Requirements 23 | run: pip install .[dev] 24 | 25 | - name: Test Code 26 | run: pytest sign_language_recognition 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sign Language Recognition Models 2 | 3 | Sign language recognition labels lexical signs from an isolated sign video. 4 | 5 | ## Usage 6 | 7 | ```bash 8 | pip install git+https://github.com/sign-language-processing/recognition 9 | ``` 10 | 11 | ### [Kaggle ASL Signs](sign_language_recognition/kaggle_asl_signs) 12 | 13 | The winning entry for the ASL Signs Kaggle competition. 14 | 15 | ```py 16 | from sign_language_recognition.kaggle_asl_signs import predict 17 | 18 | pose = ... # Load pose from a file 19 | class_probabilities = predict(pose) 20 | gloss = predict(pose, label=True) 21 | ``` 22 | 23 | Or in CLI, given a `.pose` file, and an ELAN file with a `SIGN` tier: 24 | 25 | ```bash 26 | sign_language_recognition --model="kaggle_asl_signs" --pose="sign.pose" --elan="sign.eaf" 27 | ``` 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Sign Language Processing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sign-language-recognition" 3 | description = "Sign Language Recognition Models For General Use" 4 | version = "0.0.1" 5 | authors = [ 6 | { name = "Amit Moryossef", email = "amitmoryossef@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | dependencies = [ 10 | "pose-format>=0.3.2", 11 | "numpy", 12 | "tflite-runtime", 13 | "pympi-ling", # Working with ELAN files in CLI 14 | "huggingface-hub", # for inference using a huggingface hosted model 15 | ] 16 | 17 | [project.optional-dependencies] 18 | dev = [ 19 | "pytest", 20 | "pylint", 21 | ] 22 | 23 | [tool.yapf] 24 | based_on_style = "google" 25 | column_limit = 120 26 | 27 | [tool.pylint] 28 | max-line-length = 120 29 | disable = [ 30 | "C0114", # Missing module docstring 31 | "C0115", # Missing class docstring 32 | "C0116", # Missing function or method docstring 33 | "C0415", # Import outside toplevel 34 | ] 35 | good-names = ["i", "f", "x", "y"] 36 | 37 | [tool.pylint.typecheck] 38 | generated-members = ["torch.*", "numpy.*", "cv2.*"] 39 | 40 | [tool.setuptools] 41 | packages = [ 42 | "sign_language_recognition", 43 | "sign_language_recognition.kaggle_asl_signs" 44 | ] 45 | 46 | [tool.setuptools.package-data] 47 | sign_language_recognition = ["**/*.json"] 48 | 49 | [tool.pytest.ini_options] 50 | addopts = "-v" 51 | testpaths = ["sign_language_recognition"] 52 | 53 | [project.scripts] 54 | sign_language_recognition = "sign_language_recognition.bin:main" -------------------------------------------------------------------------------- /sign_language_recognition/bin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import importlib 5 | import math 6 | 7 | from pose_format.pose import Pose 8 | from tqdm import tqdm 9 | import pympi 10 | 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--model', required=True, type=str, 15 | choices=['kaggle_asl_signs'], 16 | help='model to use') 17 | parser.add_argument('--pose', required=True, type=str, help='path to input pose file') 18 | parser.add_argument('--elan', required=True, type=str, help='path to elan file') 19 | 20 | return parser.parse_args() 21 | 22 | 23 | def main(): 24 | args = get_args() 25 | 26 | module = importlib.import_module(f"sign_language_recognition.{args.model}") 27 | 28 | print('Loading input pose...') 29 | with open(args.pose, 'rb') as pose_file: 30 | pose = Pose.read(pose_file.read()) 31 | 32 | print('Loading ELAN file...') 33 | eaf = pympi.Elan.Eaf(file_path=args.elan, author="sign-langauge-processing/recognition") 34 | sign_annotations = eaf.get_annotation_data_for_tier('SIGN') 35 | 36 | print('Predicting signs...') 37 | for segment in tqdm(sign_annotations): 38 | start_frame = int((segment[0] / 1000) * pose.body.fps) 39 | end_frame = math.ceil((segment[1] / 1000) * pose.body.fps) 40 | 41 | cropped_pose = Pose( 42 | header=pose.header, 43 | body=pose.body[start_frame:end_frame] 44 | ) 45 | gloss = module.predict(cropped_pose, label=True) 46 | eaf.remove_annotation('SIGN', segment[0]) 47 | eaf.add_annotation('SIGN', segment[0], segment[1], gloss) 48 | 49 | print('Saving ELAN file...') 50 | eaf.to_file(args.elan) 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | # python -m sign_language_recognition.bin --model="kaggle_asl_signs" --pose="sign.pose" --elan="sign.eaf" 56 | -------------------------------------------------------------------------------- /sign_language_recognition/kaggle_asl_signs/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | 3 | import json 4 | import numpy as np 5 | from pose_format import Pose 6 | from huggingface_hub import hf_hub_download 7 | 8 | try: 9 | import tflite_runtime.interpreter as tflite 10 | except (ImportError, ModuleNotFoundError): 11 | import tensorflow as tf 12 | 13 | tflite = tf.lite 14 | 15 | HUGGINGFACE_REPO_ID = "sign/kaggle-asl-signs-1st-place" 16 | 17 | 18 | @lru_cache(maxsize=1) 19 | def get_paths(): 20 | model_path = hf_hub_download(repo_id=HUGGINGFACE_REPO_ID, filename="model.tflite") 21 | index_map_path = hf_hub_download(repo_id=HUGGINGFACE_REPO_ID, filename="sign_to_prediction_index_map.json") 22 | return { 23 | "model": model_path, 24 | "index_map": index_map_path 25 | } 26 | 27 | 28 | @lru_cache(maxsize=1) 29 | def get_model_runner(): 30 | paths = get_paths() 31 | model = tflite.Interpreter(paths["model"]) 32 | return model.get_signature_runner('serving_default') 33 | 34 | 35 | def prepare_pose(pose: Pose): 36 | # Reorder the pose components based on the Kaggle implementation 37 | pose = pose.get_components([ 38 | "FACE_LANDMARKS", 39 | "LEFT_HAND_LANDMARKS", 40 | "POSE_LANDMARKS", 41 | "RIGHT_HAND_LANDMARKS" 42 | ]) 43 | 44 | # scale the point values 45 | pose.body.data = np.float32(pose.body.data / np.array( 46 | [pose.header.dimensions.width, pose.header.dimensions.height, 1])) 47 | 48 | # normalize the pose 49 | info = pose.header.normalization_info(p1=("POSE_LANDMARKS", "RIGHT_SHOULDER"), 50 | p2=("POSE_LANDMARKS", "LEFT_SHOULDER")) 51 | pose = pose.normalize(info) 52 | 53 | return pose 54 | 55 | 56 | @lru_cache(maxsize=1) 57 | def get_labels(): 58 | paths = get_paths() 59 | with open(paths["index_map"], 'r', encoding="utf-8") as f: 60 | index_map = json.load(f) 61 | 62 | # Invert the index map 63 | return {v: k for k, v in index_map.items()} 64 | 65 | 66 | def prob_to_label(prob): 67 | labels = get_labels() 68 | return labels[np.argmax(prob)] 69 | 70 | 71 | def predict(pose: Pose, label=False): 72 | prepared_pose = prepare_pose(pose) 73 | tensor = prepared_pose.body.data.filled(0) 74 | prediction_fn = get_model_runner() 75 | 76 | output = prediction_fn(inputs=tensor) 77 | class_prob = output['outputs'].reshape(-1) 78 | 79 | if label: 80 | return prob_to_label(class_prob) 81 | 82 | return class_prob 83 | --------------------------------------------------------------------------------