├── .gitattributes ├── .gitignore ├── Assets ├── DataSet Examples.png ├── Demo Gif.gif ├── Demo Video.mov ├── Model_Accuracy.png ├── Ours Dataset Preview.jpg └── Samples_Kaggle_Image.png ├── Data ├── DataBase │ └── database.sqlite ├── Model │ ├── model.json │ └── weights.h5 ├── With_Data_Augmentation │ ├── Checkpoints │ │ └── best_weights.h5 │ ├── Model │ │ ├── model.json │ │ └── weights.h5 │ ├── acc.jpg │ └── val_acc.jpg ├── Without_Data_Augmentation │ ├── Checkpoints │ │ └── best_weights.h5 │ ├── Model │ │ ├── model.json │ │ └── weights.h5 │ ├── acc.jpg │ └── val_acc.jpg └── download_dataset.sh ├── LICENSE ├── README.md ├── database_process.py ├── get_dataset.py ├── get_model.py ├── live.py ├── predict.py ├── requirements.txt └── train.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.json filter=lfs diff=lfs merge=lfs -text 2 | *.h5 filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .DS_Store 3 | /Data/Train_Data 4 | /Data/npy_dataset 5 | /Data/Checkpoints 6 | -------------------------------------------------------------------------------- /Assets/DataSet Examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Assets/DataSet Examples.png -------------------------------------------------------------------------------- /Assets/Demo Gif.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Assets/Demo Gif.gif -------------------------------------------------------------------------------- /Assets/Demo Video.mov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Assets/Demo Video.mov -------------------------------------------------------------------------------- /Assets/Model_Accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Assets/Model_Accuracy.png -------------------------------------------------------------------------------- /Assets/Ours Dataset Preview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Assets/Ours Dataset Preview.jpg -------------------------------------------------------------------------------- /Assets/Samples_Kaggle_Image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Assets/Samples_Kaggle_Image.png -------------------------------------------------------------------------------- /Data/DataBase/database.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Data/DataBase/database.sqlite 
-------------------------------------------------------------------------------- /Data/Model/model.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:576ab13f46b00293c67f76b2de8d3f8573d0666209c1aee4eca1a50e16b14efa 3 | size 7096 4 | -------------------------------------------------------------------------------- /Data/Model/weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9ac513c2d09da761999fcd6b8d140b670d1c3c13534bc005e52f05ae3461885c 3 | size 18070760 4 | -------------------------------------------------------------------------------- /Data/With_Data_Augmentation/Checkpoints/best_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2ddcf8983288a4130fbcef83b5336eb8662950805c4844ffe2f92cb738974598 3 | size 18070760 4 | -------------------------------------------------------------------------------- /Data/With_Data_Augmentation/Model/model.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:576ab13f46b00293c67f76b2de8d3f8573d0666209c1aee4eca1a50e16b14efa 3 | size 7096 4 | -------------------------------------------------------------------------------- /Data/With_Data_Augmentation/Model/weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9ac513c2d09da761999fcd6b8d140b670d1c3c13534bc005e52f05ae3461885c 3 | size 18070760 4 | -------------------------------------------------------------------------------- /Data/With_Data_Augmentation/acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Data/With_Data_Augmentation/acc.jpg -------------------------------------------------------------------------------- /Data/With_Data_Augmentation/val_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Data/With_Data_Augmentation/val_acc.jpg -------------------------------------------------------------------------------- /Data/Without_Data_Augmentation/Checkpoints/best_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1f35eba4ce6d306e073e40f1a6510131b39371b07930902cbe70e2d2709179e0 3 | size 18070760 4 | -------------------------------------------------------------------------------- /Data/Without_Data_Augmentation/Model/model.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:576ab13f46b00293c67f76b2de8d3f8573d0666209c1aee4eca1a50e16b14efa 3 | size 7096 4 | -------------------------------------------------------------------------------- /Data/Without_Data_Augmentation/Model/weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:043287a82ab08b9c31dd6ae46ddbce9ea6d5613c9b62bba9239aad4474c6383a 3 | size 18070760 4 | 
-------------------------------------------------------------------------------- /Data/Without_Data_Augmentation/acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Data/Without_Data_Augmentation/acc.jpg -------------------------------------------------------------------------------- /Data/Without_Data_Augmentation/val_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ardamavi/Vocalize-Sign-Language/19b7fb2c2768ae0bb77656973ae9bd97446eb2a1/Data/Without_Data_Augmentation/val_acc.jpg -------------------------------------------------------------------------------- /Data/download_dataset.sh: -------------------------------------------------------------------------------- 1 | # Arda Mavi 2 | # https://github.com/ardamavi/Sign-Language-Digits-Dataset 3 | 4 | wget https://github.com/ardamavi/Sign-Language-Digits-Dataset/archive/master.zip 5 | unzip master.zip 6 | mv Sign-Language-Digits-Dataset-master/Dataset Train_Data 7 | rm -rf master.zip Sign-Language-Digits-Dataset-master 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright 2017 Arda Mavi
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Vocalize Sign Language
### By Arda Mavi

Vocalizing sign language with deep learning.

In this project we use our own `Sign Language Dataset`.

Vocalization sign language iOS app: [Vocalization-Sign-Language-iOS](https://github.com/ardamavi/Vocalization-Sign-Language-iOS)

|Demo|
|:-:|
|![Demo](Assets/Demo%20Gif.gif)|
|[Watch Demo Videos 🔊 ](https://github.com/ardamavi/Vocalize-Sign-Language/blob/master/Assets)|

# Contents:
[For Users](#for-users)
- [Important Notes For Users](#important-notes-for-users)
- [Additional Info](#additional-info)
- [Running Program](#running-program)

[For Developers](#for-developers)
- [Getting Dataset](#getting-dataset)
- [Artificial Intelligence Model Accuracy](#artificial-intelligence-model-accuracy)
- [Model Architecture](#model-architecture)
- [Model Training](#model-training)
- [Using TensorBoard](#using-tensorboard)
- [Creating Dataset](#creating-dataset)

[Important Notes](#important-notes)

# For Users:

### Important Notes For Users:
- This project works best with a white background and good lighting.

### Additional Info:
In this project, I added deep learning to my old lip reading project [SesimVar](https://github.com/ardamavi/SesimVar) (Turkish).

## Running Program:
Note: If something fails, see the [For Developers](#for-developers) section below.

### Using Live Vocalize Command:
`python3 live.py`
Note: You can change the delay time if you want.

### Using Predict Command:
`python3 predict.py <image_path>`

# For Developers:

## Getting Dataset:
`cd Data && chmod +x download_dataset.sh && ./download_dataset.sh`

### New dataset is available on Kaggle: [27 Class Sign Language Dataset - Kaggle](https://www.kaggle.com/ardamavi/27-class-sign-language-dataset)
### `(Updated) Model` coming soon!

|Demo|
|:-:|
|![Demo](Assets/Demo%20Gif.gif)|
|[Watch Demo Videos 🔊 ](https://github.com/ardamavi/Vocalize-Sign-Language/blob/master/Assets)|

|DataSet Examples|Model Accuracy|
|:-:|:-:|
|![DataSet Examples](Assets/DataSet%20Examples.png)|![Model Accuracy](Assets/Model_Accuracy.png)|

#### Dataset:
In this project we use our own [Sign Language Digits Dataset](https://github.com/ardamavi/Sign-Language-Digits-Dataset).

#### Dataset Preview:
![Dataset Preview](Assets/Ours%20Dataset%20Preview.jpg)

### Artificial Intelligence Model Accuracy:
At the end of 25 epochs, 97% accuracy was achieved on the test set without data augmentation:

![Training accuracy without augmentation](Data/Without_Data_Augmentation/acc.jpg)
![Validation accuracy without augmentation](Data/Without_Data_Augmentation/val_acc.jpg)

At the end of 25 epochs, 99% accuracy was achieved on the test set with data augmentation:

![Training accuracy with augmentation](Data/With_Data_Augmentation/acc.jpg)
![Validation accuracy with augmentation](Data/With_Data_Augmentation/val_acc.jpg)
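
The augmented run uses Keras's `ImageDataGenerator`. A minimal sketch of that pipeline, mirroring the commented-out block in `train.py` (`X`, `Y`, `X_test`, `Y_test`, `model`, `batch_size`, and `epochs` as defined there):

```python
# Sketch: train with on-the-fly data augmentation (see the commented block in train.py).
from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
                             height_shift_range=0.1, zoom_range=0.2)
datagen.fit(X)  # X: training images of shape (n, 64, 64, 1)

model.fit_generator(datagen.flow(X, Y, batch_size=batch_size),
                    steps_per_epoch=X.shape[0] // batch_size,
                    epochs=epochs, validation_data=(X_test, Y_test))
```

To also cover left-hand signs, add `horizontal_flip=True` to the generator (see the note in `train.py`).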
### Model Architecture:
- Input Data
Shape: 64x64x1

- Convolutional Layer
32 filters
Filter shape: 3x3
Strides: 1x1
Padding: Same

- Activation
Function: ReLU

- Convolutional Layer
64 filters
Filter shape: 3x3
Strides: 1x1
Padding: Same

- Activation
Function: ReLU

- Max Pooling
Pool shape: 2x2
Strides: 2x2

- Convolutional Layer
64 filters
Filter shape: 3x3
Strides: 1x1
Padding: Same

- Activation
Function: ReLU

- Max Pooling
Pool shape: 2x2
Strides: 2x2

- Convolutional Layer
128 filters
Filter shape: 3x3
Strides: 1x1
Padding: Same

- Activation
Function: ReLU

- Max Pooling
Pool shape: 2x2
Strides: 2x2

- Flatten

- Dense
Size: 526

- Activation
Function: ReLU

- Dropout
Rate: 0.5

- Dense
Size: 128

- Activation
Function: ReLU

- Dropout
Rate: 0.5

- Dense
Size: Class count of the dataset

- Activation
Function: Softmax

##### Optimizer: Adadelta
##### Loss: Categorical Crossentropy
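
The totals below can be reproduced layer by layer. A quick sanity check (it assumes the 10-class digits dataset, so the final Dense layer has 10 units):

```python
# Conv params: (3*3*in_channels + 1) * filters; Dense params: (inputs + 1) * outputs.
def conv(in_ch, filters): return (3 * 3 * in_ch + 1) * filters
def dense(n_in, n_out): return (n_in + 1) * n_out

total = (conv(1, 32) + conv(32, 64)  # 64x64 input; pooling -> 32x32
         + conv(64, 64)              # pooling -> 16x16
         + conv(64, 128)             # pooling -> 8x8
         + dense(8 * 8 * 128, 526)   # Flatten: 8*8*128 = 8192
         + dense(526, 128)
         + dense(128, 10))           # 10 classes
print(total)  # 4507864
```

Almost all of the parameters sit in the first Dense layer, which is why the flattened 8x8x128 feature map dominates the model size.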

Total params: 4,507,864
Trainable params: 4,507,864
Non-trainable params: 0

### Model Training:
`python3 train.py`

Don't forget to [download the dataset](#getting-dataset) before training!
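
`train.py` writes the weights with the best validation loss to `Data/Checkpoints/best_weights.h5`. A minimal sketch for evaluating such a checkpoint afterwards — the paths come from this repo, the rest is standard Keras (the test split is reproducible because `get_dataset` uses `random_state=42`):

```python
# Sketch: load the saved architecture + best checkpoint, then score the test split.
from keras.models import model_from_json
from get_dataset import get_dataset

with open('Data/Model/model.json') as f:
    model = model_from_json(f.read())
model.load_weights('Data/Checkpoints/best_weights.h5')
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])

X, X_test, Y, Y_test = get_dataset()
loss, acc = model.evaluate(X_test, Y_test)
print('Test accuracy:', acc)
```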

### Using TensorBoard:
`tensorboard --logdir=Data/Checkpoints/logs`

### Creating Dataset:
To get the existing dataset, see the [Getting Dataset](#getting-dataset) section above.

For your own dataset:
- Create a 'Data/Train_Data' folder.
- Inside 'Data/Train_Data', create one folder per class, named after the character or string it represents.
- Fill each class folder with as many photos of that character or string as you can (see the example layout below).

Note: The model works on 64x64 images; if your images are bigger, the program automatically resizes them to 64x64.
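
For example, a three-class digit dataset could look like this (folder names become the class labels; the image file names below are only placeholders):

```
Data
└── Train_Data
    ├── 0
    │   ├── IMG_0001.JPG
    │   └── IMG_0002.JPG
    ├── 1
    │   └── IMG_0042.JPG
    └── 2
        └── IMG_0100.JPG
```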

### Important Notes:
- Python version used: 3.6.0
- Install the necessary modules with the `sudo pip3 install -r requirements.txt` command.
- Install OpenCV (we use version 3.2.0-dev).


## Cite as:
Mavi, A., (2020), “A New Dataset and Proposed Convolutional Neural Network Architecture for Classification of American Sign Language Digits”, arXiv:2011.08927 [cs.CV]
--------------------------------------------------------------------------------
/database_process.py:
--------------------------------------------------------------------------------
# Arda Mavi
import os
import sqlite3

def set_sql_connect(database_name):
    return sqlite3.connect(database_name)

def set_sql_cursor(database_connect):
    return database_connect.cursor()

def close_connect(vt):
    # Commit and close the connection ('vt' is the sqlite3 connection object).
    if vt:
        vt.commit()
        vt.close()

def set_connect_and_cursor(path='Data/DataBase/database.sqlite'):
    if not os.path.exists('Data/DataBase'):
        os.makedirs('Data/DataBase')
    vt = set_sql_connect(path)
    db = set_sql_cursor(vt)
    return vt, db

def create_table(table_name, columns):
    vt, db = set_connect_and_cursor()
    db.execute("CREATE TABLE IF NOT EXISTS {0} ({1})".format(table_name, columns))
    close_connect(vt)

def get_data(sql_command):
    vt, db = set_connect_and_cursor()
    db.execute(sql_command)
    gelen_veri = db.fetchall()  # gelen_veri: the fetched rows
    close_connect(vt)
    return gelen_veri

def add_data(table, adding):
    vt, db = set_connect_and_cursor()
    db.execute("INSERT INTO '{0}' VALUES ({1})".format(table, adding))
    close_connect(vt)
--------------------------------------------------------------------------------
/get_dataset.py:
--------------------------------------------------------------------------------
# Arda Mavi
import os
import numpy as np
from os import listdir
from scipy.misc import imread, imresize  # Removed in newer SciPy releases; needs an older SciPy plus Pillow.
from keras.utils import to_categorical
from database_process import create_table, add_data
from sklearn.model_selection import train_test_split

# Settings:
img_size = 64
channel_size = 1
num_class = 10
test_size = 0.2


def get_img(data_path):
    # Getting image array from path:
    img = imread(data_path, flatten = True if channel_size == 1 else False)
    img = imresize(img, (img_size, img_size, channel_size))
    return img

def get_dataset(dataset_path='Data/Train_Data'):
    # Getting all data from data path:
    try:
        X = np.load('Data/npy_dataset/X.npy')
        Y = np.load('Data/npy_dataset/Y.npy')
    except IOError:
        # No cached arrays yet: rebuild the dataset and the label database from the image folders.
        if os.path.exists('Data/DataBase/database.sqlite'):
            os.remove('Data/DataBase/database.sqlite')
        # Create database:
        create_table('id_char', 'id, char')
        labels = listdir(dataset_path)  # Getting labels
        X = []
        Y = []
        count_categori = [-1, '']  # For encoding labels
        for i, label in enumerate(labels):
            datas_path = dataset_path+'/'+label
            for data in listdir(datas_path):
                img = get_img(datas_path+'/'+data)
                X.append(img)
                # For encoding labels:
                if label != count_categori[1]:
                    count_categori[0] += 1
                    count_categori[1] = label
                    add_data('id_char', "{0}, '{1}'".format(count_categori[0], count_categori[1]))
                Y.append(count_categori[0])
        # Create dataset:
        X = 1-np.array(X).astype('float32')/255.
        X = X.reshape(X.shape[0], img_size, img_size, channel_size)
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, len(labels))
        if not os.path.exists('Data/npy_dataset/'):
            os.makedirs('Data/npy_dataset/')
        np.save('Data/npy_dataset/X.npy', X)
        np.save('Data/npy_dataset/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test

if __name__ == '__main__':
    get_dataset()
--------------------------------------------------------------------------------
/get_model.py:
--------------------------------------------------------------------------------
# Arda Mavi
import os
from keras.models import Model
from database_process import get_data
from keras.layers import Input, Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout

def save_model(model):
    if not os.path.exists('Data/Model/'):
        os.makedirs('Data/Model/')
    model_json = model.to_json()
    with open("Data/Model/model.json", "w") as model_file:
        model_file.write(model_json)
    # Serialize weights to HDF5:
    model.save_weights("Data/Model/weights.h5")
    print('Model and weights saved')
    return


image_size = 64
channel_size = 1

def get_model():
    num_class = len(get_data('SELECT id FROM "id_char"'))
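    # num_class comes from the 'id_char' table that get_dataset.py fills in
    # while scanning Data/Train_Data, so run get_dataset.py (or train.py,
    # which calls it) once before building the model.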

    inputs = Input(shape=(image_size, image_size, channel_size))

    conv_1 = Conv2D(32, (3,3), strides=(1,1), padding='same')(inputs)
    act_1 = Activation('relu')(conv_1)

    conv_2 = Conv2D(64, (3,3), strides=(1,1), padding='same')(act_1)
    act_2 = Activation('relu')(conv_2)

    pooling_1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(act_2)

    conv_3 = Conv2D(64, (3,3), strides=(1,1), padding='same')(pooling_1)
    act_3 = Activation('relu')(conv_3)

    pooling_2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(act_3)

    conv_4 = Conv2D(128, (3,3), strides=(1,1), padding='same')(pooling_2)
    act_4 = Activation('relu')(conv_4)

    pooling_3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(act_4)

    flat_1 = Flatten()(pooling_3)

    fc = Dense(526)(flat_1)
    fc = Activation('relu')(fc)
    fc = Dropout(0.5)(fc)

    fc = Dense(128)(fc)
    fc = Activation('relu')(fc)
    fc = Dropout(0.5)(fc)

    fc = Dense(num_class)(fc)
    outputs = Activation('softmax')(fc)

    model = Model(inputs=inputs, outputs=outputs)

    model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])

    model.summary()  # summary() prints itself; wrapping it in print() would add a stray 'None'.

    return model

if __name__ == '__main__':
    save_model(get_model())
--------------------------------------------------------------------------------
/live.py:
--------------------------------------------------------------------------------
# Arda Mavi
import os
import cv2
import platform
import numpy as np
from predict import predict
from scipy.misc import imresize  # Removed in newer SciPy releases; needs an older SciPy.
from multiprocessing import Process
from keras.models import model_from_json

img_size = 64
channel_size = 1

def main():
    # Getting model:
    model_file = open('Data/Model/model.json', 'r')
    model = model_file.read()
    model_file.close()
    model = model_from_json(model)
    # Getting weights:
    model.load_weights("Data/Model/weights.h5")

    print('Press "ESC" button for exit.')

    # Grab a frame from the camera, predict, vocalize the prediction in another process, repeat.
    cap = cv2.VideoCapture(0)
    old_char = ''
    while True:
        ret, img = cap.read()

        # Cropping image to a centered square:
        img_height, img_width = img.shape[:2]
        side_width = int((img_width-img_height)/2)
        img = img[0:img_height, side_width:side_width+img_height]

        # Show window:
        cv2.imshow('VSL', cv2.flip(img, 1))  # Mirror the preview so positioning your hand is easier.

        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = imresize(img, (img_size, img_size, channel_size))
        img = 1-np.array(img).astype('float32')/255.
        img = img.reshape(1, img_size, img_size, channel_size)

        Y_string, Y_possibility = predict(model, img)

        if Y_possibility < 0.4:  # Low confidence: reset so the same sign can be vocalized again.
            old_char = ''

        if (platform.system() == 'Darwin') and old_char != Y_string and Y_possibility > 0.6:
            print(Y_string, Y_possibility)
            arg = 'say {0}'.format(Y_string)  # macOS text-to-speech command.
            # Say the prediction in a separate process so the camera loop is not blocked:
            Process(target=os.system, args=(arg,)).start()
            old_char = Y_string
        if cv2.waitKey(200) == 27:  # Decimal 27 = Esc
            break
    cap.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
# Arda Mavi
import sys
import numpy as np
from get_dataset import get_img
from database_process import get_data
from keras.models import model_from_json

image_size = 64
channel_size = 1

def predict(model, X):  # Returns: Y string, Y possibility
    Y = model.predict(X)
    Y_index = np.argmax(Y, axis=1)
    Y_string = get_data('SELECT char FROM "id_char" WHERE id={0}'.format(Y_index[0]))
    return Y_string[0][0], Y[0][Y_index][0]
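
# Usage sketch (this is what live.py does): with the model loaded from
# Data/Model/model.json plus weights.h5, and X a preprocessed batch of shape
# (1, 64, 64, 1), predict(model, X) returns the label string stored in the
# database and its softmax probability.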

if __name__ == '__main__':
    img_dir = sys.argv[1]
    img = 1-np.array(get_img(img_dir)).astype('float32')/255.
    img = img.reshape(1, image_size, image_size, channel_size)
    # Getting model:
    model_file = open('Data/Model/model.json', 'r')
    model = model_file.read()
    model_file.close()
    model = model_from_json(model)
    # Getting weights:
    model.load_weights("Data/Model/weights.h5")
    Y_string, Y_possibility = predict(model, img)
    print('Class:', Y_string, '\nPossibility:', Y_possibility)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy
# scipy (pre-1.2) and Pillow provide scipy.misc.imread/imresize, used by get_dataset.py and live.py:
scipy
Pillow
scikit-learn
scikit-image
tensorflow
keras
h5py
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
# Arda Mavi
import os
from get_dataset import get_dataset
from get_model import get_model, save_model
from keras.callbacks import ModelCheckpoint, TensorBoard

epochs = 15
batch_size = 6

def train_model(model, X, X_test, Y, Y_test):
    checkpoints = []
    if not os.path.exists('Data/Checkpoints/'):
        os.makedirs('Data/Checkpoints/')

    # Keep the weights with the best validation loss, and log to TensorBoard:
    checkpoints.append(ModelCheckpoint('Data/Checkpoints/best_weights.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1))
    checkpoints.append(TensorBoard(log_dir='Data/Checkpoints/logs', histogram_freq=1, write_graph=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None))

    '''
    # Generates augmented data on the fly.
    # Gives better results, but training takes longer.

    from keras.preprocessing.image import ImageDataGenerator
    generated_data = ImageDataGenerator(rotation_range=30, width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.2)  # To include left-hand data, add: horizontal_flip=True
    generated_data.fit(X)

    model.fit_generator(generated_data.flow(X, Y, batch_size=batch_size), steps_per_epoch=X.shape[0]//batch_size, epochs=epochs, validation_data=(X_test, Y_test), callbacks=checkpoints)
    '''

    model.fit(X, Y, batch_size=batch_size, epochs=epochs, validation_data=(X_test, Y_test), shuffle=True, callbacks=checkpoints)

    return model

def main():
    X, X_test, Y, Y_test = get_dataset()
    model = get_model()
    model = train_model(model, X, X_test, Y, Y_test)
    save_model(model)
    return model

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------