├── README.md
├── SignToSignLanguage.py
├── geckodriver.log
├── useless_words.py
└── vidlist.txt


/README.md:
--------------------------------------------------------------------------------
 1 | # TextToSignLanguage
 2 | A translator from English text to Sign Language video.
 3 | 
 4 | A simple module that translates text into videos of people signing the literal translation of each word, concatenated into one video.
 5 | 
 6 | USAGE:
 7 | After cloning the repository, update the path variables (`SIGN_PATH` and the Downloads folder path in `SignToSignLanguage.py`) to suit your machine.
 8 | Next, run the application. Enter a text and follow the program's prompts: every time the program encounters a word that is not yet in its local database, it will request to download it from the internet, so you will need an internet connection. The download may prompt for user approval; save the file to your Downloads folder and everything should work.
 9 | 
10 | As you will see, the final video is put together by concatenating the individual videos of each sign. You may alter these clips or replace them with videos of yourself signing.
11 | 
12 | Currently supports PSE (Pidgin Signed English) only.
13 | 
14 | Have fun!
15 | 


--------------------------------------------------------------------------------
/SignToSignLanguage.py:
--------------------------------------------------------------------------------
  1 | from nltk import word_tokenize
  2 | import useless_words
  3 | from nltk.stem import PorterStemmer
  4 | import time
  5 | from shutil import copyfile
  6 | from difflib import SequenceMatcher
  7 | from selenium import webdriver
  8 | 
# CONSTANTS
# Directory that holds one .mp4 clip per known sign; the merged video is
# written to its "Output" subfolder. Machine-specific: update before running.
SIGN_PATH = "C:\\Users\\Shpoozipoo\\Desktop\\Signs"
# Seconds to sleep after clicking a word on the site, presumably so the
# browser download can finish before the file is converted.
DOWNLOAD_WAIT = 7
# Similarity threshold (difflib.SequenceMatcher ratio) used to decide whether
# a candidate name matches the requested word.
SIMILIARITY_RATIO = 0.9
 13 | # Get words
 14 | def download_word_sign(word):
 15 |     browser = webdriver.Firefox()
 16 |     browser.get("http://www.aslpro.com/cgi-bin/aslpro/aslpro.cgi")
 17 |     first_letter = word[0]
 18 |     letters = browser.find_elements_by_xpath('//a[@class="sideNavBarUnselectedText"]')
 19 |     for letter in letters:
 20 |         if first_letter == str(letter.text).strip().lower():
 21 |             letter.click()
 22 |             time.sleep(2)
 23 |             break
 24 | 
 25 |     # Show drop down menu ( Spinner )
 26 |     spinner = browser.find_elements_by_xpath("//option")
 27 |     best_score = -1.
 28 |     closest_word_item = None
 29 |     for item in spinner:
 30 |         item_text = item.text
 31 |         # if stem == str(item_text).lower()[:len(stem)]:
 32 |         s = similar(word, str(item_text).lower())
 33 |         if s > best_score:
 34 |             best_score = s
 35 |             closest_word_item = item
 36 |             print(word, " ", str(item_text).lower())
 37 |             print("Score: " + str(s))
 38 |     if best_score < SIMILIARITY_RATIO:
 39 |         print(word + " not found in dictionary")
 40 |         return
 41 |     real_name = str(closest_word_item.text).lower()
 42 | 
 43 |     print("Downloading " + real_name + "...")
 44 |     closest_word_item.click()
 45 |     time.sleep(DOWNLOAD_WAIT)
 46 |     in_path = "C:\\Users\\Shpoozipoo\\Downloads\\" + real_name + ".swf"
 47 |     out_path = SIGN_PATH + "\\" + real_name + ".mp4"
 48 |     convert_file_format(in_path, out_path)
 49 |     browser.close()
 50 |     return real_name
 51 | 
 52 | def convert_file_format(in_path, out_path):
 53 |     # Converts .swf filw to .mp4 file and saves new file at out_path
 54 |     from ffmpy import FFmpeg
 55 | 
 56 |     ff = FFmpeg(
 57 |     inputs = {in_path: None},
 58 |     outputs = {out_path: None})
 59 |     ff.run()
 60 | 
 61 | def get_words_in_database():
 62 |     import os
 63 |     vids = os.listdir(SIGN_PATH)
 64 |     vid_names = [v[:-4] for v in vids]
 65 |     return vid_names
 66 | 
 67 | def process_text(text):
 68 |     # Split sentence into words
 69 |     words = word_tokenize(text)
 70 |     # Remove all meaningless words
 71 |     usefull_words = [str(w).lower() for w in words if w.lower() not in set(useless_words.words())]
 72 | 
 73 |     # TODO: Add stemming to words and change search accordingly. Ex: 'talking' will yield 'talk'.
 74 |     # from nltk.stem import PorterStemmer
 75 |     # ps = PorterStemmer()
 76 |     # usefull_stems = [ps.stem(word) for word in usefull_words]
 77 |     # print("Stems: " + str(usefull_stems))
 78 | 
 79 |     # TODO: Create Sytnax such that the words will be in ASL order as opposed to PSE.
 80 | 
 81 |     return usefull_words
 82 | 
 83 | 
 84 | def merge_signs(words):
 85 |     # Write a text file containing all the paths to each video
 86 |     with open("vidlist.txt", 'w') as f:
 87 |         for w in words:
 88 |             f.write("file '" + SIGN_PATH + "\\" + w + ".mp4'\n")
 89 |     command = "ffmpeg -f concat -safe 0 -i vidlist.txt -c copy output.mp4 -y"
 90 |     import shlex
 91 |     # Splits the command into pieces in order to feed the command line
 92 |     args = shlex.split(command)
 93 |     import subprocess
 94 |     process = subprocess.Popen(args)
 95 |     process.wait() # Block code until process is complete
 96 |     copyfile("output.mp4",SIGN_PATH + "\\Output\\out.mp4") # copyfile(src, dst)
 97 |     # remove the temporary file (it used to ask me if it should override previous file).
 98 |     import os
 99 |     os.remove("output.mp4")
100 | 
def in_database(w):
    """Tell whether any stored sign name begins with the Porter stem of w.

    Returns True on the first stored name that starts with the stem and
    False when none does.
    """
    known_names = get_words_in_database()
    from nltk.stem import PorterStemmer
    stem = PorterStemmer().stem(w)
    return any(name.startswith(stem) for name in known_names)
110 | 
111 | 
def similar(a, b):
    """Return the similarity of the two strings as a float between 0 and 1.

    The value is difflib's SequenceMatcher ratio: 1.0 for identical
    sequences, 0.0 when they have nothing in common.
    """
    matcher = SequenceMatcher(a=a, b=b)
    return matcher.ratio()
115 | 
def find_in_db(w):
    """Look up the stored sign whose name is most similar to w.

    Returns the best-matching video name when its similarity score is
    strictly above SIMILIARITY_RATIO; otherwise returns None. When several
    names tie for the best score, the first one listed wins.
    """
    scored = [(similar(w, name), name) for name in get_words_in_database()]
    if not scored:
        return None
    # max() keeps the first of equally scored entries.
    top_score, top_name = max(scored, key=lambda pair: pair[0])
    return top_name if top_score > SIMILIARITY_RATIO else None
# Get text
# text = str(input("Enter the text you would like to translate to pse \n"))
text = "How would you to approach the problem of translate Sign Language to English?"
print("Text: " + text)
# Process text
words = process_text(text)
# Resolve every word to a stored video name, downloading the signs that have
# not been downloaded in previous sessions.
real_words = []
for w in words:
    real_name = find_in_db(w)
    if real_name:
        print(w + " is already in db as " + real_name)
    else:
        real_name = download_word_sign(w)
    # download_word_sign returns None when the site has no close match; such
    # words are skipped instead of being passed on as None.
    if real_name:
        real_words.append(real_name)
words = real_words

if words:
    # Concatenate videos and save output video to folder
    merge_signs(words)

    # Play the video
    from os import startfile
    startfile(SIGN_PATH + "\\Output\\out.mp4")
else:
    print("No signs available for the given text")


--------------------------------------------------------------------------------
/useless_words.py:
--------------------------------------------------------------------------------
 1 | def words():
 2 |     words = set()
 3 | 
 4 |     # Words
 5 |     words.add("is")
 6 |     words.add("the")
 7 |     words.add("are")
 8 |     words.add("am")
 9 |     words.add("a")
10 |     words.add("it")
11 |     words.add("was")
12 |     words.add("were")
13 |     words.add("an")
14 | 
15 |     # Punctuation
16 |     words.add(",")
17 |     words.add(".")
18 |     words.add("?")
19 | 
20 |     return words


--------------------------------------------------------------------------------
/vidlist.txt:
--------------------------------------------------------------------------------
1 | file 'C:\Users\Shpoozipoo\Desktop\Signs\how.mp4'
2 | file 'C:\Users\Shpoozipoo\Desktop\Signs\you.mp4'
3 | file 'C:\Users\Shpoozipoo\Desktop\Signs\to.mp4'
4 | 


--------------------------------------------------------------------------------