├── model
│   ├── RNNs.py
│   ├── predict.py
│   ├── LSTM.py
│   ├── LSTM_separate.py
│   ├── predict_three_please.py
│   ├── predict_onehot.py
│   ├── 20_test_model.py
│   ├── save_model.py
│   ├── not_k_fold.py
│   ├── training_onehot.py
│   └── 20_train_model.py
├── util
│   ├── make_pickle.py
│   ├── build.py
│   ├── analysis.py
│   ├── test.py
│   └── build_model.py
├── common
│   └── load_data.py
├── README.md
└── modified_mediapipe
    ├── demo_run_graph_main.cc
    ├── tflite_tensors_to_landmarks_calculator.cc
    └── tflite_tensors_to_landmarks_calculator(relative_position_ver).cc

/model/RNNs.py:
--------------------------------------------------------------------------------
# Compare Simple RNN, LSTM, and GRU models
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Bidirectional
from keras import layers

def simple_rnn():
    model_RNN = Sequential()
    model_RNN.add(SimpleRNN(units=64, input_shape=(260, 42)))
    model_RNN.add(Dense(10, activation="relu"))  # TODO: decide between softmax and linear here
    model_RNN.add(Dense(17))
    model_RNN.compile(loss='mse', optimizer='adam')

    return model_RNN

def rnn_lstm():
    model_LSTM = Sequential()
    model_LSTM.add(layers.LSTM(64, return_sequences=True, input_shape=(260, 42)))  # time_steps fixed at 260, features: 42
    model_LSTM.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model_LSTM.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model_LSTM.add(layers.Dense(9, activation='softmax'))  # number of words: 9 -> 17
    model_LSTM.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model_LSTM

def bidirectional_lstm():
    Bidirectional_LSTM = Sequential()
    Bidirectional_LSTM.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(260, 42)))
    Bidirectional_LSTM.add(layers.Bidirectional(layers.LSTM(32)))
    Bidirectional_LSTM.add(layers.Dense(17, activation='softmax'))
    Bidirectional_LSTM.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    return Bidirectional_LSTM
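A minimal usage sketch for these builders (an assumption, not part of the repo: it pairs simple_rnn() with the loader in common/load_data.py, uses a placeholder data path, and assumes the final Dense width matches the one-hot label width):

    # Hypothetical usage -- x_train: (N, 260, 42), y_train: one-hot labels
    from common.load_data import load_data
    x_train, y_train = load_data("/path/to/traindata/")
    model = simple_rnn()
    model.fit(x_train, y_train, epochs=3, batch_size=32)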
--------------------------------------------------------------------------------
/model/predict.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax
from common.load_data import load_data  # load_data is defined in common/load_data.py

def load_labels(dirname):
    label = {}
    count = 1
    listfile = os.listdir(dirname)
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label

x_test, y_test = load_data("/Users/anna/SLR/Seperate/testinput/")
new_model = tf.keras.models.load_model('simpleRNN.h5')
new_model.summary()

# Evaluate the model
#loss, acc = new_model.evaluate(x_test, y_test, verbose=2)
#print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

#print(new_model.predict(x_test).shape)


# Use the model

xhat = x_test
yhat = new_model.predict(xhat)
print('## yhat ##')
#labels = load_labels(dirname)
a = xhat.shape[0]
for i in range(a):
    print('True: ' + str(argmax(y_test[i])) + ', Predict: ' + str(yhat[i]))

--------------------------------------------------------------------------------
/util/make_pickle.py:
--------------------------------------------------------------------------------
import numpy as np
import pickle
import os
import sys
import argparse
import random

def search(dirname):
    listfile = os.listdir(dirname)
    predict = []
    maxlen = 0
    listlength = []
    # first pass: find the maximum feature length across all txt files
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for namet in textlist:
            textnamed = dirname + wordname + "/" + namet
            with open(textnamed) as datad:
                lend = [[i for i in line.split(' ')][:-1] for line in datad.readlines()]
                listlength.append(len(lend[0]))
    maxlen = max(listlength)
    # second pass: zero-pad every sample to maxlen and attach its label
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            textname = dirname + wordname + "/" + text
            with open(textname) as data:
                numbers = [[i for i in line.split(' ')][:-1] for line in data.readlines()]
                #print(len(numbers[0]))
                for i in range(len(numbers[0]), maxlen):
                    numbers[0].extend([0.000])
                numbers.append(wordname)
                predict.append(numbers)
    random.shuffle(predict)
    return predict


def main(inputfile_path, output_path):
    ret = search(inputfile_path)
    np.shape(ret)
    out_file = output_path + 'test_data.pkl'
    with open(out_file, 'wb') as fout:
        pickle.dump(ret, fout)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='make pkl_file')
    parser.add_argument("--input_file_path", help=" ")
    parser.add_argument("--output_path", help=" ")
    args = parser.parse_args()
    input_file_path = args.input_file_path
    output_path = args.output_path
    main(input_file_path, output_path)
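A small sketch of reading the pickle back (hypothetical, not part of the repo; each entry is [padded_feature_row, word_label], matching what search() builds above):

    import pickle
    with open("test_data.pkl", "rb") as fin:
        samples = pickle.load(fin)
    features = [s[0] for s in samples]  # zero-padded landmark values, as parsed
    labels = [s[1] for s in samples]    # word names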
--------------------------------------------------------------------------------
/util/build.py:
--------------------------------------------------------------------------------
import os
import sys
import argparse

def main(input_data_path, output_data_path):
    comp = 'bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
    mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu'
    # bazel build command
    cmd = 'GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt'
    # MediaPipe run command
    listfile = os.listdir(input_data_path)
    for file in listfile:
        # find the subdirectory (word) folders under the input directory
        if not(os.path.isdir(input_data_path + file)):  # ignore .DS_Store
            continue
        word = file + '/'
        fullfilename = os.listdir(input_data_path + word)
        # collect the names of all videos in the subdirectory
        if not(os.path.isdir(output_data_path + "_" + word)):
            os.mkdir(output_data_path + "_" + word)
        if not(os.path.isdir(output_data_path + word)):
            os.mkdir(output_data_path + word)
        os.system(comp)
        outputfilelist = os.listdir(output_data_path + '_' + word)  # already-processed files, so they are skipped
        for mp4list in fullfilename:
            if mp4list in outputfilelist:
                continue
            if ".DS_Store" in mp4list:  # ignore .DS_Store
                continue
            inputfilen = ' --input_video_path=' + input_data_path + word + mp4list
            outputfilen = ' --output_video_path=' + output_data_path + '_' + word + mp4list
            cmdret = cmd + inputfilen + outputfilen
            os.system(cmdret)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='operating Mediapipe')
    parser.add_argument("--input_data_path", help=" ")
    parser.add_argument("--output_data_path", help=" ")
    args = parser.parse_args()
    input_data_path = args.input_data_path
    output_data_path = args.output_data_path
    #print(input_data_path)
    main(input_data_path, output_data_path)

--------------------------------------------------------------------------------
/util/analysis.py:
--------------------------------------------------------------------------------
import xlsxwriter
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pylab as plt

def convert_tuple(value):
    if not isinstance(value, tuple):
        return value

    return str(value)

def make_xlxs(input_file_path, worksheet_name):
    with open(input_file_path, mode='r') as t:
        text = list(t)
        string = " ".join(text)
        landmarks = string.split(" ")

    workbook = xlsxwriter.Workbook(worksheet_name)
    worksheet = workbook.add_worksheet()

    col = 0

    # write one frame (42 values) per worksheet row
    for row in range(len(landmarks) // 42):
        landmark_frame = map(convert_tuple, landmarks[row*42:(row*42)+42])
        worksheet.write_row(row, col, landmark_frame)

    workbook.close()
    print(workbook)

def return_frame(dirname):
    frames = []  # list to save frame numbers in txt files
    listfile = os.listdir(dirname)
    for file in listfile:
        if "_" in file:  # ignore mp4 files
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            with open(textname, mode='r') as t:  # open txt files
                numbers = np.array([float(num) for num in t.read().split()])
                #print(len(numbers)/42)
                frames.append(int(len(numbers)/42))
    #print(frames)
    count = Counter(frames)
    plt.bar(count.keys(), count.values())
    plt.xlabel('frame number')
    plt.ylabel('count')
    plt.title('Histogram')
    plt.grid(True)
    plt.savefig('hist.png', dpi=300)


def main():
    #make_xlxs("/Users/anna/SLR/sentenceOutput/Sentence/bird-like-apple.txt", 'bird-like-apple.xlsx')
    return_frame("/Users/anna/SLR/twenty/traindata/")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/common/load_data.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:  # only keep the directories that contain text files
            continue
        wordname = file
        if not(os.path.isdir(dirname + file)):  # ignore .DS_Store
            continue
        textlist = os.listdir(dirname + wordname)  # list of text files under the word directory
        for text in textlist:
            textname = dirname + wordname + "/" + text
            numbers = []
            #print(textname)
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]  # read the text file
            for i in range(len(numbers), 10920):  # zero-pad, assuming at most 260 frames per word video
                numbers.extend([0.000])  # fixed to 260 frames
            row = 0
            landmark_frame = []
            for i in range(0, len(numbers)):  # read in chunks of 42
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (260 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)

    t = Tokenizer()  # one-hot encode Y
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    #print(x_train[0])
    return x_train, y_train  # training data
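A quick shape check for load_data (a sketch with a placeholder path, not part of the repo):

    x_train, y_train = load_data("/path/to/traindata/")
    print(x_train.shape)  # (num_samples, 260, 42)
    print(y_train.shape)  # (num_samples, num_words + 1); Tokenizer indices start at 1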
--------------------------------------------------------------------------------
/model/LSTM.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
import argparse
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical


class Data:
    def __init__(self, pklname):
        X = []
        Y = []
        maxlength = 0
        with open(pklname, 'rb') as fin:
            frames = pickle.load(fin)
            for i, frame in enumerate(frames):
                features = frame[0]
                maxlength = len(features)
                word = frame[1]

                X.append(np.array(features))
                Y.append(word)
        X = np.array(X)
        Y = np.array(Y)

        t = Tokenizer()
        t.fit_on_texts(Y)
        #print(t.word_index)
        encoded = t.texts_to_sequences(Y)
        #print(encoded)
        one_hot = to_categorical(encoded)
        #print(one_hot)

        (x_train, y_train) = X, one_hot

        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_train, y_train
        self.length = maxlength

class RNN_LSTM(models.Model):
    def __init__(self, maxlen):
        x = layers.Input((maxlen,))
        h = layers.Embedding(maxlen, 128)(x)
        h = layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)(h)
        y = layers.Dense(9, activation='softmax')(h)
        super().__init__(x, y)

        # try using different optimizers and different optimizer configs
        self.compile(loss='binary_crossentropy',
                     optimizer='adam', metrics=['accuracy'])


class Machine:
    def __init__(self, pklname):
        self.data = Data(pklname)
        self.model = RNN_LSTM(self.data.length)

    def run(self, epochs=3, batch_size=32):
        data = self.data
        model = self.model
        print('Training stage')
        print('==============')
        model.fit(data.x_train, data.y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(data.x_test, data.y_test))

        score, acc = model.evaluate(data.x_test, data.y_test,
                                    batch_size=batch_size)
        print('Test performance: accuracy={0}, loss={1}'.format(acc, score))


def main(pklname):
    m = Machine(pklname)
    m.run()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='run Model')
    parser.add_argument("--pkl_data_path", help=" ")
    args = parser.parse_args()
    pkl_data_path = args.pkl_data_path
    main(pkl_data_path)

--------------------------------------------------------------------------------
/model/LSTM_separate.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers

import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical


class Data:
    def __init__(self):
        X = []   # training data
        Y = []   # training labels
        XT = []  # test data
        YT = []  # test labels
        maxlength = 0
        with open("/Users/jongwook/Desktop/output.pkl", 'rb') as fin:
            frames = pickle.load(fin)
            for i, frame in enumerate(frames):
                features = frame[0]
                maxlength = len(features)
                word = frame[1]
                if i % 3 != 0:  # every third sample goes to the test split
                    X.append(np.array(features))
                    Y.append(word)
                else:
                    XT.append(np.array(features))
                    YT.append(word)
        X = np.array(X)
        Y = np.array(Y)
        XT = np.array(XT)
        YT = np.array(YT)

        t = Tokenizer()
        t.fit_on_texts(Y)

        encoded = t.texts_to_sequences(Y)
        one_hot = to_categorical(encoded)
        t1 = Tokenizer()
        t1.fit_on_texts(YT)

        encoded1 = t1.texts_to_sequences(YT)
        one_hot1 = to_categorical(encoded1)

        (x_train, y_train) = X, one_hot
        (x_test, y_test) = XT, one_hot1

        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.maxlen = maxlength

class RNN_LSTM(models.Model):
    def __init__(self, maxlen):
        x = layers.Input((maxlen,))
        h = layers.Embedding(maxlen, 256)(x)
        h = layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)(h)
        y = layers.Dense(18, activation='softmax')(h)
        super().__init__(x, y)

        # try using different optimizers and different optimizer configs
        self.compile(loss='binary_crossentropy',
                     optimizer='adam', metrics=['accuracy'])


class Machine:
    def __init__(self):
        self.data = Data()
        self.model = RNN_LSTM(self.data.maxlen)

    def run(self, epochs=3, batch_size=32):
        data = self.data
        model = self.model
        print('Training stage')
        print('==============')
        history = model.fit(data.x_train, data.y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=(data.x_test, data.y_test))

        score, acc = model.evaluate(data.x_test, data.y_test,
                                    batch_size=batch_size)
        print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

def main():
    m = Machine()
    m.run()


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Sign language recognition with RNN and Mediapipe
Sign language gesture recognition using a recurrent neural network (RNN) with Mediapipe hand tracking.

This project is for academic purposes. Thanks to Google's Mediapipe team :)

## Data Preprocessing with hand tracking(Desktop)
Create training data on Desktop with input video using [Hand Tracking](https://github.com/google/mediapipe/blob/master/mediapipe/docs/hand_tracking_mobile_gpu.md).
Gesture recognition with a deep learning model can be trained on only **42 hand landmarks** per frame.

**CUSTOMIZE:**
- Use video input instead of the webcam on Desktop to train with video data
- Extract hand landmarks for every frame of one word and merge them into one txt file (see the parsing sketch below)
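For reference, a minimal sketch of how one such txt file maps back to per-frame landmarks (`landmarks.txt` is a hypothetical example file; each frame contributes 42 floats, i.e. 21 landmarks x 2 coordinates):

```python
import numpy as np

# A txt file is a flat, space-separated list of floats, 42 values per frame.
values = np.array([float(v) for v in open("landmarks.txt").read().split()])
frames = values.reshape(-1, 42)  # one row per frame
print(frames.shape)
```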
### 1. Set up Hand Tracking framework
* Install Mediapipe
```shell
git clone https://github.com/google/mediapipe.git
```
See the rest of the installation documents [here](https://mediapipe.readthedocs.io/en/latest/install.html).
* Replace the **tflite_tensors_to_landmarks_calculator.cc** file
```shell
cd mediapipe/mediapipe/calculators/tflite
rm tflite_tensors_to_landmarks_calculator.cc
```
and copy in the new tflite_tensors_to_landmarks_calculator.cc file from the modified_mediapipe folder.

* Replace the **demo_run_graph_main.cc** file
```shell
cd mediapipe/mediapipe/examples/desktop
rm demo_run_graph_main.cc
```
and copy in the new demo_run_graph_main.cc file from the modified_mediapipe folder.

### 2. Create your own training data
Make **train_videos** and **test_videos** folders with one subfolder per sign language word. Copy the **build.py** file from the util folder to your mediapipe directory. (Currently there may be a TabError; please fix the tabs manually.)
* Usage

To make the mp4 and txt files with mediapipe automatically, run
```shell
python build.py --input_data_path=[INPUT_PATH] --output_data_path=[OUTPUT_PATH]
```
inside the mediapipe directory.

Change INPUT_PATH and OUTPUT_PATH to your own directory paths. INPUT_PATH is the path to your input videos. OUTPUT_PATH is where all the hand-tracked mp4 files and txt files of 42 landmarks will be saved.

For example:
```shell
input_videos
├── Apple
│   ├── IMG_2733.MOV
│   ├── IMG_2734.MOV
│   ├── IMG_2735.MOV
│   └── IMG_2736.MOV
├── Bird
│   ├── IMG_2631.MOV
│   ├── IMG_2632.MOV
│   ├── IMG_2633.MOV
│   └── IMG_2634.MOV
└── Sorry
    ├── IMG_2472.MOV
    ├── IMG_2473.MOV
    ├── IMG_2474.MOV
    └── IMG_2475.MOV
...
```
OUTPUT_PATH should initially be an empty directory; when the build is done, the mp4 and txt files will be written to it.

Created folder example:
```shell
output_data
├── _Apple
│   ├── IMG_2733.mp4
│   ├── IMG_2734.mp4
│   ├── IMG_2735.mp4
│   └── IMG_2736.mp4
└── Apple
    ├── IMG_2472.txt
    ├── IMG_2473.txt
    ├── IMG_2474.txt
    └── IMG_2475.txt
...
```
(Do NOT use spaces or '_' in your folder paths or video names, e.g. Apple_pie (X))


### 3. Train RNN model

* Train
```shell
python LSTM.py --pkl_data_path=[PKL_FILE]
```
Set PKL_FILE to the path of the preprocessed pkl file (the flag name matches the argparse option in model/LSTM.py).

Watch [this video](https://www.youtube.com/watch?v=5epWNiv5EKk&t=77s) for the overall workflow.
[more details](https://www.slideshare.net/JiHyunKim204)
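After training, inference follows the pattern of the predict scripts in the model folder; a condensed sketch (the paths and the saved-model filename are placeholders):

```python
import numpy as np
import tensorflow as tf
from common.load_data import load_data

x_test, y_test = load_data("/path/to/testdata/")
model = tf.keras.models.load_model("simpleRNN.h5")
preds = np.argmax(model.predict(x_test), axis=1)
gold = np.argmax(y_test, axis=1)
print("Accuracy:", 100 * np.sum(preds == gold) / len(gold))
```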
--------------------------------------------------------------------------------
/model/predict_three_please.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 150):  # drop the trailing frames -> fixed at 150 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (150 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    print(X.shape)
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    tmp = [[x, y] for x, y in zip(x_train, y_train)]
    x_train = [n[0] for n in tmp]
    y_train = [n[1] for n in tmp]
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train, y_train


# prediction
def load_label(dirname):
    label = {}
    count = 1
    listfile = os.listdir(dirname)
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label


x_test, y_test = load_data("/Users/anna/SLR/OnlyThree/outputdata/")
new_model = tf.keras.models.load_model("/Users/anna/SLR/prj/simpleRNN2.h5")
new_model.summary()

labels = load_label("/Users/anna/SLR/OnlyThree/outputdata/")

# use the model

xhat = x_test
yhat = new_model.predict(xhat)
print('## yhat ##')


# prediction
predictions = np.array([np.argmax(pred) for pred in yhat])
Y = np.array([np.argmax(each) for each in y_test])
print(predictions)
rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
print(rev_labels)
with open("result.txt", "w") as f:
    f.write("gold, pred\n")
    for a, b in zip(Y, predictions):
        f.write("%s %s\n" % (rev_labels[a], rev_labels[b]))

acc = 100 * np.sum(predictions == Y) / len(Y)
print("Accuracy: ", acc)

--------------------------------------------------------------------------------
/model/predict_onehot.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    tmp = [[x, y] for x, y in zip(X, Y)]
    #random.shuffle(tmp)
    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    # fixed vocabulary so the one-hot indices stay stable across runs
    text = "Apple Bird Sorry"
    t = Tokenizer()
    t.fit_on_texts([text])
    print(t.word_index)
    encoded = t.texts_to_sequences([Y])[0]
    print(encoded)
    one_hot = to_categorical(encoded)


    (x_train, y_train) = X, one_hot
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train, y_train


# prediction
def load_label(dirname):
    label = {}
    count = 1
    listfile = os.listdir(dirname)
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label


x_test, y_test = load_data("/Users/jongwook/Desktop/test1/testdata/")
new_model = tf.keras.models.load_model('simpleRNN.h5')
new_model.summary()

labels = load_label("/Users/jongwook/Desktop/test1/testdata/")

# use the model

xhat = x_test
yhat = new_model.predict(xhat)
print('## yhat ##')


# prediction
predictions = np.array([np.argmax(pred) for pred in yhat])
Y = np.array([np.argmax(each) for each in y_test])
rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
print(rev_labels)
print(predictions)
with open("result.txt", "w") as f:
    f.write("gold, pred\n")
    for a, b in zip(Y, predictions):
        f.write("%s %s\n" % (rev_labels[a], rev_labels[b]))

acc = 100 * np.sum(predictions == Y) / len(Y)
print("Accuracy: ", acc)

--------------------------------------------------------------------------------
/model/20_test_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    tmp = [[x, y] for x, y in zip(X, Y)]
    random.shuffle(tmp)
    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    # fixed vocabulary so the one-hot indices stay stable across runs
    text = "Apple Bird Blue Cents Child Cow Drink Green Hello Like Metoo No Orange Pig Sorry Thankyou Where Who Yes You"

    t = Tokenizer()
    t.fit_on_texts([text])
    print(t.word_index)
    encoded = t.texts_to_sequences([Y])[0]
    print(encoded)
    one_hot = to_categorical(encoded)


    (x_train, y_train) = X, one_hot
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train, y_train


# prediction
def load_label():
    label = {}
    count = 1
    listfile = ['Apple', 'Bird', 'Blue', 'Cents', 'Child', 'Cow', 'Drink', 'Green', 'Hello', 'Like', 'Metoo', 'No',
                'Orange', 'Pig', 'Sorry', 'Thankyou', 'Where', 'Who', 'Yes', 'You']
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label
x_test, y_test = load_data("/Users/jongwook/Desktop/testdata/")
new_model = tf.keras.models.load_model('simpleRNN.h5')
new_model.summary()

labels = load_label()

# use the model

xhat = x_test
#xhat = xhat[55:56]
yhat = new_model.predict(xhat)
print('## yhat ##')

predictions = np.array([np.argmax(pred) for pred in yhat])
Y = np.array([np.argmax(i) for i in y_test])
print(Y)
rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
#print(rev_labels[predictions[0]])

with open("result.txt", "w") as f:
    f.write("gold, pred\n")
    for a, b in zip(Y, predictions):
        f.write("%s %s\n" % (rev_labels[a], rev_labels[b]))

acc = 100 * np.sum(predictions == Y) / len(Y)

print("Accuracy: ", acc)

--------------------------------------------------------------------------------
/util/test.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax
import argparse

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    tmp = [[x, y] for x, y in zip(X, Y)]
    #random.shuffle(tmp)
    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]

    x_train = X
    x_train = np.array(x_train)
    return x_train


# prediction
def load_label():
    label = {}
    count = 1
    listfile = ['Apple', 'Bird', 'Sorry']
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label

def main(input_data_path, output_data_path):
    comp = 'bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
    mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu'
    # bazel build command
    cmd = 'GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt'
    # MediaPipe run command
    listfile = os.listdir(input_data_path)
    output_dir = ""
    for file in listfile:
        if ".DS_" in file:
            continue
        word = file + '/'
        fullfilename = os.listdir(input_data_path + word)
        # collect the names of all videos in the subdirectory
        if not(os.path.isdir(output_data_path + "_" + word)):
            os.mkdir(output_data_path + "_" + word)
        if not(os.path.isdir(output_data_path + word)):
            os.mkdir(output_data_path + word)
        os.system(comp)
        outputfilelist = os.listdir(output_data_path + '_' + word)
        for mp4list in fullfilename:
            if ".DS_Store" in mp4list:
                continue
            inputfilen = ' --input_video_path=' + input_data_path + word + mp4list
            outputfilen = ' --output_video_path=' + output_data_path + '_' + word + mp4list
            cmdret = cmd + inputfilen + outputfilen
            os.system(cmdret)

    # MediaPipe processing finished:
    output_dir = output_data_path
    x_test = load_data(output_dir)
    new_model = tf.keras.models.load_model('simpleRNN.h5')
    new_model.summary()

    labels = load_label()

    # use the model

    xhat = x_test
    yhat = new_model.predict(xhat)

    predictions = np.array([np.argmax(pred) for pred in yhat])
    rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
    print("----------result------------\n")
    for i in predictions:
        print(rev_labels[i])
        print('\n')
    print("------------end-------------\n")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='operating Mediapipe')
    parser.add_argument("--input_data_path", help=" ")
    parser.add_argument("--output_data_path", help=" ")
    args = parser.parse_args()
    input_data_path = args.input_data_path
    output_data_path = args.output_data_path
    main(input_data_path, output_data_path)

--------------------------------------------------------------------------------
/model/save_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 10920):
                numbers.extend([0.000])  # fixed to 260 frames
            row = 0
            landmark_frame = []
            for i in range(0, len(numbers)):
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (260 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)

    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    return x_train, y_train


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(260, 42)))
    model.add(Dense(64, activation="softmax"))  # TODO: decide between softmax and linear here
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(260, 42)))  # returns a sequence of vectors of dimension 64
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(260, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model


def main(dirname):
    (x_train, y_train) = load_data(dirname)
    num_val_samples = (x_train.shape[0]) // 5
    num_epochs = 5
    all_scores = []

    for i in range(5):  # run 5 splits (k-fold cross-validation)
        print('Processing fold #', i)
        val_data = x_train[i*num_val_samples:(i+1)*num_val_samples]
        val_targets = y_train[i*num_val_samples:(i+1)*num_val_samples]
        partial_train_data = np.concatenate([x_train[:i*num_val_samples],
                                             x_train[(i+1)*num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i*num_val_samples],
                                                y_train[(i+1)*num_val_samples:]],
                                               axis=0)

        model = simple_rnn()

        print('Training stage')
        print('==============')
        history = model.fit(partial_train_data, partial_train_targets, epochs=3, batch_size=32)
        score, acc = model.evaluate(val_data, val_targets, batch_size=256, verbose=0)
        print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

    model.save('simpleRNN.h5')

if __name__ == '__main__':
    main("/Users/anna/SLR/seperate/traininput/")

--------------------------------------------------------------------------------
/model/not_k_fold.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional


def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 150):  # drop the trailing frames -> fixed at 150 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (150 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    print(X.shape)
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    tmp = [[x, y] for x, y in zip(x_train, y_train)]
    x_train = [n[0] for n in tmp]
    y_train = [n[1] for n in tmp]
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train[0:450], y_train[0:450], x_train[451:-1], y_train[451:-1]


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(150, 42)))
    model.add(Dense(64, activation="softmax"))  # TODO: decide between softmax and linear here
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(150, 42)))  # returns a sequence of vectors of dimension 64
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(150, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model


def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    #num_val_samples = (x_train.shape[0]) // 5
    #num_epochs = 5
    #all_scores = []
    '''
    for i in range(5):  # run 5 splits (k-fold cross-validation)
        print('Processing fold #', i)
        val_data = x_train[i*num_val_samples:(i+1)*num_val_samples]
        val_targets = y_train[i*num_val_samples:(i+1)*num_val_samples]
        partial_train_data = np.concatenate([x_train[:i*num_val_samples],
                                             x_train[(i+1)*num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i*num_val_samples],
                                                y_train[(i+1)*num_val_samples:]],
                                               axis=0)
    '''
    model = simple_rnn()

    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=200, batch_size=32, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=32, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

    model.save('simpleRNN.h5')

if __name__ == '__main__':
    main("/Users/jongwook/Desktop/traininput/")

--------------------------------------------------------------------------------
/util/build_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax
import argparse

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = landmark_frame.reshape(-1, 42)  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)

    x_train = X
    x_train = np.array(x_train)
    return x_train


# prediction
def load_label():
    label = {}
    count = 1
    listfile = ['Apple', 'Bird', 'Blue', 'Cents', 'Child', 'Cow', 'Drink', 'Green', 'Hello', 'Like', 'Metoo', 'No',
                'Orange', 'Pig', 'Sorry', 'Thankyou', 'Where', 'Who', 'Yes', 'You']
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label

def main(input_data_path, output_data_path):
    comp = 'bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
    mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu'
    # bazel build command
    cmd = 'GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt'
    # MediaPipe run command
    listfile = os.listdir(input_data_path)
    output_dir = ""
    filel = []
    for file in listfile:
        if ".DS_" in file:
            continue
        word = file + '/'
        fullfilename = os.listdir(input_data_path + word)
        # collect the names of all videos in the subdirectory
        if not(os.path.isdir(output_data_path + "_" + word)):
            os.mkdir(output_data_path + "_" + word)
        if not(os.path.isdir(output_data_path + word)):
            os.mkdir(output_data_path + word)
        os.system(comp)
        outputfilelist = os.listdir(output_data_path + '_' + word)
        for mp4list in fullfilename:
            if ".DS_Store" in mp4list:
                continue
            filel.append(mp4list)
            inputfilen = ' --input_video_path=' + input_data_path + word + mp4list
            outputfilen = ' --output_video_path=' + output_data_path + '_' + word + mp4list
            cmdret = cmd + inputfilen + outputfilen
            os.system(cmdret)

    # MediaPipe processing finished:
    output_dir = output_data_path
    x_test = load_data(output_dir)
    new_model = tf.keras.models.load_model('simpleRNN1.h5')
    #new_model.summary()

    labels = load_label()

    # use the model

    xhat = x_test
    yhat = new_model.predict(xhat)

    predictions = np.array([np.argmax(pred) for pred in yhat])
    rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
    s = 0
    filel = np.array(filel)
    print(filel)
    for i in predictions:
        if s == 4:
            continue
        txtpath = "/Users/jongwook/Desktop/word" + str(s) + ".txt"
        with open(txtpath, "w") as f:
            f.write(filel[s])
            f.write(" ")
            f.write(rev_labels[i])
        s += 1

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='operating Mediapipe')
    parser.add_argument("--input_data_path", help=" ")
    parser.add_argument("--output_data_path", help=" ")
    args = parser.parse_args()
    input_data_path = args.input_data_path
    output_data_path = args.output_data_path
    main(input_data_path, output_data_path)

--------------------------------------------------------------------------------
/model/training_onehot.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional


def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    print(X.shape)
    tmp = [[x, y] for x, y in zip(X, Y)]
    random.shuffle(tmp)

    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    '''
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)
    '''
    # fixed vocabulary so the one-hot indices stay stable across runs
    text = "Apple Bird Sorry"
    t = Tokenizer()
    t.fit_on_texts([text])
    print(t.word_index)
    encoded = t.texts_to_sequences([Y])[0]
    print(encoded)
    one_hot = to_categorical(encoded)
    (x_train, y_train) = X, one_hot

    x_train = np.array(x_train)
    y_train = np.array(y_train)
    a = x_train.shape[0]
    a = a // 3  # two thirds for training, one third for validation
    return x_train[0:2*a], y_train[0:2*a], x_train[2*a:-1], y_train[2*a:-1]


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(200, 42)))
    model.add(Dense(64, activation="softmax"))  # TODO: decide between softmax and linear here
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(100, 42)))  # returns a sequence of vectors of dimension 64
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(100, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

def build_model():
    model = Sequential()
    model.add(layers.LSTM(32, return_sequences=True,
                          input_shape=(100, 42)))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(4, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    num_val_samples = (x_train.shape[0]) // 5
    model = build_model()
    '''
    for i in range(5):  # run 5 splits (k-fold cross-validation)
        print('Processing fold #', i)
        val_data = x_train[i*num_val_samples:(i+1)*num_val_samples]
        val_targets = y_train[i*num_val_samples:(i+1)*num_val_samples]
        partial_train_data = np.concatenate([x_train[:i*num_val_samples],
                                             x_train[(i+1)*num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i*num_val_samples],
                                                y_train[(i+1)*num_val_samples:]],
                                               axis=0)
    '''
    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=32, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))
    model.save('simpleRNN.h5')

if __name__ == '__main__':
    main("/Users/jongwook/Desktop/test1/outputdata/")
/modified_mediapipe/demo_run_graph_main.cc:
--------------------------------------------------------------------------------
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// An example of sending OpenCV webcam frames into a MediaPipe graph.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/commandlineflags.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/opencv_highgui_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/opencv_video_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"
using namespace std;
string input_video_new = "";   // Input video path (raw flag string).
string output_video_new = "";  // Output video path (raw flag string).
constexpr char kInputStream[] = "input_video";
constexpr char kOutputStream[] = "output_video";
constexpr char kWindowName[] = "MediaPipe";

DEFINE_string(
    calculator_graph_config_file, "",
    "Name of file containing text format CalculatorGraphConfig proto.");
DEFINE_string(input_video_path, "",
              "Full path of video to load. "
              "If not provided, attempt to use a webcam.");
DEFINE_string(output_video_path, "",
              "Full path of where to save result (.mp4 only). "
              "If not provided, show result in a window.");

::mediapipe::Status RunMPPGraph() {
  std::string calculator_graph_config_contents;
  MP_RETURN_IF_ERROR(mediapipe::file::GetContents(
      FLAGS_calculator_graph_config_file, &calculator_graph_config_contents));
  LOG(INFO) << "Get calculator graph config contents: "
            << calculator_graph_config_contents;
  mediapipe::CalculatorGraphConfig config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          calculator_graph_config_contents);

  LOG(INFO) << "Initialize the calculator graph.";
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));

  LOG(INFO) << "Initialize the camera or load the video.";
  cv::VideoCapture capture;
  const bool load_video = !FLAGS_input_video_path.empty();
  if (load_video) {
    capture.open(FLAGS_input_video_path);
  } else {
    capture.open(0);
  }
  RET_CHECK(capture.isOpened());

  cv::VideoWriter writer;
  const bool save_video = !FLAGS_output_video_path.empty();
  if (save_video) {
    LOG(INFO) << "Prepare video writer.";
    cv::Mat test_frame;
    capture.read(test_frame);                    // Consume first frame.
    capture.set(cv::CAP_PROP_POS_AVI_RATIO, 0);  // Rewind to beginning.
    writer.open(FLAGS_output_video_path,
                mediapipe::fourcc('a', 'v', 'c', '1'),  // .mp4
                capture.get(cv::CAP_PROP_FPS), test_frame.size());
    RET_CHECK(writer.isOpened());
  } else {
    cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1);
  }

  LOG(INFO) << "Start running the calculator graph.";
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller(kOutputStream));
  MP_RETURN_IF_ERROR(graph.StartRun({}));

  LOG(INFO) << "Start grabbing and processing frames.";
  size_t frame_timestamp = 0;
  bool grab_frames = true;
  while (grab_frames) {
    // Capture opencv camera or video frame.
    cv::Mat camera_frame_raw;
    capture >> camera_frame_raw;
    if (camera_frame_raw.empty()) break;  // End of video.
    cv::Mat camera_frame;
    cv::cvtColor(camera_frame_raw, camera_frame, cv::COLOR_BGR2RGB);
    if (!load_video) {
      cv::flip(camera_frame, camera_frame, /*flipcode=HORIZONTAL*/ 1);
    }

    // Wrap Mat into an ImageFrame.
    auto input_frame = absl::make_unique<mediapipe::ImageFrame>(
        mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows,
        mediapipe::ImageFrame::kDefaultAlignmentBoundary);
    cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get());
    camera_frame.copyTo(input_frame_mat);

    // Send image packet into the graph.
    MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
        kInputStream, mediapipe::Adopt(input_frame.release())
                          .At(mediapipe::Timestamp(frame_timestamp++))));

    // Get the graph result packet, or stop if that fails.
    mediapipe::Packet packet;
    if (!poller.Next(&packet)) break;
    auto& output_frame = packet.Get<mediapipe::ImageFrame>();

    // Convert back to opencv for display or saving.
    cv::Mat output_frame_mat = mediapipe::formats::MatView(&output_frame);
    cv::cvtColor(output_frame_mat, output_frame_mat, cv::COLOR_RGB2BGR);
    if (save_video) {
      writer.write(output_frame_mat);
    } else {
      cv::imshow(kWindowName, output_frame_mat);
      // Press any key to exit.
      const int pressed_key = cv::waitKey(5);
      if (pressed_key >= 0 && pressed_key != 255) grab_frames = false;
    }
  }

  LOG(INFO) << "Shutting down.";
  if (writer.isOpened()) writer.release();
  MP_RETURN_IF_ERROR(graph.CloseInputStream(kInputStream));
  return graph.WaitUntilDone();
}

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  input_video_new = argv[argc - 2];   // Added: raw --input_video_path argument.
  output_video_new = argv[argc - 1];  // Added: raw --output_video_path argument.
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  ::mediapipe::Status run_status = RunMPPGraph();
  if (!run_status.ok()) {
    LOG(ERROR) << "Failed to run the graph: " << run_status.message();
  } else {
    LOG(INFO) << "Success!";
  }
  return 0;
}
--------------------------------------------------------------------------------
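The two lines added at the top of main() copy raw argv strings before gflags parses them, so this build assumes --input_video_path and --output_video_path are always the last two command-line arguments. The modified calculators below parse those raw strings to decide where to append the landmark text file: 42 floats (21 landmarks times x and y) per frame in which a hand was detected. A quick sanity check of such a file, with a hypothetical path:

    with open("outputdata/Apple/video1.txt") as f:   # hypothetical output file
        values = f.read().split()
    assert len(values) % 42 == 0                     # 42 floats per detected frame
    print(len(values) // 42, "frames with a detected hand")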
/model/20_train_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional


def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    XT = []
    YT = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        k = 0
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            # Zero-pad to a fixed 300 frames (300 frames * 42 values = 12600).
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])
            row = 42 * 8  # Skip the first 8 frames.
            landmark_frame = []
            for i in range(0, 100):  # Keep the next 100 frames, dropping the rest.
                landmark_frame.extend(numbers[row:row + 42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # Reshape to 2D: (100, 42).
            if (k % 3 == 2):  # Hold out every third clip for testing (2:1 split).
                XT.append(np.array(landmark_frame))
                YT.append(wordname)
            else:
                X.append(np.array(landmark_frame))
                Y.append(wordname)
            k += 1

    X = np.array(X)
    Y = np.array(Y)
    XT = np.array(XT)
    YT = np.array(YT)

    tmp = [[x, y] for x, y in zip(X, Y)]
    random.shuffle(tmp)

    tmp1 = [[xt, yt] for xt, yt in zip(XT, YT)]
    random.shuffle(tmp1)

    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    XT = [n[0] for n in tmp1]
    YT = [n[1] for n in tmp1]
    '''
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)
    '''
    # Fixed 20-word vocabulary. Tokenizer indices start at 1, so the one-hot
    # vectors have 21 columns and column 0 is never used.
    text = "Apple Bird Blue Cents Child Cow Drink Green Hello Like Metoo No Orange Pig Sorry Thankyou Where Who Yes You"
    t = Tokenizer()
    t.fit_on_texts([text])
    encoded = t.texts_to_sequences([Y])[0]
    encoded2 = t.texts_to_sequences([YT])[0]
    one_hot = to_categorical(encoded)
    one_hot2 = to_categorical(encoded2)

    (x_train, y_train) = X, one_hot
    (x_test, y_test) = XT, one_hot2
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    return x_train, y_train, x_test, y_test


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(100, 42)))  # Match the (100, 42) sequences from load_data.
    model.add(Dense(64, activation="softmax"))  # TODO: decide whether softmax or linear is the right choice here.
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21, activation="softmax"))  # Softmax output to pair with categorical_crossentropy.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(100, 42)))  # Returns a sequence of vectors of dimension 64.
    model.add(layers.LSTM(32, return_sequences=True))  # Returns a sequence of vectors of dimension 32.
    model.add(layers.LSTM(32))  # Returns a single vector of dimension 32.
    model.add(layers.Dense(21, activation='softmax'))  # 20 words + unused index 0.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(100, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(21, activation='softmax'))  # 20 words + unused index 0.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def build_model():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True,
                          input_shape=(100, 42)))  # Returns a sequence of vectors of dimension 64.
    model.add(layers.LSTM(32, return_sequences=True))  # Returns a sequence of vectors of dimension 32.
    model.add(layers.LSTM(32))  # Returns a single vector of dimension 32.
    model.add(layers.Dense(21, activation='softmax'))  # 20 words + unused index 0.
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    num_val_samples = (x_train.shape[0]) // 5
    model = build_model()
    '''
    for i in range(5):  # Run with 5 splits: k-fold cross-validation.
        print('Processing fold #', i)
        val_data = x_train[i * num_val_samples:(i + 1) * num_val_samples]
        val_targets = y_train[i * num_val_samples:(i + 1) * num_val_samples]
        partial_train_data = np.concatenate([x_train[:i * num_val_samples],
                                             x_train[(i + 1) * num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i * num_val_samples],
                                                y_train[(i + 1) * num_val_samples:]],
                                               axis=0)
    '''
    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=32, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))
    model.save('simpleRNN.h5')


if __name__ == '__main__':
    main("/Users/jongwook/Desktop/traindata/")
--------------------------------------------------------------------------------
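build_model() ends in Dense(21) even though the vocabulary has 20 words: Keras' Tokenizer assigns word indices starting at 1, and to_categorical() sizes its output by the largest index, so the labels carry an extra, always-zero column 0. A short sketch of that behaviour with a cut-down word list:

    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.utils import to_categorical

    t = Tokenizer()
    t.fit_on_texts(["Apple Bird Blue"])                 # 3 words -> indices 1..3
    encoded = t.texts_to_sequences([["Apple", "Blue"]])[0]
    print(encoded)                                      # [1, 3]
    print(to_categorical(encoded).shape)                # (2, 4): column 0 unused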
/modified_mediapipe/tflite_tensors_to_landmarks_calculator.cc:
--------------------------------------------------------------------------------
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/* absolute-position landmarks */
#include "mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "tensorflow/lite/interpreter.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/status_util.h"
#include <fstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

extern string input_video_new;
extern string output_video_new;
static bool cpos = false;
static vector<pair<float, float>> landpos(21);
namespace mediapipe {

// A calculator for converting TFLite tensors from regression models into
// landmarks.
//
// Input:
//  TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32. Only the first
//            tensor will be used. The size of the values must be
//            (num_dimension x num_landmarks).
// Output:
//  LANDMARKS(optional) - Result MediaPipe landmarks.
//  NORM_LANDMARKS(optional) - Result MediaPipe normalized landmarks.
//
// Notes:
//   To output normalized landmarks, user must provide the original input image
//   size to the model using calculator option input_image_width and
//   input_image_height.
// Usage example:
// node {
//   calculator: "TfLiteTensorsToLandmarksCalculator"
//   input_stream: "TENSORS:landmark_tensors"
//   output_stream: "LANDMARKS:landmarks"
//   output_stream: "NORM_LANDMARKS:landmarks"
//   options: {
//     [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] {
//       num_landmarks: 21
//
//       input_image_width: 256
//       input_image_height: 256
//     }
//   }
// }
class TfLiteTensorsToLandmarksCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;

 private:
  ::mediapipe::Status LoadOptions(CalculatorContext* cc);
  int num_landmarks_ = 0;

  ::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions options_;
};
REGISTER_CALCULATOR(TfLiteTensorsToLandmarksCalculator);

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(!cc->Inputs().GetTags().empty());
  RET_CHECK(!cc->Outputs().GetTags().empty());

  if (cc->Inputs().HasTag("TENSORS")) {
    cc->Inputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
  }

  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs().Tag("LANDMARKS").Set<std::vector<Landmark>>();
  }

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    cc->Outputs().Tag("NORM_LANDMARKS").Set<std::vector<NormalizedLandmark>>();
  }

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Open(
    CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  MP_RETURN_IF_ERROR(LoadOptions(cc));

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for getting normalized landmarks.";
  }
  if (cc->Outputs().HasTag("LANDMARKS") &&
      (options_.flip_vertically() || options_.flip_horizontally())) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for using flip_vertically option "
           "when outputting landmarks in absolute coordinates.";
  }
  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Process(
    CalculatorContext* cc) {
  static int idx = 0;
  if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
    return ::mediapipe::OkStatus();
  }

  const auto& input_tensors =
      cc->Inputs().Tag("TENSORS").Get<std::vector<TfLiteTensor>>();

  const TfLiteTensor* raw_tensor = &input_tensors[0];

  int num_values = 1;
  for (int i = 0; i < raw_tensor->dims->size; ++i) {
    num_values *= raw_tensor->dims->data[i];
  }
  const int num_dimensions = num_values / num_landmarks_;
  // Landmarks must have at most 3 dimensions. Otherwise please consider
  // using matrix.
  CHECK_LE(num_dimensions, 3);
  CHECK_GT(num_dimensions, 0);

  const float* raw_landmarks = raw_tensor->data.f;

  auto output_landmarks = absl::make_unique<std::vector<Landmark>>();

  for (int ld = 0; ld < num_landmarks_; ++ld) {
    const int offset = ld * num_dimensions;
    Landmark landmark;

    if (options_.flip_horizontally()) {
      landmark.set_x(options_.input_image_width() - raw_landmarks[offset]);
    } else {
      landmark.set_x(raw_landmarks[offset]);
    }
    if (num_dimensions > 1) {
      if (options_.flip_vertically()) {
        landmark.set_y(options_.input_image_height() -
                       raw_landmarks[offset + 1]);
      } else {
        landmark.set_y(raw_landmarks[offset + 1]);
      }
    }
    if (num_dimensions > 2) {
      landmark.set_z(raw_landmarks[offset + 2]);
    }
    output_landmarks->push_back(landmark);
  }

  // Output normalized landmarks if required.
  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    auto output_norm_landmarks =
        absl::make_unique<std::vector<NormalizedLandmark>>();
    // Added section.
    // Extract just the video file name (without its extension).
    string video_fname = "";
    bool isTrue = false;
    int slx = 0;

    for (slx = input_video_new.size() - 1; input_video_new[slx] != '/'; slx--) {
      if (isTrue) {
        video_fname = input_video_new[slx] + video_fname;
      }
      if (input_video_new[slx] == '.') isTrue = true;
    }
    slx--;
    string dir_name = "/";  // Extract the video's directory name.
    for (; input_video_new[slx] != '/'; slx--) {
      dir_name = input_video_new[slx] + dir_name;
    }
    string output_path_cp = "";  // Holds the leading part of the output file path.
    int j = 0;
    for (; output_video_new[j] != '='; j++);  // Skip to just past '='.
    j++;
    for (; output_video_new[j] != '_'; j++) {
      output_path_cp.push_back(output_video_new[j]);
    }
    string str = output_path_cp + dir_name + video_fname + ".txt";
    ofstream out(str, std::ios_base::out | std::ios_base::app);
    int i = 0;
    for (const auto& landmark : *output_landmarks) {
      NormalizedLandmark norm_landmark;
      norm_landmark.set_x(static_cast<float>(landmark.x()) /
                          options_.input_image_width());
      norm_landmark.set_y(static_cast<float>(landmark.y()) /
                          options_.input_image_height());
      norm_landmark.set_z(landmark.z() / options_.normalize_z());
      // if (cpos) {
      out << static_cast<float>(landmark.x()) / options_.input_image_width() << " ";
      out << static_cast<float>(landmark.y()) / options_.input_image_height() << " ";
      // }
      // landpos[i] = make_pair(static_cast<float>(landmark.x()) / options_.input_image_width(),
      //                        static_cast<float>(landmark.y()) / options_.input_image_height());
      i = i + 1;
      output_norm_landmarks->push_back(norm_landmark);
    }
    cpos = true;
    idx++;
    out.close();
    // End of added section.
    cc->Outputs()
        .Tag("NORM_LANDMARKS")
        .Add(output_norm_landmarks.release(), cc->InputTimestamp());
  }
  // Output absolute landmarks.
  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs()
        .Tag("LANDMARKS")
        .Add(output_landmarks.release(), cc->InputTimestamp());
  }

  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::LoadOptions(
    CalculatorContext* cc) {
  // Get calculator options specified in the graph.
  options_ =
      cc->Options<::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions>();
  num_landmarks_ = options_.num_landmarks();

  return ::mediapipe::OkStatus();
}
}  // namespace mediapipe
--------------------------------------------------------------------------------
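The string walking in the added section is dense; its net effect is to rebuild the landmark file path from the two raw flag strings. The same derivation in Python, for a hypothetical invocation (paths are illustrative only):

    input_flag = "--input_video_path=/data/Apple/video1.mp4"
    output_flag = "--output_video_path=/out/_Apple/video1.mp4"

    stem = input_flag.rsplit("/", 1)[1].rsplit(".", 1)[0]     # "video1"
    word_dir = input_flag.rsplit("/", 2)[1]                   # "Apple"
    out_root = output_flag.split("=", 1)[1].split("_", 1)[0]  # "/out/"
    print(out_root + word_dir + "/" + stem + ".txt")          # /out/Apple/video1.txt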
/modified_mediapipe/tflite_tensors_to_landmarks_calculator(relative_position_ver).cc:
--------------------------------------------------------------------------------
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "tensorflow/lite/interpreter.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/status_util.h"
#include <fstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

extern string input_video_new;
extern string output_video_new;
static bool cpos = false;
static vector<pair<float, float>> landpos(21);
namespace mediapipe {

// A calculator for converting TFLite tensors from regression models into
// landmarks.
//
// Input:
//  TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32. Only the first
//            tensor will be used. The size of the values must be
//            (num_dimension x num_landmarks).
// Output:
//  LANDMARKS(optional) - Result MediaPipe landmarks.
//  NORM_LANDMARKS(optional) - Result MediaPipe normalized landmarks.
//
// Notes:
//   To output normalized landmarks, user must provide the original input image
//   size to the model using calculator option input_image_width and
//   input_image_height.
// Usage example:
// node {
//   calculator: "TfLiteTensorsToLandmarksCalculator"
//   input_stream: "TENSORS:landmark_tensors"
//   output_stream: "LANDMARKS:landmarks"
//   output_stream: "NORM_LANDMARKS:landmarks"
//   options: {
//     [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] {
//       num_landmarks: 21
//
//       input_image_width: 256
//       input_image_height: 256
//     }
//   }
// }
class TfLiteTensorsToLandmarksCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;

 private:
  ::mediapipe::Status LoadOptions(CalculatorContext* cc);
  int num_landmarks_ = 0;

  ::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions options_;
};
REGISTER_CALCULATOR(TfLiteTensorsToLandmarksCalculator);

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(!cc->Inputs().GetTags().empty());
  RET_CHECK(!cc->Outputs().GetTags().empty());

  if (cc->Inputs().HasTag("TENSORS")) {
    cc->Inputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
  }

  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs().Tag("LANDMARKS").Set<std::vector<Landmark>>();
  }

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    cc->Outputs().Tag("NORM_LANDMARKS").Set<std::vector<NormalizedLandmark>>();
  }

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Open(
    CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  MP_RETURN_IF_ERROR(LoadOptions(cc));

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for getting normalized landmarks.";
  }
  if (cc->Outputs().HasTag("LANDMARKS") &&
      (options_.flip_vertically() || options_.flip_horizontally())) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for using flip_vertically option "
           "when outputting landmarks in absolute coordinates.";
  }
  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Process(
    CalculatorContext* cc) {
  static int idx = 0;
  if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
    return ::mediapipe::OkStatus();
  }

  const auto& input_tensors =
      cc->Inputs().Tag("TENSORS").Get<std::vector<TfLiteTensor>>();

  const TfLiteTensor* raw_tensor = &input_tensors[0];

  int num_values = 1;
  for (int i = 0; i < raw_tensor->dims->size; ++i) {
    num_values *= raw_tensor->dims->data[i];
  }
  const int num_dimensions = num_values / num_landmarks_;
  // Landmarks must have at most 3 dimensions. Otherwise please consider
  // using matrix.
  CHECK_LE(num_dimensions, 3);
  CHECK_GT(num_dimensions, 0);

  const float* raw_landmarks = raw_tensor->data.f;

  auto output_landmarks = absl::make_unique<std::vector<Landmark>>();

  for (int ld = 0; ld < num_landmarks_; ++ld) {
    const int offset = ld * num_dimensions;
    Landmark landmark;

    if (options_.flip_horizontally()) {
      landmark.set_x(options_.input_image_width() - raw_landmarks[offset]);
    } else {
      landmark.set_x(raw_landmarks[offset]);
    }
    if (num_dimensions > 1) {
      if (options_.flip_vertically()) {
        landmark.set_y(options_.input_image_height() -
                       raw_landmarks[offset + 1]);
      } else {
        landmark.set_y(raw_landmarks[offset + 1]);
      }
    }
    if (num_dimensions > 2) {
      landmark.set_z(raw_landmarks[offset + 2]);
    }
    output_landmarks->push_back(landmark);
  }

  // Output normalized landmarks if required.
  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    auto output_norm_landmarks =
        absl::make_unique<std::vector<NormalizedLandmark>>();
    // Added section: write frame-to-frame deltas instead of positions.
    // Extract just the video file name (without its extension).
    string video_fname = "";
    bool isTrue = false;
    int slx = 0;

    for (slx = input_video_new.size() - 1; input_video_new[slx] != '/'; slx--) {
      if (isTrue) {
        video_fname = input_video_new[slx] + video_fname;
      }
      if (input_video_new[slx] == '.') isTrue = true;
    }
    slx--;
    string dir_name = "/";  // Extract the video's directory name.
    for (; input_video_new[slx] != '/'; slx--) {
      dir_name = input_video_new[slx] + dir_name;
    }
    string output_path_cp = "";  // Holds the leading part of the output file path.
    int j = 0;
    for (; output_video_new[j] != '='; j++);  // Skip to just past '='.
    j++;
    for (; output_video_new[j] != '_'; j++) {
      output_path_cp.push_back(output_video_new[j]);
    }
    string str = output_path_cp + dir_name + video_fname + ".txt";
    ofstream out(str, std::ios_base::out | std::ios_base::app);
    int i = 0;
    for (const auto& landmark : *output_landmarks) {
      NormalizedLandmark norm_landmark;
      norm_landmark.set_x(static_cast<float>(landmark.x()) /
                          options_.input_image_width());
      norm_landmark.set_y(static_cast<float>(landmark.y()) /
                          options_.input_image_height());
      norm_landmark.set_z(landmark.z() / options_.normalize_z());
      if (cpos) {  // Skip the first detected frame; there is no previous position yet.
        out << static_cast<float>(landmark.x()) / options_.input_image_width() - landpos[i].first << " ";
        out << static_cast<float>(landmark.y()) / options_.input_image_height() - landpos[i].second << " ";
      }
      landpos[i] = make_pair(static_cast<float>(landmark.x()) / options_.input_image_width(),
                             static_cast<float>(landmark.y()) / options_.input_image_height());
      i = i + 1;
      output_norm_landmarks->push_back(norm_landmark);
    }
    cpos = true;
    idx++;
    out.close();
    // End of added section.
    cc->Outputs()
        .Tag("NORM_LANDMARKS")
        .Add(output_norm_landmarks.release(), cc->InputTimestamp());
  }
  // Output absolute landmarks.
  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs()
        .Tag("LANDMARKS")
        .Add(output_landmarks.release(), cc->InputTimestamp());
  }

  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::LoadOptions(
    CalculatorContext* cc) {
  // Get calculator options specified in the graph.
  options_ =
      cc->Options<::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions>();
  num_landmarks_ = options_.num_landmarks();

  return ::mediapipe::OkStatus();
}
}  // namespace mediapipe
--------------------------------------------------------------------------------
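Unlike the absolute version, this variant emits nothing for the first detected frame (cpos is still false) and then appends per-landmark x/y displacements from the previous frame; cpos is never reset, which works because each video runs in its own process. The trajectory relative to the first detected frame can be recovered with a cumulative sum. A minimal sketch, assuming a hypothetical delta file:

    import numpy as np

    deltas = np.loadtxt("outputdata/Apple/video1.txt").reshape(-1, 42)
    positions = np.cumsum(deltas, axis=0)  # offsets from the first detected frame
    print(positions.shape)                 # (detected frames - 1, 42)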