├── model
│   ├── RNNs.py
│   ├── predict.py
│   ├── LSTM.py
│   ├── LSTM_separate.py
│   ├── predict_three_please.py
│   ├── predict_onehot.py
│   ├── 20_test_model.py
│   ├── save_model.py
│   ├── not_k_fold.py
│   ├── training_onehot.py
│   └── 20_train_model.py
├── util
│   ├── make_pickle.py
│   ├── build.py
│   ├── analysis.py
│   ├── test.py
│   └── build_model.py
├── common
│   └── load_data.py
├── README.md
└── modified_mediapipe
    ├── demo_run_graph_main.cc
    ├── tflite_tensors_to_landmarks_calculator.cc
    └── tflite_tensors_to_landmarks_calculator(relative_position_ver).cc

/model/RNNs.py:
--------------------------------------------------------------------------------
# Compare Simple RNN, LSTM, and GRU models
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Bidirectional
from keras import layers

def simple_rnn():
    model_RNN = Sequential()
    model_RNN.add(SimpleRNN(units=64, input_shape=(260, 42)))
    model_RNN.add(Dense(10, activation="relu"))  # TODO: decide between softmax and linear here
    model_RNN.add(Dense(17))
    model_RNN.compile(loss='mse', optimizer='adam')

    return model_RNN

def rnn_lstm():
    model_LSTM = Sequential()
    model_LSTM.add(layers.LSTM(64, return_sequences=True, input_shape=(260, 42)))  # time_steps fixed at 260, features: 42
    model_LSTM.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model_LSTM.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model_LSTM.add(layers.Dense(9, activation='softmax'))  # number of words: 9 -> 17
    model_LSTM.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model_LSTM

def bidirectional_lstm():
    Bidirectional_LSTM = Sequential()
    Bidirectional_LSTM.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(260, 42)))
    Bidirectional_LSTM.add(layers.Bidirectional(layers.LSTM(32)))
    Bidirectional_LSTM.add(layers.Dense(17, activation='softmax'))
    Bidirectional_LSTM.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    return Bidirectional_LSTM
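A minimal usage sketch for these builders (an assumption, not part of the repo: it pairs simple_rnn() with the loader in common/load_data.py, uses a placeholder data path, and assumes the final Dense width matches the one-hot label width):

    # Hypothetical usage -- x_train: (N, 260, 42), y_train: one-hot labels
    from common.load_data import load_data
    x_train, y_train = load_data("/path/to/traindata/")
    model = simple_rnn()
    model.fit(x_train, y_train, epochs=3, batch_size=32)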
--------------------------------------------------------------------------------
/model/predict.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax
from common.load_data import load_data  # load_data is defined in common/load_data.py

def load_labels(dirname):
    label = {}
    count = 1
    listfile = os.listdir(dirname)
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label

x_test, y_test = load_data("/Users/anna/SLR/Seperate/testinput/")
new_model = tf.keras.models.load_model('simpleRNN.h5')
new_model.summary()

# Evaluate the model
#loss, acc = new_model.evaluate(x_test, y_test, verbose=2)
#print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

#print(new_model.predict(x_test).shape)


# Use the model

xhat = x_test
yhat = new_model.predict(xhat)
print('## yhat ##')
#labels = load_labels(dirname)
a = xhat.shape[0]
for i in range(a):
    print('True: ' + str(argmax(y_test[i])) + ', Predict: ' + str(yhat[i]))

--------------------------------------------------------------------------------
/util/make_pickle.py:
--------------------------------------------------------------------------------
import numpy as np
import pickle
import os
import sys
import argparse
import random

def search(dirname):
    listfile = os.listdir(dirname)
    predict = []
    maxlen = 0
    listlength = []
    # first pass: find the maximum feature length across all txt files
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for namet in textlist:
            textnamed = dirname + wordname + "/" + namet
            with open(textnamed) as datad:
                lend = [[i for i in line.split(' ')][:-1] for line in datad.readlines()]
                listlength.append(len(lend[0]))
    maxlen = max(listlength)
    # second pass: zero-pad every sample to maxlen and attach its label
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            textname = dirname + wordname + "/" + text
            with open(textname) as data:
                numbers = [[i for i in line.split(' ')][:-1] for line in data.readlines()]
                #print(len(numbers[0]))
                for i in range(len(numbers[0]), maxlen):
                    numbers[0].extend([0.000])
                numbers.append(wordname)
                predict.append(numbers)
    random.shuffle(predict)
    return predict


def main(inputfile_path, output_path):
    ret = search(inputfile_path)
    np.shape(ret)
    out_file = output_path + 'test_data.pkl'
    with open(out_file, 'wb') as fout:
        pickle.dump(ret, fout)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='make pkl_file')
    parser.add_argument("--input_file_path", help=" ")
    parser.add_argument("--output_path", help=" ")
    args = parser.parse_args()
    input_file_path = args.input_file_path
    output_path = args.output_path
    main(input_file_path, output_path)
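A small sketch of reading the pickle back (hypothetical, not part of the repo; each entry is [padded_feature_row, word_label], matching what search() builds above):

    import pickle
    with open("test_data.pkl", "rb") as fin:
        samples = pickle.load(fin)
    features = [s[0] for s in samples]  # zero-padded landmark values, as parsed
    labels = [s[1] for s in samples]    # word names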
--------------------------------------------------------------------------------
/util/build.py:
--------------------------------------------------------------------------------
import os
import sys
import argparse

def main(input_data_path, output_data_path):
    comp = 'bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
    mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu'
    # bazel build command
    cmd = 'GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt'
    # MediaPipe run command
    listfile = os.listdir(input_data_path)
    for file in listfile:
        # find the subdirectory (word) folders under the input directory
        if not(os.path.isdir(input_data_path + file)):  # ignore .DS_Store
            continue
        word = file + '/'
        fullfilename = os.listdir(input_data_path + word)
        # collect the names of all videos in the subdirectory
        if not(os.path.isdir(output_data_path + "_" + word)):
            os.mkdir(output_data_path + "_" + word)
        if not(os.path.isdir(output_data_path + word)):
            os.mkdir(output_data_path + word)
        os.system(comp)
        outputfilelist = os.listdir(output_data_path + '_' + word)  # already-processed files, so they are skipped
        for mp4list in fullfilename:
            if mp4list in outputfilelist:
                continue
            if ".DS_Store" in mp4list:  # ignore .DS_Store
                continue
            inputfilen = ' --input_video_path=' + input_data_path + word + mp4list
            outputfilen = ' --output_video_path=' + output_data_path + '_' + word + mp4list
            cmdret = cmd + inputfilen + outputfilen
            os.system(cmdret)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='operating Mediapipe')
    parser.add_argument("--input_data_path", help=" ")
    parser.add_argument("--output_data_path", help=" ")
    args = parser.parse_args()
    input_data_path = args.input_data_path
    output_data_path = args.output_data_path
    #print(input_data_path)
    main(input_data_path, output_data_path)

--------------------------------------------------------------------------------
/util/analysis.py:
--------------------------------------------------------------------------------
import xlsxwriter
import os
import sys
import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pylab as plt

def convert_tuple(value):
    if not isinstance(value, tuple):
        return value

    return str(value)

def make_xlxs(input_file_path, worksheet_name):
    with open(input_file_path, mode='r') as t:
        text = list(t)
        string = " ".join(text)
        landmarks = string.split(" ")

    workbook = xlsxwriter.Workbook(worksheet_name)
    worksheet = workbook.add_worksheet()

    col = 0

    # write one frame (42 values) per worksheet row
    for row in range(len(landmarks) // 42):
        landmark_frame = map(convert_tuple, landmarks[row*42:(row*42)+42])
        worksheet.write_row(row, col, landmark_frame)

    workbook.close()
    print(workbook)

def return_frame(dirname):
    frames = []  # list to save frame numbers in txt files
    listfile = os.listdir(dirname)
    for file in listfile:
        if "_" in file:  # ignore mp4 files
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            with open(textname, mode='r') as t:  # open txt files
                numbers = np.array([float(num) for num in t.read().split()])
                #print(len(numbers)/42)
                frames.append(int(len(numbers)/42))
    #print(frames)
    count = Counter(frames)
    plt.bar(count.keys(), count.values())
    plt.xlabel('frame number')
    plt.ylabel('count')
    plt.title('Histogram')
    plt.grid(True)
    plt.savefig('hist.png', dpi=300)


def main():
    #make_xlxs("/Users/anna/SLR/sentenceOutput/Sentence/bird-like-apple.txt", 'bird-like-apple.xlsx')
    return_frame("/Users/anna/SLR/twenty/traindata/")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/common/load_data.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:  # only keep the directories that contain text files
            continue
        wordname = file
        if not(os.path.isdir(dirname + file)):  # ignore .DS_Store
            continue
        textlist = os.listdir(dirname + wordname)  # list of text files under the word directory
        for text in textlist:
            textname = dirname + wordname + "/" + text
            numbers = []
            #print(textname)
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]  # read the text file
            for i in range(len(numbers), 10920):  # zero-pad, assuming at most 260 frames per word video
                numbers.extend([0.000])  # fixed to 260 frames
            row = 0
            landmark_frame = []
            for i in range(0, len(numbers)):  # read in chunks of 42
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (260 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)

    t = Tokenizer()  # one-hot encode Y
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    #print(x_train[0])
    return x_train, y_train  # training data
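A quick shape check for load_data (a sketch with a placeholder path, not part of the repo):

    x_train, y_train = load_data("/path/to/traindata/")
    print(x_train.shape)  # (num_samples, 260, 42)
    print(y_train.shape)  # (num_samples, num_words + 1); Tokenizer indices start at 1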
--------------------------------------------------------------------------------
/model/LSTM.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
import argparse
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical


class Data:
    def __init__(self, pklname):
        X = []
        Y = []
        maxlength = 0
        with open(pklname, 'rb') as fin:
            frames = pickle.load(fin)
            for i, frame in enumerate(frames):
                features = frame[0]
                maxlength = len(features)
                word = frame[1]

                X.append(np.array(features))
                Y.append(word)
        X = np.array(X)
        Y = np.array(Y)

        t = Tokenizer()
        t.fit_on_texts(Y)
        #print(t.word_index)
        encoded = t.texts_to_sequences(Y)
        #print(encoded)
        one_hot = to_categorical(encoded)
        #print(one_hot)

        (x_train, y_train) = X, one_hot

        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_train, y_train
        self.length = maxlength

class RNN_LSTM(models.Model):
    def __init__(self, maxlen):
        x = layers.Input((maxlen,))
        h = layers.Embedding(maxlen, 128)(x)
        h = layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)(h)
        y = layers.Dense(9, activation='softmax')(h)
        super().__init__(x, y)

        # try using different optimizers and different optimizer configs
        self.compile(loss='binary_crossentropy',
                     optimizer='adam', metrics=['accuracy'])


class Machine:
    def __init__(self, pklname):
        self.data = Data(pklname)
        self.model = RNN_LSTM(self.data.length)

    def run(self, epochs=3, batch_size=32):
        data = self.data
        model = self.model
        print('Training stage')
        print('==============')
        model.fit(data.x_train, data.y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(data.x_test, data.y_test))

        score, acc = model.evaluate(data.x_test, data.y_test,
                                    batch_size=batch_size)
        print('Test performance: accuracy={0}, loss={1}'.format(acc, score))


def main(pklname):
    m = Machine(pklname)
    m.run()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='run Model')
    parser.add_argument("--pkl_data_path", help=" ")
    args = parser.parse_args()
    pkl_data_path = args.pkl_data_path
    main(pkl_data_path)

--------------------------------------------------------------------------------
/model/LSTM_separate.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers

import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical


class Data:
    def __init__(self):
        X = []   # training data
        Y = []   # training labels
        XT = []  # test data
        YT = []  # test labels
        maxlength = 0
        with open("/Users/jongwook/Desktop/output.pkl", 'rb') as fin:
            frames = pickle.load(fin)
            for i, frame in enumerate(frames):
                features = frame[0]
                maxlength = len(features)
                word = frame[1]
                if i % 3 != 0:  # every third sample goes to the test split
                    X.append(np.array(features))
                    Y.append(word)
                else:
                    XT.append(np.array(features))
                    YT.append(word)
        X = np.array(X)
        Y = np.array(Y)
        XT = np.array(XT)
        YT = np.array(YT)

        t = Tokenizer()
        t.fit_on_texts(Y)

        encoded = t.texts_to_sequences(Y)
        one_hot = to_categorical(encoded)
        t1 = Tokenizer()
        t1.fit_on_texts(YT)

        encoded1 = t1.texts_to_sequences(YT)
        one_hot1 = to_categorical(encoded1)

        (x_train, y_train) = X, one_hot
        (x_test, y_test) = XT, one_hot1

        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.maxlen = maxlength

class RNN_LSTM(models.Model):
    def __init__(self, maxlen):
        x = layers.Input((maxlen,))
        h = layers.Embedding(maxlen, 256)(x)
        h = layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)(h)
        y = layers.Dense(18, activation='softmax')(h)
        super().__init__(x, y)

        # try using different optimizers and different optimizer configs
        self.compile(loss='binary_crossentropy',
                     optimizer='adam', metrics=['accuracy'])


class Machine:
    def __init__(self):
        self.data = Data()
        self.model = RNN_LSTM(self.data.maxlen)

    def run(self, epochs=3, batch_size=32):
        data = self.data
        model = self.model
        print('Training stage')
        print('==============')
        history = model.fit(data.x_train, data.y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=(data.x_test, data.y_test))

        score, acc = model.evaluate(data.x_test, data.y_test,
                                    batch_size=batch_size)
        print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

def main():
    m = Machine()
    m.run()


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Sign language recognition with RNN and Mediapipe
Sign language gesture recognition using a recurrent neural network (RNN) with Mediapipe hand tracking.

This project is for academic purposes. Thanks to Google's Mediapipe team :)

## Data Preprocessing with hand tracking(Desktop)
Create training data on Desktop with input video using [Hand Tracking](https://github.com/google/mediapipe/blob/master/mediapipe/docs/hand_tracking_mobile_gpu.md).
Gesture recognition with a deep learning model can be trained on only **42 hand landmarks** per frame.

**CUSTOMIZE:**
- Use video input instead of the webcam on Desktop to train with video data
- Extract hand landmarks for every frame of one word and merge them into one txt file (see the parsing sketch below)
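For reference, a minimal sketch of how one such txt file maps back to per-frame landmarks (`landmarks.txt` is a hypothetical example file; each frame contributes 42 floats, i.e. 21 landmarks x 2 coordinates):

```python
import numpy as np

# A txt file is a flat, space-separated list of floats, 42 values per frame.
values = np.array([float(v) for v in open("landmarks.txt").read().split()])
frames = values.reshape(-1, 42)  # one row per frame
print(frames.shape)
```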
### 1. Set up Hand Tracking framework
* Install Mediapipe
```shell
git clone https://github.com/google/mediapipe.git
```
See the rest of the installation documents [here](https://mediapipe.readthedocs.io/en/latest/install.html).
* Replace the **tflite_tensors_to_landmarks_calculator.cc** file
```shell
cd mediapipe/mediapipe/calculators/tflite
rm tflite_tensors_to_landmarks_calculator.cc
```
and copy in the new tflite_tensors_to_landmarks_calculator.cc file from the modified_mediapipe folder.

* Replace the **demo_run_graph_main.cc** file
```shell
cd mediapipe/mediapipe/examples/desktop
rm demo_run_graph_main.cc
```
and copy in the new demo_run_graph_main.cc file from the modified_mediapipe folder.

### 2. Create your own training data
Make **train_videos** and **test_videos** folders with one subfolder per sign language word. Copy the **build.py** file from the util folder to your mediapipe directory. (Currently there may be a TabError; please fix the tabs manually.)
* Usage

To make the mp4 and txt files with mediapipe automatically, run
```shell
python build.py --input_data_path=[INPUT_PATH] --output_data_path=[OUTPUT_PATH]
```
inside the mediapipe directory.

Change INPUT_PATH and OUTPUT_PATH to your own directory paths. INPUT_PATH is the path to your input videos. OUTPUT_PATH is where all the hand-tracked mp4 files and txt files of 42 landmarks will be saved.

For example:
```shell
input_videos
├── Apple
│   ├── IMG_2733.MOV
│   ├── IMG_2734.MOV
│   ├── IMG_2735.MOV
│   └── IMG_2736.MOV
├── Bird
│   ├── IMG_2631.MOV
│   ├── IMG_2632.MOV
│   ├── IMG_2633.MOV
│   └── IMG_2634.MOV
└── Sorry
    ├── IMG_2472.MOV
    ├── IMG_2473.MOV
    ├── IMG_2474.MOV
    └── IMG_2475.MOV
...
```
OUTPUT_PATH should initially be an empty directory; when the build is done, the mp4 and txt files will be written to it.

Created folder example:
```shell
output_data
├── _Apple
│   ├── IMG_2733.mp4
│   ├── IMG_2734.mp4
│   ├── IMG_2735.mp4
│   └── IMG_2736.mp4
└── Apple
    ├── IMG_2472.txt
    ├── IMG_2473.txt
    ├── IMG_2474.txt
    └── IMG_2475.txt
...
```
(Do NOT use spaces or '_' in your folder paths or video names, e.g. Apple_pie (X))


### 3. Train RNN model

* Train
```shell
python LSTM.py --pkl_data_path=[PKL_FILE]
```
Set PKL_FILE to the path of the preprocessed pkl file (the flag name matches the argparse option in model/LSTM.py).

Watch [this video](https://www.youtube.com/watch?v=5epWNiv5EKk&t=77s) for the overall workflow.
[more details](https://www.slideshare.net/JiHyunKim204)
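After training, inference follows the pattern of the predict scripts in the model folder; a condensed sketch (the paths and the saved-model filename are placeholders):

```python
import numpy as np
import tensorflow as tf
from common.load_data import load_data

x_test, y_test = load_data("/path/to/testdata/")
model = tf.keras.models.load_model("simpleRNN.h5")
preds = np.argmax(model.predict(x_test), axis=1)
gold = np.argmax(y_test, axis=1)
print("Accuracy:", 100 * np.sum(preds == gold) / len(gold))
```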
--------------------------------------------------------------------------------
/model/predict_three_please.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 150):  # drop the trailing frames -> fixed at 150 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (150 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    print(X.shape)
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    tmp = [[x, y] for x, y in zip(x_train, y_train)]
    x_train = [n[0] for n in tmp]
    y_train = [n[1] for n in tmp]
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train, y_train


# prediction
def load_label(dirname):
    label = {}
    count = 1
    listfile = os.listdir(dirname)
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label


x_test, y_test = load_data("/Users/anna/SLR/OnlyThree/outputdata/")
new_model = tf.keras.models.load_model("/Users/anna/SLR/prj/simpleRNN2.h5")
new_model.summary()

labels = load_label("/Users/anna/SLR/OnlyThree/outputdata/")

# use the model

xhat = x_test
yhat = new_model.predict(xhat)
print('## yhat ##')


# prediction
predictions = np.array([np.argmax(pred) for pred in yhat])
Y = np.array([np.argmax(each) for each in y_test])
print(predictions)
rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
print(rev_labels)
with open("result.txt", "w") as f:
    f.write("gold, pred\n")
    for a, b in zip(Y, predictions):
        f.write("%s %s\n" % (rev_labels[a], rev_labels[b]))

acc = 100 * np.sum(predictions == Y) / len(Y)
print("Accuracy: ", acc)

--------------------------------------------------------------------------------
/model/predict_onehot.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    tmp = [[x, y] for x, y in zip(X, Y)]
    #random.shuffle(tmp)
    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    # fixed vocabulary so the one-hot indices stay stable across runs
    text = "Apple Bird Sorry"
    t = Tokenizer()
    t.fit_on_texts([text])
    print(t.word_index)
    encoded = t.texts_to_sequences([Y])[0]
    print(encoded)
    one_hot = to_categorical(encoded)


    (x_train, y_train) = X, one_hot
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train, y_train


# prediction
def load_label(dirname):
    label = {}
    count = 1
    listfile = os.listdir(dirname)
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label


x_test, y_test = load_data("/Users/jongwook/Desktop/test1/testdata/")
new_model = tf.keras.models.load_model('simpleRNN.h5')
new_model.summary()

labels = load_label("/Users/jongwook/Desktop/test1/testdata/")

# use the model

xhat = x_test
yhat = new_model.predict(xhat)
print('## yhat ##')


# prediction
predictions = np.array([np.argmax(pred) for pred in yhat])
Y = np.array([np.argmax(each) for each in y_test])
rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
print(rev_labels)
print(predictions)
with open("result.txt", "w") as f:
    f.write("gold, pred\n")
    for a, b in zip(Y, predictions):
        f.write("%s %s\n" % (rev_labels[a], rev_labels[b]))

acc = 100 * np.sum(predictions == Y) / len(Y)
print("Accuracy: ", acc)

--------------------------------------------------------------------------------
/model/20_test_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    tmp = [[x, y] for x, y in zip(X, Y)]
    random.shuffle(tmp)
    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    # fixed vocabulary so the one-hot indices stay stable across runs
    text = "Apple Bird Blue Cents Child Cow Drink Green Hello Like Metoo No Orange Pig Sorry Thankyou Where Who Yes You"

    t = Tokenizer()
    t.fit_on_texts([text])
    print(t.word_index)
    encoded = t.texts_to_sequences([Y])[0]
    print(encoded)
    one_hot = to_categorical(encoded)


    (x_train, y_train) = X, one_hot
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train, y_train


# prediction
def load_label():
    label = {}
    count = 1
    listfile = ['Apple', 'Bird', 'Blue', 'Cents', 'Child', 'Cow', 'Drink', 'Green', 'Hello', 'Like', 'Metoo', 'No',
                'Orange', 'Pig', 'Sorry', 'Thankyou', 'Where', 'Who', 'Yes', 'You']
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label
x_test, y_test = load_data("/Users/jongwook/Desktop/testdata/")
new_model = tf.keras.models.load_model('simpleRNN.h5')
new_model.summary()

labels = load_label()

# use the model

xhat = x_test
#xhat = xhat[55:56]
yhat = new_model.predict(xhat)
print('## yhat ##')

predictions = np.array([np.argmax(pred) for pred in yhat])
Y = np.array([np.argmax(i) for i in y_test])
print(Y)
rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
#print(rev_labels[predictions[0]])

with open("result.txt", "w") as f:
    f.write("gold, pred\n")
    for a, b in zip(Y, predictions):
        f.write("%s %s\n" % (rev_labels[a], rev_labels[b]))

acc = 100 * np.sum(predictions == Y) / len(Y)

print("Accuracy: ", acc)

--------------------------------------------------------------------------------
/util/test.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax
import argparse

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    tmp = [[x, y] for x, y in zip(X, Y)]
    #random.shuffle(tmp)
    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]

    x_train = X
    x_train = np.array(x_train)
    return x_train


# prediction
def load_label():
    label = {}
    count = 1
    listfile = ['Apple', 'Bird', 'Sorry']
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label

def main(input_data_path, output_data_path):
    comp = 'bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
    mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu'
    # bazel build command
    cmd = 'GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt'
    # MediaPipe run command
    listfile = os.listdir(input_data_path)
    output_dir = ""
    for file in listfile:
        if ".DS_" in file:
            continue
        word = file + '/'
        fullfilename = os.listdir(input_data_path + word)
        # collect the names of all videos in the subdirectory
        if not(os.path.isdir(output_data_path + "_" + word)):
            os.mkdir(output_data_path + "_" + word)
        if not(os.path.isdir(output_data_path + word)):
            os.mkdir(output_data_path + word)
        os.system(comp)
        outputfilelist = os.listdir(output_data_path + '_' + word)
        for mp4list in fullfilename:
            if ".DS_Store" in mp4list:
                continue
            inputfilen = ' --input_video_path=' + input_data_path + word + mp4list
            outputfilen = ' --output_video_path=' + output_data_path + '_' + word + mp4list
            cmdret = cmd + inputfilen + outputfilen
            os.system(cmdret)

    # MediaPipe processing finished:
    output_dir = output_data_path
    x_test = load_data(output_dir)
    new_model = tf.keras.models.load_model('simpleRNN.h5')
    new_model.summary()

    labels = load_label()

    # use the model

    xhat = x_test
    yhat = new_model.predict(xhat)

    predictions = np.array([np.argmax(pred) for pred in yhat])
    rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
    print("----------result------------\n")
    for i in predictions:
        print(rev_labels[i])
        print('\n')
    print("------------end-------------\n")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='operating Mediapipe')
    parser.add_argument("--input_data_path", help=" ")
    parser.add_argument("--output_data_path", help=" ")
    args = parser.parse_args()
    input_data_path = args.input_data_path
    output_data_path = args.output_data_path
    main(input_data_path, output_data_path)

--------------------------------------------------------------------------------
/model/save_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 10920):
                numbers.extend([0.000])  # fixed to 260 frames
            row = 0
            landmark_frame = []
            for i in range(0, len(numbers)):
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (260 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)

    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    return x_train, y_train


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(260, 42)))
    model.add(Dense(64, activation="softmax"))  # TODO: decide between softmax and linear here
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(260, 42)))  # returns a sequence of vectors of dimension 64
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(260, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model


def main(dirname):
    (x_train, y_train) = load_data(dirname)
    num_val_samples = (x_train.shape[0]) // 5
    num_epochs = 5
    all_scores = []

    for i in range(5):  # run 5 splits (k-fold cross-validation)
        print('Processing fold #', i)
        val_data = x_train[i*num_val_samples:(i+1)*num_val_samples]
        val_targets = y_train[i*num_val_samples:(i+1)*num_val_samples]
        partial_train_data = np.concatenate([x_train[:i*num_val_samples],
                                             x_train[(i+1)*num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i*num_val_samples],
                                                y_train[(i+1)*num_val_samples:]],
                                               axis=0)

        model = simple_rnn()

        print('Training stage')
        print('==============')
        history = model.fit(partial_train_data, partial_train_targets, epochs=3, batch_size=32)
        score, acc = model.evaluate(val_data, val_targets, batch_size=256, verbose=0)
        print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

    model.save('simpleRNN.h5')

if __name__ == '__main__':
    main("/Users/anna/SLR/seperate/traininput/")

--------------------------------------------------------------------------------
/model/not_k_fold.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional


def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 150):  # drop the trailing frames -> fixed at 150 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (150 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    print(X.shape)
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)

    (x_train, y_train) = X, one_hot
    tmp = [[x, y] for x, y in zip(x_train, y_train)]
    x_train = [n[0] for n in tmp]
    y_train = [n[1] for n in tmp]
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    return x_train[0:450], y_train[0:450], x_train[451:-1], y_train[451:-1]


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(150, 42)))
    model.add(Dense(64, activation="softmax"))  # TODO: decide between softmax and linear here
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(150, 42)))  # returns a sequence of vectors of dimension 64
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(150, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(21, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model


def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    #num_val_samples = (x_train.shape[0]) // 5
    #num_epochs = 5
    #all_scores = []
    '''
    for i in range(5):  # run 5 splits (k-fold cross-validation)
        print('Processing fold #', i)
        val_data = x_train[i*num_val_samples:(i+1)*num_val_samples]
        val_targets = y_train[i*num_val_samples:(i+1)*num_val_samples]
        partial_train_data = np.concatenate([x_train[:i*num_val_samples],
                                             x_train[(i+1)*num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i*num_val_samples],
                                                y_train[(i+1)*num_val_samples:]],
                                               axis=0)
    '''
    model = simple_rnn()

    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=200, batch_size=32, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=32, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

    model.save('simpleRNN.h5')

if __name__ == '__main__':
    main("/Users/jongwook/Desktop/traininput/")

--------------------------------------------------------------------------------
/util/build_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional
import tensorflow as tf
from numpy import argmax
import argparse

def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = landmark_frame.reshape(-1, 42)  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)

    x_train = X
    x_train = np.array(x_train)
    return x_train


# prediction
def load_label():
    label = {}
    count = 1
    listfile = ['Apple', 'Bird', 'Blue', 'Cents', 'Child', 'Cow', 'Drink', 'Green', 'Hello', 'Like', 'Metoo', 'No',
                'Orange', 'Pig', 'Sorry', 'Thankyou', 'Where', 'Who', 'Yes', 'You']
    for l in listfile:
        if "_" in l:
            continue
        label[l] = count
        count += 1
    return label

def main(input_data_path, output_data_path):
    comp = 'bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
    mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu'
    # bazel build command
    cmd = 'GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt'
    # MediaPipe run command
    listfile = os.listdir(input_data_path)
    output_dir = ""
    filel = []
    for file in listfile:
        if ".DS_" in file:
            continue
        word = file + '/'
        fullfilename = os.listdir(input_data_path + word)
        # collect the names of all videos in the subdirectory
        if not(os.path.isdir(output_data_path + "_" + word)):
            os.mkdir(output_data_path + "_" + word)
        if not(os.path.isdir(output_data_path + word)):
            os.mkdir(output_data_path + word)
        os.system(comp)
        outputfilelist = os.listdir(output_data_path + '_' + word)
        for mp4list in fullfilename:
            if ".DS_Store" in mp4list:
                continue
            filel.append(mp4list)
            inputfilen = ' --input_video_path=' + input_data_path + word + mp4list
            outputfilen = ' --output_video_path=' + output_data_path + '_' + word + mp4list
            cmdret = cmd + inputfilen + outputfilen
            os.system(cmdret)

    # MediaPipe processing finished:
    output_dir = output_data_path
    x_test = load_data(output_dir)
    new_model = tf.keras.models.load_model('simpleRNN1.h5')
    #new_model.summary()

    labels = load_label()

    # use the model

    xhat = x_test
    yhat = new_model.predict(xhat)

    predictions = np.array([np.argmax(pred) for pred in yhat])
    rev_labels = dict(zip(list(labels.values()), list(labels.keys())))
    s = 0
    filel = np.array(filel)
    print(filel)
    for i in predictions:
        if s == 4:
            continue
        txtpath = "/Users/jongwook/Desktop/word" + str(s) + ".txt"
        with open(txtpath, "w") as f:
            f.write(filel[s])
            f.write(" ")
            f.write(rev_labels[i])
        s += 1

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='operating Mediapipe')
    parser.add_argument("--input_data_path", help=" ")
    parser.add_argument("--output_data_path", help=" ")
    args = parser.parse_args()
    input_data_path = args.input_data_path
    output_data_path = args.output_data_path
    main(input_data_path, output_data_path)

--------------------------------------------------------------------------------
/model/training_onehot.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional


def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])  # fixed to 300 frames
            row = 42 * 8  # drop the first 8 frames
            landmark_frame = []
            for i in range(0, 100):  # drop the trailing frames -> fixed at 100 frames total
                landmark_frame.extend(numbers[row:row+42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # reshape to 2D (100 x 42)
            X.append(np.array(landmark_frame))
            Y.append(wordname)
    X = np.array(X)
    Y = np.array(Y)
    print(X.shape)
    tmp = [[x, y] for x, y in zip(X, Y)]
    random.shuffle(tmp)

    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    '''
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)
    '''
    # fixed vocabulary so the one-hot indices stay stable across runs
    text = "Apple Bird Sorry"
    t = Tokenizer()
    t.fit_on_texts([text])
    print(t.word_index)
    encoded = t.texts_to_sequences([Y])[0]
    print(encoded)
    one_hot = to_categorical(encoded)
    (x_train, y_train) = X, one_hot

    x_train = np.array(x_train)
    y_train = np.array(y_train)
    a = x_train.shape[0]
    a = a // 3  # two thirds for training, one third for validation
    return x_train[0:2*a], y_train[0:2*a], x_train[2*a:-1], y_train[2*a:-1]


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(200, 42)))
    model.add(Dense(64, activation="softmax"))  # TODO: decide between softmax and linear here
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(100, 42)))  # returns a sequence of vectors of dimension 64
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(100, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

def build_model():
    model = Sequential()
    model.add(layers.LSTM(32, return_sequences=True,
                          input_shape=(100, 42)))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
    model.add(layers.LSTM(32))  # returns a single vector of dimension 32
    model.add(layers.Dense(4, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    num_val_samples = (x_train.shape[0]) // 5
    model = build_model()
    '''
    for i in range(5):  # run 5 splits (k-fold cross-validation)
        print('Processing fold #', i)
        val_data = x_train[i*num_val_samples:(i+1)*num_val_samples]
        val_targets = y_train[i*num_val_samples:(i+1)*num_val_samples]
        partial_train_data = np.concatenate([x_train[:i*num_val_samples],
                                             x_train[(i+1)*num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i*num_val_samples],
                                                y_train[(i+1)*num_val_samples:]],
                                               axis=0)
    '''
    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=32, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))
    model.save('simpleRNN.h5')

if __name__ == '__main__':
    main("/Users/jongwook/Desktop/test1/outputdata/")
/modified_mediapipe/demo_run_graph_main.cc:
--------------------------------------------------------------------------------
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// An example of sending OpenCV webcam frames into a MediaPipe graph.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/commandlineflags.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/opencv_highgui_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/opencv_video_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"
using namespace std;
string input_video_new = "";   // Input video path (raw flag string).
string output_video_new = "";  // Output video path (raw flag string).
constexpr char kInputStream[] = "input_video";
constexpr char kOutputStream[] = "output_video";
constexpr char kWindowName[] = "MediaPipe";

DEFINE_string(
    calculator_graph_config_file, "",
    "Name of file containing text format CalculatorGraphConfig proto.");
DEFINE_string(input_video_path, "",
              "Full path of video to load. "
              "If not provided, attempt to use a webcam.");
DEFINE_string(output_video_path, "",
              "Full path of where to save result (.mp4 only). "
              "If not provided, show result in a window.");

::mediapipe::Status RunMPPGraph() {
  std::string calculator_graph_config_contents;
  MP_RETURN_IF_ERROR(mediapipe::file::GetContents(
      FLAGS_calculator_graph_config_file, &calculator_graph_config_contents));
  LOG(INFO) << "Get calculator graph config contents: "
            << calculator_graph_config_contents;
  mediapipe::CalculatorGraphConfig config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          calculator_graph_config_contents);

  LOG(INFO) << "Initialize the calculator graph.";
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));

  LOG(INFO) << "Initialize the camera or load the video.";
  cv::VideoCapture capture;
  const bool load_video = !FLAGS_input_video_path.empty();
  if (load_video) {
    capture.open(FLAGS_input_video_path);
  } else {
    capture.open(0);
  }
  RET_CHECK(capture.isOpened());

  cv::VideoWriter writer;
  const bool save_video = !FLAGS_output_video_path.empty();
  if (save_video) {
    LOG(INFO) << "Prepare video writer.";
    cv::Mat test_frame;
    capture.read(test_frame);                    // Consume first frame.
    capture.set(cv::CAP_PROP_POS_AVI_RATIO, 0);  // Rewind to beginning.
    writer.open(FLAGS_output_video_path,
                mediapipe::fourcc('a', 'v', 'c', '1'),  // .mp4
                capture.get(cv::CAP_PROP_FPS), test_frame.size());
    RET_CHECK(writer.isOpened());
  } else {
    cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1);
  }

  LOG(INFO) << "Start running the calculator graph.";
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller(kOutputStream));
  MP_RETURN_IF_ERROR(graph.StartRun({}));

  LOG(INFO) << "Start grabbing and processing frames.";
  size_t frame_timestamp = 0;
  bool grab_frames = true;
  while (grab_frames) {
    // Capture opencv camera or video frame.
    cv::Mat camera_frame_raw;
    capture >> camera_frame_raw;
    if (camera_frame_raw.empty()) break;  // End of video.
    cv::Mat camera_frame;
    cv::cvtColor(camera_frame_raw, camera_frame, cv::COLOR_BGR2RGB);
    if (!load_video) {
      cv::flip(camera_frame, camera_frame, /*flipcode=HORIZONTAL*/ 1);
    }

    // Wrap Mat into an ImageFrame.
    auto input_frame = absl::make_unique<mediapipe::ImageFrame>(
        mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows,
        mediapipe::ImageFrame::kDefaultAlignmentBoundary);
    cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get());
    camera_frame.copyTo(input_frame_mat);

    // Send image packet into the graph.
    MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
        kInputStream, mediapipe::Adopt(input_frame.release())
                          .At(mediapipe::Timestamp(frame_timestamp++))));

    // Get the graph result packet, or stop if that fails.
    mediapipe::Packet packet;
    if (!poller.Next(&packet)) break;
    auto& output_frame = packet.Get<mediapipe::ImageFrame>();

    // Convert back to opencv for display or saving.
    cv::Mat output_frame_mat = mediapipe::formats::MatView(&output_frame);
    cv::cvtColor(output_frame_mat, output_frame_mat, cv::COLOR_RGB2BGR);
    if (save_video) {
      writer.write(output_frame_mat);
    } else {
      cv::imshow(kWindowName, output_frame_mat);
      // Press any key to exit.
      const int pressed_key = cv::waitKey(5);
      if (pressed_key >= 0 && pressed_key != 255) grab_frames = false;
    }
  }

  LOG(INFO) << "Shutting down.";
  if (writer.isOpened()) writer.release();
  MP_RETURN_IF_ERROR(graph.CloseInputStream(kInputStream));
  return graph.WaitUntilDone();
}

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  input_video_new = argv[argc - 2];   // Added: raw --input_video_path argument.
  output_video_new = argv[argc - 1];  // Added: raw --output_video_path argument.
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  ::mediapipe::Status run_status = RunMPPGraph();
  if (!run_status.ok()) {
    LOG(ERROR) << "Failed to run the graph: " << run_status.message();
  } else {
    LOG(INFO) << "Success!";
  }
  return 0;
}
--------------------------------------------------------------------------------
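The two lines added at the top of main() copy raw argv strings before gflags parses them, so this build assumes --input_video_path and --output_video_path are always the last two command-line arguments. The modified calculators below parse those raw strings to decide where to append the landmark text file: 42 floats (21 landmarks times x and y) per frame in which a hand was detected. A quick sanity check of such a file, with a hypothetical path:

    with open("outputdata/Apple/video1.txt") as f:   # hypothetical output file
        values = f.read().split()
    assert len(values) % 42 == 0                     # 42 floats per detected frame
    print(len(values) // 42, "frames with a detected hand")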
/model/20_train_model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models
from keras.models import Sequential
from keras import layers
import os
import sys
import pickle
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import random
from keras import optimizers
from keras.layers import SimpleRNN, Dense
from keras.layers import Bidirectional


def load_data(dirname):
    listfile = os.listdir(dirname)
    X = []
    Y = []
    XT = []
    YT = []
    for file in listfile:
        if "_" in file:
            continue
        wordname = file
        textlist = os.listdir(dirname + wordname)
        k = 0
        for text in textlist:
            if "DS_" in text:
                continue
            textname = dirname + wordname + "/" + text
            numbers = []
            with open(textname, mode='r') as t:
                numbers = [float(num) for num in t.read().split()]
            # Zero-pad to a fixed 300 frames (300 frames * 42 values = 12600).
            for i in range(len(numbers), 12600):
                numbers.extend([0.000])
            row = 42 * 8  # Skip the first 8 frames.
            landmark_frame = []
            for i in range(0, 100):  # Keep the next 100 frames, dropping the rest.
                landmark_frame.extend(numbers[row:row + 42])
                row += 42
            landmark_frame = np.array(landmark_frame)
            landmark_frame = list(landmark_frame.reshape(-1, 42))  # Reshape to 2D: (100, 42).
            if (k % 3 == 2):  # Hold out every third clip for testing (2:1 split).
                XT.append(np.array(landmark_frame))
                YT.append(wordname)
            else:
                X.append(np.array(landmark_frame))
                Y.append(wordname)
            k += 1

    X = np.array(X)
    Y = np.array(Y)
    XT = np.array(XT)
    YT = np.array(YT)

    tmp = [[x, y] for x, y in zip(X, Y)]
    random.shuffle(tmp)

    tmp1 = [[xt, yt] for xt, yt in zip(XT, YT)]
    random.shuffle(tmp1)

    X = [n[0] for n in tmp]
    Y = [n[1] for n in tmp]
    XT = [n[0] for n in tmp1]
    YT = [n[1] for n in tmp1]
    '''
    t = Tokenizer()
    t.fit_on_texts(Y)
    encoded = t.texts_to_sequences(Y)
    one_hot = to_categorical(encoded)
    '''
    # Fixed 20-word vocabulary. Tokenizer indices start at 1, so the one-hot
    # vectors have 21 columns and column 0 is never used.
    text = "Apple Bird Blue Cents Child Cow Drink Green Hello Like Metoo No Orange Pig Sorry Thankyou Where Who Yes You"
    t = Tokenizer()
    t.fit_on_texts([text])
    encoded = t.texts_to_sequences([Y])[0]
    encoded2 = t.texts_to_sequences([YT])[0]
    one_hot = to_categorical(encoded)
    one_hot2 = to_categorical(encoded2)

    (x_train, y_train) = X, one_hot
    (x_test, y_test) = XT, one_hot2
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    return x_train, y_train, x_test, y_test


def simple_rnn():
    model = Sequential()
    model.add(SimpleRNN(units=64, input_shape=(100, 42)))  # Match the (100, 42) sequences from load_data.
    model.add(Dense(64, activation="softmax"))  # TODO: decide whether softmax or linear is the right choice here.
    model.add(Dense(128, activation="linear"))
    model.add(Dense(21, activation="softmax"))  # Softmax output to pair with categorical_crossentropy.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def rnn_lstm():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=(100, 42)))  # Returns a sequence of vectors of dimension 64.
    model.add(layers.LSTM(32, return_sequences=True))  # Returns a sequence of vectors of dimension 32.
    model.add(layers.LSTM(32))  # Returns a single vector of dimension 32.
    model.add(layers.Dense(21, activation='softmax'))  # 20 words + unused index 0.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def bidirectional_lstm():
    model = Sequential()
    model.add(Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(100, 42)))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(21, activation='softmax'))  # 20 words + unused index 0.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def build_model():
    model = Sequential()
    model.add(layers.LSTM(64, return_sequences=True,
                          input_shape=(100, 42)))  # Returns a sequence of vectors of dimension 64.
    model.add(layers.LSTM(32, return_sequences=True))  # Returns a sequence of vectors of dimension 32.
    model.add(layers.LSTM(32))  # Returns a single vector of dimension 32.
    model.add(layers.Dense(21, activation='softmax'))  # 20 words + unused index 0.
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    num_val_samples = (x_train.shape[0]) // 5
    model = build_model()
    '''
    for i in range(5):  # Run with 5 splits: k-fold cross-validation.
        print('Processing fold #', i)
        val_data = x_train[i * num_val_samples:(i + 1) * num_val_samples]
        val_targets = y_train[i * num_val_samples:(i + 1) * num_val_samples]
        partial_train_data = np.concatenate([x_train[:i * num_val_samples],
                                             x_train[(i + 1) * num_val_samples:]],
                                            axis=0)
        partial_train_targets = np.concatenate([y_train[:i * num_val_samples],
                                                y_train[(i + 1) * num_val_samples:]],
                                               axis=0)
    '''
    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=32, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))
    model.save('simpleRNN.h5')


if __name__ == '__main__':
    main("/Users/jongwook/Desktop/traindata/")
--------------------------------------------------------------------------------
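build_model() ends in Dense(21) even though the vocabulary has 20 words: Keras' Tokenizer assigns word indices starting at 1, and to_categorical() sizes its output by the largest index, so the labels carry an extra, always-zero column 0. A short sketch of that behaviour with a cut-down word list:

    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.utils import to_categorical

    t = Tokenizer()
    t.fit_on_texts(["Apple Bird Blue"])                 # 3 words -> indices 1..3
    encoded = t.texts_to_sequences([["Apple", "Blue"]])[0]
    print(encoded)                                      # [1, 3]
    print(to_categorical(encoded).shape)                # (2, 4): column 0 unused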
/modified_mediapipe/tflite_tensors_to_landmarks_calculator.cc:
--------------------------------------------------------------------------------
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/* absolute-position landmarks */
#include "mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "tensorflow/lite/interpreter.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/status_util.h"
#include <fstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

extern string input_video_new;
extern string output_video_new;
static bool cpos = false;
static vector<pair<float, float>> landpos(21);
namespace mediapipe {

// A calculator for converting TFLite tensors from regression models into
// landmarks.
//
// Input:
//  TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32. Only the first
//            tensor will be used. The size of the values must be
//            (num_dimension x num_landmarks).
// Output:
//  LANDMARKS(optional) - Result MediaPipe landmarks.
//  NORM_LANDMARKS(optional) - Result MediaPipe normalized landmarks.
//
// Notes:
//   To output normalized landmarks, user must provide the original input image
//   size to the model using calculator option input_image_width and
//   input_image_height.
// Usage example:
// node {
//   calculator: "TfLiteTensorsToLandmarksCalculator"
//   input_stream: "TENSORS:landmark_tensors"
//   output_stream: "LANDMARKS:landmarks"
//   output_stream: "NORM_LANDMARKS:landmarks"
//   options: {
//     [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] {
//       num_landmarks: 21
//
//       input_image_width: 256
//       input_image_height: 256
//     }
//   }
// }
class TfLiteTensorsToLandmarksCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;

 private:
  ::mediapipe::Status LoadOptions(CalculatorContext* cc);
  int num_landmarks_ = 0;

  ::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions options_;
};
REGISTER_CALCULATOR(TfLiteTensorsToLandmarksCalculator);

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(!cc->Inputs().GetTags().empty());
  RET_CHECK(!cc->Outputs().GetTags().empty());

  if (cc->Inputs().HasTag("TENSORS")) {
    cc->Inputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
  }

  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs().Tag("LANDMARKS").Set<std::vector<Landmark>>();
  }

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    cc->Outputs().Tag("NORM_LANDMARKS").Set<std::vector<NormalizedLandmark>>();
  }

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Open(
    CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  MP_RETURN_IF_ERROR(LoadOptions(cc));

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for getting normalized landmarks.";
  }
  if (cc->Outputs().HasTag("LANDMARKS") &&
      (options_.flip_vertically() || options_.flip_horizontally())) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for using flip_vertically option "
           "when outputting landmarks in absolute coordinates.";
  }
  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Process(
    CalculatorContext* cc) {
  static int idx = 0;
  if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
    return ::mediapipe::OkStatus();
  }

  const auto& input_tensors =
      cc->Inputs().Tag("TENSORS").Get<std::vector<TfLiteTensor>>();

  const TfLiteTensor* raw_tensor = &input_tensors[0];

  int num_values = 1;
  for (int i = 0; i < raw_tensor->dims->size; ++i) {
    num_values *= raw_tensor->dims->data[i];
  }
  const int num_dimensions = num_values / num_landmarks_;
  // Landmarks must have at most 3 dimensions. Otherwise please consider
  // using matrix.
  CHECK_LE(num_dimensions, 3);
  CHECK_GT(num_dimensions, 0);

  const float* raw_landmarks = raw_tensor->data.f;

  auto output_landmarks = absl::make_unique<std::vector<Landmark>>();

  for (int ld = 0; ld < num_landmarks_; ++ld) {
    const int offset = ld * num_dimensions;
    Landmark landmark;

    if (options_.flip_horizontally()) {
      landmark.set_x(options_.input_image_width() - raw_landmarks[offset]);
    } else {
      landmark.set_x(raw_landmarks[offset]);
    }
    if (num_dimensions > 1) {
      if (options_.flip_vertically()) {
        landmark.set_y(options_.input_image_height() -
                       raw_landmarks[offset + 1]);
      } else {
        landmark.set_y(raw_landmarks[offset + 1]);
      }
    }
    if (num_dimensions > 2) {
      landmark.set_z(raw_landmarks[offset + 2]);
    }
    output_landmarks->push_back(landmark);
  }

  // Output normalized landmarks if required.
  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    auto output_norm_landmarks =
        absl::make_unique<std::vector<NormalizedLandmark>>();
    // Added section.
    // Extract just the video file name (without its extension).
    string video_fname = "";
    bool isTrue = false;
    int slx = 0;

    for (slx = input_video_new.size() - 1; input_video_new[slx] != '/'; slx--) {
      if (isTrue) {
        video_fname = input_video_new[slx] + video_fname;
      }
      if (input_video_new[slx] == '.') isTrue = true;
    }
    slx--;
    string dir_name = "/";  // Extract the video's directory name.
    for (; input_video_new[slx] != '/'; slx--) {
      dir_name = input_video_new[slx] + dir_name;
    }
    string output_path_cp = "";  // Holds the leading part of the output file path.
    int j = 0;
    for (; output_video_new[j] != '='; j++);  // Skip to just past '='.
    j++;
    for (; output_video_new[j] != '_'; j++) {
      output_path_cp.push_back(output_video_new[j]);
    }
    string str = output_path_cp + dir_name + video_fname + ".txt";
    ofstream out(str, std::ios_base::out | std::ios_base::app);
    int i = 0;
    for (const auto& landmark : *output_landmarks) {
      NormalizedLandmark norm_landmark;
      norm_landmark.set_x(static_cast<float>(landmark.x()) /
                          options_.input_image_width());
      norm_landmark.set_y(static_cast<float>(landmark.y()) /
                          options_.input_image_height());
      norm_landmark.set_z(landmark.z() / options_.normalize_z());
      // if (cpos) {
      out << static_cast<float>(landmark.x()) / options_.input_image_width() << " ";
      out << static_cast<float>(landmark.y()) / options_.input_image_height() << " ";
      // }
      // landpos[i] = make_pair(static_cast<float>(landmark.x()) / options_.input_image_width(),
      //                        static_cast<float>(landmark.y()) / options_.input_image_height());
      i = i + 1;
      output_norm_landmarks->push_back(norm_landmark);
    }
    cpos = true;
    idx++;
    out.close();
    // End of added section.
    cc->Outputs()
        .Tag("NORM_LANDMARKS")
        .Add(output_norm_landmarks.release(), cc->InputTimestamp());
  }
  // Output absolute landmarks.
  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs()
        .Tag("LANDMARKS")
        .Add(output_landmarks.release(), cc->InputTimestamp());
  }

  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::LoadOptions(
    CalculatorContext* cc) {
  // Get calculator options specified in the graph.
  options_ =
      cc->Options<::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions>();
  num_landmarks_ = options_.num_landmarks();

  return ::mediapipe::OkStatus();
}
}  // namespace mediapipe
--------------------------------------------------------------------------------
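The string walking in the added section is dense; its net effect is to rebuild the landmark file path from the two raw flag strings. The same derivation in Python, for a hypothetical invocation (paths are illustrative only):

    input_flag = "--input_video_path=/data/Apple/video1.mp4"
    output_flag = "--output_video_path=/out/_Apple/video1.mp4"

    stem = input_flag.rsplit("/", 1)[1].rsplit(".", 1)[0]     # "video1"
    word_dir = input_flag.rsplit("/", 2)[1]                   # "Apple"
    out_root = output_flag.split("=", 1)[1].split("_", 1)[0]  # "/out/"
    print(out_root + word_dir + "/" + stem + ".txt")          # /out/Apple/video1.txt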
/modified_mediapipe/tflite_tensors_to_landmarks_calculator(relative_position_ver).cc:
--------------------------------------------------------------------------------
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "tensorflow/lite/interpreter.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/status_util.h"
#include <fstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

extern string input_video_new;
extern string output_video_new;
static bool cpos = false;
static vector<pair<float, float>> landpos(21);
namespace mediapipe {

// A calculator for converting TFLite tensors from regression models into
// landmarks.
//
// Input:
//  TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32. Only the first
//            tensor will be used. The size of the values must be
//            (num_dimension x num_landmarks).
// Output:
//  LANDMARKS(optional) - Result MediaPipe landmarks.
//  NORM_LANDMARKS(optional) - Result MediaPipe normalized landmarks.
//
// Notes:
//   To output normalized landmarks, user must provide the original input image
//   size to the model using calculator option input_image_width and
//   input_image_height.
// Usage example:
// node {
//   calculator: "TfLiteTensorsToLandmarksCalculator"
//   input_stream: "TENSORS:landmark_tensors"
//   output_stream: "LANDMARKS:landmarks"
//   output_stream: "NORM_LANDMARKS:landmarks"
//   options: {
//     [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] {
//       num_landmarks: 21
//
//       input_image_width: 256
//       input_image_height: 256
//     }
//   }
// }
class TfLiteTensorsToLandmarksCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;

 private:
  ::mediapipe::Status LoadOptions(CalculatorContext* cc);
  int num_landmarks_ = 0;

  ::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions options_;
};
REGISTER_CALCULATOR(TfLiteTensorsToLandmarksCalculator);

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(!cc->Inputs().GetTags().empty());
  RET_CHECK(!cc->Outputs().GetTags().empty());

  if (cc->Inputs().HasTag("TENSORS")) {
    cc->Inputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
  }

  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs().Tag("LANDMARKS").Set<std::vector<Landmark>>();
  }

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    cc->Outputs().Tag("NORM_LANDMARKS").Set<std::vector<NormalizedLandmark>>();
  }

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Open(
    CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  MP_RETURN_IF_ERROR(LoadOptions(cc));

  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for getting normalized landmarks.";
  }
  if (cc->Outputs().HasTag("LANDMARKS") &&
      (options_.flip_vertically() || options_.flip_horizontally())) {
    RET_CHECK(options_.has_input_image_height() &&
              options_.has_input_image_width())
        << "Must provide input width/height for using flip_vertically option "
           "when outputting landmarks in absolute coordinates.";
  }
  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Process(
    CalculatorContext* cc) {
  static int idx = 0;
  if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
    return ::mediapipe::OkStatus();
  }

  const auto& input_tensors =
      cc->Inputs().Tag("TENSORS").Get<std::vector<TfLiteTensor>>();

  const TfLiteTensor* raw_tensor = &input_tensors[0];

  int num_values = 1;
  for (int i = 0; i < raw_tensor->dims->size; ++i) {
    num_values *= raw_tensor->dims->data[i];
  }
  const int num_dimensions = num_values / num_landmarks_;
  // Landmarks must have at most 3 dimensions. Otherwise please consider
  // using matrix.
  CHECK_LE(num_dimensions, 3);
  CHECK_GT(num_dimensions, 0);

  const float* raw_landmarks = raw_tensor->data.f;

  auto output_landmarks = absl::make_unique<std::vector<Landmark>>();

  for (int ld = 0; ld < num_landmarks_; ++ld) {
    const int offset = ld * num_dimensions;
    Landmark landmark;

    if (options_.flip_horizontally()) {
      landmark.set_x(options_.input_image_width() - raw_landmarks[offset]);
    } else {
      landmark.set_x(raw_landmarks[offset]);
    }
    if (num_dimensions > 1) {
      if (options_.flip_vertically()) {
        landmark.set_y(options_.input_image_height() -
                       raw_landmarks[offset + 1]);
      } else {
        landmark.set_y(raw_landmarks[offset + 1]);
      }
    }
    if (num_dimensions > 2) {
      landmark.set_z(raw_landmarks[offset + 2]);
    }
    output_landmarks->push_back(landmark);
  }

  // Output normalized landmarks if required.
  if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
    auto output_norm_landmarks =
        absl::make_unique<std::vector<NormalizedLandmark>>();
    // Added section: write frame-to-frame deltas instead of positions.
    // Extract just the video file name (without its extension).
    string video_fname = "";
    bool isTrue = false;
    int slx = 0;

    for (slx = input_video_new.size() - 1; input_video_new[slx] != '/'; slx--) {
      if (isTrue) {
        video_fname = input_video_new[slx] + video_fname;
      }
      if (input_video_new[slx] == '.') isTrue = true;
    }
    slx--;
    string dir_name = "/";  // Extract the video's directory name.
    for (; input_video_new[slx] != '/'; slx--) {
      dir_name = input_video_new[slx] + dir_name;
    }
    string output_path_cp = "";  // Holds the leading part of the output file path.
    int j = 0;
    for (; output_video_new[j] != '='; j++);  // Skip to just past '='.
    j++;
    for (; output_video_new[j] != '_'; j++) {
      output_path_cp.push_back(output_video_new[j]);
    }
    string str = output_path_cp + dir_name + video_fname + ".txt";
    ofstream out(str, std::ios_base::out | std::ios_base::app);
    int i = 0;
    for (const auto& landmark : *output_landmarks) {
      NormalizedLandmark norm_landmark;
      norm_landmark.set_x(static_cast<float>(landmark.x()) /
                          options_.input_image_width());
      norm_landmark.set_y(static_cast<float>(landmark.y()) /
                          options_.input_image_height());
      norm_landmark.set_z(landmark.z() / options_.normalize_z());
      if (cpos) {  // Skip the first detected frame; there is no previous position yet.
        out << static_cast<float>(landmark.x()) / options_.input_image_width() - landpos[i].first << " ";
        out << static_cast<float>(landmark.y()) / options_.input_image_height() - landpos[i].second << " ";
      }
      landpos[i] = make_pair(static_cast<float>(landmark.x()) / options_.input_image_width(),
                             static_cast<float>(landmark.y()) / options_.input_image_height());
      i = i + 1;
      output_norm_landmarks->push_back(norm_landmark);
    }
    cpos = true;
    idx++;
    out.close();
    // End of added section.
    cc->Outputs()
        .Tag("NORM_LANDMARKS")
        .Add(output_norm_landmarks.release(), cc->InputTimestamp());
  }
  // Output absolute landmarks.
  if (cc->Outputs().HasTag("LANDMARKS")) {
    cc->Outputs()
        .Tag("LANDMARKS")
        .Add(output_landmarks.release(), cc->InputTimestamp());
  }

  return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToLandmarksCalculator::LoadOptions(
    CalculatorContext* cc) {
  // Get calculator options specified in the graph.
  options_ =
      cc->Options<::mediapipe::TfLiteTensorsToLandmarksCalculatorOptions>();
  num_landmarks_ = options_.num_landmarks();

  return ::mediapipe::OkStatus();
}
}  // namespace mediapipe
--------------------------------------------------------------------------------
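Unlike the absolute version, this variant emits nothing for the first detected frame (cpos is still false) and then appends per-landmark x/y displacements from the previous frame; cpos is never reset, which works because each video runs in its own process. The trajectory relative to the first detected frame can be recovered with a cumulative sum. A minimal sketch, assuming a hypothetical delta file:

    import numpy as np

    deltas = np.loadtxt("outputdata/Apple/video1.txt").reshape(-1, 42)
    positions = np.cumsum(deltas, axis=0)  # offsets from the first detected frame
    print(positions.shape)                 # (detected frames - 1, 42)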