├── LICENSE
├── README.md
├── split_audio.py
└── audio_reco.py
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Charles Grassin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Breaking passwords with a microphone

This repository contains a Python proof-of-concept for breaking passwords with a microphone, using machine learning.

*Because keyboards are mechanical devices, each key may produce a slightly different sound due to small manufacturing variations. The fact that each key makes a somewhat unique sound is a vulnerability: although the difference is not easily picked up by the human ear, it can be exploited by an algorithm...*

Please have a look at my original article here: http://charleslabs.fr/en/project-Breaking+Passwords+with+a+Microphone

## Requirements

* Python 3
* Keras and TensorFlow (`pip3 install keras tensorflow`)
* NumPy, SciPy and Matplotlib (`pip3 install numpy scipy matplotlib`); `argparse` is part of the Python 3 standard library

## Usage instructions

**Disclaimer:** this is research code, built as a proof-of-concept. It is not meant to be a practical application.

This repository includes two executable Python scripts:
* **split_audio.py**, a script that breaks up a WAV recording into one file per detected key press. It is used to generate the training data.
* **audio_reco.py**, a script that performs the actual key recognition. Several methods are included (MLP, CNN, cross-correlation and FFT distance).

To generate the training data, call the `split_audio.py` script:
```bash
./split_audio.py --input ./path/to/file_with_KEY_presses.wav --out-dir ./path/to/train --label KEY
```

To launch the learning process, save the model and make a prediction:
```bash
./audio_reco.py --train-path ./path/to/train --test-path ./path/to/test.wav --model ../path/to/save/trained_model.h5
```

You may want to use the `--help` option on both scripts.
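As a more complete example, a two-key workflow could look like this (the paths and file names below are only placeholders, substitute your own recordings):

```bash
# One recording per key: each WAV contains repeated presses of a single key.
./split_audio.py --input ./recordings/a_presses.wav --out-dir ./train --label a
./split_audio.py --input ./recordings/b_presses.wav --out-dir ./train --label b

# Train the default MLP model on ./train, save it and predict the keys in test.wav.
./audio_reco.py --train-path ./train --test-path ./recordings/test.wav --model ./trained_model.h5
```

Alternatively, if a single training recording contains a repeated sequence of different keys (e.g. "ab" typed over and over), the `--split-label-char` option of `split_audio.py` assigns the characters of `--label` to successive detected presses, e.g. `--label a-b --split-label-char -`.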
--------------------------------------------------------------------------------
/split_audio.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Split a WAV recording of key presses into one file per detected press.

Created on Fri Dec 13 21:30:59 2019

@author: charles
"""

import argparse
import os

import numpy as np
from scipy.io.wavfile import read, write


def normalize(x: np.ndarray):
    """Scale the signal so that its maximum value is 1."""
    return x / np.max(x)


def split_file(file, time_before=0.025, time_after=0.2, trigger=1000, normalize_result=False):
    """Detect samples above the trigger threshold and return one slice per
    key press (time_before seconds before the trigger, time_after seconds
    after it), along with the sample rate of the recording."""
    outputs = []

    # Open file
    sample_rate, a = read(file)
    a = np.array(a, dtype=float)

    # Convert the window durations from seconds to samples
    length_after = int(time_after * sample_rate)
    length_before = int(time_before * sample_rate)

    # Display sound (debug)
    # plt.plot(a)
    # plt.show()

    i = 0
    while i < a.size:
        # End of usable recording
        if i + length_after > a.size:
            break
        if a[i] > trigger and i >= length_before:
            sub = a[i - length_before:i + length_after]
            if normalize_result:
                sub = normalize(sub)
            outputs.append(sub)
            # Jump past this press so it is not detected twice
            i += length_after
        i += 1

    return outputs, sample_rate


def main():
    parser = argparse.ArgumentParser(description='Split a recording of key presses into individual WAV files.')
    parser.add_argument('--input', type=str, help='Input WAV file')
    parser.add_argument('--out-dir', type=str, help='Output directory')
    parser.add_argument('--label', type=str, help='Output files prefix')
    parser.add_argument('--split-label-char', type=str, default='', help='Character used to split the label into per-press labels')
    parser.add_argument('--trigger', type=float, default=1000, help='Trigger threshold')
    parser.add_argument('--time_before', type=float, default=0.025, help='Duration to keep before each trigger (s)')
    parser.add_argument('--time_after', type=float, default=0.2, help='Duration to keep after each trigger (s)')
    args = parser.parse_args()

    outputs, sample_rate = split_file(args.input, args.time_before, args.time_after, args.trigger)

    if args.split_label_char == '':
        labels = [args.label]
    else:
        labels = str.split(args.label, args.split_label_char)

    if len(outputs) % len(labels):
        print("ERROR: the number of detected key presses (%d) is not a multiple of the number of labels (%d)."
              % (len(outputs), len(labels)))
        return

    # Write each press to <out-dir>/<label>_<n>.wav, cycling through the labels
    n = 0
    i = 0
    for output in outputs:
        while os.path.isfile(args.out_dir + "/" + labels[i % len(labels)] + "_" + str(n) + ".wav"):
            n += 1
        write(args.out_dir + "/" + labels[i % len(labels)] + "_" + str(n) + ".wav", sample_rate, np.asarray(output, dtype=np.int16))
        print('Created ' + args.out_dir + "/" + labels[i % len(labels)] + "_" + str(n) + ".wav")
        i += 1


if __name__ == "__main__":
    main()
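# Example (hypothetical file name): split_file() can also be called directly
# from Python, e.g. to check how many key presses a given trigger threshold
# detects before writing anything to disk:
#
#   presses, rate = split_file("a_presses.wav", trigger=1000)
#   print(len(presses), "presses detected at", rate, "Hz")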
--------------------------------------------------------------------------------
/audio_reco.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Process some recordings of key presses to recover characters.

Created on Fri Dec 13 22:44:38 2019

@author: charles
"""

import argparse
import os

import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
from keras.models import load_model, Sequential
from keras.layers import Dense, Conv1D, Flatten, Reshape

import split_audio


def get_output(predictions, class_names):
    """Print the class probabilities for each key press and the most likely key."""
    for i in range(len(class_names)):
        print(class_names[i], end="\t")
    print("")

    for i in range(len(predictions)):
        for j in range(len(predictions[i])):
            print("%02.0f" % (predictions[i, j] * 100) + "%", end="\t")
        print("-> " + class_names[np.argmax(predictions[i])], end="\n")


def get_output_plt(predictions, class_names):
    """Plot the class probabilities for each key press as bar charts."""
    for i in range(len(predictions)):
        plt.subplot(1, len(predictions), i + 1)
        plt.yticks([])
        # plt.xticks(rotation=90)
        plt.grid(False)
        plt.bar(class_names, predictions[i])


def main():
    parser = argparse.ArgumentParser(description='Process some recordings of key presses to recover characters.')
    parser.add_argument('--train-path', type=str, help='Training directory containing files KEY_number.wav')
    parser.add_argument('--test-path', type=str, help='File to predict.')
    parser.add_argument('--method', type=str, default='ml_mlp', help='Method to use: ml_mlp/ml_cnn/cross_correlation/fft')
    parser.add_argument('--model', type=str, default='', help='Path to save/load H5 model')
    parser.add_argument('--trigger', type=float, default=1000, help='Trigger threshold')
    parser.add_argument('--lbound-samples', type=int, default=1050, help='Lower sample bound')
    parser.add_argument('--ubound-samples', type=int, default=2000, help='Upper sample bound')
    args = parser.parse_args()

    # Args
    train_path = args.train_path
    test_path = args.test_path
    method = args.method
    model_path = args.model

    # Open WAV train set: one KEY_number.wav file per key press
    train_inputs = []
    train_labels = []
    class_names = []

    for filename in os.listdir(train_path):
        if filename.endswith(".wav"):
            train_inputs.append(split_audio.normalize(read(train_path + "/" + filename)[1]))
            label = str.split(filename, "_")[0]
            if label not in class_names:
                class_names.append(label)
            train_labels.append(class_names.index(label))

    # Split the test recording into individual key presses
    test_inputs = split_audio.split_file(file=test_path, normalize_result=True, trigger=args.trigger)[0]

    train_inputs = np.array(train_inputs)
    train_labels = np.array(train_labels)
    test_inputs = np.array(test_inputs)

    # Keep only the samples between the lower and upper bounds
    train_inputs = train_inputs[:, args.lbound_samples:args.ubound_samples]
    test_inputs = test_inputs[:, args.lbound_samples:args.ubound_samples]

    # DEBUG: show sample of input
    # for i in range(25):
    #     plt.subplot(5, 5, i + 1)
    #     plt.xticks([])
    #     plt.yticks([])
    #     plt.grid(False)
    #     plt.xlabel(class_names[train_labels[i]])
    #     plt.plot(train_inputs[i])
    # plt.show()
    #
    # for i in range(3):
    #     plt.subplot(5, 5, i + 1)
    #     plt.xticks([])
    #     plt.yticks([])
    #     plt.grid(False)
    #     plt.plot(test_inputs[i])
    # plt.show()

    if method == 'ml_mlp' or method == 'ml_cnn':
        if not os.path.isfile(model_path):
            model = Sequential()

            if method == 'ml_mlp':
                model.add(Dense(len(class_names) * 500, input_dim=train_inputs[0].size, activation='relu'))
                model.add(Dense(len(class_names) * 500, activation='relu'))
                model.add(Dense(len(class_names) * 10, activation='relu'))
                model.add(Dense(len(class_names) * 5, activation='relu'))
                model.add(Dense(len(class_names) * 5, activation='relu'))
                model.add(Dense(len(class_names) * 5, activation='relu'))
                model.add(Dense(len(class_names), activation='softmax'))

                # Compile and train the Keras model
                model.compile(loss='sparse_categorical_crossentropy', optimizer='nadam')
                model.fit(train_inputs, train_labels, epochs=25, validation_split=0.1)

            if method == 'ml_cnn':
                # FIXME: network not properly tuned
                model.add(Reshape((train_inputs.shape[1], 1), input_shape=(train_inputs.shape[1],)))
                model.add(Conv1D(10, 10, activation='relu'))
                model.add(Flatten())
                model.add(Dense(len(class_names) * 16, activation='relu'))
                model.add(Dense(len(class_names) * 8, activation='relu'))
                model.add(Dense(len(class_names) * 4, activation='relu'))
                model.add(Dense(len(class_names), activation='relu'))
                model.add(Dense(len(class_names), activation='softmax'))

                # Compile and train the Keras model
                model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
                model.fit(train_inputs, train_labels, epochs=100, validation_split=0.1)

            if model_path != "":
                model.save(model_path)

        else:
            # Reuse a previously trained model
            model = load_model(model_path)

        predictions = model.predict(test_inputs)
        get_output(predictions, class_names)

    elif method == 'cross_correlation':
        # For each train/test pair, print the average magnitude of the
        # element-wise product of their FFTs
        trains = []
        for i in range(len(train_inputs)):
            trains.append(np.fft.rfft(train_inputs[i]))
        tests = []
        for j in range(len(test_inputs)):
            tests.append(np.fft.rfft(test_inputs[j]))

        for i in range(len(train_inputs)):
            print(class_names[train_labels[i]], end="\t")
            for j in range(len(test_inputs)):
                dist = np.average(np.abs(np.multiply(trains[i], tests[j])))
                print(dist, end="\t")
            print("")

    elif method == 'fft':
        # For each train/test pair, print the average absolute difference
        # between their log-magnitude spectra (smaller means more similar)
        trains = []
        for i in range(len(train_inputs)):
            trains.append(np.log10(np.abs(np.fft.rfft(train_inputs[i]))))
        tests = []
        for j in range(len(test_inputs)):
            tests.append(np.log10(np.abs(np.fft.rfft(test_inputs[j]))))

        for i in range(len(train_inputs)):
            print(class_names[train_labels[i]], end="\t")
            for j in range(len(test_inputs)):
                print(np.average(np.abs(tests[j] - trains[i])), end="\t")
            print("")

    else:
        print("Invalid method")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------