├── LICENSE
├── README.md
├── split_audio.py
└── audio_reco.py
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Charles Grassin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Breaking passwords with a microphone

This repository contains a Python proof-of-concept for breaking passwords with a microphone, using machine learning.

*Because keyboards are mechanical devices, each key may produce a slightly different sound due to small manufacturing variations. The fact that each key makes a somewhat unique sound is a vulnerability: although the difference is not easily picked up by the human ear, it can be exploited by an algorithm...*

Please have a look at my original article here: http://charleslabs.fr/en/project-Breaking+Passwords+with+a+Microphone

## Requirements

* Python 3
* Keras and TensorFlow (`pip3 install keras tensorflow`)
* NumPy, SciPy and Matplotlib (`pip3 install numpy scipy matplotlib`); `argparse` is part of the Python 3 standard library

## Usage instructions

**Disclaimer:** this is research code, built as a proof-of-concept. It is not meant to be a practical application.

This repository includes two executable Python scripts:
* **split_audio.py**, a script that breaks up a WAV recording into one file per detected key press. It is used to generate the training data.
* **audio_reco.py**, a script that performs the actual key recognition. Several methods are included (MLP, CNN, cross-correlation and FFT distance).

To generate the training data, call the `split_audio.py` script:
```bash
./split_audio.py --input ./path/to/file_with_KEY_presses.wav --out-dir ./path/to/train --label KEY
```

To launch the learning process, save the model and make a prediction:
```bash
./audio_reco.py --train-path ./path/to/train --test-path ./path/to/test.wav --model ../path/to/save/trained_model.h5
```

You may want to use the `--help` option on both scripts.
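As a more complete example, a two-key workflow could look like this (the paths and file names below are only placeholders, substitute your own recordings):

```bash
# One recording per key: each WAV contains repeated presses of a single key.
./split_audio.py --input ./recordings/a_presses.wav --out-dir ./train --label a
./split_audio.py --input ./recordings/b_presses.wav --out-dir ./train --label b

# Train the default MLP model on ./train, save it and predict the keys in test.wav.
./audio_reco.py --train-path ./train --test-path ./recordings/test.wav --model ./trained_model.h5
```

Alternatively, if a single training recording contains a repeated sequence of different keys (e.g. "ab" typed over and over), the `--split-label-char` option of `split_audio.py` assigns the characters of `--label` to successive detected presses, e.g. `--label a-b --split-label-char -`.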
--------------------------------------------------------------------------------
/split_audio.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Split a WAV recording of key presses into one file per detected press.

Created on Fri Dec 13 21:30:59 2019

@author: charles
"""

import argparse
import os

import numpy as np
from scipy.io.wavfile import read, write


def normalize(x: np.ndarray):
    """Scale the signal so that its maximum value is 1."""
    return x / np.max(x)


def split_file(file, time_before=0.025, time_after=0.2, trigger=1000, normalize_result=False):
    """Detect samples above the trigger threshold and return one slice per
    key press (time_before seconds before the trigger, time_after seconds
    after it), along with the sample rate of the recording."""
    outputs = []

    # Open file
    sample_rate, a = read(file)
    a = np.array(a, dtype=float)

    # Convert the window durations from seconds to samples
    length_after = int(time_after * sample_rate)
    length_before = int(time_before * sample_rate)

    # Display sound (debug)
    # plt.plot(a)
    # plt.show()

    i = 0
    while i < a.size:
        # End of usable recording
        if i + length_after > a.size:
            break
        if a[i] > trigger and i >= length_before:
            sub = a[i - length_before:i + length_after]
            if normalize_result:
                sub = normalize(sub)
            outputs.append(sub)
            # Jump past this press so it is not detected twice
            i += length_after
        i += 1

    return outputs, sample_rate


def main():
    parser = argparse.ArgumentParser(description='Split a recording of key presses into individual WAV files.')
    parser.add_argument('--input', type=str, help='Input WAV file')
    parser.add_argument('--out-dir', type=str, help='Output directory')
    parser.add_argument('--label', type=str, help='Output files prefix')
    parser.add_argument('--split-label-char', type=str, default='', help='Character used to split the label into per-press labels')
    parser.add_argument('--trigger', type=float, default=1000, help='Trigger threshold')
    parser.add_argument('--time_before', type=float, default=0.025, help='Duration to keep before each trigger (s)')
    parser.add_argument('--time_after', type=float, default=0.2, help='Duration to keep after each trigger (s)')
    args = parser.parse_args()

    outputs, sample_rate = split_file(args.input, args.time_before, args.time_after, args.trigger)

    if args.split_label_char == '':
        labels = [args.label]
    else:
        labels = str.split(args.label, args.split_label_char)

    if len(outputs) % len(labels):
        print("ERROR: the number of detected key presses (%d) is not a multiple of the number of labels (%d)."
              % (len(outputs), len(labels)))
        return

    # Write each press to <out-dir>/<label>_<n>.wav, cycling through the labels
    n = 0
    i = 0
    for output in outputs:
        while os.path.isfile(args.out_dir + "/" + labels[i % len(labels)] + "_" + str(n) + ".wav"):
            n += 1
        write(args.out_dir + "/" + labels[i % len(labels)] + "_" + str(n) + ".wav", sample_rate, np.asarray(output, dtype=np.int16))
        print('Created ' + args.out_dir + "/" + labels[i % len(labels)] + "_" + str(n) + ".wav")
        i += 1


if __name__ == "__main__":
    main()
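# Example (hypothetical file name): split_file() can also be called directly
# from Python, e.g. to check how many key presses a given trigger threshold
# detects before writing anything to disk:
#
#   presses, rate = split_file("a_presses.wav", trigger=1000)
#   print(len(presses), "presses detected at", rate, "Hz")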
--------------------------------------------------------------------------------
/audio_reco.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Process some recordings of key presses to recover characters.

Created on Fri Dec 13 22:44:38 2019

@author: charles
"""

import argparse
import os

import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
from keras.models import load_model, Sequential
from keras.layers import Dense, Conv1D, Flatten, Reshape

import split_audio


def get_output(predictions, class_names):
    """Print the class probabilities for each key press and the most likely key."""
    for i in range(len(class_names)):
        print(class_names[i], end="\t")
    print("")

    for i in range(len(predictions)):
        for j in range(len(predictions[i])):
            print("%02.0f" % (predictions[i, j] * 100) + "%", end="\t")
        print("-> " + class_names[np.argmax(predictions[i])], end="\n")


def get_output_plt(predictions, class_names):
    """Plot the class probabilities for each key press as bar charts."""
    for i in range(len(predictions)):
        plt.subplot(1, len(predictions), i + 1)
        plt.yticks([])
        # plt.xticks(rotation=90)
        plt.grid(False)
        plt.bar(class_names, predictions[i])


def main():
    parser = argparse.ArgumentParser(description='Process some recordings of key presses to recover characters.')
    parser.add_argument('--train-path', type=str, help='Training directory containing files KEY_number.wav')
    parser.add_argument('--test-path', type=str, help='File to predict.')
    parser.add_argument('--method', type=str, default='ml_mlp', help='Method to use: ml_mlp/ml_cnn/cross_correlation/fft')
    parser.add_argument('--model', type=str, default='', help='Path to save/load H5 model')
    parser.add_argument('--trigger', type=float, default=1000, help='Trigger threshold')
    parser.add_argument('--lbound-samples', type=int, default=1050, help='Lower sample bound')
    parser.add_argument('--ubound-samples', type=int, default=2000, help='Upper sample bound')
    args = parser.parse_args()

    # Args
    train_path = args.train_path
    test_path = args.test_path
    method = args.method
    model_path = args.model

    # Open WAV train set: one KEY_number.wav file per key press
    train_inputs = []
    train_labels = []
    class_names = []

    for filename in os.listdir(train_path):
        if filename.endswith(".wav"):
            train_inputs.append(split_audio.normalize(read(train_path + "/" + filename)[1]))
            label = str.split(filename, "_")[0]
            if label not in class_names:
                class_names.append(label)
            train_labels.append(class_names.index(label))

    # Split the test recording into individual key presses
    test_inputs = split_audio.split_file(file=test_path, normalize_result=True, trigger=args.trigger)[0]

    train_inputs = np.array(train_inputs)
    train_labels = np.array(train_labels)
    test_inputs = np.array(test_inputs)

    # Keep only the samples between the lower and upper bounds
    train_inputs = train_inputs[:, args.lbound_samples:args.ubound_samples]
    test_inputs = test_inputs[:, args.lbound_samples:args.ubound_samples]

    # DEBUG: show sample of input
    # for i in range(25):
    #     plt.subplot(5, 5, i + 1)
    #     plt.xticks([])
    #     plt.yticks([])
    #     plt.grid(False)
    #     plt.xlabel(class_names[train_labels[i]])
    #     plt.plot(train_inputs[i])
    # plt.show()
    #
    # for i in range(3):
    #     plt.subplot(5, 5, i + 1)
    #     plt.xticks([])
    #     plt.yticks([])
    #     plt.grid(False)
    #     plt.plot(test_inputs[i])
    # plt.show()

    if method == 'ml_mlp' or method == 'ml_cnn':
        if not os.path.isfile(model_path):
            model = Sequential()

            if method == 'ml_mlp':
                model.add(Dense(len(class_names) * 500, input_dim=train_inputs[0].size, activation='relu'))
                model.add(Dense(len(class_names) * 500, activation='relu'))
                model.add(Dense(len(class_names) * 10, activation='relu'))
                model.add(Dense(len(class_names) * 5, activation='relu'))
                model.add(Dense(len(class_names) * 5, activation='relu'))
                model.add(Dense(len(class_names) * 5, activation='relu'))
                model.add(Dense(len(class_names), activation='softmax'))

                # Compile and train the Keras model
                model.compile(loss='sparse_categorical_crossentropy', optimizer='nadam')
                model.fit(train_inputs, train_labels, epochs=25, validation_split=0.1)

            if method == 'ml_cnn':
                # FIXME: network not properly tuned
                model.add(Reshape((train_inputs.shape[1], 1), input_shape=(train_inputs.shape[1],)))
                model.add(Conv1D(10, 10, activation='relu'))
                model.add(Flatten())
                model.add(Dense(len(class_names) * 16, activation='relu'))
                model.add(Dense(len(class_names) * 8, activation='relu'))
                model.add(Dense(len(class_names) * 4, activation='relu'))
                model.add(Dense(len(class_names), activation='relu'))
                model.add(Dense(len(class_names), activation='softmax'))

                # Compile and train the Keras model
                model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
                model.fit(train_inputs, train_labels, epochs=100, validation_split=0.1)

            if model_path != "":
                model.save(model_path)

        else:
            # Reuse a previously trained model
            model = load_model(model_path)

        predictions = model.predict(test_inputs)
        get_output(predictions, class_names)

    elif method == 'cross_correlation':
        # For each train/test pair, print the average magnitude of the
        # element-wise product of their FFTs
        trains = []
        for i in range(len(train_inputs)):
            trains.append(np.fft.rfft(train_inputs[i]))
        tests = []
        for j in range(len(test_inputs)):
            tests.append(np.fft.rfft(test_inputs[j]))

        for i in range(len(train_inputs)):
            print(class_names[train_labels[i]], end="\t")
            for j in range(len(test_inputs)):
                dist = np.average(np.abs(np.multiply(trains[i], tests[j])))
                print(dist, end="\t")
            print("")

    elif method == 'fft':
        # For each train/test pair, print the average absolute difference
        # between their log-magnitude spectra (smaller means more similar)
        trains = []
        for i in range(len(train_inputs)):
            trains.append(np.log10(np.abs(np.fft.rfft(train_inputs[i]))))
        tests = []
        for j in range(len(test_inputs)):
            tests.append(np.log10(np.abs(np.fft.rfft(test_inputs[j]))))

        for i in range(len(train_inputs)):
            print(class_names[train_labels[i]], end="\t")
            for j in range(len(test_inputs)):
                print(np.average(np.abs(tests[j] - trains[i])), end="\t")
            print("")

    else:
        print("Invalid method")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------