├── MKAD Corporate CLA_2021.pdf
├── MKAD Individual CLA_2021.pdf
├── MKAD NOSA 2019.pdf
├── PythonCode
│   ├── SAX.py
│   ├── config_template.json
│   ├── preprocess_files_multiprocess.py
│   ├── run_mkad.py
│   └── visualization.py
├── README.md
├── documentation
│   └── README.docx
└── kernels
    ├── README.rst
    ├── __init__.py
    ├── setup.py
    └── src
        └── nlcs
            ├── lcs.cpp
            ├── lcs.h
            └── nlcs_wrapper.cpp

--------------------------------------------------------------------------------
/MKAD Corporate CLA_2021.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nasa/PyMKAD/02d29db9e13ceffc7fdabb188948618da40306a1/MKAD Corporate CLA_2021.pdf

--------------------------------------------------------------------------------
/MKAD Individual CLA_2021.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nasa/PyMKAD/02d29db9e13ceffc7fdabb188948618da40306a1/MKAD Individual CLA_2021.pdf

--------------------------------------------------------------------------------
/MKAD NOSA 2019.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nasa/PyMKAD/02d29db9e13ceffc7fdabb188948618da40306a1/MKAD NOSA 2019.pdf

--------------------------------------------------------------------------------
/PythonCode/SAX.py:
--------------------------------------------------------------------------------
1 | '''
2 | @author: Bryan Matthews KBRWyle
3 | Data Science Group
4 | NASA Ames Research Center
5 | 
6 | This code provides helper functions used by both preprocess_files_multiprocess.py and run_mkad.py.
7 | 
8 | Code Updated: 2019-03-08
9 | '''
10 | 
11 | 
12 | import numpy as np
13 | from scipy import signal
14 | import csv
15 | import json
16 | import pickle
17 | from glob import glob
18 | import time
19 | import gzip
20 | import os,sys
21 | import io
22 | import pandas as pd
23 | import nlcs
24 | 
25 | global cut_points
26 | cut_points={ '2': [-np.inf,0],
27 |              '3': [-np.inf,-0.43,0.43],
28 |              '4': [-np.inf,-0.67,0,0.67],
29 |              '5': [-np.inf,-0.84,-0.25,0.25,0.84],
30 |              '6': [-np.inf,-0.97,-0.43,0,0.43,0.97],
31 |              '7': [-np.inf,-1.07,-0.57,-0.18,0.18,0.57,1.07],
32 |              '8': [-np.inf,-1.15,-0.67,-0.32,0,0.32,0.67,1.15],
33 |              '9': [-np.inf,-1.22,-0.76,-0.43,-0.14,0.14,0.43,0.76,1.22],
34 |              '10': [-np.inf,-1.28,-0.84,-0.52,-0.25,0,0.25,0.52,0.84,1.28],
35 |              '11': [-np.inf,-1.34,-0.91,-0.6,-0.35,-0.11,0.11,0.35,0.6,0.91,1.34],
36 |              '12': [-np.inf,-1.38,-0.97,-0.67,-0.43,-0.21,0,0.21,0.43,0.67,0.97,1.38],
37 |              '13': [-np.inf,-1.43,-1.02,-0.74,-0.5,-0.29,-0.1,0.1,0.29,0.5,0.74,1.02,1.43],
38 |              '14': [-np.inf,-1.47,-1.07,-0.79,-0.57,-0.37,-0.18,0,0.18,0.37,0.57,0.79,1.07,1.47],
39 |              '15': [-np.inf,-1.5,-1.11,-0.84,-0.62,-0.43,-0.25,-0.08,0.08,0.25,0.43,0.62,0.84,1.11,1.5],
40 |              '16': [-np.inf,-1.53,-1.15,-0.89,-0.67,-0.49,-0.32,-0.16,0,0.16,0.32,0.49,0.67,0.89,1.15,1.53],
41 |              '17': [-np.inf,-1.56,-1.19,-0.93,-0.72,-0.54,-0.38,-0.22,-0.07,0.07,0.22,0.38,0.54,0.72,0.93,1.19,1.56],
42 |              '18': [-np.inf,-1.59,-1.22,-0.97,-0.76,-0.59,-0.43,-0.28,-0.14,0,0.14,0.28,0.43,0.59,0.76,0.97,1.22,1.59],
43 |              '19': [-np.inf,-1.62,-1.25,-1,-0.8,-0.63,-0.48,-0.34,-0.2,-0.07,0.07,0.2,0.34,0.48,0.63,0.8,1,1.25,1.62],
44 |              '20': [-np.inf,-1.64,-1.28,-1.04,-0.84,-0.67,-0.52,-0.39,-0.25,-0.13,0,0.13,0.25,0.39,0.52,0.67,0.84,1.04,1.28,1.64]}
45 | 
46 | def read_pandas(filename):
47 |     gz = gzip.open(filename, 'rb')
48 |     f = io.BufferedReader(gz)
49 |     data= pd.read_csv(f,low_memory=False).replace('False','0').replace('True','1').replace('DNE','nan')
50 |     f.close()
51 |     gz.close()
52 |     header = np.array(data.keys())
53 |     return(header,data.values.astype(float))
54 | 
55 | 
56 | def quantize_lookup_table(x,alphabet_size):
57 |     global cut_points
58 |     return(alphabet_size-list(np.flipud(np.array(cut_points[str(alphabet_size)],dtype=float)<=x)).index(True))
59 | 
60 | 
61 | def quantize_time_series(Data,params,alphabet,window_size):
62 | 
63 |     quantized_data = np.zeros((int(np.ceil(Data['data'].shape[0]/float(window_size))),len(params['continuous_indx'])),dtype=int)
64 |     for i in range(int(np.ceil(Data['data'].shape[0]/float(window_size)))):
65 |         jj=0
66 |         for j in params['continuous_indx']:
67 |             max_range=min([(i+1)*window_size-1,Data['data'].shape[0]])
68 |             val = np.mean(Data['data'][i*window_size:max_range,j])
69 |             quantized_data[i,jj]=quantize_lookup_table(val,alphabet)
70 |             jj+=1
71 |     return(quantized_data)
72 | 
73 | def convert_disc_2_seq(Data,params):
74 |     if(len(params['discrete_indx'])>0):
75 |         changes=np.diff(Data['data'][:,params['discrete_indx']],axis=0)
76 |         for i in range(changes.shape[1]):
77 |             changes[changes[:,i]==1,i]=(i+1)*2-1
78 |             changes[changes[:,i]==-1,i]=(i+1)*2
79 |         seq=changes.flatten()
80 |         seq=np.append(seq[seq!=0],0)
81 |     else:
82 |         seq=np.array([1])
83 |     return(seq)
84 | 
85 | def output_vector_SVMlight(filename,append,quantized_data,discrete_seq):
86 |     FeatureV=[len(discrete_seq),quantized_data.shape[0]]
87 |     FeatureV.extend(list(discrete_seq.astype(int)))
88 |     FeatureV.extend(list(np.transpose(quantized_data).flatten()))
89 |     if(append):
90 |         fid=open(filename,'a')
91 |     else:
92 |         fid=open(filename,'w')
93 |     fid.write("1 ")
94 |     for i in range(len(FeatureV)):
95 |         fid.write(str(i+1)+":"+str(FeatureV[i])+" ")
96 |     fid.write("\n")
97 |     fid.close()
98 |     return([])
99 | 
100 | 
101 | def find_param_indices(header,params):
102 |     indx=[]
103 |     for p in params:
104 |         indx.append(list(header).index(p))
105 |     return(tuple(indx))
106 | 
107 | def load_FOQA_csv(filename):
108 |     header,data = read_pandas(filename)
109 |     data[0,np.isnan(data[0,:])]=0
110 |     for i,row in enumerate(data[:-1,:]):
111 |         indx_nans = np.isnan(data[i+1,:])
112 |         data[i+1,indx_nans] = data[i,indx_nans]
113 |     return({'header':header,'data':data})
114 | 
115 | #Finds the touchdown point and the descent beginning at the cutoff altitude.
116 | def find_marker(Data,important_params):
117 |     alt_indx=list(Data['header']).index(str(important_params['alt']))
118 |     td_indicator_indx=list(Data['header']).index(str(important_params['td_indicator']))
119 |     middle_indx=list(signal.filtfilt(np.ones((30),dtype=float),np.ones((1),dtype=float),Data['data'][:,alt_indx])/30**2>15000).index(True) #30 sec windowed filter to get rid of startup noise.
120 |     td_indx=list(np.diff(Data['data'][middle_indx:,td_indicator_indx])>0).index(1)+middle_indx
121 |     return({'middle_indx':middle_indx,'td_indx':td_indx,'alt_indx':alt_indx})
122 | 
123 | def get_approach(Data,start_alt,markers):
124 |     ##Adjust altitudes by touchdown altitude##
125 |     Data['data'][:,markers['alt_indx']]=Data['data'][:,markers['alt_indx']]-Data['data'][markers['td_indx'],markers['alt_indx']]
126 |     start_indx=markers['td_indx']-list(np.flipud(Data['data'][markers['middle_indx']:markers['td_indx'],markers['alt_indx']])>start_alt).index(True)
127 |     Data['data']=Data['data'][start_indx:markers['td_indx'],:]
128 |     return(Data)
129 | 
130 | # Keeps track of running first- and second-order statistics (power sums S0, S1, S2) for a streaming mean/std.
131 | def zscore_stream(data,statistics={'dataMean':[],'dataStd':[],'S0':[],'S1':[],'S2':[]}):
132 | 
133 |     if(len(statistics['dataMean'])==0):
134 |         statistics['S0']=np.zeros((data.shape[1]),dtype=int)
135 |         statistics['S1']=np.zeros((data.shape[1]),dtype=int)
136 |         statistics['S2']=np.zeros((data.shape[1]),dtype=int)
137 |         statistics['dataMean']=np.zeros((data.shape[1]),dtype=float)
138 |         statistics['dataStd']=np.zeros((data.shape[1]),dtype=float)
139 | 
140 |     statistics['S0']=statistics['S0']+np.sum(data**0,axis=0)
141 |     statistics['S1']=statistics['S1']+np.sum(data**1,axis=0)
142 |     statistics['S2']=statistics['S2']+np.sum(data**2,axis=0)
143 | 
144 |     for i in range(data.shape[1]):
145 |         statistics['dataMean'][i]=statistics['S1'][i]/statistics['S0'][i]
146 |         statistics['dataStd'][i]=(1.0/statistics['S0'][i])*np.sqrt(np.abs(statistics['S0'][i]*statistics['S2'][i]-statistics['S1'][i]**2))
147 |     statistics['dataStd'][statistics['dataStd']==0]=1
148 |     return(statistics)
149 | 
150 | # Merge reduce function to compute global statistics.
151 | def zscore_stream_merge(statistics1,statistics2):
152 | 
153 |     statistics1['S0']+=statistics2['S0']
154 |     statistics1['S1']+=statistics2['S1']
155 |     statistics1['S2']+=statistics2['S2']
156 | 
157 |     for i in range(statistics1['dataMean'].shape[0]):
158 |         statistics1['dataMean'][i]=statistics1['S1'][i]/statistics1['S0'][i]
159 |         statistics1['dataStd'][i]=(1.0/statistics1['S0'][i])*np.sqrt(np.abs(statistics1['S0'][i]*statistics1['S2'][i]-statistics1['S1'][i]**2))
160 |     statistics1['dataStd'][statistics1['dataStd']==0]=1
161 |     return(statistics1)
162 | 
163 | # Calls nlcs from the C-extension code, compiled separately via the kernels module.
164 | def MKAD_kernel_function(A,B):
165 |     return(nlcs.compute(np.atleast_2d(np.array(A,dtype=np.uint16)),np.atleast_2d(np.array(B,dtype=np.uint16))))
166 | 

--------------------------------------------------------------------------------
/PythonCode/config_template.json:
--------------------------------------------------------------------------------
1 | {
2 | "name":"RUN_ID",
3 | "filelist":"/path/to/filelist_of_gzip_csvs.txt",
4 | "working_dir":"/path/to/working/directory/",
5 | "svmlight_file":"/path/to/svmlight/filename.txt",
6 | "MKAD_folder":"/path/to/mkad/output/folder/",
7 | "important_params":{"alt":"ALTITUDE",
8 |                     "td_indicator":"LANDING GEAR COMPRESSED",
9 |                     "ground_speed": "GRND SPEED"},
10 | "params":{"continuous":"/path/to/continuous/parameter/continuous_parameter_list.txt",
11 |           "discrete":"/path/to/discrete/parameter/discrete_parameter_list.txt"},
12 | "nu":0.1,
13 | "alphabet":10,
14 | "window_size":30,
15 | "starting_alt":6000,
16 | "cluster_eps":0.07,
17 | "save_kernel":true,
18 | "use_existing_kernel":false
19 | }
20 | 
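A note on the quantization above: the cut_points table holds standard-normal breakpoints, and quantize_lookup_table maps each z-scored window mean to the 1-based index of the interval it falls into. The following is a minimal illustrative sketch of that lookup, not part of the repository; the breakpoints shown are the alphabet-10 row of the table and the input values are made up:

```python
# Illustrative sketch of SAX.quantize_lookup_table for alphabet_size=10 (values are made up).
import numpy as np

cuts_10 = [-np.inf, -1.28, -0.84, -0.52, -0.25, 0, 0.25, 0.52, 0.84, 1.28]

def quantize(x, cut_points):
    # Symbol = number of breakpoints at or below x (1..alphabet_size);
    # this is equivalent to the flipud/index(True) arithmetic in SAX.py.
    return int(np.sum(np.array(cut_points, dtype=float) <= x))

# Per-window means of a z-scored signal become SAX symbols:
print([quantize(v, cuts_10) for v in [-1.5, -0.3, 0.1, 0.9, 2.0]])  # -> [1, 4, 6, 9, 10]
```

Because the breakpoints are standard-normal quantiles, each symbol is roughly equally likely for z-scored data, which keeps the discretized alphabet balanced.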
--------------------------------------------------------------------------------
/PythonCode/preprocess_files_multiprocess.py:
--------------------------------------------------------------------------------
1 | #!${HOME}/anaconda3/bin/python
2 | 
3 | # __________________________________________________________________________
4 | #
5 | # Notices:
6 | #
7 | # Copyright 2010, 2019 United States Government as represented by the Administrator of the National Aeronautics and
8 | # Space Administration. All Rights Reserved.
9 | #
10 | # Disclaimers
11 | #
12 | # No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED,
13 | # IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM
14 | # TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM
15 | # FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION,
16 | # IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN
17 | # ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE
18 | # PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY
19 | # DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE,
20 | # AND DISTRIBUTES IT "AS IS."
21 | #
22 | # Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT,
23 | # ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE
24 | # RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY
25 | # DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT
26 | # SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL
27 | # AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL
28 | # BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
29 | #
30 | # __________________________________________________________________________
31 | #
32 | #
33 | '''
34 | @author: Bryan Matthews KBRWyle
35 | Data Science Group
36 | NASA Ames Research Center
37 | 
38 | This code is designed to preprocess gzipped CSV files using Symbolic Aggregate approXimation (SAX).
39 | The resulting vectors are stored in an SVMlight-format file. Usage:
40 | $>python preprocess_files_multiprocess.py config.json number_of_processes(optional)
41 | 
42 | Code Updated: 2019-03-08
43 | '''
44 | 
45 | 
46 | 
47 | import numpy as np
48 | from scipy import signal
49 | import csv
50 | import json
51 | import pickle
52 | from glob import glob
53 | import time
54 | import gzip
55 | import sys,os
56 | import SAX
57 | from multiprocessing import Process
58 | from progress.bar import IncrementalBar
59 | import warnings
60 | warnings.simplefilter(action='ignore', category=FutureWarning)
61 | 
62 | # Distributed worker for loading, partitioning, and computing statistics of flights.
63 | def worker(filelist,config,thread_id):
64 |     first_time=True
65 |     continuous_params = np.atleast_1d(np.genfromtxt(config['params']['continuous'],delimiter="\n",comments="@",dtype=str))
66 |     discrete_params = np.atleast_1d(np.genfromtxt(config['params']['discrete'],delimiter="\n",comments="@",dtype=str))
67 |     data_cube = {'continuous':np.zeros((filelist.shape[0], 81, continuous_params.shape[0]), dtype=float),
68 |                  'discrete':np.zeros((filelist.shape[0], 81, discrete_params.shape[0]), dtype=float),
69 |                  'continuous_params':continuous_params,'discrete_params':discrete_params, 'filelist':[]}
70 |     bar = IncrementalBar('Task '+str(100+thread_id)[1:]+': Partitioning Flights...', max=len(filelist))
71 |     for i,f in enumerate(filelist):
72 |         data_cube['filelist'].append(os.path.basename(f).split(".")[0])
73 |         try:
74 |             Data = SAX.load_FOQA_csv(f)
75 |             markers=SAX.find_marker(Data,config['important_params'])
76 |             Data=SAX.get_approach(Data,config['starting_alt'],markers)
77 |         except ValueError:
78 |             continue
79 |         config['params']['continuous_indx']=SAX.find_param_indices(Data['header'],continuous_params)
80 |         config['params']['discrete_indx']=SAX.find_param_indices(Data['header'],discrete_params)
81 |         if(first_time):
82 |             statistics=SAX.zscore_stream(Data['data'][:,config['params']['continuous_indx']])
83 |             first_time=False
84 |         else:
85 |             statistics=SAX.zscore_stream(Data['data'][:,config['params']['continuous_indx']],statistics)
86 |         xvec = np.flipud(np.cumsum(np.flipud(Data['data'][:, np.where(np.array(Data['header']) == config['important_params']['ground_speed'])[0]])) / 3600)
87 |         bins = [np.intersect1d(np.where((xvec >= d)), np.where(xvec < (d + 0.25))) for d in np.linspace(0, 20 - 0.25, 80)] #Create 20 NM to 0 NM vector in 0.25 mile bins.
88 |         bins.append(np.intersect1d(np.where(xvec >= 20.0), np.where(xvec < np.inf)))
89 | 
90 |         data_cube['continuous'][i, :, :] = np.array([np.mean(Data['data'][b,:][:,config['params']['continuous_indx']],axis=0) if len(b)>0 else np.zeros((len(config['params']['continuous_indx'])),dtype=float)*np.nan for b in np.flipud(bins)])
91 |         data_cube['discrete'][i, :, :] = np.array([np.mean(Data['data'][b, :][:, config['params']['discrete_indx']],axis=0) if len(b) > 0 else np.zeros((len(config['params']['discrete_indx'])), dtype=float) * np.nan for b in np.flipud(bins)])
92 |         pickle.dump(Data,open(os.path.join(config['working_dir'],'data',os.path.basename(f).replace('.csv.gz','.pkl')),'wb'))
93 |         bar.next()
94 |     bar.finish()
95 |     pickle.dump(statistics,open(os.path.join(config['working_dir'],'statistics_'+str(thread_id)+'.pkl'),'wb'))
96 |     pickle.dump(data_cube, open(os.path.join(config['working_dir'], 'data_cube_' + str(thread_id) + '.pkl'), 'wb'))
97 |     return()
98 | 
99 | # Distributed worker for applying SAX vectorization to flight data.
100 | def worker_SAX(filelist,config,statistics,thread_id):
101 | 
102 |     good_indx=np.zeros((len(filelist)),dtype=bool)
103 |     continuous_params = np.atleast_1d(np.genfromtxt(config['params']['continuous'],delimiter="\n",comments="@",dtype=str))
104 |     discrete_params = np.atleast_1d(np.genfromtxt(config['params']['discrete'],delimiter="\n",comments="@",dtype=str))
105 |     bar = IncrementalBar('Task '+str(100+thread_id)[1:]+': Creating SAX Vector...', max=len(filelist))
106 |     first_time=True
107 |     for i,f in enumerate(filelist):
108 |         Data=pickle.load(open(f,'rb'))
109 |         config['params']['continuous_indx']=SAX.find_param_indices(Data['header'],continuous_params)
110 |         config['params']['discrete_indx']=SAX.find_param_indices(Data['header'],discrete_params)
111 |         Data['data'][:,config['params']['continuous_indx']]=(Data['data'][:,config['params']['continuous_indx']]-np.tile(statistics['dataMean'],[Data['data'].shape[0],1]))/np.tile(statistics['dataStd'],[Data['data'].shape[0],1])
112 |         Data['data'][np.isnan(Data['data'])]=0
113 |         quantized_data=SAX.quantize_time_series(Data,config['params'],config['alphabet'],config['window_size'])
114 |         good_indx[i]=quantized_data.shape[0]!=0
115 |         if(not good_indx[i]):
116 |             continue
117 |         discrete_seq=SAX.convert_disc_2_seq(Data,config['params'])
118 |         if(first_time):
119 |             first_time=False
120 |             # if(os.path.dirname(config['svmlight_file'])!=""):
121 |             print(os.path.dirname(config['svmlight_file']))
122 |             os.makedirs(os.path.dirname(config['svmlight_file']), exist_ok=True)
123 |             SAX.output_vector_SVMlight(config['svmlight_file']+'_'+str(100+thread_id)[1:],False,quantized_data,discrete_seq)
124 |         else:
125 |             SAX.output_vector_SVMlight(config['svmlight_file']+'_'+str(100+thread_id)[1:],True,quantized_data,discrete_seq)
126 |         bar.next()
127 |     bar.finish()
128 |     np.savetxt(os.path.join(config['working_dir'],"filelist_in_svmlight_file_"+str(100+thread_id)[1:]+".txt"),np.array(filelist)[good_indx],fmt="%s")
129 |     return()
130 | 
131 | def cat_files(filelist,output):
132 |     fid_out = open(output,'w')
133 |     for f in filelist:
134 |         with open(f,'r') as fid:
135 |             data = fid.read()
136 |         fid_out.write(data)
137 |     fid_out.close()
138 | 
139 | if __name__ == '__main__':
140 | 
141 |     if(len(sys.argv)<2):
142 |         print("Usage:")
143 |         print("$>python preprocess_files_multiprocess.py config.json number_of_processes(optional)")
144 |         quit()
145 | 
146 |     skip_partitioning=False #For debugging purposes. Skips the initial partitioning of flights when True.
147 | 
148 |     # Limit multithreading in scientific packages like BLAS to 1 thread to avoid conflicts with our multiprocess preprocessing steps.
149 |     os.environ["OMP_NUM_THREADS"] = "1"
150 | 
151 |     startT = time.time()
152 |     config=json.load(open(sys.argv[1]))
153 |     if(len(sys.argv)<3):
154 |         number_of_processes=1.0
155 |     else:
156 |         number_of_processes=float(sys.argv[2])
157 | 
158 |     os.makedirs(os.path.join(config['working_dir'],'data'), exist_ok=True)
159 |     if not skip_partitioning:
160 |         print("Partitioning flights from "+str(config['starting_alt'])+ " ft to landing...")
161 |         filelist=np.genfromtxt(config['filelist'],delimiter='\n',dtype=str)
162 |         size_per_thread=np.ceil(float(filelist.shape[0])/number_of_processes)
163 |         jobs=[]
164 |         for i in range(int(number_of_processes)):
165 |             p = Process(target=worker, args=(filelist[int((i)*size_per_thread):int(min(int((i+1)*size_per_thread),filelist.shape[0]))],config,i))
166 |             jobs.append(p)
167 |             p.start()
168 |         while len(jobs) > 0:
169 |             jobs = [job for job in jobs if job.is_alive()]
170 |             time.sleep(1)
171 | 
172 |         statistics=pickle.load(open(os.path.join(config['working_dir'],'statistics_0.pkl'),'rb'))
173 |         data_cube = pickle.load(open(os.path.join(config['working_dir'],'data_cube_0.pkl'), 'rb'))
174 |         for i in range(1,int(number_of_processes)):
175 |             statistics2=pickle.load(open(os.path.join(config['working_dir'],'statistics_'+str(i)+'.pkl'),'rb'))
176 |             statistics=SAX.zscore_stream_merge(statistics,statistics2)
177 |             data_cube_tmp = pickle.load(open(os.path.join(config['working_dir'], 'data_cube_' + str(i) + '.pkl'), 'rb'))
178 |             data_cube['continuous'] = np.vstack((data_cube['continuous'],data_cube_tmp['continuous']))
179 |             data_cube['discrete'] = np.vstack((data_cube['discrete'], data_cube_tmp['discrete']))
180 |             data_cube['filelist'].extend(data_cube_tmp['filelist'])
181 | 
182 |         pickle.dump(statistics,open(os.path.join(config['working_dir'],'statistics.pkl'),'wb'))
183 |         pickle.dump(data_cube, open(os.path.join(config['working_dir'],'data_cube.pkl'), 'wb'))
184 |         for i in range(int(number_of_processes)):
185 |             os.remove(os.path.join(config['working_dir'],'statistics_'+str(i)+'.pkl'))
186 |             os.remove(os.path.join(config['working_dir'],'data_cube_' + str(i) + '.pkl'))
187 | 
188 |     first_time=True
189 |     statistics=pickle.load(open(os.path.join(config['working_dir'],'statistics.pkl'),'rb'))
190 |     filelist=np.array(sorted(list(set(glob(os.path.join(config['working_dir'],'data','*.pkl'))))))
191 |     size_per_thread=np.ceil(float(filelist.shape[0])/number_of_processes)
192 |     jobs=[]
193 |     for i in range(int(number_of_processes)):
194 |         p = Process(target=worker_SAX, args=(filelist[int((i)*size_per_thread):int(min(int((i+1)*size_per_thread),filelist.shape[0]))],config,statistics,i))
195 |         jobs.append(p)
196 |         p.start()
197 |     while len(jobs) > 0:
198 |         jobs = [job for job in jobs if job.is_alive()]
199 |         time.sleep(1)
200 | 
201 |     filelist = sorted(glob(os.path.join(config['working_dir'],'filelist_in_svmlight_file_*')))
202 |     cat_files(filelist,os.path.join(config['working_dir'],'filelist_in_svmlight_file.txt'))
203 |     # os.system('cat '+config['working_dir']+'/filelist_in_svmlight_file_* > '+ config['working_dir']+'/filelist_in_svmlight_file.txt')
204 |     [os.remove(f) for f in glob(config['working_dir']+"/filelist_in_svmlight_file_*")]
205 |     filelist = sorted(glob(config['svmlight_file']+'_*'))
206 |     cat_files(filelist,config['svmlight_file'])
207 |     # os.system('cat '+config['svmlight_file']+'_* > '+config['svmlight_file'])
208 |     [os.remove(f) for f in glob(config['svmlight_file']+"_*")]
209 | 
210 |     print("Runtime: " + str(time.time()-startT) + " Seconds")
211 | 
212 | 
213 | 
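For reference when reading run_mkad.py below: output_vector_SVMlight packs each flight as [discrete-sequence length, window count, the discrete event sequence, then the column-major flattened quantized matrix], and run_mkad's parse_SAX_vector inverts exactly that layout. Here is a small round-trip sketch with made-up values, illustrative only and not part of the repository:

```python
# Illustrative round trip of the SVMlight feature layout (values are made up).
import numpy as np

quantized = np.array([[3, 7], [4, 7], [4, 6]])  # 3 windows x 2 continuous params
discrete_seq = np.array([1, 4, 0])              # event sequence, 0-terminated

vec = [len(discrete_seq), quantized.shape[0]]            # header: seq length, window count
vec += list(discrete_seq) + list(quantized.T.flatten())  # sequence, then column-major matrix

# Unpack the same way parse_SAX_vector does:
v = np.atleast_2d(np.array(vec, dtype=float))
seq = v[0, 2:2 + int(v[0, 0])]
num_rows = int(v[0, 1])
num_cols = (v.shape[1] - int(v[0, 0]) - 2) // num_rows
cont_matrix = v[0, 2 + int(v[0, 0]):].reshape((num_cols, num_rows))
assert (seq == discrete_seq).all() and (cont_matrix == quantized.T).all()
```

The column-major flattening is what lets parse_SAX_vector recover one row per continuous parameter, so each parameter's symbol series can be fed to the nLCS kernel separately.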
--------------------------------------------------------------------------------
/PythonCode/run_mkad.py:
--------------------------------------------------------------------------------
1 | #!${HOME}/anaconda3/bin/python
2 | 
3 | #_________________________________________________________________________
4 | #
5 | # Notices:
6 | #
7 | # Copyright 2010, 2019 United States Government as represented by the Administrator of the National Aeronautics and
8 | # Space Administration. All Rights Reserved.
9 | #
10 | # Disclaimers
11 | #
12 | # No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED,
13 | # IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM
14 | # TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM
15 | # FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION,
16 | # IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN
17 | # ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE
18 | # PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY
19 | # DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE,
20 | # AND DISTRIBUTES IT "AS IS."
21 | #
22 | # Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT,
23 | # ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE
24 | # RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY
25 | # DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT
26 | # SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL
27 | # AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL
28 | # BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
29 | #
30 | # __________________________________________________________________________
31 | 
32 | '''
33 | @author: Bryan Matthews KBRWyle
34 | Data Science Group
35 | NASA Ames Research Center
36 | 
37 | This code will load the SVMlight file produced by preprocess_files_multiprocess.py and execute the Multiple Kernel Anomaly
38 | Detection (MKAD) algorithm. The output will be saved in a CSV file with decomposed score contributions. Usage:
39 | $>python run_mkad.py config.json number_of_processes(optional)
40 | 
41 | Code Updated: 2019-03-08
42 | '''
43 | 
44 | 
45 | 
46 | import sys,os
47 | import json
48 | import numpy as np
49 | from multiprocessing import Process, Queue
50 | import time
51 | from sklearn.datasets import load_svmlight_file
52 | import SAX
53 | from progress.bar import IncrementalBar
54 | from sklearn.svm import OneClassSVM
55 | import pickle
56 | from sklearn.cluster import DBSCAN
57 | 
58 | 
59 | def parse_SAX_vector(SAX_v):
60 |     seq = SAX_v[0,2:2+int(SAX_v[0,0])]
61 |     num_rows = int(SAX_v[0,1])
62 |     num_cols = int((SAX_v.shape[1]-int(SAX_v[0,0])-2)/int(SAX_v[0,1]))
63 |     cont_matrix = SAX_v[0,2+int(SAX_v[0,0]):].reshape((num_cols,num_rows))
64 |     return([seq,cont_matrix])
65 | 
66 | def worker(index,svmlight_data,thread_id,q):
67 | 
68 |     bar = IncrementalBar('Task '+str(100+thread_id)[1:]+': Computing Kernel...', max=len(index))
69 |     K = np.zeros((len(index),svmlight_data.shape[0]),dtype=float)
70 |     count = 0
71 |     for I,i in enumerate(index):
72 |         seq1,cont_matrix1 = parse_SAX_vector(svmlight_data[i,:svmlight_data.getrow(i).nonzero()[1][-1]+1].todense())
73 |         for j in range(i,svmlight_data.shape[0]):
74 |             seq2,cont_matrix2 = parse_SAX_vector(svmlight_data[j,:svmlight_data.getrow(j).nonzero()[1][-1]+1].todense())
75 |             K[I,j] = 0.5*SAX.MKAD_kernel_function(np.transpose(seq1),np.transpose(seq2))
76 |             for l in range(cont_matrix1.shape[0]):
77 |                 K[I,j] += 0.5*SAX.MKAD_kernel_function(np.transpose(cont_matrix1[l,:]),np.transpose(cont_matrix2[l,:]))/cont_matrix1.shape[0]
78 |             count += 1
79 |         bar.next()
80 |     bar.finish()
81 |     q.put(K)
82 |     return([])
83 | 
84 | def worker_test(alphas,SVs,test,thread_id,q):
85 | 
86 |     _,cont_matrix = parse_SAX_vector(SVs[0,:np.max(SVs[0,:].nonzero()[1])+1].todense())
87 |     num_contin = cont_matrix.shape[0]
88 |     bar = IncrementalBar('Task '+str(100+thread_id)[1:]+': Calculating Decomposed Scores...', max=test.shape[0])
89 | 
90 |     scores_decomposed = np.zeros((test.shape[0],1+num_contin),dtype=float)
91 |     for j in range(test.shape[0]):
92 |         seq2,cont_matrix2 = parse_SAX_vector(test[j,:np.max(test[j,:].nonzero()[1])+1].todense())
93 |         for i in range(SVs.shape[0]):
94 |             seq1,cont_matrix1 = parse_SAX_vector(SVs[i,:np.max(SVs[i,:].nonzero()[1])+1].todense())
95 |             scores_decomposed[j,0] += alphas[i]*SAX.MKAD_kernel_function(np.transpose(seq1),np.transpose(seq2))
96 |             for l in range(num_contin):
97 |                 scores_decomposed[j,1+l] += alphas[i]*SAX.MKAD_kernel_function(np.transpose(cont_matrix1[l,:]),np.transpose(cont_matrix2[l,:]))
98 |         bar.next()
99 |     bar.finish()
100 |     q.put(scores_decomposed)
101 |     return([])
102 | 
103 | 
104 | if __name__ == '__main__':
105 | 
106 |     if(len(sys.argv)<2):
107 |         print("Usage:")
108 |         print("$>python run_mkad.py config.json number_of_processes(optional)")
109 |         quit()
110 | 
111 |     if(len(sys.argv)<3):
112 |         number_of_processes=1.0
113 |     else:
114 |         number_of_processes=float(sys.argv[2])
115 | 
116 |     config=json.load(open(sys.argv[1]))
117 | 
118 |     startT = time.time()
119 | 
120 |     svmlight_data = load_svmlight_file(config['svmlight_file'])[0][:,:]
121 |     nu = config['nu']
122 |     working_dir = config['working_dir']
123 |     params_c = np.genfromtxt(config['params']['continuous'],delimiter="\n",dtype=str)
124 | 
125 |     # Check that the kernel file exists. If not, reset the flags to compute the kernel from the SVMlight file and save it.
126 |     if(not os.path.isfile(os.path.join(config['working_dir'],'kernel_'+config['name']+'.pkl'))):
127 |         print("No existing kernel found...Computing from SVMlight file")
128 |         config['use_existing_kernel'] = False
129 |         config['save_kernel'] = True
130 | 
131 |     os.makedirs(config['MKAD_folder'], exist_ok=True)
132 |     if(not config['use_existing_kernel']):
133 |         totals = np.cumsum(np.arange(svmlight_data.shape[0],1,-1))
134 |         chunk_size = int(totals[-1]/number_of_processes)
135 |         index = [0]
136 |         while np.sum(totals) > 0:
137 |             I = np.argmax(totals>chunk_size)
138 |             if(I==0):
139 |                 index.append(totals.shape[0]+1)
140 |                 break
141 |             index.append(I)
142 |             totals -= totals[index[-1]]
143 |             totals[:index[-1]] = 0
144 | 
145 |         size_per_thread=np.ceil(float(svmlight_data.shape[0])/number_of_processes)
146 |         jobs=[]
147 |         pipe_list = []
148 |         for i in range(int(number_of_processes)):
149 |             if(index[i]==svmlight_data.shape[0]):
150 |                 break
151 |             q = Queue()
152 |             p = Process(target=worker, args=(np.arange(index[i],index[i+1]),svmlight_data,i,q))
153 |             jobs.append(p)
154 |             pipe_list.append(q)
155 |             p.start()
156 | 
157 |         time.sleep(1)
158 | 
159 |         K = np.zeros((svmlight_data.shape[0],svmlight_data.shape[0]),dtype=float)
160 |         indx = 0
161 |         for i,x in enumerate(pipe_list):
162 |             tmp = x.get()
163 |             K[indx:indx+tmp.shape[0],:] = tmp
164 |             indx += tmp.shape[0]
165 | 
166 |         # Copy over the upper to lower triangle
167 |         i_lower = np.tril_indices(K.shape[0],-1)
168 |         K[i_lower] = np.transpose(K)[i_lower] #Keep consistent row-major indexing by transposing and taking the upper triangle.
169 | 
170 |         if(config['save_kernel']):
171 |             pickle.dump(K,open(os.path.join(config['working_dir'],'kernel_'+config['name']+'.pkl'),'wb'))
172 |     if(config['use_existing_kernel']):
173 |         print("Loading Existing Kernel...")
174 |         K=pickle.load(open(os.path.join(config['working_dir'],'kernel_'+config['name']+'.pkl'),'rb'))
175 | 
176 |     # Solve the one-class SVM using the nu value from the config file
177 |     clf = OneClassSVM(kernel='precomputed',nu=nu,tol=1e-12)
178 |     clf.fit(K)
179 |     scores = clf.score_samples(K) - clf.offset_
180 | 
181 |     filelist = np.genfromtxt(working_dir+"/filelist_in_svmlight_file.txt",delimiter="\n",dtype=str)
182 |     filelist = np.array([os.path.basename(f).split(".")[0] for f in filelist])
183 | 
184 |     sorted_indx = np.argsort(scores)
185 |     cutoff_point = np.argmax(scores[sorted_indx]>=0)
186 | 
187 |     # Reduce scores and flights to anomaly list
188 |     filelist_anoms = filelist[sorted_indx][:cutoff_point]
189 |     scores = scores[sorted_indx][:cutoff_point]
190 | 
191 |     # Select data for Support Vectors and anomalies
192 |     SVs = svmlight_data[clf.support_,:]
193 |     anoms = svmlight_data[sorted_indx,:][:cutoff_point,:]
194 |     del(K)
195 | 
196 |     # Normalize alphas to sum to 1
197 |     alphas = clf.dual_coef_[0]/np.sum(clf.dual_coef_[0])
198 | 
199 |     # Get unbounded Support Vectors (used for computing rho)
200 |     SVs_ub = SVs[alphas <= 1/(clf.dual_coef_[0]*svmlight_data.shape[0]),:]
201 | 
202 |     _,cont_matrix1 = parse_SAX_vector(svmlight_data[0,:np.max(svmlight_data[0,:].nonzero()[1])+1].todense()) #get the number of continuous parameters.
203 |     num_contin = cont_matrix1.shape[0]
204 | 
205 | 
206 |     print("\nComputing Decomposed Rho Values...")
207 |     # Decompose the rhos
208 |     rho = np.zeros((1+num_contin),dtype=float)
209 |     for i in range(SVs.shape[0]):
210 |         seq1,cont_matrix1 = parse_SAX_vector(SVs[i,:np.max(SVs[i,:].nonzero()[1])+1].todense())
211 |         for j in range(SVs_ub.shape[0]):
212 |             seq2,cont_matrix2 = parse_SAX_vector(SVs_ub[j,:np.max(SVs_ub[j,:].nonzero()[1])+1].todense())
213 |             rho[0] += alphas[i]*SAX.MKAD_kernel_function(np.transpose(seq1),np.transpose(seq2))
214 |             for l in range(num_contin):
215 |                 rho[1+l] += alphas[i]*SAX.MKAD_kernel_function(np.transpose(cont_matrix1[l,:]),np.transpose(cont_matrix2[l,:]))#/cont_matrix1.shape[0]
216 |     rho /= SVs_ub.shape[0]
217 | 
218 | 
219 |     global_rho = np.sum(rho[1:]*0.5/num_contin)+rho[0]*0.5
220 |     print(global_rho)
221 | 
222 |     print("Decomposing Scores for "+str(anoms.shape[0])+ " Anomalies...")
223 |     size_per_thread=int(np.ceil(float(anoms.shape[0])/number_of_processes))
224 |     jobs=[]
225 |     pipe_list = []
226 |     for i in range(int(number_of_processes)):
227 |         q = Queue()
228 |         p = Process(target=worker_test, args=(alphas,SVs,anoms[int(i)*size_per_thread:int(min(int((i+1)*size_per_thread),anoms.shape[0])),:],i,q))
229 |         jobs.append(p)
230 |         p.start()
231 |         pipe_list.append(q)
232 | 
233 |     scores_decomposed = np.zeros((anoms.shape[0],1+num_contin),dtype=float)
234 |     indx = 0
235 |     for x in pipe_list:
236 |         tmp = x.get()
237 |         scores_decomposed[indx:indx+tmp.shape[0],:] = tmp
238 |         indx += tmp.shape[0]
239 | 
240 |     print("Computing Contributions...")
241 |     # Account for kernel weights and subtract out the decomposed rhos
242 |     scores_decomposed[:,0] -= rho[0]
243 |     scores_decomposed[:,0] *= 0.5
244 |     for l in range(num_contin):
245 |         scores_decomposed[:,1+l] -= rho[1+l]
246 |         scores_decomposed[:,1+l] *= 0.5/num_contin
247 | 
248 |     # Compute the global scores using the normalized alphas
249 |     global_scores = np.sum(scores_decomposed,axis=1)- global_rho
250 | 
251 |     # Compute the percent contribution.
252 |     percent_contribution = np.zeros((anoms.shape[0],1+num_contin),dtype=float)
253 |     for i,s in enumerate(scores_decomposed):
254 |         percent_contribution[i,:] = (s-np.max(s))/np.sum(s-np.max(s))
255 | 
256 |     print("Clustering flights with similar contributions...")
257 |     db = DBSCAN(eps=config['cluster_eps']).fit(percent_contribution)
258 |     print(set(db.labels_))
259 |     print("Number of Clusters: " + str(len(set(db.labels_))))
260 | 
261 |     print("Saving contribution file...\n"+config['MKAD_folder']+'/anomalous_flights_contributions_'+config['name']+'.csv')
262 |     fid=open(config['MKAD_folder']+'/anomalous_flights_contributions_'+config['name']+'.csv','w')
263 |     fid.write('Flight,MKAD_score,Cluster_ID,discrete_contribution,')
264 |     fid.write(",".join(params_c)+"\n")
265 |     for i in range(percent_contribution.shape[0]):
266 |         fid.write(filelist_anoms[i]+","+str(round(global_scores[i],6))+','+str(db.labels_[i])+",")
267 |         np.savetxt(fid,np.expand_dims(percent_contribution[i,:],axis=0),delimiter=",",fmt="%.6f")
268 |     fid.close()
269 |     print("Runtime: " + str(time.time()-startT) + " Seconds")

--------------------------------------------------------------------------------
/PythonCode/visualization.py:
--------------------------------------------------------------------------------
1 | #!${HOME}/anaconda3/bin/python
2 | 
3 | #_________________________________________________________________________
4 | #
5 | # Notices:
6 | #
7 | # Copyright 2010, 2019 United States Government as represented by the Administrator of the National Aeronautics and
8 | # Space Administration. All Rights Reserved.
9 | #
10 | # Disclaimers
11 | #
12 | # No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED,
13 | # IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM
14 | # TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM
15 | # FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION,
16 | # IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN
17 | # ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE
18 | # PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY
19 | # DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE,
20 | # AND DISTRIBUTES IT "AS IS."
21 | #
22 | # Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT,
23 | # ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE
24 | # RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY
25 | # DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT
26 | # SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL
27 | # AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL
28 | # BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
29 | #
30 | # __________________________________________________________________________
31 | 
32 | '''
33 | @author: Bryan Matthews KBRWyle
34 | Data Science Group
35 | NASA Ames Research Center
36 | 
37 | This code will take the report and generate visualization plots for each flight, using statistics derived from
38 | all the flights in the data set to determine 10th-90th percentiles and binary state probabilities for each distance
39 | to touchdown. Usage:
40 | $>python visualization.py config.json number_of_processes(optional)
41 | 
42 | Code Updated: 2019-03-08
43 | '''
44 | 
45 | import numpy as np
46 | import sys,os
47 | import time
48 | import pickle
49 | import json
50 | import matplotlib.pyplot as plt
51 | from multiprocessing import Process
52 | 
53 | 
54 | def worker(filelist,data_cube,MKAD_file,config,thread_id):
55 |     """Thread worker function to generate PDF plots."""
56 |     ## Hard coded constants ##
57 |     Ncols = 6.0
58 |     discrete_fuzzy_threshold = 0.30 # determines when a normally off or on discrete is marked abnormal
59 |     xvec = np.linspace(20,0,81)
60 | 
61 |     ptileData=np.percentile(data_cube['continuous'],[10,90],axis=0)
62 |     avg_discrete = np.mean(data_cube['discrete'],axis=0)
63 |     for i,a in enumerate(filelist):
64 |         plot_idx = len(data_cube['continuous_params'])
65 |         Nrows = np.ceil(len(data_cube['continuous_params'])/Ncols)
66 | 
67 |         flight_indx = np.where(a==np.array(data_cube['filelist']))[0][0]
68 | 
69 |         indx_sorted = np.argsort(MKAD_file[i,4:].astype(float))[::-1]
70 |         scores = MKAD_file[i,4+indx_sorted].astype(float)
71 |         indx_most_anomalous_params = indx_sorted[np.where(np.cumsum(scores/np.sum(scores))<0.5)]
72 | 
73 |         #Continuous
74 |         fig, axs = plt.subplots(int(Ncols),int(Nrows),figsize=[12,19])
75 |         fig.subplots_adjust(hspace=.5)
76 |         plt.suptitle(a+" (Continuous Parameters)", fontsize=16)
77 | 
78 |         axs=axs.ravel()
79 |         for pltIdx in np.arange(plot_idx):
80 |             excursions_below = data_cube['continuous'][flight_indx,:,pltIdx] < ptileData[0,:,pltIdx]
81 |             excursions_above = data_cube['continuous'][flight_indx,:,pltIdx] > ptileData[1,:,pltIdx]
82 |             # plot time series variable
83 |             axs[pltIdx].plot(xvec,data_cube['continuous'][flight_indx,:,pltIdx],linewidth=2,label="flight data")
84 |             axs[pltIdx].plot(xvec,ptileData[0,:,pltIdx],'k--',label="10/90 percentile")
85 |             axs[pltIdx].plot(xvec,ptileData[1,:,pltIdx],'k--')
86 |             axs[pltIdx].plot(xvec[excursions_below],data_cube['continuous'][flight_indx,excursions_below,pltIdx],'r.',markersize=10,linewidth=2,label="above|below percentile")
87 |             axs[pltIdx].plot(xvec[excursions_above],data_cube['continuous'][flight_indx,excursions_above,pltIdx],'r.',markersize=10,linewidth=2)
88 |             axs[pltIdx].invert_xaxis()
89 |             if(pltIdx in indx_most_anomalous_params):
90 |                 axs[pltIdx].set_title("{}".format(data_cube['continuous_params'][pltIdx]),fontsize=10,color='red')
91 |             else:
92 |                 axs[pltIdx].set_title("{}".format(data_cube['continuous_params'][pltIdx]),fontsize=10)
93 |             axs[pltIdx].set_xlabel("Distance to Landing (NM)")
94 |             if(pltIdx==1):
95 |                 axs[pltIdx].legend(loc=9, bbox_to_anchor=(0.5, 1.66), ncol=2)
96 |         print("Saving:" + os.path.join(config['MKAD_folder'],'figs', a +'_c.pdf'))
97 |         plt.savefig(os.path.join(config['MKAD_folder'],'figs', a +'_c.pdf'))
98 |         plt.close()
99 | 
100 |         #Discretes
101 |         plot_idx = len(data_cube['discrete_params'])
102 |         Nrows = np.ceil(len(data_cube['discrete_params'])/Ncols)
103 | 
104 |         fig, axs = plt.subplots(int(Ncols),int(Nrows), figsize= [12,19])
105 |         fig.subplots_adjust(hspace=.5)
106 |         plt.suptitle(a+" (Discrete Parameters)", fontsize=16)
107 |         axs=axs.ravel()
108 |         for pltIdx in np.arange(plot_idx):
109 | 
110 |             high_probability_on = avg_discrete[:,pltIdx] > (0.5 + discrete_fuzzy_threshold)
111 |             high_probability_off = avg_discrete[:,pltIdx] < (0.5 - discrete_fuzzy_threshold)
112 | 
113 |             excursions_off = (data_cube['discrete'][flight_indx,:,pltIdx] < (0.5 - discrete_fuzzy_threshold)) & high_probability_on
114 |             excursions_on = (data_cube['discrete'][flight_indx,:,pltIdx] > (0.5 + discrete_fuzzy_threshold)) & high_probability_off
115 | 
116 |             # plot time series variable
117 |             axs[pltIdx].plot(xvec,(data_cube['discrete'][flight_indx,:,pltIdx]>0).astype(float),linewidth=2,label="flight data") #Have to threshold because we took the average over the 1/4 NM bin
118 |             axs[pltIdx].plot(xvec,avg_discrete[:,pltIdx],'k--',label="average state")
119 |             axs[pltIdx].plot(xvec[excursions_off],(data_cube['discrete'][flight_indx,excursions_off,pltIdx]>0).astype(float),'rs',markersize=8,linewidth=2,label="off when nominally on") #Have to threshold because we took the average over the 1/4 NM bin
120 |             axs[pltIdx].plot(xvec[excursions_on],(data_cube['discrete'][flight_indx,excursions_on,pltIdx]>0).astype(float),'go',markersize=8,linewidth=2,label="on when nominally off") #Have to threshold because we took the average over the 1/4 NM bin
121 |             axs[pltIdx].invert_xaxis()
122 |             axs[pltIdx].set_title("{}".format(data_cube['discrete_params'][pltIdx]),fontsize=10)
123 |             axs[pltIdx].set_xlabel("Distance to Landing (NM)")
124 |             axs[pltIdx].set_ylim([-0.1,1.1])
125 |             if(pltIdx==1):
126 |                 axs[pltIdx].legend(loc=9, bbox_to_anchor=(0.5, 1.66), ncol=2)
127 |         print("Saving:" + os.path.join(config['MKAD_folder'],'figs' , a +'_d.pdf'))
128 |         plt.savefig(os.path.join(config['MKAD_folder'],'figs' , a +'_d.pdf'))
129 |         # plt.show()
130 |         plt.close()
131 | 
132 |     print('Process '+str(thread_id) + ' done.')
133 |     return()
134 | 
135 | 
136 | 
137 | if __name__ == '__main__':
138 | 
139 |     if(len(sys.argv)<2):
140 |         print("Usage:")
141 |         print("$>python visualization.py config.json number_of_processes(optional)")
142 |         quit()
143 | 
144 |     config=json.load(open(sys.argv[1]))
145 |     if(len(sys.argv)<3):
146 |         number_of_processes=1.0
147 |     else:
148 |         number_of_processes=float(sys.argv[2])
149 | 
150 |     params_cont = np.genfromtxt(config['params']['continuous'],delimiter="\n",comments="@",dtype=str)
151 |     params_disc = np.genfromtxt(config['params']['discrete'],delimiter="\n",dtype=str)
152 | 
153 |     filelist = np.genfromtxt(os.path.join(config['working_dir'],'filelist_in_svmlight_file.txt'),delimiter="\n",dtype=str)
154 |     MKAD_file = np.genfromtxt(os.path.join(config['MKAD_folder'],'anomalous_flights_contributions_'+config['name']+'.csv'),delimiter=",",comments="@",dtype=str)[1:,:]
155 | 
156 | 
157 |     anomaly_list = np.genfromtxt(os.path.join(config['MKAD_folder'],'anomalous_flights_contributions_'+config['name']+'.csv'),delimiter=",",comments="@",dtype=str)[1:,0]
158 |     data_cube = pickle.load(open(os.path.join(config['working_dir'] , 'data_cube.pkl'),'rb'))
159 | 
160 |     root_good_filelist = [os.path.basename(f).replace('.pkl','') for f in filelist]
161 | 
162 |     good_indx = np.zeros((len(data_cube['filelist'])),dtype=bool)
163 |     for i,a in enumerate(data_cube['filelist']):
164 |         good_indx[i] = a in root_good_filelist
165 | 
166 |     data_cube['continuous'] = data_cube['continuous'][good_indx,:,:]
167 |     data_cube['discrete'] = data_cube['discrete'][good_indx,:,:]
168 |     data_cube['filelist'] = np.array(data_cube['filelist'])[good_indx]
169 | 
170 |     for i in range(data_cube['continuous'].shape[0]):
171 |         if(np.sum(np.isnan(data_cube['continuous'][i,:,:]))>0):
172 |             last_nan = np.max(np.where(np.isnan(data_cube['continuous'][i,:,0])==True))
173 |             data_cube['continuous'][i,:last_nan+1,:] = data_cube['continuous'][i,last_nan+1,:]
174 |             last_nan = np.max(np.where(np.isnan(data_cube['discrete'][i,:,0])==True))
175 |             data_cube['discrete'][i,:last_nan+1,:] = data_cube['discrete'][i,last_nan+1,:]
176 | 
177 |     os.makedirs(os.path.join(config['MKAD_folder'],'figs'), exist_ok=True)
178 |     # os.system('mkdir -p ' + config['MKAD_folder']+'/figs')
179 |     startT = time.time()
180 | 
181 |     size_per_thread=np.ceil(float(anomaly_list.shape[0])/number_of_processes)
182 |     jobs=[]
183 |     for i in range(int(number_of_processes)):
184 |         p = Process(target=worker, args=(anomaly_list[int((i)*size_per_thread):int(min(int((i+1)*size_per_thread),anomaly_list.shape[0]))],data_cube,MKAD_file[int((i)*size_per_thread):int(min(int((i+1)*size_per_thread),anomaly_list.shape[0])),:],config,i))
185 |         jobs.append(p)
186 |         p.start()
187 |     while len(jobs) > 0:
188 |         jobs = [job for job in jobs if job.is_alive()]
189 |         time.sleep(1)
190 | 
191 |     print("Runtime: " + str(time.time() - startT) + " Seconds")
192 | 
193 | 
194 | 
195 | 
196 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyMKAD
2 | 
3 | The world-wide aviation system is one of the most complex dynamical systems ever developed and is generating data at an extremely rapid rate. Most modern commercial aircraft record several hundred flight parameters including information from the guidance, navigation, and control systems, the avionics and propulsion systems, and the pilot inputs into the aircraft. These parameters may be continuous measurements or binary/categorical measurements recorded in one-second intervals for the duration of the flight. Currently, most approaches to aviation safety are reactive, meaning that they are designed to react to an aviation safety incident or accident. PyMKAD is a novel approach based on the theory of multiple kernel learning to detect potential safety anomalies in very large databases of discrete and continuous data from world-wide operations of commercial fleets. This code addresses an anomaly detection problem which includes both discrete and continuous data streams, where we assume that the discrete streams influence the continuous streams. We also assume that atypical sequences of events in the discrete streams can lead to off-nominal system performance.
4 | 
5 | The objective of this project is to automate the analysis of flight safety incidents in a way that combines analysis of both discrete and continuous parameters.
6 | 
7 | This repository contains the following files in its top level directory:
8 | 
9 | * [PythonCode](PythonCode)
10 | The source code of the repository includes preprocessing modules, the main MKAD code, and a post-processing visualization tool. The code uses a command-line interface and a JSON file for configuration.
11 | 
12 | * [documentation](documentation)
13 | Documents describing how to configure and run the program, as well as how to interpret the results.
14 | 
15 | 
16 | * [MKAD NOSA 2019.pdf](MKAD%20NOSA%202019.pdf)
17 | Licensing for MKAD
18 | 
19 | 
20 | 
21 | 
22 | ## Contact Info
23 | 
24 | NASA Point of contact: Nikunj Oza, Data Science Group Lead.
25 | 
26 | For questions regarding the research and development of the algorithm, please contact Bryan Matthews, Senior Research Engineer.
27 | 
28 | 
29 | ## Copyright and Notices
30 | 
31 | Notices:
32 | 
33 | Copyright © 2019 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. All Rights Reserved.
34 | 
35 | Disclaimers
36 | 
37 | No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT "AS IS."
38 | 
39 | Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
40 | 
41 | 

--------------------------------------------------------------------------------
/documentation/README.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nasa/PyMKAD/02d29db9e13ceffc7fdabb188948618da40306a1/documentation/README.docx

--------------------------------------------------------------------------------
/kernels/README.rst:
--------------------------------------------------------------------------------
1 | kernels
2 | ============
3 | kernels is a Python module with utility functions for data mining to support
4 | the Data Sciences group at NASA Ames
5 | 
6 | 
7 | Dependencies
8 | ============
9 | 
10 | The required dependencies to build the software are
11 | Python >= 3.7, setuptools,
12 | Numpy >= 1.15.4,
13 | SciPy >= 1.2.0,
14 | scikit-learn >= 0.20.3
15 | and a working C/C++ compiler.
16 | 
17 | 
18 | Install
19 | =======
20 | 
21 | This package uses distutils, which is the default way of installing
22 | python modules. To install in your home directory, use::
23 | 
24 |     python setup.py install --user
25 | 
26 | To install for all users on Unix/Linux::
27 | 
28 |     python setup.py build
29 |     sudo python setup.py install
30 | 
31 | 

--------------------------------------------------------------------------------
/kernels/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nasa/PyMKAD/02d29db9e13ceffc7fdabb188948618da40306a1/kernels/__init__.py

--------------------------------------------------------------------------------
/kernels/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup, Extension
2 | from os.path import join
3 | import os
4 | import numpy as np
5 | 
6 | 
7 | nlcs_sources = [join('src', 'nlcs', 'nlcs_wrapper.cpp'),\
8 |                 join('src', 'nlcs', 'lcs.cpp')]
9 | 
10 | setup(name = 'nlcs', version = '1.0', \
11 |       ext_modules = [Extension('nlcs', nlcs_sources, include_dirs=[join(os.path.split(np.__file__)[0],'core','include')])])

--------------------------------------------------------------------------------
/kernels/src/nlcs/lcs.cpp:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 | #include <math.h>
5 | #include "lcs.h"
6 | 
7 | using namespace std;
8 | 
9 | // enum direction {north, west, nw};
10 | // Direction uses chars {1,2,3};
11 | 
12 | 
13 | LCS::LCS(){}
14 | 
15 | 
16 | float LCS::CalcDist(unsigned short *vec1,unsigned short vec1Size,unsigned short *vec2,unsigned short vec2Size){
17 | 
18 |     unsigned short m, n;   //lengths of the two strings
19 |     unsigned short **c;    // table of LCS lengths
20 |     char **b;              // table of which optimal subprob solution
21 |     unsigned short i, j;
22 |     unsigned short length; // length of LCS of prefixes
23 | 
24 |     m = (unsigned short)vec1Size; // length of X
25 |     n = (unsigned short)vec2Size; // length of Y
26 | 
27 |     // Use two tables, b and c, each with m+1 rows and n+1 columns.
28 |     // Initialize the c table to all 0. The b table doesn't need to be
29 |     // initialized.
30 |     c =(unsigned short**) calloc(m+1, sizeof(unsigned short *));
31 |     for (i = 0; i <= m; i++)
32 |     {
33 |         c[i] =(unsigned short*) calloc(n+1, sizeof(unsigned short));
34 |     }
35 | 
36 |     b = (char**)calloc(m+1, sizeof(char *));
37 |     for (i = 0; i <= m; i++)
38 |     {
39 |         b[i] = (char*)calloc(n+1, sizeof(char));
40 |     }
41 | 
42 | 
43 |     // Now run through the main loop of the LCS-Length algorithm on p.353.
44 |     for (i = 1; i <= m; i++)
45 |     {
46 |         for (j = 1; j <= n; j++)
47 |         {
48 |             if(vec1[i-1]==vec2[j-1])
49 |             {
50 |                 // Extending the LCS of X[1..i-1] and Y[1..j-1] by one character.
51 |                 c[i][j] = c[i-1][j-1] + 1;
52 |                 b[i][j] = 3; //NorthWest
53 |             }
54 |             else if (c[i-1][j] >= c[i][j-1])
55 |             {
56 |                 // Using LCS of X[1..i-1] and Y[1..j].
57 |                 c[i][j] = c[i-1][j];
58 |                 b[i][j] = 1; //North
59 |             }
60 |             else
61 |             {
62 |                 // Using LCS of X[1..i] and Y[1..j-1].
63 |                 c[i][j] = c[i][j-1];
64 |                 b[i][j] = 2; //West
65 |             }
66 |         }
67 |     }
68 | 
69 |     //The tables are all filled in. Print out the LCS found.
70 |     //print_LCS also returns the length of the LCS found.
71 |     length = print_LCS(b, m, n);
72 |     // printf("\nlength = %d\n", length);
73 |     for (i = 0; i <= m; i++)
74 |     {
75 |         free(c[i]);
76 |     }
77 |     for (i = 0; i <= m; i++)
78 |     {
79 |         free(b[i]);
80 |     }
81 |     free(c);
82 |     free(b);
83 |     return (float)(length)/sqrt((float)(m)*(float)(n));
84 | 
85 | }
86 | 
87 | // Print an LCS of X[1..i] and Y[1..j], assuming that the b table has
88 | // already been filled in. Based on the Print-LCS procedure of p.355.
89 | // int print_LCS(enum direction **b, char *X, int i, int j)
90 | //int LCS::print_LCS(enum direction **b,int i, int j)
91 | unsigned short LCS::print_LCS(char **b,unsigned short i, unsigned short j)
92 | {
93 |     if (i == 0 || j == 0) // is either string empty?
94 |         return 0;
95 |     if (b[i][j] == 3) //NorthWest
96 |     {
97 |         // We extended X[1..i-1] and Y[1..j-1] by one character, which is X[i].
98 |         // Print the LCS of X[1..i-1] and Y[1..j-1] and then print X[i].
99 |         unsigned short length = print_LCS(b,i-1, j-1);
100 |         return length+1;
101 |     }
102 |     else if (b[i][j] == 1) //North
103 |     {
104 |         return print_LCS(b,i-1, j); // used LCS of X[1..i-1] and Y[1..j]
105 |     }
106 |     else
107 |     {
108 |         return print_LCS(b,i, j-1); // used LCS of X[1..i] and Y[1..j-1]
109 |     }
110 | 
111 | }
112 | 
113 | 

--------------------------------------------------------------------------------
/kernels/src/nlcs/lcs.h:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 | 
5 | using namespace std;
6 | 
7 | /*enum direction {north, west, nw};*/
8 | 
9 | class LCS
10 | {
11 |     private:
12 |         unsigned short print_LCS(char **b, unsigned short i, unsigned short j);
13 | 
14 |     public:
15 |         LCS(void);
16 |         float CalcDist(unsigned short *vec1,unsigned short vec1Size,unsigned short *str2,unsigned short vec2Size);
17 | 
18 | };

--------------------------------------------------------------------------------
/kernels/src/nlcs/nlcs_wrapper.cpp:
--------------------------------------------------------------------------------
1 | #include <Python.h>
2 | #include "lcs.h"
3 | #include "numpy/arrayobject.h"
4 | #include <string.h>
5 | 
6 | // Compatible with Python 3.7.
7 | 
8 | // Function: compute nLCS
9 | static PyObject* compute(PyObject* self, PyObject* args)
10 | {
11 | 
12 |     PyArrayObject *input1,*input2;
13 | 
14 |     if (!PyArg_ParseTuple(args, "OO",&input1,&input2))
15 |         return NULL;
16 | 
17 | 
18 |     unsigned short *list1 = (unsigned short *)malloc(input1->dimensions[0]*sizeof(unsigned short));
19 |     unsigned short *list2 = (unsigned short *)malloc(input2->dimensions[0]*sizeof(unsigned short));
20 | 
21 | 
22 |     if(input1->dimensions[1]!=input2->dimensions[1]){
23 |         fprintf(stderr,"Error: dimensions mismatch\n %ld!=%ld\n",(long)input1->dimensions[1],(long)input2->dimensions[1]);
24 |         return Py_BuildValue("f",-1.0);
25 |     }
26 | 
27 |     LCS LCSObj;
28 |     float d=0;
29 |     for (int i=0;i<input1->dimensions[1];i++){
30 |         for (int j=0;j<input1->dimensions[0];j++){
31 |             memcpy(list1+j,input1->data+(i+j*input1->dimensions[1])*sizeof(unsigned short),sizeof(unsigned short));
32 |         }
33 |         for (int j=0;j<input2->dimensions[0];j++){
34 |             memcpy(list2+j,input2->data+(i+j*input2->dimensions[1])*sizeof(unsigned short),sizeof(unsigned short));
35 |         }
36 |         d += LCSObj.CalcDist(list1,input1->dimensions[0],list2,input2->dimensions[0]);
37 |     }
38 | 
39 |     free(list1);
40 |     free(list2);
41 | 
42 |     return Py_BuildValue("f",d/((float)input1->dimensions[1]));
43 | }
44 | 
45 | // Module's Function Definition struct
46 | // We require this `NULL` to signal the end of our method
47 | // definition
48 | static PyMethodDef myMethods[] = {
49 |     { "compute", compute, METH_VARARGS, "Computes nLCS similarity" },
50 |     { NULL, NULL, 0, NULL }
51 | };
52 | 
53 | // Module Definition struct
54 | static struct PyModuleDef nlcs = {
55 |     PyModuleDef_HEAD_INIT,
56 |     "nlcs",
57 |     "Normalized Longest Common Subsequence Calculation",
58 |     -1,
59 |     myMethods
60 | };
61 | 
62 | // Initializes our module using our above struct
63 | PyMODINIT_FUNC PyInit_nlcs(void)
64 | {
65 |     return PyModule_Create(&nlcs);
66 | }
67 | 

--------------------------------------------------------------------------------
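After building the extension (python setup.py install --user inside kernels/), a quick sanity check is possible: nlcs.compute treats each column of its 2-D uint16 inputs as a sequence and returns the normalized LCS averaged over the columns, which is why SAX.MKAD_kernel_function passes transposed (column-vector) arguments. An illustrative sketch with made-up sequences, not part of the repository:

```python
# Illustrative check of the nlcs extension (sequences are made up).
import numpy as np
import nlcs

a = np.array([1, 4, 2, 2, 0], dtype=np.uint16).reshape(-1, 1)  # one column = one sequence
b = np.array([1, 2, 2, 3, 0], dtype=np.uint16).reshape(-1, 1)

# LCS([1,4,2,2,0], [1,2,2,3,0]) = [1,2,2,0], so nLCS = 4/sqrt(5*5) = 0.8
print(nlcs.compute(a, b))
```

The normalization by the geometric mean of the two sequence lengths keeps the kernel value in [0, 1] even when the compared sequences have different lengths.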