├── Hilbert_Envelope.m
├── Homomorphic_Envelope_with_Hilbert.m
├── README.md
├── butterworth_high_pass_filter.m
├── butterworth_low_pass_filter.m
├── default_Springer_HSMM_options.m
├── example_data.mat
├── expand_qt.m
├── getDWT.m
├── getHeartRateSchmidt.m
├── getSpringerPCGFeatures.m
├── get_PSD_feature_Springer_HMM.m
├── get_duration_distributions.m
├── labelPCGStates.m
├── normalise_signal.m
├── runSpringerSegmentationAlgorithm.m
├── run_Example_Springer_Script.m
├── schmidt_spike_removal.m
├── trainBandPiMatricesSpringer.m
├── trainSpringerSegmentationAlgorithm.m
├── viterbiDecodePCG_Springer.m
└── viterbi_Springer.c


/Hilbert_Envelope.m:
--------------------------------------------------------------------------------
 1 | % function [hilbert_envelope] = Hilbert_Envelope(input_signal, sampling_frequency,figures)
 2 | %
 3 | % This function finds the Hilbert envelope of a signal. This is taken from:
 4 | %
 5 | % Choi et al, Comparison of envelope extraction algorithms for cardiac sound
 6 | % signal segmentation, Expert Systems with Applications, 2008
 7 | %
 8 | %% Inputs:
 9 | % input_signal: the original signal
10 | % sampling_frequency: the signal's sampling frequency
11 | % figures: (optional) boolean variable to display a figure of both the
12 | % original signal and its Hilbert envelope
13 | %
14 | %% Outputs:
15 | % hilbert_envelope is the hilbert envelope of the original signal
16 | %
17 | % This code was developed by David Springer for comparison purposes in the
18 | % paper:
19 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
20 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
21 | %
22 | %% Copyright (C) 2016  David Springer
23 | % dave.springer@gmail.com
24 | %
25 | % This program is free software: you can redistribute it and/or modify
26 | % it under the terms of the GNU General Public License as published by
27 | % the Free Software Foundation, either version 3 of the License, or
28 | % any later version.
29 | %
30 | % This program is distributed in the hope that it will be useful,
31 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
32 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33 | % GNU General Public License for more details.
34 | %
35 | % You should have received a copy of the GNU General Public License
36 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
37 | 
38 | function hilbert_envelope = Hilbert_Envelope(input_signal, sampling_frequency,figures)
39 |
40 | % Plotting is opt-in: when the third argument is omitted no figure is drawn.
41 | % (sampling_frequency is part of the interface but is not used below.)
42 | if nargin < 3
43 |     figures = 0;
44 | end
45 |
46 | % The envelope is the modulus of the result of hilbert() on the input.
47 | analytic_signal = hilbert(input_signal);
48 | hilbert_envelope = abs(analytic_signal);
49 |
50 | % Optional overlay of the envelope (red) on the input signal; pause() then
51 | % waits for a key press before returning.
52 | if figures
53 |     figure('Name', 'Hilbert Envelope');
54 |     plot(input_signal');
55 |     hold on;
56 |     plot(hilbert_envelope,'r');
57 |     legend('Original Signal','Hilbert Envelope');
58 |     pause();
59 | end


--------------------------------------------------------------------------------
/Homomorphic_Envelope_with_Hilbert.m:
--------------------------------------------------------------------------------
 1 | % function homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(input_signal, sampling_frequency,lpf_frequency,figures)
 2 | %
 3 | % This function finds the homomorphic envelope of a signal, using the method
 4 | % described in the following publications:
 5 | %
 6 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
 7 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31, no. 4,
 8 | % pp. 513-29, Apr. 2010.
 9 | % 
10 | % C. Gupta et al., "Neural network classification of homomorphic segmented
11 | % heart sounds," Appl. Soft Comput., vol. 7, no. 1, pp. 286-297, Jan. 2007.
12 | %
13 | % D. Gill et al., "Detection and identification of heart sounds using
14 | % homomorphic envelogram and self-organizing probabilistic model," in
15 | % Computers in Cardiology, 2005, pp. 957-960.
16 | % (However, these researchers found the homomorphic envelope of shannon
17 | % energy.)
18 | %
19 | % In I. Rezek and S. Roberts, "Envelope Extraction via Complex Homomorphic
20 | % Filtering. Technical Report TR-98-9," London, 1998, the researchers state
21 | % that the singularity at 0 when using the natural logarithm (resulting in
22 | % values of -inf) can be fixed by using a complex valued signal. They
23 | % motivate the use of the Hilbert transform to find the analytic signal,
24 | % which is a conversion of a real-valued signal to a complex-valued
25 | % signal, which is unaffected by the singularity.
26 | %
27 | % A zero-phase low-pass Butterworth filter is used to extract the envelope.
28 | %% Inputs:
29 | % input_signal: the original (1D) signal
30 | % sampling_frequency: the signal's sampling frequency (Hz)
31 | % lpf_frequency: the frequency cut-off of the low-pass filter to be used in
32 | % the envelope extraction (Default = 8 Hz as in Schmidt's publication).
33 | % figures: (optional) boolean variable dictating the display of a figure of
34 | % both the original signal and the extracted envelope.
35 | %
36 | %% Outputs:
37 | % homomorphic_envelope: The homomorphic envelope of the original
38 | % signal (not normalised).
39 | %
40 | % This code was developed by David Springer for comparison purposes in the
41 | % paper:
42 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
43 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
44 | %
45 | %% Copyright (C) 2016  David Springer
46 | % dave.springer@gmail.com
47 | % 
48 | % This program is free software: you can redistribute it and/or modify
49 | % it under the terms of the GNU General Public License as published by
50 | % the Free Software Foundation, either version 3 of the License, or
51 | % any later version.
52 | % 
53 | % This program is distributed in the hope that it will be useful,
54 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
55 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
56 | % GNU General Public License for more details.
57 | % 
58 | % You should have received a copy of the GNU General Public License
59 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
60 | 
61 | function homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(input_signal, sampling_frequency,lpf_frequency,figures)
62 |
63 | % Optional arguments: the low-pass cut-off defaults to 8 Hz and plotting
64 | % defaults to off.
65 | if nargin < 3
66 |     lpf_frequency = 8;
67 | end
68 | if nargin < 4
69 |     figures = 0;
70 | end
71 |
72 | % 1st order Butterworth LPF; butter() takes the cut-off as 2*fc/fs.
73 | normalised_cutoff = 2*lpf_frequency/sampling_frequency;
74 | [lp_numerator,lp_denominator] = butter(1,normalised_cutoff,'low');
75 |
76 | % Homomorphic filtering: take the log of the Hilbert envelope, smooth it
77 | % with the forward-backward (filtfilt) low-pass filter, then exponentiate.
78 | log_envelope = log(abs(hilbert(input_signal)));
79 | homomorphic_envelope = exp(filtfilt(lp_numerator,lp_denominator,log_envelope));
80 |
81 | % Remove spurious spikes in first sample by copying the second sample:
82 | homomorphic_envelope(1) = homomorphic_envelope(2);
83 |
84 | % Optional overlay of the envelope (red) on the input signal.
85 | if figures
86 |     figure('Name', 'Homomorphic Envelope');
87 |     plot(input_signal);
88 |     hold on;
89 |     plot(homomorphic_envelope,'r');
90 |     legend('Original Signal','Homomorphic Envelope')
91 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Springer-Segmentation-Code
 2 | Heart sound segmentation code based on duration-dependent HMM
 3 | 
 4 | This is Matlab code to run the heart sound segmentation algorithm as outlined in the publication:
 5 | 
 6 | D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
 7 | Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 8 | 
 9 | The code includes the feature extraction, training of the duration-dependent HMM,
10 | and the decoding of the most likely sequence of states using an extended Viterbi algorithm.
11 | 
12 | An example of the code at work can be seen in "run_Example_Springer_Script.m".
13 | 
14 | Copyright (C) 2016  David Springer
15 | dave.springer@gmail.com
16 | 
17 | This program is free software: you can redistribute it and/or modify
18 | it under the terms of the GNU General Public License as published by
19 | the Free Software Foundation, either version 3 of the License, or
20 | any later version.
21 | 
22 | This program is distributed in the hope that it will be useful,
23 | but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 | GNU General Public License for more details.
26 | 
27 | You should have received a copy of the GNU General Public License
28 | along with this program.  If not, see <http://www.gnu.org/licenses/>.
29 | 
30 | 


--------------------------------------------------------------------------------
/butterworth_high_pass_filter.m:
--------------------------------------------------------------------------------
 1 | % function high_pass_filtered_signal = butterworth_high_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
 2 | %
 3 | % High-pass filter a given signal using a forward-backward, zero-phase
 4 | % butterworth filter.
 5 | %
 6 | %% INPUTS:
 7 | % original_signal: The 1D signal to be filtered
 8 | % order: The order of the filter (1,2,3,4 etc). NOTE: This order is
 9 | % effectively doubled as this function uses a forward-backward filter that
10 | % ensures zero phase distortion
11 | % cutoff: The frequency cutoff for the high-pass filter (in Hz)
12 | % sampling_frequency: The sampling frequency of the signal being filtered
13 | % (in Hz).
14 | % figures (optional): boolean variable dictating the display of figures
15 | %
16 | %% OUTPUTS:
17 | % high_pass_filtered_signal: the high-pass filtered signal.
18 | %
19 | % This code is derived from the paper:
20 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
21 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
22 | % no. 4, pp. 513-29, Apr. 2010.
23 | %
24 | % Developed by David Springer for comparison purposes in the paper:
25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
27 | %
28 | %% Copyright (C) 2016  David Springer
29 | % dave.springer@gmail.com
30 | %
31 | % This program is free software: you can redistribute it and/or modify
32 | % it under the terms of the GNU General Public License as published by
33 | % the Free Software Foundation, either version 3 of the License, or
34 | % any later version.
35 | %
36 | % This program is distributed in the hope that it will be useful,
37 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
38 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
39 | % GNU General Public License for more details.
40 | %
41 | % You should have received a copy of the GNU General Public License
42 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
43 | 
44 | function high_pass_filtered_signal = butterworth_high_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
45 |
46 | % The diagnostic figures are optional and off by default.
47 | if nargin < 5,
48 |     figures = 0;
49 | end
50 |
51 | %Get the butterworth filter coefficients. The cut-off is passed to butter()
52 | %as 2*cutoff/sampling_frequency, and the two-output form returns
53 | %transfer-function coefficients (numerator B_high, denominator A_high).
54 | [B_high,A_high] = butter(order,2*cutoff/sampling_frequency,'high');
55 |
56 | %Forward-backward filter the original signal using the butterworth
57 | %coefficients, ensuring zero phase distortion
58 | high_pass_filtered_signal = filtfilt(B_high,A_high,original_signal);
59 |
60 | if(figures)
61 |
62 |     figure('Name','High-pass filter frequency response');
63 |     % B_high/A_high are polynomial coefficients, not zero/pole locations,
64 |     % so the second-order sections must be derived with tf2sos. (zp2sos
65 |     % would treat the coefficients as roots and display the response of a
66 |     % different filter.)
67 |     [sos,g] = tf2sos(B_high,A_high);  % Convert to SOS form
68 |     Hd = dfilt.df2tsos(sos,g);        % Create a dfilt object
69 |     h = fvtool(Hd);                   % Plot magnitude response
70 |     set(h,'Analysis','freq')          % Display frequency response
71 |
72 |     % Overlay the filtered signal (red) on the original; pause() waits for
73 |     % a key press before returning.
74 |     figure('Name','Original vs. high-pass filtered signal');
75 |     plot(original_signal);
76 |     hold on;
77 |     plot(high_pass_filtered_signal,'r');
78 |     legend('Original Signal', 'High-pass filtered signal');
79 |     pause();
80 | end


--------------------------------------------------------------------------------
/butterworth_low_pass_filter.m:
--------------------------------------------------------------------------------
 1 | % function low_pass_filtered_signal = butterworth_low_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
 2 | %
 3 | % Low-pass filter a given signal using a forward-backward, zero-phase
 4 | % butterworth low-pass filter.
 5 | %
 6 | %% INPUTS:
 7 | % original_signal: The 1D signal to be filtered
 8 | % order: The order of the filter (1,2,3,4 etc). NOTE: This order is
 9 | % effectively doubled as this function uses a forward-backward filter that
10 | % ensures zero phase distortion
11 | % cutoff: The frequency cutoff for the low-pass filter (in Hz)
12 | % sampling_frequency: The sampling frequency of the signal being filtered
13 | % (in Hz).
14 | % figures (optional): boolean variable dictating the display of figures
15 | %
16 | %% OUTPUTS:
17 | % low_pass_filtered_signal: the low-pass filtered signal.
18 | %
19 | % This code is derived from the paper:
20 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
21 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
22 | % no. 4, pp. 513-29, Apr. 2010.
23 | %
24 | % Developed by David Springer for comparison purposes in the paper:
25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
27 | %
28 | %% Copyright (C) 2016  David Springer
29 | % dave.springer@gmail.com
30 | %
31 | % This program is free software: you can redistribute it and/or modify
32 | % it under the terms of the GNU General Public License as published by
33 | % the Free Software Foundation, either version 3 of the License, or
34 | % any later version.
35 | %
36 | % This program is distributed in the hope that it will be useful,
37 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
38 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
39 | % GNU General Public License for more details.
40 | %
41 | % You should have received a copy of the GNU General Public License
42 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
43 | 
44 | function low_pass_filtered_signal = butterworth_low_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
45 |
46 | % The diagnostic figures are optional and off by default.
47 | if nargin < 5,
48 |     figures = 0;
49 | end
50 |
51 | %Get the butterworth filter coefficients. The cut-off is passed to butter()
52 | %as 2*cutoff/sampling_frequency, and the two-output form returns
53 | %transfer-function coefficients (numerator B_low, denominator A_low).
54 | [B_low,A_low] = butter(order,2*cutoff/sampling_frequency,'low');
55 |
56 | if(figures)
57 |     figure('Name','Low-pass filter frequency response');
58 |     % B_low/A_low are polynomial coefficients, not zero/pole locations, so
59 |     % the second-order sections must be derived with tf2sos. (zp2sos would
60 |     % treat the coefficients as roots and display the response of a
61 |     % different filter.)
62 |     [sos,g] = tf2sos(B_low,A_low);    % Convert to SOS form
63 |     Hd = dfilt.df2tsos(sos,g);        % Create a dfilt object
64 |     h = fvtool(Hd);                   % Plot magnitude response
65 |     set(h,'Analysis','freq')          % Display frequency response
66 | end
67 |
68 |
69 | %Forward-backward filter the original signal using the butterworth
70 | %coefficients, ensuring zero phase distortion
71 | low_pass_filtered_signal = filtfilt(B_low,A_low,original_signal);
72 |
73 | if(figures)
74 |     % Overlay the filtered signal (red) on the original; pause() waits for
75 |     % a key press before returning.
76 |     figure('Name','Original vs. low-pass filtered signal');
77 |     plot(original_signal);
78 |     hold on;
79 |     plot(low_pass_filtered_signal,'r');
80 |     legend('Original Signal', 'Low-pass filtered signal');
81 |     pause();
82 | end


--------------------------------------------------------------------------------
/default_Springer_HSMM_options.m:
--------------------------------------------------------------------------------
 1 | % function springer_options = default_Springer_HSMM_options()
 2 | %
 3 | % The default options to be used with the Springer segmentation algorithm.
 4 | % USAGE: springer_options = default_Springer_HSMM_options
 5 | %
 6 | % Developed for use in the paper:
 7 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 
 8 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 9 | %
10 | %% Copyright (C) 2016  David Springer
11 | % dave.springer@gmail.com
12 | % 
13 | % This program is free software: you can redistribute it and/or modify
14 | % it under the terms of the GNU General Public License as published by
15 | % the Free Software Foundation, either version 3 of the License, or
16 | % any later version.
17 | % 
18 | % This program is distributed in the hope that it will be useful,
19 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 | % GNU General Public License for more details.
22 | % 
23 | % You should have received a copy of the GNU General Public License
24 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
25 | 
26 | function springer_options = default_Springer_HSMM_options()
27 |
28 | % All defaults are gathered in one struct literal. Text following each
29 | % "..." continuation marker is a comment describing that option.
30 | springer_options = struct( ...
31 |     'audio_Fs', 1000, ...                    sampling frequency at which to extract signal features
32 |     'audio_segmentation_Fs', 50, ...         downsampled frequency (set to 50 in Springer paper)
33 |     'segmentation_tolerance', 0.1, ...       tolerance for S1 and S2 localization, in seconds
34 |     'use_mex', false, ...                    mex code is disabled: it is noted as currently having a bug
35 |     'include_wavelet_feature', false);     % whether to use the wavelet feature or not


--------------------------------------------------------------------------------
/example_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidspringer/Springer-Segmentation-Code/853cb535247dbea013798683ac343d9526aed973/example_data.mat


--------------------------------------------------------------------------------
/expand_qt.m:
--------------------------------------------------------------------------------
 1 | % function expanded_qt = expand_qt(original_qt, old_fs, new_fs, new_length)
 2 | % 
 3 | % Function to expand the derived HMM states to a higher sampling frequency. 
 4 | %
 5 | % Developed by David Springer for comparison purposes in the paper:
 6 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 
 7 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 8 | %
 9 | %% INPUTS:
10 | % original_qt: the original derived states from the HMM
11 | % old_fs: the old sampling frequency of the original_qt
12 | % new_fs: the desired sampling frequency
13 | % new_length: the desired length of the qt signal
14 | %
15 | %% Outputs:
16 | % expanded_qt: the expanded qt, to the new FS and length
17 | %
18 | %% Copyright (C) 2016  David Springer
19 | % dave.springer@gmail.com
20 | % 
21 | % This program is free software: you can redistribute it and/or modify
22 | % it under the terms of the GNU General Public License as published by
23 | % the Free Software Foundation, either version 3 of the License, or
24 | % any later version.
25 | % 
26 | % This program is distributed in the hope that it will be useful,
27 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
28 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29 | % GNU General Public License for more details.
30 | % 
31 | % You should have received a copy of the GNU General Public License
32 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
33 | 
34 | function expanded_qt = expand_qt(original_qt, old_fs, new_fs, new_length)
35 |
36 | % Work on a row vector; the output is a column vector pre-filled with zeros.
37 | original_qt = original_qt(:)';
38 | expanded_qt = zeros(new_length,1);
39 |
40 | % Index of the last sample of every run of identical states. The final
41 | % sample of the sequence always closes the last run.
42 | run_ends = [find(diff(original_qt)), length(original_qt)];
43 |
44 | % run_start holds the index of the sample just before the current run.
45 | run_start = 0;
46 | for run_end = run_ends
47 |
48 |     % The state written out is the one found at the middle of the run.
49 |     centre = round((run_end - run_start)/2) + run_start;
50 |     state_value = original_qt(centre);
51 |
52 |     % Convert the run boundaries from the old to the new sampling rate,
53 |     % never writing beyond the requested output length.
54 |     first_new = round((run_start./old_fs).*new_fs) + 1;
55 |     last_new = min(round((run_end./(old_fs)).*new_fs), new_length);
56 |
57 |     expanded_qt(first_new:last_new) = state_value;
58 |
59 |     run_start = run_end;
60 | end


--------------------------------------------------------------------------------
/getDWT.m:
--------------------------------------------------------------------------------
 1 | % function [cD cA] = getDWT(X,N,Name)
 2 | %
 3 | % finds the discrete wavelet transform at level N for signal X using the
 4 | % wavelet specified by Name.
 5 | %
 6 | %% Inputs:
 7 | % X: the original signal
 8 | % N: the decomposition level
 9 | % Name: the wavelet name to use
10 | %
11 | %% Outputs:
12 | % cD is a N-row matrix containing the detail coefficients up to N levels
13 | % cA is the same for the approximations
14 | 
15 | % This code was developed by David Springer for comparison purposes in the
16 | % paper:
17 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
18 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
19 | %
20 | %% Copyright (C) 2016  David Springer
21 | % dave.springer@gmail.com
22 | %
23 | % This program is free software: you can redistribute it and/or modify
24 | % it under the terms of the GNU General Public License as published by
25 | % the Free Software Foundation, either version 3 of the License, or
26 | % any later version.
27 | %
28 | % This program is distributed in the hope that it will be useful,
29 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
30 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31 | % GNU General Public License for more details.
32 | %
33 | % You should have received a copy of the GNU General Public License
34 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
35 | 
36 | function [cD cA] = getDWT(X,N,Name)
37 |
38 | if(strcmp(Name,'morl'))
39 |     %No DWT available for Morlet - therefore perform CWT over scales 1..N
40 |     %and return the same coefficients for both outputs:
41 |     cwt_coefficients = cwt(X,1:N,'morl');
42 |     cD = cwt_coefficients;
43 |     cA = cwt_coefficients;
44 | else
45 |     %Perform the N-level wavelet decomposition
46 |     [c,l] = wavedec(X,N,Name);
47 |
48 |     len = length(X);
49 |     tiny = sqrt(eps);
50 |     cD = zeros(N,len);
51 |     cA = zeros(N,len);
52 |
53 |     %Reorder the details and approximations based on the structure of the
54 |     %wavelet decomposition (see help in wavedec.m). At level k every
55 |     %coefficient is repeated 2^k times in place, and wkeep1 then extracts
56 |     %len samples so that each row has the same length as X.
57 |     for k = 1:N
58 |         detail = detcoef(c,l,k);
59 |         detail = repmat(detail(:)',2^k,1);
60 |         cD(k,:) = wkeep1(detail(:)',len);
61 |
62 |         approx = appcoef(c,l,Name,k);
63 |         approx = repmat(approx(:)',2^k,1);
64 |         cA(k,:) = wkeep1(approx(:)',len);
65 |     end
66 |
67 |     %Set values with magnitude below sqrt(eps) to exactly zero:
68 |     cD(abs(cD) < tiny) = 0;
69 |     cA(abs(cA) < tiny) = 0;
70 | end


--------------------------------------------------------------------------------
/getHeartRateSchmidt.m:
--------------------------------------------------------------------------------
  1 | % function [heartRate systolicTimeInterval] = getHeartRateSchmidt(audio_data, Fs, figures)
  2 | %
  3 | % Derive the heart rate and the systolic time interval from a PCG recording.
  4 | % This is used in the duration-dependent HMM-based segmentation of the PCG
  5 | % recording.
  6 | %
  7 | % This method is based on analysis of the autocorrelation function, and the
  8 | % positions of the peaks therein.
  9 | %
 10 | % This code is derived from the paper:
 11 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a 
 12 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
 13 | % no. 4, pp. 513-29, Apr. 2010.
 14 | %
 15 | % Developed by David Springer for comparison purposes in the paper:
 16 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 
 17 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 18 | %
 19 | %% INPUTS:
 20 | % audio_data: The raw audio data from the PCG recording
 21 | % Fs: the sampling frequency of the audio recording
 22 | % figures: optional boolean to display figures
 23 | %
 24 | %% OUTPUTS:
 25 | % heartRate: the heart rate of the PCG in beats per minute
 26 | % systolicTimeInterval: the duration of systole, as derived from the
 27 | % autocorrelation function, in seconds
 28 | %
 29 | %% Copyright (C) 2016  David Springer
 30 | % dave.springer@gmail.com
 31 | % 
 32 | % This program is free software: you can redistribute it and/or modify
 33 | % it under the terms of the GNU General Public License as published by
 34 | % the Free Software Foundation, either version 3 of the License, or
 35 | % any later version.
 36 | % 
 37 | % This program is distributed in the hope that it will be useful,
 38 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 39 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 40 | % GNU General Public License for more details.
 41 | % 
 42 | % You should have received a copy of the GNU General Public License
 43 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 44 | 
 45 | function [heartRate, systolicTimeInterval] = getHeartRateSchmidt(audio_data, Fs, figures)
 46 |
 47 | if nargin < 3
 48 |     figures = false;
 49 | end
 50 |
 51 | %% Get heart rate:
 52 | % From Schmidt:
 53 | % "The duration of the heart cycle is estimated as the time from lag zero
 54 | % to the highest peaks between 500 and 2000 ms in the resulting
 55 | % autocorrelation"
 56 | % This is performed after filtering and spike removal:
 57 |
 58 | %% 25-400Hz 4th order Butterworth band pass
 59 | % (order-2 filters are requested; the forward-backward filtering used by
 60 | % the helper functions doubles the effective order)
 61 | audio_data = butterworth_low_pass_filter(audio_data,2,400,Fs, false);
 62 | audio_data = butterworth_high_pass_filter(audio_data,2,25,Fs);
 63 |
 64 | %% Spike removal from the original paper:
 65 | audio_data = schmidt_spike_removal(audio_data,Fs);
 66 |
 67 | %% Find the homomorphic envelope
 68 | homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(audio_data, Fs);
 69 |
 70 | %% Find the autocorrelation:
 71 | y=homomorphic_envelope-mean(homomorphic_envelope);
 72 | [c] = xcorr(y,'coeff');
 73 | % xcorr returns lags -(N-1)..(N-1) with lag zero at element N. Keeping only
 74 | % the elements after N means signal_autocorrelation(k) is the value at a
 75 | % lag of k samples.
 76 | signal_autocorrelation = c(length(homomorphic_envelope)+1:end);
 77 |
 78 | % Search window of 500-2000 ms expressed in samples. The bounds are rounded
 79 | % so that an odd Fs does not produce non-integer indices, and the upper
 80 | % bound is limited to the lags that actually exist for short recordings.
 81 | min_index = round(0.5*Fs);
 82 | max_index = min(round(2*Fs), length(signal_autocorrelation));
 83 | if min_index > max_index
 84 |     error('getHeartRateSchmidt:recordingTooShort', ...
 85 |         'Recording is too short to estimate the heart rate (more than %g s of audio is required).', 0.5);
 86 | end
 87 |
 88 | % index is relative to the start of the window; convert it back to a lag.
 89 | [~, index] = max(signal_autocorrelation(min_index:max_index));
 90 | true_index = index+min_index-1;
 91 |
 92 | heartRate = 60/(true_index/Fs);
 93 |
 94 |
 95 | %% Find the systolic time interval:
 96 | % From Schmidt: "The systolic duration is defined as the time from lag zero
 97 | % to the highest peak in the interval between 200 ms and half of the heart
 98 | % cycle duration"
 99 |
100 |
101 | max_sys_duration = round(((60/heartRate)*Fs)/2);
102 | min_sys_duration = round(0.2*Fs);
103 |
104 | % pos is 1-based within the window, so the lag of the peak is
105 | % min_sys_duration+pos-1 (the same conversion as used for true_index).
106 | [~, pos] = max(signal_autocorrelation(min_sys_duration:max_sys_duration));
107 | systolic_index = min_sys_duration+pos-1;
108 | systolicTimeInterval = systolic_index/Fs;
109 |
110 |
111 | if(figures)
112 |     figure('Name', 'Heart rate calculation figure');
113 |     plot(signal_autocorrelation);
114 |     hold on;
115 |     plot(true_index, signal_autocorrelation(true_index),'ro');
116 |     plot(systolic_index, signal_autocorrelation(systolic_index), 'mo');
117 |     xlabel('Samples');
118 |     legend('Autocorrelation', 'Position of max peak used to calculate HR', 'Position of max peak within systolic interval');
119 | end


--------------------------------------------------------------------------------
/getSpringerPCGFeatures.m:
--------------------------------------------------------------------------------
  1 | % function [PCG_Features, featuresFs] = getSpringerPCGFeatures(audio_data, Fs, figures)
  2 | %
  3 | % Get the features used in the Springer segmentation algorithm. These 
  4 | % features include:
  5 | % -The homomorphic envelope (as performed in Schmidt et al's paper)
  6 | % -The Hilbert envelope
  7 | % -A wavelet-based feature
  8 | % -A PSD-based feature
  9 | % This function was developed for use in the paper:
 10 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 
 11 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 12 | %
 13 | %% INPUTS:
 14 | % audio_data: array of data from which to extract features
 15 | % Fs: the sampling frequency of the audio data
 16 | % figures (optional): boolean variable dictating the display of figures
 17 | %
 18 | %% OUTPUTS:
 19 | % PCG_Features: array of derived features
 20 | % featuresFs: the sampling frequency of the derived features. This is set
 21 | % in default_Springer_HSMM_options.m
 22 | %
 23 | %% Copyright (C) 2016  David Springer
 24 | % dave.springer@gmail.com
 25 | % 
 26 | % This program is free software: you can redistribute it and/or modify
 27 | % it under the terms of the GNU General Public License as published by
 28 | % the Free Software Foundation, either version 3 of the License, or
 29 | % any later version.
 30 | % 
 31 | % This program is distributed in the hope that it will be useful,
 32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 34 | % GNU General Public License for more details.
 35 | % 
 36 | % You should have received a copy of the GNU General Public License
 37 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 38 | 
 39 | function [PCG_Features, featuresFs] = getSpringerPCGFeatures(audio_data, Fs, figures)
 40 | % Get the features used in the Springer segmentation algorithm.
 41 |
 42 | % Figures are only drawn when explicitly requested.
 43 | if(nargin < 3)
 44 |     figures = false;
 45 | end
 46 |
 47 | % Both the feature sampling frequency and the decision to include the
 48 | % wavelet feature are read from the default options.
 49 | springer_options = default_Springer_HSMM_options;
 50 | featuresFs = springer_options.audio_segmentation_Fs;
 51 | include_wavelet = springer_options.include_wavelet_feature;
 52 |
 53 | %% 25-400Hz 4th order Butterworth band pass
 54 | audio_data = butterworth_low_pass_filter(audio_data,2,400,Fs, false);
 55 | audio_data = butterworth_high_pass_filter(audio_data,2,25,Fs);
 56 |
 57 | %% Spike removal from the original paper:
 58 | audio_data = schmidt_spike_removal(audio_data,Fs);
 59 |
 60 | %% Homomorphic envelope: extract, downsample to featuresFs, normalise
 61 | homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(audio_data, Fs);
 62 | downsampled_homomorphic_envelope = normalise_signal( ...
 63 |     resample(homomorphic_envelope,featuresFs, Fs));
 64 |
 65 | %% Hilbert envelope: extract, downsample to featuresFs, normalise
 66 | hilbert_envelope = Hilbert_Envelope(audio_data, Fs);
 67 | downsampled_hilbert_envelope = normalise_signal( ...
 68 |     resample(hilbert_envelope, featuresFs, Fs));
 69 |
 70 | %% Power spectral density feature (40-60 Hz), resampled to the length of
 71 | % the downsampled homomorphic envelope and then normalised:
 72 | psd = get_PSD_feature_Springer_HMM(audio_data, Fs, 40,60)';
 73 | psd = resample(psd, length(downsampled_homomorphic_envelope), length(psd));
 74 | psd = normalise_signal(psd);
 75 |
 76 | %% Assemble the feature matrix, one feature per column
 77 | PCG_Features = [downsampled_homomorphic_envelope, downsampled_hilbert_envelope, psd];
 78 |
 79 | %% Wavelet feature (optional):
 80 | if(include_wavelet)
 81 |     wavelet_level = 3;
 82 |     wavelet_name ='rbio3.9';
 83 |
 84 |     % Audio needs to be longer than 1 second for getDWT to work, so short
 85 |     % recordings are zero-padded:
 86 |     if(length(audio_data)< Fs*1.025)
 87 |         audio_data = [audio_data; zeros(round(0.025*Fs),1)];
 88 |     end
 89 |
 90 |     [cD, ~] = getDWT(audio_data,wavelet_level,wavelet_name);
 91 |
 92 |     % Magnitude of the detail coefficients at the chosen level, trimmed
 93 |     % back to the length of the (unpadded) envelope before downsampling.
 94 |     wavelet_feature = abs(cD(wavelet_level,:));
 95 |     wavelet_feature = wavelet_feature(1:length(homomorphic_envelope));
 96 |     downsampled_wavelet = resample(wavelet_feature, featuresFs, Fs);
 97 |     downsampled_wavelet =  normalise_signal(downsampled_wavelet)';
 98 |
 99 |     PCG_Features = [PCG_Features, downsampled_wavelet];
100 | end
101 |
102 | %% Plotting figures
103 | if(figures)
104 |     figure('Name', 'PCG features');
105 |     audio_time = (1:length(audio_data))./Fs;
106 |     plot(audio_time,audio_data);
107 |     hold on;
108 |     feature_time = (1:length(PCG_Features))./featuresFs;
109 |     plot(feature_time,PCG_Features);
110 |     pause();
111 | end


--------------------------------------------------------------------------------
/get_PSD_feature_Springer_HMM.m:
--------------------------------------------------------------------------------
 1 | % function [psd] = get_PSD_feature_Springer_HMM(data, sampling_frequency, frequency_limit_low, frequency_limit_high, figures)
 2 | %
 3 | % PSD-based feature extraction for heart sound segmentation.
 4 | %
 5 | %% INPUTS:
 6 | % data: this is the audio waveform
 7 | % sampling_frequency is self-explanatory
 8 | % frequency_limit_low is the lower-bound on the frequency range you want to
 9 | % analyse
10 | % frequency_limit_high is the upper-bound on the frequency range
11 | % figures: (optional) boolean variable to display figures
12 | %
13 | %% OUTPUTS:
14 | % psd is the row vector of mean PSD values between the low and high
15 | % frequency limits, with one value per spectrogram time window.
16 | %
17 | % This code was developed by David Springer in the paper:
18 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
19 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
20 | %
21 | %% Copyright (C) 2016  David Springer
22 | % dave.springer@gmail.com
23 | %
24 | % This program is free software: you can redistribute it and/or modify
25 | % it under the terms of the GNU General Public License as published by
26 | % the Free Software Foundation, either version 3 of the License, or
27 | % any later version.
28 | %
29 | % This program is distributed in the hope that it will be useful,
30 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
31 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
32 | % GNU General Public License for more details.
33 | %
34 | % You should have received a copy of the GNU General Public License
35 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
36 | 
37 | function [psd] = get_PSD_feature_Springer_HMM(data, sampling_frequency, frequency_limit_low, frequency_limit_high, figures)
38 | % Mean power spectral density of data inside a frequency band, computed
39 | % for every time window of a spectrogram.
40 | %
41 | % data: the audio waveform
42 | % sampling_frequency: sampling frequency of data (Hz)
43 | % frequency_limit_low, frequency_limit_high: band edges (Hz); the closest
44 | % bins of the 1 Hz-spaced frequency grid are used
45 | % figures: (optional, default 0) show the spectrogram and the feature
46 | %
47 | % Returns psd, a row vector with one value per spectrogram time window.
48 | 
49 | if nargin < 5
50 |     figures = 0;
51 | end
52 | 
53 | % Find the spectrogram of the signal. The window length and the overlap
54 | % must be whole numbers of samples, so both are rounded
55 | % (sampling_frequency/40 is not an integer for every sampling rate):
56 | window_length = round(sampling_frequency/40);
57 | window_overlap = round(sampling_frequency/80);
58 | [~,F,T,P] = spectrogram(data,window_length,window_overlap,1:1:round(sampling_frequency/2),sampling_frequency);
59 | 
60 | if(figures)
61 |     figure();
62 |     % A decibel scale uses the base-10 logarithm:
63 |     surf(T,F,10*log10(P),'edgecolor','none'); axis tight;
64 |     view(0,90);
65 |     xlabel('Time (Seconds)'); ylabel('Hz');
66 |     pause();
67 | end
68 | 
69 | [~, low_limit_position] = min(abs(F - frequency_limit_low));
70 | [~, high_limit_position] = min(abs(F - frequency_limit_high));
71 | 
72 | % Find the mean PSD over the frequency range of interest. The dimension is
73 | % given explicitly so that a band which collapses to a single frequency
74 | % bin still yields one value per time window instead of a scalar:
75 | psd = mean(P(low_limit_position:high_limit_position,:), 1);
76 | 
77 | if(figures)
78 |     t3  = (1:length(data))./sampling_frequency;
79 |     figure('Name', 'PSD Feature');
80 | 
81 |     plot(t3,(data - mean(data))./std(data),'c');
82 |     hold on;
83 | 
84 |     % psd has one value per spectrogram window, so it is drawn against the
85 |     % window times T rather than against a per-sample time axis:
86 |     plot(T, (psd - mean(psd))./std(psd),'k');
87 | 
88 |     pause();
89 | end


--------------------------------------------------------------------------------
/get_duration_distributions.m:
--------------------------------------------------------------------------------
  1 | % function [d_distributions max_S1 min_S1 max_S2 min_S2 max_systole min_systole max_diastole min_diastole] = get_duration_distributions(heartrate,systolic_time)
  2 | %
  3 | % This function calculates the duration distributions for each heart cycle
  4 | % state, and the minimum and maximum times for each state.
  5 | %
  6 | %% Inputs:
  7 | % heartrate is the calculated average heart rate over the entire recording
  8 | % systolic_time is the systolic time interval
  9 | %
 10 | %% Outputs:
 11 | % d_distributions is a 4 (the number of states) dimensional vector of
 12 | % gaussian mixture models (one dimensional in this case), representing the
 13 | % mean and std deviation of the duration in each state.
 14 | %
 15 | % The max and min values are self-explanatory.
 16 | %
 17 | % This code is implemented as outlined in the paper:
 18 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a 
 19 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
 20 | % no. 4, pp. 513-29, Apr. 2010.
 21 | %
 22 | % Developed by David Springer for comparison purposes in the paper:
 23 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 
 24 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 25 | %
 26 | %% Copyright (C) 2016  David Springer
 27 | % dave.springer@gmail.com
 28 | % 
 29 | % This program is free software: you can redistribute it and/or modify
 30 | % it under the terms of the GNU General Public License as published by
 31 | % the Free Software Foundation, either version 3 of the License, or
 32 | % any later version.
 33 | % 
 34 | % This program is distributed in the hope that it will be useful,
 35 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 36 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 37 | % GNU General Public License for more details.
 38 | % 
 39 | % You should have received a copy of the GNU General Public License
 40 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 41 | 
 42 | function [d_distributions max_S1 min_S1 max_S2 min_S2 max_systole min_systole max_diastole min_diastole] = get_duration_distributions(heartrate,systolic_time)
 43 | % Builds the duration model (mean and variance) of the four heart-cycle
 44 | % states together with the minimum and maximum duration of each state.
 45 | % All durations are expressed in samples at
 46 | % default_Springer_HSMM_options.audio_segmentation_Fs.
 47 | %
 48 | % heartrate: average heart rate (60/heartrate is used as the cycle length)
 49 | % systolic_time: systolic time interval
 50 | %
 51 | % d_distributions is a 4x2 cell array; row k holds {mean, variance} of
 52 | % state k, with rows ordered S1, systole, S2, diastole.
 53 | 
 54 | options = default_Springer_HSMM_options;
 55 | Fs = options.audio_segmentation_Fs;
 56 | 
 57 | %% Heart sound durations (rounded to whole samples):
 58 | mean_S1 = round(0.122*Fs);
 59 | std_S1 = round(0.022*Fs);
 60 | mean_S2 = round(0.094*Fs);
 61 | std_S2 = round(0.022*Fs);
 62 | 
 63 | %% Systole and diastole durations:
 64 | mean_systole = round(systolic_time*Fs) - mean_S1;
 65 | std_systole = (25/1000)*Fs;
 66 | 
 67 | mean_diastole = ((60/heartrate) - systolic_time - 0.094)*Fs;
 68 | std_diastole = 0.07*mean_diastole + (6/1000)*Fs;
 69 | 
 70 | %% Mean and variance of each state's duration:
 71 | d_distributions = {mean_S1,       (std_S1)^2; ...
 72 |                    mean_systole,  (std_systole)^2; ...
 73 |                    mean_S2,       (std_S2)^2; ...
 74 |                    mean_diastole, (std_diastole)^2};
 75 | 
 76 | %% Limits for systole and diastole, three deviations around the mean:
 77 | systole_margin = 3*(std_systole+std_S1);
 78 | min_systole = mean_systole - systole_margin;
 79 | max_systole = mean_systole + systole_margin;
 80 | 
 81 | diastole_margin = 3*std_diastole;
 82 | min_diastole = mean_diastole - diastole_margin;
 83 | max_diastole = mean_diastole + diastole_margin;
 84 | 
 85 | %% Limits for the S1 and S2 sounds:
 86 | % The minimum lengths are never allowed to drop below a 50th of the
 87 | % sampling frequency:
 88 | shortest_sound = Fs/50;
 89 | min_S1 = max(mean_S1 - 3*(std_S1), shortest_sound);
 90 | min_S2 = max(mean_S2 - 3*(std_S2), shortest_sound);
 91 | max_S1 = (mean_S1 + 3*(std_S1));
 92 | max_S2 = (mean_S2 + 3*(std_S2));


--------------------------------------------------------------------------------
/labelPCGStates.m:
--------------------------------------------------------------------------------
  1 | % function states = labelPCGStates(envelope,s1_positions, s2_positions, samplingFrequency, figures)
  2 | %
  3 | % This function assigns the state labels to a PCG record. 
  4 | % This is based on ECG markers, derived from the R peak and end-T wave locations.
  5 | %
  6 | %% Inputs:
  7 | % envelope: The PCG recording envelope (found in getSchmidtPCGFeatures.m)
  8 | % s1_positions: The locations of the R peaks (in samples)
  9 | % s2_positions: The locations of the end-T waves (in samples)
 10 | % samplingFrequency: The sampling frequency of the PCG recording
 11 | % figures (optional): boolean variable dictating the display of figures
 12 | %
 13 | %% Output:
 14 | % states: An array of the state label for each sample in the feature
 15 | % vector. The total number of states is 4. Therefore, this is an array of
 16 | % values between 1 and 4, such as: [1,1,1,1,2,2,2,3,3,3,3,4,4,4,4,4,1,1,1],
 17 | % illustrating the "true" state label for each sample in the features.
 18 | % State 1 = S1 sound
 19 | % State 2 = systole
 20 | % State 3 = S2 sound
 21 | % State 4 = diastole
 22 | %
 23 | % This code was developed by David Springer for comparison purposes in the
 24 | % paper:
 25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 
 26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 27 | % where a novel segmentation approach is compared to the paper by Schmidt
 28 | % et al:
 29 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a 
 30 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
 31 | % no. 4, pp. 513-29, Apr. 2010.
 32 | %
 33 | %% Copyright (C) 2016  David Springer
 34 | % dave.springer@gmail.com
 35 | % 
 36 | % This program is free software: you can redistribute it and/or modify
 37 | % it under the terms of the GNU General Public License as published by
 38 | % the Free Software Foundation, either version 3 of the License, or
 39 | % any later version.
 40 | % 
 41 | % This program is distributed in the hope that it will be useful,
 42 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 43 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 44 | % GNU General Public License for more details.
 45 | % 
 46 | % You should have received a copy of the GNU General Public License
 47 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 48 | 
 49 | function states = labelPCGStates(envelope,s1_positions, s2_positions, samplingFrequency, figures)
 50 | % Assigns one of four heart-cycle state labels to every sample of a PCG
 51 | % envelope, using R-peak and end-T-wave positions as markers.
 52 | %
 53 | % envelope: PCG envelope; a column vector is expected because it is
 54 | % multiplied element-wise with the (column) state vector
 55 | % s1_positions: R-peak locations in samples, treated as S1 onsets
 56 | % s2_positions: end-T-wave locations in samples, around which S2 is sought
 57 | % samplingFrequency: sampling frequency of the envelope
 58 | % figures: (optional, default false) plot the envelope and the labels
 59 | %
 60 | % states: column vector with one label per envelope sample
 61 | % (1 = S1, 2 = systole, 3 = S2, 4 = diastole)
 62 | 
 63 | if(nargin<5)
 64 |     figures = false;
 65 | end
 66 | 
 67 | states = zeros(length(envelope),1);
 68 | 
 69 | %% Timing durations from Schmidt (converted to samples):
 70 | mean_S1 = 0.122*samplingFrequency;
 71 | mean_S2 = 0.092*samplingFrequency;
 72 | std_S2 = 0.022*samplingFrequency;
 73 | 
 74 | %% Label S1 (state 1):
 75 | % Each R-peak is taken as the start of an S1 sound, which is labelled for
 76 | % the mean S1 duration that follows it.
 77 | for i = 1: length(s1_positions)
 78 |     % Clip the end of the S1 window to the length of the signal:
 79 |     upper_bound = round(min(length(states), s1_positions(i) + mean_S1));
 80 |     states(max([1,s1_positions(i)]):min([upper_bound,length(states)])) = 1;
 81 | end
 82 | 
 83 | %% Label S2 (state 3) and the diastole that follows it (state 4):
 84 | % For each end-T-wave, the envelope peak near that position is located and
 85 | % a window centred on the peak is labelled as the S2 sound.
 86 | for i = 1: length(s2_positions)
 87 | 
 88 |     % Search window: end-T-wave +- (mean + 1 sd) of the S2 duration,
 89 |     % clipped to the signal. Samples already labelled S1 are masked out:
 90 |     lower_bound = max([s2_positions(i) - floor((mean_S2 + std_S2)),1]);
 91 |     upper_bound = min(length(states), ceil(s2_positions(i) + floor(mean_S2 + std_S2)));
 92 |     search_window = envelope(lower_bound:upper_bound).*(states(lower_bound:upper_bound)~=1);
 93 | 
 94 |     % Position of the envelope maximum, converted to an index into the
 95 |     % whole signal and capped at the signal length:
 96 |     [~, S2_index] = max(search_window);
 97 |     S2_index = min(length(states),lower_bound+ S2_index-1);
 98 | 
 99 |     % State 3 is centred on the S2 peak, +- half the mean S2 duration:
100 |     upper_bound = min(length(states), ceil(S2_index +((mean_S2)/2)));
101 |     states(max([ceil(S2_index - ((mean_S2)/2)),1]):upper_bound) = 3;
102 | 
103 |     % Distance from this end-T-wave to every R-peak; R-peaks that occur
104 |     % before it are excluded by setting their distance to infinity:
105 |     diffs = (s1_positions - s2_positions(i));
106 |     diffs(diffs<0) = inf;
107 | 
108 |     if(~any(diffs < inf))
109 |         % No R-peak follows this S2, so diastole runs to the end:
110 |         end_pos = length(states);
111 |     else
112 |         % Otherwise diastole ends one sample before the next R-peak:
113 |         [~, index] = min(diffs);
114 |         end_pos = s1_positions(index) -1;
115 |     end
116 |     states(ceil(S2_index +((mean_S2)/2)):end_pos) = 4;
117 | end
118 | 
119 | %% Setting the first and last sections of the signal
120 | % Samples before the first labelled sample take the state that precedes
121 | % it in the heart cycle: diastole (4) before an S1, systole (2) before an S2.
122 | % first_location_of_definite_state is the last unlabelled leading sample:
123 | first_location_of_definite_state = find(states ~= 0, 1)-1;
124 | 
125 | % ">= 1" so that a single leading sample is handled as well; the
126 | % condition is false when nothing was labelled (find returns empty):
127 | if(first_location_of_definite_state >= 1)
128 |     if(states(first_location_of_definite_state + 1) == 1)
129 |         states(1:first_location_of_definite_state) = 4;
130 |     elseif(states(first_location_of_definite_state + 1) == 3)
131 |         states(1:first_location_of_definite_state) = 2;
132 |     end
133 | end
134 | 
135 | % From the last labelled sample onwards, continue with the state that
136 | % follows it: systole (2) after an S1, diastole (4) after an S2:
137 | last_location_of_definite_state = find(states ~= 0, 1,'last');
138 | 
139 | if(last_location_of_definite_state > 1)
140 |     if(states(last_location_of_definite_state) == 1)
141 |         states(last_location_of_definite_state:end) = 2;
142 |     elseif(states(last_location_of_definite_state) == 3)
143 |         states(last_location_of_definite_state:end) = 4;
144 |     end
145 | end
146 | 
147 | % Discard any labels written past the end of the envelope:
148 | states(length(envelope)+1 : end) = [];
149 | 
150 | %Set everywhere else as state 2:
151 | states(states == 0) = 2;
152 | 
153 | %% Plotting figures
154 | if(figures)
155 |     figure('Name','Envelope and labelled states');
156 |     plot(envelope);
157 |     hold on;
158 |     plot(states,'r');
159 |     legend('Envelope', 'States');
160 |     pause();
161 | end


--------------------------------------------------------------------------------
/normalise_signal.m:
--------------------------------------------------------------------------------
 1 | % function [normalised_signal] = normalise_signal(signal)
 2 | %
 3 | % This function subtracts the mean and divides by the standard deviation of
 4 | % a (1D) signal in order to normalise it for machine learning applications.
 5 | %
 6 | %% Inputs:
 7 | % signal: the original signal
 8 | %
 9 | %% Outputs:
10 | % normalised_signal: the original signal, minus the mean and divided by
11 | % the standard deviation.
12 | %
13 | % Developed by David Springer for the paper:
14 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
15 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
16 | %
17 | %% Copyright (C) 2016  David Springer
18 | % dave.springer@gmail.com
19 | %
20 | % This program is free software: you can redistribute it and/or modify
21 | % it under the terms of the GNU General Public License as published by
22 | % the Free Software Foundation, either version 3 of the License, or
23 | % any later version.
24 | %
25 | % This program is distributed in the hope that it will be useful,
26 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
27 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28 | % GNU General Public License for more details.
29 | %
30 | % You should have received a copy of the GNU General Public License
31 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
32 | 
33 | function [normalised_signal] = normalise_signal(signal)
34 | % Returns signal with its mean removed and scaled by its standard
35 | % deviation.
36 | 
37 | centred_signal = signal - mean(signal);
38 | normalised_signal = centred_signal./std(signal);


--------------------------------------------------------------------------------
/runSpringerSegmentationAlgorithm.m:
--------------------------------------------------------------------------------
 1 | % function assigned_states = runSpringerSegmentationAlgorithm(audio_data, Fs, B_matrix, pi_vector, total_observation_distribution, figures)
 2 | %
 3 | % A function to assign states to a PCG recording using a duration-dependent
 4 | % logistic regression-based HMM, using the trained B_matrix and pi_vector
 5 | % trained in "trainSpringerSegmentationAlgorithm.m". Developed for use in
 6 | % the paper:
 7 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
 8 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 9 | %
10 | %% INPUTS:
11 | % audio_data: The audio data from the PCG recording
12 | % Fs: the sampling frequency of the audio recording
13 | % B_matrix: the observation matrix for the HMM, trained in the 
14 | % "trainSpringerSegmentationAlgorithm.m" function
15 | % pi_vector: the initial state distribution, also trained in the 
16 | % "trainSpringerSegmentationAlgorithm.m" function
17 | % total_observation_distribution, the observation probabilities of all the
18 | % data, again, trained in trainSpringerSegmentationAlgorithm.
19 | % figures: (optional) boolean variable for displaying figures
20 | %
21 | %% OUTPUTS:
22 | % assigned_states: the array of state values assigned to the original
23 | % audio_data (in the original sampling frequency).
24 | %
25 | %% Copyright (C) 2016  David Springer
26 | % dave.springer@gmail.com
27 | %
28 | % This program is free software: you can redistribute it and/or modify
29 | % it under the terms of the GNU General Public License as published by
30 | % the Free Software Foundation, either version 3 of the License, or
31 | % any later version.
32 | %
33 | % This program is distributed in the hope that it will be useful,
34 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
35 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
36 | % GNU General Public License for more details.
37 | %
38 | % You should have received a copy of the GNU General Public License
39 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
40 | 
41 | function assigned_states = runSpringerSegmentationAlgorithm(audio_data, Fs, B_matrix, pi_vector, total_observation_distribution, figures)
42 | % Segments a PCG recording: extracts the features, estimates the heart
43 | % rate, decodes the state sequence and expands it back to one state per
44 | % audio sample. When figures is true the audio and the derived states are
45 | % plotted together.
46 | 
47 | %% Preliminary
48 | if(nargin < 6)
49 |     figures = false;
50 | end
51 | 
52 | %% Features and their sampling frequency:
53 | [features, feature_rate] = getSpringerPCGFeatures(audio_data, Fs);
54 | 
55 | %% Heart rate and systolic time interval:
56 | [heart_rate, systolic_interval] = getHeartRateSchmidt(audio_data, Fs);
57 | 
58 | %% Decode the state sequence (only the third output is used):
59 | [~, ~, decoded_states] = viterbiDecodePCG_Springer(features, pi_vector, B_matrix, total_observation_distribution, heart_rate, systolic_interval, feature_rate);
60 | 
61 | %% Expand from the feature rate to the audio rate:
62 | assigned_states = expand_qt(decoded_states, feature_rate, Fs, length(audio_data));
63 | 
64 | %% Optional plot of the result:
65 | if(figures)
66 |     figure('Name','Derived state sequence');
67 |     time_axis = (1:length(audio_data))./Fs;
68 |     plot(time_axis, normalise_signal(audio_data), 'k');
69 |     hold on;
70 |     plot(time_axis, assigned_states, 'r--');
71 |     xlabel('Time (s)');
72 |     legend('Audio data', 'Derived states');
73 | end


--------------------------------------------------------------------------------
/run_Example_Springer_Script.m:
--------------------------------------------------------------------------------
 1 | %% Example Springer script
 2 | % A script to demonstrate the use of the Springer segmentation algorithm
 3 | 
 4 | %% Copyright (C) 2016  David Springer
 5 | % dave.springer@gmail.com
 6 | %
 7 | % This program is free software: you can redistribute it and/or modify
 8 | % it under the terms of the GNU General Public License as published by
 9 | % the Free Software Foundation, either version 3 of the License, or
10 | % any later version.
11 | %
12 | % This program is distributed in the hope that it will be useful,
13 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 | % GNU General Public License for more details.
16 | %
17 | % You should have received a copy of the GNU General Public License
18 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 | 
20 | %%
21 | close all;
22 | clear all;
23 | 
24 | %% Default options
25 | % The options supply, among other settings, the sampling frequency of the
26 | % recordings (audio_Fs) that is passed to the calls below:
27 | springer_options = default_Springer_HSMM_options;
28 | 
29 | %% Example data
30 | % Six example PCG recordings, downsampled to 1000 Hz, together with the
31 | % annotations of their R-peak and end-T-wave positions:
32 | load('example_data.mat');
33 | 
34 | %% Train/test split
35 | % Recordings 1 to 5 are used for training, recording 6 for testing:
36 | train_recordings = example_data.example_audio_data(1:5);
37 | train_annotations = example_data.example_annotations(1:5,:);
38 | 
39 | test_recordings = example_data.example_audio_data(6);
40 | test_annotations = example_data.example_annotations(6,:);
41 | 
42 | %% Train the HMM (with figures switched off):
43 | [B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(train_recordings,train_annotations,springer_options.audio_Fs, false);
44 | 
45 | %% Segment the unseen test recording and display the result:
46 | numPCGs = length(test_recordings);
47 | 
48 | for PCGi = 1:numPCGs
49 |     [assigned_states] = runSpringerSegmentationAlgorithm(test_recordings{PCGi}, springer_options.audio_Fs, B_matrix, pi_vector, total_obs_distribution, true);
50 | end


--------------------------------------------------------------------------------
/schmidt_spike_removal.m:
--------------------------------------------------------------------------------
  1 | % function [despiked_signal] = schmidt_spike_removal(original_signal, fs)
  2 | %
  3 | % This function removes the spikes in a signal as done by Schmidt et al in
  4 | % the paper:
  5 | % Schmidt, S. E., Holst-Hansen, C., Graff, C., Toft, E., & Struijk, J. J.
  6 | % (2010). Segmentation of heart sound recordings by a duration-dependent
  7 | % hidden Markov model. Physiological Measurement, 31(4), 513-29.
  8 | %
  9 | % The spike removal process works as follows:
 10 | % (1) The recording is divided into 500 ms windows.
 11 | % (2) The maximum absolute amplitude (MAA) in each window is found.
 12 | % (3) If at least one MAA exceeds three times the median value of the MAA's,
 13 | % the following steps were carried out. If not continue to point 4.
 14 | %   (a) The window with the highest MAA was chosen.
 15 | %   (b) In the chosen window, the location of the MAA point was identified as the top of the noise spike.
 16 | %   (c) The beginning of the noise spike was defined as the last zero-crossing point before the MAA point.
 17 | %   (d) The end of the spike was defined as the first zero-crossing point after the maximum point.
 18 | %   (e) The defined noise spike was replaced by zeroes.
 19 | %   (f) Resume at step 2.
 20 | % (4) Procedure completed.
 21 | %
 22 | %% Inputs:
 23 | % original_signal: The original (1D) audio signal array
 24 | % fs: the sampling frequency (Hz)
 25 | %
 26 | %% Outputs:
 27 | % despiked_signal: the audio signal with any spikes removed.
 28 | %
 29 | % This code is derived from the paper:
 30 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
 31 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
 32 | % no. 4, pp. 513-29, Apr. 2010.
 33 | %
 34 | % Developed by David Springer for comparison purposes in the paper:
 35 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
 36 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 37 | %
 38 | %% Copyright (C) 2016  David Springer
 39 | % dave.springer@gmail.com
 40 | %
 41 | % This program is free software: you can redistribute it and/or modify
 42 | % it under the terms of the GNU General Public License as published by
 43 | % the Free Software Foundation, either version 3 of the License, or
 44 | % any later version.
 45 | %
 46 | % This program is distributed in the hope that it will be useful,
 47 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 48 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 49 | % GNU General Public License for more details.
 50 | %
 51 | % You should have received a copy of the GNU General Public License
 52 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 53 | 
 54 | 
 55 | function [despiked_signal] = schmidt_spike_removal(original_signal, fs)
 56 | % Removes noise spikes from a signal. The signal is cut into 500 ms
 57 | % windows; while any window's maximum absolute amplitude (MAA) exceeds
 58 | % three times the median MAA, the spike in the window with the largest MAA
 59 | % is overwritten between its surrounding zero crossings.
 60 | %
 61 | % original_signal: the audio signal (trailing samples are re-attached by
 62 | % vertical concatenation, so a column vector is expected)
 63 | % fs: the sampling frequency (Hz)
 64 | %
 65 | % despiked_signal: column vector of the same length with spikes replaced
 66 | 
 67 | %% Find the window size
 68 | % (500 ms)
 69 | windowsize = round(fs/2);
 70 | 
 71 | %% Find any samples outside of an integer number of windows:
 72 | trailingsamples = mod(length(original_signal), windowsize);
 73 | 
 74 | %% Reshape the signal into a number of windows (one window per column):
 75 | sampleframes = reshape( original_signal(1:end-trailingsamples), windowsize, []);
 76 | 
 77 | %% Find the MAAs:
 78 | MAAs = max(abs(sampleframes));
 79 | 
 80 | % Value written over a detected spike. It is a small non-zero constant
 81 | % rather than exactly zero (presumably to avoid exact zeros downstream;
 82 | % TODO confirm):
 83 | spike_fill_value = 0.0001;
 84 | 
 85 | % While there are still samples greater than 3* the median value of the
 86 | % MAAs, then remove those spikes:
 87 | while(any(MAAs > median(MAAs)*3))
 88 | 
 89 |     %Find the window with the max MAA (max returns the first one on ties):
 90 |     [~, window_num] = max(MAAs);
 91 | 
 92 |     %Find the position of the spike within that window:
 93 |     [~, spike_position] = max(abs(sampleframes(:,window_num)));
 94 | 
 95 |     % Finding zero crossings (where there may not be actual 0 values, just a change from positive to negative):
 96 |     zero_crossings = [abs(diff(sign(sampleframes(:,window_num))))>1; 0];
 97 | 
 98 |     %Find the start of the spike, finding the last zero crossing before
 99 |     %spike position. If that is empty, take the start of the window:
100 |     spike_start = max([1 find(zero_crossings(1:spike_position),1,'last')]);
101 | 
102 |     %Find the end of the spike, finding the first zero crossing after
103 |     %spike position. If that is empty, take the end of the window:
104 |     zero_crossings(1:spike_position) = 0;
105 |     spike_end = min([(find(zero_crossings,1,'first')) windowsize]);
106 | 
107 |     % If the selected span already holds nothing but the fill value,
108 |     % overwriting it changes nothing and the loop would never end. This
109 |     % happens when the fill value itself exceeds 3* the median MAA, e.g.
110 |     % when more than half of the windows are silent. Stop in that case:
111 |     if(all(sampleframes(spike_start:spike_end,window_num) == spike_fill_value))
112 |         break;
113 |     end
114 | 
115 |     %Overwrite the spike with the fill value:
116 |     sampleframes(spike_start:spike_end,window_num) = spike_fill_value;
117 | 
118 |     %Recalculate MAAs
119 |     MAAs = max(abs(sampleframes));
120 | end
121 | 
122 | despiked_signal = reshape(sampleframes, [],1);
123 | 
124 | % Add the trailing samples back to the signal:
125 | despiked_signal = [despiked_signal; original_signal(length(despiked_signal)+1:end)];


--------------------------------------------------------------------------------
/trainBandPiMatricesSpringer.m:
--------------------------------------------------------------------------------
  1 | % function [B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values)
  2 | %
  3 | % Train the B matrix and pi vector for the Springer HMM.
  4 | % The pi vector holds the initial state probabilities, while the B matrix
  5 | % holds the observation probabilities. In the case of Springer's algorithm,
  6 | % the observation probabilities are derived from logistic regression
  7 | % models.
  8 | %
  9 | %% Inputs:
 10 | % state_observation_values: an Nx4 cell array of observation values from
 11 | % each of N PCG signals for each (of 4) state. Within each cell is a KxJ
 12 | % double array, where K is the number of samples from that state in the PCG
 13 | % and J is the number of feature vectors extracted from the PCG.
 14 | %
 15 | %% Outputs:
 16 | % The B_matrix and pi arrays for an HMM - as Springer et al's algorithm is a
 17 | % duration dependant HMM, there is no need to calculate the A_matrix, as
 18 | % the transition between states is only dependant on the state durations.
 19 | % total_obs_distribution:
 20 | %
 21 | % Developed by David Springer for the paper:
 22 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 
 23 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 24 | %
 25 | %% Copyright (C) 2016  David Springer
 26 | % dave.springer@gmail.com
 27 | % 
 28 | % This program is free software: you can redistribute it and/or modify
 29 | % it under the terms of the GNU General Public License as published by
 30 | % the Free Software Foundation, either version 3 of the License, or
 31 | % any later version.
 32 | % 
 33 | % This program is distributed in the hope that it will be useful,
 34 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 35 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 36 | % GNU General Public License for more details.
 37 | % 
 38 | % You should have received a copy of the GNU General Public License
 39 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 40 | 
 41 | function [B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values)
 42 | % Pools the observations of each state over all recordings, then fits one one-vs-rest logistic regression model per state.
 43 | %% Prelim
 44 | 
 45 | number_of_states = 4;
 46 | 
 47 | %% Set pi_vector
 48 | % The true value of the pi vector, which are the initial state
 49 | % probabilities, are dependent on the heart rate of each PCG, and the
 50 | % individual sound duration for each patient. Therefore, instead of setting
 51 | % a patient-dependent pi_vector, simplify by setting all states as equally
 52 | % probable:
 53 | 
 54 | pi_vector = [0.25,0.25,0.25,0.25];
 55 | 
 56 | %% Train the logistic regression-based B_matrix:
 57 | 
 58 | 
 59 | % Initialise the B_matrix as a 1x4 cell array. This is to hold the
 60 | % coefficients of the trained logistic regression model for each state.
 61 | B_matrix = cell(1,number_of_states);
 62 | % Observations of each state, pooled over all recordings (rows = samples):
 63 | statei_values = cell(number_of_states,1);
 64 | % Iterate over the rows (recordings) of the Nx4 cell array. size(...,1) is used because length() returns max(N,4) and would index past the last row when N < 4:
 65 | for PCGi = 1: size(state_observation_values,1)
 66 |         
 67 |     statei_values{1} = vertcat(statei_values{1},state_observation_values{PCGi,1});
 68 |     statei_values{2} = vertcat(statei_values{2},state_observation_values{PCGi,2});
 69 |     statei_values{3} = vertcat(statei_values{3},state_observation_values{PCGi,3});
 70 |     statei_values{4} = vertcat(statei_values{4},state_observation_values{PCGi,4});
 71 |     
 72 | end
 73 | 
 74 | 
 75 | % In order to use Bayes' formula with the logistic regression derived
 76 | % probabilities, we need to get the probability of seeing a specific
 77 | % observation in the total training data set. This is the
 78 | % 'total_observation_sequence' (all states stacked together), of which
 79 | % the mean and covariance are found:
 80 | 
 81 | total_observation_sequence = vertcat(statei_values{1}, statei_values{2}, statei_values{3}, statei_values{4});
 82 | total_obs_distribution = cell(2,1);
 83 | total_obs_distribution{1} = mean(total_observation_sequence);
 84 | total_obs_distribution{2} = cov(total_observation_sequence);
 85 | 
 86 | 
 87 | for state = 1: number_of_states
 88 |     
 89 |     % Randomly select indices of samples from the other states not being 
 90 |     % learnt, in order to balance the two data sets. The code below ensures
 91 |     % that if class 1 is being learnt vs the rest, the number of the rest =
 92 |     % the number of class 1, evenly split across all other classes
 93 |     length_of_state_samples = size(statei_values{state},1);
 94 |     
 95 |     % Number of samples required from each of the other states:
 96 |     length_per_other_state = floor(length_of_state_samples/(number_of_states-1));
 97 |     
 98 |     
 99 |     %If the length of the main class / (num states - 1) >
100 |     %length(shortest other class), then only select
101 |     %length(shortest other class) from the other states,
102 |     %and (3* length) for main class
103 |     min_length_other_class = inf;
104 |     
105 |     for other_state = 1: number_of_states
106 |         samples_in_other_state = size(statei_values{other_state},1);
107 |         
108 |         % Only the states that are not being learnt are considered here:
109 |         if(other_state ~= state)
110 |             min_length_other_class = min([min_length_other_class, samples_in_other_state]);
111 |         end
112 |     end
113 |     
114 |     %This means there aren't enough samples in one of the
115 |     %states to match the length of the main class being
116 |     %trained:
117 |     if( length_per_other_state > min_length_other_class)
118 |         length_per_other_state = min_length_other_class;
119 |     end
120 |     % training_data{1}: samples of the state being learnt; training_data{2}: samples of all other states.
121 |     training_data = cell(2,1);
122 |     
123 |     for other_state = 1: number_of_states
124 |         samples_in_other_state = size(statei_values{other_state},1);
125 |                 
126 |         if(other_state == state)
127 |             %Make sure you only choose (number_of_states-1) *
128 |             %length_per_other_state samples for the main
129 |             %state, to ensure that the sets are balanced:
130 |             indices = randperm(samples_in_other_state,length_per_other_state*(number_of_states-1));
131 |             training_data{1} = statei_values{other_state}(indices,:);
132 |         else
133 |                        
134 |             indices = randperm(samples_in_other_state,length_per_other_state);
135 |             state_data = statei_values{other_state}(indices,:);
136 |             training_data{2} = vertcat(training_data{2}, state_data);
137 |             
138 |         end
139 |     end
140 |     
141 |     % Label all the data: class 2 = the state being learnt, class 1 = all other states (one label per row):
142 |     labels = ones(size(training_data{1},1) + size(training_data{2},1),1);
143 |     labels(1:size(training_data{1},1)) = 2;
144 |     
145 |     % Train the logistic regression model for this state:
146 |     all_data = [training_data{1};training_data{2}];
147 |     [B,~,~] = mnrfit(all_data,labels);
148 |     B_matrix{state} = B;
149 | end
150 | 
151 | 


--------------------------------------------------------------------------------
/trainSpringerSegmentationAlgorithm.m:
--------------------------------------------------------------------------------
  1 | % function [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(PCGCellArray, annotationsArray, Fs, figures)
  2 | %
  3 | % Training the Springer HMM segmentation algorithm. Developed for use in
  4 | % the paper:
  5 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
  6 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
  7 | %
  8 | %% Inputs:
  9 | % PCGCellArray: A 1XN cell array of the N audio signals. For evaluation
 10 | % purposes, these signals should be from a distinct training set of
 11 | % recordings, while the algorithm should be evaluated on a separate test
 12 | % set of recordings, which are recorded from a completely different set of
 13 | % patients (for example, if there are numerous recordings from each
 14 | % patient).
 15 | % annotationsArray: a Nx2 cell array: position (n,1) = the positions of the
 16 | % R-peaks and position (n,2) = the positions of the end-T-waves
 17 | % (both in SAMPLES)
 18 | % Fs: The sampling frequency of the PCG signals
 19 | % figures (optional): boolean variable dictating the display of figures.
 20 | %
 21 | %% Outputs:
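 22 | % logistic_regression_B_matrix: the per-state logistic regression coefficients,
 23 | % pi_vector: the initial state probabilities, and
 24 | % total_obs_distribution: the distribution of all observations; all three are returned by "trainBandPiMatricesSpringer".
 25 | % As Springer et al's algorithm is a duration-dependent HMM, there is no
 26 | % need to calculate the A_matrix, as the transition between states
 27 | % depends only on the state durations.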
 28 | %
 29 | %% Copyright (C) 2016  David Springer
 30 | % dave.springer@gmail.com
 31 | %
 32 | % This program is free software: you can redistribute it and/or modify
 33 | % it under the terms of the GNU General Public License as published by
 34 | % the Free Software Foundation, either version 3 of the License, or
 35 | % any later version.
 36 | %
 37 | % This program is distributed in the hope that it will be useful,
 38 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 39 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 40 | % GNU General Public License for more details.
 41 | %
 42 | % You should have received a copy of the GNU General Public License
 43 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 44 | 
 45 | 
 46 | function [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(PCGCellArray, annotationsArray, Fs, figures)
 47 | 
 48 | %% Defaults and bookkeeping
 49 | % Figures are off unless the caller passes the fourth argument:
 50 | if nargin <= 3
 51 |     figures = false;
 52 | end
 53 | 
 54 | n_states = 4;
 55 | n_recordings = length(PCGCellArray);
 56 | 
 57 | % One cell per (recording, state) pair, holding that state's feature rows:
 58 | state_observation_values = cell(n_recordings,n_states);
 59 | 
 60 | %% Extract the features and label the states of every recording
 61 | for rec_idx = 1:n_recordings
 62 |     audio = PCGCellArray{rec_idx};
 63 |     
 64 |     % Columns 1 and 2 of the annotations are handed to labelPCGStates below:
 65 |     first_positions = annotationsArray{rec_idx,1};
 66 |     second_positions = annotationsArray{rec_idx,2};
 67 |     [features, features_fs] = getSpringerPCGFeatures(audio, Fs);
 68 |     
 69 |     states = labelPCGStates(features(:,1),first_positions, second_positions, features_fs);
 70 |     
 71 |     
 72 |     %% Optional plot of the audio, the features and the assigned states:
 73 |     if figures
 74 |         figure('Name','Assigned states to PCG');
 75 |         
 76 |         audio_time = (1:length(audio))./Fs;
 77 |         feature_time = (1:length(features))./features_fs;
 78 |         
 79 |         plot(audio_time, audio, 'k-');
 80 |         hold on;
 81 |         plot(feature_time, features, 'b-');
 82 |         plot(feature_time, states, 'r-');
 83 |         
 84 |         legend('Audio','Features','States');
 85 |         pause();
 86 |     end
 87 |     
 88 |     
 89 |     
 90 |     %% Collect the feature rows that belong to each state of this recording:
 91 |     for k = 1:n_states
 92 |         in_state_k = (states == k);
 93 |         state_observation_values{rec_idx,k} = features(in_state_k,:);
 94 |     end
 95 | end
 96 | % Copy the per-state observations into the base workspace of Matlab so
 97 | % that they can be inspected later if needed:
 98 | assignin('base', 'state_observation_values', state_observation_values)
 99 | 
100 | %% With every recording labelled, train the B and pi matrices:
101 | [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values);
102 | 
103 | 


--------------------------------------------------------------------------------
/viterbiDecodePCG_Springer.m:
--------------------------------------------------------------------------------
  1 | % function [delta, psi, qt] = viterbiDecodePCG_Springer(observation_sequence, pi_vector, b_matrix, total_obs_distribution, heartrate, systolic_time, Fs, figures)
  2 | %
  3 | % This function calculates the delta, psi and qt matrices associated with
  4 | % the Viterbi decoding algorithm from:
  5 | % L. R. Rabiner, "A tutorial on hidden Markov models and selected
  6 | % applications in speech recognition," Proc. IEEE, vol. 77, no. 2, pp.
  7 | % 257-286, Feb. 1989.
  8 | % using equations 32a - 35, and equations 68 - 69 to include duration
  9 | % dependancy of the states.
 10 | %
 11 | % This decoding is performed after the observation probabilities have been
 12 | % derived from the logistic regression model of Springer et al:
 13 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
 14 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
 15 | %
 16 | % Further, this function is extended to allow the duration distributions to extend
 17 | % past the beginning and end of the sequence. Without this, the label
 18 | % sequence has to start and stop with an "entire" state duration being
 19 | % fulfilled. This extension takes away that requirement, by allowing the
 20 | % duration distributions to extend past the beginning and end, but only
 21 | % considering the observations within the sequence for emission probability
 22 | % estimation. More detail can be found in the publication by Springer et
 23 | % al., mentioned above.
 24 | %
 25 | %% Inputs:
 26 | % observation_sequence: The observed features
 27 | % pi_vector: the array of initial state probabilities, derived from
 28 | % "trainSpringerSegmentationAlgorithm".
 29 | % b_matrix, total_obs_distribution: the logistic regression coefficients and the
 30 | % mean/covariance of all training observations, derived from "trainSpringerSegmentationAlgorithm".
 31 | % heartrate: the heart rate of the PCG, extracted using
 32 | % "getHeartRateSchmidt"
 33 | % systolic_time: the duration of systole, extracted using
 34 | % "getHeartRateSchmidt"
 35 | % Fs: the sampling frequency of the observation_sequence
 36 | % figures: optional boolean variable to show figures
 37 | %
 38 | %% Outputs:
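 39 | % delta: the best log-score of any path ending in each state (columns) at each time step (rows), extended past the end of the sequence
 40 | % psi: for each entry of delta, the preceding state that maximised it
 41 | % qt: the decoded state label for each of the T samples of the observation sequence
 42 | % As Springer et al's algorithm is a duration-dependent HMM, there is no
 43 | % need to calculate the A_matrix, as the transition between states
 44 | % depends only on the state durations.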
 45 | %
 46 | %% Copyright (C) 2016  David Springer
 47 | % dave.springer@gmail.com
 48 | %
 49 | % This program is free software: you can redistribute it and/or modify
 50 | % it under the terms of the GNU General Public License as published by
 51 | % the Free Software Foundation, either version 3 of the License, or
 52 | % any later version.
 53 | %
 54 | % This program is distributed in the hope that it will be useful,
 55 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
 56 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 57 | % GNU General Public License for more details.
 58 | %
 59 | % You should have received a copy of the GNU General Public License
 60 | % along with this program.  If not, see <http://www.gnu.org/licenses/>.
 61 | 
 62 | function [delta, psi, qt] = viterbiDecodePCG_Springer(observation_sequence, pi_vector, b_matrix, total_obs_distribution, heartrate, systolic_time, Fs,figures)
 63 | % Duration-dependent (extended) Viterbi decoding of a feature sequence into four states.
 64 | if nargin < 8
 65 |     figures = false;
 66 | end
 67 | 
 68 | %% Preliminary
 69 | springer_options = default_Springer_HSMM_options;
 70 | 
 71 | T = length(observation_sequence);
 72 | N = 4; % Number of states
 73 | 
 74 | % Setting the maximum duration of a single state. This is set to an entire
 75 | % heart cycle:
 76 | max_duration_D = round((1*(60/heartrate))*Fs);
 77 | 
 78 | %Initialising the variables that are needed to find the optimal state path along
 79 | %the observation sequence.
 80 | %delta_t(j), as defined on page 264 of Rabiner, is the best score (highest
 81 | %probability) along a single path, at time t, which accounts for the first
 82 | %t observations and ends in State s_j. In this case, the length of the
 83 | %matrix is extended by max_duration_D samples, in order to allow the use
 84 | %of the extended Viterbi algorithm:
 85 | delta = ones(T+ max_duration_D-1,N)*-inf;
 86 | 
 87 | %The argument that maximises the transition between states (this is
 88 | %basically the previous state that had the highest transition probability
 89 | %to the current state) is tracked using the psi variable.
 90 | psi = zeros(T+ max_duration_D-1,N);
 91 | 
 92 | %An additional variable, that is not included on page 264 of Rabiner, is
 93 | %the state duration that maximises the delta variable. This is essential
 94 | %for the duration-dependent HMM.
 95 | psi_duration =zeros(T + max_duration_D-1,N);
 96 | 
 97 | %% Setting up observation probs
 98 | observation_probs = zeros(T,N);
 99 | 
100 | %p(obs) is derived from a MVN distribution fitted to all training
101 | %observations. It is the same for every state, so it is evaluated once
102 | %here for all T time steps (mvnpdf treats each row as one point) instead
103 | %of once per state and per sample:
104 | Po_correction = mvnpdf(observation_sequence(1:T,:),cell2mat(total_obs_distribution(1)),cell2mat(total_obs_distribution(2)));
105 | 
106 | for n = 1:N
107 |     
108 |     %MLR gives P(state|obs)
109 |     %Therefore, need Bayes to get P(o|state)
110 |     %P(o|state) = P(state|obs) * P(obs) / P(states)
111 |     %where p(states) is taken from the pi_vector:
112 |     pihat = mnrval(cell2mat(b_matrix(n)),observation_sequence(:,:));
113 |     
114 |     %mnrval returns one column per class, ordered P(class 1) then
115 |     %P(class 2). trainBandPiMatricesSpringer labels the state being
116 |     %learnt as class 2 and all other states as class 1, so the
117 |     %probability of state n is the second column of pihat:
118 |     observation_probs(:,n) = (pihat(1:T,2).*Po_correction)./pi_vector(n);
119 |     
120 |     
121 | end
122 | 
123 | %% Setting up state duration probabilities, using Gaussian distributions:
124 | [d_distributions, max_S1, min_S1, max_S2, min_S2, max_systole, min_systole, max_diastole, min_diastole] = get_duration_distributions(heartrate,systolic_time);
125 | 
126 | % For every state and duration d, evaluate the state's Gaussian duration
127 | % density; durations outside the state's [min, max] range are set to realmin.
128 | duration_probs = zeros(N,3*Fs);
129 | duration_sum = zeros(N,1);
130 | for state_j = 1:N
131 |     for d = 1:max_duration_D
132 |         if(state_j == 1)
133 |             duration_probs(state_j,d) = mvnpdf(d,cell2mat(d_distributions(state_j,1)),cell2mat(d_distributions(state_j,2)));
134 |             
135 |             if(d < min_S1 || d > max_S1)
136 |                 duration_probs(state_j,d)= realmin;
137 |             end
138 |             
139 |             
140 |         elseif(state_j==3)
141 |             duration_probs(state_j,d) = mvnpdf(d,cell2mat(d_distributions(state_j,1)),cell2mat(d_distributions(state_j,2)));
142 |             
143 |             if(d < min_S2 || d > max_S2)
144 |                 duration_probs(state_j,d)= realmin;
145 |             end
146 |             
147 |             
148 |         elseif(state_j==2)
149 |             
150 |             duration_probs(state_j,d) = mvnpdf(d,cell2mat(d_distributions(state_j,1)),cell2mat(d_distributions(state_j,2)));
151 |             
152 |             if(d < min_systole|| d > max_systole)
153 |                 duration_probs(state_j,d)= realmin;
154 |             end
155 |             
156 |             
157 |         elseif (state_j==4)
158 |             
159 |             duration_probs(state_j,d) = mvnpdf(d,cell2mat(d_distributions(state_j,1)),cell2mat(d_distributions(state_j,2)));
160 |             
161 |             if(d < min_diastole ||d > max_diastole)
162 |                 duration_probs(state_j,d)= realmin;
163 |             end
164 |         end
165 |     end
166 |     duration_sum(state_j) = sum(duration_probs(state_j,:));
167 | end
168 | 
169 | % Keep at most 3*Fs duration columns (the matrix grows if max_duration_D > 3*Fs):
170 | if(length(duration_probs)>3*Fs)
171 |     duration_probs(:,(3*Fs+1):end) = [];
172 | end
173 | 
174 | if(figures)
175 |     figure('Name', 'Duration probabilities');
176 |     plot(duration_probs(1,:)./ duration_sum(1),'Linewidth',2);
177 |     hold on;
178 |     plot(duration_probs(2,:)./ duration_sum(2),'r','Linewidth',2);
179 |     hold on;
180 |     plot(duration_probs(3,:)./ duration_sum(3),'g','Linewidth',2);
181 |     hold on;
182 |     plot(duration_probs(4,:)./ duration_sum(4),'k','Linewidth',2);
183 |     hold on;
184 |     legend('S1 Duration','Systolic Duration','S2 Duration','Diastolic Duration');
185 |     pause();
186 | end
187 | %% Perform the actual Viterbi Recursion:
188 | 
189 | % The decoded state sequence; trimmed to the first T entries at the very end:
190 | qt = zeros(1,length(delta));
191 | %% Initialisation Step
192 | 
193 | %Equation 32a and 69a, but leave out the probability of being in
194 | %state i for only 1 sample, as the state could have started before time t =
195 | %0.
196 | 
197 | delta(1,:) = log(pi_vector) + log(observation_probs(1,:)); %first value is the probability of initially being in each state * probability of observation 1 coming from each state
198 | 
199 | %Equation 32b
200 | psi(1,:) = -1;
201 | 
202 | 
203 | % The state duration probabilities are now used.
204 | %Change the a_matrix to have zeros along the diagonal, therefore, only
205 | %relying on the duration probabilities and observation probabilities to
206 | %influence change in states:
207 | %This would only be valid in sequences where the transition between states
208 | %follows a distinct order.
209 | a_matrix = [0,1,0,0;0 0 1 0; 0 0 0 1;1 0 0 0];
210 | 
211 | 
212 | %% Run the core Viterbi algorithm
213 | 
214 | if(springer_options.use_mex)
215 |     
216 |     %% Run Mex code
217 |     % Ensure you have run the mex viterbi_Springer.c code on the
218 |     % native machine before running this:
219 |     [delta, psi, psi_duration] = viterbi_Springer(N,T,a_matrix,max_duration_D,delta,observation_probs,duration_probs,psi, duration_sum);
220 |     
221 |     
222 | else
223 |     
224 |     %% Recursion
225 |     
226 |     %% The Extended Viterbi algorithm:
227 |     
228 |     %Equations 33a and 33b and 69a, b, c etc:
229 |     %again, omitting the p(d), as state could have started before t = 1
230 |     
231 |     % This implementation extends the standard implementation of the
232 |     % duration-dependent Viterbi algorithm by allowing the durations to
233 |     % extend beyond the start and end of the time series, thereby allowing
234 |     % states to "start" and "stop" outside of the recorded signal. This
235 |     % addresses the issue of partial states at the beginning and end of the
236 |     % signal being labelled as the incorrect state. For instance, a
237 |     % short-duration diastole at the beginning of a signal looks a lot like
238 |     % systole, and can lead to labelling errors.
239 |     
240 |     % t spans input 2 to T + max_duration_D - 1:
241 |     
242 |     
243 |     for t = 2:T+ max_duration_D-1
244 |         for j = 1:N
245 |             for d = 1:1:max_duration_D
246 |                 
247 |                 
248 |                 %The start of the analysis window, which is the current time
249 |                 %step, minus d (the time horizon we are currently looking
250 |                 %back). The analysis window can be seen to be starting one
251 |                 %step back each time the variable d is increased.
252 |                 % This is clamped to 1 if extending past the start of the
253 |                 % record, and T-1 if extending past the end of the record:
254 |                 start_t = t - d;
255 |                 if(start_t<1)
256 |                     start_t = 1;
257 |                 end
258 |                 if(start_t > T-1)
259 |                     start_t = T-1;
260 |                 end
261 |                 
262 |                 %The end of the analysis window, which is the current time
263 |                 %step, unless the time has gone past T, the end of the record, in
264 |                 %which case it is truncated to T. This allows the analysis
265 |                 %window to extend past the end of the record, so that the
266 |                 %timing durations of the states do not have to "end" at the end
267 |                 %of the record.
268 |                 end_t = t;
269 |                 if(t>T)
270 |                     end_t = T;
271 |                 end
272 |                 
273 |                 
274 |                 %Find the max_delta and index of that from the previous step
275 |                 %and the transition to the current step:
276 |                 %This is the first half of the expression of equation 33a from
277 |                 %Rabiner:
278 |                 [max_delta, max_index] = max(delta(start_t,:)+log(a_matrix(:,j))');
279 |                                
280 |                 
281 |                 %Find the product of the observation probabilities of state j
282 |                 %over the analysis window:
283 |                 probs = prod(observation_probs(start_t:end_t,j));
284 |                 
285 |                 
286 |                 %If the product is exactly zero, replace it by realmin
287 |                 %so that its logarithm stays finite:
288 |                 
289 |                 if(probs ==0)
290 |                     probs = realmin;
291 |                 end
292 |                 emission_probs = log(probs);
293 |                 
294 |                 
295 |                 %emission_probs is the log-probability of seeing all the
296 |                 %observations in the analysis window in state j. A value of
297 |                 %exactly zero, or NaN, is replaced by realmin:
298 |                 
299 |                 
300 |                 if(emission_probs == 0 || isnan(emission_probs))
301 |                     emission_probs =realmin;
302 |                 end
303 |                 
304 |                 
305 |                 %Find the total probability of transitioning from the last
306 |                 %state to this one, with the observations and being in the same
307 |                 %state for the analysis window. This is the duration-dependent
308 |                 %variation of equation 33a from Rabiner:
309 |                 %                 fprintf('log((duration_probs(j,d)./duration_sum(j))):%d\n',log((duration_probs(j,d)./duration_sum(j))));
310 |                 delta_temp = max_delta + (emission_probs)+ log((duration_probs(j,d)./duration_sum(j)));
311 |                 
312 |                 
313 |                 %Unlike equation 33a from Rabiner, the maximum delta could come
314 |                 %from multiple d values, or from multiple size of the analysis
315 |                 %window. Therefore, only keep the maximum delta value over the
316 |                 %entire analysis window:
317 |                 %If this probability is greater than the last greatest,
318 |                 %update the delta matrix and the time duration variable:
319 |                 
320 |                 
321 |                 if(delta_temp>delta(t,j))
322 |                     delta(t,j) = delta_temp;
323 |                     psi(t,j) = max_index;
324 |                     psi_duration(t,j) = d;
325 |                 end
326 |                 
327 |             end
328 |         end
329 |     end
330 | end
331 | 
332 | 
333 | %% Termination
334 | 
335 | % For the extended case, need to find max prob after end of actual
336 | % sequence:
337 | 
338 | % Find just the delta after the end of the actual signal
339 | temp_delta = delta(T+1:end,:);
340 | %Find the maximum value in this section, and the row (time) it occurs at:
341 | [~, idx] = max(temp_delta(:));
342 | [pos, ~] = ind2sub(size(temp_delta), idx);
343 | 
344 | % Change this position to the real position in delta matrix:
345 | pos = pos+T;
346 | 
347 | %1) Find the last most probable state
348 | %2) From the psi matrix, find the most likely preceding state
349 | %3) Find the duration of the last state from the psi_duration matrix
350 | %4) From the onset to the offset of this state, set to the most likely state
351 | %5) Repeat steps 2 - 5 until reached the beginning of the signal
352 | 
353 | 
354 | %The initial steps 1-4 are equation 34b in Rabiner. 1) finds P*, the most
355 | %likely last state in the sequence, 2) finds the state that precedes the
356 | %last most likely state, 3) finds the onset in time of the last state
357 | %(included due to the duration-dependency) and 4) sets the most likely last
358 | %state to the q_t variable.
359 | 
360 | %1)
361 | [~, state] = max(delta(pos,:),[],2);
362 | 
363 | %2)
364 | offset = pos;
365 | preceding_state = psi(offset,state);
366 | 
367 | %3)
368 | % state_duration = psi_duration(offset, state);
369 | onset = offset - psi_duration(offset,state)+1;
370 | 
371 | %4)
372 | qt(onset:offset) = state;
373 | 
374 | %The state is then updated to the preceding state, found above, which must
375 | %end when the last most likely state started in the observation sequence:
376 | state = preceding_state;
377 | 
378 | count = 0;
379 | %Trace back state by state until the onset reaches the start of the
380 | %sequence:
381 | while(onset > 2)
382 |     
383 |     %2)
384 |     offset = onset-1;
385 |     %     offset_array(offset,1) = inf;
386 |     preceding_state = psi(offset,state);
387 |     %     offset_array(offset,2) = preceding_state;
388 |     
389 |     
390 |     %3)
391 |     %     state_duration = psi_duration(offset, state);
392 |     onset = offset - psi_duration(offset,state)+1;
393 |     
394 |     %4)
395 |     %     offset_array(onset:offset,3) = state;
396 |     % An onset before the start of the record is clamped to sample 1:
397 |     if(onset<2)
398 |         onset = 1;
399 |     end
400 |     qt(onset:offset) = state;
401 |     state = preceding_state;
402 |     count = count +1;
403 |     % Safety stop: give up after more than 1000 back-tracking steps:
404 |     if(count> 1000)
405 |         break;
406 |     end
407 | end
408 | % Discard the part of the path that lies beyond the end of the record:
409 | qt = qt(1:T);
410 | 
411 | 
412 | 


--------------------------------------------------------------------------------
/viterbi_Springer.c:
--------------------------------------------------------------------------------
  1 | /* Many people have requested a simple example on how to create a C
  2 |  * MEX-file.  In response to this request, the following C MEX-file,
  3 |  * named mexample, is provided as an introduction to cmex
  4 |  * programming. mexample is a commented program which describes how to
  5 |  * use the following MEX-functions:
  6 |  *
  7 |  * mexErrMsgTxt
  8 |  * mxCreateDoubleMatrix
  9 |  * mxGetM
 10 |  * mxGetN
 11 |  * mxGetPr
 12 |  * mxIsComplex
 13 |  * mxIsSparse
 14 |  * mxIsChar
 15 |  *
 16 |  * In MATLAB, mexample accepts two inputs and returns one output. The
 17 |  * inputs are a 2x2 array denoted as ARRAY_IN and a 2x1 vector denoted as
 18 |  * VECTOR_IN.  The function calculates the determinant of ARRAY_IN,
 19 |  * multiplies each element of VECTOR_IN by the determinant, and returns
 20 |  * this as the output, denoted by VECTOR_OUT.  All inputs and outputs to
 21 |  * this function are assumed to be real (not complex). */
 22 | 
 23 | /*   First, include some basic header files.  The header file
 24 |  * "mex.h" is required for a MEX-file.  Add any other header
 25 |  * files that your function may need here. */
 26 | 
 27 | #include "mex.h"
 28 | #include <limits.h>
 29 | #include <float.h>
 30 | #include <math.h>       /* log */
 31 | /*   A C MEX-file generally consists of two sections.  The first
 32 |  * section is a function or set of functions which performs
 33 |  * the actual mathematical calculation that the MEX-function
 34 |  * is to carry out.  In this example, the function is called
 35 |  * workFcn().  The second section is a gateway between MATLAB
 36 |  * and the first section, and consists of a function called
 37 |  * mexFunction.  The gateway is responsible for several tasks,
 38 |  * including:
 39 |  *
 40 |  * I)  error checking,
 41 |  * II)  allocating memory for return arguments,
 42 |  * III)  converting data from MATLAB into a format that
 43 |  * the workFcn function can use, and vice versa.
 44 |  *
 45 |  * The first function to be written in this example, then, is
 46 |  * workFcn:
 47 |  *
 48 |  * Since C and MATLAB handle two-dimensional arrays
 49 |  * differently, we will explicitly declare the dimension of
 50 |  * the variable theArray.  The variables, theVector and
 51 |  * theResult, are both one-dimensional arrays, and therefore
 52 |  * do not need such rigid typing. */
 53 | 
 54 | 
 55 | void viterbi(
 56 |         int N,
 57 |         int T,
 58 |         double a_matrix[4][4],
 59 |         int max_duration_D,
 60 |         double *delta,
 61 |         double *observation_probs,
 62 |         double duration_probs [4][150],
 63 |         double *psi,
 64 |         double *psi_duration_out,
 65 |         double duration_sum_in[4]
 66 |         )
 67 |         
 68 | {
 69 |     
 70 |     int i;
 71 |     int i2;
 72 |     int i3;
 73 |     int j;
 74 |     int t;
 75 |     int d;
 76 |     
 77 |     
 78 |     
 79 |     for (t = 1; t<T+ max_duration_D-1;t++){
 80 |         
 81 |         
 82 |         /*For each state */
 83 |         for (j = 0; j<4;j++){
 84 |             
 85 |             double emission_probs = 0;
 86 |             
 87 |             /*        max_duration_D*/
 88 |             for (d = 1; d<=max_duration_D; d++){
 89 |                 
 90 |                 int start; int max_index = 0;
 91 |                 int end_t = 0;
 92 |                 float probs = 0;
 93 |                 float duration_sum = 0;
 94 |                 float delta_temp = 0;
 95 |                 float max_delta = -1*DBL_MAX;
 96 |                 
 97 |                 
 98 |                 /*  Get the maximum value for delta at this t, and record the state where it was found:
 99 |                  * This is the first half of the expression of equation 33a from Rabiner:*/
100 |                 
101 |                 /*
102 |                  * %The start of the analysis window, which is the current time
103 |                  * %step, minus d (the time horizon we are currently looking back),
104 |                  * %plus 1. The analysis window can be seen to be starting one
105 |                  * %step back each time the variable d is increased.
106 |                  * % This is clamped to 1 if extending past the start of the
107 |                  * % record, and T-1 is extending past the end of the record:
108 |                  */
109 |                 
110 |                 start = t - d;
111 |                 
112 |                 if(start < 0){
113 |                     start = 0;
114 |                 }
115 |                 
116 |                 if(start > T-2){
117 |                     start = T-2;
118 |                 }
119 |                 
120 |                 /*
121 |                  * %The end of the analysis window, which is the current time
122 |                  * %step, unless the time has gone past T, the end of the record, in
123 |                  * %which case it is truncated to T. This allows the analysis
124 |                  * %window to extend past the end of the record, so that the
125 |                  * %timing durations of the states do not have to "end" at the end
126 |                  * %of the record.
127 |                  */
128 |                 
129 |                 end_t = t;
130 |                 if(end_t>T-1){
131 |                     end_t = T-1;
132 |                 }
133 |                 
134 |                 
135 |                 for(i = 0; i<N; i++)
136 |                 {
137 |                     double temp = delta[(start) +(i*(T+ max_duration_D-1))] + log(a_matrix[i][j]);
138 |                     if(temp > max_delta){
139 |                         max_delta = temp;
140 |                         max_index = i;
141 |                     }
142 |                 }
143 |                 
144 |                 
145 |                 /*//Find the normaliser for the observations at the start of the
146 |                  * //analysis window. The probability of seeing all the
147 |                  * //observations in the analysis window in state j is updated each
148 |                  * //time d is incrememented two lines below, so we only need to
149 |                  * //find the observation probabilities for one time step, each
150 |                  * //time d is updated:*/
151 |                 
152 |                 
153 |                 probs = 0;
154 |                 for(i2 = start; i2<=end_t; i2++){
155 |                     
156 |                     // Ensure that the probabilities aren't zero leading to -inf probabilities after log:
157 |                     if(observation_probs[i2 +j*T] == 0){
158 |                         observation_probs[i2 +j*T] = FLT_MIN;
159 |                     }
160 |                     
161 |                     probs = probs + log(observation_probs[i2 +j*T]);
162 |                 }
163 |                 
164 |                 if(probs ==0){
165 |                     probs = FLT_MIN;
166 |                 }
167 | 
168 |                 emission_probs = (probs);
169 |                 
170 |                 /*Find the total probability of transitioning from the last
171 |                  * //state to this one, with the observations and being in the same
172 |                  * //state for the analysis window. This is the duration-dependant
173 |                  * //variation of equation 33a from Rabiner:*/
174 |                 delta_temp = max_delta + (emission_probs)+ (log((duration_probs[j][d-1]/duration_sum_in[j])));
175 |                 
176 |                 
177 |                 
178 |                 // Uncomment the below for debuggin:
179 | //                     mexPrintf("\n t:%d", t);
180 | //                     mexPrintf("\n j:%d", j);
181 | //                     mexPrintf("\n d:%d", d);
182 | //                     mexPrintf("\n max_delta:%f", max_delta);
183 | //                     mexPrintf("\n max_index:%i \n", max_index);
184 | //                     mexPrintf ("emission_probs: %f \n",emission_probs);
185 | //                     mexPrintf ("log((duration_probs[j][d-1]/duration_sum)): %f \n",log((duration_probs[j][d-1]/duration_sum_in[j])));
186 | //                     mexPrintf ("delta_temp: %f \n",delta_temp);
187 | //                     mexPrintf ("delta[t+j*(T+ max_duration_D-1)]: %f \n",delta[t+j*(T+ max_duration_D-1)]);
188 | //                     mexPrintf ("duration_probs[j][d]: %f \n",duration_probs[j][d]);
189 | //                     mexPrintf ("duration_sum_in[j]: %f \n",duration_sum_in[j]);
190 |                   
191 |                 /*
192 |                  * Unlike equation 33a from Rabiner, the maximum delta could come
193 |                  * from multiple d values, or from multiple size of the analysis
194 |                  * window. Therefore, only keep the maximum delta value over the
195 |                  * entire analysis window:
196 |                  * If this probability is greater than the last greatest,
197 |                  * update the delta matrix and the time duration variable:
198 |                  */
199 |                 
200 |                 if(delta_temp>delta[t+j*(T+ max_duration_D-1)]){
201 |                     
202 |                     delta[t+j*(T+ max_duration_D-1)] = delta_temp;
203 |                     psi[t+j*(T+ max_duration_D-1)] = max_index+1;
204 |                     
205 |                     psi_duration_out[t + j*(T+ max_duration_D-1)] = d;
206 |                     
207 |                 }
208 |             }
209 |         }
210 |     }
211 |     
212 | }
213 | 
214 | /*   Now, define the gateway function, i.e., mexFunction.Below
215 |  * is the standard, predeclared header to mexFunction.  nlhs
216 |  * and nrhs are the number of left-hand and right-hand side
217 |  * arguments that mexample was called with from within MATLAB.
218 |  * In this example, nlhs equals 1 and nrhs should equal 2.  If
219 |  * not, then the user has called mexample the wrong way and
220 |  * should be informed of this.  plhs and prhs are arrays which
221 |  * contain the pointers to the MATLAB arrays, which are
222 |  * stored in a C struct called an Array.  prhs is an array of
223 |  * length rhs,and its pointers point to valid input data.
224 |  * plhs is an array of length nlhs, and its pointers point to
225 |  * invalid data (i.e., garbage).  It is the job of mexFunction
226 |  * to fill plhs with valid data.
227 |  *
228 |  * First, define the following values.  This makes it much
229 |  * easier to change the order of inputs to mexample, should we
230 |  * want to change the function later.  In addition, it makes
231 |  * the code easier to read. */
232 | 
233 | #define N prhs[0]
234 | #define T prhs[1]
235 | #define a_matrix prhs[2]
236 | #define max_duration_D prhs[3]
237 | #define delta  prhs[4]
238 | #define observation_probs prhs[5]
239 | #define duration_probs prhs[6]
240 | #define psi prhs[7]
241 | #define duration_sum prhs[8]
242 | 
243 | 
244 | #define delta_out plhs[0]
245 | #define psi_out plhs[1]
246 | #define psi_duration plhs[2]
247 | 
248 | 
249 | void mexFunction(
250 |         int     nlhs,
251 |         mxArray  *plhs[],
252 |         int     nrhs,
253 |         const mxArray  *prhs[]
254 |         )
255 | {
256 |     double a_matrix_in[4][4];/* 2 dimensional C array to pass to workFcn() */
257 |     double *delta_in_matrix;/* 2 dimensional C array to pass to workFcn() */
258 |     double *observation_probs_matrix;/* 2 dimensional C array to pass to workFcn() */
259 |     double *psi_matrix;/* 2 dimensional C array to pass to workFcn() */
260 |     double duration_sum_in[4];/* 2 dimensional C array to pass to workFcn() */
261 |     
262 |     double duration_probs_matrix[4][150];/* 2 dimensional C array to pass to workFcn() */
263 |     
264 |     int actual_T;
265 |     int fake_T_extended;
266 |     int actual_N;
267 |     int max_duration_D_val;
268 |     
269 |     int    row,col;        /* loop indices */
270 |     int    m,n;            /* temporary array size holders */
271 |     
272 |     /*   Step 1: Error Checking Step 1a: is nlhs 1?  If not,
273 |      * generate an error message and exit mexample (mexErrMsgTxt
274 |      * does this for us!) */
275 |     if (nlhs!=3)
276 |         mexErrMsgTxt("mexample requires 3 output argument.");
277 |     
278 |     /*   Step 1b: is nrhs 2? */
279 |     if (nrhs!=9)
280 |         mexErrMsgTxt("mexample requires 9 input arguments");
281 |     
282 |     
283 |     actual_T = mxGetM(observation_probs);
284 |     actual_N = mxGetN(observation_probs);
285 |     
286 |     max_duration_D_val = mxGetScalar(max_duration_D);
287 |     
288 |     
289 |     /*   Step 2:  Allocate memory for return argument(s) */
290 |     delta_out = mxCreateDoubleMatrix((actual_T+max_duration_D_val-1), actual_N, mxREAL);
291 |     psi_out = mxCreateDoubleMatrix((actual_T+max_duration_D_val-1), actual_N, mxREAL);
292 |     psi_duration = mxCreateDoubleMatrix((actual_T+max_duration_D_val-1), actual_N, mxREAL);
293 |     
294 |     /*   Step 3:  Convert ARRAY_IN to a 2x2 C array
295 |      * MATLAB stores a two-dimensional matrix in memory as a one-
296 |      * dimensional array.  If the matrix is size MxN, then the
297 |      * first M elements of the one-dimensional array correspond to
298 |      * the first column of the matrix, and the next M elements
299 |      * correspond to the second column, etc. The following loop
300 |      * converts from MATLAB format to C format: */
301 |     
302 |     for (col=0; col < mxGetN(a_matrix); col++){
303 |         for (row=0; row < mxGetM(a_matrix); row++){
304 |             a_matrix_in[row][col] =(mxGetPr(a_matrix))[row+col*mxGetM(a_matrix)];
305 |         }
306 |     }
307 |     
308 |     for (col=0; col < mxGetM(duration_sum); col++){
309 |         duration_sum_in[col] =(mxGetPr(duration_sum))[col];
310 |     }
311 |     
312 |     
313 |     
314 |     
315 |     delta_in_matrix = mxGetPr(delta);
316 |     observation_probs_matrix = mxGetPr(observation_probs);
317 |     psi_matrix = mxGetPr(psi);
318 |     
319 |     /*     for (col=0; col < mxGetN(delta); col++){
320 |      * //         for (row=0; row < mxGetM(delta); row++){
321 |      * //
322 |      * //
323 |      * //             observation_probs_matrix[row][col] =(mxGetPr(observation_probs))[row+col*mxGetM(observation_probs)];
324 |      * //             psi_matrix[row][col] =(mxGetPr(psi))[row+col*mxGetM(psi)];
325 |      * //         }
326 |      * //     }*/
327 |     
328 |     
329 |     for (col=0; col < mxGetN(duration_probs); col++){
330 |         for (row=0; row < mxGetM(duration_probs); row++){
331 |             duration_probs_matrix[row][col] =(mxGetPr(duration_probs))[row+col*mxGetM(duration_probs)];
332 |         }
333 |     }
334 |     
335 |     
336 |     
337 |     
338 |     /*   mxGetPr returns a pointer to the real part of the array
339 |      * ARRAY_IN.  In the line above, it is treated as the one-
340 |      * dimensional array mentioned in the previous comment.  */
341 |     
342 |     /*   Step 4:  Call workFcn function */
343 |     viterbi(actual_N,actual_T,a_matrix_in,max_duration_D_val,delta_in_matrix,observation_probs_matrix,duration_probs_matrix,psi_matrix,mxGetPr(psi_duration),duration_sum_in);
344 |     memcpy ( mxGetPr(delta_out), delta_in_matrix, actual_N*(actual_T+max_duration_D_val-1)*8);
345 |     memcpy ( mxGetPr(psi_out), psi_matrix, actual_N*(actual_T+max_duration_D_val-1)*8);
346 |     
347 | }


--------------------------------------------------------------------------------