├── Hilbert_Envelope.m
├── Homomorphic_Envelope_with_Hilbert.m
├── README.md
├── butterworth_high_pass_filter.m
├── butterworth_low_pass_filter.m
├── default_Springer_HSMM_options.m
├── example_data.mat
├── expand_qt.m
├── getDWT.m
├── getHeartRateSchmidt.m
├── getSpringerPCGFeatures.m
├── get_PSD_feature_Springer_HMM.m
├── get_duration_distributions.m
├── labelPCGStates.m
├── normalise_signal.m
├── runSpringerSegmentationAlgorithm.m
├── run_Example_Springer_Script.m
├── schmidt_spike_removal.m
├── trainBandPiMatricesSpringer.m
├── trainSpringerSegmentationAlgorithm.m
├── viterbiDecodePCG_Springer.m
└── viterbi_Springer.c
/Hilbert_Envelope.m:
--------------------------------------------------------------------------------
1 | % function [hilbert_envelope] = Hilbert_Envelope(input_signal, sampling_frequency,figures)
2 | %
3 | % This function finds the Hilbert envelope of a signal. This is taken from:
4 | %
5 | % Choi et al, Comparison of envelope extraction algorithms for cardiac sound
6 | % signal segmentation, Expert Systems with Applications, 2008
7 | %
8 | %% Inputs:
9 | % input_signal: the original signal
10 | % sampling_frequency: the signal's sampling frequency
11 | % figures: (optional) boolean variable to display a figure of both the
12 | % original signal and its Hilbert envelope
13 | %
14 | %% Outputs:
15 | % hilbert_envelope is the hilbert envelope of the original signal
16 | %
17 | % This code was developed by David Springer for comparison purposes in the
18 | % paper:
19 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
20 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
21 | %
22 | %% Copyright (C) 2016 David Springer
23 | % dave.springer@gmail.com
24 | %
25 | % This program is free software: you can redistribute it and/or modify
26 | % it under the terms of the GNU General Public License as published by
27 | % the Free Software Foundation, either version 3 of the License, or
28 | % any later version.
29 | %
30 | % This program is distributed in the hope that it will be useful,
31 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
32 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 | % GNU General Public License for more details.
34 | %
35 | % You should have received a copy of the GNU General Public License
36 | % along with this program. If not, see http://www.gnu.org/licenses/.
37 |
38 | function hilbert_envelope = Hilbert_Envelope(input_signal, sampling_frequency,figures)
39 | % Hilbert envelope: the magnitude of the analytic signal of input_signal.
40 | % sampling_frequency is part of the interface but is not used in the body.
41 | % figures (optional, default 0): when true, plot the signal with its
42 | % envelope overlaid and wait for a key press.
43 |
44 | if nargin < 3
45 |     figures = 0;
46 | end
47 |
48 | % The envelope is the modulus of the complex analytic signal.
49 | analytic_signal = hilbert(input_signal);
50 | hilbert_envelope = abs(analytic_signal);
51 |
52 | if(figures)
53 |     figure('Name', 'Hilbert Envelope');
54 |     plot(input_signal');
55 |     hold on;
56 |     plot(hilbert_envelope,'r');
57 |     legend('Original Signal','Hilbert Envelope');
58 |     pause(); % block until the user presses a key
59 | end
--------------------------------------------------------------------------------
/Homomorphic_Envelope_with_Hilbert.m:
--------------------------------------------------------------------------------
1 | % function homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(input_signal, sampling_frequency,lpf_frequency,figures)
2 | %
3 | % This function finds the homomorphic envelope of a signal, using the method
4 | % described in the following publications:
5 | %
6 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
7 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31, no. 4,
8 | % pp. 513-29, Apr. 2010.
9 | %
10 | % C. Gupta et al., "Neural network classification of homomorphic segmented
11 | % heart sounds," Appl. Soft Comput., vol. 7, no. 1, pp. 286-297, Jan. 2007.
12 | %
13 | % D. Gill et al., "Detection and identification of heart sounds using
14 | % homomorphic envelogram and self-organizing probabilistic model," in
15 | % Computers in Cardiology, 2005, pp. 957-960.
16 | % (However, these researchers found the homomorphic envelope of shannon
17 | % energy.)
18 | %
19 | % In I. Rezek and S. Roberts, "Envelope Extraction via Complex Homomorphic
20 | % Filtering. Technical Report TR-98-9," London, 1998, the researchers state
21 | % that the singularity at 0 when using the natural logarithm (resulting in
22 | % values of -inf) can be fixed by using a complex valued signal. They
23 | % motivate the use of the Hilbert transform to find the analytic signal,
24 | % which is a conversion of a real-valued signal to a complex-valued
25 | % signal, which is unaffected by the singularity.
26 | %
27 | % A zero-phase low-pass Butterworth filter is used to extract the envelope.
28 | %% Inputs:
29 | % input_signal: the original (1D) signal
30 | % sampling_frequency: the signal's sampling frequency (Hz)
31 | % lpf_frequency: the frequency cut-off of the low-pass filter to be used in
32 | % the envelope extraction (Default = 8 Hz as in Schmidt's publication).
33 | % figures: (optional) boolean variable dictating the display of a figure of
34 | % both the original signal and the extracted envelope:
35 | %
36 | %% Outputs:
37 | % homomorphic_envelope: The homomorphic envelope of the original
38 | % signal (not normalised).
39 | %
40 | % This code was developed by David Springer for comparison purposes in the
41 | % paper:
42 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
43 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
44 | %
45 | %% Copyright (C) 2016 David Springer
46 | % dave.springer@gmail.com
47 | %
48 | % This program is free software: you can redistribute it and/or modify
49 | % it under the terms of the GNU General Public License as published by
50 | % the Free Software Foundation, either version 3 of the License, or
51 | % any later version.
52 | %
53 | % This program is distributed in the hope that it will be useful,
54 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
55 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 | % GNU General Public License for more details.
57 | %
58 | % You should have received a copy of the GNU General Public License
59 | % along with this program. If not, see http://www.gnu.org/licenses/.
60 |
61 | function homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(input_signal, sampling_frequency,lpf_frequency,figures)
62 | % Homomorphic envelope: low-pass filter the log of the Hilbert envelope
63 | % with a zero-phase (filtfilt) 1st-order Butterworth filter, then
64 | % exponentiate the result.
65 | % lpf_frequency (optional, default 8): low-pass cut-off in Hz.
66 | % figures (optional, default 0): when true, plot signal and envelope.
67 |
68 | % Fill in whichever trailing arguments were omitted.
69 | if nargin < 3
70 |     lpf_frequency = 8;
71 | end
72 | if nargin < 4
73 |     figures = 0;
74 | end
75 |
76 | % 1st order Butterworth LPF; cut-off normalised to the Nyquist frequency
77 | normalised_cutoff = 2*lpf_frequency/sampling_frequency;
78 | [lp_b,lp_a] = butter(1,normalised_cutoff,'low');
79 |
80 | % log -> smooth -> exp
81 | log_envelope = log(abs(hilbert(input_signal)));
82 | homomorphic_envelope = exp(filtfilt(lp_b,lp_a,log_envelope));
83 |
84 | % Remove spurious spikes in first sample by copying the second one:
85 | homomorphic_envelope(1) = homomorphic_envelope(2);
86 |
87 | if(figures)
88 |     figure('Name', 'Homomorphic Envelope');
89 |     plot(input_signal);
90 |     hold on;
91 |     plot(homomorphic_envelope,'r');
92 |     legend('Original Signal','Homomorphic Envelope');
93 | end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Springer-Segmentation-Code
2 | Heart sound segmentation code based on duration-dependent HMM
3 |
4 | This is Matlab code to run the heart sound segmentation algorithm as outlined in the publication:
5 |
6 | D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
7 | Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
8 |
9 | The code includes the feature extraction, training of the duration-dependent HMM,
10 | and the decoding of the most likely sequence of states using an extended Viterbi algorithm.
11 |
12 | An example of the code at work can be seen in "run_Example_Springer_Script.m".
13 |
14 | Copyright (C) 2016 David Springer
15 | dave.springer@gmail.com
16 |
17 | This program is free software: you can redistribute it and/or modify
18 | it under the terms of the GNU General Public License as published by
19 | the Free Software Foundation, either version 3 of the License, or
20 | any later version.
21 |
22 | This program is distributed in the hope that it will be useful,
23 | but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | GNU General Public License for more details.
26 |
27 | You should have received a copy of the GNU General Public License
28 | along with this program. If not, see http://www.gnu.org/licenses/.
29 |
30 |
--------------------------------------------------------------------------------
/butterworth_high_pass_filter.m:
--------------------------------------------------------------------------------
1 | % function high_pass_filtered_signal = butterworth_high_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
2 | %
3 | % High-pass filter a given signal using a forward-backward, zero-phase
4 | % butterworth filter.
5 | %
6 | %% INPUTS:
7 | % original_signal: The 1D signal to be filtered
8 | % order: The order of the filter (1,2,3,4 etc). NOTE: This order is
9 | % effectively doubled as this function uses a forward-backward filter that
10 | % ensures zero phase distortion
11 | % cutoff: The frequency cutoff for the high-pass filter (in Hz)
12 | % sampling_frequency: The sampling frequency of the signal being filtered
13 | % (in Hz).
14 | % figures (optional): boolean variable dictating the display of figures
15 | %
16 | %% OUTPUTS:
17 | % high_pass_filtered_signal: the high-pass filtered signal.
18 | %
19 | % This code is derived from the paper:
20 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
21 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
22 | % no. 4, pp. 513-29, Apr. 2010.
23 | %
24 | % Developed by David Springer for comparison purposes in the paper:
25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
27 | %
28 | %% Copyright (C) 2016 David Springer
29 | % dave.springer@gmail.com
30 | %
31 | % This program is free software: you can redistribute it and/or modify
32 | % it under the terms of the GNU General Public License as published by
33 | % the Free Software Foundation, either version 3 of the License, or
34 | % any later version.
35 | %
36 | % This program is distributed in the hope that it will be useful,
37 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
38 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39 | % GNU General Public License for more details.
40 | %
41 | % You should have received a copy of the GNU General Public License
42 | % along with this program. If not, see http://www.gnu.org/licenses/.
43 |
44 | function high_pass_filtered_signal = butterworth_high_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
45 | % Zero-phase Butterworth high-pass filter.
46 | %
47 | % original_signal:    signal to be filtered
48 | % order:              order handed to butter(); filtfilt then runs the
49 | %                     filter forwards and backwards over the signal
50 | % cutoff:             cut-off frequency in Hz
51 | % sampling_frequency: sampling frequency in Hz
52 | % figures (optional): when true, show the filter response and a
53 | %                     before/after plot, then wait for a key press
54 | %                     (default 0)
55 | %
56 | % Returns the filtered signal.
57 |
58 | if nargin < 5
59 |     figures = 0;
60 | end
61 |
62 | %Get the butterworth filter coefficients (cut-off normalised to Nyquist)
63 | [B_high,A_high] = butter(order,2*cutoff/sampling_frequency,'high');
64 |
65 | %Forward-backward filter the original signal using the butterworth
66 | %coefficients, ensuring zero phase distortion
67 | high_pass_filtered_signal = filtfilt(B_high,A_high,original_signal);
68 |
69 | if(figures)
70 |
71 |     figure('Name','High-pass filter frequency response');
72 |     % B_high/A_high are transfer-function coefficients, so convert with
73 |     % tf2sos; zp2sos expects zeros, poles and gain instead.
74 |     [sos,g] = tf2sos(B_high,A_high);   % Convert to SOS form
75 |     Hd = dfilt.df2tsos(sos,g);         % Create a dfilt object
76 |     h = fvtool(Hd);                    % Plot magnitude response
77 |     set(h,'Analysis','freq')           % Display frequency response
78 |
79 |     figure('Name','Original vs. high-pass filtered signal');
80 |     plot(original_signal);
81 |     hold on;
82 |     plot(high_pass_filtered_signal,'r');
83 |     legend('Original Signal', 'High-pass filtered signal');
84 |     pause();
85 | end
--------------------------------------------------------------------------------
/butterworth_low_pass_filter.m:
--------------------------------------------------------------------------------
1 | % function low_pass_filtered_signal = butterworth_low_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
2 | %
3 | % Low-pass filter a given signal using a forward-backward, zero-phase
4 | % butterworth low-pass filter.
5 | %
6 | %% INPUTS:
7 | % original_signal: The 1D signal to be filtered
8 | % order: The order of the filter (1,2,3,4 etc). NOTE: This order is
9 | % effectively doubled as this function uses a forward-backward filter that
10 | % ensures zero phase distortion
11 | % cutoff: The frequency cutoff for the low-pass filter (in Hz)
12 | % sampling_frequency: The sampling frequency of the signal being filtered
13 | % (in Hz).
14 | % figures (optional): boolean variable dictating the display of figures
15 | %
16 | %% OUTPUTS:
17 | % low_pass_filtered_signal: the low-pass filtered signal.
18 | %
19 | % This code is derived from the paper:
20 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
21 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
22 | % no. 4, pp. 513-29, Apr. 2010.
23 | %
24 | % Developed by David Springer for comparison purposes in the paper:
25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
27 | %
28 | %% Copyright (C) 2016 David Springer
29 | % dave.springer@gmail.com
30 | %
31 | % This program is free software: you can redistribute it and/or modify
32 | % it under the terms of the GNU General Public License as published by
33 | % the Free Software Foundation, either version 3 of the License, or
34 | % any later version.
35 | %
36 | % This program is distributed in the hope that it will be useful,
37 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
38 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39 | % GNU General Public License for more details.
40 | %
41 | % You should have received a copy of the GNU General Public License
42 | % along with this program. If not, see http://www.gnu.org/licenses/.
43 |
44 | function low_pass_filtered_signal = butterworth_low_pass_filter(original_signal,order,cutoff,sampling_frequency, figures)
45 | % Zero-phase Butterworth low-pass filter.
46 | %
47 | % original_signal:    signal to be filtered
48 | % order:              order handed to butter(); filtfilt then runs the
49 | %                     filter forwards and backwards over the signal
50 | % cutoff:             cut-off frequency in Hz
51 | % sampling_frequency: sampling frequency in Hz
52 | % figures (optional): when true, show the filter response and a
53 | %                     before/after plot, then wait for a key press
54 | %                     (default 0)
55 | %
56 | % Returns the filtered signal.
57 |
58 | if nargin < 5
59 |     figures = 0;
60 | end
61 |
62 | %Get the butterworth filter coefficients (cut-off normalised to Nyquist)
63 | [B_low,A_low] = butter(order,2*cutoff/sampling_frequency,'low');
64 |
65 | if(figures)
66 |     figure('Name','Low-pass filter frequency response');
67 |     % B_low/A_low are transfer-function coefficients, so convert with
68 |     % tf2sos; zp2sos expects zeros, poles and gain instead.
69 |     [sos,g] = tf2sos(B_low,A_low);     % Convert to SOS form
70 |     Hd = dfilt.df2tsos(sos,g);         % Create a dfilt object
71 |     h = fvtool(Hd);                    % Plot magnitude response
72 |     set(h,'Analysis','freq')           % Display frequency response
73 | end
74 |
75 |
76 | %Forward-backward filter the original signal using the butterworth
77 | %coefficients, ensuring zero phase distortion
78 | low_pass_filtered_signal = filtfilt(B_low,A_low,original_signal);
79 |
80 | if(figures)
81 |     figure('Name','Original vs. low-pass filtered signal');
82 |     plot(original_signal);
83 |     hold on;
84 |     plot(low_pass_filtered_signal,'r');
85 |     legend('Original Signal', 'Low-pass filtered signal');
86 |     pause();
87 | end
--------------------------------------------------------------------------------
/default_Springer_HSMM_options.m:
--------------------------------------------------------------------------------
1 | % function springer_options = default_Springer_HSMM_options()
2 | %
3 | % The default options to be used with the Springer segmentation algorithm.
4 | % USAGE: springer_options = default_Springer_HSMM_options
5 | %
6 | % Developed for use in the paper:
7 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
8 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
9 | %
10 | %% Copyright (C) 2016 David Springer
11 | % dave.springer@gmail.com
12 | %
13 | % This program is free software: you can redistribute it and/or modify
14 | % it under the terms of the GNU General Public License as published by
15 | % the Free Software Foundation, either version 3 of the License, or
16 | % any later version.
17 | %
18 | % This program is distributed in the hope that it will be useful,
19 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 | % GNU General Public License for more details.
22 | %
23 | % You should have received a copy of the GNU General Public License
24 | % along with this program. If not, see http://www.gnu.org/licenses/.
25 |
26 | function springer_options = default_Springer_HSMM_options()
27 | % Return the struct of default settings for the Springer segmentation code.
28 | %
29 | % Fields:
30 | %   audio_Fs                 sampling frequency (Hz) at which to extract
31 | %                            signal features
32 | %   audio_segmentation_Fs    downsampled frequency (50 in Springer paper)
33 | %   segmentation_tolerance   tolerance for S1/S2 localization, in seconds
34 | %   use_mex                  whether to use the mex code (the original
35 | %                            author notes the mex code has a bug, so off)
36 | %   include_wavelet_feature  whether to use the wavelet feature
37 |
38 | springer_options = struct( ...
39 |     'audio_Fs',                1000, ...
40 |     'audio_segmentation_Fs',   50, ...
41 |     'segmentation_tolerance',  0.1, ...
42 |     'use_mex',                 false, ...
43 |     'include_wavelet_feature', false);
44 |
45 |
46 |
--------------------------------------------------------------------------------
/example_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidspringer/Springer-Segmentation-Code/853cb535247dbea013798683ac343d9526aed973/example_data.mat
--------------------------------------------------------------------------------
/expand_qt.m:
--------------------------------------------------------------------------------
1 | % function expanded_qt = expand_qt(original_qt, old_fs, new_fs, new_length)
2 | %
3 | % Function to expand the derived HMM states to a higher sampling frequency.
4 | %
5 | % Developed by David Springer for comparison purposes in the paper:
6 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
7 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
8 | %
9 | %% INPUTS:
10 | % original_qt: the original derived states from the HMM
11 | % old_fs: the old sampling frequency of the original_qt
12 | % new_fs: the desired sampling frequency
13 | % new_length: the desired length of the qt signal
14 |
15 | %% Outputs:
16 | % expanded_qt: the expanded qt, to the new FS and length
17 | %
18 | %% Copyright (C) 2016 David Springer
19 | % dave.springer@gmail.com
20 | %
21 | % This program is free software: you can redistribute it and/or modify
22 | % it under the terms of the GNU General Public License as published by
23 | % the Free Software Foundation, either version 3 of the License, or
24 | % any later version.
25 | %
26 | % This program is distributed in the hope that it will be useful,
27 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
28 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 | % GNU General Public License for more details.
30 | %
31 | % You should have received a copy of the GNU General Public License
32 | % along with this program. If not, see http://www.gnu.org/licenses/.
33 |
34 | function expanded_qt = expand_qt(original_qt, old_fs, new_fs, new_length)
35 | % Stretch a state sequence sampled at old_fs onto a new_length-by-1 column
36 | % sampled at new_fs. Each run of identical states is mapped to the
37 | % corresponding index range at the new rate and filled with the state
38 | % found at the middle of the run.
39 |
40 | state_row = original_qt(:)';
41 | expanded_qt = zeros(new_length,1);
42 |
43 | % Index of the last sample of every run; the final run ends at the last
44 | % sample of the sequence.
45 | run_ends = [find(diff(state_row)), length(state_row)];
46 |
47 | run_start = 0; % index of the sample just before the current run
48 | for k = 1:length(run_ends)
49 |
50 |     run_end = run_ends(k);
51 |
52 |     % State of the run, read at its mid-point
53 |     mid_point = round((run_end - run_start)/2) + run_start;
54 |     run_state = state_row(mid_point);
55 |
56 |     % Same run expressed in samples at the new rate
57 |     first_new = round((run_start./old_fs).*new_fs) + 1;
58 |     last_new = round((run_end./(old_fs)).*new_fs);
59 |
60 |     % Never write past the requested output length
61 |     if(last_new > new_length)
62 |         last_new = new_length;
63 |     end
64 |
65 |     expanded_qt(first_new:last_new) = run_state;
66 |
67 |     run_start = run_end;
68 | end
--------------------------------------------------------------------------------
/getDWT.m:
--------------------------------------------------------------------------------
1 | % function [cD cA] = getDWT(X,N,Name)
2 | %
3 | % finds the discrete wavelet transform at level N for signal X using the
4 | % wavelet specified by Name.
5 | %
6 | %% Inputs:
7 | % X: the original signal
8 | % N: the decomposition level
9 | % Name: the wavelet name to use
10 | %
11 | %% Outputs:
12 | % cD is a N-row matrix containing the detail coefficients up to N levels
13 | % cA is the same for the approximations
14 |
15 | % This code was developed by David Springer for comparison purposes in the
16 | % paper:
17 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
18 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
19 | %
20 | %% Copyright (C) 2016 David Springer
21 | % dave.springer@gmail.com
22 | %
23 | % This program is free software: you can redistribute it and/or modify
24 | % it under the terms of the GNU General Public License as published by
25 | % the Free Software Foundation, either version 3 of the License, or
26 | % any later version.
27 | %
28 | % This program is distributed in the hope that it will be useful,
29 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
30 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 | % GNU General Public License for more details.
32 | %
33 | % You should have received a copy of the GNU General Public License
34 | % along with this program. If not, see http://www.gnu.org/licenses/.
35 |
36 | function [cD cA] = getDWT(X,N,Name)
37 |
38 |
39 | %No DWT available for Morlet - therefore perform CWT:
40 | if(strcmp(Name,'morl'))
41 |
42 | c = cwt(X,1:N,'morl');
43 |
44 | cD = c;
45 | cA = c;
46 | else
47 | %Preform wavelet decomposition
48 |
49 | [c,l] = wavedec(X,N,Name);
50 |
51 | %Reorder the details based on the structure of the wavelet
52 | %decomposition (see help in wavedec.m)
53 | len = length(X);
54 | cD = zeros(N,len);
55 | for k = 1:N
56 | d = detcoef(c,l,k);
57 | d = d(:)';
58 | d = d(ones(1,2^k),:);
59 | cD(k,:) = wkeep1(d(:)',len);
60 | end
61 | cD = cD(:);
62 |
63 | %Space cD according to spacing of floating point numbers:
64 | I = find(abs(cD).
44 |
45 | function [heartRate, systolicTimeInterval] = getHeartRateSchmidt(audio_data, Fs, figures)
46 | % Estimate heart rate and systolic time interval from the autocorrelation
47 | % of the homomorphic envelope of a recording.
48 | %
49 | % audio_data:          the recording
50 | % Fs:                  sampling frequency of audio_data (Hz)
51 | % figures (optional):  when true, plot the autocorrelation and the two
52 | %                      selected peaks (default false)
53 | %
54 | % heartRate:            60 divided by the lag (in seconds) of the highest
55 | %                       autocorrelation peak between 500 and 2000 ms
56 | % systolicTimeInterval: lag (in seconds) of the highest autocorrelation
57 | %                       peak between 200 ms and half the heart cycle
58 | %
59 | % Raises getHeartRateSchmidt:signalTooShort when the recording offers no
60 | % autocorrelation lags inside the 500-2000 ms search window.
61 |
62 | if nargin < 3
63 |     figures = false;
64 | end
65 |
66 | %% Get heart rate:
67 | % From Schmidt:
68 | % "The duration of the heart cycle is estimated as the time from lag zero
69 | % to the highest peaks between 500 and 2000 ms in the resulting
70 | % autocorrelation"
71 | % This is performed after filtering and spike removal:
72 |
73 | %% 25-400Hz 4th order Butterworth band pass
74 | audio_data = butterworth_low_pass_filter(audio_data,2,400,Fs, false);
75 | audio_data = butterworth_high_pass_filter(audio_data,2,25,Fs);
76 |
77 | %% Spike removal from the original paper:
78 | audio_data = schmidt_spike_removal(audio_data,Fs);
79 |
80 | %% Find the homomorphic envelope
81 | homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(audio_data, Fs);
82 |
83 | %% Find the autocorrelation:
84 | y=homomorphic_envelope-mean(homomorphic_envelope);
85 | [c] = xcorr(y,'coeff');
86 | % Keep the positive lags only: element k corresponds to a lag of k samples.
87 | signal_autocorrelation = c(length(homomorphic_envelope)+1:end);
88 |
89 | % Search window of 500-2000 ms. The bounds are rounded so they are valid
90 | % indices for any Fs, and the upper bound is clamped to the lags that
91 | % actually exist for recordings shorter than 2 s.
92 | min_index = round(0.5*Fs);
93 | max_index = min(round(2*Fs), length(signal_autocorrelation));
94 |
95 | if(max_index < min_index)
96 |     error('getHeartRateSchmidt:signalTooShort', ...
97 |         'Recording is too short to search the autocorrelation between 500 and 2000 ms.');
98 | end
99 |
100 | [~, index] = max(signal_autocorrelation(min_index:max_index));
101 | true_index = index+min_index-1;
102 |
103 | heartRate = 60/(true_index/Fs);
104 |
105 |
106 | %% Find the systolic time interval:
107 | % From Schmidt: "The systolic duration is defined as the time from lag zero
108 | % to the highest peak in the interval between 200 ms and half of the heart
109 | % cycle duration"
110 |
111 | max_sys_duration = round(((60/heartRate)*Fs)/2);
112 | min_sys_duration = round(0.2*Fs);
113 |
114 | [~, pos] = max(signal_autocorrelation(min_sys_duration:max_sys_duration));
115 | % pos is relative to the start of the search window, so subtract one when
116 | % mapping back to the full autocorrelation (same mapping as true_index).
117 | systolic_index = min_sys_duration+pos-1;
118 | systolicTimeInterval = systolic_index/Fs;
119 |
120 |
121 | if(figures)
122 |     figure('Name', 'Heart rate calculation figure');
123 |     plot(signal_autocorrelation);
124 |     hold on;
125 |     plot(true_index, signal_autocorrelation(true_index),'ro');
126 |     plot(systolic_index, signal_autocorrelation(systolic_index), 'mo');
127 |     xlabel('Samples');
128 |     legend('Autocorrelation', 'Position of max peak used to calculate HR', 'Position of max peak within systolic interval');
129 | end
--------------------------------------------------------------------------------
/getSpringerPCGFeatures.m:
--------------------------------------------------------------------------------
1 | % function [PCG_Features, featuresFs] = getSpringerPCGFeatures(audio_data, Fs, figures)
2 | %
3 | % Get the features used in the Springer segmentation algorithm. These
4 | % features include:
5 | % -The homomorphic envelope (as performed in Schmidt et al's paper)
6 | % -The Hilbert envelope
7 | % -A wavelet-based feature
8 | % -A PSD-based feature
9 | % This function was developed for use in the paper:
10 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
11 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
12 | %
13 | %% INPUTS:
14 | % audio_data: array of data from which to extract features
15 | % Fs: the sampling frequency of the audio data
16 | % figures (optional): boolean variable dictating the display of figures
17 | %
18 | %% OUTPUTS:
19 | % PCG_Features: array of derived features
20 | % featuresFs: the sampling frequency of the derived features. This is set
21 | % in default_Springer_HSMM_options.m
22 | %
23 | %% Copyright (C) 2016 David Springer
24 | % dave.springer@gmail.com
25 | %
26 | % This program is free software: you can redistribute it and/or modify
27 | % it under the terms of the GNU General Public License as published by
28 | % the Free Software Foundation, either version 3 of the License, or
29 | % any later version.
30 | %
31 | % This program is distributed in the hope that it will be useful,
32 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
33 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 | % GNU General Public License for more details.
35 | %
36 | % You should have received a copy of the GNU General Public License
37 | % along with this program. If not, see http://www.gnu.org/licenses/.
38 |
39 | function [PCG_Features, featuresFs] = getSpringerPCGFeatures(audio_data, Fs, figures)
40 | % Build the feature matrix used by the Springer segmentation algorithm.
41 | % The columns are, in order: homomorphic envelope, Hilbert envelope, PSD
42 | % feature and (only when enabled in the default options) a wavelet
43 | % feature. Every column is passed through normalise_signal.
44 | % featuresFs is the feature sampling frequency read from the options.
45 |
46 | if(nargin < 3)
47 |     figures = false;
48 | end
49 |
50 | % The feature rate and the wavelet switch both come from the default options.
51 | opts = default_Springer_HSMM_options;
52 | featuresFs = opts.audio_segmentation_Fs;
53 | use_wavelet = opts.include_wavelet_feature;
54 |
55 | %% Pre-processing: 25-400Hz 4th order Butterworth band pass, then the
56 | %% spike removal from the original paper
57 | audio_data = butterworth_low_pass_filter(audio_data,2,400,Fs, false);
58 | audio_data = butterworth_high_pass_filter(audio_data,2,25,Fs);
59 | audio_data = schmidt_spike_removal(audio_data,Fs);
60 |
61 | %% Homomorphic envelope: extract, downsample, normalise
62 | homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(audio_data, Fs);
63 | homomorphic_feature = normalise_signal(resample(homomorphic_envelope,featuresFs, Fs));
64 |
65 | %% Hilbert envelope: extract, downsample, normalise
66 | hilbert_envelope = Hilbert_Envelope(audio_data, Fs);
67 | hilbert_feature = normalise_signal(resample(hilbert_envelope, featuresFs, Fs));
68 |
69 | %% Power spectral density feature, resampled to the length of the
70 | %% downsampled envelope and normalised
71 | psd_feature = get_PSD_feature_Springer_HMM(audio_data, Fs, 40,60)';
72 | psd_feature = resample(psd_feature, length(homomorphic_feature), length(psd_feature));
73 | psd_feature = normalise_signal(psd_feature);
74 |
75 | PCG_Features = [homomorphic_feature, hilbert_feature, psd_feature];
76 |
77 | %% Optional wavelet feature, appended as a further column
78 | if(use_wavelet)
79 |     wavelet_level = 3;
80 |     wavelet_name ='rbio3.9';
81 |
82 |     % Audio needs to be longer than 1 second for getDWT to work:
83 |     if(length(audio_data)< Fs*1.025)
84 |         audio_data = [audio_data; zeros(round(0.025*Fs),1)];
85 |     end
86 |
87 |     [cD, ~] = getDWT(audio_data,wavelet_level,wavelet_name);
88 |
89 |     wavelet_feature = abs(cD(wavelet_level,:));
90 |     wavelet_feature = wavelet_feature(1:length(homomorphic_envelope));
91 |     downsampled_wavelet = resample(wavelet_feature, featuresFs, Fs);
92 |     downsampled_wavelet = normalise_signal(downsampled_wavelet)';
93 |
94 |     PCG_Features = [PCG_Features, downsampled_wavelet];
95 | end
96 |
97 | %% Plotting figures
98 | if(figures)
99 |     figure('Name', 'PCG features');
100 |     audio_time = (1:length(audio_data))./Fs;
101 |     plot(audio_time,audio_data);
102 |     hold on;
103 |     feature_time = (1:length(PCG_Features))./featuresFs;
104 |     plot(feature_time,PCG_Features);
105 |     pause();
106 | end
--------------------------------------------------------------------------------
/get_PSD_feature_Springer_HMM.m:
--------------------------------------------------------------------------------
1 | % function [psd] = get_PSD_feature_Springer_HMM(data, sampling_frequency, frequency_limit_low, frequency_limit_high, figures)
2 | %
3 | % PSD-based feature extraction for heart sound segmentation.
4 | %
5 | %% INPUTS:
6 | % data: this is the audio waveform
7 | % sampling_frequency is self-explanatory
8 | % frequency_limit_low is the lower-bound on the frequency range you want to
9 | % analyse
10 | % frequency_limit_high is the upper-bound on the frequency range
11 | % figures: (optional) boolean variable to display figures
12 | %
13 | %% OUTPUTS:
14 | % psd is the array of mean PSD values between the low and high frequency
15 | % limits, with one value per spectrogram time window.
16 | %
17 | % This code was developed by David Springer in the paper:
18 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
19 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
20 | %
21 | %% Copyright (C) 2016 David Springer
22 | % dave.springer@gmail.com
23 | %
24 | % This program is free software: you can redistribute it and/or modify
25 | % it under the terms of the GNU General Public License as published by
26 | % the Free Software Foundation, either version 3 of the License, or
27 | % any later version.
28 | %
29 | % This program is distributed in the hope that it will be useful,
30 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
31 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 | % GNU General Public License for more details.
33 | %
34 | % You should have received a copy of the GNU General Public License
35 | % along with this program. If not, see http://www.gnu.org/licenses/.
36 |
37 | function [psd] = get_PSD_feature_Springer_HMM(data, sampling_frequency, frequency_limit_low, frequency_limit_high, figures)
38 | % PSD-based feature: mean spectrogram power inside a frequency band.
39 | %
40 | % data:                 the audio waveform
41 | % sampling_frequency:   sampling frequency of data (Hz)
42 | % frequency_limit_low:  lower edge of the band of interest (Hz)
43 | % frequency_limit_high: upper edge of the band of interest (Hz)
44 | % figures (optional):   when true, show the spectrogram and the feature
45 | %                       and wait for a key press after each (default 0)
46 | %
47 | % psd: row vector holding, for every spectrogram time window, the mean
48 | %      PSD over the frequency rows closest to the two limits.
49 |
50 | if nargin < 5
51 |     figures = 0;
52 | end
53 |
54 | % Window of 1/40 s with an overlap of 1/80 s. Both lengths are rounded so
55 | % that sampling frequencies that are not a multiple of 40 still give an
56 | % integer number of samples.
57 | window_length = round(sampling_frequency/40);
58 | overlap_length = round(sampling_frequency/80);
59 |
60 | % Find the spectrogram of the signal, evaluated at 1 Hz steps up to Fs/2:
61 | [~,F,T,P] = spectrogram(data,window_length,overlap_length,1:1:round(sampling_frequency/2),sampling_frequency);
62 |
63 | if(figures)
64 |     figure();
65 |     surf(T,F,10*log(P),'edgecolor','none'); axis tight;
66 |     view(0,90);
67 |     xlabel('Time (Seconds)'); ylabel('Hz');
68 |     pause();
69 | end
70 |
71 | % Rows of P closest to the requested band edges
72 | [~, low_limit_position] = min(abs(F - frequency_limit_low));
73 | [~, high_limit_position] = min(abs(F - frequency_limit_high));
74 |
75 |
76 | % Find the mean PSD over the frequency range of interest. The dimension is
77 | % given explicitly so a band covering a single row still yields one value
78 | % per time window instead of collapsing to a scalar.
79 | psd = mean(P(low_limit_position:high_limit_position,:),1);
80 |
81 |
82 | if(figures)
83 |     % psd has one value per spectrogram window, so it is drawn against
84 |     % the window times T rather than against the audio sample times.
85 |     t4 = T;
86 |     t3 = (1:length(data))./sampling_frequency;
87 |     figure('Name', 'PSD Feature');
88 |
89 |     plot(t3,(data - mean(data))./std(data),'c');
90 |     hold on;
91 |
92 |     plot(t4, (psd - mean(psd))./std(psd),'k');
93 |
94 |     pause();
95 | end
--------------------------------------------------------------------------------
/get_duration_distributions.m:
--------------------------------------------------------------------------------
1 | % function [d_distributions max_S1 min_S1 max_S2 min_S2 max_systole min_systole max_diastole min_diastole] = get_duration_distributions(heartrate,systolic_time)
2 | %
3 | % This function calculates the duration distributions for each heart cycle
4 | % state, and the minimum and maximum times for each state.
5 | %
6 | %% Inputs:
7 | % heartrate is the calculated average heart rate over the entire recording
8 | % systolic_time is the systolic time interval
9 | %
10 | %% Outputs:
11 | % d_distributions is a 4 (the number of states) dimensional vector of
12 | % gaussian mixture models (one dimensional in this case), representing the
13 | % mean and std deviation of the duration in each state.
14 | %
15 | % The max and min values are self-explanatory.
16 | %
17 | % This code is implemented as outlined in the paper:
18 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
19 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
20 | % no. 4, pp. 513-29, Apr. 2010.
21 | %
22 | % Developed by David Springer for comparison purposes in the paper:
23 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
24 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
25 | %
26 | %% Copyright (C) 2016 David Springer
27 | % dave.springer@gmail.com
28 | %
29 | % This program is free software: you can redistribute it and/or modify
30 | % it under the terms of the GNU General Public License as published by
31 | % the Free Software Foundation, either version 3 of the License, or
32 | % any later version.
33 | %
34 | % This program is distributed in the hope that it will be useful,
35 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
36 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37 | % GNU General Public License for more details.
38 | %
39 | % You should have received a copy of the GNU General Public License
40 | % along with this program. If not, see <http://www.gnu.org/licenses/>.
41 |
function [d_distributions, max_S1, min_S1, max_S2, min_S2, max_systole, min_systole, max_diastole, min_diastole] = get_duration_distributions(heartrate,systolic_time)
% Duration model (mean and variance, in samples) for the four heart-cycle
% states, plus the minimum and maximum duration allowed for each state.
%
% Inputs:
%   heartrate:     used as 60/heartrate to obtain the length of one cycle
%   systolic_time: the systolic time interval; it is multiplied by the
%                  feature sampling frequency to convert it to samples
%
% Outputs:
%   d_distributions: 4x2 cell array; row k holds {mean, variance} of the
%                    duration of state k (1 = S1, 2 = systole, 3 = S2,
%                    4 = diastole)
%   max_* / min_*:   mean +/- 3 standard deviations for each state, with
%                    the S1 and S2 minima floored at Fs/50 samples

options = default_Springer_HSMM_options;
Fs = options.audio_segmentation_Fs;

% Heart sound durations, converted to whole samples:
mean_S1 = round(0.122*Fs);
sd_S1 = round(0.022*Fs);
mean_S2 = round(0.094*Fs);
sd_S2 = round(0.022*Fs);

% Systole is what remains of the systolic interval after S1:
mean_systole = round(systolic_time*Fs) - mean_S1;
sd_systole = (25/1000)*Fs;

% Diastole is what remains of the cycle after the systolic interval and S2:
mean_diastole = ((60/heartrate) - systolic_time - 0.094)*Fs;
sd_diastole = 0.07*mean_diastole + (6/1000)*Fs;

% One row per state: {mean, variance}
d_distributions = {mean_S1, (sd_S1)^2; ...
                   mean_systole, (sd_systole)^2; ...
                   mean_S2, (sd_S2)^2; ...
                   mean_diastole, (sd_diastole)^2};

% Systole limits: three times the combined systole and S1 deviations
systole_margin = 3*(sd_systole+sd_S1);
min_systole = mean_systole - systole_margin;
max_systole = mean_systole + systole_margin;

% Diastole limits:
diastole_margin = 3*sd_diastole;
min_diastole = mean_diastole-diastole_margin;
max_diastole = mean_diastole + diastole_margin;

% Heart sound limits. The minima may not drop below a 50th of the
% sampling frequency:
shortest_sound = (Fs/50);

min_S1 = (mean_S1 - 3*(sd_S1));
if(min_S1<shortest_sound)
    min_S1 = shortest_sound;
end

min_S2 = (mean_S2 - 3*(sd_S2));
if(min_S2<shortest_sound)
    min_S2 = shortest_sound;
end

max_S1 = (mean_S1 + 3*(sd_S1));
max_S2 = (mean_S2 + 3*(sd_S2));
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/labelPCGStates.m:
--------------------------------------------------------------------------------
1 | % function states = labelPCGStates(envelope,s1_positions, s2_positions, samplingFrequency, figures)
2 | %
3 | % This function assigns the state labels to a PCG record.
4 | % This is based on ECG markers, derived from the R peak and end-T wave locations.
5 | %
6 | %% Inputs:
7 | % envelope: The PCG recording envelope (found in getSchmidtPCGFeatures.m)
8 | % s1_positions: The locations of the R peaks (in samples)
9 | % s2_positions: The locations of the end-T waves (in samples)
10 | % samplingFrequency: The sampling frequency of the PCG recording
11 | % figures (optional): boolean variable dictating the display of figures
12 | %
13 | %% Output:
14 | % states: An array of the state label for each sample in the feature
15 | % vector. The total number of states is 4. Therefore, this is an array of
16 | % values between 1 and 4, such as: [1,1,1,1,2,2,2,3,3,3,3,4,4,4,4,4,1,1,1],
17 | % illustrating the "true" state label for each sample in the features.
18 | % State 1 = S1 sound
19 | % State 2 = systole
20 | % State 3 = S2 sound
21 | % State 4 = diastole
22 | %
23 | % This code was developed by David Springer for comparison purposes in the
24 | % paper:
25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
27 | % where a novel segmentation approach is compared to the paper by Schmidt
28 | % et al:
29 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
30 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
31 | % no. 4, pp. 513-29, Apr. 2010.
32 | %
33 | %% Copyright (C) 2016 David Springer
34 | % dave.springer@gmail.com
35 | %
36 | % This program is free software: you can redistribute it and/or modify
37 | % it under the terms of the GNU General Public License as published by
38 | % the Free Software Foundation, either version 3 of the License, or
39 | % any later version.
40 | %
41 | % This program is distributed in the hope that it will be useful,
42 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
43 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 | % GNU General Public License for more details.
45 | %
46 | % You should have received a copy of the GNU General Public License
47 | % along with this program. If not, see <http://www.gnu.org/licenses/>.
48 |
function states = labelPCGStates(envelope,s1_positions, s2_positions, samplingFrequency, figures)
% Builds a column vector of state labels, one per sample of envelope, from
% the supplied S1 (R-peak) and S2 (end-T-wave) positions:
%   1 = S1 sound, 2 = systole, 3 = S2 sound, 4 = diastole
%
% NOTE(review): this copy of the function is damaged. In the line
% "if(isempty(diffs 1)" below, the text between a "<" and a later ">"
% appears to have been stripped, taking several statements with it
% (presumably the end of the S2 loop and the definition of
% first_location_of_definite_state). Restore the missing lines from the
% upstream source before running this code.

% figures is optional; plotting is disabled unless the caller asks for it.
if(nargin<5)
    figures = false;
end

% 0 means "not yet labelled"; remaining zeros are turned into state 2 at
% the end of the function.
states = zeros(length(envelope),1);


%% Timing durations from Schmidt:
% Expressed in samples (seconds * samplingFrequency). std_S1 is not
% referenced in the lines visible here.
mean_S1 = 0.122*samplingFrequency;
std_S1 = 0.022*samplingFrequency;
mean_S2 = 0.092*samplingFrequency;
std_S2 = 0.022*samplingFrequency;

%% Setting the duration from each R-peak to (R-peak+mean_S1) as the first state:
% The R-peak in the ECG coincides with the start of the S1 sound (A. G.
% Tilkian and M. B. Conover, Understanding heart sounds and murmurs: with
% an introduction to lung sounds, 4th ed. Saunders, 2001.)
% Therefore, the duration from each R-peak to the mean_S1 sound duration
% later is labelled as the "true" position of the S1 sound:
for i = 1: length(s1_positions)
    %Set an upper bound, in case the window extends over the length of the
    %signal:
    upper_bound = round(min(length(states), s1_positions(i) + mean_S1));

    %Set the states between the start of the R peak and the upper bound as
    %state 1 (both ends are clamped to the valid index range):
    states(max([1,s1_positions(i)]):min([upper_bound,length(states)])) = 1;
end

%% Set S2 as state 3 depending on position of end T-wave peak in ECG:
% The second heart sound occurs at approximately the same time as the
% end-T-wave (A. G. Tilkian and M. B. Conover, Understanding heart sounds
% and murmurs: with an introduction to lung sounds, 4th ed. Saunders, 2001.)
% Therefore, for each end-T-wave, find the peak in the envelope around the
% end-T-wave, setting a window centered on this peak as the second heart
% sound state:
for i = 1: length(s2_positions)

    %Find the search window of the envelope:
    %T-end +- (mean + 1 sd)
    %Set upper and lower bounds, to avoid searching outside the signal:
    lower_bound = max([s2_positions(i) - floor((mean_S2 + std_S2)),1]);
    upper_bound = min(length(states), ceil(s2_positions(i) + floor(mean_S2 + std_S2)));
    %Samples already labelled as S1 are multiplied by zero so that they
    %cannot be selected as the S2 peak:
    search_window = envelope(lower_bound:upper_bound).*(states(lower_bound:upper_bound)~=1);

    % Find the maximum value of the envelope in the search window:
    [~, S2_index] = max(search_window);

    %Convert the window-relative index into an index in the envelope,
    %capped at the length of the signal:
    S2_index = min(length(states),lower_bound+ S2_index-1);

    %Set the states to state 3, centered on the S2 peak, +- 1/2 of the
    %expected S2 sound duration. Again, making sure it does not try to set a
    %value outside of the length of the signal:
    upper_bound = min(length(states), ceil(S2_index +((mean_S2)/2)));
    states(max([ceil(S2_index - ((mean_S2)/2)),1]):upper_bound) = 3;

    %Set the spaces between state 3 and the next R peak as state 4.
    %(i never exceeds length(s2_positions) inside this loop, so the
    %condition below always holds.)
    if(i<=length(s2_positions))
        %We need to find the next R peak after this S2 sound
        %So, subtract the position of this S2 from the S1 positions
        diffs = (s1_positions - s2_positions(i));
        %Exclude those that are negative (meaning before this S2 occurred)
        %by setting them to infinity. They are then excluded when finding
        %the minimum later
        diffs(diffs<0) = inf;

        %If the array is empty, then no S1s after this S2, so set to end of
        %signal:

        % NOTE(review): the next line is truncated (see the note at the top
        % of the function); the statements that originally followed it are
        % missing from this copy. The indentation below this point is a
        % guess at the original nesting.
        if(isempty(diffs 1)

    % Label the unlabelled samples before the first labelled state
    % (presumably first_location_of_definite_state is the index just before
    % the first non-zero state; its definition is not visible here):
    % before an S1 they are set to diastole (4), before an S2 to systole (2).
    if(states(first_location_of_definite_state + 1) == 1)
        states(1:first_location_of_definite_state) = 4;
    end

    if(states(first_location_of_definite_state + 1) == 3)
        states(1:first_location_of_definite_state) = 2;
    end

end


% Find the last step down, i.e. the last sample carrying a non-zero label:
last_location_of_definite_state = find(states ~= 0, 1,'last');

if(last_location_of_definite_state > 1)

    % From the last labelled sample onwards (including that sample itself):
    % after an S1 the rest is set to systole (2), after an S2 to diastole (4).
    if(states(last_location_of_definite_state) == 1)
        states(last_location_of_definite_state:end) = 2;
    end

    if(states(last_location_of_definite_state) == 3)
        states(last_location_of_definite_state:end) = 4;
    end

end


% Drop any labels beyond the length of the envelope:
states(length(envelope)+1 : end) = [];


%Set everywhere else as state 2:
states(states == 0) = 2;


%% Plotting figures
% Overlay the state labels on the envelope and wait for a key press:
if(figures)
    figure('Name','Envelope and labelled states');
    plot(envelope);
    hold on;
    plot(states,'r');
    legend('Envelope', 'States');
    pause();
end
--------------------------------------------------------------------------------
/normalise_signal.m:
--------------------------------------------------------------------------------
1 | % function [normalised_signal] = normalise_signal(signal)
2 | %
3 | % This function subtracts the mean and divides by the standard deviation of
4 | % a (1D) signal in order to normalise it for machine learning applications.
5 | %
6 | %% Inputs:
7 | % signal: the original signal
8 | %
9 | %% Outputs:
10 | % normalised_signal: the original signal, minus the mean and divided by
11 | % the standard deviation.
12 | %
13 | % Developed by David Springer for the paper:
14 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
15 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
16 | %
17 | %% Copyright (C) 2016 David Springer
18 | % dave.springer@gmail.com
19 | %
20 | % This program is free software: you can redistribute it and/or modify
21 | % it under the terms of the GNU General Public License as published by
22 | % the Free Software Foundation, either version 3 of the License, or
23 | % any later version.
24 | %
25 | % This program is distributed in the hope that it will be useful,
26 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
27 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 | % GNU General Public License for more details.
29 | %
30 | % You should have received a copy of the GNU General Public License
31 | % along with this program. If not, see <http://www.gnu.org/licenses/>.
32 |
function [normalised_signal] = normalise_signal(signal)
% Returns signal with its mean subtracted and the result divided
% element-wise by its standard deviation, as computed by mean() and std().

normalised_signal = (signal - mean(signal)) ./ std(signal);
40 |
41 |
--------------------------------------------------------------------------------
/runSpringerSegmentationAlgorithm.m:
--------------------------------------------------------------------------------
1 | % function assigned_states = runSpringerSegmentationAlgorithm(audio_data, Fs, B_matrix, pi_vector, total_observation_distribution, figures)
2 | %
3 | % A function to assign states to a PCG recording using a duration-dependent
4 | % logistic regression-based HMM, using the trained B_matrix and pi_vector
5 | % trained in "trainSpringerSegmentationAlgorithm.m". Developed for use in
6 | % the paper:
7 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
8 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
9 | %
10 | %% INPUTS:
11 | % audio_data: The audio data from the PCG recording
12 | % Fs: the sampling frequency of the audio recording
13 | % B_matrix: the observation matrix for the HMM, trained in the
14 | % "trainSpringerSegmentationAlgorithm.m" function
15 | % pi_vector: the initial state distribution, also trained in the
16 | % "trainSpringerSegmentationAlgorithm.m" function
17 | % total_observation_distribution, the observation probabilities of all the
18 | % data, again, trained in trainSpringerSegmentationAlgorithm.
19 | % figures: (optional) boolean variable for displaying figures
20 | %
21 | %% OUTPUTS:
22 | % assigned_states: the array of state values assigned to the original
23 | % audio_data (in the original sampling frequency).
24 | %
25 | %% Copyright (C) 2016 David Springer
26 | % dave.springer@gmail.com
27 | %
28 | % This program is free software: you can redistribute it and/or modify
29 | % it under the terms of the GNU General Public License as published by
30 | % the Free Software Foundation, either version 3 of the License, or
31 | % any later version.
32 | %
33 | % This program is distributed in the hope that it will be useful,
34 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
35 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36 | % GNU General Public License for more details.
37 | %
38 | % You should have received a copy of the GNU General Public License
39 | % along with this program. If not, see <http://www.gnu.org/licenses/>.
40 |
function assigned_states = runSpringerSegmentationAlgorithm(audio_data, Fs, B_matrix, pi_vector, total_observation_distribution, figures)
% Segments one PCG recording: extracts the features, estimates the heart
% rate and systolic interval, decodes the state sequence with
% viterbiDecodePCG_Springer and expands it with expand_qt to the length of
% audio_data.
%
% Inputs:
%   audio_data: the audio data of the PCG recording
%   Fs: sampling frequency of audio_data
%   B_matrix, pi_vector, total_observation_distribution: trained model
%       parameters, passed unchanged to viterbiDecodePCG_Springer
%   figures: (optional) true to plot the derived states; defaults to false
%
% Output:
%   assigned_states: the result of expand_qt for length(audio_data) samples

%% Optional argument
if(nargin <= 5)
    figures = false;
end

%% Features and heart rate of the recording
[features, features_Fs] = getSpringerPCGFeatures(audio_data, Fs);
[heart_rate, systolic_interval] = getHeartRateSchmidt(audio_data, Fs);

%% Decode the state sequence at the feature sampling rate
[~, ~, decoded_states] = viterbiDecodePCG_Springer(features, pi_vector, B_matrix, total_observation_distribution, heart_rate, systolic_interval, features_Fs);

%% Expand the sequence to the sampling rate of the audio
assigned_states = expand_qt(decoded_states, features_Fs, Fs, length(audio_data));

%% Optional plot of the normalised audio with the derived states
if(figures)
    time_axis = (1:length(audio_data))./Fs;

    figure('Name','Derived state sequence');
    plot(time_axis, normalise_signal(audio_data), 'k');
    hold on;
    plot(time_axis, assigned_states, 'r--');
    xlabel('Time (s)');
    legend('Audio data', 'Derived states');
end
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/run_Example_Springer_Script.m:
--------------------------------------------------------------------------------
1 | %% Example Springer script
2 | % A script to demonstrate the use of the Springer segmentation algorithm
3 |
4 | %% Copyright (C) 2016 David Springer
5 | % dave.springer@gmail.com
6 | %
7 | % This program is free software: you can redistribute it and/or modify
8 | % it under the terms of the GNU General Public License as published by
9 | % the Free Software Foundation, either version 3 of the License, or
10 | % any later version.
11 | %
12 | % This program is distributed in the hope that it will be useful,
13 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | % GNU General Public License for more details.
16 | %
17 | % You should have received a copy of the GNU General Public License
18 | % along with this program. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | %%
close all;
clear all;

%% Default options
% The options struct supplies, among other settings, the sampling frequency
% of the audio data (audio_Fs) used below:
springer_options = default_Springer_HSMM_options;

%% Audio data and annotations
% These are 6 example PCG recordings, downsampled to 1000 Hz, with
% annotations of the R-peak and end-T-wave positions.
load('example_data.mat');

%% Train / test split
% Recordings 1 to 5 are used for training, recording 6 for testing:
train_recordings = example_data.example_audio_data(1:5);
train_annotations = example_data.example_annotations(1:5,:);

test_recordings = example_data.example_audio_data(6);
test_annotations = example_data.example_annotations(6,:);


%% Train the HMM (figures disabled)
[B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(train_recordings, train_annotations, springer_options.audio_Fs, false);

%% Segment each unseen test recording
% The final argument (true) makes the function display the segmentation.
numPCGs = length(test_recordings);

for PCGi = 1:numPCGs
    assigned_states = runSpringerSegmentationAlgorithm(test_recordings{PCGi}, springer_options.audio_Fs, B_matrix, pi_vector, total_obs_distribution, true);
end
54 |
55 |
--------------------------------------------------------------------------------
/schmidt_spike_removal.m:
--------------------------------------------------------------------------------
1 | % function [despiked_signal] = schmidt_spike_removal(original_signal, fs)
2 | %
3 | % This function removes the spikes in a signal as done by Schmidt et al in
4 | % the paper:
5 | % Schmidt, S. E., Holst-Hansen, C., Graff, C., Toft, E., & Struijk, J. J.
6 | % (2010). Segmentation of heart sound recordings by a duration-dependent
7 | % hidden Markov model. Physiological Measurement, 31(4), 513-29.
8 | %
9 | % The spike removal process works as follows:
10 | % (1) The recording is divided into 500 ms windows.
11 | % (2) The maximum absolute amplitude (MAA) in each window is found.
12 | % (3) If at least one MAA exceeds three times the median value of the MAA's,
13 | % the following steps were carried out. If not continue to point 4.
14 | % (a) The window with the highest MAA was chosen.
15 | % (b) In the chosen window, the location of the MAA point was identified as the top of the noise spike.
16 | % (c) The beginning of the noise spike was defined as the last zero-crossing point before the MAA point.
17 | % (d) The end of the spike was defined as the first zero-crossing point after the maximum point.
18 | % (e) The defined noise spike was replaced by zeroes.
19 | % (f) Resume at step 2.
20 | % (4) Procedure completed.
21 | %
22 | %% Inputs:
23 | % original_signal: The original (1D) audio signal array
24 | % fs: the sampling frequency (Hz)
25 | %
26 | %% Outputs:
27 | % despiked_signal: the audio signal with any spikes removed.
28 | %
29 | % This code is derived from the paper:
30 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a
31 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
32 | % no. 4, pp. 513-29, Apr. 2010.
33 | %
34 | % Developed by David Springer for comparison purposes in the paper:
35 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
36 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
37 | %
38 | %% Copyright (C) 2016 David Springer
39 | % dave.springer@gmail.com
40 | %
41 | % This program is free software: you can redistribute it and/or modify
42 | % it under the terms of the GNU General Public License as published by
43 | % the Free Software Foundation, either version 3 of the License, or
44 | % any later version.
45 | %
46 | % This program is distributed in the hope that it will be useful,
47 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
48 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49 | % GNU General Public License for more details.
50 | %
51 | % You should have received a copy of the GNU General Public License
52 | % along with this program. If not, see <http://www.gnu.org/licenses/>.
53 |
54 |
function [despiked_signal] = schmidt_spike_removal(original_signal, fs)
% Removes noise spikes from a 1D signal by repeatedly overwriting the
% largest excursion in any 500 ms window whose maximum absolute amplitude
% (MAA) exceeds three times the median MAA.
%
% Inputs:
%   original_signal: the (1D) signal; row and column vectors are accepted
%   fs:              the sampling frequency (Hz)
%
% Output:
%   despiked_signal: column vector of the same length as original_signal.
%                    Samples of a removed spike are set to 0.0001; samples
%                    after the last complete window are copied unchanged.

% Value written over a removed spike (small but non-zero).
replacement_value = 0.0001;

% Work on a column vector, so that the trailing samples can be appended to
% the reshaped frames regardless of the orientation of the input:
original_signal = original_signal(:);

%% Find the window size
% (500 ms)
windowsize = round(fs/2);

%% Find any samples outside of an integer number of windows:
trailingsamples = mod(length(original_signal), windowsize);

%% Reshape the signal into a number of windows (one window per column):
sampleframes = reshape( original_signal(1:end-trailingsamples), windowsize, []);

%% Find the MAAs:
% The dimension is given explicitly so that a frame matrix with a single
% row is still reduced per window.
MAAs = max(abs(sampleframes), [], 1);

% While there are still windows with an MAA greater than 3* the median
% value of the MAAs, remove those spikes:
while(any(MAAs > median(MAAs)*3))

    %Find the window with the max MAA:
    [max_MAA, window_num] = max(MAAs);

    % Overwriting a spike leaves samples of magnitude replacement_value
    % behind. Once the largest MAA is no bigger than that, no window can be
    % reduced any further, so stop. Without this guard the loop never
    % terminates when the median MAA is zero (e.g. more than half of the
    % windows are silent) or the whole signal is of very small amplitude.
    if(max_MAA <= replacement_value)
        break;
    end

    frame = sampleframes(:,window_num);

    %Find the position of the spike within that window:
    [~, spike_position] = max(abs(frame));

    % Finding zero crossings (where there may not be actual 0 values, just
    % a change from positive to negative):
    zero_crossings = [abs(diff(sign(frame)))>1; 0];

    %Find the start of the spike, finding the last zero crossing before
    %spike position. If that is empty, take the start of the window:
    spike_start = max([1 find(zero_crossings(1:spike_position),1,'last')]);

    %Find the end of the spike, finding the first zero crossing after
    %spike position. If that is empty, take the end of the window:
    zero_crossings(1:spike_position) = 0;
    spike_end = min([(find(zero_crossings,1,'first')) windowsize]);

    %Overwrite the spike:
    sampleframes(spike_start:spike_end,window_num) = replacement_value;

    %Recalculate MAAs
    MAAs = max(abs(sampleframes), [], 1);
end

despiked_signal = reshape(sampleframes, [],1);

% Add the trailing samples back to the signal:
despiked_signal = [despiked_signal; original_signal(length(despiked_signal)+1:end)];
112 |
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/trainBandPiMatricesSpringer.m:
--------------------------------------------------------------------------------
1 | % function [B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values)
2 | %
3 | % Train the B matrix and pi vector for the Springer HMM.
4 | % The pi vector is the initial state probability, while the B matrix holds
5 | % the observation probabilities. In the case of Springer's algorithm, the
6 | % observation probabilities are derived from logistic regression
7 | % models.
8 | %
9 | %% Inputs:
10 | % state_observation_values: an Nx4 cell array of observation values from
11 | % each of N PCG signals for each (of 4) state. Within each cell is a KxJ
12 | % double array, where K is the number of samples from that state in the PCG
13 | % and J is the number of feature vectors extracted from the PCG.
14 | %
15 | %% Outputs:
16 | % The B_matrix and pi arrays for an HMM - as Springer et al's algorithm is a
17 | % duration dependant HMM, there is no need to calculate the A_matrix, as
18 | % the transition between states is only dependant on the state durations.
19 | % total_obs_distribution:
20 | %
21 | % Developed by David Springer for the paper:
22 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
23 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
24 | %
25 | %% Copyright (C) 2016 David Springer
26 | % dave.springer@gmail.com
27 | %
28 | % This program is free software: you can redistribute it and/or modify
29 | % it under the terms of the GNU General Public License as published by
30 | % the Free Software Foundation, either version 3 of the License, or
31 | % any later version.
32 | %
33 | % This program is distributed in the hope that it will be useful,
34 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
35 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36 | % GNU General Public License for more details.
37 | %
38 | % You should have received a copy of the GNU General Public License
39 | % along with this program. If not, see <http://www.gnu.org/licenses/>.
40 |
function [B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values)
% Trains one logistic regression model per state (the B_matrix), sets a
% uniform initial state distribution (pi_vector), and computes the mean and
% covariance of all pooled observations (total_obs_distribution).
%
% Input:
%   state_observation_values: Nx4 cell array; cell (n,k) holds a KxJ array
%       of the K observations (J features each) of state k in recording n.
%
% Outputs:
%   B_matrix: 1x4 cell array; cell k holds the coefficients returned by
%       mnrfit for "state k" (label 2) versus "any other state" (label 1)
%   pi_vector: [0.25,0.25,0.25,0.25]
%   total_obs_distribution: 2x1 cell array holding the mean (cell 1) and
%       the covariance (cell 2) of all observations of all states

%% Prelim

number_of_states = 4;

% Recordings are the ROWS of the cell array. length() would return the
% larger of the two dimensions and index past the last row whenever there
% are fewer recordings than states.
number_of_recordings = size(state_observation_values, 1);

%% Set pi_vector
% The true values of the pi vector, which are the initial state
% probabilities, depend on the heart rate of each PCG and on the
% individual sound durations of each patient. Therefore, instead of setting
% a patient-dependent pi_vector, simplify by setting all states as equally
% probable:

pi_vector = [0.25,0.25,0.25,0.25];

%% Train the logistic regression-based B_matrix:

% Initialise the B_matrix as a 1x4 cell array. This is to hold the
% coefficients of the trained logistic regression model for each state.
B_matrix = cell(1,number_of_states);

% Pool the observations of each state over all recordings:
statei_values = cell(number_of_states,1);

for PCGi = 1:number_of_recordings
    for state = 1:number_of_states
        statei_values{state} = vertcat(statei_values{state}, state_observation_values{PCGi,state});
    end
end

% Number of observations (rows) available for each state. size(.,1) is used
% rather than length() so that the count stays correct even when a state
% has fewer observations than there are features.
samples_per_state = zeros(number_of_states,1);
for state = 1:number_of_states
    samples_per_state(state) = size(statei_values{state}, 1);
end

% In order to use Bayes' formula with the logistic regression derived
% probabilities, we need the probability of seeing a specific observation
% in the total training data set. For this, the mean and covariance of the
% pooled observations of all states are found:

total_observation_sequence = vertcat(statei_values{1}, statei_values{2}, statei_values{3}, statei_values{4});
total_obs_distribution = cell(2,1);
total_obs_distribution{1} = mean(total_observation_sequence);
total_obs_distribution{2} = cov(total_observation_sequence);


for state = 1: number_of_states

    % Randomly select samples from the other states, in order to balance
    % the two data sets: if class 1 is being learnt vs the rest, the number
    % of "rest" samples equals the number of class 1 samples, evenly split
    % across all other classes.

    % Number of samples required from each of the other states:
    length_per_other_state = floor(samples_per_state(state)/(number_of_states-1));

    % Size of the smallest of the other states:
    is_other_state = true(number_of_states,1);
    is_other_state(state) = false;
    min_length_other_class = min(samples_per_state(is_other_state));

    % If one of the other states does not have enough samples, take only
    % as many as the smallest other state offers (and, below, 3 times that
    % number from the main state):
    if( length_per_other_state > min_length_other_class)
        length_per_other_state = min_length_other_class;
    end

    % training_data{1}: samples of the state being learnt
    % training_data{2}: samples of all other states
    training_data = cell(2,1);

    for other_state = 1: number_of_states

        if(other_state == state)
            % Choose (number_of_states-1) * length_per_other_state samples
            % for the main state, to ensure that the sets are balanced:
            indices = randperm(samples_per_state(other_state),length_per_other_state*(number_of_states-1));
            training_data{1} = statei_values{other_state}(indices,:);
        else
            indices = randperm(samples_per_state(other_state),length_per_other_state);
            state_data = statei_values{other_state}(indices,:);
            training_data{2} = vertcat(training_data{2}, state_data);
        end
    end

    % Label all the data: 2 for the state being learnt, 1 for the rest.
    number_of_main_samples = size(training_data{1}, 1);
    number_of_rest_samples = size(training_data{2}, 1);
    labels = ones(number_of_main_samples + number_of_rest_samples,1);
    labels(1:number_of_main_samples) = 2;

    % Train the logistic regression model for this state:
    all_data = [training_data{1};training_data{2}];
    [B,~,~] = mnrfit(all_data,labels);
    B_matrix{state} = B;
end
150 |
151 |
--------------------------------------------------------------------------------
/trainSpringerSegmentationAlgorithm.m:
--------------------------------------------------------------------------------
1 | % function [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(PCGCellArray, annotationsArray, Fs, figures)
2 | %
3 | % Training the Springer HMM segmentation algorithm. Developed for use in
4 | % the paper:
5 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
6 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
7 | %
8 | %% Inputs:
9 | % PCGCellArray: A 1XN cell array of the N audio signals. For evaluation
10 | % purposes, these signals should be from a distinct training set of
11 | % recordings, while the algorithm should be evaluated on a separate test
12 | % set of recordings, which are recorded from a completely different set of
13 | % patients (for example, if there are numerous recordings from each
14 | % patient).
15 | % annotationsArray: a Nx2 cell array: position (n,1) = the positions of the
16 | % R-peaks and position (n,2) = the positions of the end-T-waves
17 | % (both in SAMPLES)
18 | % Fs: The sampling frequency of the PCG signals
19 | % figures (optional): boolean variable dictating the display of figures.
20 | %
21 | %% Outputs:
22 | % logistic_regression_B_matrix: the logistic regression coefficients for each state
23 | % pi_vector: the initial state probabilities
24 | % total_obs_distribution: the mean and covariance of all the observations
25 | % As Springer et al's algorithm is a duration dependent HMM, there is no
26 | % need to calculate the A_matrix, as the transition between states is only
27 | % dependent on the state durations.
28 | %
29 | %% Copyright (C) 2016 David Springer
30 | % dave.springer@gmail.com
31 | %
32 | % This program is free software: you can redistribute it and/or modify
33 | % it under the terms of the GNU General Public License as published by
34 | % the Free Software Foundation, either version 3 of the License, or
35 | % any later version.
36 | %
37 | % This program is distributed in the hope that it will be useful,
38 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
39 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40 | % GNU General Public License for more details.
41 | %
42 | % You should have received a copy of the GNU General Public License
43 | % along with this program. If not, see .
44 |
45 |
46 | function [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(PCGCellArray, annotationsArray, Fs, figures)
47 | % Label each training recording, pool the feature rows that belong to each
48 | % of the four states, and train the observation model from the pooled rows.
49 | %
50 | % PCGCellArray{n} is the n-th audio signal and annotationsArray{n,1} /
51 | % annotationsArray{n,2} are its two annotation vectors. Fs is the sampling
52 | % frequency of the audio. figures defaults to false when it is not supplied.
53 |
54 | %% Options
55 |
56 | if(nargin < 4)
57 |     figures = false;
58 | end
59 |
60 | numberOfStates = 4;
61 | numPCGs = length(PCGCellArray);
62 |
63 | % Cell (recording, state) holding the feature rows observed in that state:
64 | state_observation_values = cell(numPCGs,numberOfStates);
65 |
66 |
67 | for PCGi = 1:numPCGs
68 |     PCG_audio = PCGCellArray{PCGi};
69 |
70 |     S1_locations = annotationsArray{PCGi,1};
71 |     S2_locations = annotationsArray{PCGi,2};
72 |
73 |     [PCG_Features, featuresFs] = getSpringerPCGFeatures(PCG_audio, Fs);
74 |
75 |     % Only the first feature column is used to assign the state labels:
76 |     PCG_states = labelPCGStates(PCG_Features(:,1),S1_locations, S2_locations, featuresFs);
77 |
78 |
79 |     %% Plotting assigned states:
80 |     if(figures)
81 |         figure('Name','Assigned states to PCG');
82 |
83 |         t1 = (1:length(PCG_audio))./Fs;
84 |         % One time stamp per feature row (size, not length, so that the
85 |         % number of feature columns can never be picked up by mistake):
86 |         t2 = (1:size(PCG_Features,1))./featuresFs;
87 |
88 |         audio_lines = plot(t1, PCG_audio, 'k-');
89 |         hold on;
90 |         feature_lines = plot(t2, PCG_Features, 'b-');
91 |         state_lines = plot(t2, PCG_states, 'r-');
92 |
93 |         % plot() returns one line per feature column. Pass explicit handles
94 |         % so that the 'States' label is attached to the state line and not
95 |         % to the second feature column:
96 |         legend([audio_lines(1); feature_lines(1); state_lines(1)], 'Audio','Features','States');
97 |         pause();
98 |     end
99 |
100 |
101 |     %% Group together all observations from the same state in the PCG recordings:
102 |     for state_i = 1:numberOfStates
103 |         state_observation_values{PCGi,state_i} = PCG_Features(PCG_states == state_i,:);
104 |     end
105 | end
106 |
107 | % Save the state observation values to the main workspace of Matlab for
108 | % later investigation if needed:
109 | assignin('base', 'state_observation_values', state_observation_values)
110 |
111 | %% Train the B and pi matrices after all the PCG recordings have been labelled:
112 | [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values);
--------------------------------------------------------------------------------
/viterbiDecodePCG_Springer.m:
--------------------------------------------------------------------------------
1 | % function [delta, psi, qt] = viterbiDecodePCG_Springer(observation_sequence, pi_vector, b_matrix, total_obs_distribution, heartrate, systolic_time, Fs, figures)
2 | %
3 | % This function calculates the delta, psi and qt matrices associated with
4 | % the Viterbi decoding algorithm from:
5 | % L. R. Rabiner, "A tutorial on hidden Markov models and selected
6 | % applications in speech recognition," Proc. IEEE, vol. 77, no. 2, pp.
7 | % 257-286, Feb. 1989.
8 | % using equations 32a - 35, and equations 68 - 69 to include duration
9 | % dependancy of the states.
10 | %
11 | % This decoding is performed after the observation probabilities have been
12 | % derived from the logistic regression model of Springer et al:
13 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
14 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
15 | %
16 | % Further, this function is extended to allow the duration distributions to extend
17 | % past the beginning and end of the sequence. Without this, the label
18 | % sequence has to start and stop with an "entire" state duration being
19 | % fulfilled. This extension takes away that requirement, by allowing the
20 | % duration distributions to extend past the beginning and end, but only
21 | % considering the observations within the sequence for emission probability
22 | % estimation. More detail can be found in the publication by Springer et
23 | % al., mentioned above.
24 | %
25 | %% Inputs:
26 | % observation_sequence: The observed features
27 | % pi_vector: the array of initial state probabilities, derived from
28 | % "trainSpringerSegmentationAlgorithm".
29 | % b_matrix: the observation probabilities, derived from
30 | % "trainSpringerSegmentationAlgorithm".
31 | % heartrate: the heart rate of the PCG, extracted using
32 | % "getHeartRateSchmidt"
33 | % systolic_time: the duration of systole, extracted using
34 | % "getHeartRateSchmidt"
35 | % Fs: the sampling frequency of the observation_sequence
36 | % figures: optional boolean variable to show figures
37 | %
38 | %% Outputs:
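39 | % delta: (T + max_duration_D - 1) x N matrix of the best log-scores ending in each state
40 | % psi: for each entry of delta, the preceding state that maximised it
41 | % qt: 1 x T vector of the most likely state at each sample
42 | % As Springer et al's algorithm is a duration dependent HMM, there is no
43 | % need to calculate the A_matrix, as the transition between states is only
44 | % dependent on the state durations.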
45 | %
46 | %% Copyright (C) 2016 David Springer
47 | % dave.springer@gmail.com
48 | %
49 | % This program is free software: you can redistribute it and/or modify
50 | % it under the terms of the GNU General Public License as published by
51 | % the Free Software Foundation, either version 3 of the License, or
52 | % any later version.
53 | %
54 | % This program is distributed in the hope that it will be useful,
55 | % but WITHOUT ANY WARRANTY; without even the implied warranty of
56 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
57 | % GNU General Public License for more details.
58 | %
59 | % You should have received a copy of the GNU General Public License
60 | % along with this program. If not, see .
61 |
62 | function [delta, psi, qt] = viterbiDecodePCG_Springer(observation_sequence, pi_vector, b_matrix, total_obs_distribution, heartrate, systolic_time, Fs,figures)
63 | % Decode the most likely S1 / systole / S2 / diastole state sequence with
64 | % the extended, duration-dependent Viterbi algorithm.
65 | %
66 | % observation_sequence holds one observation per row. total_obs_distribution
67 | % is a cell array: element 1 is used as the mean and element 2 as the
68 | % covariance of the multivariate normal that models P(obs). figures
69 | % defaults to false. delta and psi have T + max_duration_D - 1 rows and one
70 | % column per state; qt is the 1 x T decoded state sequence.
71 |
72 | if nargin < 8
73 |     figures = false;
74 | end
75 |
76 | %% Preliminary
77 | springer_options = default_Springer_HSMM_options;
78 |
79 | % Number of time steps. size(...,1) is used instead of length() so that a
80 | % sequence with fewer rows than feature columns is still measured in rows:
81 | T = size(observation_sequence,1);
82 | N = 4; % Number of states
83 |
84 | % Setting the maximum duration of a single state. This is set to an entire
85 | % heart cycle:
86 | max_duration_D = round((1*(60/heartrate))*Fs);
87 |
88 | %Initialising the variables that are needed to find the optimal state path along
89 | %the observation sequence.
90 | %delta_t(j), as defined on page 264 of Rabiner, is the best score (highest
91 | %probability) along a single path, at time t, which accounts for the first
92 | %t observations and ends in State s_j. In this case, the length of the
93 | %matrix is extended by max_duration_D samples, in order to allow the use
94 | %of the extended Viterbi algorithm:
95 | delta = ones(T+ max_duration_D-1,N)*-inf;
96 |
97 | %The argument that maximises the transition between states (this is
98 | %basically the previous state that had the highest transition probability
99 | %to the current state) is tracked using the psi variable.
100 | psi = zeros(T+ max_duration_D-1,N);
101 |
102 | %An additional variable, that is not included on page 264 of Rabiner, is
103 | %the state duration that maximises the delta variable. This is essential
104 | %for the duration dependent HMM.
105 | psi_duration =zeros(T + max_duration_D-1,N);
106 |
107 | %% Setting up observation probs
108 | observation_probs = zeros(T,N);
109 |
110 | %MLR gives P(state|obs)
111 | %Therefore, need Bayes to get P(o|state)
112 | %P(o|state) = P(state|obs) * P(obs) / P(states)
113 | %Where p(obs) is derived from a MVN distribution from all
114 | %observations, and p(states) is taken from the pi_vector.
115 | %P(obs) does not depend on the state, so it is evaluated once for every
116 | %row of the sequence instead of once per state and per sample:
117 | Po_correction = mvnpdf(observation_sequence,cell2mat(total_obs_distribution(1)),cell2mat(total_obs_distribution(2)));
118 |
119 | for n = 1:N
120 |
121 |     pihat = mnrval(cell2mat(b_matrix(n)),observation_sequence(:,:));
122 |
123 |     %mnrval returns one column per class. The second column is the class
124 |     %that was labelled 2 when the model was trained.
125 |     %NOTE(review): presumably that class is "state n" - confirm against
126 |     %trainBandPiMatricesSpringer.
127 |     observation_probs(:,n) = (pihat(:,2).*Po_correction)./pi_vector(n);
128 |
129 | end
130 |
131 | %% Setting up state duration probabilities, using Gaussian distributions:
132 | [d_distributions, max_S1, min_S1, max_S2, min_S2, max_systole, min_systole, max_diastole, min_diastole] = get_duration_distributions(heartrate,systolic_time);
133 |
134 | % Duration limits indexed by state: 1 = S1, 2 = systole, 3 = S2, 4 = diastole.
135 | min_durations = [min_S1, min_systole, min_S2, min_diastole];
136 | max_durations = [max_S1, max_systole, max_S2, max_diastole];
137 |
138 | duration_probs = zeros(N,3*Fs);
139 | duration_sum = zeros(N,1);
140 | for state_j = 1:N
141 |     for d = 1:max_duration_D
142 |
143 |         duration_probs(state_j,d) = mvnpdf(d,cell2mat(d_distributions(state_j,1)),cell2mat(d_distributions(state_j,2)));
144 |
145 |         % Durations outside the allowed range for this state are given the
146 |         % smallest positive probability rather than the Gaussian value:
147 |         if(d < min_durations(state_j) || d > max_durations(state_j))
148 |             duration_probs(state_j,d)= realmin;
149 |         end
150 |
151 |     end
152 |     duration_sum(state_j) = sum(duration_probs(state_j,:));
153 | end
154 |
155 |
156 | % Drop any columns beyond 3*Fs (duration_sum above is left unchanged):
157 | if(length(duration_probs)>3*Fs)
158 |     duration_probs(:,(3*Fs+1):end) = [];
159 | end
160 |
161 | if(figures)
162 |     figure('Name', 'Duration probabilities');
163 |     plot(duration_probs(1,:)./ duration_sum(1),'Linewidth',2);
164 |     hold on;
165 |     plot(duration_probs(2,:)./ duration_sum(2),'r','Linewidth',2);
166 |     hold on;
167 |     plot(duration_probs(3,:)./ duration_sum(3),'g','Linewidth',2);
168 |     hold on;
169 |     plot(duration_probs(4,:)./ duration_sum(4),'k','Linewidth',2);
170 |     hold on;
171 |     legend('S1 Duration','Systolic Duration','S2 Duration','Diastolic Duration');
172 |     pause();
173 | end
174 | %% Perform the actual Viterbi Recursion:
175 |
176 |
177 | qt = zeros(1,length(delta));
178 | %% Initialisation Step
179 |
180 | %Equation 32a and 69a, but leave out the probability of being in
181 | %state i for only 1 sample, as the state could have started before time t =
182 | %0.
183 |
184 | delta(1,:) = log(pi_vector) + log(observation_probs(1,:)); %first value is the probability of initially being in each state * probability of observation 1 coming from each state
185 |
186 | %Equation 32b
187 | psi(1,:) = -1;
188 |
189 |
190 | % The state duration probabilities are now used.
191 | %Change the a_matrix to have zeros along the diagonal, therefore, only
192 | %relying on the duration probabilities and observation probabilities to
193 | %influence change in states:
194 | %This would only be valid in sequences where the transition between states
195 | %follows a distinct order.
196 | a_matrix = [0,1,0,0;0 0 1 0; 0 0 0 1;1 0 0 0];
197 |
198 |
199 | %% Run the core Viterbi algorithm
200 |
201 | if(springer_options.use_mex)
202 |
203 |     %% Run Mex code
204 |     % Ensure you have compiled viterbi_Springer.c with mex on the
205 |     % native machine before running this:
206 |     [delta, psi, psi_duration] = viterbi_Springer(N,T,a_matrix,max_duration_D,delta,observation_probs,duration_probs,psi, duration_sum);
207 |
208 |
209 | else
210 |
211 |     %% Recursion
212 |
213 |     %% The Extended Viterbi algorithm:
214 |
215 |     %Equations 33a and 33b and 69a, b, c etc:
216 |     %again, omitting the p(d), as state could have started before t = 1
217 |
218 |     % This implementation extends the standard implementation of the
219 |     % duration-dependent Viterbi algorithm by allowing the durations to
220 |     % extend beyond the start and end of the time series, thereby allowing
221 |     % states to "start" and "stop" outside of the recorded signal. This
222 |     % addresses the issue of partial states at the beginning and end of the
223 |     % signal being labelled as the incorrect state. For instance, a
224 |     % short-duration diastole at the beginning of a signal looks a lot like
225 |     % systole, and can lead to labelling errors.
226 |
227 |     % Work with log observation likelihoods. Multiplying the likelihoods of
228 |     % a long window and taking the log afterwards underflows to zero, which
229 |     % would then be clamped and over-score long windows. Summing the
230 |     % per-sample logs avoids that and matches viterbi_Springer.c, which also
231 |     % replaces exact zeros by FLT_MIN (realmin('single')) before the log:
232 |     log_observation_probs = observation_probs;
233 |     log_observation_probs(log_observation_probs == 0) = double(realmin('single'));
234 |     log_observation_probs = log(log_observation_probs);
235 |
236 |     % t spans input 2 to T + max_duration_D - 1:
237 |     for t = 2:T+ max_duration_D-1
238 |         for j = 1:N
239 |             for d = 1:1:max_duration_D
240 |
241 |
242 |                 %The start of the analysis window, which is the current time
243 |                 %step, minus d (the time horizon we are currently looking back).
244 |                 %The analysis window can be seen to be starting one
245 |                 %step back each time the variable d is increased.
246 |                 % This is clamped to 1 if extending past the start of the
247 |                 % record, and T-1 if extending past the end of the record:
248 |                 start_t = t - d;
249 |                 if(start_t<1)
250 |                     start_t = 1;
251 |                 end
252 |                 if(start_t > T-1)
253 |                     start_t = T-1;
254 |                 end
255 |
256 |                 %The end of the analysis window, which is the current time
257 |                 %step, unless the time has gone past T, the end of the record, in
258 |                 %which case it is truncated to T. This allows the analysis
259 |                 %window to extend past the end of the record, so that the
260 |                 %timing durations of the states do not have to "end" at the end
261 |                 %of the record.
262 |                 end_t = t;
263 |                 if(t>T)
264 |                     end_t = T;
265 |                 end
266 |
267 |
268 |                 %Find the max_delta and index of that from the previous step
269 |                 %and the transition to the current step:
270 |                 %This is the first half of the expression of equation 33a from
271 |                 %Rabiner:
272 |                 [max_delta, max_index] = max(delta(start_t,:)+log(a_matrix(:,j))');
273 |
274 |
275 |                 %Log-probability of seeing all the observations in the
276 |                 %analysis window in state j:
277 |                 emission_probs = sum(log_observation_probs(start_t:end_t,j));
278 |
279 |                 if(emission_probs == 0 || isnan(emission_probs))
280 |                     emission_probs =realmin;
281 |                 end
282 |
283 |
284 |                 %Find the total probability of transitioning from the last
285 |                 %state to this one, with the observations and being in the same
286 |                 %state for the analysis window. This is the duration-dependent
287 |                 %variation of equation 33a from Rabiner:
288 |                 delta_temp = max_delta + (emission_probs)+ log((duration_probs(j,d)./duration_sum(j)));
289 |
290 |
291 |                 %Unlike equation 33a from Rabiner, the maximum delta could come
292 |                 %from multiple d values, or from multiple size of the analysis
293 |                 %window. Therefore, only keep the maximum delta value over the
294 |                 %entire analysis window:
295 |                 %If this probability is greater than the last greatest,
296 |                 %update the delta matrix and the time duration variable:
297 |                 if(delta_temp>delta(t,j))
298 |                     delta(t,j) = delta_temp;
299 |                     psi(t,j) = max_index;
300 |                     psi_duration(t,j) = d;
301 |                 end
302 |
303 |             end
304 |         end
305 |     end
306 | end
307 |
308 |
309 | %% Termination
310 |
311 | % For the extended case, need to find max prob after end of actual
312 | % sequence:
313 |
314 | % Find just the delta after the end of the actual signal
315 | temp_delta = delta(T+1:end,:);
316 | %Find the maximum value in this section, and which state it is in:
317 | [~, idx] = max(temp_delta(:));
318 | [pos, ~] = ind2sub(size(temp_delta), idx);
319 |
320 | % Change this position to the real position in delta matrix:
321 | pos = pos+T;
322 |
323 | %1) Find the last most probable state
324 | %2) From the psi matrix, find the most likely preceding state
325 | %3) Find the duration of the last state from the psi_duration matrix
326 | %4) From the onset to the offset of this state, set to the most likely state
327 | %5) Repeat steps 2 - 5 until reached the beginning of the signal
328 |
329 |
330 | %The initial steps 1-4 are equation 34b in Rabiner. 1) finds P*, the most
331 | %likely last state in the sequence, 2) finds the state that precedes the
332 | %last most likely state, 3) finds the onset in time of the last state
333 | %(included due to the duration-dependency) and 4) sets the most likely last
334 | %state to the q_t variable.
335 |
336 | %1)
337 | [~, state] = max(delta(pos,:),[],2);
338 |
339 | %2)
340 | offset = pos;
341 | preceding_state = psi(offset,state);
342 |
343 | %3)
344 | onset = offset - psi_duration(offset,state)+1;
345 |
346 | %4)
347 | qt(onset:offset) = state;
348 |
349 | %The state is then updated to the preceding state, found above, which must
350 | %end when the last most likely state started in the observation sequence:
351 | state = preceding_state;
352 |
353 | count = 0;
354 | %Keep stepping back while the onset of the current state is after the
355 | %second sample of the signal:
356 | while(onset > 2)
357 |
358 |     %2)
359 |     offset = onset-1;
360 |     preceding_state = psi(offset,state);
361 |
362 |     %3)
363 |     onset = offset - psi_duration(offset,state)+1;
364 |
365 |     %4)
366 |     if(onset<2)
367 |         onset = 1;
368 |     end
369 |     qt(onset:offset) = state;
370 |     state = preceding_state;
371 |     count = count +1;
372 |
373 |     % Safety stop so that the back-tracking cannot loop forever:
374 |     if(count> 1000)
375 |         break;
376 |     end
377 | end
378 |
379 | % Discard the part of the path that lies beyond the end of the signal:
380 | qt = qt(1:T);
--------------------------------------------------------------------------------
/viterbi_Springer.c:
--------------------------------------------------------------------------------
1 | /* Many people have requested a simple example on how to create a C
2 | * MEX-file. In response to this request, the following C MEX-file,
3 | * named mexample, is provided as an introduction to cmex
4 | * programming. mexample is a commented program which describes how to
5 | * use the following MEX-functions:
6 | *
7 | * mexErrMsgTxt
8 | * mxCreateDoubleMatrix
9 | * mxGetM
10 | * mxGetN
11 | * mxGetPr
12 | * mxIsComplex
13 | * mxIsSparse
14 | * mxIsChar
15 | *
16 | * In MATLAB, mexample accepts two inputs and returns one output. The
17 | * inputs are a 2x2 array denoted as ARRAY_IN and a 2x1 vector denoted as
18 | * VECTOR_IN. The function calculates the determinant of ARRAY_IN,
19 | * multiplies each element of VECTOR_IN by the determinant, and returns
20 | * this as the output, denoted by VECTOR_OUT. All inputs and outputs to
21 | * this function are assumed to be real (not complex). */
22 |
23 | /* First, include some basic header files. The header file
24 | * "mex.h" is required for a MEX-file. Add any other header
25 | * files that your function may need here. */
26 |
27 | #include "mex.h"
28 | #include <float.h>
29 | #include <string.h>
30 | #include <math.h> /* log */
31 | /* A C MEX-file generally consists of two sections. The first
32 | * section is a function or set of functions which performs
33 | * the actual mathematical calculation that the MEX-function
34 | * is to carry out. In this example, the function is called
35 | * workFcn(). The second section is a gateway between MATLAB
36 | * and the first section, and consists of a function called
37 | * mexFunction. The gateway is responsible for several tasks,
38 | * including:
39 | *
40 | * I) error checking,
41 | * II) allocating memory for return arguments,
42 | * III) converting data from MATLAB into a format that
43 | * the workFcn function can use, and vice versa.
44 | *
45 | * The first function to be written in this example, then, is
46 | * workFcn:
47 | *
48 | * Since C and MATLAB handle two-dimensional arrays
49 | * differently, we will explicitly declare the dimension of
50 | * the variable theArray. The variables, theVector and
51 | * theResult, are both one-dimensional arrays, and therefore
52 | * do not need such rigid typing. */
53 |
54 |
55 | void viterbi(
56 | int N,
57 | int T,
58 | double a_matrix[4][4],
59 | int max_duration_D,
60 | double *delta,
61 | double *observation_probs,
62 | double duration_probs [4][150],
63 | double *psi,
64 | double *psi_duration_out,
65 | double duration_sum_in[4]
66 | )
67 |
68 | {
69 |
70 | int i;
71 | int i2;
72 | int i3;
73 | int j;
74 | int t;
75 | int d;
76 |
77 |
78 |
79 | for (t = 1; t T-2){
117 | start = T-2;
118 | }
119 |
120 | /*
121 | * %The end of the analysis window, which is the current time
122 | * %step, unless the time has gone past T, the end of the record, in
123 | * %which case it is truncated to T. This allows the analysis
124 | * %window to extend past the end of the record, so that the
125 | * %timing durations of the states do not have to "end" at the end
126 | * %of the record.
127 | */
128 |
129 | end_t = t;
130 | if(end_t>T-1){
131 | end_t = T-1;
132 | }
133 |
134 |
135 | for(i = 0; i max_delta){
139 | max_delta = temp;
140 | max_index = i;
141 | }
142 | }
143 |
144 |
145 | /*//Find the normaliser for the observations at the start of the
146 | * //analysis window. The probability of seeing all the
147 | * //observations in the analysis window in state j is updated each
148 | * //time d is incrememented two lines below, so we only need to
149 | * //find the observation probabilities for one time step, each
150 | * //time d is updated:*/
151 |
152 |
153 | probs = 0;
154 | for(i2 = start; i2<=end_t; i2++){
155 |
156 | // Ensure that the probabilities aren't zero leading to -inf probabilities after log:
157 | if(observation_probs[i2 +j*T] == 0){
158 | observation_probs[i2 +j*T] = FLT_MIN;
159 | }
160 |
161 | probs = probs + log(observation_probs[i2 +j*T]);
162 | }
163 |
164 | if(probs ==0){
165 | probs = FLT_MIN;
166 | }
167 |
168 | emission_probs = (probs);
169 |
170 | /*Find the total probability of transitioning from the last
171 | * //state to this one, with the observations and being in the same
172 | * //state for the analysis window. This is the duration-dependant
173 | * //variation of equation 33a from Rabiner:*/
174 | delta_temp = max_delta + (emission_probs)+ (log((duration_probs[j][d-1]/duration_sum_in[j])));
175 |
176 |
177 |
178 | // Uncomment the below for debuggin:
179 | // mexPrintf("\n t:%d", t);
180 | // mexPrintf("\n j:%d", j);
181 | // mexPrintf("\n d:%d", d);
182 | // mexPrintf("\n max_delta:%f", max_delta);
183 | // mexPrintf("\n max_index:%i \n", max_index);
184 | // mexPrintf ("emission_probs: %f \n",emission_probs);
185 | // mexPrintf ("log((duration_probs[j][d-1]/duration_sum)): %f \n",log((duration_probs[j][d-1]/duration_sum_in[j])));
186 | // mexPrintf ("delta_temp: %f \n",delta_temp);
187 | // mexPrintf ("delta[t+j*(T+ max_duration_D-1)]: %f \n",delta[t+j*(T+ max_duration_D-1)]);
188 | // mexPrintf ("duration_probs[j][d]: %f \n",duration_probs[j][d]);
189 | // mexPrintf ("duration_sum_in[j]: %f \n",duration_sum_in[j]);
190 |
191 | /*
192 | * Unlike equation 33a from Rabiner, the maximum delta could come
193 | * from multiple d values, or from multiple size of the analysis
194 | * window. Therefore, only keep the maximum delta value over the
195 | * entire analysis window:
196 | * If this probability is greater than the last greatest,
197 | * update the delta matrix and the time duration variable:
198 | */
199 |
200 | if(delta_temp>delta[t+j*(T+ max_duration_D-1)]){
201 |
202 | delta[t+j*(T+ max_duration_D-1)] = delta_temp;
203 | psi[t+j*(T+ max_duration_D-1)] = max_index+1;
204 |
205 | psi_duration_out[t + j*(T+ max_duration_D-1)] = d;
206 |
207 | }
208 | }
209 | }
210 | }
211 |
212 | }
213 |
214 | /* Now, define the gateway function, i.e., mexFunction.Below
215 | * is the standard, predeclared header to mexFunction. nlhs
216 | * and nrhs are the number of left-hand and right-hand side
217 | * arguments that mexample was called with from within MATLAB.
218 | * In this example, nlhs equals 1 and nrhs should equal 2. If
219 | * not, then the user has called mexample the wrong way and
220 | * should be informed of this. plhs and prhs are arrays which
221 | * contain the pointers to the MATLAB arrays, which are
222 | * stored in a C struct called an Array. prhs is an array of
223 | * length rhs,and its pointers point to valid input data.
224 | * plhs is an array of length nlhs, and its pointers point to
225 | * invalid data (i.e., garbage). It is the job of mexFunction
226 | * to fill plhs with valid data.
227 | *
228 | * First, define the following values. This makes it much
229 | * easier to change the order of inputs to mexample, should we
230 | * want to change the function later. In addition, it makes
231 | * the code easier to read. */
232 |
233 | #define N prhs[0]
234 | #define T prhs[1]
235 | #define a_matrix prhs[2]
236 | #define max_duration_D prhs[3]
237 | #define delta prhs[4]
238 | #define observation_probs prhs[5]
239 | #define duration_probs prhs[6]
240 | #define psi prhs[7]
241 | #define duration_sum prhs[8]
242 |
243 |
244 | #define delta_out plhs[0]
245 | #define psi_out plhs[1]
246 | #define psi_duration plhs[2]
247 |
248 |
249 | void mexFunction(
250 |         int nlhs,
251 |         mxArray *plhs[],
252 |         int nrhs,
253 |         const mxArray *prhs[]
254 |         )
255 | {
256 |     /* MEX gateway for viterbi(): checks the argument counts and sizes,
257 |      * converts the small fixed-size inputs into C arrays, runs the
258 |      * recursion and returns delta, psi and psi_duration.
259 |      *
260 |      * Inputs (prhs): N, T, a_matrix, max_duration_D, delta,
261 |      * observation_probs, duration_probs, psi, duration_sum.
262 |      * Outputs (plhs): delta_out, psi_out, psi_duration, each with
263 |      * (T + max_duration_D - 1) rows and one column per state.
264 |      * Raises a MATLAB error when an argument count or size is not
265 |      * supported. */
266 |
267 |     /* Fixed-size copies handed to viterbi(); zero-filled so that entries
268 |      * not supplied by a smaller input are never read uninitialised. */
269 |     double a_matrix_in[4][4] = {{0}};
270 |     double duration_sum_in[4] = {0};
271 |     double duration_probs_matrix[4][150] = {{0}};
272 |
273 |     mxArray *observation_probs_copy; /* scratch copy that viterbi() may modify */
274 |
275 |     int actual_T;
276 |     int actual_N;
277 |     int max_duration_D_val;
278 |     int extended_T; /* number of rows of delta / psi / psi_duration */
279 |     size_t work_bytes;
280 |
281 |     int row,col; /* loop indices */
282 |
283 |     /* Step 1: Error checking of the argument counts */
284 |     if (nlhs!=3)
285 |         mexErrMsgTxt("viterbi_Springer requires 3 output arguments.");
286 |
287 |     if (nrhs!=9)
288 |         mexErrMsgTxt("viterbi_Springer requires 9 input arguments.");
289 |
290 |
291 |     actual_T = (int)mxGetM(observation_probs);
292 |     actual_N = (int)mxGetN(observation_probs);
293 |
294 |     max_duration_D_val = (int)mxGetScalar(max_duration_D);
295 |     extended_T = actual_T + max_duration_D_val - 1;
296 |
297 |     /* Step 1b: Size checks. The arrays above have a fixed size, so larger
298 |      * inputs would be written past their end. */
299 |     if (max_duration_D_val < 1 || actual_T < 1)
300 |         mexErrMsgTxt("viterbi_Springer: T and max_duration_D must be at least 1.");
301 |
302 |     if (actual_N > 4 || mxGetM(a_matrix) > 4 || mxGetN(a_matrix) > 4 || mxGetM(duration_sum) > 4)
303 |         mexErrMsgTxt("viterbi_Springer supports at most 4 states.");
304 |
305 |     if (mxGetM(duration_probs) > 4 || mxGetN(duration_probs) > 150)
306 |         mexErrMsgTxt("viterbi_Springer: duration_probs must be at most 4 x 150.");
307 |
308 |     /* delta and psi are copied in full below, so they must already have
309 |      * the size of the outputs. */
310 |     if ((int)mxGetM(delta) != extended_T || (int)mxGetN(delta) != actual_N ||
311 |             (int)mxGetM(psi) != extended_T || (int)mxGetN(psi) != actual_N)
312 |         mexErrMsgTxt("viterbi_Springer: delta and psi must be (T + max_duration_D - 1) x N.");
313 |
314 |
315 |     /* Step 2: Allocate memory for return argument(s) */
316 |     delta_out = mxCreateDoubleMatrix(extended_T, actual_N, mxREAL);
317 |     psi_out = mxCreateDoubleMatrix(extended_T, actual_N, mxREAL);
318 |     psi_duration = mxCreateDoubleMatrix(extended_T, actual_N, mxREAL);
319 |
320 |     /* Step 3: Convert the small inputs to C arrays.
321 |      * MATLAB stores a two-dimensional matrix in memory as a one-
322 |      * dimensional array. If the matrix is size MxN, then the
323 |      * first M elements of the one-dimensional array correspond to
324 |      * the first column of the matrix, and the next M elements
325 |      * correspond to the second column, etc. The following loops
326 |      * convert from MATLAB format to C format: */
327 |
328 |     for (col=0; col < mxGetN(a_matrix); col++){
329 |         for (row=0; row < mxGetM(a_matrix); row++){
330 |             a_matrix_in[row][col] =(mxGetPr(a_matrix))[row+col*mxGetM(a_matrix)];
331 |         }
332 |     }
333 |
334 |     for (col=0; col < mxGetM(duration_sum); col++){
335 |         duration_sum_in[col] =(mxGetPr(duration_sum))[col];
336 |     }
337 |
338 |     for (col=0; col < mxGetN(duration_probs); col++){
339 |         for (row=0; row < mxGetM(duration_probs); row++){
340 |             duration_probs_matrix[row][col] =(mxGetPr(duration_probs))[row+col*mxGetM(duration_probs)];
341 |         }
342 |     }
343 |
344 |     /* viterbi() updates delta, psi and observation_probs in place. The
345 |      * input arrays belong to the caller and must not be written to, so
346 |      * the recursion runs on the output arrays (pre-loaded with the
347 |      * incoming delta and psi) and on a private copy of the observation
348 |      * probabilities. */
349 |     work_bytes = (size_t)actual_N * (size_t)extended_T * sizeof(double);
350 |     memcpy ( mxGetPr(delta_out), mxGetPr(delta), work_bytes);
351 |     memcpy ( mxGetPr(psi_out), mxGetPr(psi), work_bytes);
352 |     observation_probs_copy = mxDuplicateArray(observation_probs);
353 |
354 |     /* Step 4: Call the work function */
355 |     viterbi(actual_N,actual_T,a_matrix_in,max_duration_D_val,mxGetPr(delta_out),mxGetPr(observation_probs_copy),duration_probs_matrix,mxGetPr(psi_out),mxGetPr(psi_duration),duration_sum_in);
356 |
357 |     mxDestroyArray(observation_probs_copy);
358 |
359 | }
--------------------------------------------------------------------------------