├── example_data.mat ├── README.md ├── normalise_signal.m ├── default_Springer_HSMM_options.m ├── Hilbert_Envelope.m ├── run_Example_Springer_Script.m ├── expand_qt.m ├── get_PSD_feature_Springer_HMM.m ├── getDWT.m ├── runSpringerSegmentationAlgorithm.m ├── butterworth_high_pass_filter.m ├── butterworth_low_pass_filter.m ├── Homomorphic_Envelope_with_Hilbert.m ├── getHeartRateSchmidt.m ├── trainSpringerSegmentationAlgorithm.m ├── get_duration_distributions.m ├── getSpringerPCGFeatures.m ├── schmidt_spike_removal.m ├── trainBandPiMatricesSpringer.m ├── labelPCGStates.m ├── viterbi_Springer.c └── viterbiDecodePCG_Springer.m /example_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidspringer/Springer-Segmentation-Code/HEAD/example_data.mat -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Springer-Segmentation-Code 2 | Heart sound segmentation code based on duration-dependent HMM 3 | 4 | This is MATLAB code to run the heart sound segmentation algorithm as outlined in the publication: 5 | 6 | D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 7 | Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 8 | 9 | The code includes the feature extraction, training of the duration-dependent HMM, 10 | and the decoding of the most likely sequence of states using an extended Viterbi algorithm. 11 | 12 | An example of the code at work can be seen in "run_Example_Springer_Script.m". 13 | 14 | Copyright (C) 2016 David Springer 15 | dave.springer@gmail.com 16 | 17 | This program is free software: you can redistribute it and/or modify 18 | it under the terms of the GNU General Public License as published by 19 | the Free Software Foundation, either version 3 of the License, or 20 | any later version. 
21 | 22 | This program is distributed in the hope that it will be useful, 23 | but WITHOUT ANY WARRANTY; without even the implied warranty of 24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 25 | GNU General Public License for more details. 26 | 27 | You should have received a copy of the GNU General Public License 28 | along with this program. If not, see <http://www.gnu.org/licenses/>. 29 | 30 | -------------------------------------------------------------------------------- /normalise_signal.m: -------------------------------------------------------------------------------- 1 | % function [normalised_signal] = normalise_signal(signal) 2 | % 3 | % This function subtracts the mean and divides by the standard deviation of 4 | % a (1D) signal in order to normalise it for machine learning applications. 5 | % 6 | %% Inputs: 7 | % signal: the original signal 8 | % 9 | %% Outputs: 10 | % normalised_signal: the original signal, minus the mean and divided by 11 | % the standard deviation. 12 | % 13 | % Developed by David Springer for the paper: 14 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 15 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 16 | % 17 | %% Copyright (C) 2016 David Springer 18 | % dave.springer@gmail.com 19 | % 20 | % This program is free software: you can redistribute it and/or modify 21 | % it under the terms of the GNU General Public License as published by 22 | % the Free Software Foundation, either version 3 of the License, or 23 | % any later version. 24 | % 25 | % This program is distributed in the hope that it will be useful, 26 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 27 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 28 | % GNU General Public License for more details. 29 | % 30 | % You should have received a copy of the GNU General Public License 31 | % along with this program. If not, see <http://www.gnu.org/licenses/>. 
32 | 33 | function [normalised_signal] = normalise_signal(signal) 34 | 35 | mean_of_signal = mean(signal); 36 | 37 | standard_deviation = std(signal); 38 | 39 | normalised_signal = (signal - mean_of_signal)./standard_deviation; 40 | 41 | -------------------------------------------------------------------------------- /default_Springer_HSMM_options.m: -------------------------------------------------------------------------------- 1 | % function springer_options = default_Springer_HSMM_options() 2 | % 3 | % The default options to be used with the Springer segmentation algorithm. 4 | % USAGE: springer_options = default_Springer_HSMM_options 5 | % 6 | % Developed for use in the paper: 7 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 8 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 9 | % 10 | %% Copyright (C) 2016 David Springer 11 | % dave.springer@gmail.com 12 | % 13 | % This program is free software: you can redistribute it and/or modify 14 | % it under the terms of the GNU General Public License as published by 15 | % the Free Software Foundation, either version 3 of the License, or 16 | % any later version. 17 | % 18 | % This program is distributed in the hope that it will be useful, 19 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | % GNU General Public License for more details. 22 | % 23 | % You should have received a copy of the GNU General Public License 24 | % along with this program. If not, see . 
25 | 26 | function springer_options = default_Springer_HSMM_options() 27 | 28 | %% The sampling frequency at which to extract signal features: 29 | springer_options.audio_Fs = 1000; 30 | 31 | %% The downsampled frequency 32 | %Set to 50 in Springer paper 33 | springer_options.audio_segmentation_Fs = 50; 34 | 35 | 36 | %% Tolerance for S1 and S2 localization 37 | springer_options.segmentation_tolerance = 0.1;%seconds 38 | 39 | %% Whether to use the mex code or not: 40 | % The mex code currently has a bug. This will be fixed asap. 41 | springer_options.use_mex = false; 42 | 43 | %% Whether to use the wavelet function or not: 44 | springer_options.include_wavelet_feature = false; 45 | 46 | -------------------------------------------------------------------------------- /Hilbert_Envelope.m: -------------------------------------------------------------------------------- 1 | % function [hilbert_envelope] = Hilbert_Envelope(input_signal, sampling_frequency,figures) 2 | % 3 | % This function finds the Hilbert envelope of a signal. This is taken from: 4 | % 5 | % Choi et al, Comparison of envelope extraction algorithms for cardiac sound 6 | % signal segmentation, Expert Systems with Applications, 2008 7 | % 8 | %% Inputs: 9 | % input_signal: the original signal 10 | % samplingFrequency: the signal's sampling frequency 11 | % figures: (optional) boolean variable to display a figure of both the 12 | % original and normalised signal 13 | % 14 | %% Outputs: 15 | % hilbert_envelope is the hilbert envelope of the original signal 16 | % 17 | % This code was developed by David Springer for comparison purposes in the 18 | % paper: 19 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 20 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 
21 | % 22 | %% Copyright (C) 2016 David Springer 23 | % dave.springer@gmail.com 24 | % 25 | % This program is free software: you can redistribute it and/or modify 26 | % it under the terms of the GNU General Public License as published by 27 | % the Free Software Foundation, either version 3 of the License, or 28 | % any later version. 29 | % 30 | % This program is distributed in the hope that it will be useful, 31 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 32 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 33 | % GNU General Public License for more details. 34 | % 35 | % You should have received a copy of the GNU General Public License 36 | % along with this program. If not, see . 37 | 38 | function hilbert_envelope = Hilbert_Envelope(input_signal, sampling_frequency,figures) 39 | 40 | if nargin <3, 41 | figures = 0; 42 | end 43 | 44 | 45 | hilbert_envelope = abs(hilbert(input_signal)); %find the envelope of the signal using the Hilbert transform 46 | 47 | if(figures) 48 | figure('Name', 'Hilbert Envelope'); 49 | plot(input_signal'); 50 | hold on; 51 | plot(hilbert_envelope,'r'); 52 | legend('Original Signal','Hilbert Envelope'); 53 | pause(); 54 | end -------------------------------------------------------------------------------- /run_Example_Springer_Script.m: -------------------------------------------------------------------------------- 1 | %% Example Springer script 2 | % A script to demonstrate the use of the Springer segmentation algorithm 3 | 4 | %% Copyright (C) 2016 David Springer 5 | % dave.springer@gmail.com 6 | % 7 | % This program is free software: you can redistribute it and/or modify 8 | % it under the terms of the GNU General Public License as published by 9 | % the Free Software Foundation, either version 3 of the License, or 10 | % any later version. 
11 | % 12 | % This program is distributed in the hope that it will be useful, 13 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | % GNU General Public License for more details. 16 | % 17 | % You should have received a copy of the GNU General Public License 18 | % along with this program. If not, see . 19 | 20 | %% 21 | close all; 22 | clear all; 23 | 24 | %% Load the default options: 25 | % These options control options such as the original sampling frequency of 26 | % the data, the sampling frequency for the derived features and whether the 27 | % mex code should be used for the Viterbi decoding: 28 | springer_options = default_Springer_HSMM_options; 29 | 30 | %% Load the audio data and the annotations: 31 | % These are 6 example PCG recordings, downsampled to 1000 Hz, with 32 | % annotations of the R-peak and end-T-wave positions. 33 | load('example_data.mat'); 34 | 35 | %% Split the data into train and test sets: 36 | % Select the first 5 recordings for training and the sixth for testing: 37 | train_recordings = example_data.example_audio_data([1:5]); 38 | train_annotations = example_data.example_annotations([1:5],:); 39 | 40 | test_recordings = example_data.example_audio_data(6); 41 | test_annotations = example_data.example_annotations(6,:); 42 | 43 | 44 | %% Train the HMM: 45 | [B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(train_recordings,train_annotations,springer_options.audio_Fs, false); 46 | 47 | %% Run the HMM on an unseen test recording: 48 | % And display the resulting segmentation 49 | numPCGs = length(test_recordings); 50 | 51 | for PCGi = 1:numPCGs 52 | [assigned_states] = runSpringerSegmentationAlgorithm(test_recordings{PCGi}, springer_options.audio_Fs, B_matrix, pi_vector, total_obs_distribution, true); 53 | end 54 | 55 | -------------------------------------------------------------------------------- /expand_qt.m: 
-------------------------------------------------------------------------------- 1 | % function expanded_qt = expand_qt(original_qt, old_fs, new_fs, new_length) 2 | % 3 | % Function to expand the derived HMM states to a higher sampling frequency. 4 | % 5 | % Developed by David Springer for comparison purposes in the paper: 6 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 7 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 8 | % 9 | %% INPUTS: 10 | % original_qt: the original derived states from the HMM 11 | % old_fs: the old sampling frequency of the original_qt 12 | % new_fs: the desired sampling frequency 13 | % new_length: the desired length of the qt signal 14 | 15 | %% Outputs: 16 | % expanded_qt: the expanded qt, to the new FS and length 17 | % 18 | %% Copyright (C) 2016 David Springer 19 | % dave.springer@gmail.com 20 | % 21 | % This program is free software: you can redistribute it and/or modify 22 | % it under the terms of the GNU General Public License as published by 23 | % the Free Software Foundation, either version 3 of the License, or 24 | % any later version. 25 | % 26 | % This program is distributed in the hope that it will be useful, 27 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 28 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29 | % GNU General Public License for more details. 30 | % 31 | % You should have received a copy of the GNU General Public License 32 | % along with this program. If not, see . 
33 | 34 | function expanded_qt = expand_qt(original_qt, old_fs, new_fs, new_length) 35 | 36 | original_qt = original_qt(:)'; 37 | expanded_qt = zeros(new_length,1); 38 | 39 | indeces_of_changes = find(diff(original_qt)); 40 | 41 | indeces_of_changes = [indeces_of_changes, length(original_qt)]; 42 | 43 | start_index = 0; 44 | for i = 1:length(indeces_of_changes) 45 | 46 | start_index; 47 | end_index = indeces_of_changes(i); 48 | 49 | mid_point = round((end_index - start_index)/2) + start_index; 50 | 51 | value_at_mid_point = original_qt(mid_point); 52 | 53 | expanded_start_index = round((start_index./old_fs).*new_fs) + 1; 54 | expanded_end_index = round((end_index./(old_fs)).*new_fs); 55 | 56 | if(expanded_end_index > new_length) 57 | expanded_end_index = new_length; 58 | end 59 | 60 | expanded_qt(expanded_start_index:expanded_end_index) = value_at_mid_point; 61 | 62 | start_index = end_index; 63 | end -------------------------------------------------------------------------------- /get_PSD_feature_Springer_HMM.m: -------------------------------------------------------------------------------- 1 | %cfunction [psd] = get_PSD_feature_Springer_HMM(data, sampling_frequency, frequency_limit_low, frequency_limit_high, figures) 2 | % 3 | % PSD-based feature extraction for heart sound segmentation. 4 | % 5 | %% INPUTS: 6 | % data: this is the audio waveform 7 | % sampling_frequency is self-explanatory 8 | % frequency_limit_low is the lower-bound on the frequency range you want to 9 | % analyse 10 | % frequency_limit_high is the upper-bound on the frequency range 11 | % figures: (optional) boolean variable to display figures 12 | % 13 | %% OUTPUTS: 14 | % psd is the array of maximum PSD values between the max and min limits, 15 | % resampled to the same size as the original data. 16 | % 17 | % This code was developed by David Springer in the paper: 18 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 19 | % Segmentation," IEEE Trans. Biomed. 
Eng., In Press, 2015. 20 | % 21 | %% Copyright (C) 2016 David Springer 22 | % dave.springer@gmail.com 23 | % 24 | % This program is free software: you can redistribute it and/or modify 25 | % it under the terms of the GNU General Public License as published by 26 | % the Free Software Foundation, either version 3 of the License, or 27 | % any later version. 28 | % 29 | % This program is distributed in the hope that it will be useful, 30 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 31 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 32 | % GNU General Public License for more details. 33 | % 34 | % You should have received a copy of the GNU General Public License 35 | % along with this program. If not, see . 36 | 37 | function [psd] = get_PSD_feature_Springer_HMM(data, sampling_frequency, frequency_limit_low, frequency_limit_high, figures) 38 | 39 | if nargin < 5 40 | figures = 0; 41 | end 42 | 43 | % Find the spectrogram of the signal: 44 | [~,F,T,P] = spectrogram(data,sampling_frequency/40,round(sampling_frequency/80),1:1:round(sampling_frequency/2),sampling_frequency); 45 | 46 | if(figures) 47 | figure(); 48 | surf(T,F,10*log(P),'edgecolor','none'); axis tight; 49 | view(0,90); 50 | xlabel('Time (Seconds)'); ylabel('Hz'); 51 | pause(); 52 | end 53 | 54 | [~, low_limit_position] = min(abs(F - frequency_limit_low)); 55 | [~, high_limit_position] = min(abs(F - frequency_limit_high)); 56 | 57 | 58 | % Find the mean PSD over the frequency range of interest: 59 | psd = mean(P(low_limit_position:high_limit_position,:)); 60 | 61 | 62 | if(figures) 63 | t4 = (1:length(psd))./sampling_frequency; 64 | t3 = (1:length(data))./sampling_frequency; 65 | figure('Name', 'PSD Feature'); 66 | 67 | plot(t3,(data - mean(data))./std(data),'c'); 68 | hold on; 69 | 70 | plot(t4, (psd - mean(psd))./std(psd),'k'); 71 | 72 | pause(); 73 | end -------------------------------------------------------------------------------- /getDWT.m: 
-------------------------------------------------------------------------------- 1 | % function [cD cA] = getDWT(X,N,Name) 2 | % 3 | % finds the discrete wavelet transform at level N for signal X using the 4 | % wavelet specified by Name. 5 | % 6 | %% Inputs: 7 | % X: the original signal 8 | % N: the decomposition level 9 | % Name: the wavelet name to use 10 | % 11 | %% Outputs: 12 | % cD is a N-row matrix containing the detail coefficients up to N levels 13 | % cA is the same for the approximations 14 | 15 | % This code was developed by David Springer for comparison purposes in the 16 | % paper: 17 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 18 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 19 | % 20 | %% Copyright (C) 2016 David Springer 21 | % dave.springer@gmail.com 22 | % 23 | % This program is free software: you can redistribute it and/or modify 24 | % it under the terms of the GNU General Public License as published by 25 | % the Free Software Foundation, either version 3 of the License, or 26 | % any later version. 27 | % 28 | % This program is distributed in the hope that it will be useful, 29 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 30 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31 | % GNU General Public License for more details. 32 | % 33 | % You should have received a copy of the GNU General Public License 34 | % along with this program. If not, see . 
35 | 36 | function [cD cA] = getDWT(X,N,Name) 37 | 38 | 39 | %No DWT available for Morlet - therefore perform CWT: 40 | if(strcmp(Name,'morl')) 41 | 42 | c = cwt(X,1:N,'morl'); 43 | 44 | cD = c; 45 | cA = c; 46 | else 47 | %Preform wavelet decomposition 48 | 49 | [c,l] = wavedec(X,N,Name); 50 | 51 | %Reorder the details based on the structure of the wavelet 52 | %decomposition (see help in wavedec.m) 53 | len = length(X); 54 | cD = zeros(N,len); 55 | for k = 1:N 56 | d = detcoef(c,l,k); 57 | d = d(:)'; 58 | d = d(ones(1,2^k),:); 59 | cD(k,:) = wkeep1(d(:)',len); 60 | end 61 | cD = cD(:); 62 | 63 | %Space cD according to spacing of floating point numbers: 64 | I = find(abs(cD). 40 | 41 | function assigned_states = runSpringerSegmentationAlgorithm(audio_data, Fs, B_matrix, pi_vector, total_observation_distribution, figures) 42 | 43 | %% Preliminary 44 | if(nargin < 6) 45 | figures = false; 46 | end 47 | 48 | %% Get PCG Features: 49 | 50 | [PCG_Features, featuresFs] = getSpringerPCGFeatures(audio_data, Fs); 51 | 52 | %% Get PCG heart rate 53 | 54 | [heartRate, systolicTimeInterval] = getHeartRateSchmidt(audio_data, Fs); 55 | 56 | [~, ~, qt] = viterbiDecodePCG_Springer(PCG_Features, pi_vector, B_matrix, total_observation_distribution, heartRate, systolicTimeInterval, featuresFs); 57 | 58 | assigned_states = expand_qt(qt, featuresFs, Fs, length(audio_data)); 59 | 60 | if(figures) 61 | figure('Name','Derived state sequence'); 62 | t1 = (1:length(audio_data))./Fs; 63 | plot(t1,normalise_signal(audio_data),'k'); 64 | hold on; 65 | plot(t1,assigned_states,'r--'); 66 | xlabel('Time (s)'); 67 | legend('Audio data', 'Derived states'); 68 | end 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /butterworth_high_pass_filter.m: -------------------------------------------------------------------------------- 1 | % function high_pass_filtered_signal = 
butterworth_high_pass_filter(original_signal,order,cutoff,sampling_frequency, figures) 2 | % 3 | % High-pass filter a given signal using a forward-backward, zero-phase 4 | % butterworth filter. 5 | % 6 | %% INPUTS: 7 | % original_signal: The 1D signal to be filtered 8 | % order: The order of the filter (1,2,3,4 etc). NOTE: This order is 9 | % effectively doubled as this function uses a forward-backward filter that 10 | % ensures zero phase distortion 11 | % cutoff: The frequency cutoff for the high-pass filter (in Hz) 12 | % sampling_frequency: The sampling frequency of the signal being filtered 13 | % (in Hz). 14 | % figures (optional): boolean variable dictating the display of figures 15 | % 16 | %% OUTPUTS: 17 | % high_pass_filtered_signal: the high-pass filtered signal. 18 | % 19 | % This code is derived from the paper: 20 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a 21 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31, 22 | % no. 4, pp. 513-29, Apr. 2010. 23 | % 24 | % Developed by David Springer for comparison purposes in the paper: 25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 27 | % 28 | %% Copyright (C) 2016 David Springer 29 | % dave.springer@gmail.com 30 | % 31 | % This program is free software: you can redistribute it and/or modify 32 | % it under the terms of the GNU General Public License as published by 33 | % the Free Software Foundation, either version 3 of the License, or 34 | % any later version. 35 | % 36 | % This program is distributed in the hope that it will be useful, 37 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 38 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 39 | % GNU General Public License for more details. 40 | % 41 | % You should have received a copy of the GNU General Public License 42 | % along with this program. If not, see <http://www.gnu.org/licenses/>. 
43 | 44 | function high_pass_filtered_signal = butterworth_high_pass_filter(original_signal,order,cutoff,sampling_frequency, figures) 45 | 46 | if nargin < 5, 47 | figures = 0; 48 | end 49 | 50 | %Get the butterworth filter coefficients 51 | [B_high,A_high] = butter(order,2*cutoff/sampling_frequency,'high'); 52 | 53 | %Forward-backward filter the original signal using the butterworth 54 | %coefficients, ensuring zero phase distortion 55 | high_pass_filtered_signal = filtfilt(B_high,A_high,original_signal); 56 | 57 | if(figures) 58 | 59 | figure('Name','High-pass filter frequency response'); 60 | [sos,g] = zp2sos(B_high,A_high,1); % Convert to SOS form 61 | Hd = dfilt.df2tsos(sos,g); % Create a dfilt object 62 | h = fvtool(Hd); % Plot magnitude response 63 | set(h,'Analysis','freq') % Display frequency response 64 | 65 | figure('Name','Original vs. high-pass filtered signal'); 66 | plot(original_signal); 67 | hold on; 68 | plot(high_pass_filtered_signal,'r'); 69 | legend('Original Signal', 'High-pass filtered signal'); 70 | pause(); 71 | end 72 | 73 | -------------------------------------------------------------------------------- /butterworth_low_pass_filter.m: -------------------------------------------------------------------------------- 1 | % function low_pass_filtered_signal = butterworth_low_pass_filter(original_signal,order,cutoff,sampling_frequency, figures) 2 | % 3 | % Low-pass filter a given signal using a forward-backward, zero-phase 4 | % butterworth low-pass filter. 5 | % 6 | %% INPUTS: 7 | % original_signal: The 1D signal to be filtered 8 | % order: The order of the filter (1,2,3,4 etc). NOTE: This order is 9 | % effectively doubled as this function uses a forward-backward filter that 10 | % ensures zero phase distortion 11 | % cutoff: The frequency cutoff for the low-pass filter (in Hz) 12 | % sampling_frequency: The sampling frequency of the signal being filtered 13 | % (in Hz). 
14 | % figures (optional): boolean variable dictating the display of figures 15 | % 16 | %% OUTPUTS: 17 | % low_pass_filtered_signal: the low-pass filtered signal. 18 | % 19 | % This code is derived from the paper: 20 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a 21 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31, 22 | % no. 4, pp. 513-29, Apr. 2010. 23 | % 24 | % Developed by David Springer for comparison purposes in the paper: 25 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 26 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 27 | % 28 | %% Copyright (C) 2016 David Springer 29 | % dave.springer@gmail.com 30 | % 31 | % This program is free software: you can redistribute it and/or modify 32 | % it under the terms of the GNU General Public License as published by 33 | % the Free Software Foundation, either version 3 of the License, or 34 | % any later version. 35 | % 36 | % This program is distributed in the hope that it will be useful, 37 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 38 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 39 | % GNU General Public License for more details. 40 | % 41 | % You should have received a copy of the GNU General Public License 42 | % along with this program. If not, see <http://www.gnu.org/licenses/>. 
43 | 44 | function low_pass_filtered_signal = butterworth_low_pass_filter(original_signal,order,cutoff,sampling_frequency, figures) 45 | 46 | if nargin < 5, 47 | figures = 0; 48 | end 49 | 50 | %Get the butterworth filter coefficients 51 | [B_low,A_low] = butter(order,2*cutoff/sampling_frequency,'low'); 52 | 53 | if(figures) 54 | figure('Name','Low-pass filter frequency response'); 55 | [sos,g] = zp2sos(B_low,A_low,1); % Convert to SOS form 56 | Hd = dfilt.df2tsos(sos,g); % Create a dfilt object 57 | h = fvtool(Hd); % Plot magnitude response 58 | set(h,'Analysis','freq') % Display frequency response 59 | end 60 | 61 | 62 | %Forward-backward filter the original signal using the butterworth 63 | %coefficients, ensuring zero phase distortion 64 | low_pass_filtered_signal = filtfilt(B_low,A_low,original_signal); 65 | 66 | if(figures) 67 | figure('Name','Original vs. low-pass filtered signal'); 68 | plot(original_signal); 69 | hold on; 70 | plot(low_pass_filtered_signal,'r'); 71 | legend('Original Signal', 'Low-pass filtered signal'); 72 | pause(); 73 | end -------------------------------------------------------------------------------- /Homomorphic_Envelope_with_Hilbert.m: -------------------------------------------------------------------------------- 1 | % function homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(input_signal, sampling_frequency,lpf_frequency,figures) 2 | % 3 | % This function finds the homomorphic envelope of a signal, using the method 4 | % described in the following publications: 5 | % 6 | % S. E. Schmidt et al., ?Segmentation of heart sound recordings by a 7 | % duration-dependent hidden Markov model.,? Physiol. Meas., vol. 31, no. 4, 8 | % pp. 513?29, Apr. 2010. 9 | % 10 | % C. Gupta et al., ?Neural network classification of homomorphic segmented 11 | % heart sounds,? Appl. Soft Comput., vol. 7, no. 1, pp. 286?297, Jan. 2007. 12 | % 13 | % D. 
Gill et al., "Detection and identification of heart sounds using 14 | % homomorphic envelogram and self-organizing probabilistic model," in 15 | % Computers in Cardiology, 2005, pp. 957-960. 16 | % (However, these researchers found the homomorphic envelope of shannon 17 | % energy.) 18 | % 19 | % In I. Rezek and S. Roberts, "Envelope Extraction via Complex Homomorphic 20 | % Filtering. Technical Report TR-98-9," London, 1998, the researchers state 21 | % that the singularity at 0 when using the natural logarithm (resulting in 22 | % values of -inf) can be fixed by using a complex valued signal. They 23 | % motivate the use of the Hilbert transform to find the analytic signal, 24 | % which is a conversion of a real-valued signal to a complex-valued 25 | % signal, which is unaffected by the singularity. 26 | % 27 | % A zero-phase low-pass Butterworth filter is used to extract the envelope. 28 | %% Inputs: 29 | % input_signal: the original (1D) signal 30 | % sampling_frequency: the signal's sampling frequency (Hz) 31 | % lpf_frequency: the frequency cut-off of the low-pass filter to be used in 32 | % the envelope extraction (Default = 8 Hz as in Schmidt's publication). 33 | % figures: (optional) boolean variable dictating the display of a figure of 34 | % both the original signal and the extracted envelope: 35 | % 36 | %% Outputs: 37 | % homomorphic_envelope: The homomorphic envelope of the original 38 | % signal (not normalised). 39 | % 40 | % This code was developed by David Springer for comparison purposes in the 41 | % paper: 42 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 43 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 
44 | % 45 | %% Copyright (C) 2016 David Springer 46 | % dave.springer@gmail.com 47 | % 48 | % This program is free software: you can redistribute it and/or modify 49 | % it under the terms of the GNU General Public License as published by 50 | % the Free Software Foundation, either version 3 of the License, or 51 | % any later version. 52 | % 53 | % This program is distributed in the hope that it will be useful, 54 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 55 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 56 | % GNU General Public License for more details. 57 | % 58 | % You should have received a copy of the GNU General Public License 59 | % along with this program. If not, see . 60 | 61 | function homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(input_signal, sampling_frequency,lpf_frequency,figures) 62 | 63 | if nargin <4, 64 | figures = 0; 65 | end 66 | if nargin <3, 67 | figures = 0; 68 | lpf_frequency = 8; 69 | end 70 | 71 | %8Hz, 1st order, Butterworth LPF 72 | [B_low,A_low] = butter(1,2*lpf_frequency/sampling_frequency,'low'); 73 | homomorphic_envelope = exp(filtfilt(B_low,A_low,log(abs(hilbert(input_signal))))); 74 | 75 | % Remove spurious spikes in first sample: 76 | homomorphic_envelope(1) = [homomorphic_envelope(2)]; 77 | 78 | if(figures) 79 | figure('Name', 'Homomorphic Envelope'); 80 | plot(input_signal); 81 | hold on; 82 | plot(homomorphic_envelope,'r'); 83 | legend('Original Signal','Homomorphic Envelope') 84 | end -------------------------------------------------------------------------------- /getHeartRateSchmidt.m: -------------------------------------------------------------------------------- 1 | % function [heartRate systolicTimeInterval] = getHeartRateSchmidt(audio_data, Fs, figures) 2 | % 3 | % Derive the heart rate and the sytolic time interval from a PCG recording. 4 | % This is used in the duration-dependant HMM-based segmentation of the PCG 5 | % recording. 
6 | % 7 | % This method is based on analysis of the autocorrelation function, and the 8 | % positions of the peaks therein. 9 | % 10 | % This code is derived from the paper: 11 | % S. E. Schmidt et al., "Segmentation of heart sound recordings by a 12 | % duration-dependent hidden Markov model," Physiol. Meas., vol. 31, 13 | % no. 4, pp. 513-29, Apr. 2010. 14 | % 15 | % Developed by David Springer for comparison purposes in the paper: 16 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 17 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 18 | % 19 | %% INPUTS: 20 | % audio_data: The raw audio data from the PCG recording 21 | % Fs: the sampling frequency of the audio recording 22 | % figures: optional boolean to display figures 23 | % 24 | %% OUTPUTS: 25 | % heartRate: the heart rate of the PCG in beats per minute 26 | % systolicTimeInterval: the duration of systole, as derived from the 27 | % autocorrelation function, in seconds 28 | % 29 | %% Copyright (C) 2016 David Springer 30 | % dave.springer@gmail.com 31 | % 32 | % This program is free software: you can redistribute it and/or modify 33 | % it under the terms of the GNU General Public License as published by 34 | % the Free Software Foundation, either version 3 of the License, or 35 | % any later version. 36 | % 37 | % This program is distributed in the hope that it will be useful, 38 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 39 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 40 | % GNU General Public License for more details. 41 | % 42 | % You should have received a copy of the GNU General Public License 43 | % along with this program. If not, see . 

function [heartRate, systolicTimeInterval] = getHeartRateSchmidt(audio_data, Fs, figures)
% Estimate the heart rate and the systolic time interval of a PCG recording
% from the autocorrelation of its homomorphic envelope.
%
%   audio_data : raw PCG samples
%   Fs         : sampling frequency of audio_data (Hz)
%   figures    : (optional, default false) plot the autocorrelation together
%                with the two peaks that were selected
%
%   heartRate            : beats per minute
%   systolicTimeInterval : seconds

if nargin < 3
    figures = false;
end

%% Get heart rate:
% From Schmidt:
% "The duration of the heart cycle is estimated as the time from lag zero
% to the highest peaks between 500 and 2000 ms in the resulting
% autocorrelation"
% This is performed after filtering and spike removal:

%% 25-400Hz 4th order Butterworth band pass
audio_data = butterworth_low_pass_filter(audio_data,2,400,Fs, false);
audio_data = butterworth_high_pass_filter(audio_data,2,25,Fs);

%% Spike removal from the original paper:
audio_data = schmidt_spike_removal(audio_data,Fs);

%% Find the homomorphic envelope
homomorphic_envelope = Homomorphic_Envelope_with_Hilbert(audio_data, Fs);

%% Find the autocorrelation:
y = homomorphic_envelope - mean(homomorphic_envelope);
c = xcorr(y,'coeff');
% Keep the positive lags only, so that element k is the value at lag k samples:
signal_autocorrelation = c(length(homomorphic_envelope)+1:end);

% Search window of 0.5 s to 2 s, expressed as integer lags. The bounds are
% rounded so that sampling rates for which 0.5*Fs is not an integer still
% give valid indices, and the upper bound is clamped so that recordings
% shorter than 2 s can still be analysed.
min_index = round(0.5*Fs);
max_index = min(round(2*Fs), length(signal_autocorrelation));

if min_index < 1 || min_index > max_index
    error('getHeartRateSchmidt:recordingTooShort', ...
        'The recording must be longer than 0.5 s to estimate the heart rate.');
end

[~, index] = max(signal_autocorrelation(min_index:max_index));
true_index = index+min_index-1;

heartRate = 60/(true_index/Fs);


%% Find the systolic time interval:
% From Schmidt: "The systolic duration is defined as the time from lag zero
% to the highest peak in the interval between 200 ms and half of the heart
% cycle duration"

max_sys_duration = round(((60/heartRate)*Fs)/2);
min_sys_duration = round(0.2*Fs);

[~, pos] = max(signal_autocorrelation(min_sys_duration:max_sys_duration));
% pos is relative to the start of the search window, so the lag of the peak
% is min_sys_duration+pos-1 (same convention as true_index above):
systolic_index = min_sys_duration+pos-1;
systolicTimeInterval = systolic_index/Fs;


if(figures)
    figure('Name', 'Heart rate calculation figure');
    plot(signal_autocorrelation);
    hold on;
    plot(true_index, signal_autocorrelation(true_index),'ro');
    plot(systolic_index, signal_autocorrelation(systolic_index), 'mo');
    xlabel('Samples');
    legend('Autocorrelation', 'Position of max peak used to calculate HR', 'Position of max peak within systolic interval');
end



--------------------------------------------------------------------------------
/trainSpringerSegmentationAlgorithm.m:
--------------------------------------------------------------------------------
% function [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(PCGCellArray, annotationsArray, Fs, figures)
%
% Training the Springer HMM segmentation algorithm. Developed for use in
% the paper:
% D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
% Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
%
%% Inputs:
% PCGCellArray: A 1XN cell array of the N audio signals. For evaluation
% purposes, these signals should be from a distinct training set of
% recordings, while the algorithm should be evaluated on a separate test
% set of recordings, which are recorded from a completely different set of
% patients (for example, if there are numerous recordings from each
% patient).
% annotationsArray: a Nx2 cell array: position (n,1) = the positions of the
% R-peaks and position (n,2) = the positions of the end-T-waves
% (both in SAMPLES)
% Fs: The sampling frequency of the PCG signals
% figures (optional): boolean variable dictating the display of figures.
%
%% Outputs:
% logistic_regression_B_matrix:
% pi_vector:
% total_obs_distribution:
% As Springer et al's algorithm is a duration dependent HMM, there is no
% need to calculate the A_matrix, as the transition between states is only
% dependent on the state durations.
%
%% Copyright (C) 2016 David Springer
% dave.springer@gmail.com
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.


function [logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainSpringerSegmentationAlgorithm(PCGCellArray, annotationsArray, Fs, figures)
% Label every training recording, collect the feature rows belonging to each
% of the four states, and hand the pooled observations to
% trainBandPiMatricesSpringer.

%% Options

% Figures are off unless the caller asks for them:
if nargin < 4
    figures = false;
end

numberOfStates = 4;
recordingCount = length(PCGCellArray);

% One row per recording, one column per state; each cell holds the feature
% rows observed while the recording was in that state:
state_observation_values = cell(recordingCount, numberOfStates);

for rec = 1:recordingCount

    audio = PCGCellArray{rec};
    r_peak_positions = annotationsArray{rec,1};
    t_end_positions = annotationsArray{rec,2};

    [features, featuresFs] = getSpringerPCGFeatures(audio, Fs);

    % The first feature column is used as the envelope for labelling:
    labels = labelPCGStates(features(:,1), r_peak_positions, t_end_positions, featuresFs);

    %% Plotting assigned states:
    if figures
        figure('Name','Assigned states to PCG');

        audio_time = (1:length(audio))./Fs;
        feature_time = (1:length(features))./featuresFs;

        plot(audio_time, audio, 'k-');
        hold on;
        plot(feature_time, features, 'b-');
        plot(feature_time, labels, 'r-');

        legend('Audio','Features','States');
        pause();
    end

    %% Group together all observations from the same state in this recording:
    state_observation_values(rec,:) = arrayfun(@(s) features(labels == s,:), ...
        1:numberOfStates, 'UniformOutput', false);
end

% Save the state observation values to the main workspace of Matlab for
% later investigation if needed:
assignin('base', 'state_observation_values', state_observation_values)

%% Train the B and pi matrices after all the PCG recordings have been labelled:
[logistic_regression_B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values);

--------------------------------------------------------------------------------
/get_duration_distributions.m:
--------------------------------------------------------------------------------
% function [d_distributions max_S1 min_S1 max_S2 min_S2 max_systole min_systole max_diastole min_diastole] = get_duration_distributions(heartrate,systolic_time)
%
% This function calculates the duration distributions for each heart cycle
% state, and the minimum and maximum times for each state.
%
%% Inputs:
% heartrate is the calculated average heart rate over the entire recording
% systolic_time is the systolic time interval
%
%% Outputs:
% d_distributions is a 4 (the number of states) dimensional vector of
% gaussian mixture models (one dimensional in this case), representing the
% mean and std deviation of the duration in each state.
%
% The max and min values are self-explanatory.
%
% This code is implemented as outlined in the paper:
% S. E. Schmidt et al., "Segmentation of heart sound recordings by a
% duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
% no. 4, pp. 513-29, Apr. 2010.
%
% Developed by David Springer for comparison purposes in the paper:
% D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
% Segmentation," IEEE Trans. Biomed.
% Eng., In Press, 2015.
%
%% Copyright (C) 2016 David Springer
% dave.springer@gmail.com
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.

function [d_distributions max_S1 min_S1 max_S2 min_S2 max_systole min_systole max_diastole min_diastole] = get_duration_distributions(heartrate,systolic_time)
% Build the per-state duration statistics (in samples at the feature sampling
% rate read from default_Springer_HSMM_options) and the duration limits for
% each state.

options = default_Springer_HSMM_options;
Fs = options.audio_segmentation_Fs;

% Fixed S1 / S2 sound durations (seconds converted to whole samples):
mean_S1 = round(0.122*Fs);
std_S1  = round(0.022*Fs);
mean_S2 = round(0.094*Fs);
std_S2  = round(0.022*Fs);

% Systole: what is left of the systolic interval once S1 has been removed.
mean_systole = round(systolic_time*Fs) - mean_S1;
std_systole  = (25/1000)*Fs;

% Diastole: the remainder of the heart cycle after the systolic interval
% and the 0.094 s S2 sound.
mean_diastole = ((60/heartrate) - systolic_time - 0.094)*Fs;
std_diastole  = 0.07*mean_diastole + (6/1000)*Fs;

%% Mean (column 1) and variance (column 2) of the duration of each state,
% with rows ordered S1, systole, S2, diastole:
d_distributions = { mean_S1,       (std_S1)^2; ...
                    mean_systole,  (std_systole)^2; ...
                    mean_S2,       (std_S2)^2; ...
                    mean_diastole, (std_diastole)^2 };

% Systole and diastole limits:
systole_margin = 3*(std_systole+std_S1);
min_systole = mean_systole - systole_margin;
max_systole = mean_systole + systole_margin;

diastole_margin = 3*std_diastole;
min_diastole = mean_diastole-diastole_margin;
max_diastole = mean_diastole + diastole_margin;

% S1 and S2 limits. The minimum durations are never allowed to drop below a
% 50th of the sampling frequency:
shortest_sound = Fs/50;

min_S1 = max(mean_S1 - 3*(std_S1), shortest_sound);
min_S2 = max(mean_S2 - 3*(std_S2), shortest_sound);

max_S1 = (mean_S1 + 3*(std_S1));
max_S2 = (mean_S2 + 3*(std_S2));



--------------------------------------------------------------------------------
/getSpringerPCGFeatures.m:
--------------------------------------------------------------------------------
% function [PCG_Features, featuresFs] = getSpringerPCGFeatures(audio_data, Fs, figures)
%
% Get the features used in the Springer segmentation algorithm. These
% features include:
% -The homomorphic envelope (as performed in Schmidt et al's paper)
% -The Hilbert envelope
% -A wavelet-based feature
% -A PSD-based feature
% This function was developed for use in the paper:
% D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
% Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
%
%% INPUTS:
% audio_data: array of data from which to extract features
% Fs: the sampling frequency of the audio data
% figures (optional): boolean variable dictating the display of figures
%
%% OUTPUTS:
% PCG_Features: array of derived features
% featuresFs: the sampling frequency of the derived features. This is set
% in default_Springer_HSMM_options.m
%
%% Copyright (C) 2016 David Springer
% dave.springer@gmail.com
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.

function [PCG_Features, featuresFs] = getSpringerPCGFeatures(audio_data, Fs, figures)
% Extract the feature columns used by the Springer segmentation algorithm:
% homomorphic envelope, Hilbert envelope, PSD feature and (when enabled in
% the options) a wavelet feature. Every column is resampled to featuresFs
% and normalised with normalise_signal.

if nargin < 3
    figures = false;
end

options = default_Springer_HSMM_options;

% Whether the wavelet feature is included is read from the options:
use_wavelet = options.include_wavelet_feature;
% Downsampled feature sampling frequency:
featuresFs = options.audio_segmentation_Fs;

%% 25-400Hz 4th order Butterworth band pass, then Schmidt spike removal:
low_passed = butterworth_low_pass_filter(audio_data,2,400,Fs, false);
audio_data = butterworth_high_pass_filter(low_passed,2,25,Fs);
audio_data = schmidt_spike_removal(audio_data,Fs);

%% Homomorphic envelope (downsampled, then normalised):
full_rate_homomorphic = Homomorphic_Envelope_with_Hilbert(audio_data, Fs);
homomorphic_feature = normalise_signal(resample(full_rate_homomorphic, featuresFs, Fs));

%% Hilbert envelope (downsampled, then normalised):
full_rate_hilbert = Hilbert_Envelope(audio_data, Fs);
hilbert_feature = normalise_signal(resample(full_rate_hilbert, featuresFs, Fs));

%% Power spectral density feature, resampled to the length of the other features:
psd_feature = get_PSD_feature_Springer_HMM(audio_data, Fs, 40,60)';
psd_feature = resample(psd_feature, length(homomorphic_feature), length(psd_feature));
psd_feature = normalise_signal(psd_feature);

PCG_Features = [homomorphic_feature, hilbert_feature, psd_feature];

%% Wavelet feature (appended as a fourth column when enabled):
if use_wavelet
    wavelet_level = 3;
    wavelet_name ='rbio3.9';

    % Audio needs to be longer than 1 second for getDWT to work, so short
    % recordings are zero padded:
    if length(audio_data) < Fs*1.025
        audio_data = [audio_data; zeros(round(0.025*Fs),1)];
    end

    [detail_coefficients, ~] = getDWT(audio_data,wavelet_level,wavelet_name);

    wavelet_feature = abs(detail_coefficients(wavelet_level,:));
    wavelet_feature = wavelet_feature(1:length(full_rate_homomorphic));
    wavelet_feature = normalise_signal(resample(wavelet_feature, featuresFs, Fs))';

    PCG_Features = [PCG_Features, wavelet_feature];
end

%% Plotting figures
if figures
    figure('Name', 'PCG features');
    audio_time = (1:length(audio_data))./Fs;
    plot(audio_time,audio_data);
    hold on;
    feature_time = (1:length(PCG_Features))./featuresFs;
    plot(feature_time,PCG_Features);
    pause();
end
--------------------------------------------------------------------------------
/schmidt_spike_removal.m:
--------------------------------------------------------------------------------
% function [despiked_signal] = schmidt_spike_removal(original_signal, fs)
%
% This function removes the spikes in a signal as done by Schmidt et al in
% the paper:
% Schmidt, S. E., Holst-Hansen, C., Graff, C., Toft, E., & Struijk, J. J.
% (2010). Segmentation of heart sound recordings by a duration-dependent
% hidden Markov model. Physiological Measurement, 31(4), 513-29.
%
% The spike removal process works as follows:
% (1) The recording is divided into 500 ms windows.
% (2) The maximum absolute amplitude (MAA) in each window is found.
% (3) If at least one MAA exceeds three times the median value of the MAA's,
% the following steps were carried out. If not continue to point 4.
% (a) The window with the highest MAA was chosen.
% (b) In the chosen window, the location of the MAA point was identified as the top of the noise spike.
% (c) The beginning of the noise spike was defined as the last zero-crossing point before the MAA point.
% (d) The end of the spike was defined as the first zero-crossing point after the maximum point.
% (e) The defined noise spike was replaced by zeroes.
% (f) Resume at step 2.
% (4) Procedure completed.
%
%% Inputs:
% original_signal: The original (1D) audio signal array
% fs: the sampling frequency (Hz)
%
%% Outputs:
% despiked_signal: the audio signal with any spikes removed.
%
% This code is derived from the paper:
% S. E. Schmidt et al., "Segmentation of heart sound recordings by a
% duration-dependent hidden Markov model," Physiol. Meas., vol. 31,
% no. 4, pp. 513-29, Apr. 2010.
%
% Developed by David Springer for comparison purposes in the paper:
% D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
% Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
%
%% Copyright (C) 2016 David Springer
% dave.springer@gmail.com
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.


function [despiked_signal] = schmidt_spike_removal(original_signal, fs)
% Remove noise spikes from a 1D signal, window by window.
%
%   original_signal : 1D audio signal
%   fs              : sampling frequency (Hz)
%
%   despiked_signal : column vector with the same number of samples, in
%                     which every detected spike has been overwritten with
%                     a small constant.

% Value written over a detected spike. A small non-zero constant is used
% rather than an exact zero.
spike_fill_value = 0.0001;

%% Find the window size
% (500 ms)
windowsize = round(fs/2);

%% Find any samples outside of a integer number of windows:
trailingsamples = mod(length(original_signal), windowsize);

%% Reshape the signal into a number of windows (one window per column):
sampleframes = reshape( original_signal(1:end-trailingsamples), windowsize, []);

%% Find the MAAs (maximum absolute amplitude of each window).
% The dimension is given explicitly so that a frame matrix with a single
% row still yields one value per window.
MAAs = max(abs(sampleframes), [], 1);

% While there are still windows whose MAA is greater than 3* the median
% value of the MAAs, remove the spike in the window with the largest MAA:
while true

    % Find the window with the max MAA (the first one in case of a tie):
    [largest_MAA, window_num] = max(MAAs);

    % The largest MAA exceeds the threshold exactly when at least one MAA does.
    if isempty(largest_MAA) || ~(largest_MAA > median(MAAs)*3)
        break;
    end

    % Overwriting a spike that is already no larger than the fill value
    % cannot reduce the MAA of its window, so the loop would never finish
    % (this happens for very low amplitude or mostly silent recordings).
    % Stop and leave the remaining samples as they are.
    if largest_MAA <= spike_fill_value
        break;
    end

    frame = sampleframes(:,window_num);

    % Find the position of the spike within that window:
    [~, spike_position] = max(abs(frame));

    % Finding zero crossings (where there may not be actual 0 values, just a
    % change from positive to negative). Entry k is set when the sign
    % changes between samples k and k+1:
    zero_crossings = [abs(diff(sign(frame)))>1; 0];

    % Start of the spike: the last zero crossing up to the spike position.
    % If there is none, take the start of the window:
    spike_start = find(zero_crossings(1:spike_position),1,'last');
    if isempty(spike_start)
        spike_start = 1;
    end

    % End of the spike: the first zero crossing after the spike position.
    % If there is none, take the end of the window:
    spike_end = spike_position + find(zero_crossings(spike_position+1:end),1,'first');
    if isempty(spike_end)
        spike_end = windowsize;
    end

    % Overwrite the spike:
    sampleframes(spike_start:spike_end,window_num) = spike_fill_value;

    % Only the modified window needs its MAA recalculated:
    MAAs(window_num) = max(abs(sampleframes(:,window_num)));
end

despiked_signal = reshape(sampleframes, [],1);

% Add the trailing samples back to the signal (as a column, whatever the
% orientation of the input):
despiked_signal = [despiked_signal; reshape(original_signal(length(despiked_signal)+1:end), [], 1)];





--------------------------------------------------------------------------------
/trainBandPiMatricesSpringer.m:
--------------------------------------------------------------------------------
% function [B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values)
%
% Train the B matrix and pi vector for the Springer HMM.
% The pi vector is the initial state probability, while the B matrix are
% the observation probabilities. In the case of Springer's algorithm, the
% observation probabilities are based on a logistic regression-based
% probabilities.
%
%% Inputs:
% state_observation_values: an Nx4 cell array of observation values from
% each of N PCG signals for each (of 4) state. Within each cell is a KxJ
% double array, where K is the number of samples from that state in the PCG
% and J is the number of feature vectors extracted from the PCG.
%
%% Outputs:
% The B_matrix and pi arrays for an HMM - as Springer et al's algorithm is a
% duration dependent HMM, there is no need to calculate the A_matrix, as
% the transition between states is only dependent on the state durations.
% total_obs_distribution:
%
% Developed by David Springer for the paper:
% D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
% Segmentation," IEEE Trans. Biomed.
% Eng., In Press, 2015.
%
%% Copyright (C) 2016 David Springer
% dave.springer@gmail.com
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.

function [B_matrix, pi_vector, total_obs_distribution] = trainBandPiMatricesSpringer(state_observation_values)
% Train one one-vs-rest logistic regression model per state and return the
% mean and covariance of all pooled observations.
%
%   state_observation_values : Nx4 cell array (N recordings, 4 states); each
%                              cell holds a KxJ array of K observations of J
%                              features
%
%   B_matrix               : 1x4 cell array of mnrfit coefficient vectors
%   pi_vector              : 1x4 initial state probabilities (all 0.25)
%   total_obs_distribution : {mean; covariance} of all observations

%% Prelim

number_of_states = 4;

%% Set pi_vector
% The true value of the pi vector, which are the initial state
% probabilities, are dependent on the heart rate of each PCG, and the
% individual sound duration for each patient. Therefore, instead of setting
% a patient-dependent pi_vector, simplify by setting all states as equally
% probable:

pi_vector = [0.25,0.25,0.25,0.25];

%% Train the logistic regression-based B_matrix:

% Initialise the B_matrix as a 1x4 cell array. This is to hold the
% coefficients of the trained logistic regression model for each state.
B_matrix = cell(1,number_of_states);

% Pool the observations of every recording, state by state. The number of
% recordings is the number of ROWS of the cell array; length() would return
% the number of states instead whenever there are fewer recordings than
% states, and the loop would then index past the last recording.
number_of_recordings = size(state_observation_values, 1);
statei_values = cell(number_of_states,1);

for PCGi = 1:number_of_recordings
    for state = 1:number_of_states
        statei_values{state} = vertcat(statei_values{state},state_observation_values{PCGi,state});
    end
end

% In order to use Bayes' formula with the logistic regression derived
% probabilities, we need to get the probability of seeing a specific
% observation in the total training data set. This is the
% 'total_observation_sequence', for which the mean and covariance over all
% states together is found:

total_observation_sequence = vertcat(statei_values{:});
total_obs_distribution = cell(2,1);
total_obs_distribution{1} = mean(total_observation_sequence, 1);
total_obs_distribution{2} = cov(total_observation_sequence);

% Number of observations (rows) available for each state. Rows are counted
% explicitly because length() returns the largest dimension, which is the
% number of features when a state has fewer observations than features.
samples_per_state = cellfun(@(values) size(values,1), statei_values);

for state = 1:number_of_states

    % Randomly select samples from the other states not being learnt, in
    % order to balance the two data sets: if class 1 is being learnt vs the
    % rest, the number of the rest = the number of class 1, evenly split
    % across all other classes.

    % Number of samples required from each of the other states:
    length_per_other_state = floor(samples_per_state(state)/(number_of_states-1));

    % If one of the other states does not have that many samples, only take
    % as many as the smallest other state can provide (and correspondingly
    % fewer from the state being learnt):
    other_states = [1:state-1, state+1:number_of_states];
    length_per_other_state = min(length_per_other_state, min(samples_per_state(other_states)));

    % training_data{1}: samples of the state being learnt
    % training_data{2}: samples of all the other states
    training_data = cell(2,1);

    for other_state = 1:number_of_states
        if(other_state == state)
            % (number_of_states-1)*length_per_other_state samples for the
            % main state, to ensure that the sets are balanced:
            indices = randperm(samples_per_state(other_state),length_per_other_state*(number_of_states-1));
            training_data{1} = statei_values{other_state}(indices,:);
        else
            indices = randperm(samples_per_state(other_state),length_per_other_state);
            training_data{2} = vertcat(training_data{2}, statei_values{other_state}(indices,:));
        end
    end

    % Label all the data: 2 for the state being learnt, 1 for the rest:
    number_in_main_class = size(training_data{1},1);
    labels = ones(number_in_main_class + size(training_data{2},1),1);
    labels(1:number_in_main_class) = 2;

    % Train the logistic regression model for this state:
    all_data = [training_data{1};training_data{2}];
    B_matrix{state} = mnrfit(all_data,labels);
end

--------------------------------------------------------------------------------
/labelPCGStates.m:
--------------------------------------------------------------------------------
% function states = labelPCGStates(envelope,s1_positions, s2_positions, samplingFrequency, figures)
%
% This function assigns the state labels to a PCG record.
% This is based on ECG markers, derived from the R peak and end-T wave locations.
%
%% Inputs:
% envelope: The PCG recording envelope (found in getSchmidtPCGFeatures.m)
% s1_positions: The locations of the R peaks (in samples)
% s2_positions: The locations of the end-T waves (in samples)
% samplingFrequency: The sampling frequency of the PCG recording
% figures (optional): boolean variable dictating the display of figures
%
%% Output:
% states: An array of the state label for each sample in the feature
% vector. The total number of states is 4. Therefore, this is an array of
% values between 1 and 4, such as: [1,1,1,1,2,2,2,3,3,3,3,4,4,4,4,4,1,1,1],
% illustrating the "true" state label for each sample in the features.
% State 1 = S1 sound
% State 2 = systole
% State 3 = S2 sound
% State 4 = diastole
%
% This code was developed by David Springer for comparison purposes in the
% paper:
% D. Springer et al., "Logistic Regression-HSMM-based Heart Sound
% Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015.
% where a novel segmentation approach is compared to the paper by Schmidt
% et al:
% S. E. Schmidt et al., "Segmentation of heart sound recordings by a
% duration-dependent hidden Markov model," Physiol.
% Meas., vol. 31,
% no. 4, pp. 513-29, Apr. 2010.
%
%% Copyright (C) 2016 David Springer
% dave.springer@gmail.com
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.

function states = labelPCGStates(envelope,s1_positions, s2_positions, samplingFrequency, figures)
% Assign one of four state labels (1 = S1, 2 = systole, 3 = S2,
% 4 = diastole) to every sample of a PCG envelope, using R-peak positions
% as S1 onsets and end-T-wave positions as approximate S2 locations.
%
%   envelope          : envelope of the PCG recording
%   s1_positions      : R-peak positions (in samples of the envelope)
%   s2_positions      : end-T-wave positions (in samples of the envelope)
%   samplingFrequency : sampling frequency of the envelope (Hz)
%   figures           : (optional, default false) plot envelope and labels
%
%   states : column vector with one label per envelope sample

if(nargin<5)
    figures = false;
end

% Work on a column so that the element-wise product with the (column)
% state vector below is well defined for row inputs as well:
envelope = envelope(:);

states = zeros(length(envelope),1);


%% Timing durations from Schmidt:
mean_S1 = 0.122*samplingFrequency;
mean_S2 = 0.092*samplingFrequency;
std_S2 = 0.022*samplingFrequency;

%% Setting the duration from each R-peak to (R-peak+mean_S1) as the first state:
% The R-peak in the ECG coincides with the start of the S1 sound (A. G.
% Tilkian and M. B. Conover, Understanding heart sounds and murmurs: with
% an introduction to lung sounds, 4th ed. Saunders, 2001.)
% Therefore, the duration from each R-peak to the mean_S1 sound duration
% later were labelled as the "true" positions of the S1 sounds:
for i = 1: length(s1_positions)
    %Set an upper bound, in case the window extends over the length of the
    %signal:
    upper_bound = round(min(length(states), s1_positions(i) + mean_S1));

    %Set the states between the start of the R peak and the upper bound as
    %state 1:
    states(max([1,s1_positions(i)]):min([upper_bound,length(states)])) = 1;
end

%% Set S2 as state 3 depending on position of end T-wave peak in ECG:
% The second heart sound occurs at approximately the same time as the
% end-T-wave (A. G. Tilkian and M. B. Conover, Understanding heart sounds
% and murmurs: with an introduction to lung sounds, 4th ed. Saunders, 2001.)
% Therefore, for each end-T-wave, find the peak in the envelope around the
% end-T-wave, setting a window centered on this peak as the second heart
% sound state:
for i = 1: length(s2_positions)

    %find search window of envelope:
    %T-end +- mean+1sd
    %Set upper and lower bounds, to avoid errors of searching outside size
    %of the signal
    lower_bound = max([s2_positions(i) - floor((mean_S2 + std_S2)),1]);
    upper_bound = min(length(states), ceil(s2_positions(i) + floor(mean_S2 + std_S2)));

    % Samples already labelled as S1 are zeroed so they cannot be picked:
    search_window = envelope(lower_bound:upper_bound).*(states(lower_bound:upper_bound)~=1);

    % Find the maximum value of the envelope in the search window:
    [~, S2_index] = max(search_window);

    %Find the actual index in the envelope of the maximum peak:
    %Make sure this has a max value of the length of the signal:
    S2_index = min(length(states),lower_bound+ S2_index-1);

    %Set the states to state 3, centered on the S2 peak, +- 1/2 of the
    %expected S2 sound duration. Again, making sure it does not try to set a
    %value outside of the length of the signal:
    upper_bound = min(length(states), ceil(S2_index +((mean_S2)/2)));
    states(max([ceil(S2_index - ((mean_S2)/2)),1]):upper_bound) = 3;

    %Set the spaces between state 3 and the next R peak as state 4.
    % NOTE(review): the statements from here down to the computation of
    % first_location_of_definite_state were truncated in the copy of the
    % file this was prepared from. They have been rebuilt from the
    % surrounding comments and from how the variables are used afterwards -
    % please confirm against a pristine copy.

    %We need to find the next R peak after this S2 sound, so keep only the
    %R peaks that are not before this end-T-wave:
    later_s1_positions = s1_positions(s1_positions >= s2_positions(i));

    if(isempty(later_s1_positions))
        %No S1s after this S2, so set to end of signal:
        end_pos = length(states);
    else
        %Otherwise, stop one sample before the next R peak (and never
        %beyond the end of the signal):
        end_pos = min(length(states), min(later_s1_positions) - 1);
    end

    states(ceil(S2_index +((mean_S2)/2)):end_pos) = 4;
end


%% Setting the first and last sections of the signal
% The samples before the first labelled sound and after the last labelled
% sound have no annotation on one side, so they are labelled from the
% neighbouring sound: diastole precedes S1 and follows S2, systole precedes
% S2 and follows S1.

% Index of the last unlabelled sample before the first labelled one:
first_location_of_definite_state = find(states ~= 0, 1)-1;

% (>= 1 so that a single unlabelled leading sample is labelled as well)
if(~isempty(first_location_of_definite_state) && first_location_of_definite_state >= 1)

    if(states(first_location_of_definite_state + 1) == 1)
        states(1:first_location_of_definite_state) = 4;
    end

    if(states(first_location_of_definite_state + 1) == 3)
        states(1:first_location_of_definite_state) = 2;
    end

end


% Find the last step down:
last_location_of_definite_state = find(states ~= 0, 1,'last');

if(last_location_of_definite_state > 1)

    if(states(last_location_of_definite_state) == 1)
        states(last_location_of_definite_state:end) = 2;
    end

    if(states(last_location_of_definite_state) == 3)
        states(last_location_of_definite_state:end) = 4;
    end

end


% Drop anything that ended up beyond the length of the envelope:
states(length(envelope)+1 : end) = [];


%Set everywhere else as state 2:
states(states == 0) = 2;


%% Plotting figures
if(figures)
    figure('Name','Envelope and labelled states');
    plot(envelope);
    hold on;
    plot(states,'r');
    legend('Envelope', 'States');
    pause();
end



-------------------------------------------------------------------------------- /viterbi_Springer.c: -------------------------------------------------------------------------------- 1 | /* Many people have requested a simple example on how to create a C 2 | * MEX-file. In response to this request, the following C MEX-file, 3 | * named mexample, is provided as an introduction to cmex 4 | * programming. mexample is a commented program which describes how to 5 | * use the following MEX-functions: 6 | * 7 | * mexErrMsgTxt 8 | * mxCreateDoubleMatrix 9 | * mxGetM 10 | * mxGetN 11 | * mxGetPr 12 | * mxIsComplex 13 | * mxIsSparse 14 | * mxIsChar 15 | * 16 | * In MATLAB, mexample accepts two inputs and returns one output. The 17 | * inputs are a 2x2 array denoted as ARRAY_IN and a 2x1 vector denoted as 18 | * VECTOR_IN. The function calculates the determinant of ARRAY_IN, 19 | * multiplies each element of VECTOR_IN by the determinant, and returns 20 | * this as the output, denoted by VECTOR_OUT. All inputs and outputs to 21 | * this function are assumed to be real (not complex). */ 22 | 23 | /* First, include some basic header files. The header file 24 | * "mex.h" is required for a MEX-file. Add any other header 25 | * files that your function may need here. */ 26 | 27 | #include "mex.h" 28 | #include 29 | #include 30 | #include /* log */ 31 | /* A C MEX-file generally consists of two sections. The first 32 | * section is a function or set of functions which performs 33 | * the actual mathematical calculation that the MEX-function 34 | * is to carry out. In this example, the function is called 35 | * workFcn(). The second section is a gateway between MATLAB 36 | * and the first section, and consists of a function called 37 | * mexFunction. 
The gateway is responsible for several tasks, 38 | * including: 39 | * 40 | * I) error checking, 41 | * II) allocating memory for return arguments, 42 | * III) converting data from MATLAB into a format that 43 | * the workFcn function can use, and vice versa. 44 | * 45 | * The first function to be written in this example, then, is 46 | * workFcn: 47 | * 48 | * Since C and MATLAB handle two-dimensional arrays 49 | * differently, we will explicitly declare the dimension of 50 | * the variable theArray. The variables, theVector and 51 | * theResult, are both one-dimensional arrays, and therefore 52 | * do not need such rigid typing. */ 53 | 54 | 55 | void viterbi( 56 | int N, 57 | int T, 58 | double a_matrix[4][4], 59 | int max_duration_D, 60 | double *delta, 61 | double *observation_probs, 62 | double duration_probs [4][150], 63 | double *psi, 64 | double *psi_duration_out, 65 | double duration_sum_in[4] 66 | ) 67 | 68 | { 69 | 70 | int i; 71 | int i2; 72 | int i3; 73 | int j; 74 | int t; 75 | int d; 76 | 77 | 78 | 79 | for (t = 1; t T-2){ 117 | start = T-2; 118 | } 119 | 120 | /* 121 | * %The end of the analysis window, which is the current time 122 | * %step, unless the time has gone past T, the end of the record, in 123 | * %which case it is truncated to T. This allows the analysis 124 | * %window to extend past the end of the record, so that the 125 | * %timing durations of the states do not have to "end" at the end 126 | * %of the record. 127 | */ 128 | 129 | end_t = t; 130 | if(end_t>T-1){ 131 | end_t = T-1; 132 | } 133 | 134 | 135 | for(i = 0; i max_delta){ 139 | max_delta = temp; 140 | max_index = i; 141 | } 142 | } 143 | 144 | 145 | /*//Find the normaliser for the observations at the start of the 146 | * //analysis window. 
The probability of seeing all the 147 | * //observations in the analysis window in state j is updated each 148 | * //time d is incrememented two lines below, so we only need to 149 | * //find the observation probabilities for one time step, each 150 | * //time d is updated:*/ 151 | 152 | 153 | probs = 0; 154 | for(i2 = start; i2<=end_t; i2++){ 155 | 156 | // Ensure that the probabilities aren't zero leading to -inf probabilities after log: 157 | if(observation_probs[i2 +j*T] == 0){ 158 | observation_probs[i2 +j*T] = FLT_MIN; 159 | } 160 | 161 | probs = probs + log(observation_probs[i2 +j*T]); 162 | } 163 | 164 | if(probs ==0){ 165 | probs = FLT_MIN; 166 | } 167 | 168 | emission_probs = (probs); 169 | 170 | /*Find the total probability of transitioning from the last 171 | * //state to this one, with the observations and being in the same 172 | * //state for the analysis window. This is the duration-dependant 173 | * //variation of equation 33a from Rabiner:*/ 174 | delta_temp = max_delta + (emission_probs)+ (log((duration_probs[j][d-1]/duration_sum_in[j]))); 175 | 176 | 177 | 178 | // Uncomment the below for debuggin: 179 | // mexPrintf("\n t:%d", t); 180 | // mexPrintf("\n j:%d", j); 181 | // mexPrintf("\n d:%d", d); 182 | // mexPrintf("\n max_delta:%f", max_delta); 183 | // mexPrintf("\n max_index:%i \n", max_index); 184 | // mexPrintf ("emission_probs: %f \n",emission_probs); 185 | // mexPrintf ("log((duration_probs[j][d-1]/duration_sum)): %f \n",log((duration_probs[j][d-1]/duration_sum_in[j]))); 186 | // mexPrintf ("delta_temp: %f \n",delta_temp); 187 | // mexPrintf ("delta[t+j*(T+ max_duration_D-1)]: %f \n",delta[t+j*(T+ max_duration_D-1)]); 188 | // mexPrintf ("duration_probs[j][d]: %f \n",duration_probs[j][d]); 189 | // mexPrintf ("duration_sum_in[j]: %f \n",duration_sum_in[j]); 190 | 191 | /* 192 | * Unlike equation 33a from Rabiner, the maximum delta could come 193 | * from multiple d values, or from multiple size of the analysis 194 | * window. 
Therefore, only keep the maximum delta value over the 195 | * entire analysis window: 196 | * If this probability is greater than the last greatest, 197 | * update the delta matrix and the time duration variable: 198 | */ 199 | 200 | if(delta_temp>delta[t+j*(T+ max_duration_D-1)]){ 201 | 202 | delta[t+j*(T+ max_duration_D-1)] = delta_temp; 203 | psi[t+j*(T+ max_duration_D-1)] = max_index+1; 204 | 205 | psi_duration_out[t + j*(T+ max_duration_D-1)] = d; 206 | 207 | } 208 | } 209 | } 210 | } 211 | 212 | } 213 | 214 | /* Now, define the gateway function, i.e., mexFunction.Below 215 | * is the standard, predeclared header to mexFunction. nlhs 216 | * and nrhs are the number of left-hand and right-hand side 217 | * arguments that mexample was called with from within MATLAB. 218 | * In this example, nlhs equals 1 and nrhs should equal 2. If 219 | * not, then the user has called mexample the wrong way and 220 | * should be informed of this. plhs and prhs are arrays which 221 | * contain the pointers to the MATLAB arrays, which are 222 | * stored in a C struct called an Array. prhs is an array of 223 | * length rhs,and its pointers point to valid input data. 224 | * plhs is an array of length nlhs, and its pointers point to 225 | * invalid data (i.e., garbage). It is the job of mexFunction 226 | * to fill plhs with valid data. 227 | * 228 | * First, define the following values. This makes it much 229 | * easier to change the order of inputs to mexample, should we 230 | * want to change the function later. In addition, it makes 231 | * the code easier to read. 
*/ 232 | 233 | #define N prhs[0] 234 | #define T prhs[1] 235 | #define a_matrix prhs[2] 236 | #define max_duration_D prhs[3] 237 | #define delta prhs[4] 238 | #define observation_probs prhs[5] 239 | #define duration_probs prhs[6] 240 | #define psi prhs[7] 241 | #define duration_sum prhs[8] 242 | 243 | 244 | #define delta_out plhs[0] 245 | #define psi_out plhs[1] 246 | #define psi_duration plhs[2] 247 | 248 | 249 | void mexFunction( 250 | int nlhs, 251 | mxArray *plhs[], 252 | int nrhs, 253 | const mxArray *prhs[] 254 | ) 255 | { 256 | double a_matrix_in[4][4];/* 2 dimensional C array to pass to workFcn() */ 257 | double *delta_in_matrix;/* 2 dimensional C array to pass to workFcn() */ 258 | double *observation_probs_matrix;/* 2 dimensional C array to pass to workFcn() */ 259 | double *psi_matrix;/* 2 dimensional C array to pass to workFcn() */ 260 | double duration_sum_in[4];/* 2 dimensional C array to pass to workFcn() */ 261 | 262 | double duration_probs_matrix[4][150];/* 2 dimensional C array to pass to workFcn() */ 263 | 264 | int actual_T; 265 | int fake_T_extended; 266 | int actual_N; 267 | int max_duration_D_val; 268 | 269 | int row,col; /* loop indices */ 270 | int m,n; /* temporary array size holders */ 271 | 272 | /* Step 1: Error Checking Step 1a: is nlhs 1? If not, 273 | * generate an error message and exit mexample (mexErrMsgTxt 274 | * does this for us!) */ 275 | if (nlhs!=3) 276 | mexErrMsgTxt("mexample requires 3 output argument."); 277 | 278 | /* Step 1b: is nrhs 2? 
*/ 279 | if (nrhs!=9) 280 | mexErrMsgTxt("mexample requires 9 input arguments"); 281 | 282 | 283 | actual_T = mxGetM(observation_probs); 284 | actual_N = mxGetN(observation_probs); 285 | 286 | max_duration_D_val = mxGetScalar(max_duration_D); 287 | 288 | 289 | /* Step 2: Allocate memory for return argument(s) */ 290 | delta_out = mxCreateDoubleMatrix((actual_T+max_duration_D_val-1), actual_N, mxREAL); 291 | psi_out = mxCreateDoubleMatrix((actual_T+max_duration_D_val-1), actual_N, mxREAL); 292 | psi_duration = mxCreateDoubleMatrix((actual_T+max_duration_D_val-1), actual_N, mxREAL); 293 | 294 | /* Step 3: Convert ARRAY_IN to a 2x2 C array 295 | * MATLAB stores a two-dimensional matrix in memory as a one- 296 | * dimensional array. If the matrix is size MxN, then the 297 | * first M elements of the one-dimensional array correspond to 298 | * the first column of the matrix, and the next M elements 299 | * correspond to the second column, etc. The following loop 300 | * converts from MATLAB format to C format: */ 301 | 302 | for (col=0; col < mxGetN(a_matrix); col++){ 303 | for (row=0; row < mxGetM(a_matrix); row++){ 304 | a_matrix_in[row][col] =(mxGetPr(a_matrix))[row+col*mxGetM(a_matrix)]; 305 | } 306 | } 307 | 308 | for (col=0; col < mxGetM(duration_sum); col++){ 309 | duration_sum_in[col] =(mxGetPr(duration_sum))[col]; 310 | } 311 | 312 | 313 | 314 | 315 | delta_in_matrix = mxGetPr(delta); 316 | observation_probs_matrix = mxGetPr(observation_probs); 317 | psi_matrix = mxGetPr(psi); 318 | 319 | /* for (col=0; col < mxGetN(delta); col++){ 320 | * // for (row=0; row < mxGetM(delta); row++){ 321 | * // 322 | * // 323 | * // observation_probs_matrix[row][col] =(mxGetPr(observation_probs))[row+col*mxGetM(observation_probs)]; 324 | * // psi_matrix[row][col] =(mxGetPr(psi))[row+col*mxGetM(psi)]; 325 | * // } 326 | * // }*/ 327 | 328 | 329 | for (col=0; col < mxGetN(duration_probs); col++){ 330 | for (row=0; row < mxGetM(duration_probs); row++){ 331 | 
duration_probs_matrix[row][col] =(mxGetPr(duration_probs))[row+col*mxGetM(duration_probs)]; 332 | } 333 | } 334 | 335 | 336 | 337 | 338 | /* mxGetPr returns a pointer to the real part of the array 339 | * ARRAY_IN. In the line above, it is treated as the one- 340 | * dimensional array mentioned in the previous comment. */ 341 | 342 | /* Step 4: Call workFcn function */ 343 | viterbi(actual_N,actual_T,a_matrix_in,max_duration_D_val,delta_in_matrix,observation_probs_matrix,duration_probs_matrix,psi_matrix,mxGetPr(psi_duration),duration_sum_in); 344 | memcpy ( mxGetPr(delta_out), delta_in_matrix, actual_N*(actual_T+max_duration_D_val-1)*8); 345 | memcpy ( mxGetPr(psi_out), psi_matrix, actual_N*(actual_T+max_duration_D_val-1)*8); 346 | 347 | } -------------------------------------------------------------------------------- /viterbiDecodePCG_Springer.m: -------------------------------------------------------------------------------- 1 | % function [delta, psi, qt] = viterbiDecodePCG_Springer(observation_sequence, pi_vector, b_matrix, total_obs_distribution, heartrate, systolic_time, Fs, figures) 2 | % 3 | % This function calculates the delta, psi and qt matrices associated with 4 | % the Viterbi decoding algorithm from: 5 | % L. R. Rabiner, "A tutorial on hidden Markov models and selected 6 | % applications in speech recognition," Proc. IEEE, vol. 77, no. 2, pp. 7 | % 257-286, Feb. 1989. 8 | % using equations 32a - 35, and equations 68 - 69 to include duration 9 | % dependancy of the states. 10 | % 11 | % This decoding is performed after the observation probabilities have been 12 | % derived from the logistic regression model of Springer et al: 13 | % D. Springer et al., "Logistic Regression-HSMM-based Heart Sound 14 | % Segmentation," IEEE Trans. Biomed. Eng., In Press, 2015. 15 | % 16 | % Further, this function is extended to allow the duration distributions to extend 17 | % past the beginning and end of the sequence. 
Without this, the label 18 | % sequence has to start and stop with an "entire" state duration being 19 | % fulfilled. This extension takes away that requirement, by allowing the 20 | % duration distributions to extend past the beginning and end, but only 21 | % considering the observations within the sequence for emission probability 22 | % estimation. More detail can be found in the publication by Springer et 23 | % al., mentioned above. 24 | % 25 | %% Inputs: 26 | % observation_sequence: The observed features (one row per time step) 27 | % pi_vector: the array of initial state probabilities, derived from 28 | % "trainSpringerSegmentationAlgorithm". 29 | % b_matrix: the per-state logistic regression coefficients (cell array), derived from 30 | % "trainSpringerSegmentationAlgorithm". total_obs_distribution: cell array holding the mean and covariance of all observations, used for the Bayes correction. 31 | % heartrate: the heart rate of the PCG, extracted using 32 | % "getHeartRateSchmidt" 33 | % systolic_time: the duration of systole, extracted using 34 | % "getHeartRateSchmidt" 35 | % Fs: the sampling frequency of the observation_sequence 36 | % figures: optional boolean variable to show figures 37 | % 38 | %% Outputs: 39 | % delta: the (T + max_duration_D - 1)-by-N matrix of the best log-scores for each time step and state 40 | % psi: the matrix of most likely preceding states, the same size as delta 41 | % qt: the 1-by-T most likely state sequence (1 = S1, 2 = systole, 3 = S2, 4 = diastole) 42 | % As Springer et al's algorithm is a duration dependent HMM, there is no 43 | % need to calculate the A_matrix, as the transition between states is only 44 | % dependent on the state durations. 45 | % 46 | %% Copyright (C) 2016 David Springer 47 | % dave.springer@gmail.com 48 | % 49 | % This program is free software: you can redistribute it and/or modify 50 | % it under the terms of the GNU General Public License as published by 51 | % the Free Software Foundation, either version 3 of the License, or 52 | % any later version. 53 | % 54 | % This program is distributed in the hope that it will be useful, 55 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 56 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 57 | % GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.

function [delta, psi, qt] = viterbiDecodePCG_Springer(observation_sequence, pi_vector, b_matrix, total_obs_distribution, heartrate, systolic_time, Fs,figures)
% Decodes the most likely sequence of the four heart-sound states
% (1 = S1, 2 = systole, 3 = S2, 4 = diastole) with the extended,
% duration-dependent Viterbi algorithm.
%
% Returns:
%   delta: (T + max_duration_D - 1)-by-N matrix of best log-scores
%   psi:   matrix of most likely preceding states, same size as delta
%   qt:    1-by-T most likely state sequence

if nargin < 8
    figures = false;
end

%% Preliminary
springer_options = default_Springer_HSMM_options;

T = length(observation_sequence);
N = 4; % Number of states

% Setting the maximum duration of a single state. This is set to an entire
% heart cycle:
max_duration_D = round((1*(60/heartrate))*Fs);

%Initialising the variables that are needed to find the optimal state path along
%the observation sequence.
%delta_t(j), as defined on page 264 of Rabiner, is the best score (highest
%probability) along a single path, at time t, which accounts for the first
%t observations and ends in State s_j. In this case, the length of the
%matrix is extended by max_duration_D samples, in order to allow the use
%of the extended Viterbi algorithm:
delta = ones(T+ max_duration_D-1,N)*-inf;

%The argument that maximises the transition between states (this is
%basically the previous state that had the highest transition probability
%to the current state) is tracked using the psi variable.
psi = zeros(T+ max_duration_D-1,N);

%An additional variable, that is not included on page 264 of Rabiner, is
%the state duration that maximises the delta variable. This is essential
%for the duration dependent HMM.
psi_duration =zeros(T + max_duration_D-1,N);

%% Setting up observation probs
%MLR gives P(state|obs)
%Therefore, need Bayes to get P(o|state)
%P(o|state) = P(state|obs) * P(obs) / P(states)
%Where p(obs) is derived from a MVN distribution from all
%observations, and p(states) is taken from the pi_vector.

%P(obs) does not depend on the state, so it is evaluated once for all time
%steps instead of once per state and per time step:
Po_correction = mvnpdf(observation_sequence(1:T,:),cell2mat(total_obs_distribution(1)),cell2mat(total_obs_distribution(2)));

observation_probs = zeros(T,N);

for n = 1:N

    pihat = mnrval(cell2mat(b_matrix(n)),observation_sequence(:,:));

    %When saving the coefficients from the logistic
    %regression, it orders them P(class 1) then P(class 2). When
    %training, I label the classes as 0 and 1, so the
    %correct probability would be pihat(2).
    observation_probs(:,n) = (pihat(1:T,2).*Po_correction)./pi_vector(n);

end

%% Setting up state duration probabilities, using Gaussian distributions:
[d_distributions, max_S1, min_S1, max_S2, min_S2, max_systole, min_systole, max_diastole, min_diastole] = get_duration_distributions(heartrate,systolic_time);

%Allowed duration range of each state, indexed by state number
%(1 = S1, 2 = systole, 3 = S2, 4 = diastole):
min_durations = [min_S1, min_systole, min_S2, min_diastole];
max_durations = [max_S1, max_systole, max_S2, max_diastole];

duration_probs = zeros(N,3*Fs);
duration_sum = zeros(N,1);
for state_j = 1:N

    duration_mu = cell2mat(d_distributions(state_j,1));
    duration_sigma = cell2mat(d_distributions(state_j,2));

    for d = 1:max_duration_D

        duration_probs(state_j,d) = mvnpdf(d,duration_mu,duration_sigma);

        %Durations outside the allowed range of the state are given the
        %smallest positive probability:
        if(d < min_durations(state_j) || d > max_durations(state_j))
            duration_probs(state_j,d)= realmin;
        end
    end
    duration_sum(state_j) = sum(duration_probs(state_j,:));
end

%NOTE(review): the sums above are taken before the truncation below, and
%max_duration_D is not clamped to 3*Fs, so heart rates below 20 bpm would
%index past the end of duration_probs later on - confirm intended limits.
if(size(duration_probs,2)>3*Fs)
    duration_probs(:,(3*Fs+1):end) = [];
end

if(figures)
    figure('Name', 'Duration probabilities');
    plot(duration_probs(1,:)./ duration_sum(1),'Linewidth',2);
    hold on;
    plot(duration_probs(2,:)./ duration_sum(2),'r','Linewidth',2);
    hold on;
    plot(duration_probs(3,:)./ duration_sum(3),'g','Linewidth',2);
    hold on;
    plot(duration_probs(4,:)./ duration_sum(4),'k','Linewidth',2);
    hold on;
    legend('S1 Duration','Systolic Duration','S2 Duration','Diastolic Duration');
    pause();
end
%% Perform the actual Viterbi Recursion:


qt = zeros(1,length(delta));
%% Initialisation Step

%Equation 32a and 69a, but leave out the probability of being in
%state i for only 1 sample, as the state could have started before time t =
%0.

delta(1,:) = log(pi_vector) + log(observation_probs(1,:)); %first value is the probability of initially being in each state * probability of observation 1 coming from each state

%Equation 32b
psi(1,:) = -1;


% The state duration probabilities are now used.
%Change the a_matrix to have zeros along the diagonal, therefore, only
%relying on the duration probabilities and observation probabilities to
%influence change in states:
%This would only be valid in sequences where the transition between states
%follows a distinct order.
a_matrix = [0,1,0,0;0 0 1 0; 0 0 0 1;1 0 0 0];


%% Run the core Viterbi algorithm

if(springer_options.use_mex)

    %% Run Mex code
    % Ensure you have compiled viterbi_Springer.c with mex on the
    % native machine before running this:
    [delta, psi, psi_duration] = viterbi_Springer(N,T,a_matrix,max_duration_D,delta,observation_probs,duration_probs,psi, duration_sum);


else

    %% Recursion

    %% The Extended Viterbi algorithm:

    %Equations 33a and 33b and 69a, b, c etc:
    %again, omitting the p(d), as state could have started before t = 1

    % This implementation extends the standard implementation of the
    % duration-dependent Viterbi algorithm by allowing the durations to
    % extend beyond the start and end of the time series, thereby allowing
    % states to "start" and "stop" outside of the recorded signal. This
    % addresses the issue of partial states at the beginning and end of the
    % signal being labelled as the incorrect state. For instance, a
    % short-duration diastole at the beginning of a signal looks a lot like
    % systole, and can lead to labelling errors.

    %Loop-invariant log terms, computed once instead of in the innermost
    %loop:
    log_a_matrix = log(a_matrix);
    log_duration_probs = log(bsxfun(@rdivide, duration_probs, duration_sum));

    %Work with the log of the observation probabilities, so that the
    %probability of a whole analysis window is a sum of logs rather than
    %the log of a product. The product underflows to zero for long
    %windows. As in viterbi_Springer.c, zero probabilities are replaced by
    %the smallest positive single-precision value before taking the log:
    log_observation_probs = observation_probs;
    log_observation_probs(log_observation_probs == 0) = realmin('single');
    log_observation_probs = log(log_observation_probs);

    % t spans input 2 to T + max_duration_D:


    for t = 2:T+ max_duration_D-1
        for j = 1:N
            for d = 1:1:max_duration_D


                %The start of the analysis window, which is the current time
                %step, minus d (the time horizon we are currently looking back).
                %The analysis window can be seen to be starting one
                %step back each time the variable d is increased.
                % This is clamped to 1 if extending past the start of the
                % record, and T-1 if extending past the end of the record:
                start_t = min(max(t - d, 1), T-1);

                %The end of the analysis window, which is the current time
                %step, unless the time has gone past T, the end of the record, in
                %which case it is truncated to T. This allows the analysis
                %window to extend past the end of the record, so that the
                %timing durations of the states do not have to "end" at the end
                %of the record.
                end_t = min(t, T);


                %Find the max_delta and index of that from the previous step
                %and the transition to the current step:
                %This is the first half of the expression of equation 33a from
                %Rabiner:
                [max_delta, max_index] = max(delta(start_t,:)+log_a_matrix(:,j)');


                %The log-probability of seeing all the observations in the
                %analysis window in state j:
                emission_probs = sum(log_observation_probs(start_t:end_t,j));

                if(emission_probs == 0 || isnan(emission_probs))
                    emission_probs =realmin;
                end


                %Find the total probability of transitioning from the last
                %state to this one, with the observations and being in the same
                %state for the analysis window. This is the duration-dependent
                %variation of equation 33a from Rabiner:
                delta_temp = max_delta + (emission_probs)+ log_duration_probs(j,d);


                %Unlike equation 33a from Rabiner, the maximum delta could come
                %from multiple d values, or from multiple size of the analysis
                %window. Therefore, only keep the maximum delta value over the
                %entire analysis window:
                %If this probability is greater than the last greatest,
                %update the delta matrix and the time duration variable:


                if(delta_temp>delta(t,j))
                    delta(t,j) = delta_temp;
                    psi(t,j) = max_index;
                    psi_duration(t,j) = d;
                end

            end
        end
    end
end


%% Termination

% For the extended case, need to find max prob after end of actual
% sequence:

% Find just the delta after the end of the actual signal
temp_delta = delta(T+1:end,:);
%Find the maximum value in this section, and which state it is in:
[~, idx] = max(temp_delta(:));
[pos, ~] = ind2sub(size(temp_delta), idx);

% Change this position to the real position in delta matrix:
pos = pos+T;

%1) Find the last most probable state
%2) From the psi matrix, find the most likely preceding state
%3) Find the duration of the last state from the psi_duration matrix
%4) From the onset to the offset of this state, set to the most likely state
%5) Repeat steps 2 - 5 until reached the beginning of the signal


%The initial steps 1-4 are equation 34b in Rabiner. 1) finds P*, the most
%likely last state in the sequence, 2) finds the state that precedes the
%last most likely state, 3) finds the onset in time of the last state
%(included due to the duration-dependency) and 4) sets the most likely last
%state to the q_t variable.

%1)
[~, state] = max(delta(pos,:),[],2);

%2)
offset = pos;
preceding_state = psi(offset,state);

%3)
onset = offset - psi_duration(offset,state)+1;

%4)
qt(onset:offset) = state;

%The state is then updated to the preceding state, found above, which must
%end when the last most likely state started in the observation sequence:
state = preceding_state;

%Every pass through the loop below labels at least one sample, so a valid
%backtrack never needs more passes than delta has rows. The counter only
%guards against an endless loop if psi_duration holds a zero. (A fixed cap
%of 1000 passes would silently leave the start of long recordings
%unlabelled.)
max_backtrack_steps = size(delta,1);
count = 0;

%Step backwards, one state at a time, until the start of the signal is
%reached:
%NOTE(review): if a state starts exactly at sample 2 the loop exits with
%qt(1) still 0 - confirm whether that is intended.
while(onset > 2)

    %2)
    offset = onset-1;
    preceding_state = psi(offset,state);

    %3)
    onset = offset - psi_duration(offset,state)+1;

    %4)
    if(onset<2)
        onset = 1;
    end
    qt(onset:offset) = state;
    state = preceding_state;
    count = count +1;

    if(count> max_backtrack_steps)
        break;
    end
end

qt = qt(1:T);


--------------------------------------------------------------------------------