├── .gitignore ├── .gitmodules ├── Dockerfile ├── LICENSE.txt ├── README.md ├── __init__.py ├── driver.py ├── notebooks ├── final_eval.ipynb ├── full_eval_v0.2.csv ├── train_das_tranche_A_resnet.ipynb └── train_das_tranche_all.ipynb ├── official_phase_legacy ├── __init__.py ├── cfg.py ├── data_reader.py ├── dataset.py ├── eval_all.py ├── get_12ECG_features.py ├── model_configs │ ├── __init__.py │ ├── ati_cnn.py │ ├── cnn.py │ ├── cpsc.py │ ├── ecg_crnn.py │ ├── ecg_seq_lab_net.py │ ├── ecg_subtract_unet.py │ ├── ecg_unet.py │ └── rnn.py ├── models │ ├── __init__.py │ ├── _experimental │ │ ├── keras │ │ │ └── .keep │ │ └── pytorch │ │ │ ├── af_lstm.py │ │ │ ├── ecg_crnn.py │ │ │ ├── ecg_crnn_deprecated.py │ │ │ ├── ecg_seq_lab_net.py │ │ │ ├── ecg_spp_cnn.py │ │ │ ├── ecg_subtract_unet.py │ │ │ ├── ecg_unet.py │ │ │ └── ecg_yolo.py │ ├── ecg_crnn.py │ ├── ecg_unet.py │ ├── legacy │ │ ├── legacy_v01.py │ │ └── legacy_v02.py │ ├── special_detectors.py │ └── utils │ │ ├── __init__.py │ │ ├── grad_cam.py │ │ ├── keras_utils.py │ │ └── torch_utils.py ├── run_12ECG_classifier.py ├── train.py ├── train_12ECG_classifier.py ├── train_model.py ├── train_seq_lab_net │ ├── __init__.py │ ├── cfg_seq_lab_net.py │ ├── data_reader.py │ ├── dataset.py │ ├── metrics.py │ └── train_seq_lab_net.py └── train_unet │ ├── __init__.py │ ├── cfg_unet.py │ ├── data_reader.py │ ├── dataset.py │ ├── metrics.py │ └── train_unet.py ├── references ├── AttentionDeepMIL │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── dataloader.py │ ├── main.py │ ├── mnist_bags_loader.py │ └── model.py ├── CPSC0223 │ ├── CPSC0223-LICENSE.txt │ ├── LSTM0922.json │ ├── cpsc2018.py │ ├── score_py3.py │ ├── time_validation.txt │ ├── train0905.json │ ├── train0906bbb.json │ ├── train0906naa.json │ ├── train0906pa.json │ └── train0906st.json ├── CPSC0236 │ ├── CPSC0236-LICENSE.txt │ ├── cpsc2018.py │ ├── note.txt │ ├── score_py3.py │ └── time_validation.txt ├── __init__.py ├── ati_cnn │ ├── ati_cnn_model.py │ ├── const.py │ ├── crnn_keras.py │ ├── crnn_torch.py │ ├── init_test.ipynb │ ├── scoring_metrics.py │ ├── ti_cnn_model.py │ └── train_das.py ├── encase │ ├── AUTHORS.txt │ ├── LICENSE.txt │ ├── README.md │ ├── answers.txt │ ├── code │ │ ├── BasicCLF.py │ │ ├── CDL.py │ │ ├── Encase.py │ │ ├── FeatureExtract.py │ │ ├── MyEval.py │ │ ├── OptF.py │ │ ├── ParSelect.py │ │ ├── ReadData.py │ │ ├── SubmitPrepareModel.py │ │ ├── TestBasic.py │ │ ├── TestEncase.py │ │ ├── TestKNN.py │ │ ├── TestOptF.py │ │ ├── TestXGB_cut.py │ │ ├── challenge.py │ │ ├── challenge_0825.py │ │ ├── challenge_0828.py │ │ ├── challenge_encase_mimic.py │ │ ├── challenge_encase_mimic_offline.py │ │ ├── deep_compress.py │ │ ├── dnn.py │ │ ├── dnn1.py │ │ ├── dnn1_test.py │ │ ├── dnn_lstm_simp.py │ │ ├── fastdtw.py │ │ ├── features_all.py │ │ ├── features_centerwave.py │ │ ├── features_deep_centerwave.py │ │ ├── features_long.py │ │ ├── features_mimic.py │ │ ├── features_qrs.py │ │ ├── features_resNet.py │ │ ├── features_short.py │ │ ├── mimic_1_gendata.py │ │ ├── mimic_2_build_student.py │ │ ├── mimic_3_build_LR.py │ │ ├── mimic_test.py │ │ ├── minNCCE.py │ │ ├── model_deep_centerwave.py │ │ ├── normalize_data.py │ │ ├── plot_ecg.m │ │ ├── preprocess.m │ │ ├── preprocess_data.py │ │ ├── preprocess_slide.m │ │ ├── qrs_detect2.m │ │ ├── rdmat.m │ │ ├── read_data.py │ │ ├── resNet.py │ │ ├── resNet_1.py │ │ ├── resNet_2.py │ │ ├── resNet_3.py │ │ ├── resNet_4.py │ │ ├── resNet_5.py │ │ ├── sampen2.py │ │ ├── stat.py │ │ ├── test_exp_for_paper.py │ │ ├── test_gpu.py │ │ 
├── test_importance.py │ │ ├── tflearn │ │ │ ├── __init__.py │ │ │ ├── activations.py │ │ │ ├── callbacks.py │ │ │ ├── collections.py │ │ │ ├── config.py │ │ │ ├── data_augmentation.py │ │ │ ├── data_flow.py │ │ │ ├── data_preprocessing.py │ │ │ ├── data_utils.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── cifar10.py │ │ │ │ ├── cifar100.py │ │ │ │ ├── imdb.py │ │ │ │ ├── mnist.py │ │ │ │ ├── oxflower17.py │ │ │ │ ├── svhn.py │ │ │ │ └── titanic.py │ │ │ ├── helpers │ │ │ │ ├── __init__.py │ │ │ │ ├── evaluator.py │ │ │ │ ├── generator.py │ │ │ │ ├── regularizer.py │ │ │ │ ├── summarizer.py │ │ │ │ └── trainer.py │ │ │ ├── initializations.py │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── conv.py │ │ │ │ ├── core.py │ │ │ │ ├── embedding_ops.py │ │ │ │ ├── estimator.py │ │ │ │ ├── merge_ops.py │ │ │ │ ├── normalization.py │ │ │ │ └── recurrent.py │ │ │ ├── losses.py │ │ │ ├── metrics.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── dnn.py │ │ │ │ └── generator.py │ │ │ ├── objectives.py │ │ │ ├── optimizers.py │ │ │ ├── summaries.py │ │ │ ├── utils.py │ │ │ └── variables.py │ │ └── util_vote.py │ ├── preprocess_sub.m │ ├── qrs_detect2.m │ ├── rdmat.m │ └── score2017Challenge.m └── stanford │ └── ecg │ ├── __init__.py │ ├── load.py │ ├── network.py │ ├── predict.py │ ├── train.py │ └── util.py ├── requirements.txt ├── saved_models └── .keep ├── signal_processing ├── __init__.py ├── ecg_preproc.py ├── ecg_rpeaks.py ├── ecg_spectral.py ├── ecg_waves.py ├── ecg_waves_wavelet.py └── pantompkins.py ├── unofficial_phase_legacy ├── __init__.py ├── cinc2020_aux_data.py ├── driver.py ├── get_12ECG_features.py ├── init_test.ipynb ├── official_scoring_metrics.py ├── official_scoring_metrics_legacy.py ├── run_12ECG_classifier.py ├── train_legacy.py └── weights-0.22loss.hdf5 └── utils ├── __init__.py ├── diagnoses_records_list.json ├── ecg_arrhythmia_knowledge.py ├── misc.py ├── record_list.json ├── scoring_aux_data.py ├── scoring_metrics.py ├── utils_nn.py └── utils_signal.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .vscode/ 132 | 133 | test/CPSC0223/ 134 | test/CPSC0236/ 135 | test/ati_cnn/ckpt/ 136 | 137 | working_dir 138 | tmp 139 | temp 140 | checkpoints 141 | log 142 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "official_baseline_classifier"] 2 | path = official_baseline_classifier 3 | url = https://github.com/wenh06/python-classifier-2020.git 4 | [submodule "official_scoring_metric"] 5 | path = official_scoring_metric 6 | url = https://github.com/wenh06/evaluation-2020.git 7 | [submodule "torch_ecg"] 8 | path = torch_ecg 9 | url = https://github.com/DeepPSP/torch_ecg.git 10 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.3-slim 2 | 3 | ## The MAINTAINER instruction sets the Author field of the generated images 4 | LABEL maintainer="wenh06@gmail.com" 5 | ## DO NOT EDIT THESE 3 lines 6 | RUN mkdir /physionet 7 | COPY ./ /physionet 8 | WORKDIR /physionet 9 | 10 | ## Install your dependencies here using apt-get etc. 11 | 12 | ## Do not edit if you have a requirements.txt 13 | RUN pip install -r requirements.txt 14 | 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, PhysioNet 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [PhysioNet/CinC Challenge 2020](https://physionetchallenges.github.io/2020/) 2 | 3 | ## Digest of Top Models ([CinC2020 papers](https://www.cinc.org/archives/2020/); search `Challenge` on that page) 4 | - [prna](http://www.cinc.org/archives/2020/pdf/CinC2020-107.pdf): Transformer 5 | - [Between a ROC and a heart place](http://www.cinc.org/archives/2020/pdf/CinC2020-112.pdf): Adaptive lead weighted ResNet 6 | - [HeartBeats](http://www.cinc.org/archives/2020/pdf/CinC2020-281.pdf): SE-ResNet 7 | - [Triage](http://www.cinc.org/archives/2020/pdf/CinC2020-133.pdf): ResNet and Scatter Transform 8 | - [Sharif AI Team](http://www.cinc.org/archives/2020/pdf/CinC2020-445.pdf): Branched CNN (each branch being VGG-like) and RNN 9 | - [DSAIL_SNU](http://www.cinc.org/archives/2020/pdf/CinC2020-328.pdf): Bag of Tricks (data augmentation?) for Electrocardiogram Classification with DNN 10 | - [UMCUVA](http://www.cinc.org/archives/2020/pdf/CinC2020-253.pdf): Exponentially Dilated Causal CNN 11 | - [CQUPT_ECG](http://www.cinc.org/archives/2020/pdf/CinC2020-085.pdf): Multi-scale SE-Net 12 | - [ECU](http://www.cinc.org/archives/2020/pdf/CinC2020-161.pdf): CRNN + NAS 13 | - [PALab](http://www.cinc.org/archives/2020/pdf/CinC2020-035.pdf): SE-ResNet 14 | - [BUTTeam](http://www.cinc.org/archives/2020/pdf/CinC2020-189.pdf): CNN (modified ResNet) with Global Skip Connections and a Custom Loss Function 15 | - [SpaceOn Flattop](http://www.cinc.org/archives/2020/pdf/CinC2020-007.pdf): Modified (how modified?) ResNet 16 | - [ISIBrno](http://www.cinc.org/archives/2020/pdf/CinC2020-032.pdf): Residual CNN-GRU with Attention Mechanism 17 | - other teams: Modified EfficientNet; Class Activation Map; Graph Convolutional Network (GCN) 18 | 19 | Final rankings can be found [here](https://github.com/physionetchallenges/evaluation-2020/blob/master/Results/physionet_2020_official_scores.csv). 20 | 21 | # Update 22 | This repo is no longer updated.
Further updates will be done in [torch_ecg](https://github.com/DeepPSP/torch_ecg/tree/master/torch_ecg/train/train_crnn_cinc2020), if there are any. 23 | 24 | [CinC2021](https://github.com/DeepPSP/cinc2021) | [CinC2022](https://github.com/DeepPSP/cinc2022) | [CinC2023](https://github.com/DeepPSP/cinc2023) 25 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import os, sys 4 | 5 | _BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | _PARENT_DIR = os.path.dirname(_BASE_DIR) 7 | # _IN_SYS_PATH = [p for p in [_BASE_DIR, _PARENT_DIR] if p in sys.path] 8 | if _PARENT_DIR not in sys.path: 9 | sys.path.insert(0, _PARENT_DIR) 10 | 11 | import warnings 12 | warnings.simplefilter(action='ignore', category=FutureWarning) 13 | -------------------------------------------------------------------------------- /driver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np, os, sys 4 | np.set_printoptions(precision=5, suppress=True) 5 | from scipy.io import loadmat 6 | 7 | from run_12ECG_classifier import load_12ECG_model, run_12ECG_classifier 8 | from cfg import ModelCfg 9 | 10 | 11 | def load_challenge_data(filename): 12 | """ 13 | """ 14 | if ModelCfg.torch_dtype.lower() == 'double': 15 | dtype = np.float64 16 | else: 17 | dtype = np.float32 18 | x = loadmat(filename) 19 | data = np.asarray(x['val'], dtype=dtype) 20 | 21 | new_file = filename.replace('.mat','.hea') 22 | input_header_file = os.path.join(new_file) 23 | 24 | with open(input_header_file,'r') as f: 25 | header_data=f.readlines() 26 | 27 | 28 | return data, header_data 29 | 30 | 31 | def save_challenge_predictions(output_directory,filename,scores,labels,classes): 32 | """ 33 | """ 34 | recording = os.path.splitext(filename)[0] 35 | new_file = filename.replace('.mat','.csv') 36 | output_file = os.path.join(output_directory,new_file) 37 | 38 | # Include the filename as the recording number 39 | recording_string = '#{}'.format(recording) 40 | class_string = ','.join(classes) 41 | label_string = ','.join(str(i) for i in labels) 42 | score_string = ','.join(str(i) for i in scores) 43 | 44 | with open(output_file, 'w') as f: 45 | f.write(recording_string + '\n' + class_string + '\n' + label_string + '\n' + score_string + '\n') 46 | 47 | 48 | 49 | if __name__ == '__main__': 50 | # Parse arguments. 51 | if len(sys.argv) != 4: 52 | raise Exception('Include the input and output directories as arguments, e.g., python driver.py input output.') 53 | 54 | model_input = sys.argv[1] 55 | input_directory = sys.argv[2] 56 | output_directory = sys.argv[3] 57 | 58 | # Find files. 59 | input_files = [] 60 | for f in os.listdir(input_directory): 61 | if os.path.isfile(os.path.join(input_directory, f)) and not f.lower().startswith('.') and f.lower().endswith('mat'): 62 | input_files.append(f) 63 | 64 | if not os.path.isdir(output_directory): 65 | os.mkdir(output_directory) 66 | 67 | # Load model. 68 | print('Loading 12ECG model...') 69 | model = load_12ECG_model(model_input) 70 | 71 | # Iterate over files. 
72 | print('Extracting 12ECG features...') 73 | num_files = len(input_files) 74 | 75 | for i, f in enumerate(input_files): 76 | print(' {}/{}...'.format(i+1, num_files)) 77 | tmp_input_file = os.path.join(input_directory,f) 78 | data,header_data = load_challenge_data(tmp_input_file) 79 | current_label, current_score,classes = run_12ECG_classifier(data,header_data, model) 80 | # Save results. 81 | save_challenge_predictions(output_directory,f,current_score,current_label,classes) 82 | 83 | 84 | print('Done.') 85 | -------------------------------------------------------------------------------- /official_phase_legacy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | this folder consists of all code written during the official phase, 3 | and will no longer be updated 4 | 5 | further model development will be moved to the `torch_ecg` repository 6 | """ 7 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Problems: 3 | --------- 4 | 1. CNN: 5 | 1.1. small scale: 6 | filter length (kernel size, dilation), downsampling (stride), 7 | these mainly depend on frequency bands of regions of interest, 8 | like QRS complex, P wave, T wave, or even intervals like QT interval, 9 | and also finer structures like notches on QRS complexes 10 | 1.2. large scale: 11 | network depth, and block structures (e.g. ResNet vs. VGG); 12 | upsampling? 13 | 2. RNN: 14 | 2.1. choice between LSTM and attention 15 | 2.2. use the last state for the last classifying layer or use the whole sequence 16 | 17 | Frequency bands (from literature): 18 | ---------------------------------- 19 | QRS complex: 8 - 25 Hz 20 | P wave: 5 - 20 Hz 21 | T wave: 2.5 - 7 Hz 22 | notch: 30 - 50 Hz (?) 23 | NOTE that different literatures draw different conclusions; 24 | the above takes many of them into consideration 25 | 26 | Frequency bands (from ludb ref. [4]): 27 | ------------------------------------- 28 | from the annotations of ludb, the [0.05, 0.95] percentiles of the wave durations are 29 | QRS complex: 70 - 144 ms 30 | P wave: 60 - 134 ms 31 | T wave: 116 - 240 ms 32 | which roughly correspond to the following frequency bands: 33 | QRS complex: 7 - 15 Hz 34 | P wave: 7 - 17 Hz 35 | T wave: 4 - 9 Hz 36 | NOTE that in some records of ludb certain waves have no annotated onsets (offsets); 37 | in such cases, the duration is measured from the peaks to the offsets (onsets). 38 | 39 | according to ref [7], typical kernel sizes are 8 and 9 40 | 41 | References: 42 | ----------- 43 | [1] Lin, Chia-Hung. "Frequency-domain features for ECG beat discrimination using grey relational analysis-based classifier." Computers & Mathematics with Applications 55.4 (2008): 680-690. 44 | [2] Elgendi, Mohamed, Mirjam Jonkman, and Friso De Boer. "Frequency Bands Effects on QRS Detection." BIOSIGNALS 2003 (2010): 2002. 45 | [3] Tereshchenko, Larisa G., and Mark E. Josephson. "Frequency content and characteristics of ventricular conduction." Journal of electrocardiology 48.6 (2015): 933-937. 46 | [4] https://physionet.org/content/ludb/1.0.0/ 47 | [5] Kalyakulina, Alena, et al. "Lobachevsky University Electrocardiography Database" (version 1.0.0). PhysioNet (2020), https://doi.org/10.13026/qweb-sr17.
48 | [6] Kalyakulina, A.I., Yusipov, I.I., Moskalenko, V.A., Nikolskiy, A.V., Kozlov, A.A., Kosonogov, K.A., Zolotykh, N.Yu., Ivanchenko, M.V.: LU electrocardio-graphy database: a new open-access validation tool for delineation algorithms 49 | [7] Moskalenko, Viktor, Nikolai Zolotykh, and Grigory Osipov. "Deep Learning for ECG Segmentation." International Conference on Neuroinformatics. Springer, Cham, 2019. 50 | """ 51 | 52 | from .cnn import * 53 | from .rnn import * 54 | from .ecg_crnn import * 55 | from .ecg_unet import * 56 | from .ecg_subtract_unet import * 57 | from .ecg_seq_lab_net import * 58 | # from .ati_cnn import * 59 | # from .cpsc import * 60 | 61 | 62 | __all__ = [s for s in dir() if not s.startswith('_')] 63 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/ati_cnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | the model of (attention-based) time-incremental CNN 3 | 4 | the cnn layers of this model has a constant kernel size 3, 5 | but keep increasing the number of channels 6 | """ 7 | from copy import deepcopy 8 | 9 | from easydict import EasyDict as ED 10 | 11 | from .cnn import ( 12 | vgg_block_basic, vgg_block_mish, vgg_block_swish, 13 | vgg16, vgg16_leadwise, 14 | resnet_block_stanford, resnet_stanford, 15 | resnet_block_basic, resnet_bottle_neck, 16 | resnet, resnet_leadwise, 17 | ) 18 | 19 | 20 | __all__ = [ 21 | "ATI_CNN_CONFIG", 22 | ] 23 | 24 | 25 | ATI_CNN_CONFIG = ED() 26 | 27 | # cnn part 28 | ATI_CNN_CONFIG.cnn = ED() 29 | ATI_CNN_CONFIG.cnn.name = 'vgg16' 30 | 31 | 32 | if ATI_CNN_CONFIG.cnn.name == 'vgg16': 33 | ATI_CNN_CONFIG.cnn.vgg16 = deepcopy(vgg16) 34 | ATI_CNN_CONFIG.cnn.vgg16.block = deepcopy(vgg_block_basic) 35 | elif ATI_CNN_CONFIG.cnn.name == 'vgg16_mish': 36 | ATI_CNN_CONFIG.cnn.vgg16 = deepcopy(vgg16) 37 | ATI_CNN_CONFIG.cnn.vgg16.block = deepcopy(vgg_block_mish) 38 | elif ATI_CNN_CONFIG.cnn.name == 'vgg16_swish': 39 | ATI_CNN_CONFIG.cnn.vgg16 = deepcopy(vgg16) 40 | ATI_CNN_CONFIG.cnn.vgg16.block = deepcopy(vgg_block_swish) 41 | elif ATI_CNN_CONFIG.cnn.name == 'vgg16_dilation': # not finished 42 | ATI_CNN_CONFIG.cnn.vgg16 = deepcopy(vgg16) 43 | ATI_CNN_CONFIG.cnn.vgg16.block = deepcopy(vgg_block_basic) 44 | elif ATI_CNN_CONFIG.cnn.name == 'resnet': 45 | ATI_CNN_CONFIG.cnn.resnet = deepcopy(resnet) 46 | ATI_CNN_CONFIG.cnn.resnet.block = deepcopy(resnet_block_basic) 47 | elif ATI_CNN_CONFIG.cnn.name == 'resnet_bottleneck': 48 | ATI_CNN_CONFIG.cnn.resnet = deepcopy(resnet) 49 | ATI_CNN_CONFIG.cnn.resnet.block = deepcopy(resnet_bottle_neck) 50 | elif ATI_CNN_CONFIG.cnn.name == 'resnet_stanford': 51 | ATI_CNN_CONFIG.cnn.resnet = deepcopy(resnet_stanford) 52 | ATI_CNN_CONFIG.cnn.resnet.block = deepcopy(resnet_block_stanford) 53 | else: 54 | pass 55 | 56 | 57 | # rnn part 58 | ATI_CNN_CONFIG.rnn = ED() 59 | ATI_CNN_CONFIG.rnn.name = 'lstm' 60 | 61 | if ATI_CNN_CONFIG.rnn.name == 'lstm': 62 | ATI_CNN_CONFIG.rnn.bias = True 63 | ATI_CNN_CONFIG.rnn.dropout = 0.2 64 | ATI_CNN_CONFIG.rnn.bidirectional = True 65 | ATI_CNN_CONFIG.rnn.retseq = False 66 | ATI_CNN_CONFIG.rnn.hidden_sizes = [128,32] 67 | elif ATI_CNN_CONFIG.rnn.name == 'attention': 68 | pass 69 | else: 70 | pass 71 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/cpsc.py: -------------------------------------------------------------------------------- 1 | """ 2 | the best model of CPSC2018 3 | 4 | this model 
keeps number of channels constantly 12, without raising it at any step; 5 | the basic blocks of this model is a combination of 2 small kernel layer with 1 large kernel layer, 6 | following the pattern of 7 | baby vision --> baby vision --> giant vision 8 | """ 9 | from copy import deepcopy 10 | 11 | from easydict import EasyDict as ED 12 | 13 | from .cnn import ( 14 | cpsc_block_basic, 15 | cpsc_2018, 16 | ) 17 | 18 | 19 | __all__ = [ 20 | "CPSC_CONFIG", 21 | ] 22 | 23 | 24 | CPSC_CONFIG = ED() 25 | 26 | 27 | # cnn part 28 | CPSC_CONFIG.cnn = ED() 29 | CPSC_CONFIG.cnn.name = "cpsc_2018" 30 | 31 | if CPSC_CONFIG.cnn.name == "cpsc_2018": 32 | CPSC_CONFIG.cnn.cpsc_block = deepcopy(cpsc_block_basic) 33 | CPSC_CONFIG.cnn.cpsc = deepcopy(cpsc_2018) 34 | else: 35 | pass 36 | 37 | 38 | CPSC_CONFIG.rnn = ED() 39 | CPSC_CONFIG.rnn.activation = "leaky" 40 | CPSC_CONFIG.rnn.kw_activation = ED(negative_slope=0.2) 41 | CPSC_CONFIG.rnn.dropout = 0.2 42 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/ecg_crnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | configs of models of CRNN structures, for classification 3 | """ 4 | from copy import deepcopy 5 | 6 | from easydict import EasyDict as ED 7 | 8 | from .cnn import ( 9 | vgg_block_basic, vgg_block_mish, vgg_block_swish, 10 | vgg16, vgg16_leadwise, 11 | resnet_block_stanford, resnet_stanford, 12 | resnet_block_basic, resnet_bottle_neck, 13 | resnet, resnet_leadwise, 14 | multi_scopic_block, 15 | multi_scopic, multi_scopic_leadwise, 16 | ) 17 | from .rnn import ( 18 | lstm, 19 | attention, 20 | linear, 21 | ) 22 | 23 | 24 | __all__ = [ 25 | "ECG_CRNN_CONFIG", 26 | ] 27 | 28 | 29 | ECG_CRNN_CONFIG = ED() 30 | 31 | # cnn part 32 | ECG_CRNN_CONFIG.cnn = ED() 33 | ECG_CRNN_CONFIG.cnn.name = 'resnet_leadwise' 34 | 35 | 36 | ECG_CRNN_CONFIG.cnn.vgg16 = deepcopy(vgg16) 37 | ECG_CRNN_CONFIG.cnn.vgg16.block = deepcopy(vgg_block_basic) 38 | ECG_CRNN_CONFIG.cnn.vgg16_mish = deepcopy(vgg16) 39 | ECG_CRNN_CONFIG.cnn.vgg16_mish.block = deepcopy(vgg_block_mish) 40 | ECG_CRNN_CONFIG.cnn.vgg16_swish = deepcopy(vgg16) 41 | ECG_CRNN_CONFIG.cnn.vgg16_swish.block = deepcopy(vgg_block_swish) 42 | ECG_CRNN_CONFIG.cnn.vgg16_leadwise = deepcopy(vgg16_leadwise) 43 | ECG_CRNN_CONFIG.cnn.vgg16_leadwise.block = deepcopy(vgg_block_swish) 44 | # ECG_CRNN_CONFIG.cnn.vgg16_dilation = deepcopy(vgg16) 45 | # ECG_CRNN_CONFIG.cnn.vgg16_dilation.block = deepcopy(vgg_block_basic) 46 | 47 | ECG_CRNN_CONFIG.cnn.resnet = deepcopy(resnet) 48 | ECG_CRNN_CONFIG.cnn.resnet.block = deepcopy(resnet_block_basic) 49 | ECG_CRNN_CONFIG.cnn.resnet_bottleneck = deepcopy(resnet) 50 | ECG_CRNN_CONFIG.cnn.resnet_bottleneck.block = deepcopy(resnet_bottle_neck) 51 | ECG_CRNN_CONFIG.cnn.resnet_leadwise = deepcopy(resnet_leadwise) 52 | ECG_CRNN_CONFIG.cnn.resnet_leadwise.block = deepcopy(resnet_block_basic) 53 | 54 | ECG_CRNN_CONFIG.cnn.resnet_stanford = deepcopy(resnet_stanford) 55 | ECG_CRNN_CONFIG.cnn.resnet_stanford.block = deepcopy(resnet_block_stanford) 56 | 57 | ECG_CRNN_CONFIG.cnn.multi_scopic = deepcopy(multi_scopic) 58 | ECG_CRNN_CONFIG.cnn.multi_scopic.block = deepcopy(multi_scopic_block) 59 | ECG_CRNN_CONFIG.cnn.multi_scopic_leadwise = deepcopy(multi_scopic_leadwise) 60 | ECG_CRNN_CONFIG.cnn.multi_scopic_leadwise.block = deepcopy(multi_scopic_block) 61 | 62 | 63 | # rnn part 64 | ECG_CRNN_CONFIG.rnn = ED() 65 | ECG_CRNN_CONFIG.rnn.name = 'linear' # 'none', 'lstm', 'attention' 66 | 67 
| ECG_CRNN_CONFIG.rnn.lstm = deepcopy(lstm) 68 | ECG_CRNN_CONFIG.rnn.attention = deepcopy(attention) 69 | ECG_CRNN_CONFIG.rnn.linear = deepcopy(linear) 70 | 71 | 72 | # global pooling 73 | # currently is fixed using `AdaptiveMaxPool1d` 74 | ECG_CRNN_CONFIG.global_pool = 'max' # 'avg', 'attentive' 75 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/ecg_seq_lab_net.py: -------------------------------------------------------------------------------- 1 | """ 2 | configs of C(R)NN structure models, for ECG wave delineation 3 | """ 4 | from copy import deepcopy 5 | 6 | from easydict import EasyDict as ED 7 | 8 | from .cnn import ( 9 | multi_scopic_block, 10 | multi_scopic, multi_scopic_leadwise, 11 | ) 12 | 13 | 14 | __all__ = [ 15 | "ECG_SEQ_LAB_NET_CONFIG", 16 | ] 17 | 18 | 19 | ECG_SEQ_LAB_NET_CONFIG = ED() 20 | 21 | 22 | ECG_SEQ_LAB_NET_CONFIG.cnn = ED() 23 | ECG_SEQ_LAB_NET_CONFIG.cnn.name = 'multi_scopic' # 'multi_scopic_leadwise 24 | ECG_SEQ_LAB_NET_CONFIG.cnn.multi_scopic = deepcopy(multi_scopic) 25 | ECG_SEQ_LAB_NET_CONFIG.cnn.multi_scopic.block = deepcopy(multi_scopic_block) 26 | ECG_SEQ_LAB_NET_CONFIG.cnn.multi_scopic_leadwise = deepcopy(multi_scopic_leadwise) 27 | ECG_SEQ_LAB_NET_CONFIG.cnn.multi_scopic_leadwise.block = deepcopy(multi_scopic_block) 28 | 29 | 30 | ECG_SEQ_LAB_NET_CONFIG.rnn = ED() 31 | ECG_SEQ_LAB_NET_CONFIG.rnn.name = 'lstm' # 'none' 32 | ECG_SEQ_LAB_NET_CONFIG.rnn.lstm = ED() 33 | ECG_SEQ_LAB_NET_CONFIG.rnn.lstm.hidden_sizes = [256, 256] 34 | ECG_SEQ_LAB_NET_CONFIG.rnn.lstm.bias = True 35 | ECG_SEQ_LAB_NET_CONFIG.rnn.lstm.dropout = 0 36 | ECG_SEQ_LAB_NET_CONFIG.rnn.lstm.bidirectional = True 37 | 38 | 39 | ECG_SEQ_LAB_NET_CONFIG.attn = ED() 40 | ECG_SEQ_LAB_NET_CONFIG.attn.out_channels = [64] # not including the last linear layer 41 | ECG_SEQ_LAB_NET_CONFIG.attn.activation = "relu" 42 | ECG_SEQ_LAB_NET_CONFIG.attn.bias = True 43 | ECG_SEQ_LAB_NET_CONFIG.attn.kernel_initializer = 'he_normal' 44 | ECG_SEQ_LAB_NET_CONFIG.attn.dropouts = [0.2, 0.0] 45 | 46 | 47 | ECG_SEQ_LAB_NET_CONFIG.clf = ED() 48 | ECG_SEQ_LAB_NET_CONFIG.clf.out_channels = [256, 64] # not including the last linear layer 49 | ECG_SEQ_LAB_NET_CONFIG.clf.activation = "mish" 50 | ECG_SEQ_LAB_NET_CONFIG.clf.bias = True 51 | ECG_SEQ_LAB_NET_CONFIG.clf.kernel_initializer = 'he_normal' 52 | ECG_SEQ_LAB_NET_CONFIG.clf.dropouts = [0.2, 0.2, 0.0] 53 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/ecg_subtract_unet.py: -------------------------------------------------------------------------------- 1 | """ 2 | the model of UNET structures 3 | """ 4 | from itertools import repeat 5 | from copy import deepcopy 6 | 7 | from easydict import EasyDict as ED 8 | 9 | 10 | __all__ = [ 11 | "ECG_SUBTRACT_UNET_CONFIG", 12 | ] 13 | 14 | 15 | ECG_SUBTRACT_UNET_CONFIG = ED() 16 | 17 | ECG_SUBTRACT_UNET_CONFIG.groups = 1 18 | ECG_SUBTRACT_UNET_CONFIG.init_batch_norm = False 19 | 20 | 21 | # in triple conv 22 | ECG_SUBTRACT_UNET_CONFIG.init_num_filters = 16 23 | ECG_SUBTRACT_UNET_CONFIG.init_filter_length = 21 24 | ECG_SUBTRACT_UNET_CONFIG.init_dropouts = [0.0, 0.15, 0.0] 25 | ECG_SUBTRACT_UNET_CONFIG.batch_norm = True 26 | ECG_SUBTRACT_UNET_CONFIG.kernel_initializer = "he_normal" 27 | ECG_SUBTRACT_UNET_CONFIG.kw_initializer = {} 28 | ECG_SUBTRACT_UNET_CONFIG.activation = "relu" 29 | ECG_SUBTRACT_UNET_CONFIG.kw_activation = {} 30 | 31 | 32 | _num_convs = 3 # TripleConv 33 | 34 | # down, 
triple conv 35 | ECG_SUBTRACT_UNET_CONFIG.down_up_block_num = 3 36 | 37 | ECG_SUBTRACT_UNET_CONFIG.down_mode = 'max' 38 | ECG_SUBTRACT_UNET_CONFIG.down_scales = [10, 5, 2] 39 | init_down_num_filters = 24 40 | ECG_SUBTRACT_UNET_CONFIG.down_num_filters = [ 41 | list(repeat(init_down_num_filters * (2**idx), _num_convs)) \ 42 | for idx in range(0, ECG_SUBTRACT_UNET_CONFIG.down_up_block_num-1) 43 | ] 44 | ECG_SUBTRACT_UNET_CONFIG.down_filter_lengths = [ 45 | 11, 5 46 | ] 47 | ECG_SUBTRACT_UNET_CONFIG.down_dropouts = \ 48 | list(repeat([0.0, 0.15, 0.0], ECG_SUBTRACT_UNET_CONFIG.down_up_block_num-1)) 49 | 50 | 51 | # bottom, double conv 52 | ECG_SUBTRACT_UNET_CONFIG.bottom_num_filters = [ 53 | # branch 1 54 | list(repeat(init_down_num_filters*(2**(ECG_SUBTRACT_UNET_CONFIG.down_up_block_num-1)), 2)), 55 | # branch 2 56 | list(repeat(init_down_num_filters*(2**(ECG_SUBTRACT_UNET_CONFIG.down_up_block_num-1)), 2)), 57 | ] 58 | ECG_SUBTRACT_UNET_CONFIG.bottom_filter_lengths = [ 59 | list(repeat(5, 2)), # branch 1 60 | list(repeat(5, 2)), # branch 2 61 | ] 62 | ECG_SUBTRACT_UNET_CONFIG.bottom_dilations = [ 63 | # the ordering matters 64 | list(repeat(1, 2)), # branch 1 65 | list(repeat(10, 2)), # branch 2 66 | ] 67 | ECG_SUBTRACT_UNET_CONFIG.bottom_dropouts = [ 68 | [0.15, 0.0], # branch 1 69 | [0.15, 0.0], # branch 2 70 | ] 71 | 72 | 73 | # up, triple conv 74 | ECG_SUBTRACT_UNET_CONFIG.up_mode = 'nearest' 75 | ECG_SUBTRACT_UNET_CONFIG.up_scales = [2, 5, 10] 76 | ECG_SUBTRACT_UNET_CONFIG.up_num_filters = [ 77 | list(repeat(48, _num_convs)), 78 | list(repeat(24, _num_convs)), 79 | list(repeat(16, _num_convs)), 80 | ] 81 | ECG_SUBTRACT_UNET_CONFIG.up_deconv_filter_lengths = \ 82 | list(repeat(9, ECG_SUBTRACT_UNET_CONFIG.down_up_block_num)) 83 | ECG_SUBTRACT_UNET_CONFIG.up_conv_filter_lengths = [5, 11, 21] 84 | ECG_SUBTRACT_UNET_CONFIG.up_dropouts = [ 85 | [0.15, 0.15, 0.0], 86 | [0.15, 0.15, 0.0], 87 | [0.15, 0.15, 0.0], 88 | ] 89 | 90 | 91 | # out conv 92 | ECG_SUBTRACT_UNET_CONFIG.out_filter_length = 1 93 | 94 | 95 | unet_down_block = ED() 96 | unet_down_block.batch_norm = ECG_SUBTRACT_UNET_CONFIG.batch_norm 97 | unet_down_block.kernel_initializer = ECG_SUBTRACT_UNET_CONFIG.kernel_initializer 98 | unet_down_block.kw_initializer = deepcopy(ECG_SUBTRACT_UNET_CONFIG.kw_initializer) 99 | unet_down_block.activation = ECG_SUBTRACT_UNET_CONFIG.activation 100 | unet_down_block.kw_activation = deepcopy(ECG_SUBTRACT_UNET_CONFIG.kw_activation) 101 | 102 | 103 | unet_up_block = ED() 104 | unet_up_block.batch_norm = ECG_SUBTRACT_UNET_CONFIG.batch_norm 105 | unet_up_block.kernel_initializer = ECG_SUBTRACT_UNET_CONFIG.kernel_initializer 106 | unet_up_block.kw_initializer = deepcopy(ECG_SUBTRACT_UNET_CONFIG.kw_initializer) 107 | unet_up_block.activation = ECG_SUBTRACT_UNET_CONFIG.activation 108 | unet_up_block.kw_activation = deepcopy(ECG_SUBTRACT_UNET_CONFIG.kw_activation) 109 | 110 | 111 | ECG_SUBTRACT_UNET_CONFIG.down_block = deepcopy(unet_down_block) 112 | ECG_SUBTRACT_UNET_CONFIG.up_block = deepcopy(unet_up_block) 113 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/ecg_unet.py: -------------------------------------------------------------------------------- 1 | """ 2 | the model of UNET structures 3 | """ 4 | from itertools import repeat 5 | from copy import deepcopy 6 | 7 | from easydict import EasyDict as ED 8 | 9 | 10 | __all__ = [ 11 | "ECG_UNET_CONFIG", 12 | ] 13 | 14 | 15 | ECG_UNET_CONFIG = ED() 16 | 17 | ECG_UNET_CONFIG.groups = 1 18 | 
19 | ECG_UNET_CONFIG.init_num_filters = 4 # keep the same with n_classes 20 | ECG_UNET_CONFIG.init_filter_length = 9 21 | ECG_UNET_CONFIG.out_filter_length = 9 22 | ECG_UNET_CONFIG.batch_norm = True 23 | ECG_UNET_CONFIG.kernel_initializer = "he_normal" 24 | ECG_UNET_CONFIG.kw_initializer = {} 25 | ECG_UNET_CONFIG.activation = "relu" 26 | ECG_UNET_CONFIG.kw_activation = {} 27 | 28 | ECG_UNET_CONFIG.down_up_block_num = 4 29 | 30 | ECG_UNET_CONFIG.down_mode = 'max' 31 | ECG_UNET_CONFIG.down_scales = list(repeat(2, ECG_UNET_CONFIG.down_up_block_num)) 32 | ECG_UNET_CONFIG.down_num_filters = [ 33 | ECG_UNET_CONFIG.init_num_filters * (2**idx) \ 34 | for idx in range(1, ECG_UNET_CONFIG.down_up_block_num+1) 35 | ] 36 | ECG_UNET_CONFIG.down_filter_lengths = \ 37 | list(repeat(ECG_UNET_CONFIG.init_filter_length, ECG_UNET_CONFIG.down_up_block_num)) 38 | 39 | ECG_UNET_CONFIG.up_mode = 'nearest' 40 | ECG_UNET_CONFIG.up_scales = list(repeat(2, ECG_UNET_CONFIG.down_up_block_num)) 41 | ECG_UNET_CONFIG.up_num_filters = [ 42 | ECG_UNET_CONFIG.init_num_filters * (2**idx) \ 43 | for idx in range(ECG_UNET_CONFIG.down_up_block_num-1,-1,-1) 44 | ] 45 | ECG_UNET_CONFIG.up_deconv_filter_lengths = \ 46 | list(repeat(9, ECG_UNET_CONFIG.down_up_block_num)) 47 | ECG_UNET_CONFIG.up_conv_filter_lengths = \ 48 | list(repeat(ECG_UNET_CONFIG.init_filter_length, ECG_UNET_CONFIG.down_up_block_num)) 49 | 50 | 51 | unet_down_block = ED() 52 | unet_down_block.batch_norm = ECG_UNET_CONFIG.batch_norm 53 | unet_down_block.kernel_initializer = ECG_UNET_CONFIG.kernel_initializer 54 | unet_down_block.kw_initializer = deepcopy(ECG_UNET_CONFIG.kw_initializer) 55 | unet_down_block.activation = ECG_UNET_CONFIG.activation 56 | unet_down_block.kw_activation = deepcopy(ECG_UNET_CONFIG.kw_activation) 57 | 58 | 59 | unet_up_block = ED() 60 | unet_up_block.batch_norm = ECG_UNET_CONFIG.batch_norm 61 | unet_up_block.kernel_initializer = ECG_UNET_CONFIG.kernel_initializer 62 | unet_up_block.kw_initializer = deepcopy(ECG_UNET_CONFIG.kw_initializer) 63 | unet_up_block.activation = ECG_UNET_CONFIG.activation 64 | unet_up_block.kw_activation = deepcopy(ECG_UNET_CONFIG.kw_activation) 65 | 66 | 67 | ECG_UNET_CONFIG.down_block = deepcopy(unet_down_block) 68 | ECG_UNET_CONFIG.up_block = deepcopy(unet_up_block) 69 | -------------------------------------------------------------------------------- /official_phase_legacy/model_configs/rnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | the modules that follows CNN feature extractor, 3 | mainly RNN, but can also be attention, and linears with non-linear activations 4 | """ 5 | from copy import deepcopy 6 | 7 | from easydict import EasyDict as ED 8 | 9 | from cfg import ModelCfg 10 | 11 | 12 | __all__ = [ 13 | "lstm", 14 | "attention", 15 | ] 16 | 17 | 18 | lstm = ED() 19 | lstm.bias = True 20 | lstm.dropout = 0.2 21 | lstm.bidirectional = True 22 | lstm.retseq = False 23 | lstm.hidden_sizes = [12*24, 12*6] 24 | 25 | 26 | attention = ED() 27 | # almost the same with lstm, but the last layer is an attention layer 28 | attention.head_num = 12 29 | attention.bias = True 30 | attention.dropout = 0.2 31 | attention.bidirectional = True 32 | attention.hidden_sizes = [12*24, 12*6] 33 | 34 | 35 | # previously, if rnn is set 'none', 36 | # then cnn is followed by only ONE linear layer to make predictions 37 | # split this linear layer into several and adding non-linear activation function 38 | # might be able to let the model learn better classifying hyper-surfaces 39 | 
linear = ED() 40 | linear.out_channels = [ 41 | 256, 64, 42 | ] 43 | linear.bias = True 44 | linear.dropouts = 0.2 45 | linear.activation = 'mish' 46 | -------------------------------------------------------------------------------- /official_phase_legacy/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Resources: 3 | ---------- 4 | 1. ECG CRNN 5 | 2. special detectors 6 | 3. to add more 7 | 8 | Rules: 9 | ------ 10 | to write 11 | """ 12 | 13 | from .ecg_crnn import ECG_CRNN 14 | from .ecg_unet import ECG_UNET 15 | 16 | 17 | __all__ = [s for s in dir() if not s.startswith('_')] 18 | -------------------------------------------------------------------------------- /official_phase_legacy/models/_experimental/keras/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/official_phase_legacy/models/_experimental/keras/.keep -------------------------------------------------------------------------------- /official_phase_legacy/models/_experimental/pytorch/af_lstm.py: -------------------------------------------------------------------------------- 1 | """ 2 | AF (raw) detection with rr time series using lstm 3 | """ 4 | -------------------------------------------------------------------------------- /official_phase_legacy/models/_experimental/pytorch/ecg_spp_cnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | try using spp pooling to extract local and global features at the same time 3 | """ 4 | -------------------------------------------------------------------------------- /official_phase_legacy/models/_experimental/pytorch/ecg_yolo.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3rd place (entry 0436) of CPSC2019 3 | """ 4 | 5 | import sys 6 | from copy import deepcopy 7 | from collections import OrderedDict 8 | from itertools import repeat 9 | from typing import Union, Optional, Sequence, NoReturn 10 | from numbers import Real 11 | 12 | import torch 13 | from torch import nn 14 | from torch import Tensor 15 | import torch.nn.functional as F 16 | from easydict import EasyDict as ED 17 | 18 | from cfg import ModelCfg 19 | from models.utils.torch_utils import ( 20 | Conv_Bn_Activation, 21 | DownSample, ZeroPadding, 22 | ) 23 | from utils.utils_nn import compute_deconv_output_shape 24 | from utils.misc import dict_to_str 25 | 26 | if ModelCfg.torch_dtype.lower() == 'double': 27 | torch.set_default_tensor_type(torch.DoubleTensor) 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /official_phase_legacy/models/legacy/legacy_v01.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | from copy import deepcopy 4 | 5 | from keras import layers 6 | from keras import Input 7 | from keras.models import Sequential, Model, load_model 8 | from keras.layers import ( 9 | LSTM, GRU, 10 | TimeDistributed, Bidirectional, 11 | ReLU, LeakyReLU, 12 | BatchNormalization, 13 | Dense, Dropout, Activation, Flatten, 14 | Input, Reshape, GRU, CuDNNGRU, 15 | Conv1D, 16 | MaxPooling1D, 17 | GlobalAveragePooling1D, AveragePooling1D, 18 | concatenate, 19 | ) 20 | from keras.initializers import he_normal, he_uniform, Orthogonal 21 | from easydict import EasyDict as ED 22 | 23 | from model_configs.cnn import vgg16, 
vgg_block_basic, vgg_block_mish, vgg_block_swish 24 | 25 | 26 | SEED = 42 27 | 28 | 29 | def get_model(config:dict): 30 | """ 31 | """ 32 | cfg = ED(deepcopy(config)) 33 | 34 | model = Sequential(name='TI_CNN') 35 | 36 | vgg_block_cfg = deepcopy(vgg_block_basic) 37 | 38 | for block_idx, (num_convs, filters) in enumerate(zip(vgg16.num_convs, vgg16.num_filters)): 39 | for idx in range(num_convs): 40 | if block_idx == idx == 0: 41 | model.add( 42 | Conv1D( 43 | input_shape=(cfg.input_len, 12), 44 | filters=filters, 45 | kernel_size=vgg_block_cfg.filter_length, 46 | strides=vgg_block_cfg.subsample_length, 47 | padding='same', 48 | kernel_initializer=he_normal(SEED), 49 | ) 50 | ) 51 | else: 52 | model.add( 53 | Conv1D( 54 | filters=filters, 55 | kernel_size=vgg_block_cfg.filter_length, 56 | strides=vgg_block_cfg.subsample_length, 57 | padding='same', 58 | kernel_initializer=he_normal(SEED), 59 | ) 60 | ) 61 | model.add( 62 | BatchNormalization() 63 | ) 64 | model.add( 65 | ReLU() 66 | ) 67 | model.add( 68 | MaxPooling1D( 69 | pool_size=vgg_block_cfg.pool_size, 70 | strides=vgg_block_cfg.pool_size, 71 | ) 72 | ) 73 | 74 | if cfg.tranches_for_training: 75 | nb_classes = len(cfg.tranche_classes[cfg.tranches_for_training]) 76 | else: 77 | nb_classes = len(cfg.classes) 78 | 79 | for units in [256, 64]: 80 | model.add( 81 | Bidirectional(LSTM( 82 | units, kernel_initializer=Orthogonal(seed=SEED), 83 | return_sequences=True, 84 | )) 85 | ) 86 | model.add( 87 | Bidirectional(LSTM( 88 | nb_classes, kernel_initializer=Orthogonal(seed=SEED), 89 | return_sequences=False, 90 | )) 91 | ) 92 | 93 | model.add( 94 | Dense(nb_classes, activation='sigmoid') 95 | ) 96 | 97 | return model 98 | -------------------------------------------------------------------------------- /official_phase_legacy/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | -------------------------------------------------------------------------------- /official_phase_legacy/models/utils/keras_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import tensorflow as tf 4 | import keras 5 | from keras import layers 6 | from keras import Input 7 | from keras import initializers, regularizers, constraints 8 | from keras.models import Sequential, Model 9 | from keras.layers import ( 10 | Layer, 11 | LSTM, GRU, 12 | TimeDistributed, Bidirectional, 13 | ReLU, LeakyReLU, 14 | BatchNormalization, 15 | Dense, Dropout, Activation, Flatten, 16 | Input, Reshape, GRU, 17 | Conv1D, 18 | MaxPooling1D, 19 | GlobalAveragePooling1D, 20 | concatenate, add, 21 | ) 22 | from keras.initializers import he_normal, he_uniform, Orthogonal 23 | 24 | 25 | __all__ = [ 26 | "AttentionWithContext", 27 | ] 28 | 29 | 30 | def Mish(x:tf.Tensor) -> tf.Tensor: 31 | """ 32 | """ 33 | raise NotImplementedError 34 | 35 | 36 | class AttentionWithContext(Layer): 37 | """ 38 | from 0236 of CPSC2018 challenge 39 | """ 40 | def __init__(self, 41 | W_regularizer=None, u_regularizer=None, b_regularizer=None, 42 | W_constraint=None, u_constraint=None, b_constraint=None, 43 | bias=True, **kwargs): 44 | self.supports_masking = True 45 | self.init = initializers.get('glorot_uniform') 46 | self.W_regularizer = regularizers.get(W_regularizer) 47 | self.u_regularizer = regularizers.get(u_regularizer) 48 | self.b_regularizer = regularizers.get(b_regularizer) 49 | self.W_constraint = constraints.get(W_constraint) 50 | self.u_constraint = 
constraints.get(u_constraint) 51 | self.b_constraint = constraints.get(b_constraint) 52 | self.bias = bias 53 | super(AttentionWithContext, self).__init__(**kwargs) 54 | 55 | def build(self, input_shape): 56 | assert len(input_shape) == 3 57 | self.W = self.add_weight( 58 | shape=(input_shape[-1], input_shape[-1],), 59 | initializer=self.init, 60 | name='{}_W'.format(self.name), 61 | regularizer=self.W_regularizer, 62 | constraint=self.W_constraint 63 | ) 64 | if self.bias: 65 | self.b = self.add_weight( 66 | shape=(input_shape[-1],), 67 | initializer='zero', 68 | name='{}_b'.format(self.name), 69 | regularizer=self.b_regularizer, 70 | constraint=self.b_constraint 71 | ) 72 | self.u = self.add_weight( 73 | shape=(input_shape[-1],), 74 | initializer=self.init, 75 | name='{}_u'.format(self.name), 76 | regularizer=self.u_regularizer, 77 | constraint=self.u_constraint 78 | ) 79 | super(AttentionWithContext, self).build(input_shape) 80 | 81 | def compute_mask(self, input, input_mask=None): 82 | return None 83 | 84 | def call(self, x, mask=None): 85 | uit = self.dot_product(x, self.W) 86 | if self.bias: 87 | uit += self.b 88 | uit = K.tanh(uit) 89 | ait = self.dot_product(uit, self.u) 90 | a = K.exp(ait) 91 | if mask is not None: 92 | a *= K.cast(mask, K.floatx()) 93 | a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx()) 94 | a = K.expand_dims(a) 95 | weighted_input = x * a 96 | return K.sum(weighted_input, axis=1) 97 | 98 | def compute_output_shape(self, input_shape): 99 | return input_shape[0], input_shape[-1] 100 | 101 | def dot_product(self, x, kernel): 102 | """ 103 | """ 104 | if K.backend() == 'tensorflow': 105 | return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1) 106 | else: 107 | return K.dot(x, kernel) 108 | -------------------------------------------------------------------------------- /official_phase_legacy/train_12ECG_classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np, os, sys, joblib 4 | 5 | from scipy.io import loadmat 6 | from sklearn.impute import SimpleImputer 7 | from sklearn.ensemble import RandomForestClassifier 8 | 9 | from get_12ECG_features import get_12ECG_features 10 | from cfg import ModelCfg 11 | from utils import misc 12 | 13 | 14 | def train_12ECG_classifier(input_directory, output_directory): 15 | # Load data. 16 | print('Loading data...') 17 | 18 | header_files = [] 19 | for f in os.listdir(input_directory): 20 | g = os.path.join(input_directory, f) 21 | if not f.lower().startswith('.') and f.lower().endswith('hea') and os.path.isfile(g): 22 | header_files.append(g) 23 | 24 | classes = get_classes(input_directory, header_files) 25 | num_classes = len(classes) 26 | num_files = len(header_files) 27 | recordings = list() 28 | headers = list() 29 | 30 | for i in range(num_files): 31 | recording, header = load_challenge_data(header_files[i]) 32 | recordings.append(recording) 33 | headers.append(header) 34 | 35 | # Train model. 36 | print('Training model...') 37 | 38 | # TODO: replace with functions in train.py 39 | 40 | # Save model. 41 | print('Saving model...') 42 | 43 | final_model={'model':model, 'imputer':imputer,'classes':classes} 44 | 45 | filename = os.path.join(output_directory, 'finalized_model.sav') 46 | joblib.dump(final_model, filename, protocol=0) 47 | 48 | # Load challenge data. 
49 | def load_challenge_data(header_file): 50 | if ModelCfg.torch_dtype.lower() == 'double': 51 | dtype = np.float64 52 | else: 53 | dtype = np.float32 54 | with open(header_file, 'r') as f: 55 | header = f.readlines() 56 | mat_file = header_file.replace('.hea', '.mat') 57 | x = loadmat(mat_file) 58 | recording = np.asarray(x['val'], dtype=dtype)  # use the dtype determined from ModelCfg.torch_dtype 59 | return recording, header 60 | 61 | # Find unique classes. 62 | def get_classes(input_directory, filenames): 63 | classes = set() 64 | for filename in filenames: 65 | with open(filename, 'r') as f: 66 | for l in f: 67 | if l.startswith('#Dx'): 68 | tmp = l.split(': ')[1].split(',') 69 | for c in tmp: 70 | classes.add(c.strip()) 71 | return sorted(classes) 72 | -------------------------------------------------------------------------------- /official_phase_legacy/train_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os, sys 4 | from train_12ECG_classifier import train_12ECG_classifier 5 | 6 | if __name__ == '__main__': 7 | # Parse arguments. 8 | input_directory = sys.argv[1] 9 | output_directory = sys.argv[2] 10 | 11 | if not os.path.isdir(output_directory): 12 | os.mkdir(output_directory) 13 | 14 | print('Running training code...') 15 | 16 | train_12ECG_classifier(input_directory, output_directory) 17 | 18 | print('Done.') 19 | -------------------------------------------------------------------------------- /official_phase_legacy/train_seq_lab_net/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | -------------------------------------------------------------------------------- /official_phase_legacy/train_seq_lab_net/cfg_seq_lab_net.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import os 4 | from copy import deepcopy 5 | 6 | from easydict import EasyDict as ED 7 | -------------------------------------------------------------------------------- /official_phase_legacy/train_seq_lab_net/data_reader.py: -------------------------------------------------------------------------------- 1 | """ 2 | data reader for CPSC2019 3 | """ 4 | import os 5 | import json 6 | from collections import namedtuple 7 | from datetime import datetime 8 | from typing import Union, Optional, Any, List, Tuple, Dict, Sequence, NoReturn 9 | from numbers import Real 10 | 11 | import numpy as np 12 | np.set_printoptions(precision=5, suppress=True) 13 | import pandas as pd 14 | import wfdb 15 | from easydict import EasyDict as ED 16 | 17 | import utils 18 | from utils.misc import ( 19 | get_record_list_recursive, 20 | get_record_list_recursive2, 21 | get_record_list_recursive3, 22 | dict_to_str, 23 | ms2samples, 24 | ECGWaveForm, masks_to_waveforms, 25 | ) 26 | 27 | 28 | __all__ = [ 29 | "CPSC2019Reader", 30 | ] 31 | 32 | 33 | class CPSC2019Reader(object): 34 | """ 35 | """ 36 | def __init__(self, db_dir:str, working_dir:Optional[str]=None, verbose:int=2, **kwargs): 37 | """ 38 | Parameters: 39 | ----------- 40 | db_dir: str, 41 | storage path of the database 42 | working_dir: str, optional, 43 | working directory, to store intermediate files and log file 44 | verbose: int, default 2, 45 | """ 46 | raise NotImplementedError -------------------------------------------------------------------------------- /official_phase_legacy/train_seq_lab_net/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import os, sys 4
| import json 5 | from random import shuffle, randint 6 | from copy import deepcopy 7 | from functools import reduce 8 | from typing import Union, Optional, List, Tuple, Dict, Sequence, Set, NoReturn 9 | 10 | import numpy as np 11 | np.set_printoptions(precision=5, suppress=True) 12 | from easydict import EasyDict as ED 13 | from tqdm import tqdm 14 | import torch 15 | from torch.utils.data.dataset import Dataset 16 | from sklearn.preprocessing import StandardScaler 17 | 18 | # torch.set_default_tensor_type(torch.DoubleTensor) 19 | 20 | from .data_reader import CPSC2019Reader as CR -------------------------------------------------------------------------------- /official_phase_legacy/train_seq_lab_net/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | -------------------------------------------------------------------------------- /official_phase_legacy/train_seq_lab_net/train_seq_lab_net.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | -------------------------------------------------------------------------------- /official_phase_legacy/train_unet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/official_phase_legacy/train_unet/__init__.py -------------------------------------------------------------------------------- /official_phase_legacy/train_unet/cfg_unet.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: 3 | ----------- 4 | [1] Moskalenko, Viktor, Nikolai Zolotykh, and Grigory Osipov. "Deep Learning for ECG Segmentation." International Conference on Neuroinformatics. Springer, Cham, 2019. 5 | """ 6 | import os 7 | from copy import deepcopy 8 | 9 | from easydict import EasyDict as ED 10 | 11 | 12 | __all__ = [ 13 | "TrainCfg", 14 | ] 15 | 16 | _BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | 18 | TrainCfg = ED() 19 | 20 | # configs of files 21 | TrainCfg.db_dir = "/media/cfs/wenhao71/data/PhysioNet/ludb/1.0.0/" 22 | TrainCfg.log_dir = os.path.join(_BASE_DIR, 'log') 23 | TrainCfg.checkpoints = os.path.join(_BASE_DIR, "checkpoints") 24 | TrainCfg.keep_checkpoint_max = 20 25 | 26 | TrainCfg.fs = 500 27 | TrainCfg.train_ratio = 0.8 28 | TrainCfg.classes = [ 29 | 'p', # pwave 30 | 'N', # qrs complex 31 | 't', # twave 32 | 'i', # isoelectric 33 | ] 34 | TrainCfg.class_map = ED(p=1, N=2, t=3, i=0) 35 | 36 | TrainCfg.leads_ordering = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6',] 37 | TrainCfg.lead = 'II' # the lead to tain model, None --> all leads 38 | TrainCfg.use_single_lead = True # use single lead as input or use all 12 leads. used only when `TrainCfg.lead` = None 39 | 40 | # as for `start_from` and `end_at`, see ref. [1] section 3.1 41 | TrainCfg.start_from = int(2 * TrainCfg.fs) 42 | TrainCfg.end_at = int(2 * TrainCfg.fs) 43 | TrainCfg.input_len = int(4 * TrainCfg.fs) 44 | 45 | TrainCfg.over_sampling = 2 46 | 47 | # configs of training epochs, batch, etc. 
48 | TrainCfg.n_epochs = 300 49 | TrainCfg.batch_size = 128 50 | # TrainCfg.max_batches = 500500 51 | 52 | # configs of optimizers and lr_schedulers 53 | TrainCfg.train_optimizer = "adam" # "sgd", "rmsprop", 54 | 55 | TrainCfg.learning_rate = 0.0001 56 | TrainCfg.lr = TrainCfg.learning_rate 57 | TrainCfg.lr_step_size = 50 58 | TrainCfg.lr_gamma = 0.1 59 | 60 | # configs of loss function 61 | TrainCfg.loss = 'CrossEntropyLoss' 62 | TrainCfg.eval_every = 20 63 | -------------------------------------------------------------------------------- /references/AttentionDeepMIL/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Maximilian Ilse and Jakub Tomczak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /references/AttentionDeepMIL/README.md: -------------------------------------------------------------------------------- 1 | Attention-based Deep Multiple Instance Learning 2 | ================================================ 3 | 4 | by Maximilian Ilse (), Jakub M. Tomczak () and Max Welling 5 | 6 | Overview 7 | -------- 8 | 9 | PyTorch implementation of our paper "Attention-based Deep Multiple Instance Learning": 10 | * Ilse, M., Tomczak, J. M., & Welling, M. (2018). Attention-based Deep Multiple Instance Learning. arXiv preprint arXiv:1802.04712. [link](https://arxiv.org/pdf/1802.04712.pdf). 11 | 12 | 13 | Installation 14 | ------------ 15 | 16 | Installing Pytorch 0.3.1, using pip or conda, should resolve all dependencies. 17 | Tested with Python 2.7, but should work with 3.x as well. 18 | Tested on both CPU and GPU. 19 | 20 | 21 | Content 22 | -------- 23 | 24 | The code can be used to run the MNIST-BAGS experiment, see Section 4.2 and Figure 1 in our [paper](https://arxiv.org/pdf/1802.04712.pdf). 25 | In order to have a small and concise experimental setup, the code has the following limitation: 26 | + Mean bag length parameter shouldn't be much larger than 10, for larger numbers the training dataset will become unbalanced very quickly. 
You can run the data loader on its own to check; see the __main__ part of dataloader.py 27 | + No validation set is used during training, no early stopping 28 | 29 | __NOTE__: In order to run experiments on the histopathology datasets, please download datasets [Breast Cancer](http://bioimage.ucsb.edu/research/bio-segmentation) and [Colon Cancer](https://warwick.ac.uk/fac/sci/dcs/research/tia/data/crchistolabelednucleihe/). In the histopathology experiments we used a similar model to the model in `model.py`; please see the [paper](https://arxiv.org/pdf/1802.04712.pdf) for details. 30 | 31 | 32 | How to Use 33 | ---------- 34 | `dataloader.py`: Generates the training and test sets by combining multiple MNIST images into bags. A bag is given a positive label if it contains one or more images with the label specified by the variable target_number (a toy sketch of this labeling rule is given at the end of this README). 35 | If run as main, it computes the ratio of positive bags as well as the mean, max and min number of instances per bag. 36 | 37 | `mnist_bags_loader.py`: The original data loader we used in the experiments. It can handle any bag length without the dataset becoming unbalanced. It is probably not the most efficient way to create the bags. Furthermore, it is only tested for the case where the target number is ‘9’. 38 | 39 | `main.py`: Trains a small CNN with the Adam optimization algorithm. 40 | The training takes 20 epochs. Finally, the accuracy and loss of the model on the test set are computed. 41 | In addition, a subset of the bag labels and instance labels are printed. 42 | 43 | `model.py`: The model is a modified LeNet-5, see . 44 | The Attention-based MIL pooling is located before the last layer of the model. 45 | The objective function is the negative log-likelihood of the Bernoulli distribution. 46 | 47 | 48 | Questions and Issues 49 | -------------------- 50 | 51 | If you find any bugs or have any questions about this code, please contact Maximilian or Jakub. We cannot guarantee any support for this software. 52 | 53 | Citation 54 | -------------------- 55 | 56 | Please cite our paper if you use this code in your research: 57 | ``` 58 | @article{ITW:2018, 59 | title={Attention-based Deep Multiple Instance Learning}, 60 | author={Ilse, Maximilian and Tomczak, Jakub M and Welling, Max}, 61 | journal={arXiv preprint arXiv:1802.04712}, 62 | year={2018} 63 | } 64 | ``` 65 | 66 | Acknowledgments 67 | -------------------- 68 | 69 | The work conducted by Maximilian Ilse was funded by the Nederlandse Organisatie voor Wetenschappelijk Onderzoek (Grant DLMedIa: Deep Learning for Medical Image Analysis). 70 | 71 | The work conducted by Jakub Tomczak was funded by the European Commission within the Marie Skłodowska-Curie Individual Fellowship (Grant No. 702666, “Deep learning and Bayesian inference for medical imaging”).
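As a toy illustration of the bag-labeling rule described under `dataloader.py` above, the sketch below builds random MNIST-style bags and labels each bag positive iff it contains the target digit. It is not code from this repository; the `make_bag` helper, the target digit 9 and the bag-length distribution are assumptions made purely for illustration, and the sketch also shows why large mean bag lengths skew the label distribution:

```python
import numpy as np

rng = np.random.default_rng(0)

def make_bag(mean_bag_length=10, target_number=9):
    # Draw a random bag of MNIST-style digit labels; the bag label is positive
    # iff at least one instance equals target_number (the MIL rule above).
    length = max(1, int(rng.normal(mean_bag_length, 2)))
    instances = rng.integers(0, 10, size=length)
    return instances, float((instances == target_number).any())

bags = [make_bag() for _ in range(1000)]
print("ratio of positive bags:", np.mean([label for _, label in bags]))
# with a mean bag length of 10, roughly 1 - 0.9**10 (about 0.65) of the bags are positive,
# which is the imbalance the limitation above warns about for much longer bags
```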
72 | -------------------------------------------------------------------------------- /references/AttentionDeepMIL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/references/AttentionDeepMIL/__init__.py -------------------------------------------------------------------------------- /references/AttentionDeepMIL/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Attention(nn.Module): 7 | def __init__(self): 8 | super(Attention, self).__init__() 9 | self.L = 500 10 | self.D = 128 11 | self.K = 1 12 | 13 | self.feature_extractor_part1 = nn.Sequential( 14 | nn.Conv2d(1, 20, kernel_size=5), 15 | nn.ReLU(), 16 | nn.MaxPool2d(2, stride=2), 17 | nn.Conv2d(20, 50, kernel_size=5), 18 | nn.ReLU(), 19 | nn.MaxPool2d(2, stride=2) 20 | ) 21 | 22 | self.feature_extractor_part2 = nn.Sequential( 23 | nn.Linear(50 * 4 * 4, self.L), 24 | nn.ReLU(), 25 | ) 26 | 27 | self.attention = nn.Sequential( 28 | nn.Linear(self.L, self.D), 29 | nn.Tanh(), 30 | nn.Linear(self.D, self.K) 31 | ) 32 | 33 | self.classifier = nn.Sequential( 34 | nn.Linear(self.L*self.K, 1), 35 | nn.Sigmoid() 36 | ) 37 | 38 | def forward(self, x): 39 | x = x.squeeze(0) 40 | 41 | H = self.feature_extractor_part1(x) 42 | H = H.view(-1, 50 * 4 * 4) 43 | H = self.feature_extractor_part2(H) # NxL 44 | 45 | A = self.attention(H) # NxK 46 | A = torch.transpose(A, 1, 0) # KxN 47 | A = F.softmax(A, dim=1) # softmax over N 48 | 49 | M = torch.mm(A, H) # KxL 50 | 51 | Y_prob = self.classifier(M) 52 | Y_hat = torch.ge(Y_prob, 0.5).float() 53 | 54 | return Y_prob, Y_hat, A 55 | 56 | # AUXILIARY METHODS 57 | def calculate_classification_error(self, X, Y): 58 | Y = Y.float() 59 | _, Y_hat, _ = self.forward(X) 60 | error = 1. - Y_hat.eq(Y).cpu().float().mean().data[0] 61 | 62 | return error, Y_hat 63 | 64 | def calculate_objective(self, X, Y): 65 | Y = Y.float() 66 | Y_prob, _, A = self.forward(X) 67 | Y_prob = torch.clamp(Y_prob, min=1e-5, max=1. - 1e-5) 68 | neg_log_likelihood = -1. * (Y * torch.log(Y_prob) + (1. - Y) * torch.log(1. 
- Y_prob)) # negative log bernoulli 69 | 70 | return neg_log_likelihood, A 71 | 72 | class GatedAttention(nn.Module): 73 | def __init__(self): 74 | super(GatedAttention, self).__init__() 75 | self.L = 500 76 | self.D = 128 77 | self.K = 1 78 | 79 | self.feature_extractor_part1 = nn.Sequential( 80 | nn.Conv2d(1, 20, kernel_size=5), 81 | nn.ReLU(), 82 | nn.MaxPool2d(2, stride=2), 83 | nn.Conv2d(20, 50, kernel_size=5), 84 | nn.ReLU(), 85 | nn.MaxPool2d(2, stride=2) 86 | ) 87 | 88 | self.feature_extractor_part2 = nn.Sequential( 89 | nn.Linear(50 * 4 * 4, self.L), 90 | nn.ReLU(), 91 | ) 92 | 93 | self.attention_V = nn.Sequential( 94 | nn.Linear(self.L, self.D), 95 | nn.Tanh() 96 | ) 97 | 98 | self.attention_U = nn.Sequential( 99 | nn.Linear(self.L, self.D), 100 | nn.Sigmoid() 101 | ) 102 | 103 | self.attention_weights = nn.Linear(self.D, self.K) 104 | 105 | self.classifier = nn.Sequential( 106 | nn.Linear(self.L*self.K, 1), 107 | nn.Sigmoid() 108 | ) 109 | 110 | def forward(self, x): 111 | x = x.squeeze(0) 112 | 113 | H = self.feature_extractor_part1(x) 114 | H = H.view(-1, 50 * 4 * 4) 115 | H = self.feature_extractor_part2(H) # NxL 116 | 117 | A_V = self.attention_V(H) # NxD 118 | A_U = self.attention_U(H) # NxD 119 | A = self.attention_weights(A_V * A_U) # element wise multiplication # NxK 120 | A = torch.transpose(A, 1, 0) # KxN 121 | A = F.softmax(A, dim=1) # softmax over N 122 | 123 | M = torch.mm(A, H) # KxL 124 | 125 | Y_prob = self.classifier(M) 126 | Y_hat = torch.ge(Y_prob, 0.5).float() 127 | 128 | return Y_prob, Y_hat, A 129 | 130 | # AUXILIARY METHODS 131 | def calculate_classification_error(self, X, Y): 132 | Y = Y.float() 133 | _, Y_hat, _ = self.forward(X) 134 | error = 1. - Y_hat.eq(Y).cpu().float().mean().item() 135 | 136 | return error, Y_hat 137 | 138 | def calculate_objective(self, X, Y): 139 | Y = Y.float() 140 | Y_prob, _, A = self.forward(X) 141 | Y_prob = torch.clamp(Y_prob, min=1e-5, max=1. - 1e-5) 142 | neg_log_likelihood = -1. * (Y * torch.log(Y_prob) + (1. - Y) * torch.log(1. 
- Y_prob)) # negative log bernoulli 143 | 144 | return neg_log_likelihood, A 145 | -------------------------------------------------------------------------------- /references/CPSC0223/LSTM0922.json: -------------------------------------------------------------------------------- 1 | {"config": [{"config": {"return_state": false, "recurrent_dropout": 0.2, "batch_input_shape": [null, 45, 16], "kernel_regularizer": null, "recurrent_activation": "hard_sigmoid", "kernel_constraint": null, "bias_constraint": null, "return_sequences": false, "units": 64, "implementation": 1, "unroll": false, "name": "lstm_1", "recurrent_constraint": null, "kernel_initializer": {"config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "recurrent_initializer": {"config": {"seed": null, "gain": 1.0}, "class_name": "Orthogonal"}, "go_backwards": false, "dtype": "float32", "stateful": false, "activation": "tanh", "activity_regularizer": null, "recurrent_regularizer": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "trainable": true, "unit_forget_bias": true, "dropout": 0.2, "use_bias": true}, "class_name": "LSTM"}, {"config": {"kernel_initializer": {"config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "activation": "relu", "kernel_regularizer": null, "kernel_constraint": null, "bias_constraint": null, "activity_regularizer": null, "units": 64, "bias_regularizer": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "trainable": true, "name": "dense_1", "use_bias": true}, "class_name": "Dense"}, {"config": {"rate": 0.5, "name": "dropout_1", "trainable": true, "noise_shape": null, "seed": null}, "class_name": "Dropout"}, {"config": {"kernel_initializer": {"config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "activation": "softmax", "kernel_regularizer": null, "kernel_constraint": null, "bias_constraint": null, "activity_regularizer": null, "units": 9, "bias_regularizer": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "trainable": true, "name": "dense_2", "use_bias": true}, "class_name": "Dense"}], "keras_version": "2.1.5", "backend": "tensorflow", "class_name": "Sequential"} -------------------------------------------------------------------------------- /references/CPSC0223/time_validation.txt: -------------------------------------------------------------------------------- 1 | 175 seconds -------------------------------------------------------------------------------- /references/CPSC0236/CPSC0236-LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/references/CPSC0236/CPSC0236-LICENSE.txt -------------------------------------------------------------------------------- /references/CPSC0236/note.txt: -------------------------------------------------------------------------------- 1 | SYSTEM REQUIREMENTS: 2 | NVIDIA GPU 3 | 4 | addional model and file: 5 | 131 files in the zipped file except note.txt, time_validation.txt, cpsc2018.py 6 | please put these 131 files into the same file path as record (mat. 
file of ECG) 7 | 8 | package: 9 | CUDA 10 | cuDNN 11 | GPU version of TensorFlow 12 | random 13 | os 14 | argparse 15 | scipy 16 | keras 17 | sklearn 18 | csv 19 | numpy 20 | pandas -------------------------------------------------------------------------------- /references/CPSC0236/time_validation.txt: -------------------------------------------------------------------------------- 1 | 30 minutes -------------------------------------------------------------------------------- /references/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | references, 3 | mainly for model development, 4 | also serve as baselines 5 | 6 | [1] Yao, Qihang, et al. "Multi-class Arrhythmia detection from 12-lead varied-length ECG using Attention-based Time-Incremental Convolutional Neural Network." Information Fusion 53 (2020): 174-182. 7 | [2] Hannun, Awni Y., et al. "Cardiologist-level arrhythmia detection and classification in ambulatory electrocardiograms using a deep neural network." Nature medicine 25.1 (2019): 65. 8 | [3] https://stanfordmlgroup.github.io/projects/ecg2/ 9 | [4] https://github.com/awni/ecg 10 | [5] Hong, Shenda, et al. "ENCASE: An ENsemble ClASsifiEr for ECG classification using expert features and deep neural networks." 2017 Computing in cardiology (cinc). IEEE, 2017. 11 | [6] Ilse, Maximilian, Jakub M. Tomczak, and Max Welling. "Attention-based deep multiple instance learning." arXiv preprint arXiv:1802.04712 (2018). 12 | [7] https://github.com/AMLab-Amsterdam/AttentionDeepMIL 13 | [8] CPSC0236 from CPSC2018 challenge 14 | [9] CPSC0223 from CPSC2018 challenge 15 | 16 | to add more... 17 | """ 18 | -------------------------------------------------------------------------------- /references/ati_cnn/const.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | SEED = 42 5 | 6 | freq = 500 7 | cell_len_t = 6 8 | model_input_len = freq * cell_len_t  # 6-second cells sampled at 500 Hz --> 3000 samples 9 | 10 | batch_size = 128 11 | 12 | all_labels = ['N', 'AF', 'I-AVB', 'LBBB', 'RBBB', 'PAC', 'PVC', 'STD', 'STE'] 13 | nb_classes = len(all_labels) 14 | 15 | nb_leads = 12 16 | -------------------------------------------------------------------------------- /references/ati_cnn/crnn_keras.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import keras 4 | from keras.preprocessing import sequence 5 | from keras.models import Sequential 6 | from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional 7 | -------------------------------------------------------------------------------- /references/ati_cnn/crnn_torch.py: -------------------------------------------------------------------------------- 1 | """ 2 | original source: 3 | https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py 4 | 5 | original license: 6 | 7 | The MIT License (MIT) 8 | 9 | Copyright (c) 2017 Jieru Mei meijieru@gmail.com 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all copies or substantial
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | """ 17 | import numpy as np 18 | import argparse 19 | import torch.nn as nn 20 | import torch.nn.functional as F 21 | 22 | 23 | class BidirectionalLSTM(nn.Module): 24 | 25 | def __init__(self, input_size, hidden_size, output_size): 26 | super(BidirectionalLSTM, self).__init__() 27 | 28 | self.rnn = nn.LSTM(input_size, hidden_size, bidirectional=True) 29 | self.embedding = nn.Linear(hidden_size * 2, output_size) 30 | 31 | def forward(self, input): 32 | recurrent, _ = self.rnn(input) 33 | T, b, h = recurrent.size() 34 | t_rec = recurrent.view(T * b, h) 35 | 36 | output = self.embedding(t_rec) # [T * b, nOut] 37 | output = output.view(T, b, -1) 38 | 39 | return output 40 | 41 | 42 | class CRNN(nn.Module): 43 | 44 | def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False): 45 | super(CRNN, self).__init__() 46 | assert imgH % 16 == 0, 'imgH has to be a multiple of 16' 47 | 48 | ks = [3, 3, 3, 3, 3, 3, 2] 49 | ps = [1, 1, 1, 1, 1, 1, 0] 50 | ss = [1, 1, 1, 1, 1, 1, 1] 51 | nm = [64, 128, 256, 256, 512, 512, 512] 52 | 53 | cnn = nn.Sequential() 54 | 55 | def convRelu(i, batchNormalization=False): 56 | nIn = nc if i == 0 else nm[i - 1] 57 | nOut = nm[i] 58 | cnn.add_module('conv{0}'.format(i), 59 | nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i])) 60 | if batchNormalization: 61 | cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut)) 62 | if leakyRelu: 63 | cnn.add_module('relu{0}'.format(i), 64 | nn.LeakyReLU(0.2, inplace=True)) 65 | else: 66 | cnn.add_module('relu{0}'.format(i), nn.ReLU(True)) 67 | 68 | convRelu(0) 69 | cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64 70 | convRelu(1) 71 | cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32 72 | convRelu(2, True) 73 | convRelu(3) 74 | cnn.add_module('pooling{0}'.format(2), 75 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 256x4x16 76 | convRelu(4, True) 77 | convRelu(5) 78 | cnn.add_module('pooling{0}'.format(3), 79 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 512x2x16 80 | convRelu(6, True) # 512x1x16 81 | 82 | self.cnn = cnn 83 | self.rnn = nn.Sequential( 84 | BidirectionalLSTM(512, nh, nh), 85 | BidirectionalLSTM(nh, nh, nclass)) 86 | 87 | def forward(self, input): 88 | # conv features 89 | conv = self.cnn(input) 90 | b, c, h, w = conv.size() 91 | assert h == 1, "the height of conv must be 1" 92 | conv = conv.squeeze(2) 93 | conv = conv.permute(2, 0, 1) # [w, b, c] 94 | 95 | # rnn features 96 | output = self.rnn(conv) 97 | 98 | return output 99 | -------------------------------------------------------------------------------- /references/ati_cnn/train_das.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .ti_cnn_model import get_model as ti_cnn 4 | 5 | 6 | SEED = 42 7 | x = np.load("/mnt/wenhao71/data/cinc2020_data/cpsc_x.npy") 8 | y = np.load("/mnt/wenhao71/data/cinc2020_data/cpsc_y.npy") 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- 
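As a rough usage sketch for the `CRNN` class in `crnn_torch.py` above (assuming the file is importable; the 1-channel 32x100 input, 37 classes, 256 hidden units and batch size 4 are arbitrary illustrative values, not settings used anywhere in this repository), the following checks the expected output shape:

```python
import torch
from crnn_torch import CRNN  # assumes references/ati_cnn is on the Python path

model = CRNN(imgH=32, nc=1, nclass=37, nh=256)  # imgH must be a multiple of 16
x = torch.randn(4, 1, 32, 100)                  # (batch, channels, height, width)
with torch.no_grad():
    out = model(x)
print(out.shape)  # (seq_len, batch, nclass); for width 100 this prints torch.Size([26, 4, 37])
```

The conv stack must collapse the height to 1 (hence the assert in `forward`), while the input width determines the length of the sequence handed to the two stacked bidirectional LSTMs.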
/references/encase/AUTHORS.txt: -------------------------------------------------------------------------------- 1 | Shenda Hong, Peking University 2 | Yuxi Zhou, Peking University 3 | Qingyun Wang, Peking University 4 | Meng Wu, Peking University 5 | Junyuan Shang, Huazhong University of Science and Technology -------------------------------------------------------------------------------- /references/encase/README.md: -------------------------------------------------------------------------------- 1 | Notice: this repo is no more updated. 2 | - Extraction of expert features can be found at https://github.com/hsd1503/ENCASE/tree/master/code/featrues_*.py 3 | - Recent PyTorch implementation of a collection of deep models on 1d signal data can be found at https://github.com/hsd1503/resnet1d 4 | 5 | # ENCASE 6 | 7 | ENCASE combines deep neural networks and expert features together for AF Classification from a short single lead ECG recording. It won the First Place in the PhysioNet/Computing in Cardiology Challenge 2017 (https://physionet.org/challenge/2017), with an overall F1 score of 0.83. The original code can be downloaded from https://physionet.org/challenge/2017/sources/shenda-hong-221.zip 8 | 9 | Detailed description of ENCASE can be found at http://www.cinc.org/archives/2017/pdf/178-245.pdf. 10 | If you find the idea useful or use this code in your own work, please cite our paper as 11 | ``` 12 | @inproceedings{hong2017encase, 13 | author = {Shenda Hong and Meng Wu and Yuxi Zhou and Qingyun Wang and Junyuan Shang and Hongyan Li and Junqing Xie}, 14 | title = {{ENCASE:} an ENsemble ClASsifiEr for {ECG} Classification Using Expert 15 | Features and Deep Neural Networks}, 16 | booktitle = {CinC}, 17 | year = {2017}, 18 | url = {https://doi.org/10.22489/CinC.2017.178-245}, 19 | doi = {10.22489/CinC.2017.178-245} 20 | } 21 | ``` 22 | and 23 | ``` 24 | @article{hong2019combining, 25 | doi = {10.1088/1361-6579/ab15a2}, 26 | url = {https://doi.org/10.1088%2F1361-6579%2Fab15a2}, 27 | year = 2019, 28 | month = {Jun}, 29 | publisher = {{IOP} Publishing}, 30 | volume = {40}, 31 | number = {5}, 32 | pages = {054009}, 33 | author = {Shenda Hong and Yuxi Zhou and Meng Wu and Junyuan Shang and Qingyun Wang and Hongyan Li and Junqing Xie}, 34 | title = {Combining deep neural networks and engineered features for cardiac arrhythmia detection from {ECG} recordings}, 35 | journal = {Physiological Measurement} 36 | } 37 | ``` 38 | 39 | 40 | ## Task Description 41 | 42 | Please refer to the Challenge website https://physionet.org/challenge/2017/#introduction and Challenge description paper http://www.cinc.org/archives/2017/pdf/065-469.pdf. 
43 | 44 | ## Dataset 45 | 46 | **Data** Training data can be found at https://archive.physionet.org/challenge/2017/#challenge-data 47 | 48 | **Label** Please use Revised labels (v3) at https://archive.physionet.org/challenge/2017/REFERENCE-v3.csv 49 | 50 | **Preprocessed** Or you can download my preprocessed dataset challenge2017.pkl from https://drive.google.com/drive/folders/1AuPxvGoyUbKcVaFmeyt3xsqj6ucWZezf 51 | -------------------------------------------------------------------------------- /references/encase/answers.txt: -------------------------------------------------------------------------------- 1 | A00001,N 2 | A00002,N 3 | A00003,N 4 | A00004,A 5 | A00005,A 6 | A00006,N 7 | A00007,N 8 | A00008,O 9 | A00009,A 10 | A00010,N 11 | A00011,N 12 | A00012,N 13 | A00013,O 14 | A00014,N 15 | A00015,A 16 | A00016,N 17 | A00017,O 18 | A00018,N 19 | A00019,N 20 | A00020,O 21 | A00021,N 22 | A00022,~ 23 | A00023,O 24 | A00024,O 25 | A00025,N 26 | A00026,N 27 | A00027,A 28 | A00028,N 29 | A00029,O 30 | A00030,O 31 | A00031,N 32 | A00032,N 33 | A00033,N 34 | A00034,~ 35 | A00035,N 36 | A00036,N 37 | A00037,O 38 | A00038,O 39 | A00039,N 40 | A00040,N 41 | A00041,O 42 | A00042,N 43 | A00043,O 44 | A00044,N 45 | A00045,N 46 | A00046,N 47 | A00047,O 48 | A00048,N 49 | A00049,N 50 | A00050,N 51 | A00051,N 52 | A00052,N 53 | A00053,N 54 | A00054,A 55 | A00055,O 56 | A00056,~ 57 | A00057,N 58 | A00058,O 59 | A00059,N 60 | A00060,N 61 | A00061,O 62 | A00062,N 63 | A00063,N 64 | A00064,N 65 | A00065,O 66 | A00066,O 67 | A00067,A 68 | A00068,N 69 | A00069,O 70 | A00070,O 71 | A00071,A 72 | A00072,N 73 | A00073,N 74 | A00074,O 75 | A00075,O 76 | A00076,N 77 | A00077,O 78 | A00078,O 79 | A00079,N 80 | A00080,N 81 | A00081,N 82 | A00082,O 83 | A00083,O 84 | A00084,N 85 | A00085,N 86 | A00086,N 87 | A00087,A 88 | A00088,O 89 | A00089,N 90 | A00090,A 91 | A00091,N 92 | A00092,O 93 | A00093,N 94 | A00094,N 95 | A00095,N 96 | A00096,O 97 | A00097,N 98 | A00098,N 99 | A00099,N 100 | A00100,O 101 | A00101,A 102 | A00102,A 103 | A00103,O 104 | A00104,N 105 | A00105,N 106 | A00106,~ 107 | A00107,A 108 | A00108,O 109 | A00109,N 110 | A00110,O 111 | A00111,N 112 | A00112,N 113 | A00113,N 114 | A00114,O 115 | A00115,O 116 | A00116,N 117 | A00117,N 118 | A00118,N 119 | A00119,O 120 | A00120,O 121 | A00121,O 122 | A00122,N 123 | A00123,O 124 | A00124,N 125 | A00125,~ 126 | A00126,O 127 | A00127,N 128 | A00128,A 129 | A00129,N 130 | A00130,N 131 | A00131,O 132 | A00132,A 133 | A00133,O 134 | A00134,N 135 | A00135,N 136 | A00136,O 137 | A00137,A 138 | A00138,O 139 | A00139,~ 140 | A00140,N 141 | A00141,A 142 | A00142,N 143 | A00143,N 144 | A00144,N 145 | A00145,O 146 | A00146,N 147 | A00147,N 148 | A00148,N 149 | A00149,N 150 | A00150,N 151 | A00151,N 152 | A00152,N 153 | A00153,N 154 | A00154,N 155 | A00155,A 156 | A00156,A 157 | A00157,N 158 | A00158,O 159 | A00159,O 160 | A00160,N 161 | A00161,O 162 | A00162,O 163 | A00163,N 164 | A00164,~ 165 | A00165,N 166 | A00166,N 167 | A00167,N 168 | A00168,N 169 | A00169,N 170 | A00170,O 171 | A00171,N 172 | A00172,N 173 | A00173,N 174 | A00174,N 175 | A00175,N 176 | A00176,O 177 | A00177,N 178 | A00178,N 179 | A00179,N 180 | A00180,N 181 | A00181,O 182 | A00182,N 183 | A00183,N 184 | A00184,N 185 | A00185,N 186 | A00186,O 187 | A00187,O 188 | A00188,N 189 | A00189,O 190 | A00190,N 191 | A00191,N 192 | A00192,N 193 | A00193,N 194 | A00194,N 195 | A00195,O 196 | A00196,~ 197 | A00197,N 198 | A00198,O 199 | A00199,N 200 | A00200,N 201 | A00201,~ 202 | A00202,N 203 | A00203,O 204 
| A00204,O 205 | A00205,~ 206 | A00206,N 207 | A00207,N 208 | A00208,A 209 | A00209,O 210 | A00210,N 211 | A00211,O 212 | A00212,O 213 | A00213,O 214 | A00214,N 215 | A00215,O 216 | A00216,A 217 | A00217,A 218 | A00218,O 219 | A00219,N 220 | A00220,O 221 | A00221,N 222 | A00222,N 223 | A00223,N 224 | A00224,N 225 | A00225,A 226 | A00226,N 227 | A00227,N 228 | A00228,N 229 | A00229,N 230 | A00230,N 231 | A00231,A 232 | A00232,N 233 | A00233,N 234 | A00234,N 235 | A00235,N 236 | A00236,N 237 | A00237,N 238 | A00238,N 239 | A00239,N 240 | A00240,N 241 | A00241,N 242 | A00242,N 243 | A00244,N 244 | A00245,N 245 | A00247,A 246 | A00248,N 247 | A00249,N 248 | A00253,A 249 | A00267,A 250 | A00271,A 251 | A00301,A 252 | A00321,A 253 | A00375,A 254 | A00395,A 255 | A00397,A 256 | A00405,A 257 | A00422,A 258 | A00432,A 259 | A00438,A 260 | A00439,A 261 | A00441,A 262 | A00456,A 263 | A00465,A 264 | A00473,A 265 | A00486,A 266 | A00509,A 267 | A00519,A 268 | A00520,A 269 | A00524,~ 270 | A00542,A 271 | A00551,A 272 | A00585,~ 273 | A01006,~ 274 | A01070,~ 275 | A01246,~ 276 | A01299,~ 277 | A01521,~ 278 | A01567,~ 279 | A01707,~ 280 | A01727,~ 281 | A01772,~ 282 | A01833,~ 283 | A02168,~ 284 | A02372,~ 285 | A02772,~ 286 | A02785,~ 287 | A02833,~ 288 | A03549,~ 289 | A03738,~ 290 | A04086,~ 291 | A04137,~ 292 | A04170,~ 293 | A04186,~ 294 | A04216,~ 295 | A04282,~ 296 | A04452,~ 297 | A04522,~ 298 | A04701,~ 299 | A04735,~ 300 | A04805,~ 301 | -------------------------------------------------------------------------------- /references/encase/code/CDL.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Apr 15 00:21:52 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | class CDL(object): 10 | """ 11 | Meng's method 12 | """ 13 | def __init__(self): 14 | pass -------------------------------------------------------------------------------- /references/encase/code/Encase.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 16 18:54:12 2017 5 | 6 | @author: shenda 7 | 8 | 9 | class order: ['A', 'N', 'O', '~'] 10 | 11 | """ 12 | 13 | from CDL import CDL 14 | import dill 15 | import numpy as np 16 | 17 | class Encase(object): 18 | def __init__(self, clf_list): 19 | self.clf_list = clf_list 20 | self.n_clf = len(self.clf_list) 21 | self.prob_list = [[] for i in range(self.n_clf)] 22 | self.final_prob = None 23 | self.pred_list = [] 24 | self.labels = ['N', 'A', 'O', '~'] 25 | self.weight = [1/self.n_clf for i in range(self.n_clf)] 26 | 27 | def fit(self, train_data, train_label): 28 | for clf in self.clf_list: 29 | clf.fit(train_data, train_label) 30 | 31 | def predict_prob(self, test_data): 32 | for i in range(self.n_clf): 33 | self.prob_list[i] = self.weight[i] * self.clf_list[i].predict_prob(test_data) 34 | 35 | self.final_prob = np.sum(np.array(self.prob_list), axis=0) 36 | 37 | return self.final_prob 38 | 39 | def predict(self, test_data): 40 | self.final_prob = self.predict_prob(test_data) 41 | self.pred_list = [] 42 | 43 | n_row, _ = self.final_prob.shape 44 | for i in range(n_row): 45 | tmp_pred = self.final_prob[i, :] 46 | self.pred_list.append(self.labels[list(tmp_pred).index(max(tmp_pred))]) 47 | return self.pred_list 48 | 49 | 50 | if __name__ == "__main__": 51 | pass -------------------------------------------------------------------------------- 
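To make the voting logic of the `Encase` ensemble above concrete, here is a minimal, hypothetical usage sketch. `DummyProbCLF` is invented purely for illustration; in real use the list holds wrappers such as `MyXGB` or `MyLR` from `BasicCLF.py`. The only interface `Encase` relies on is `fit()` plus a `predict_prob()` that returns one row of class probabilities per sample, ordered as `['N', 'A', 'O', '~']` to match `Encase.labels`:

```python
import numpy as np
from Encase import Encase  # assumes Encase.py (and its CDL/dill imports) is importable

class DummyProbCLF:
    # Invented stand-in exposing the fit/predict_prob interface Encase expects.
    def __init__(self, probs):
        self.probs = np.asarray(probs, dtype=float)
    def fit(self, train_data, train_label):
        pass
    def predict_prob(self, test_data):
        return np.tile(self.probs, (len(test_data), 1))

X = np.zeros((3, 5))                                 # 3 fake recordings, 5 fake features
ens = Encase([DummyProbCLF([0.7, 0.1, 0.1, 0.1]),    # leans towards 'N'
              DummyProbCLF([0.2, 0.5, 0.2, 0.1])])   # leans towards 'A'
ens.fit(X, ['N', 'N', 'A'])
print(ens.predict(X))  # ['N', 'N', 'N'] -- the averaged probabilities [0.45, 0.3, 0.15, 0.1] favour 'N'
```

Since the default weights are uniform (`1/n_clf` each), the prediction is simply the argmax of the averaged probability vectors.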
/references/encase/code/OptF.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Feb 17 15:06:35 2017 4 | 5 | @author: v-shon 6 | """ 7 | 8 | import numpy as np 9 | import sklearn 10 | from collections import Counter 11 | 12 | 13 | class OptF(object): 14 | def __init__(self, alpha=0.5, epochs=100): 15 | self.alpha = alpha 16 | self.epochs = epochs 17 | self.theta = None 18 | 19 | def gradTheta(self, theta, train_data, train_label): 20 | """ 21 | Jansche, Martin. EMNLP 2005 22 | "Maximum expected F-measure training of logistic regression models." 23 | 24 | must be normalized first 25 | """ 26 | n_row, n_col = train_data.shape 27 | m = 0.0 28 | A = 0.0 29 | dm = np.zeros([n_col, 1]) 30 | dA = np.zeros([n_col, 1]) 31 | p = np.zeros([n_row, 1]) 32 | 33 | p = 1.0 / (1.0 + np.exp(-np.dot(train_data, theta))) 34 | 35 | m = sum(p) 36 | A = sum(p * train_label) 37 | 38 | dm = np.dot(np.transpose(train_data), p * (1 - p)) 39 | dA = np.dot(np.transpose(train_data), p * (1 - p) * train_label) 40 | 41 | n_pos = sum(train_label) 42 | h = 1 / (self.alpha * n_pos + (1 - self.alpha) * m) 43 | F = h * A 44 | t = F * (1 - self.alpha) 45 | 46 | dF = h * (dA - t * dm) 47 | 48 | return F, dF 49 | 50 | def fit(self, train_data, train_label): 51 | train_feature = sklearn.preprocessing.scale(train_data, axis=0) 52 | n_row, n_col = train_feature.shape 53 | train_feature = np.c_[np.ones([n_row, 1]), train_feature] 54 | train_label = np.expand_dims(np.array(train_label), axis=1) 55 | 56 | self.theta = np.random.rand(n_col+1, 1) 57 | 58 | for epoch in range(self.epochs): 59 | F, dF = self.gradTheta(self.theta, train_feature, train_label) 60 | 61 | self.theta = self.theta + dF 62 | 63 | 64 | def predict_prob(self, test_data): 65 | test_data = np.array(test_data) 66 | if test_data.ndim == 1: 67 | test_data = np.expand_dims(test_data, axis=0) 68 | test_feature = test_data 69 | n_row, n_col = test_feature.shape 70 | test_feature = np.c_[np.ones([n_row, 1]), test_feature] 71 | # print(test_feature) 72 | 73 | z = np.dot(test_feature, self.theta) 74 | gz = 1 / (1 + np.exp(-z)) 75 | 76 | return gz 77 | 78 | def predict(self, test_data): 79 | gz = self.predict_prob(test_data) 80 | out = [] 81 | for prob in gz: 82 | if prob > 0.5: 83 | out.append(1) 84 | else: 85 | out.append(0) 86 | return out 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /references/encase/code/ParSelect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 23 00:30:06 2017 5 | 6 | @author: shenda 7 | 8 | par selection for xgboost, my own 9 | """ 10 | 11 | import xgboost as xgb 12 | from sklearn.model_selection import RandomizedSearchCV 13 | from sklearn.model_selection import KFold 14 | from sklearn.model_selection import StratifiedKFold 15 | import numpy as np 16 | import pandas as pd 17 | import dill 18 | from scipy import stats 19 | import ReadData 20 | import MyEval 21 | from BasicCLF import MyXGB 22 | 23 | 24 | #if __name__ == "__main__": 25 | def XGBcv(all_pid, all_feature, all_label, 26 | subsample, max_depth, colsample_bytree, min_child_weight): 27 | ''' 28 | TODO: 29 | try kf = StratifiedKFold(n_splits=5, shuffle=True) 30 | ''' 31 | 32 | wrong_stat = [] 33 | 34 | ## k-fold cross validation 35 | all_pid = np.array(all_pid) 36 | all_feature = 
np.array(all_feature) 37 | all_label = np.array(all_label) 38 | F1_list = [] 39 | kf = StratifiedKFold(n_splits=5, shuffle=True) 40 | for train_index, test_index in kf.split(all_feature, all_label): 41 | train_data = all_feature[train_index] 42 | train_label = all_label[train_index] 43 | test_data = all_feature[test_index] 44 | test_label = all_label[test_index] 45 | 46 | clf = MyXGB(subsample=subsample, 47 | max_depth=max_depth, 48 | colsample_bytree=colsample_bytree, 49 | min_child_weight=min_child_weight) 50 | clf.fit(train_data, train_label) 51 | 52 | pred = clf.predict(test_data) 53 | F1_list.append(MyEval.F1Score3(pred, test_label, False)) 54 | 55 | print('\n\nAvg F1: ', np.mean(F1_list)) 56 | 57 | return np.mean(F1_list) 58 | 59 | 60 | if __name__ == "__main__": 61 | 62 | res = [] 63 | # fout = open('../../reseult/xgbcv.txt', 'w') 64 | 65 | with open('../data/features_all_v2.5.pkl', 'rb') as my_input: 66 | all_pid = dill.load(my_input) 67 | all_feature = dill.load(my_input) 68 | all_label = dill.load(my_input) 69 | 70 | fout = open('../../stat/xgbcv1.txt', 'a') 71 | fout.write('{0},{1},{2},{3},{4}\n'.format('subsample', 'max_depth', 'colsample_bytree', 'min_child_weight', 'f1')) 72 | fout.close() 73 | 74 | # subsample_list = [0.8, 0.85, 0.9] 75 | # max_depth_list = [7, 8, 9, 10, 11] 76 | # colsample_bytree_list = [0.8, 0.85, 0.9] 77 | subsample_list = [0.9] 78 | max_depth_list = [11] 79 | colsample_bytree_list = [0.9] 80 | min_child_weight_list = [2, 3, 4] 81 | 82 | for subsample in subsample_list: 83 | for max_depth in max_depth_list: 84 | for colsample_bytree in colsample_bytree_list: 85 | for min_child_weight in min_child_weight_list: 86 | for i in range(5): 87 | f1 = XGBcv(all_pid, all_feature, all_label, subsample, max_depth, colsample_bytree, min_child_weight) 88 | fout = open('../../stat/xgbcv1.txt', 'a') 89 | fout.write('{0},{1},{2},{3},{4}\n'.format(subsample, max_depth, colsample_bytree, min_child_weight, f1)) 90 | fout.close() 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /references/encase/code/SubmitPrepareModel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 23 21:34:19 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | from collections import Counter 10 | import numpy as np 11 | import pandas as pd 12 | import MyEval 13 | import ReadData 14 | from LevelTop import LevelTop 15 | import dill 16 | from sklearn.model_selection import KFold 17 | from sklearn.model_selection import StratifiedKFold 18 | from BasicCLF import MyAdaBoost 19 | from BasicCLF import MyRF 20 | from BasicCLF import MyExtraTrees 21 | from BasicCLF import MyGBDT 22 | from BasicCLF import MyXGB 23 | from BasicCLF import MyLR 24 | from CascadeCLF import CascadeCLF 25 | from OptF import OptF 26 | import sklearn 27 | import xgboost 28 | from sklearn.ensemble import AdaBoostClassifier 29 | from sklearn import ensemble 30 | from Encase import Encase 31 | 32 | if __name__ == "__main__": 33 | 34 | with open('../../data2/features_all_v1.3.pkl', 'rb') as my_input: 35 | all_pid = dill.load(my_input) 36 | all_feature = dill.load(my_input) 37 | all_label = dill.load(my_input) 38 | 39 | all_feature = np.array(all_feature) 40 | all_label = np.array(all_label) 41 | 42 | clf_1 = MyLR() 43 | 44 | clf_final = Encase([clf_1]) 45 | clf_final.fit(all_feature, all_label) 46 | 47 | with open('../model/model0423.pkl', 'wb') as my_out: 48 | 
dill.dump(clf_final, my_out) 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /references/encase/code/TestBasic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 23 11:08:54 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | from collections import Counter 10 | import numpy as np 11 | import pandas as pd 12 | import MyEval 13 | import ReadData 14 | import dill 15 | from sklearn.model_selection import KFold 16 | from sklearn.model_selection import StratifiedKFold 17 | from BasicCLF import MyAdaBoost 18 | from BasicCLF import MyRF 19 | from BasicCLF import MyExtraTrees 20 | from BasicCLF import MyXGB 21 | from BasicCLF import MyGBDT 22 | from BasicCLF import MyLR 23 | from OptF import OptF 24 | import sklearn 25 | import xgboost 26 | 27 | #def TestBasic(): 28 | if __name__ == "__main__": 29 | 30 | with open('../data/features_all_v2.2.pkl', 'rb') as my_input: 31 | all_pid = dill.load(my_input) 32 | feat_feature = dill.load(my_input) 33 | all_label = dill.load(my_input) 34 | 35 | mean_wave = ReadData.read_mean_wave_simp() 36 | 37 | ## k-fold cross validation 38 | # all_feature = np.array(np._c[mean_wave, feat_feature]) 39 | all_feature = np.array(mean_wave) 40 | all_label = np.array(all_label) 41 | F1_list = [] 42 | kf = StratifiedKFold(n_splits=5, shuffle=True) 43 | i_fold = 1 44 | for train_index, test_index in kf.split(all_feature, all_label): 45 | train_data = all_feature[train_index] 46 | train_label = all_label[train_index] 47 | test_data = all_feature[test_index] 48 | test_label = all_label[test_index] 49 | 50 | clf = MyXGB() 51 | clf.fit(train_data, train_label) 52 | # clf.save_importance() 53 | 54 | pred_train = clf.predict(train_data) 55 | MyEval.F1Score3(pred_train, train_label) 56 | pred = clf.predict(test_data) 57 | MyEval.F1Score3(pred, test_label) 58 | 59 | F1_list.append(MyEval.F1Score3(pred, test_label)) 60 | 61 | print('\n\nAvg F1: ', np.mean(F1_list)) 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /references/encase/code/TestKNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 23 11:08:54 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | from collections import Counter 10 | import numpy as np 11 | import pandas as pd 12 | import MyEval 13 | import ReadData 14 | import dill 15 | from sklearn.model_selection import StratifiedKFold 16 | from sklearn.model_selection import KFold 17 | from BasicCLF import MyKNN 18 | 19 | 20 | #def gen_model_1(): 21 | def TestKNN(all_pid, all_feature, all_label, fout): 22 | #if __name__ == "__main__": 23 | 24 | # with open('../data/features_all_v2.2.pkl', 'rb') as my_input: 25 | # all_pid = np.array(dill.load(my_input)) 26 | # feat_feature = np.array(dill.load(my_input)) 27 | # all_label = np.array(dill.load(my_input)) 28 | # 29 | ## mean_wave = np.array(ReadData.read_mean_wave()) 30 | # mean_wave = np.array(ReadData.read_mean_wave_simp()) 31 | # all_feature = np.array(np.c_[mean_wave, feat_feature]) 32 | # all_feature = np.array(mean_wave) 33 | 34 | wrong_stat = [] 35 | 36 | clf_final_list = [] 37 | 38 | ## k-fold cross validation 39 | 40 | all_feature = np.array(all_feature) 41 | all_label = np.array(all_label) 42 | all_pid = np.array(all_pid) 43 | 
F1_list = [] 44 | kf = StratifiedKFold(n_splits=5, shuffle=True) 45 | # kf = KFold(n_splits=5, shuffle=True) 46 | i_fold = 1 47 | print('all feature shape: {0}'.format(len(all_feature))) 48 | for train_index, test_index in kf.split(all_feature, all_label): 49 | # for train_index, test_index in kf.split(all_feature): 50 | train_data = all_feature[train_index] 51 | train_label = all_label[train_index] 52 | test_data = all_feature[test_index] 53 | test_label = all_label[test_index] 54 | test_pid = all_pid[test_index] 55 | 56 | clf_final = MyKNN(n_neighbors=1) 57 | clf_final.fit(train_data, train_label) 58 | 59 | pred = clf_final.predict(test_data) 60 | # pred_train = clf_final.predict(train_data) 61 | # MyEval.F1Score3(pred_train, train_label) 62 | F1_test, re_table = MyEval.F1Score3(pred, test_label, True) 63 | for line in re_table: 64 | for i in line: 65 | fout.write(str(i) + '\t') 66 | fout.write('\n') 67 | fout.write(str(F1_test)+'\n') 68 | F1_list.append(F1_test) 69 | wrong_stat.extend(MyEval.WrongStat(i_fold, pred, test_label, test_pid)) 70 | i_fold += 1 71 | 72 | clf_final_list.append(clf_final) 73 | 74 | avg_f1 = np.mean(F1_list) 75 | print('\n\nAvg F1: ', avg_f1) 76 | fout.write(str(avg_f1)+'=============================\n') 77 | wrong_stat = pd.DataFrame(wrong_stat, columns=['i_fold', 'pid', 'gt', 'pred']) 78 | # wrong_stat.to_csv('../../stat/wrong_stat_f1'+str(np.mean(F1_list))+'.csv') 79 | 80 | 81 | if __name__ == "__main__": 82 | 83 | all_feature = ReadData.read_centerwave('../../data1/centerwave_raw.csv') 84 | all_pid, _, all_label = ReadData.ReadData( '../../data1/QRSinfo.csv' ) 85 | # print(sorted([len(i) for i in all_feature])[:100]) 86 | all_feature = [np.array(i) for i in all_feature] 87 | 88 | # all_pid = all_pid[:5] 89 | # all_label = all_label[:5] 90 | # all_feature = all_feature[:5] 91 | 92 | print('read data done') 93 | fout = open('../../logs/knn', 'w') 94 | for i in range(100): 95 | TestKNN(all_pid, all_feature, all_label, fout) 96 | fout.close() 97 | 98 | -------------------------------------------------------------------------------- /references/encase/code/TestOptF.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 23 11:07:19 2017 5 | 6 | @author: shenda 7 | """ 8 | from collections import Counter 9 | import numpy as np 10 | import pandas as pd 11 | import MyEval 12 | import ReadData 13 | from LevelTop import LevelTop 14 | import dill 15 | from sklearn.model_selection import KFold 16 | from BasicCLF import RFSimp 17 | from BasicCLF import LRSimp 18 | from BasicCLF import MyXGB 19 | from CascadeCLF import CascadeCLF 20 | from OptF import OptF 21 | import sklearn 22 | import xgboost 23 | 24 | def TestOptF(): 25 | #if __name__ == "__main__": 26 | ''' 27 | result: 28 | 29 | LR: 30 | [[ 915. 97.] 31 | [ 267. 223.]] 32 | 0.834092980857 0.550617283951 33 | 0.692355132404 34 | [[ 970. 94.] 35 | [ 204. 233.]] 36 | 0.866845397676 0.609947643979 37 | 0.738396520828 38 | [[ 932. 102.] 39 | [ 259. 208.]] 40 | 0.837752808989 0.535392535393 41 | 0.686572672191 42 | [[ 884. 62.] 43 | [ 307. 248.]] 44 | 0.827328029949 0.573410404624 45 | 0.700369217286 46 | [[ 919. 75.] 47 | [ 292. 215.]] 48 | 0.833560090703 0.539523212045 49 | 0.686541651374 50 | Avg F1: 0.700847038817 51 | 52 | 53 | OptF: 54 | [[ 721. 291.] 55 | [ 130. 360.]] 56 | 0.774020397209 0.631025416301 57 | 0.702522906755 58 | [[ 794. 270.] 59 | [ 116. 
321.]] 60 | 0.804457953394 0.624513618677 61 | 0.714485786036 62 | [[ 709. 325.] 63 | [ 130. 337.]] 64 | 0.757074212493 0.596988485385 65 | 0.677031348939 66 | [[ 719. 227.] 67 | [ 160. 395.]] 68 | 0.787945205479 0.671197960918 69 | 0.729571583199 70 | [[ 719. 275.] 71 | [ 158. 349.]] 72 | 0.768572955639 0.617152961981 73 | 0.69286295881 74 | Avg F1: 0.703294916748 75 | 76 | conclusion: 77 | F1 on O is promoted, but F1 on N is decreased 78 | after avg, no obvious diff 79 | ''' 80 | with open('../../data2/features_all.pkl', 'rb') as my_input: 81 | all_pid = dill.load(my_input) 82 | all_feature = dill.load(my_input) 83 | all_label = dill.load(my_input) 84 | 85 | ### preprocess 86 | all_feature = np.array(all_feature) 87 | selected = [i for i, x in enumerate(all_label) if x == 'N' or x == 'O'] 88 | all_label = np.array(all_label) 89 | all_feature = all_feature[selected] 90 | all_label = all_label[selected] 91 | all_label_num = np.array(ReadData.LabelTo2(all_label, 'O')) 92 | 93 | ## k-fold cross validation 94 | # F1_list = [] 95 | # kf = KFold(n_splits=5) 96 | # for train_index, test_index in kf.split(all_label): 97 | # train_data = all_feature[train_index] 98 | # train_label = all_label[train_index] 99 | # train_label_num = all_label_num[train_index] 100 | # test_data = all_feature[test_index] 101 | # test_label = all_label[test_index] 102 | # test_label_num = all_label_num[test_index] 103 | # 104 | # clf = LRSimp() 105 | # clf.fit(train_data, train_label) 106 | # pred = [] 107 | # n_row, n_col = test_data.shape 108 | # for i in range(n_row): 109 | # pred.extend(clf.predict(list(test_data[i]))) 110 | # # break 111 | # F1_list.append(MyEval.F1Score2(pred, test_label)) 112 | # 113 | # print('\n\nAvg F1: ', np.mean(F1_list)) 114 | 115 | F1_list = [] 116 | kf = KFold(n_splits=5) 117 | for train_index, test_index in kf.split(all_label): 118 | train_data = all_feature[train_index] 119 | train_label = all_label[train_index] 120 | train_label_num = all_label_num[train_index] 121 | test_data = all_feature[test_index] 122 | test_label = all_label[test_index] 123 | test_label_num = all_label_num[test_index] 124 | 125 | test_data = sklearn.preprocessing.scale(test_data, axis=0) 126 | 127 | clf = OptF() 128 | clf.fit(train_data, train_label_num) 129 | pred = [] 130 | n_row, n_col = test_data.shape 131 | for i in range(n_row): 132 | pred_prob = clf.predict_prob(list(test_data[i]))[0] 133 | if pred_prob > 0.5: 134 | pred.append('O') 135 | else: 136 | pred.append('N') 137 | # break 138 | F1_list.append(MyEval.F1Score2(pred, test_label)) 139 | 140 | print('\n\nAvg F1: ', np.mean(F1_list)) 141 | -------------------------------------------------------------------------------- /references/encase/code/challenge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Feb 5 23:00:34 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | from collections import Counter 10 | import numpy as np 11 | import FeatureExtract 12 | import MyEval 13 | import ReadData 14 | import dill 15 | import features_all 16 | import challenge_encase_mimic 17 | 18 | ############## 19 | #### load classifier 20 | ############### 21 | #with open('model/v2.5_xgb5_all.pkl', 'rb') as my_in: 22 | # clf_final = dill.load(my_in) 23 | 24 | ############## 25 | #### read and extract 26 | ############### 27 | short_pid, short_data, short_label = ReadData.ReadData( 'data1/short.csv' ) 28 | long_pid, long_data, long_label = ReadData.ReadData( 
'data1/long.csv' ) 29 | QRS_pid, QRS_data, QRS_label = ReadData.ReadData( 'data1/QRSinfo.csv' ) 30 | 31 | 32 | ############# 33 | ### feature 34 | ############# 35 | #all_feature = features_all.GetAllFeature_test(short_data, long_data, QRS_data, long_pid, short_pid) 36 | #out_feats = features_mimic.get_mimic_feature(long_data[0]) 37 | 38 | 39 | ############ 40 | ## classifier 41 | ############ 42 | pred = [] 43 | pred = challenge_encase_mimic.pred_one_sample(short_data, long_data, QRS_data, long_pid, short_pid) 44 | 45 | fout= open('answers.txt','a') 46 | fout.write(pred[0]) 47 | fout.write('\n') 48 | fout.close 49 | -------------------------------------------------------------------------------- /references/encase/code/challenge_0825.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Feb 5 23:00:34 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | import numpy as np 10 | import MyEval 11 | import ReadData 12 | import dill 13 | from features_mimic import get_mimic_proba 14 | from features_all import GetAllFeature_test 15 | 16 | def pred_one_sample(short_data, long_data, QRS_data, long_pid, short_pid): 17 | ''' 18 | predict one sample 19 | 20 | input: 21 | short_data: [[7,7], [8,8,8]] 22 | long_data: [[7,7,8,8,8]] 23 | QRS_data: [[2,3]] 24 | long_pid: ['A00001'] 25 | short_pid: ['A00001', 'A00001'] 26 | 27 | output: 28 | label of ['A', 'N', 'O', '~'] 29 | ''' 30 | ### load clf 31 | labels = ['N', 'A', 'O', '~'] 32 | 33 | with open('model/v2.5_xgb5_all_v2.pkl', 'rb') as fin: 34 | clf_ENCASE = dill.load(fin) 35 | 36 | ### extract features 37 | feature_ENCASE = GetAllFeature_test(short_data, long_data, QRS_data, long_pid, short_pid) 38 | if feature_ENCASE[0][-1] == 0.0: 39 | feature_ENCASE[0][-1] = 0.00000001 40 | 41 | ### pred 42 | ### alert: encase is naop, lr is anop 43 | pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE)[0] 44 | pred_proba_LR = get_mimic_proba(long_data[0]) 45 | pred_final = 1/2 * pred_proba_ENCASE + 1/2 * pred_proba_LR 46 | print('{0}\n{1}\n{2}'.format(pred_proba_ENCASE, pred_proba_LR, pred_final)) 47 | 48 | pred_label = labels[np.argsort(pred_final)[-1]] 49 | 50 | return pred_label 51 | 52 | if __name__ == '__main__': 53 | short_pid, short_data, short_label = ReadData.ReadData( '../../data1/short.csv' ) 54 | long_pid, long_data, long_label = ReadData.ReadData( '../../data1/long.csv' ) 55 | QRS_pid, QRS_data, QRS_label = ReadData.ReadData( '../../data1/QRSinfo.csv' ) 56 | print('='*60) 57 | print('pred begin') 58 | 59 | res = pred_one_sample(short_data[0:40], long_data[0:1], QRS_data[0:1], long_pid[0:1], short_pid[0:40]) 60 | print('pred done, the label of {0} is {1}'.format(long_pid[0], res)) 61 | 62 | # fout= open('answers.txt','a') 63 | # fout.write(res) 64 | # fout.write('\n') 65 | # fout.close 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /references/encase/code/challenge_encase_mimic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Feb 5 23:00:34 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | import numpy as np 10 | import MyEval 11 | import ReadData 12 | import dill 13 | from features_mimic import get_mimic_proba 14 | from features_all import GetAllFeature_test 15 | 16 | def pred_one_sample(short_data, long_data, QRS_data, long_pid, short_pid): 17 | ''' 18 | predict one sample 19 
| 20 | input: 21 | short_data: [[7,7], [8,8,8]] 22 | long_data: [[7,7,8,8,8]] 23 | QRS_data: [[2,3]] 24 | long_pid: ['A00001'] 25 | short_pid: ['A00001', 'A00001'] 26 | 27 | output: 28 | label of ['A', 'N', 'O', '~'] 29 | ''' 30 | ### load clf 31 | labels = ['N', 'A', 'O', '~'] 32 | 33 | with open('model/v2.5_xgb5_all_v2.pkl', 'rb') as fin: 34 | clf_ENCASE = dill.load(fin) 35 | 36 | ### extract features 37 | feature_ENCASE = GetAllFeature_test(short_data, long_data, QRS_data, long_pid, short_pid) 38 | feature_ENCASE[0][-1]=feature_ENCASE[0][-1]+0.0000001 39 | 40 | ### pred 41 | ### alert: encase is naop, lr is anop 42 | pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE)[0] 43 | pred_proba_mimic = get_mimic_proba(long_data[0]) 44 | pred_final = 1/2 * pred_proba_ENCASE + 1/2 * pred_proba_mimic 45 | print('{0}\n{1}\n{2}'.format(pred_proba_ENCASE, pred_proba_mimic, pred_final)) 46 | 47 | pred_label = labels[np.argsort(pred_final)[-1]] 48 | 49 | return pred_label 50 | 51 | if __name__ == '__main__': 52 | short_pid, short_data, short_label = ReadData.ReadData( 'data1/short.csv' ) 53 | long_pid, long_data, long_label = ReadData.ReadData( 'data1/long.csv' ) 54 | QRS_pid, QRS_data, QRS_label = ReadData.ReadData( 'data1/QRSinfo.csv' ) 55 | print('='*60) 56 | print('pred begin') 57 | 58 | 59 | res = pred_one_sample(short_data[0:40], long_data[0:1], QRS_data[0:1], long_pid[0:1], short_pid[0:40]) 60 | print('pred done, the label of {0} is {1}'.format(long_pid[0], res)) 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /references/encase/code/challenge_encase_mimic_offline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Feb 5 23:00:34 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | import numpy as np 10 | import MyEval 11 | import ReadData 12 | import dill 13 | from features_mimic import get_mimic_proba 14 | from features_mimic import get_mimic_proba_all 15 | from features_all import GetAllFeature_test 16 | 17 | def group_label(out_pid, long_pid, preds): 18 | pred_dic = {k: [] for k in long_pid} 19 | final_preds = [] 20 | for i in range(len(out_pid)): 21 | pred_dic[out_pid[i]].append(preds[i]) 22 | for i in long_pid: 23 | if len(pred_dic[i]) > 1: 24 | final_preds.append(np.mean(np.array(pred_dic[i]), axis=0)) 25 | else: 26 | final_preds.append(pred_dic[i][0]) 27 | return np.array(final_preds) 28 | 29 | def pred_one_sample(short_data, long_data, QRS_data, long_pid, short_pid): 30 | ''' 31 | predict one sample 32 | 33 | input: 34 | short_data: [[7,7], [8,8,8]] 35 | long_data: [[7,7,8,8,8]] 36 | QRS_data: [[2,3]] 37 | long_pid: ['A00001'] 38 | short_pid: ['A00001', 'A00001'] 39 | 40 | output: 41 | label of ['A', 'N', 'O', '~'] 42 | ''' 43 | ### load clf 44 | labels = ['N', 'A', 'O', '~'] 45 | 46 | with open('../model/v2.5_xgb5_all_v2.pkl', 'rb') as fin: 47 | clf_ENCASE = dill.load(fin) 48 | 49 | ### extract features 50 | feature_ENCASE = GetAllFeature_test(short_data, long_data, QRS_data, long_pid, short_pid) 51 | 52 | ### pred 53 | ### alert: encase is naop, lr is anop 54 | pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE)[0] 55 | pred_proba_mimic = get_mimic_proba(long_data[0]) 56 | pred_final = 1/2 * pred_proba_ENCASE + 1/2 * pred_proba_mimic 57 | print('{0}\n{1}\n{2}'.format(pred_proba_ENCASE, pred_proba_mimic, pred_final)) 58 | 59 | pred_label = labels[np.argsort(pred_final)[-1]] 60 | 61 | return pred_label 
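A tiny worked example of the `group_label` helper defined at the top of this file, assuming it is in scope (the record IDs and two-class probability rows are made up for illustration; the real pipeline uses four classes): segment-level predictions sharing a record ID are averaged, and single-segment records pass through unchanged.

```python
import numpy as np

out_pid  = ['A00001', 'A00001', 'A00002']       # one entry per predicted segment
long_pid = ['A00001', 'A00002']                 # one entry per record
preds    = np.array([[0.8, 0.2], [0.4, 0.6], [0.1, 0.9]])

print(group_label(out_pid, long_pid, preds))
# [[0.6 0.4]   <- mean of the two A00001 segment rows
#  [0.1 0.9]]  <- A00002 had a single segment, passed through unchanged
```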
62 | 63 | if __name__ == '__main__': 64 | short_pid, short_data, short_label = ReadData.ReadData( '../../data_val/short.csv' ) 65 | long_pid, long_data, long_label = ReadData.ReadData( '../../data_val/long.csv' ) 66 | QRS_pid, QRS_data, QRS_label = ReadData.ReadData( '../../data_val/QRSinfo.csv' ) 67 | print('='*60) 68 | print('pred begin') 69 | 70 | # short_data = short_data[:100] 71 | # long_data = long_data[:3] 72 | # QRS_data = QRS_data[:3] 73 | # long_pid = long_pid[:3] 74 | # short_pid = short_pid[:100] 75 | 76 | with open('../model/v2.5_xgb5_all.pkl', 'rb') as fin: 77 | clf_ENCASE = dill.load(fin) 78 | feature_ENCASE = GetAllFeature_test(short_data, long_data, QRS_data, long_pid, short_pid) 79 | pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE) 80 | 81 | pred_proba_mimic_all, out_pid = get_mimic_proba_all(long_data, long_pid) 82 | pred_proba_mimic = group_label(out_pid, long_pid, pred_proba_mimic_all) 83 | 84 | pred_final = 1/2 * pred_proba_ENCASE + 1/2 * pred_proba_mimic 85 | labels = ['N', 'A', 'O', '~'] 86 | pred_label = [] 87 | for i in pred_final: 88 | pred_label.append(labels[np.argsort(i)[-1]]) 89 | 90 | fout = open('../answers.txt','w') 91 | for i in range(len(long_pid)): 92 | fout.write('{0},{1}\n'.format(long_pid[i], pred_label[i])) 93 | fout.close() 94 | 95 | MyEval.F1Score3(pred_label, QRS_label) -------------------------------------------------------------------------------- /references/encase/code/dnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jun 29 08:01:35 2017 5 | 6 | @author: shenda 7 | 8 | class order: ['A', 'N', 'O', '~'] 9 | """ 10 | 11 | 12 | 13 | import numpy as np 14 | from matplotlib import pyplot as plt 15 | import ReadData 16 | import tensorflow as tf 17 | from sklearn.metrics import roc_auc_score 18 | from tensorflow.contrib import learn 19 | from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib 20 | 21 | 22 | 23 | def next_batch(my_data, my_label, batch_idx): 24 | start = (batch_idx * batch_size) % my_data.shape[0] 25 | end = ((batch_idx + 1) * batch_size + my_data.shape[0]) % my_data.shape[0] 26 | if end-start != batch_size: 27 | data_in, label_in = next_batch(my_data, my_label, batch_idx+1) 28 | else: 29 | data_in = my_data[start:end] 30 | label_in = my_label[start:end] 31 | 32 | return data_in, label_in 33 | 34 | def truncate_long(ts, my_len): 35 | if len(ts) >= my_len: 36 | return ts[:my_len] 37 | else: 38 | ts += [0] * (my_len - len(ts)) 39 | return ts 40 | 41 | def sample_long(ts, interv): 42 | ts1 = [] 43 | for i in range(len(ts) // interv): 44 | ts1.append(ts[i * interv]) 45 | return ts1 46 | 47 | def read_data(): 48 | long_pid, long_data, long_label = ReadData.ReadData( '../../data1/long.csv' ) 49 | 50 | mat1 = [truncate_long(ts, 9000) for ts in long_data] 51 | mat2 = [truncate_long(ts, 6000) for ts in long_data] 52 | mat3 = [truncate_long(ts, 3000) for ts in long_data] 53 | 54 | mat4 = [sample_long(ts, 10) for ts in mat1] 55 | mat5 = [sample_long(ts, 10) for ts in mat2] 56 | mat6 = [sample_long(ts, 10) for ts in mat3] 57 | 58 | label_onehot = ReadData.Label2OneHot(long_label) 59 | 60 | # plt.plot(mat1[0]) 61 | # plt.plot(mat4[0]) 62 | 63 | mat1 = np.expand_dims(np.array(mat1), axis=2) 64 | label_onehot = np.array(label_onehot) 65 | 66 | return mat1, label_onehot 67 | 68 | batch_size = 100 69 | n_input = 9000 70 | n_classes = 4 71 | epochs = 10 72 | train_data, train_label = 
read_data() 73 | 74 | #def my_dnn(features, labels, mode): 75 | 76 | with tf.variable_scope('input'): 77 | features = tf.placeholder(tf.float32, [batch_size, n_input, 1]) 78 | labels = tf.placeholder(tf.float32, [batch_size, n_classes]) 79 | 80 | with tf.variable_scope('conv_1'): 81 | conv_1 = tf.layers.conv1d(features, 82 | filters=8, kernel_size=16, strides=2, 83 | activation=tf.nn.relu, 84 | padding='SAME', use_bias=True, reuse=False) 85 | print("conv_1", conv_1.get_shape()) 86 | pool_1 = tf.layers.max_pooling1d(conv_1, pool_size=16, strides=8) 87 | print("pool_1", pool_1.get_shape()) 88 | 89 | with tf.variable_scope('conv_2'): 90 | conv_2 = tf.layers.conv1d(pool_1, 91 | filters=32, kernel_size=8, strides=2, 92 | padding='SAME', use_bias=True, reuse=False) 93 | print("conv_2", conv_2.get_shape()) 94 | pool_2 = tf.layers.max_pooling1d(conv_2, pool_size=8, strides=2) 95 | print("pool_2", pool_2.get_shape()) 96 | 97 | with tf.variable_scope('fc'): 98 | 99 | lstm_cell = tf.contrib.rnn.LSTMCell(32, forget_bias=1.0, state_is_tuple=False) 100 | outputs, states = tf.nn.dynamic_rnn(lstm_cell, pool_2, dtype=tf.float32, time_major = False) 101 | print("outputs", outputs.get_shape()) 102 | print("states", states.get_shape()) 103 | 104 | dense = tf.layers.dense(inputs=states, units=16, activation=tf.nn.relu) 105 | dropout = tf.layers.dropout(inputs=dense, rate=0.4) 106 | print('dropout', dropout.get_shape()) 107 | 108 | logits = tf.layers.dense(inputs=dropout, units=n_classes) 109 | 110 | cost = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels)) 111 | optimizer = tf.train.AdamOptimizer(0.01).minimize(cost) 112 | 113 | pred = tf.argmax(logits, 1) 114 | pred_prob = tf.slice(tf.nn.softmax(logits), [0, 1], [-1, 1]) 115 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) 116 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 117 | 118 | 119 | # Train 120 | init = tf.global_variables_initializer() 121 | with tf.Session() as sess: 122 | sess.run(init) 123 | for epoch in range(epochs): 124 | epoch_loss = 0.0 125 | epoch_loss_cnt = 0.0 126 | epoch_loss_test = 0.0 127 | 128 | for batch_idx in range(int((train_data.shape[0]-1)/batch_size + 1)): 129 | batch_xs, batch_ys = next_batch(train_data, train_label, batch_idx) 130 | loss_v = sess.run(accuracy, feed_dict={features: batch_xs, labels: batch_ys}) 131 | epoch_loss += loss_v 132 | epoch_loss_cnt += 1 133 | 134 | epoch_loss /= epoch_loss_cnt 135 | print ("Epoch #%-5d | Train acc: %-4.3f" % 136 | (epoch, epoch_loss)) 137 | 138 | 139 | -------------------------------------------------------------------------------- /references/encase/code/dnn1_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jun 29 08:01:35 2017 5 | 6 | @author: shenda 7 | 8 | class order: ['A', 'N', 'O', '~'] 9 | """ 10 | 11 | from matplotlib import pyplot as plt 12 | 13 | 14 | if __name__ == "__main__": 15 | 16 | nn = tf.contrib.learn.Estimator(model_dir="../../tmp6") 17 | print(nn.get_variable_names()) 18 | 19 | tmp = nn.get_variable_value('dense/bias') 20 | print(tmp) 21 | tmp = nn.get_variable_value('dense/kernel') 22 | print(tmp) 23 | # 24 | # conv1d_kernel = nn.get_variable_value('conv1d/kernel') 25 | # conv1d_bias = nn.get_variable_value('conv1d/bias') 26 | # 27 | # conv1d_1_kernel = nn.get_variable_value('conv1d_1/kernel') 28 | # conv1d_1_bias = nn.get_variable_value('conv1d_1/bias') 29 | # 
30 | # dense_kernel = nn.get_variable_value('dense/kernel') 31 | # dense_bias = nn.get_variable_value('dense/bias') 32 | # 33 | # dense_1_kernel = nn.get_variable_value('dense_1/kernel') 34 | # dense_1_bias = nn.get_variable_value('dense_1/bias') 35 | # 36 | # for i in range(16): 37 | # plt.subplot(16,1,i+1) 38 | # plt.plot(conv1d_kernel[:,:,i]) 39 | 40 | -------------------------------------------------------------------------------- /references/encase/code/dnn_lstm_simp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jun 29 08:01:35 2017 5 | 6 | @author: shenda 7 | 8 | class order: ['A', 'N', 'O', '~'] 9 | """ 10 | 11 | 12 | 13 | import numpy as np 14 | from matplotlib import pyplot as plt 15 | import ReadData 16 | import tensorflow as tf 17 | from sklearn.metrics import roc_auc_score 18 | from tensorflow.contrib import learn 19 | from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib 20 | 21 | 22 | 23 | def next_batch(my_data, my_label, batch_idx): 24 | start = (batch_idx * batch_size) % my_data.shape[0] 25 | end = ((batch_idx + 1) * batch_size + my_data.shape[0]) % my_data.shape[0] 26 | if end-start != batch_size: 27 | data_in, label_in = next_batch(my_data, my_label, batch_idx+1) 28 | else: 29 | data_in = my_data[start:end] 30 | label_in = my_label[start:end] 31 | 32 | return data_in, label_in 33 | 34 | def truncate_long(ts, my_len): 35 | if len(ts) >= my_len: 36 | return ts[:my_len] 37 | else: 38 | ts += [0] * (my_len - len(ts)) 39 | return ts 40 | 41 | def sample_long(ts, interv): 42 | ts1 = [] 43 | for i in range(len(ts) // interv): 44 | ts1.append(ts[i * interv]) 45 | return ts1 46 | 47 | def read_data(): 48 | long_pid, long_data, long_label = ReadData.ReadData( '../../data1/centerwave.csv' ) 49 | 50 | mat1 = [truncate_long(ts, 9000) for ts in long_data] 51 | mat2 = [truncate_long(ts, 6000) for ts in long_data] 52 | mat3 = [truncate_long(ts, 3000) for ts in long_data] 53 | 54 | mat4 = [sample_long(ts, 10) for ts in mat1] 55 | mat5 = [sample_long(ts, 10) for ts in mat2] 56 | mat6 = [sample_long(ts, 10) for ts in mat3] 57 | 58 | label_onehot = ReadData.Label2OneHot(long_label) 59 | 60 | # plt.plot(mat1[0]) 61 | # plt.plot(mat4[0]) 62 | 63 | mat1 = np.expand_dims(np.array(mat1), axis=2) 64 | label_onehot = np.array(label_onehot) 65 | 66 | return mat1, label_onehot 67 | 68 | batch_size = 100 69 | n_input = 9000 70 | n_classes = 4 71 | epochs = 10 72 | #train_data, train_label = read_data() 73 | 74 | #def my_dnn(features, labels, mode): 75 | # 76 | #with tf.variable_scope('input'): 77 | # features = tf.placeholder(tf.float32, [batch_size, n_input, 1]) 78 | # labels = tf.placeholder(tf.float32, [batch_size, n_classes]) 79 | # 80 | #with tf.variable_scope('conv_1'): 81 | # conv_1 = tf.layers.conv1d(features, 82 | # filters=8, kernel_size=100, strides=2, 83 | # activation=tf.nn.relu, 84 | # padding='SAME', use_bias=True, reuse=False) 85 | # print("conv_1", conv_1.get_shape()) 86 | # pool_1 = tf.layers.max_pooling1d(conv_1, pool_size=10, strides=2) 87 | # print("pool_1", pool_1.get_shape()) 88 | # 89 | #with tf.variable_scope('conv_2'): 90 | # conv_2 = tf.layers.conv1d(pool_1, 91 | # filters=32, kernel_size=8, strides=2, 92 | # padding='SAME', use_bias=True, reuse=False) 93 | # print("conv_2", conv_2.get_shape()) 94 | # pool_2 = tf.layers.max_pooling1d(conv_2, pool_size=8, strides=2) 95 | # print("pool_2", pool_2.get_shape()) 96 | 97 | with 
tf.variable_scope('fc'): 98 | 99 | lstm_cell = tf.contrib.rnn.LSTMCell(1, forget_bias=1.0, state_is_tuple=False) 100 | outputs, states = tf.nn.dynamic_rnn(lstm_cell, features, dtype=tf.float32, time_major = False) 101 | print("outputs", outputs.get_shape()) 102 | print("states", states.get_shape()) 103 | 104 | dense = tf.layers.dense(inputs=states, units=16, activation=tf.nn.relu) 105 | dropout = tf.layers.dropout(inputs=dense, rate=0.4) 106 | print('dropout', dropout.get_shape()) 107 | 108 | logits = tf.layers.dense(inputs=dropout, units=n_classes) 109 | 110 | cost = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels)) 111 | optimizer = tf.train.AdamOptimizer(0.01).minimize(cost) 112 | 113 | pred = tf.argmax(logits, 1) 114 | pred_prob = tf.slice(tf.nn.softmax(logits), [0, 1], [-1, 1]) 115 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) 116 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 117 | 118 | 119 | # Train 120 | init = tf.global_variables_initializer() 121 | with tf.Session() as sess: 122 | sess.run(init) 123 | for epoch in range(epochs): 124 | epoch_loss = 0.0 125 | epoch_loss_cnt = 0.0 126 | epoch_loss_test = 0.0 127 | 128 | for batch_idx in range(int((train_data.shape[0]-1)/batch_size + 1)): 129 | batch_xs, batch_ys = next_batch(train_data, train_label, batch_idx) 130 | loss_v = sess.run(accuracy, feed_dict={features: batch_xs, labels: batch_ys}) 131 | epoch_loss += loss_v 132 | epoch_loss_cnt += 1 133 | 134 | epoch_loss /= epoch_loss_cnt 135 | print ("Epoch #%-5d | Train acc: %-4.3f" % 136 | (epoch, epoch_loss)) 137 | 138 | 139 | -------------------------------------------------------------------------------- /references/encase/code/features_deep_centerwave.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | split long seq into small sub_seq, 5 | feed sub_seq to lstm 6 | ''' 7 | 8 | from __future__ import division, print_function, absolute_import 9 | 10 | import tflearn 11 | import tflearn.data_utils as du 12 | 13 | import numpy as np 14 | import ReadData 15 | import tensorflow as tf 16 | import MyEval 17 | import dill 18 | 19 | def read_data(): 20 | X = ReadData.read_centerwave('../../data1/centerwave_resampled.csv') 21 | _, _, Y = ReadData.ReadData( '../../data1/QRSinfo.csv' ) 22 | all_feature = np.array(X) 23 | all_label_num = np.array(ReadData.Label2OneHot(Y)) 24 | print('read data done') 25 | return all_feature, all_label_num 26 | 27 | def get_deep_centerwave_feature(test_data): 28 | 29 | tf.reset_default_graph() 30 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 31 | 32 | _, n_dim = test_data.shape 33 | 34 | ############################### model 35 | net = tflearn.input_data(shape=[None, n_dim, 1]) 36 | print("input", net.get_shape()) 37 | net = tflearn.avg_pool_1d(net, kernel_size=5, strides=5) 38 | print("avg_pool_1d", net.get_shape()) 39 | net = tflearn.conv_1d(net, 64, 16, 2) 40 | print("cov1", net.get_shape()) 41 | net = tflearn.batch_normalization(net) 42 | print("bn1", net.get_shape()) 43 | net = tflearn.activation(net, 'relu') 44 | print("relu1", net.get_shape()) 45 | net = tflearn.residual_bottleneck(net, 2, 16, 64, downsample_strides = 2, downsample=True, is_first_block = True) 46 | print("resn2", net.get_shape()) 47 | net = tflearn.residual_bottleneck(net, 2, 16, 128, downsample_strides = 2, downsample=True) 48 | print("resn4", net.get_shape()) 49 | net = 
tflearn.residual_bottleneck(net, 2, 16, 256, downsample_strides = 2, downsample=True) 50 | print("resn6", net.get_shape()) 51 | # net = tflearn.residual_bottleneck(net, 2, 16, 512, downsample_strides = 2, downsample=True) 52 | # print("resn8", net.get_shape()) 53 | # net = tflearn.residual_bottleneck(net, 2, 16, 1024, downsample_strides = 2, downsample=True) 54 | # print("resn10", net.get_shape()) 55 | 56 | net = tflearn.batch_normalization(net) 57 | net = tflearn.activation(net, 'relu') 58 | net = tflearn.global_avg_pool(net) 59 | 60 | feature_layer = tflearn.fully_connected(net, 32, activation='sigmoid') 61 | print("feature_layer", feature_layer.get_shape()) 62 | net = feature_layer 63 | net = tflearn.fully_connected(net, 4, activation='softmax') 64 | print("dense", net.get_shape()) 65 | net = tflearn.regression(net, optimizer='adam', 66 | loss='categorical_crossentropy', 67 | learning_rate=0.01) 68 | ############################### 69 | 70 | 71 | 72 | ### load 73 | model = tflearn.DNN(net) 74 | model.load('../model/model_deep_centerwave_0810_all/model_deep_centerwave_resnet') 75 | 76 | ### create new model, and get features 77 | m2 = tflearn.DNN(feature_layer, session=model.session) 78 | out_feature = [] 79 | pred = [] 80 | num_of_test = len(test_data) 81 | for i in range(num_of_test): 82 | tmp_test_data = test_data[i].reshape([-1, n_dim, 1]) 83 | out_feature.append(m2.predict(tmp_test_data)[0]) 84 | # pred.append(model.predict(tmp_test_data)[0]) 85 | 86 | out_feature = np.array(out_feature) 87 | 88 | # ### eval 89 | # print(len(pred), pred[0], all_label[0]) 90 | # MyEval.F1Score3_num(pred, all_label[:num_of_test]) 91 | 92 | return out_feature 93 | 94 | if __name__ == '__main__': 95 | all_data, all_label = read_data() 96 | out_feature = get_deep_centerwave_feature(all_data) 97 | print('out_feature shape: ', out_feature.shape) 98 | # with open('../data/feat_deep_centerwave_resnet.pkl', 'wb') as fout: 99 | # dill.dump(out_feature, fout) 100 | -------------------------------------------------------------------------------- /references/encase/code/mimic_3_build_LR.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | split long seq into small sub_seq, 5 | feed sub_seq to lstm 6 | 7 | !!!!!!!!!!!deprecated 8 | ''' 9 | 10 | 11 | import numpy as np 12 | import ReadData 13 | from BasicCLF import MyLR 14 | import dill 15 | import MyEval 16 | 17 | def read_data_online(): 18 | with open('../data/mimic_data_online_v3.bin', 'rb') as fin: 19 | _ = np.load(fin) 20 | mimic_all_feats = np.load(fin) 21 | all_label = np.load(fin) 22 | print('mimic_all_feats', mimic_all_feats.shape) 23 | return mimic_all_feats, all_label 24 | 25 | def read_data_online_pkl(): 26 | with open('../data/mimic_data_online_v1.pkl', 'rb') as fin: 27 | data_dict = dill.load(fin) 28 | mimic_all_feats = data_dict['mimic_all_feats'] 29 | all_label = data_dict['all_label'] 30 | print('mimic_all_feats', mimic_all_feats.shape) 31 | return mimic_all_feats, all_label 32 | 33 | def read_data_offline(): 34 | with open('../data/mimic_data_online_v1.pkl', 'rb') as fin: 35 | data_dict = dill.load(fin) 36 | mimic_all_feats = data_dict['mimic_all_feats'] 37 | all_label = data_dict['all_label'] 38 | print('mimic_all_feats', mimic_all_feats.shape) 39 | return mimic_all_feats, all_label 40 | 41 | if __name__ == '__main__': 42 | # mimic_all_feats, all_label = read_data_online() 43 | 44 | # clf = MyLR() 45 | # clf.fit(mimic_all_feats, np.array(ReadData.OneHot2Label(all_label))) 
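    # (This commented-out block, when enabled, fits the logistic regression on the
    # MIMIC-derived features, reports training F1 via MyEval, and persists the
    # classifier with dill; as committed, the script only loads and compares the
    # two previously saved LR models at the bottom.)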
46 | # print(clf.clf.coef_) 47 | # pred_train = clf.predict(mimic_all_feats) 48 | # MyEval.F1Score3(pred_train, np.array(ReadData.OneHot2Label(all_label))) 49 | 50 | # with open('../model/mimic/mimic_online_LR_v1.1.pkl', 'wb') as fout: 51 | # dill.dump(clf, fout) 52 | # print('done') 53 | 54 | with open('../model/mimic/mimic_online_LR_v1.1.pkl', 'rb') as fin: 55 | m1 = dill.load(fin) 56 | print(m1.clf.coef_) 57 | with open('../model/mimic/mimic_online_LR.pkl', 'rb') as fin: 58 | m2 = dill.load(fin) 59 | print(m2.clf.coef_) 60 | 61 | 62 | -------------------------------------------------------------------------------- /references/encase/code/minNCCE.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | import math 4 | 5 | def ampspaceIndex(x_value,xmin,xmax,phasenums = 4): 6 | ampinterval = (xmax - xmin) / phasenums 7 | index = 1 8 | while xmin < xmax: 9 | if x_value >= xmin and x_value < xmin + ampinterval: 10 | return index 11 | index += 1 12 | xmin += ampinterval 13 | return -1 14 | 15 | def minimumNCCE(line,phasenums = 4): 16 | ''' 17 | input: QSR info(R-R interval series) , the number of amplitude part 18 | method:minimum of the corrected conditional entropy of RR interval sequence 19 | paper: 20 | 1.Measuring regularity by means of a corrected conditional entropy in sympathetic outflow 21 | 2.Assessment of the dynamics of atrial signals and local atrial period 22 | series during atrial fibrillation: effects of isoproterenol 23 | administration(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC529297/) 24 | 25 | :param datas: QSR info from QSRinfo.csv 26 | phasenums: (xmax - xmin) / phasenums 27 | :return: [min_NCCE_value,min_L_Index] 28 | ''' 29 | # normalize 30 | line = np.array(line, np.float) 31 | line = line[1:-1] 32 | # not enough information 33 | if line.size == 0: 34 | return [1, -1] 35 | line = (line - np.mean(line)) / (np.std(line) + 1e-5) 36 | # useful param 37 | N = line.size 38 | xmin = np.min(line) 39 | xmax = np.max(line) 40 | 41 | L = 1 42 | E = defaultdict(float) 43 | CCE = defaultdict(float) 44 | NCCE = defaultdict(float) 45 | while L <= N: 46 | notsingle_c = defaultdict(int) 47 | single_c = 0 48 | for index in range(0, N - L + 1): 49 | index1 = ampspaceIndex(line[index], xmin, xmax, phasenums) 50 | index2 = ampspaceIndex(line[index + L - 1], xmin, xmax, phasenums) 51 | if index1 == index2: 52 | # not single 53 | notsingle_c[index1] += 1 54 | else: 55 | # single 56 | single_c += 1 57 | 58 | notsingle_array = np.array(list(notsingle_c.values()), np.float) 59 | notsingle_value = np.dot(1 / notsingle_array, np.log(notsingle_array)) 60 | 61 | single_value = single_c / N * math.log(N - L + 1) 62 | E[L] = single_value + notsingle_value 63 | EcL = single_c / N * E[1] 64 | CCE[L] = E[L] - E[L - 1] + EcL 65 | NCCE[L] = CCE[L] / (E[1] + 1e-5) 66 | L += 1 67 | CCE_values = np.array(list(CCE.values())) 68 | minCCE = CCE_values.min() 69 | minCCEI = CCE_values.argmin() 70 | NCCE_values = np.array(list(NCCE.values())) 71 | minNCCE = NCCE_values.min() 72 | return [minNCCE, minCCEI] -------------------------------------------------------------------------------- /references/encase/code/model_deep_centerwave.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | split long seq into small sub_seq, 5 | feed sub_seq to lstm 6 | ''' 7 | 8 | from __future__ import division, print_function, absolute_import 9 | 10 | import 
tflearn 11 | import tflearn.data_utils as du 12 | 13 | import numpy as np 14 | import ReadData 15 | import tensorflow as tf 16 | from tensorflow.contrib import learn 17 | from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib 18 | from sklearn.model_selection import StratifiedKFold 19 | import MyEval 20 | import pickle 21 | from tflearn.layers.recurrent import bidirectional_rnn, BasicLSTMCell 22 | 23 | 24 | tf.logging.set_verbosity(tf.logging.INFO) 25 | 26 | def read_data(): 27 | X = ReadData.read_centerwave('../../data1/centerwave_resampled.csv') 28 | _, _, Y = ReadData.ReadData( '../../data1/QRSinfo.csv' ) 29 | all_feature = np.array(X) 30 | print(all_feature.shape) 31 | all_label = np.array(Y) 32 | all_label_num = np.array(ReadData.Label2OneHot(Y)) 33 | kf = StratifiedKFold(n_splits=5, shuffle=True) 34 | i_fold = 1 35 | print('all feature shape: {0}'.format(all_feature.shape)) 36 | for train_index, test_index in kf.split(all_feature, all_label): 37 | train_data = all_feature[train_index] 38 | train_label = all_label_num[train_index] 39 | test_data = all_feature[test_index] 40 | test_label = all_label_num[test_index] 41 | print('read data done') 42 | return all_feature, all_label_num, train_data, train_label, test_data, test_label 43 | 44 | 45 | 46 | tf.reset_default_graph() 47 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 48 | 49 | ################### read data 50 | all_data, all_label, X, Y, valX, valY = read_data() 51 | print(X.shape, valX.shape) 52 | print(Y.shape, valY.shape) 53 | _, n_dim = X.shape 54 | all_data = all_data.reshape([-1, n_dim, 1]) 55 | X = X.reshape([-1, n_dim, 1]) 56 | valX = valX.reshape([-1, n_dim, 1]) 57 | 58 | ################### model 59 | 60 | ### input 61 | net = tflearn.input_data(shape=[None, n_dim, 1], name='input') 62 | print("input", net.get_shape()) 63 | 64 | ### conv 65 | net = tflearn.conv_1d(net, 64, 16, 2, regularizer='L2', weight_decay=0.0005, bias=True, 66 | weights_init='variance_scaling', bias_init='zeros') 67 | print("cov1", net.get_shape()) 68 | net = tflearn.batch_normalization(net) 69 | print("bn1", net.get_shape()) 70 | net = tflearn.activation(net, 'relu') 71 | print("relu1", net.get_shape()) 72 | 73 | 74 | ### lstm 75 | net = bidirectional_rnn(net, BasicLSTMCell(64), BasicLSTMCell(64)) 76 | print("lstm", net.get_shape()) 77 | feature_layer = tflearn.fully_connected(net, 32, activation='sigmoid', name='dense_1') 78 | net = feature_layer 79 | print("feature_layer", net.get_shape()) 80 | net = tflearn.fully_connected(feature_layer, 4, activation='softmax', name='output') 81 | print("dense", net.get_shape()) 82 | net = tflearn.regression(net, optimizer='adam', 83 | loss='categorical_crossentropy', 84 | learning_rate=0.1) 85 | 86 | 87 | ### Training 88 | run_id = 'deep_centerwave_v1' 89 | model = tflearn.DNN(net, checkpoint_path='../../models/model_deep_centerwave', 90 | max_checkpoints=10, tensorboard_verbose=0) 91 | 92 | model.fit(X, Y, n_epoch=10, validation_set=(valX, valY), 93 | show_metric=True, batch_size=128, run_id=run_id, snapshot_step=1000, 94 | snapshot_epoch=False) 95 | 96 | # save model 97 | model.save('../model/model_deep_centerwave/' + run_id) 98 | print('model save done') -------------------------------------------------------------------------------- /references/encase/code/normalize_data.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | 5 | def normalize_data(data): 6 | """ 7 | Normalize 
such that the mean of the input is 0 and the sample variance is 1 8 | 9 | :param data: The data set, expressed as a flat list of floats. 10 | :type data: list 11 | 12 | :return: The normalized data set, as a flat list of floats. 13 | :rtype: list 14 | """ 15 | 16 | mean = np.mean(data) 17 | var = 0 18 | 19 | for _ in data: 20 | data[data.index(_)] = _ - mean 21 | 22 | for _ in data: 23 | var += math.pow(_, 2) 24 | 25 | var = math.sqrt(var / float(len(data))) 26 | 27 | for _ in data: 28 | data[data.index(_)] = _ / var 29 | 30 | return data 31 | -------------------------------------------------------------------------------- /references/encase/code/plot_ecg.m: -------------------------------------------------------------------------------- 1 | 2 | fin = fopen('../../data/REFERENCE.csv'); 3 | str=fgetl(fin); 4 | cnt = 0; 5 | 6 | while ischar(str) 7 | cnt = cnt + 1; 8 | line=textscan(str,'%s'); 9 | tmp = strsplit(line{1}{1}, ','); 10 | pid = tmp{1}; 11 | label = tmp{2}; 12 | 13 | if cnt < 6352 14 | str=fgetl(fin); 15 | continue; 16 | end 17 | 18 | disp(pid); 19 | [tm,ecg,fs,siginfo]=rdmat(strcat('../../training2017/', pid)); 20 | [QRS,sign,en_thres] = qrs_detect2(ecg',0.25,0.6,fs); 21 | 22 | fig = figure(); 23 | fig.PaperPosition = [0 0 30 9]; 24 | set(fig, 'Visible', 'off'); 25 | 26 | plot(ecg); 27 | max_num = max(ecg); 28 | min_num = min(ecg); 29 | hold on; 30 | for i = 1:length(QRS) 31 | plot([QRS(i) QRS(i)], [min_num max_num], 'Color', [1 0.5 0.5], 'LineStyle', ':'); 32 | end 33 | 34 | my_title = strcat(pid,'\_', label, '\_', num2str(length(QRS)+1)); 35 | my_path = strcat('../../img/img1/', pid,'_', label, '_', num2str(length(QRS)+1)); 36 | title(my_title); 37 | saveas(fig, my_path, 'png'); 38 | 39 | str=fgetl(fin); 40 | 41 | % break; 42 | end 43 | 44 | fclose(fin); 45 | 46 | -------------------------------------------------------------------------------- /references/encase/code/preprocess.m: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%% 2 | % step 1: read raw data 3 | % step 2: split qrs 4 | % step 3: write 5 | % 6 | % data format sample: 7 | % A000001,N,1,1,1,1,1 8 | %%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | 11 | fin = fopen('../../REFERENCE.csv'); 12 | str=fgetl(fin); 13 | fout1 = fopen('../../data1/short.csv','w'); 14 | fout2 = fopen('../../data1/long.csv','w'); 15 | fout3 = fopen('../../data1/QRSinfo.csv','w'); 16 | 17 | n_iter = 10; 18 | ratio = 0.68; 19 | 20 | while ischar(str) 21 | line=textscan(str,'%s'); 22 | tmp = strsplit(line{1}{1}, ','); 23 | pid = tmp{1}; 24 | label = tmp{2}; 25 | 26 | disp(pid); 27 | [tm,ecg,fs,siginfo]=rdmat(strcat('../../training2017/', pid)); 28 | [QRS,sign,en_thres] = qrs_detect2(ecg',0.25,0.6,fs); 29 | QRS_info = diff([0 QRS length(ecg)]); 30 | 31 | THRES = 0.6; 32 | iter = 0; 33 | while max(QRS_info) > fs*2 34 | iter = iter + 1; 35 | if iter >= n_iter 36 | break 37 | end 38 | THRES = ratio * THRES; 39 | [QRS,sign,en_thres] = qrs_detect2(ecg',0.25,THRES,fs); 40 | QRS_info = diff([0 QRS length(ecg)]); 41 | end 42 | if max(QRS_info) > length(ecg)*0.9 43 | [QRS,sign,en_thres] = qrs_detect2(ecg'*2,0.25,0.6,fs); 44 | QRS_info = diff([0 QRS length(ecg)]); 45 | end 46 | 47 | 48 | 49 | %%% write long 50 | tmp_len = length(ecg); 51 | fprintf(fout2, '%s,', pid); 52 | fprintf(fout2, '%s,', label); 53 | fprintf(fout2, '%f,',ecg(1:tmp_len-1)); 54 | fprintf(fout2, '%f\n',ecg(tmp_len)); 55 | 56 | %%% write short 57 | for i = 1:(length(QRS)-1) 58 | %%% +1 to avoid overlap 59 | segment = ecg(QRS(i)+1:QRS(i+1)); 
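            % each beat-to-beat segment between consecutive QRS detections becomes
            % one row of short.csv: pid, label, then the raw samples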
60 | tmp_len = length(segment); 61 | fprintf(fout1, '%s,', pid); 62 | fprintf(fout1, '%s,', label); 63 | fprintf(fout1, '%f,',segment(1:tmp_len-1)); 64 | fprintf(fout1, '%f\n',segment(tmp_len)); 65 | end 66 | 67 | %%% write qrs info 68 | % add 0 and length to head and tail, diff to get length of each 69 | % segment, notice that the first and the last is not accurate 70 | 71 | tmp_len = length(QRS_info); 72 | fprintf(fout3, '%s,', pid); 73 | fprintf(fout3, '%s,', label); 74 | if tmp_len < 2 75 | %%% if QRS only have one split 76 | fprintf(fout3, '%f\n',QRS_info(tmp_len)); 77 | else 78 | fprintf(fout3, '%f,',QRS_info(1:tmp_len-1)); 79 | fprintf(fout3, '%f\n',QRS_info(tmp_len)); 80 | end 81 | 82 | str=fgetl(fin); 83 | 84 | % break; 85 | end 86 | 87 | fclose(fin); 88 | fclose(fout1); 89 | fclose(fout2); 90 | fclose(fout3); 91 | 92 | -------------------------------------------------------------------------------- /references/encase/code/preprocess_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jul 20 21:07:18 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | import ReadData 10 | from collections import Counter 11 | import pickle 12 | import hickle 13 | import numpy as np 14 | from sklearn.model_selection import StratifiedKFold 15 | 16 | 17 | def slide_and_cut(tmp_data, tmp_label, tmp_pid): 18 | ''' 19 | slide to get more samples from long data 20 | 21 | Counter({'N': 5050, 'O': 2456, 'A': 738, '~': 284}) 22 | ''' 23 | 24 | out_pid = [] 25 | out_data = [] 26 | out_label = [] 27 | 28 | window_size = 6000 29 | 30 | cnter = {'N': 0, 'O': 0, 'A': 0, '~': 0} 31 | for i in range(len(tmp_data)): 32 | cnter[tmp_label[i]] += len(tmp_data[i]) 33 | 34 | stride_N = 500 35 | stride_O = int(stride_N // (cnter['N'] / cnter['O'])) 36 | stride_A = int(stride_N // (cnter['N'] / cnter['A'])) 37 | stride_P = int(0.85 * stride_N // (cnter['N'] / cnter['~'])) 38 | 39 | stride = {'N': stride_N, 'O': stride_O, 'A': stride_A, '~': stride_P} 40 | print(stride) 41 | 42 | for i in range(len(tmp_data)): 43 | if i % 1000 == 0: 44 | print(i) 45 | tmp_stride = stride[tmp_label[i]] 46 | tmp_ts = tmp_data[i] 47 | for j in range(0, len(tmp_ts)-window_size, tmp_stride): 48 | out_pid.append(tmp_pid[i]) 49 | out_data.append(tmp_ts[j:j+window_size]) 50 | out_label.append(tmp_label[i]) 51 | 52 | print(Counter(out_label)) 53 | 54 | idx = np.array(list(range(len(out_label)))) 55 | out_label = ReadData.Label2OneHot(out_label) 56 | out_data = np.expand_dims(np.array(out_data, dtype=np.float32), axis=2) 57 | out_label = np.array(out_label, dtype=np.float32) 58 | out_pid = np.array(out_pid, dtype=np.string_) 59 | 60 | idx_shuffle = np.random.permutation(idx) 61 | out_data = out_data[idx_shuffle] 62 | out_label = out_label[idx_shuffle] 63 | out_pid = out_pid[idx_shuffle] 64 | 65 | return out_data, out_label, out_pid 66 | 67 | def expand_three_part(): 68 | long_pid, long_data, long_label = ReadData.ReadData( '../../data1/long.csv' ) 69 | 70 | kf = StratifiedKFold(n_splits=5, shuffle=True) 71 | for train_index, other_index in kf.split(np.array(long_data), np.array(long_label)): 72 | train_data = np.array(long_data)[train_index] 73 | train_label = np.array(long_label)[train_index] 74 | train_pid = np.array(long_pid)[train_index] 75 | other_data = np.array(long_data)[other_index] 76 | other_label = np.array(long_label)[other_index] 77 | other_pid = np.array(long_pid)[other_index] 78 | 79 | kf_1 = StratifiedKFold(n_splits=2, 
shuffle=True) 80 | for val_index, test_index in kf_1.split(np.array(other_data), np.array(other_label)): 81 | val_data = np.array(other_data)[val_index] 82 | val_label = np.array(other_label)[val_index] 83 | val_pid = np.array(other_pid)[val_index] 84 | test_data = np.array(other_data)[test_index] 85 | test_label = np.array(other_label)[test_index] 86 | test_pid = np.array(other_pid)[test_index] 87 | 88 | break 89 | break 90 | 91 | train_data_out, train_label_out, train_data_pid_out = slide_and_cut( 92 | list(train_data), list(train_label), list(train_pid)) 93 | val_data_out, val_label_out, val_data_pid_out = slide_and_cut( 94 | list(val_data), list(val_label), list(val_pid)) 95 | test_data_out, test_label_out, test_data_pid_out = slide_and_cut( 96 | list(test_data), list(test_label), list(test_pid)) 97 | 98 | print(len(set(list(train_pid)) & set(list(val_pid)) & set(list(test_pid))) == 0) 99 | 100 | # with open('../../data1/expanded_three_part_window_6000_stride_500_6.pkl', 'wb') as fout: 101 | # pickle.dump(train_data_out, fout) 102 | # pickle.dump(train_label_out, fout) 103 | # pickle.dump(val_data_out, fout) 104 | # pickle.dump(val_label_out, fout) 105 | # pickle.dump(test_data_out, fout) 106 | # pickle.dump(test_label_out, fout) 107 | # pickle.dump(test_data_pid_out, fout) 108 | 109 | ### use np.save to save larger than 4 GB data 110 | fout = open('../../data1/expanded_three_part_window_6000_stride_299.bin', 'wb') 111 | np.save(fout, train_data_out) 112 | np.save(fout, train_label_out) 113 | np.save(fout, val_data_out) 114 | np.save(fout, val_label_out) 115 | np.save(fout, test_data_out) 116 | np.save(fout, test_label_out) 117 | np.save(fout, test_data_pid_out) 118 | fout.close() 119 | print('save done') 120 | 121 | def expand_all(): 122 | long_pid, long_data, long_label = ReadData.ReadData( '../../data1/long.csv' ) 123 | data_out, label_out, pid_out = slide_and_cut(long_data, long_label, long_pid) 124 | 125 | ### use np.save to save larger than 4 GB data 126 | fout = open('../../data1/expanded_all_window_6000_stride_500.bin', 'wb') 127 | np.save(fout, data_out) 128 | np.save(fout, label_out) 129 | fout.close() 130 | print('save done') 131 | 132 | 133 | 134 | if __name__ == "__main__": 135 | expand_all() 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /references/encase/code/preprocess_slide.m: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%% 2 | % step 1: read raw data 3 | % step 2: split qrs 4 | % step 3: write 5 | % 6 | % data format sample: 7 | % A000001,N,1,1,1,1,1 8 | %%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | 11 | fin = fopen('../../REFERENCE.csv'); 12 | str=fgetl(fin); 13 | fout1 = fopen('../../data1/short.csv','w'); 14 | fout2 = fopen('../../data1/long.csv','w'); 15 | fout3 = fopen('../../data1/QRSinfo.csv','w'); 16 | 17 | n_iter = 10; 18 | ratio = 0.68; 19 | window_size = 3000; 20 | stride = 1000; 21 | 22 | while ischar(str) 23 | line=textscan(str,'%s'); 24 | tmp = strsplit(line{1}{1}, ','); 25 | pid = tmp{1}; 26 | label = tmp{2}; 27 | 28 | 29 | 30 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 31 | 32 | [tm,ecg,fs,siginfo]=rdmat(strcat('../../training2017/', pid)); 33 | 34 | ecg_start = 1; 35 | ecg_end = ecg_start+window_size-1; 36 | step = 1; 37 | 38 | while ecg_end <= length(ecg) 39 | tmp_pid = strcat(pid, '_', num2str(step)); 40 | tmp_ecg = ecg(ecg_start:ecg_end); 41 | 42 | fprintf('%s %d %d.\n', pid, ecg_start, ecg_end); 43 | 44 | [QRS,sign,en_thres] = 
qrs_detect2(tmp_ecg',0.25,0.6,fs); 45 | QRS_info = diff([0 QRS length(tmp_ecg)]); 46 | 47 | THRES = 0.6; 48 | iter = 0; 49 | while max(QRS_info) > fs*2 50 | iter = iter + 1; 51 | if iter >= n_iter 52 | break 53 | end 54 | THRES = ratio * THRES; 55 | [QRS,sign,en_thres] = qrs_detect2(tmp_ecg',0.25,THRES,fs); 56 | QRS_info = diff([0 QRS length(tmp_ecg)]); 57 | end 58 | if max(QRS_info) > length(tmp_ecg)*0.9 59 | [QRS,sign,en_thres] = qrs_detect2(tmp_ecg'*2,0.25,0.6,fs); 60 | QRS_info = diff([0 QRS length(tmp_ecg)]); 61 | end 62 | 63 | 64 | 65 | %%% write long 66 | tmp_len = length(tmp_ecg); 67 | fprintf(fout2, '%s,', tmp_pid); 68 | fprintf(fout2, '%s,', label); 69 | fprintf(fout2, '%f,',tmp_ecg(1:tmp_len-1)); 70 | fprintf(fout2, '%f\n',tmp_ecg(tmp_len)); 71 | 72 | %%% write short 73 | for i = 1:(length(QRS)-1) 74 | %%% +1 to avoid overlap 75 | segment = tmp_ecg(QRS(i)+1:QRS(i+1)); 76 | tmp_len = length(segment); 77 | fprintf(fout1, '%s,', tmp_pid); 78 | fprintf(fout1, '%s,', label); 79 | fprintf(fout1, '%f,',segment(1:tmp_len-1)); 80 | fprintf(fout1, '%f\n',segment(tmp_len)); 81 | end 82 | 83 | %%% write qrs info 84 | % add 0 and length to head and tail, diff to get length of each 85 | % segment, notice that the first and the last is not accurate 86 | 87 | tmp_len = length(QRS_info); 88 | fprintf(fout3, '%s,', tmp_pid); 89 | fprintf(fout3, '%s,', label); 90 | if tmp_len < 2 91 | %%% if QRS only have one split 92 | fprintf(fout3, '%f\n',QRS_info(tmp_len)); 93 | else 94 | fprintf(fout3, '%f,',QRS_info(1:tmp_len-1)); 95 | fprintf(fout3, '%f\n',QRS_info(tmp_len)); 96 | end 97 | 98 | ecg_start = ecg_start + stride; 99 | ecg_end = ecg_end + stride; 100 | step = step + 1; 101 | 102 | 103 | end 104 | 105 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 106 | 107 | 108 | str=fgetl(fin); 109 | 110 | % break; 111 | end 112 | 113 | fclose(fin); 114 | fclose(fout1); 115 | fclose(fout2); 116 | fclose(fout3); 117 | 118 | -------------------------------------------------------------------------------- /references/encase/code/read_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 19 11:50:23 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | import ReadData 10 | import numpy as np 11 | from sklearn.model_selection import StratifiedKFold 12 | from sklearn.preprocessing import normalize 13 | import dill 14 | import os.path 15 | 16 | 17 | #if __name__ == "__main__": 18 | def read_seq(): 19 | long_pid, long_data, long_label = ReadData.ReadData( '../../data1/long.csv' ) 20 | 21 | seq_pid = [] 22 | seq_data = [] 23 | seq_label = [] 24 | 25 | seq_len = 1000 26 | 27 | for i in range(len(long_pid)): 28 | ts = long_data[i] 29 | for j in range(len(ts) // seq_len): 30 | seq_data.append(ts[j*seq_len : (j+1)*seq_len]) 31 | seq_pid.append(long_pid[i]) 32 | seq_label.append(long_label[i]) 33 | 34 | long_label = seq_label 35 | seq_data = np.array(seq_data, dtype=np.float32) 36 | seq_data = normalize(seq_data, axis=0) 37 | 38 | seq_label = ReadData.Label2OneHot(seq_label) 39 | seq_label = np.array(seq_label, dtype=np.float32) 40 | 41 | all_feature = seq_data 42 | all_label = seq_label 43 | 44 | kf = StratifiedKFold(n_splits=5, shuffle=True) 45 | for train_index, test_index in kf.split(all_feature, long_label): 46 | train_data = all_feature[train_index] 47 | train_label = all_label[train_index] 48 | test_data = all_feature[test_index] 49 | test_label = all_label[test_index] 50 | break 51 | 52 | train_data = 
np.expand_dims(np.array(train_data, dtype=np.float32), axis=2) 53 | test_data = np.expand_dims(np.array(test_data, dtype=np.float32), axis=2) 54 | 55 | return train_data, train_label, test_data, test_label -------------------------------------------------------------------------------- /references/encase/code/resNet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | import tflearn 6 | import tflearn.data_utils as du 7 | 8 | import numpy as np 9 | import ReadData 10 | import tensorflow as tf 11 | from tensorflow.contrib import learn 12 | from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib 13 | from sklearn.model_selection import StratifiedKFold 14 | import MyEval 15 | 16 | tf.logging.set_verbosity(tf.logging.INFO) 17 | 18 | 19 | def truncate_long(ts, my_len): 20 | if len(ts) >= my_len: 21 | return ts[:my_len] 22 | else: 23 | ts += [0] * (my_len - len(ts)) 24 | return ts 25 | 26 | def sample_long(ts, interv): 27 | ts1 = [] 28 | for i in range(len(ts) // interv): 29 | ts1.append(ts[i * interv]) 30 | return ts1 31 | 32 | def read_data(): 33 | long_pid, long_data, long_label = ReadData.ReadData( '../data1/long.csv' ) 34 | 35 | 36 | mat1 = [truncate_long(ts, 9000) for ts in long_data] 37 | # mat2 = [truncate_long(ts, 6000) for ts in long_data] 38 | # mat3 = [truncate_long(ts, 3000) for ts in long_data] 39 | 40 | # mat4 = [sample_long(ts, 10) for ts in mat1] 41 | # mat5 = [sample_long(ts, 10) for ts in mat2] 42 | # mat6 = [sample_long(ts, 10) for ts in mat3] 43 | 44 | 45 | label_onehot = ReadData.Label2OneHot(long_label) 46 | 47 | # plt.plot(mat1[0]) 48 | # plt.plot(mat4[0]) 49 | 50 | all_feature = np.array(mat1, dtype=np.float32) 51 | all_label = np.array(label_onehot, dtype=np.float32) 52 | 53 | kf = StratifiedKFold(n_splits=5, shuffle=True) 54 | 55 | for train_index, test_index in kf.split(all_feature, long_label): 56 | train_data = all_feature[train_index] 57 | train_label = all_label[train_index] 58 | test_data = all_feature[test_index] 59 | test_label = all_label[test_index] 60 | break 61 | 62 | train_data = np.expand_dims(np.array(train_data, dtype=np.float32), axis=2) 63 | test_data = np.expand_dims(np.array(test_data, dtype=np.float32), axis=2) 64 | 65 | return train_data, train_label, test_data, test_label 66 | 67 | 68 | tf.reset_default_graph() 69 | X, Y, testX, testY = read_data() 70 | X = X.reshape([-1, 9000, 1]) 71 | testX = testX.reshape([-1, 9000, 1]) 72 | 73 | # Building Residual Network 74 | net = tflearn.input_data(shape=[None, 9000, 1]) 75 | net = tflearn.conv_1d(net, 64, 16, 2, activation='relu', bias=False) 76 | 77 | # Residual blocks 78 | net = tflearn.residual_bottleneck(net, 2, 16, 64, downsample_strides = 2, downsample=True, is_first_block = True) 79 | print("resn2", net.get_shape()) 80 | '''net = tflearn.residual_bottleneck(net, 2, 16, 128, downsample_strides = 2, downsample=True) 81 | print("resn4", net.get_shape()) 82 | net = tflearn.residual_bottleneck(net, 2, 16, 256, downsample_strides = 2, downsample=True) 83 | print("resn6", net.get_shape()) 84 | net = tflearn.residual_bottleneck(net, 2, 16, 512, downsample_strides = 2, downsample=True) 85 | print("resn8", net.get_shape())''' 86 | net = tflearn.residual_bottleneck(net, 2, 16, 64, downsample_strides = 2, downsample=True) 87 | print("resn4", net.get_shape()) 88 | net = tflearn.residual_bottleneck(net, 2, 16, 128, downsample_strides = 2, 
downsample=True) 89 | print("resn6", net.get_shape()) 90 | net = tflearn.residual_bottleneck(net, 2, 16, 128, downsample_strides = 2, downsample=True) 91 | print("resn8", net.get_shape()) 92 | net = tflearn.residual_bottleneck(net, 2, 16, 256, downsample_strides = 2, downsample=True) 93 | print("resn10", net.get_shape()) 94 | net = tflearn.residual_bottleneck(net, 2, 16, 256, downsample_strides = 2, downsample=True) 95 | print("resn12", net.get_shape()) 96 | net = tflearn.residual_bottleneck(net, 2, 16, 512, downsample_strides = 2, downsample=True) 97 | print("resn14", net.get_shape()) 98 | net = tflearn.residual_bottleneck(net, 2, 16, 512, downsample_strides = 2, downsample=True) 99 | print("resn16", net.get_shape()) 100 | net = tflearn.batch_normalization(net) 101 | net = tflearn.activation(net, 'relu') 102 | net = tflearn.global_avg_pool(net) 103 | print("beforeDense", net.get_shape()) 104 | # Regression 105 | net = tflearn.fully_connected(net, 2, activation='softmax') 106 | print("dense", net.get_shape()) 107 | net = tflearn.regression(net, optimizer='momentum', 108 | loss='categorical_crossentropy', 109 | learning_rate=0.1) 110 | # Training 111 | model = tflearn.DNN(net, checkpoint_path='model_resnet', 112 | max_checkpoints=10, tensorboard_verbose=0) 113 | model.fit(X, Y, n_epoch=10, validation_set=(testX, testY), 114 | show_metric=True, batch_size=300, run_id='resnet', snapshot_step=10, 115 | snapshot_epoch=False) 116 | 117 | #Predict 118 | y_predicted=[i for i in model.predict(testX)] 119 | #Calculate F1Score 120 | MyEval.F1Score3_num(y_predicted, testY) 121 | -------------------------------------------------------------------------------- /references/encase/code/stat.py: -------------------------------------------------------------------------------- 1 | 2 | import ReadData 3 | from matplotlib import pyplot as plt 4 | from collections import Counter 5 | import numpy as np 6 | 7 | def Flatten(l): 8 | return [item for sublist in l for item in sublist] 9 | 10 | 11 | #len_stat = [len(i) for i in short_data] 12 | #ttt = sum([i > 500 for i in len_stat]) + sum([i < 80 for i in len_stat]) 13 | #print(ttt/len(len_stat)) 14 | #plt.hist(len_stat, bins=200, range=[80,500]) 15 | 16 | #plt.plot(my_short_train_data[131]) 17 | 18 | ########## plot long short 19 | #my_short_all_data = my_short_train_data + my_short_val_data 20 | # 21 | ##ll = sorted(len_stat, reverse=True) 22 | #for i in range(100): 23 | # fig = plt.figure() 24 | # plt.plot(my_short_all_data[len_stat.index(ll[i])]) 25 | # plt.savefig('img/'+str(ll[i])+'.png', bbox_inches='tight') 26 | # plt.close(fig) 27 | 28 | 29 | ############ plot qrs 30 | #QRS_pid, QRS_data, QRS_label = ReadData.ReadData( '../../data1/QRSinfo.csv' ) 31 | #tmp = Flatten(QRS_data) 32 | # plt.hist(tmp, bins=100, range=[600, 2000]) 33 | 34 | #Counter 35 | 36 | 37 | ############ plot long 38 | #long_pid, long_data, long_label = ReadData.ReadData( '../../data1/long.csv' ) 39 | len_stat = [len(i) for i in long_data] 40 | print(len(len_stat)) 41 | print(sum(np.array(len_stat) >= 9000)) 42 | print(sum(np.array(len_stat) >= 6000)) 43 | print(sum(np.array(len_stat) >= 3000)) 44 | 45 | 46 | -------------------------------------------------------------------------------- /references/encase/code/test_gpu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | with tf.device('/gpu:0'): 3 | a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') 4 | b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 
shape=[3, 2], name='b') 5 | c = tf.matmul(a, b) 6 | 7 | with tf.Session() as sess: 8 | print (sess.run(c)) -------------------------------------------------------------------------------- /references/encase/code/test_importance.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 23 11:08:54 2017 5 | 6 | @author: shenda 7 | """ 8 | 9 | from collections import Counter 10 | import numpy as np 11 | import pandas as pd 12 | import MyEval 13 | import ReadData 14 | import dill 15 | from sklearn.model_selection import KFold 16 | from sklearn.model_selection import StratifiedKFold 17 | from BasicCLF import MyAdaBoost 18 | from BasicCLF import MyRF 19 | from BasicCLF import MyExtraTrees 20 | from BasicCLF import MyXGB 21 | from BasicCLF import MyGBDT 22 | from BasicCLF import MyLR 23 | from OptF import OptF 24 | import sklearn 25 | import xgboost 26 | 27 | #def TestBasic(): 28 | if __name__ == "__main__": 29 | 30 | with open('../data/features_all_v2.5.pkl', 'rb') as my_input: 31 | all_pid = dill.load(my_input) 32 | all_feature = np.array(dill.load(my_input)) 33 | all_label = np.array(dill.load(my_input)) 34 | print('features_all shape: ', all_feature.shape) 35 | 36 | with open('../data/feat_deep_centerwave_v0.1.pkl', 'rb') as my_input: 37 | feat_deep_centerwave = np.array(dill.load(my_input)) 38 | print('feat_deep_centerwave shape: ', feat_deep_centerwave.shape) 39 | 40 | with open('../data/feat_resnet.pkl', 'rb') as my_input: 41 | feat_resnet = np.array(dill.load(my_input)) 42 | print('feat_resnet shape: ', feat_resnet.shape) 43 | 44 | 45 | # k-fold cross validation 46 | all_feature = np.c_[all_feature, feat_deep_centerwave, feat_resnet] 47 | all_label = np.array(all_label) 48 | 49 | train_data = all_feature 50 | train_label = all_label 51 | 52 | clf = MyXGB() 53 | clf.fit(train_data, train_label) 54 | print('train done') 55 | 56 | imp_scores = clf.get_importance() 57 | feat_num = all_feature.shape[1] 58 | imp_scores_key_num = set([int(k[1:]) for k in imp_scores.keys()]) 59 | print(feat_num) 60 | print(len(imp_scores)) 61 | 62 | pred_train = clf.predict(train_data) 63 | MyEval.F1Score3(pred_train, train_label) 64 | 65 | with open('../../stat/feat_imp_v2.5_v0.1_v0.1.csv', 'w') as fout: 66 | for i in range(1,feat_num+1): 67 | if i in imp_scores_key_num: 68 | fout.write('{0},{1}\n'.format(i, imp_scores['f'+str(i)])) 69 | else: 70 | fout.write('{0},{1}\n'.format(i, 0)) 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # Config 4 | from . import config 5 | from .config import is_training, get_training_mode, init_graph 6 | 7 | # Import models 8 | from . import models 9 | from .models.dnn import DNN 10 | from .models.generator import SequenceGenerator 11 | 12 | # Helpers 13 | from . import helpers 14 | from .helpers.evaluator import Evaluator 15 | from .helpers.trainer import Trainer, TrainOp 16 | from .helpers.regularizer import add_weights_regularizer 17 | from .helpers.summarizer import summarize, summarize_activations, \ 18 | summarize_gradients, summarize_variables, summarize_all 19 | 20 | # Predefined ops 21 | from .layers import normalization 22 | from . import metrics 23 | from . import activations 24 | from . 
import losses 25 | from . import initializations 26 | from . import optimizers 27 | from . import summaries 28 | from . import optimizers 29 | from . import variables 30 | from . import collections # Add TFLearn collections to Tensorflow GraphKeys 31 | 32 | # Direct ops inclusion 33 | from .optimizers import SGD, AdaGrad, Adam, RMSProp, Momentum, Ftrl, AdaDelta, \ 34 | ProximalAdaGrad 35 | from .activations import linear, tanh, sigmoid, softmax, softplus, softsign,\ 36 | relu, relu6, leaky_relu, prelu, elu 37 | from .variables import variable, get_all_trainable_variable, \ 38 | get_all_variables, get_layer_variables_by_name 39 | from .objectives import categorical_crossentropy, binary_crossentropy, \ 40 | softmax_categorical_crossentropy, hinge_loss, mean_square 41 | from .metrics import Top_k, Accuracy, R2, top_k_op, accuracy_op, r2_op, Prediction_Counts 42 | 43 | # Direct layers inclusion 44 | from . import layers 45 | from .layers.conv import conv_2d, max_pool_2d, avg_pool_2d, conv_1d, \ 46 | highway_conv_2d, highway_conv_1d, max_pool_1d, avg_pool_1d, \ 47 | global_avg_pool, residual_block, residual_bottleneck, \ 48 | conv_2d_transpose, upsample_2d, conv_3d, max_pool_3d, avg_pool_3d, \ 49 | resnext_block 50 | from .layers.core import input_data, dropout, custom_layer, reshape, \ 51 | flatten, activation, fully_connected, single_unit, highway, \ 52 | one_hot_encoding, time_distributed 53 | from .layers.normalization import batch_normalization, local_response_normalization 54 | from .layers.estimator import regression 55 | #from .layers.recurrent import lstm, gru, simple_rnn, bidirectional_rnn, \ 56 | # BasicRNNCell, BasicLSTMCell, GRUCell 57 | from .layers.embedding_ops import embedding 58 | from .layers.merge_ops import merge, merge_outputs 59 | 60 | # Datasets 61 | from . import datasets 62 | 63 | # Utils 64 | from . import data_utils 65 | from . import utils 66 | from .utils import get_layer_by_name 67 | 68 | # Data Utils 69 | from .data_augmentation import DataAugmentation, ImageAugmentation, SequenceAugmentation 70 | from .data_preprocessing import DataPreprocessing, ImagePreprocessing, SequencePreprocessing 71 | 72 | # Init training mode 73 | config.init_training_mode() 74 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/collections.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | import tensorflow as tf 5 | 6 | """ 7 | For handling networks and keep tracks of important parameters, TFLearn is 8 | using Tensorflow collections. 9 | """ 10 | #TODO: Chek if a layer without import tflearn doesn't have problem with those 11 | # Collection for network inputs. Used by `Trainer` class for retrieving all 12 | # data input placeholders. 13 | tf.GraphKeys.INPUTS = 'inputs' 14 | 15 | # Collection for network targets. Used by `Trainer` class for retrieving all 16 | # targets (labels) placeholders. 17 | tf.GraphKeys.TARGETS = 'targets' 18 | 19 | # Collection for network train ops. Used by `Trainer` class for retrieving all 20 | # optimization processes. 21 | tf.GraphKeys.TRAIN_OPS = 'trainops' 22 | 23 | # Collection to retrieve layers variables. Variables are stored according to 24 | # the following pattern: /tf.GraphKeys.LAYER_VARIABLES/layer_name (so there 25 | # will have as many collections as layers with variables). 
26 | tf.GraphKeys.LAYER_VARIABLES = 'layer_variables' 27 | 28 | # Collection to store all returned tensors for every layer 29 | tf.GraphKeys.LAYER_TENSOR = 'layer_tensor' 30 | 31 | # Collection to store all variables that will be restored 32 | tf.GraphKeys.EXCL_RESTORE_VARS = 'restore_variables' 33 | 34 | # Collection to store the default graph configuration 35 | tf.GraphKeys.GRAPH_CONFIG = 'graph_config' 36 | 37 | # Collection to store all input variable data preprocessing 38 | tf.GraphKeys.DATA_PREP = 'data_preprocessing' 39 | 40 | # Collection to store all input variable data preprocessing 41 | tf.GraphKeys.DATA_AUG = 'data_augmentation' 42 | 43 | # Collection to store all custom learning rate variable 44 | tf.GraphKeys.LR_VARIABLES = 'lr_variables' 45 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cifar10 2 | from . import imdb 3 | from . import mnist 4 | from . import oxflower17 5 | from . import titanic -------------------------------------------------------------------------------- /references/encase/code/tflearn/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | """ CIFAR-10 Dataset 2 | 3 | Credits: A. Krizhevsky. https://www.cs.toronto.edu/~kriz/cifar.html. 4 | 5 | """ 6 | from __future__ import absolute_import, print_function 7 | 8 | import os 9 | import sys 10 | from six.moves import urllib 11 | import tarfile 12 | 13 | import numpy as np 14 | import pickle 15 | 16 | from ..data_utils import to_categorical 17 | 18 | 19 | def load_data(dirname="cifar-10-batches-py", one_hot=False): 20 | tarpath = maybe_download("cifar-10-python.tar.gz", 21 | "http://www.cs.toronto.edu/~kriz/", 22 | dirname) 23 | X_train = [] 24 | Y_train = [] 25 | 26 | if dirname != 'cifar-10-batches-py': 27 | dirname = os.path.join(dirname, 'cifar-10-batches-py') 28 | 29 | for i in range(1, 6): 30 | fpath = os.path.join(dirname, 'data_batch_' + str(i)) 31 | data, labels = load_batch(fpath) 32 | if i == 1: 33 | X_train = data 34 | Y_train = labels 35 | else: 36 | X_train = np.concatenate([X_train, data], axis=0) 37 | Y_train = np.concatenate([Y_train, labels], axis=0) 38 | 39 | fpath = os.path.join(dirname, 'test_batch') 40 | X_test, Y_test = load_batch(fpath) 41 | 42 | X_train = np.dstack((X_train[:, :1024], X_train[:, 1024:2048], 43 | X_train[:, 2048:])) / 255. 44 | X_train = np.reshape(X_train, [-1, 32, 32, 3]) 45 | X_test = np.dstack((X_test[:, :1024], X_test[:, 1024:2048], 46 | X_test[:, 2048:])) / 255. 
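    # Each raw CIFAR row holds 3072 values stored plane-wise (1024 red, then
    # 1024 green, then 1024 blue); dstack stacks the three planes along a new
    # trailing axis and the reshape below yields N x 32 x 32 x 3 images in [0, 1].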
47 | X_test = np.reshape(X_test, [-1, 32, 32, 3]) 48 | 49 | if one_hot: 50 | Y_train = to_categorical(Y_train, 10) 51 | Y_test = to_categorical(Y_test, 10) 52 | 53 | return (X_train, Y_train), (X_test, Y_test) 54 | 55 | 56 | def load_batch(fpath): 57 | with open(fpath, 'rb') as f: 58 | if sys.version_info > (3, 0): 59 | # Python3 60 | d = pickle.load(f, encoding='latin1') 61 | else: 62 | # Python2 63 | d = pickle.load(f) 64 | data = d["data"] 65 | labels = d["labels"] 66 | return data, labels 67 | 68 | 69 | def maybe_download(filename, source_url, work_directory): 70 | if not os.path.exists(work_directory): 71 | os.mkdir(work_directory) 72 | filepath = os.path.join(work_directory, filename) 73 | if not os.path.exists(filepath): 74 | print("Downloading CIFAR 10, Please wait...") 75 | filepath, _ = urllib.request.urlretrieve(source_url + filename, 76 | filepath, reporthook) 77 | statinfo = os.stat(filepath) 78 | print(('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')) 79 | untar(filepath) 80 | return filepath 81 | 82 | #reporthook from stackoverflow #13881092 83 | def reporthook(blocknum, blocksize, totalsize): 84 | readsofar = blocknum * blocksize 85 | if totalsize > 0: 86 | percent = readsofar * 1e2 / totalsize 87 | s = "\r%5.1f%% %*d / %d" % ( 88 | percent, len(str(totalsize)), readsofar, totalsize) 89 | sys.stderr.write(s) 90 | if readsofar >= totalsize: # near the end 91 | sys.stderr.write("\n") 92 | else: # total size is unknown 93 | sys.stderr.write("read %d\n" % (readsofar,)) 94 | 95 | def untar(fname): 96 | if (fname.endswith("tar.gz")): 97 | tar = tarfile.open(fname) 98 | tar.extractall(path = '/'.join(fname.split('/')[:-1])) 99 | tar.close() 100 | print("File Extracted in Current Directory") 101 | else: 102 | print("Not a tar.gz file: '%s '" % sys.argv[0]) 103 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | """ CIFAR-100 Dataset 2 | 3 | Credits: A. Krizhevsky. https://www.cs.toronto.edu/~kriz/cifar.html. 4 | 5 | """ 6 | from __future__ import absolute_import, print_function 7 | 8 | import os 9 | import sys 10 | from six.moves import urllib 11 | import tarfile 12 | 13 | import numpy as np 14 | import pickle 15 | 16 | from ..data_utils import to_categorical 17 | 18 | 19 | def load_data(dirname="cifar-100-python", one_hot=False): 20 | tarpath = maybe_download("cifar-100-python.tar.gz", 21 | "http://www.cs.toronto.edu/~kriz/", 22 | dirname) 23 | X_train = [] 24 | Y_train = [] 25 | 26 | for i in ["train"]: 27 | fpath = os.path.join(dirname, i) 28 | data, labels = load_batch(fpath) 29 | if i == "train": 30 | X_train = data 31 | Y_train = labels 32 | else: 33 | X_train = np.concatenate([X_train, data], axis=0) 34 | Y_train = np.concatenate([Y_train, labels], axis=0) 35 | 36 | fpath = os.path.join(dirname, 'test') 37 | X_test, Y_test = load_batch(fpath) 38 | 39 | X_train = np.dstack((X_train[:, :1024], X_train[:, 1024:2048], 40 | X_train[:, 2048:])) / 255. 41 | X_train = np.reshape(X_train, [-1, 32, 32, 3]) 42 | X_test = np.dstack((X_test[:, :1024], X_test[:, 1024:2048], 43 | X_test[:, 2048:])) / 255. 
44 | X_test = np.reshape(X_test, [-1, 32, 32, 3]) 45 | 46 | if one_hot: 47 | Y_train = to_categorical(Y_train, 100) 48 | Y_test = to_categorical(Y_test, 100) 49 | 50 | return (X_train, Y_train), (X_test, Y_test) 51 | 52 | 53 | def load_batch(fpath): 54 | with open(fpath, 'rb') as f: 55 | if sys.version_info > (3, 0): 56 | # Python3 57 | d = pickle.load(f, encoding='latin1') 58 | else: 59 | # Python2 60 | d = pickle.load(f) 61 | data = d["data"] 62 | labels = d["fine_labels"] 63 | return data, labels 64 | 65 | 66 | def maybe_download(filename, source_url, work_directory): 67 | if not os.path.exists(work_directory): 68 | os.mkdir(work_directory) 69 | filepath = os.path.join(work_directory, filename) 70 | if not os.path.exists(filepath): 71 | print("Downloading CIFAR 100, Please wait...") 72 | filepath, _ = urllib.request.urlretrieve(source_url + filename, 73 | filepath) 74 | statinfo = os.stat(filepath) 75 | print(('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')) 76 | untar(filepath) 77 | return filepath 78 | 79 | 80 | def untar(fname): 81 | if (fname.endswith("tar.gz")): 82 | tar = tarfile.open(fname) 83 | tar.extractall() 84 | tar.close() 85 | print("File Extracted in Current Directory") 86 | else: 87 | print("Not a tar.gz file: '%s '" % sys.argv[0]) 88 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/datasets/oxflower17.py: -------------------------------------------------------------------------------- 1 | """ 17 Category Flower Dataset 2 | 3 | Credits: Maria-Elena Nilsback and Andrew Zisserman. 4 | http://www.robots.ox.ac.uk/~vgg/data/flowers/17/ 5 | 6 | """ 7 | from __future__ import absolute_import, print_function 8 | 9 | import os 10 | import sys 11 | from six.moves import urllib 12 | import tarfile 13 | 14 | import numpy as np 15 | import pickle 16 | 17 | '''from ..data_utils import *''' 18 | 19 | 20 | def load_data(dirname="/Users/Wadejoy/Documents/PKU/Execise/DeepLearning/DLCourse/data", resize_pics=(224, 224), shuffle=True, 21 | one_hot=False): 22 | dataset_file = os.path.join(dirname, '/17flowers') 23 | if not os.path.exists(dataset_file): 24 | tarpath = maybe_download("/Users/Wadejoy/Documents/PKU/Execise/DeepLearning/DLCourse/data/17flowers.tgz", 25 | "http://www.robots.ox.ac.uk/~vgg/data/flowers/17/", 26 | dirname) 27 | 28 | X, Y = build_image_dataset_from_dir(dirname, 29 | dataset_file=dataset_file, 30 | resize=resize_pics, 31 | filetypes=['.jpg', '.jpeg'], 32 | convert_gray=False, 33 | shuffle_data=shuffle, 34 | categorical_Y=one_hot) 35 | 36 | return X, Y 37 | 38 | import tflearn.data_utils 39 | 40 | def load_local_data(dirname="/Users/Wadejoy/Documents/PKU/Execise/DeepLearning/DLCourse/data/17flowers", resize_pics=(224, 224), shuffle=True, 41 | one_hot=False): 42 | dataset_file = os.path.join(dirname, '17flowers.pkl') 43 | 44 | X, Y = tflearn.data_utils.build_image_dataset_from_dir(dirname, 45 | dataset_file=dataset_file, 46 | resize=resize_pics, 47 | filetypes=['.jpg', '.jpeg'], 48 | convert_gray=False, 49 | shuffle_data=False, 50 | categorical_Y=one_hot) 51 | #, X_test, Y_test 52 | 53 | return X, Y 54 | #, X_test, Y_test 55 | 56 | def maybe_download(filename, source_url, work_directory): 57 | if not os.path.exists(work_directory): 58 | os.mkdir(work_directory) 59 | filepath = os.path.join(work_directory, filename) 60 | if not os.path.exists(filepath): 61 | print("Downloading Oxford 17 category Flower Dataset, Please " 62 | "wait...") 63 | filepath, _ = urllib.request.urlretrieve(source_url 
+ filename, 64 | filepath, reporthook) 65 | statinfo = os.stat(filepath) 66 | print(('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')) 67 | 68 | untar(filepath, work_directory) 69 | build_class_directories(os.path.join(work_directory, 'jpg')) 70 | return filepath 71 | 72 | #reporthook from stackoverflow #13881092 73 | def reporthook(blocknum, blocksize, totalsize): 74 | readsofar = blocknum * blocksize 75 | if totalsize > 0: 76 | percent = readsofar * 1e2 / totalsize 77 | s = "\r%5.1f%% %*d / %d" % ( 78 | percent, len(str(totalsize)), readsofar, totalsize) 79 | sys.stderr.write(s) 80 | if readsofar >= totalsize: # near the end 81 | sys.stderr.write("\n") 82 | else: # total size is unknown 83 | sys.stderr.write("read %d\n" % (readsofar,)) 84 | 85 | def build_class_directories(dir): 86 | dir_id = 0 87 | class_dir = os.path.join(dir, str(dir_id)) 88 | if not os.path.exists(class_dir): 89 | os.mkdir(class_dir) 90 | for i in range(1, 1361): 91 | fname = "image_" + ("%.4i" % i) + ".jpg" 92 | os.rename(os.path.join(dir, fname), os.path.join(class_dir, fname)) 93 | if i % 80 == 0 and dir_id < 16: 94 | dir_id += 1 95 | class_dir = os.path.join(dir, str(dir_id)) 96 | os.mkdir(class_dir) 97 | 98 | 99 | def untar(fname, extract_dir): 100 | if fname.endswith("tar.gz") or fname.endswith("tgz"): 101 | tar = tarfile.open(fname) 102 | tar.extractall(extract_dir) 103 | tar.close() 104 | print("File Extracted") 105 | else: 106 | print("Not a tar.gz/tgz file: '%s '" % sys.argv[0]) 107 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/datasets/svhn.py: -------------------------------------------------------------------------------- 1 | ################################################ 2 | # The Street View House Numbers (SVHN) Dataset # 3 | # http://ufldl.stanford.edu/housenumbers # 4 | # Format 2: Cropped Digits # 5 | # Train set: 73257 32x32 RGB Digits # 6 | # Test set: 26032 32x32 RGB Digits # 7 | # Extra set: 531131 32x32 RGB Digits # 8 | ################################################ 9 | from __future__ import print_function 10 | import numpy as np 11 | import scipy.io 12 | from six.moves import urllib 13 | import os 14 | 15 | URL_BASE = 'http://ufldl.stanford.edu/housenumbers/' 16 | TRAIN_FILE = 'train_32x32.mat' 17 | TEST_FILE = 'test_32x32.mat' 18 | EXTRA_FILE = 'extra_32x32.mat' 19 | TRAIN_INSTANCES = 73257 20 | TEST_INSTANCES = 26032 21 | EXTRA_INSTANCES = 531131 22 | 23 | def load_data(data_dir="svhn/", one_hot=True): 24 | train_filepath = maybe_download(TRAIN_FILE,data_dir) 25 | test_filepath = maybe_download(TEST_FILE,data_dir) 26 | trainX, trainY = read_data_from_file(train_filepath,TRAIN_INSTANCES) 27 | testX, testY = read_data_from_file(test_filepath,TEST_INSTANCES) 28 | return trainX, trainY, testX, testY 29 | 30 | def load_extra_data(data_dir="svhn/", one_hot=True): 31 | extra_filepath = maybe_download(EXTRA_FILE,data_dir) 32 | extraX, extraY = read_data_from_file(extra_filepath,EXTRA_INSTANCES) 33 | return extraX, extraY 34 | 35 | def read_data_from_file(filepath,instances): 36 | print('Reading SVHN Dataset...') 37 | mat = scipy.io.loadmat(filepath) 38 | Y = mat['y'] ##Y.shape = (instances,1) 39 | X = mat['X'] #X.shape = (32, 32, 3, instances) -> 32x32 RGB 40 | nX = np.zeros(instances*3*32*32).reshape(instances,32,32,3) 41 | for n in range (instances): 42 | for rgb in range(3): 43 | for i in range(32): 44 | for j in range(32): 45 | nX[n,i,j,rgb]=X[i,j,rgb,n] #output shape: (Nx32x32x3) 46 | nY = 
np.zeros(instances*10).reshape(instances,10) 47 | for n in range(instances): 48 | nY[n] = label_to_one_hot_y(Y[n,0],10) 49 | print(' ...dataset read!') 50 | return nX, nY 51 | 52 | def label_to_one_hot_y(y,classes): 53 | # original .mat files have the 'y' classes labeled from 1 up to 10 54 | Y = np.zeros(classes) 55 | Y[y-1] = 1 #classes labeled from 0 up to 9: one_hot vector y 56 | return Y 57 | 58 | def maybe_download(filename, work_directory): 59 | """Download the data from Stanford's website, unless it's already here.""" 60 | if not os.path.exists(work_directory): 61 | os.mkdir(work_directory) 62 | filepath = os.path.join(work_directory, filename) 63 | if not os.path.exists(filepath): 64 | print('Downloading SVHN Dataset...') 65 | filepath, _ = urllib.request.urlretrieve(URL_BASE + filename,filepath) 66 | statinfo = os.stat(filepath) 67 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 68 | return filepath 69 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/datasets/titanic.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import gzip 3 | import os 4 | from six.moves import urllib 5 | 6 | 7 | def download_dataset(filename='titanic_dataset.csv', work_directory='./'): 8 | """Download the data, unless it's already here.""" 9 | url = 'http://tflearn.org/resources/titanic_dataset.csv' 10 | if not os.path.exists(work_directory): 11 | os.mkdir(work_directory) 12 | filepath = os.path.join(work_directory, filename) 13 | if not os.path.exists(filepath): 14 | print('Downloading Titanic dataset...') 15 | filepath, _ = urllib.request.urlretrieve(url, filepath) 16 | statinfo = os.stat(filepath) 17 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 18 | return filepath 19 | 20 | 21 | def load_dataset(): 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .evaluator import Evaluator 3 | from .trainer import Trainer, TrainOp 4 | from .regularizer import add_weights_regularizer 5 | from .summarizer import summarize, summarize_activations, \ 6 | summarize_gradients, summarize_variables, summarize_all 7 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/helpers/evaluator.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import tensorflow as tf 4 | 5 | import tflearn 6 | from ..utils import to_list 7 | from .. import data_flow 8 | from .. import metrics 9 | from .trainer import evaluate_flow 10 | 11 | 12 | class Evaluator(object): 13 | 14 | """ Evaluator. 15 | 16 | A class used for performing predictions and evaluating model performance. 17 | 18 | Arguments: 19 | tensors: list of `Tensor`. A list of tensors to perform predictions. 20 | model: `str`. The model weights path (Optional). 21 | session: `Session`. The session to run the prediction (Optional).
22 | 23 | """ 24 | 25 | def __init__(self, tensors, model=None, session=None): 26 | self.tensors = to_list(tensors) 27 | self.graph = self.tensors[0].graph 28 | self.model = model 29 | self.dprep_collection = tf.get_collection(tf.GraphKeys.DATA_PREP) 30 | self.inputs = tf.get_collection(tf.GraphKeys.INPUTS) 31 | 32 | with self.graph.as_default(): 33 | self.session = tf.Session() 34 | if session: self.session = session 35 | self.saver = tf.train.Saver() 36 | if model: self.saver.restore(self.session, model) 37 | 38 | def predict(self, feed_dict): 39 | """ predict. 40 | 41 | Run data through the provided network and return the result value. 42 | 43 | Arguments: 44 | feed_dict: `dict`. Feed data dictionary, with placeholders as 45 | keys, and data as values. 46 | 47 | Returns: 48 | An `array`. In case of multiple tensors to predict, each tensor's 49 | prediction result is concatenated. 50 | 51 | """ 52 | with self.graph.as_default(): 53 | # Data Preprocessing 54 | dprep_dict = dict() 55 | for i in range(len(self.inputs)): 56 | # Support for custom inputs not using dprep/daug 57 | if len(self.dprep_collection) > i: 58 | if self.dprep_collection[i] is not None: 59 | dprep_dict[self.inputs[i]] = self.dprep_collection[i] 60 | # Apply pre-processing 61 | if len(dprep_dict) > 0: 62 | for k in dprep_dict: 63 | feed_dict[k] = dprep_dict[k].apply(feed_dict[k]) 64 | 65 | # Prediction for each tensor 66 | tflearn.is_training(False, self.session) 67 | prediction = [] 68 | for output in self.tensors: 69 | o_pred = self.session.run(output, feed_dict=feed_dict).tolist() 70 | for i, val in enumerate(o_pred): # Reshape pred per sample 71 | if len(self.tensors) > 1: 72 | if not len(prediction) > i: prediction.append([]) 73 | prediction[i].append(val) 74 | else: 75 | prediction.append(val) 76 | return prediction 77 | 78 | def evaluate(self, feed_dict, ops, batch_size=128): 79 | """ Evaluate. 80 | 81 | Evaluate a list of tensors over a whole dataset. Generally, 82 | 'ops' argument are average performance metrics (such as average mean, 83 | top-3, etc...) 84 | 85 | Arguments: 86 | feed_dict: `dict`. The feed dictionary of data. 87 | ops: list of `Tensors`. The tensors to evaluate. 88 | batch_size: `int`. A batch size. 89 | 90 | Returns: 91 | The mean average result per tensor over all batches. 
92 | 93 | """ 94 | tflearn.is_training(False, self.session) 95 | coord = tf.train.Coordinator() 96 | inputs = tf.get_collection(tf.GraphKeys.INPUTS) 97 | # Data Preprocessing 98 | dprep_dict = {} 99 | dprep_collection = tf.get_collection(tf.GraphKeys.DATA_PREP) 100 | for i in range(len(inputs)): 101 | # Support for custom inputs not using dprep/daug 102 | if len(dprep_collection) > i: 103 | if dprep_collection[i] is not None: 104 | dprep_dict[inputs[i]] = dprep_collection[i] 105 | # Data Flow 106 | df = data_flow.FeedDictFlow(feed_dict, coord, 107 | batch_size=batch_size, 108 | dprep_dict=dprep_dict, 109 | daug_dict=None, 110 | index_array=None, 111 | num_threads=1) 112 | 113 | return evaluate_flow(self.session, ops, df) 114 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/helpers/generator.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import tensorflow as tf 4 | 5 | from ..utils import to_list 6 | 7 | 8 | class SequenceGenerator(object): 9 | 10 | def __init__(self, net_outputs, model=None, session=None): 11 | self.net_outputs = to_list(net_outputs) 12 | self.graph = net_outputs[0].graph 13 | self.model = model 14 | 15 | with self.graph.as_default(): 16 | self.session = tf.Session() 17 | if session: self.session = session 18 | self.saver = tf.train.Saver() 19 | if model: self.saver.restore(self.session, model) 20 | 21 | def predict(self, feed_dict): 22 | with self.graph.as_default(): 23 | prediction = [] 24 | for output in self.net_outputs: 25 | o_pred = self.session.run(output, feed_dict=feed_dict).tolist() 26 | for i, val in enumerate(o_pred): # Reshape pred per sample 27 | if len(self.net_outputs) > 1: 28 | if not len(prediction) > i: prediction.append([]) 29 | prediction[i].append(val) 30 | else: 31 | prediction.append(val) 32 | return prediction 33 | 34 | def generate(self): 35 | raise NotImplementedError 36 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/helpers/regularizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import tensorflow as tf 4 | from .. import losses 5 | 6 | 7 | """ 8 | Regularizer contains some useful functions to help add regularization to 9 | weights and activations. 10 | """ 11 | 12 | 13 | def add_weights_regularizer(variable, loss="L2", weight_decay=0.001, 14 | add_to_collection=None): 15 | """ add_weights_regularizer. 16 | 17 | Add a weights regularizer to the provided Tensor 18 | 19 | Arguments: 20 | variable: `Variable`. Tensor to add regularization. 21 | loss: `str`. Regularization mode. 22 | weight_decay: `float`. Decay to use for regularization. 23 | add_to_collection: `str`. Add the regularization loss to the 24 | specified collection. Default: tf.GraphKeys.REGULARIZATION_LOSSES. 25 | 26 | Returns: 27 | `tf.Tensor`. The weight regularizer. 
28 | 29 | """ 30 | if not add_to_collection: 31 | add_to_collection = tf.GraphKeys.REGULARIZATION_LOSSES 32 | if isinstance(loss, str): 33 | regul = losses.get(loss) 34 | weights_regularizer = regul(variable, weight_decay) 35 | elif loss and callable(loss): 36 | weights_regularizer = loss(variable) 37 | else: 38 | weights_regularizer = loss 39 | if add_to_collection: 40 | tf.add_to_collection(add_to_collection, weights_regularizer) 41 | return weights_regularizer 42 | 43 | 44 | def add_activation_regularizer(op, loss="L2", activ_decay=0.001, 45 | add_to_collection=None): 46 | raise NotImplementedError 47 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/helpers/summarizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import tensorflow as tf 4 | from .. import summaries 5 | 6 | # Fix for TF 0.12 7 | try: 8 | tf012 = True 9 | merge_summary = tf.summary.merge 10 | except Exception: 11 | tf012 = False 12 | merge_summary = tf.merge_summary 13 | 14 | """ 15 | Summarizer contains some useful functions to help summarize variables, 16 | activations etc... in Tensorboard. 17 | """ 18 | 19 | 20 | def summarize_all(train_vars, grads, activations, 21 | summary_collection="tflearn_summ"): 22 | summarize_variables(train_vars, summary_collection) 23 | summarize_gradients(grads, summary_collection) 24 | return summarize_activations(activations, summary_collection) 25 | 26 | 27 | def summarize_variables(train_vars=None, summary_collection="tflearn_summ"): 28 | """ summarize_variables. 29 | 30 | Arguments: 31 | train_vars: list of `Variable`. The variable weights to monitor. 32 | summary_collection: A collection to add this summary to and 33 | also used for returning a merged summary over all its elements. 34 | Default: 'tflearn_summ'. 35 | 36 | Returns: 37 | `Tensor`. Merge of all summaries in 'summary_collection' 38 | 39 | """ 40 | if not train_vars: train_vars = tf.trainable_variables() 41 | summaries.add_trainable_vars_summary(train_vars, "", "", summary_collection) 42 | return merge_summary(tf.get_collection(summary_collection)) 43 | 44 | 45 | def summarize_activations(activations, summary_collection="tflearn_summ"): 46 | """ summarize_activations. 47 | 48 | Arguments: 49 | activations: list of `Tensor`. The activations to monitor. 50 | summary_collection: A collection to add this summary to and 51 | also used for returning a merged summary over all its elements. 52 | Default: 'tflearn_summ'. 53 | 54 | Returns: 55 | `Tensor`. Merge of all summaries in 'summary_collection' 56 | 57 | """ 58 | summaries.add_activations_summary(activations, "", "", summary_collection) 59 | return merge_summary(tf.get_collection(summary_collection)) 60 | 61 | 62 | def summarize_gradients(grads, summary_collection="tflearn_summ"): 63 | """ summarize_gradients. 64 | 65 | Arguments: 66 | grads: list of `Tensor`. The gradients to monitor. 67 | summary_collection: A collection to add this summary to and 68 | also used for returning a merged summary over all its elements. 69 | Default: 'tflearn_summ'. 70 | 71 | Returns: 72 | `Tensor`. Merge of all summaries in 'summary_collection' 73 | 74 | """ 75 | summaries.add_gradients_summary(grads, "", "", summary_collection) 76 | return merge_summary(tf.get_collection(summary_collection)) 77 | 78 | 79 | def summarize(value, type, name, summary_collection="tflearn_summ"): 80 | """ summarize.
81 | 82 | A custom summarization op. 83 | 84 | Arguments: 85 | value: `Tensor`. The tensor value to monitor. 86 | type: `str` among 'histogram', 'scalar'. The data monitoring type. 87 | name: `str`. A name for this summary. 88 | summary_collection: A collection to add this summary to and 89 | also used for returning a merged summary over all its elements. 90 | Default: 'tflearn_summ'. 91 | 92 | Returns: 93 | `Tensor`. Merge of all summaries in 'summary_collection'. 94 | 95 | """ 96 | if tf012: 97 | name = name.replace(':', '_') 98 | summaries.get_summary(type, name, value, summary_collection) 99 | return merge_summary(tf.get_collection(summary_collection)) 100 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .conv import conv_2d, max_pool_2d, avg_pool_2d, conv_1d, \ 3 | max_pool_1d, avg_pool_1d, residual_block, residual_bottleneck, \ 4 | highway_conv_1d, highway_conv_2d, upsample_2d, conv_3d, max_pool_3d, \ 5 | avg_pool_3d, resnext_block 6 | from .core import input_data, dropout, custom_layer, reshape, flatten, \ 7 | activation, fully_connected, single_unit, one_hot_encoding, time_distributed 8 | from .normalization import batch_normalization, local_response_normalization 9 | from .estimator import regression 10 | from .recurrent import lstm, gru, simple_rnn, bidirectional_rnn, \ 11 | BasicRNNCell, BasicLSTMCell, GRUCell 12 | from .embedding_ops import embedding 13 | from .merge_ops import merge, merge_outputs 14 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/layers/embedding_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | #from .recurrent import retrieve_seq_length_op 8 | from .. import variables as vs 9 | from .. import utils 10 | from .. import initializations 11 | 12 | 13 | def embedding(incoming, input_dim, output_dim, validate_indices=False, 14 | weights_init='truncated_normal', trainable=True, restore=True, 15 | reuse=False, scope=None, name="Embedding"): 16 | """ Embedding. 17 | 18 | Embedding layer for a sequence of integer ids or floats. 19 | 20 | Input: 21 | 2-D Tensor [samples, ids]. 22 | 23 | Output: 24 | 3-D Tensor [samples, embedded_ids, features]. 25 | 26 | Arguments: 27 | incoming: Incoming 2-D Tensor. 28 | input_dim: `int`. Vocabulary size (number of ids). 29 | output_dim: `int`. Embedding size. 30 | validate_indices: `bool`. Whether or not to validate gather indices. 31 | weights_init: `str` (name) or `Tensor`. Weights initialization. 32 | (see tflearn.initializations) Default: 'truncated_normal'. 33 | trainable: `bool`. If True, weights will be trainable. 34 | restore: `bool`. If True, this layer's weights will be restored when 35 | loading a model. 36 | reuse: `bool`. If True and 'scope' is provided, this layer's variables 37 | will be reused (shared). 38 | scope: `str`. Define this layer scope (optional). A scope can be 39 | used to share variables between layers. Note that scope will 40 | override name. 41 | name: A name for this layer (optional). Default: 'Embedding'.
42 | 43 | """ 44 | 45 | input_shape = utils.get_incoming_shape(incoming) 46 | assert len(input_shape) == 2, "Incoming Tensor shape must be 2-D" 47 | 48 | W_init = weights_init 49 | if isinstance(weights_init, str): 50 | W_init = initializations.get(weights_init)() 51 | 52 | with tf.variable_scope(scope, default_name=name, values=[incoming], 53 | reuse=reuse) as scope: 54 | name = scope.name 55 | with tf.device('/cpu:0'): 56 | W = vs.variable("W", shape=[input_dim, output_dim], 57 | initializer=W_init, trainable=trainable, 58 | restore=restore) 59 | tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + name, W) 60 | 61 | inference = tf.cast(incoming, tf.int32) 62 | inference = tf.nn.embedding_lookup(W, inference, 63 | validate_indices=validate_indices) 64 | 65 | inference.W = W 66 | inference.scope = scope 67 | # Embedding doesn't support masking, so we save sequence length prior 68 | # to the lookup. Expand dim to 3d. 69 | shape = [-1] + inference.get_shape().as_list()[1:3] + [1] 70 | inference.seq_length = retrieve_seq_length_op(tf.reshape(incoming, shape)) 71 | 72 | # Track output tensor. 73 | tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, inference) 74 | 75 | return inference 76 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/layers/merge_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | import tensorflow as tf 5 | 6 | 7 | def merge(tensors_list, mode, axis=1, name="Merge"): 8 | """ Merge. 9 | 10 | Merge a list of `Tensor` into a single one. A merging 'mode' must be 11 | specified, check below for the different options. 12 | 13 | Input: 14 | List of Tensors. 15 | 16 | Output: 17 | Merged Tensors. 18 | 19 | Arguments: 20 | tensors_list: A list of `Tensor` to merge. 21 | mode: `str`. Merging mode, it supports: 22 | ``` 23 | 'concat': concatenate outputs along specified axis 24 | 'elemwise_sum': outputs element-wise sum 25 | 'elemwise_mul': outputs element-wise multiplication 26 | 'sum': outputs element-wise sum along specified axis 27 | 'mean': outputs element-wise average along specified axis 28 | 'prod': outputs element-wise multiplication along specified axis 29 | 'max': outputs max elements along specified axis 30 | 'min': outputs min elements along specified axis 31 | 'and': `logical and` btw outputs elements along specified axis 32 | 'or': `logical or` btw outputs elements along specified axis 33 | ``` 34 | axis: `int`. Represents the axis to use for merging mode. 35 | In most cases: 0 for concat and 1 for other modes. 36 | name: A name for this layer (optional). Default: 'Merge'. 37 | 38 | """ 39 | 40 | assert len(tensors_list) > 1, "Merge requires 2 or more tensors."
41 | 42 | with tf.name_scope(name) as scope: 43 | tensors = [l for l in tensors_list] 44 | if mode == 'concat': 45 | inference = tf.concat(tensors, axis) 46 | elif mode == 'elemwise_sum': 47 | inference = tensors[0] 48 | for i in range(1, len(tensors)): 49 | inference = tf.add(inference, tensors[i]) 50 | elif mode == 'elemwise_mul': 51 | inference = tensors[0] 52 | for i in range(1, len(tensors)): 53 | inference = tf.multiply(inference, tensors[i]) 54 | elif mode == 'sum': 55 | inference = tf.reduce_sum(tf.concat(tensors, axis), 56 | reduction_indices=axis) 57 | elif mode == 'mean': 58 | inference = tf.reduce_mean(tf.concat(tensors, axis), 59 | reduction_indices=axis) 60 | elif mode == 'prod': 61 | inference = tf.reduce_prod(tf.concat(tensors, axis), 62 | reduction_indices=axis) 63 | elif mode == 'max': 64 | inference = tf.reduce_max(tf.concat(tensors, axis), 65 | reduction_indices=axis) 66 | elif mode == 'min': 67 | inference = tf.reduce_min(tf.concat(tensors, axis), 68 | reduction_indices=axis) 69 | elif mode == 'and': 70 | inference = tf.reduce_all(tf.concat(tensors, axis), 71 | reduction_indices=axis) 72 | elif mode == 'or': 73 | inference = tf.reduce_any(tf.concat(tensors, axis), 74 | reduction_indices=axis) 75 | else: 76 | raise Exception("Unknown merge mode", str(mode)) 77 | 78 | # Track output tensor. 79 | tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, inference) 80 | 81 | return inference 82 | 83 | 84 | def merge_outputs(tensor_list, name="MergeOutputs"): 85 | """ Merge Outputs. 86 | 87 | A layer that concatenate all outputs of a network into a single tensor. 88 | 89 | Input: 90 | List of Tensors [_shape_]. 91 | 92 | Output: 93 | Concatenated Tensors [nb_tensors, _shape_]. 94 | 95 | Arguments: 96 | tensor_list: list of `Tensor`. The network outputs. 97 | name: `str`. A name for this layer (optional). 98 | 99 | Returns: 100 | A `Tensor`. 101 | 102 | """ 103 | with tf.name_scope(name) as scope: 104 | x = tf.concat(tensor_list, 1) 105 | 106 | # Track output tensor. 107 | tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, x) 108 | 109 | return x 110 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/losses.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import tensorflow as tf 4 | from .utils import get_from_module 5 | 6 | 7 | def get(identifier): 8 | if hasattr(identifier, '__call__'): 9 | return identifier 10 | else: 11 | return get_from_module(identifier, globals(), 'regularizer') 12 | 13 | 14 | def L2(tensor, wd=0.001): 15 | """ L2. 16 | 17 | Computes half the L2 norm of a tensor without the `sqrt`: 18 | 19 | output = sum(t ** 2) / 2 * wd 20 | 21 | Arguments: 22 | tensor: `Tensor`. The tensor to apply regularization. 23 | wd: `float`. The decay. 24 | 25 | Returns: 26 | The regularization `Tensor`. 27 | 28 | """ 29 | return tf.multiply(tf.nn.l2_loss(tensor), wd, name='L2-Loss') 30 | 31 | 32 | def L1(tensor, wd=0.001): 33 | """ L1. 34 | 35 | Computes the L1 norm of a tensor: 36 | 37 | output = sum(|t|) * wd 38 | 39 | Arguments: 40 | tensor: `Tensor`. The tensor to apply regularization. 41 | wd: `float`. The decay. 42 | 43 | Returns: 44 | The regularization `Tensor`. 
45 | 46 | """ 47 | return tf.multiply(tf.reduce_sum(tf.abs(tensor)), wd, name='L1-Loss') 48 | -------------------------------------------------------------------------------- /references/encase/code/tflearn/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .dnn import DNN 3 | from .generator import SequenceGenerator -------------------------------------------------------------------------------- /references/encase/code/util_vote.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | 5 | ''' 6 | 7 | import numpy as np 8 | from collections import Counter 9 | 10 | def get_voted_proba_each_1(pre): 11 | return np.mean(pre, axis=0) 12 | 13 | def get_voted_proba_each(pre): 14 | y_pre = [0. for j in range(4)] 15 | y_sec_pre = [0. for j in range(4)] 16 | y_third_pre = [0. for j in range(4)] 17 | y_pre = np.array(y_pre, dtype=np.float32) 18 | y_sec_pre = np.array(y_sec_pre, dtype=np.float32) 19 | y_third_pre = np.array(y_third_pre, dtype=np.float32) 20 | max_p = 0 21 | max_sec_p = 0 22 | max_third_p = 0 23 | sec_p = 0 24 | sec_sec_p = 0 25 | sec_third_p = 0 26 | 27 | for j in range(len(pre)): 28 | i_pred = np.array(pre[j], dtype=np.float32) 29 | 30 | cur_max_p = i_pred[np.argmax(i_pred)] 31 | cur_sec_p = 0 32 | for k in range(len(i_pred)): 33 | if i_pred[k] == cur_max_p: 34 | continue 35 | if i_pred[k] > cur_sec_p: 36 | cur_sec_p = i_pred[k] 37 | 38 | if (cur_max_p - cur_sec_p) > (max_p - sec_p): 39 | y_third_pre = y_sec_pre 40 | y_sec_pre = y_pre 41 | y_pre = i_pred 42 | max_p = cur_max_p 43 | sec_p = cur_sec_p 44 | elif len(pre) >= 2 and (cur_max_p - cur_sec_p) > (max_sec_p - sec_sec_p): 45 | y_third_pre = y_sec_pre 46 | y_sec_pre = i_pred 47 | elif len(pre) >= 3 and (cur_max_p - cur_sec_p) > (max_third_p - sec_third_p): 48 | y_third_pre = i_pred 49 | 50 | 51 | labels = [0. 
for j in range(4)] 52 | pred_1 = np.argmax(y_pre) 53 | labels[pred_1] +=1 54 | pred_2 = pred_3 = 0 55 | if len(pre) >= 2: 56 | pred_2 = np.argmax(y_sec_pre) 57 | labels[pred_2] +=1 58 | if len(pre) >= 3: 59 | pred_3 = np.argmax(y_third_pre) 60 | labels[pred_3] +=1 61 | 62 | # if pred_1 == 2:# and (abs(y_pre[k][np.argmax(labels)] - y_pre[k][2])/y_pre[k][np.argmax(labels)] <= 0.2): 63 | # pass 64 | # elif pred_2 == 2:# and (abs(y_pre[k][np.argmax(labels)] - y_sec_pre[k][2])/y_pre[k][np.argmax(labels)] <= 0.2): 65 | # y_pre = y_sec_pre 66 | # elif pred_3 == 2:# and (abs(y_pre[k][np.argmax(labels)] - y_third_pre[k][2])/y_pre[k][np.argmax(labels)] <= 0.2): 67 | # y_pre = y_third_pre 68 | # elif pred_1 != np.argmax(labels): 69 | # if pred_2 == np.argmax(labels): 70 | # y_pre = y_sec_pre 71 | 72 | if pred_1 != np.argmax(labels): 73 | if pred_2 == np.argmax(labels): 74 | y_pre = y_sec_pre 75 | 76 | return y_pre 77 | 78 | def get_voted_proba(set_proba, out_pid): 79 | unique_pids = sorted(list(set(out_pid))) 80 | seq_proba = [] 81 | gt = [] 82 | proba_dic = {} 83 | for i in range(len(out_pid)): 84 | if out_pid[i] in proba_dic: 85 | proba_dic[out_pid[i]].append(set_proba[i]) 86 | else: 87 | proba_dic[out_pid[i]] = [set_proba[i]] 88 | for pid in unique_pids: 89 | seq_proba.append(get_voted_proba_each(proba_dic[pid])) 90 | 91 | return seq_proba 92 | 93 | def group_gt(gt, pids): 94 | unique_pids = sorted(list(set(pids))) 95 | gt_dic = {k: [] for k in unique_pids} 96 | final_gt = [] 97 | for i in range(len(pids)): 98 | gt_dic[pids[i]].append(gt[i]) 99 | for k, v in gt_dic.items(): 100 | final_gt.append(Counter(v).most_common(1)[0][0]) 101 | return final_gt 102 | -------------------------------------------------------------------------------- /references/encase/preprocess_sub.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/references/encase/preprocess_sub.m -------------------------------------------------------------------------------- /references/encase/score2017Challenge.m: -------------------------------------------------------------------------------- 1 | % This script will score your algorithm for classification accuracy, based on the reference classification results. 2 | % Your final score for the challenge will be evaluated on the whole hidden test set. 3 | % 4 | % This script requires that you first run generateValidationSet.m 5 | % 6 | % 7 | % Written by: Chengyu Liu and Qiao Li January 20 2017 8 | % chengyu.liu@emory.edu qiao.li@emory.edu 9 | % 10 | % Last modified by: 11 | % 12 | % 13 | 14 | clear all; 15 | 16 | %% Load the answer classification results 17 | fid = fopen('answers.txt','r'); 18 | if(fid ~= -1) 19 | ANSWERS = textscan(fid, '%s %s','Delimiter',','); 20 | else 21 | error('Could not open users answer.txt for scoring. Run the generateValidationSet.m script and try again.') 22 | end 23 | fclose(fid); 24 | 25 | %% Load the reference classification results 26 | reffile = ['validation' filesep 'REFERENCE.csv']; 27 | fid = fopen(reffile, 'r'); 28 | if(fid ~= -1) 29 | Ref = textscan(fid,'%s %s','Delimiter',','); 30 | else 31 | error(['Could not open ' reffile ' for scoring. 
Exiting...']) 32 | end 33 | fclose(fid); 34 | 35 | RECORDS = Ref{1}; 36 | target = Ref{2}; 37 | N = length(RECORDS); 38 | 39 | a = find(strcmp(ANSWERS{2},'N')); 40 | b = find(strcmp(ANSWERS{2},'A')); 41 | c = find(strcmp(ANSWERS{2},'O')); 42 | d = find(strcmp(ANSWERS{2},'~')); 43 | ln = length(a)+length(b)+length(c)+length(d); 44 | if(length(ANSWERS{2}) ~= ln); 45 | error('Input must contain only N, A, O or ~'); 46 | end 47 | 48 | %% Scoring 49 | % We do not assume that the references and the answers are sorted in the 50 | % same order, so we search for the location of the individual records in answer.txt file. 51 | AA=zeros(4,4); 52 | 53 | for n = 1:N 54 | rec = RECORDS{n}; 55 | i = strmatch(rec, ANSWERS{1}); 56 | if(isempty(i)) 57 | warning(['Could not find answer for record ' rec '; treating it as NOISE (~).']); 58 | this_answer = '~'; 59 | else 60 | this_answer = ANSWERS{2}(i); 61 | end 62 | switch target{n} 63 | case 'N' 64 | if strcmp(this_answer,'N') 65 | AA(1,1) = AA(1,1)+1; 66 | elseif strcmp(this_answer,'A') 67 | AA(1,2) = AA(1,2)+1; 68 | elseif strcmp(this_answer,'O') 69 | AA(1,3) = AA(1,3)+1; 70 | elseif strcmp(this_answer,'~') 71 | AA(1,4) = AA(1,4)+1; 72 | end 73 | case 'A' 74 | if strcmp(this_answer,'N') 75 | AA(2,1) = AA(2,1)+1; 76 | elseif strcmp(this_answer,'A') 77 | AA(2,2) = AA(2,2)+1; 78 | elseif strcmp(this_answer,'O') 79 | AA(2,3) = AA(2,3)+1; 80 | elseif strcmp(this_answer,'~') 81 | AA(2,4) = AA(2,4)+1; 82 | end 83 | case 'O' 84 | if strcmp(this_answer,'N') 85 | AA(3,1) = AA(3,1)+1; 86 | elseif strcmp(this_answer,'A') 87 | AA(3,2) = AA(3,2)+1; 88 | elseif strcmp(this_answer,'O') 89 | AA(3,3) = AA(3,3)+1; 90 | elseif strcmp(this_answer,'~') 91 | AA(3,4) = AA(3,4)+1; 92 | end 93 | case '~' 94 | if strcmp(this_answer,'N') 95 | AA(4,1) = AA(4,1)+1; 96 | elseif strcmp(this_answer,'A') 97 | AA(4,2) = AA(4,2)+1; 98 | elseif strcmp(this_answer,'O') 99 | AA(4,3) = AA(4,3)+1; 100 | elseif strcmp(this_answer,'~') 101 | AA(4,4) = AA(4,4)+1; 102 | end 103 | end 104 | end 105 | 106 | F1n=2*AA(1,1)/(sum(AA(1,:))+sum(AA(:,1))); 107 | F1a=2*AA(2,2)/(sum(AA(2,:))+sum(AA(:,2))); 108 | F1o=2*AA(3,3)/(sum(AA(3,:))+sum(AA(:,3))); 109 | F1p=2*AA(4,4)/(sum(AA(4,:))+sum(AA(:,4))); 110 | F1=(F1n+F1a+F1o)/3; 111 | 112 | 113 | str = ['F1 measure for Normal rhythm: ' '%1.4f\n']; 114 | fprintf(str,F1n) 115 | str = ['F1 measure for AF rhythm: ' '%1.4f\n']; 116 | fprintf(str,F1a) 117 | str = ['F1 measure for Other rhythm: ' '%1.4f\n']; 118 | fprintf(str,F1o) 119 | str = ['F1 measure for Noisy recordings: ' '%1.4f\n']; 120 | fprintf(str,F1p) 121 | str = ['Final F1 measure: ' '%1.4f\n']; 122 | fprintf(str,F1) 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /references/stanford/ecg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/references/stanford/ecg/__init__.py -------------------------------------------------------------------------------- /references/stanford/ecg/load.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | from __future__ import absolute_import 4 | 5 | import json 6 | import keras 7 | import numpy as np 8 | import os 9 | import random 10 | import scipy.io as sio 11 | import tqdm 12 | 13 | STEP = 256 14 | 15 | def data_generator(batch_size, preproc, x, y): 16 | num_examples = 
len(x) 17 | examples = zip(x, y) 18 | examples = sorted(examples, key = lambda x: x[0].shape[0]) 19 | end = num_examples - batch_size + 1 20 | batches = [examples[i:i+batch_size] 21 | for i in range(0, end, batch_size)] 22 | random.shuffle(batches) 23 | while True: 24 | for batch in batches: 25 | x, y = zip(*batch) 26 | yield preproc.process(x, y) 27 | 28 | class Preproc: 29 | 30 | def __init__(self, ecg, labels): 31 | self.mean, self.std = compute_mean_std(ecg) 32 | self.classes = sorted(set(l for label in labels for l in label)) 33 | self.int_to_class = dict( zip(range(len(self.classes)), self.classes)) 34 | self.class_to_int = {c : i for i, c in self.int_to_class.items()} 35 | 36 | def process(self, x, y): 37 | return self.process_x(x), self.process_y(y) 38 | 39 | def process_x(self, x): 40 | x = pad(x) 41 | x = (x - self.mean) / self.std 42 | x = x[:, :, None] 43 | return x 44 | 45 | def process_y(self, y): 46 | # TODO, awni, fix hack pad with noise for cinc 47 | y = pad([[self.class_to_int[c] for c in s] for s in y], val=3, dtype=np.int32) 48 | y = keras.utils.np_utils.to_categorical( 49 | y, num_classes=len(self.classes)) 50 | return y 51 | 52 | def pad(x, val=0, dtype=np.float32): 53 | max_len = max(len(i) for i in x) 54 | padded = np.full((len(x), max_len), val, dtype=dtype) 55 | for e, i in enumerate(x): 56 | padded[e, :len(i)] = i 57 | return padded 58 | 59 | def compute_mean_std(x): 60 | x = np.hstack(x) 61 | return (np.mean(x).astype(np.float32), 62 | np.std(x).astype(np.float32)) 63 | 64 | def load_dataset(data_json): 65 | with open(data_json, 'r') as fid: 66 | data = [json.loads(l) for l in fid] 67 | labels = []; ecgs = [] 68 | for d in tqdm.tqdm(data): 69 | labels.append(d['labels']) 70 | ecgs.append(load_ecg(d['ecg'])) 71 | return ecgs, labels 72 | 73 | def load_ecg(record): 74 | if os.path.splitext(record)[1] == ".npy": 75 | ecg = np.load(record) 76 | elif os.path.splitext(record)[1] == ".mat": 77 | ecg = sio.loadmat(record)['val'].squeeze() 78 | else: # Assumes binary 16 bit integers 79 | with open(record, 'r') as fid: 80 | ecg = np.fromfile(fid, dtype=np.int16) 81 | 82 | trunc_samp = STEP * int(len(ecg) / STEP) 83 | return ecg[:trunc_samp] 84 | 85 | if __name__ == "__main__": 86 | data_json = "examples/cinc17/train.json" 87 | train = load_dataset(data_json) 88 | preproc = Preproc(*train) 89 | gen = data_generator(32, preproc, *train) 90 | for x, y in gen: 91 | print(x.shape, y.shape) 92 | break 93 | -------------------------------------------------------------------------------- /references/stanford/ecg/predict.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import numpy as np 5 | import keras 6 | import os 7 | 8 | import load 9 | import util 10 | 11 | def predict(data_json, model_path): 12 | preproc = util.load(os.path.dirname(model_path)) 13 | dataset = load.load_dataset(data_json) 14 | x, y = preproc.process(*dataset) 15 | 16 | model = keras.models.load_model(model_path) 17 | probs = model.predict(x, verbose=1) 18 | 19 | return probs 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("data_json", help="path to data json") 24 | parser.add_argument("model_path", help="path to model") 25 | args = parser.parse_args() 26 | probs = predict(args.data_json, args.model_path) 27 | -------------------------------------------------------------------------------- /references/stanford/ecg/train.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | from __future__ import absolute_import 4 | 5 | import argparse 6 | import json 7 | import keras 8 | import numpy as np 9 | import os 10 | import random 11 | import time 12 | 13 | import network 14 | import load 15 | import util 16 | 17 | MAX_EPOCHS = 100 18 | 19 | def make_save_dir(dirname, experiment_name): 20 | start_time = str(int(time.time())) + '-' + str(random.randrange(1000)) 21 | save_dir = os.path.join(dirname, experiment_name, start_time) 22 | if not os.path.exists(save_dir): 23 | os.makedirs(save_dir) 24 | return save_dir 25 | 26 | def get_filename_for_saving(save_dir): 27 | return os.path.join(save_dir, 28 | "{val_loss:.3f}-{val_acc:.3f}-{epoch:03d}-{loss:.3f}-{acc:.3f}.hdf5") 29 | 30 | def train(args, params): 31 | 32 | print("Loading training set...") 33 | train = load.load_dataset(params['train']) 34 | print("Loading dev set...") 35 | dev = load.load_dataset(params['dev']) 36 | print("Building preprocessor...") 37 | preproc = load.Preproc(*train) 38 | print("Training size: " + str(len(train[0])) + " examples.") 39 | print("Dev size: " + str(len(dev[0])) + " examples.") 40 | 41 | 42 | save_dir = make_save_dir(params['save_dir'], args.experiment) 43 | 44 | util.save(preproc, save_dir) 45 | 46 | params.update({ 47 | "input_shape": [None, 1], 48 | "num_categories": len(preproc.classes) 49 | }) 50 | 51 | model = network.build_network(**params) 52 | 53 | stopping = keras.callbacks.EarlyStopping(patience=8) 54 | 55 | reduce_lr = keras.callbacks.ReduceLROnPlateau( 56 | factor=0.1, 57 | patience=2, 58 | min_lr=params["learning_rate"] * 0.001) 59 | 60 | checkpointer = keras.callbacks.ModelCheckpoint( 61 | filepath=get_filename_for_saving(save_dir), 62 | save_best_only=False) 63 | 64 | batch_size = params.get("batch_size", 32) 65 | 66 | if params.get("generator", False): 67 | train_gen = load.data_generator(batch_size, preproc, *train) 68 | dev_gen = load.data_generator(batch_size, preproc, *dev) 69 | model.fit_generator( 70 | train_gen, 71 | steps_per_epoch=int(len(train[0]) / batch_size), 72 | epochs=MAX_EPOCHS, 73 | validation_data=dev_gen, 74 | validation_steps=int(len(dev[0]) / batch_size), 75 | callbacks=[checkpointer, reduce_lr, stopping]) 76 | else: 77 | train_x, train_y = preproc.process(*train) 78 | dev_x, dev_y = preproc.process(*dev) 79 | model.fit( 80 | train_x, train_y, 81 | batch_size=batch_size, 82 | epochs=MAX_EPOCHS, 83 | validation_data=(dev_x, dev_y), 84 | callbacks=[checkpointer, reduce_lr, stopping]) 85 | 86 | if __name__ == '__main__': 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument("config_file", help="path to config file") 89 | parser.add_argument("--experiment", "-e", help="tag with experiment name", 90 | default="default") 91 | args = parser.parse_args() 92 | params = json.load(open(args.config_file, 'r')) 93 | train(args, params) 94 | 95 | 96 | """ 97 | example config (https://github.com/awni/ecg/blob/master/examples/cinc17/config.json): 98 | 99 | { 100 | "conv_subsample_lengths": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2], 101 | "conv_filter_length": 16, 102 | "conv_num_filters_start": 32, 103 | "conv_init": "he_normal", 104 | "conv_activation": "relu", 105 | "conv_dropout": 0.2, 106 | "conv_num_skip": 2, 107 | "conv_increase_channels_at": 4, 108 | 109 | "learning_rate": 0.001, 110 | "batch_size": 32, 111 | 112 | "train": "examples/cinc17/train.json", 113 | "dev": 
"examples/cinc17/dev.json", 114 | 115 | "generator": true, 116 | 117 | "save_dir": "saved", 118 | # "input_shape": [None, 1], 119 | # "num_categories": 9, 120 | } 121 | """ 122 | -------------------------------------------------------------------------------- /references/stanford/ecg/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cPickle as pickle 3 | 4 | def load(dirname): 5 | preproc_f = os.path.join(dirname, "preproc.bin") 6 | with open(preproc_f, 'r') as fid: 7 | preproc = pickle.load(fid) 8 | return preproc 9 | 10 | def save(preproc, dirname): 11 | preproc_f = os.path.join(dirname, "preproc.bin") 12 | with open(preproc_f, 'w') as fid: 13 | pickle.dump(preproc, fid) 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.22.0 2 | scipy==1.4.1 3 | joblib>=1.2.0 4 | Cython==0.29.10 5 | pandas==1.1.0 6 | scikit-learn==0.20.0 7 | # tensorflow==1.15.2 8 | # keras==2.3.1 9 | packaging 10 | easydict 11 | wfdb 12 | biosppy 13 | torch==1.3.1 14 | torchsummary 15 | tensorboardX 16 | tqdm 17 | -------------------------------------------------------------------------------- /saved_models/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/saved_models/.keep -------------------------------------------------------------------------------- /signal_processing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | References: 4 | ----------- 5 | wfdb 6 | biosppy 7 | 8 | """ 9 | 10 | from .ecg_preproc import * 11 | from .ecg_rpeaks import * 12 | from .ecg_spectral import * 13 | from .ecg_waves import * 14 | from .ecg_waves_wavelet import * 15 | 16 | 17 | __all__ = [s for s in dir() if not s.startswith('_')] 18 | -------------------------------------------------------------------------------- /signal_processing/ecg_waves_wavelet.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | 12-lead ECG wave delineation, using algorithms proposed in ref. [1] 4 | 5 | Update: 6 | no existing implementation of daubechies cwt, 7 | priority of implementation of this algorithm is set LOW 8 | 9 | References: 10 | ----------- 11 | [1] Yochum, Maxime, Charlotte Renaud, and Sabir Jacquir. "Automatic detection of P, QRS and T patterns in 12 leads ECG signal based on CWT." Biomedical Signal Processing and Control 25 (2016): 46-52. 12 | [2] Li, Cuiwei, Chongxun Zheng, and Changfeng Tai. "Detection of ECG characteristic points using wavelet transforms." IEEE Transactions on biomedical Engineering 42.1 (1995): 21-28. 
13 | [3] https://encyclopediaofmath.org/wiki/Daubechies_wavelets 14 | """ 15 | from typing import Union, Optional, NoReturn 16 | 17 | import numpy as np 18 | np.set_printoptions(precision=5, suppress=True) 19 | from pywt import cwt 20 | 21 | 22 | __all__ = [ 23 | "continuous_daubechies", 24 | ] 25 | 26 | 27 | def continuous_daubechies(): 28 | """ 29 | """ 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /unofficial_phase_legacy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | this folder contains 3 | 4 | legacy files of the unofficial phase 5 | 6 | deprecated (history) files of the (continuously evolving) official phase 7 | 8 | """ -------------------------------------------------------------------------------- /unofficial_phase_legacy/driver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, sys 3 | 4 | import numpy as np 5 | from scipy.io import loadmat 6 | from run_12ECG_classifier import load_12ECG_model, run_12ECG_classifier 7 | 8 | 9 | def load_challenge_data(filename): 10 | 11 | x = loadmat(filename) 12 | data = np.asarray(x['val'], dtype=np.float64) 13 | 14 | new_file = filename.replace('.mat','.hea') 15 | input_header_file = os.path.join(new_file) 16 | 17 | with open(input_header_file,'r') as f: 18 | header_data = f.readlines() 19 | 20 | return data, header_data 21 | 22 | 23 | def save_challenge_predictions(output_directory,filename,scores,labels,classes): 24 | 25 | recording = os.path.splitext(filename)[0] 26 | new_file = filename.replace('.mat','.csv') 27 | output_file = os.path.join(output_directory,new_file) 28 | 29 | # Include the filename as the recording number 30 | recording_string = '#{}'.format(recording) 31 | class_string = ','.join(classes) 32 | label_string = ','.join(str(i) for i in labels) 33 | score_string = ','.join(str(i) for i in scores) 34 | 35 | with open(output_file, 'w') as f: 36 | f.write(recording_string + '\n' + class_string + '\n' + label_string + '\n' + score_string + '\n') 37 | 38 | 39 | 40 | if __name__ == '__main__': 41 | # Parse arguments. 42 | if len(sys.argv) != 4: 43 | raise Exception('Include the input and output directories as arguments, e.g., python driver.py input output.') 44 | 45 | model_input = sys.argv[1] 46 | input_directory = sys.argv[2] 47 | output_directory = sys.argv[3] 48 | 49 | # Find files. 50 | input_files = [] 51 | for f in os.listdir(input_directory): 52 | if os.path.isfile(os.path.join(input_directory, f)) and not f.lower().startswith('.') and f.lower().endswith('mat'): 53 | input_files.append(f) 54 | 55 | if not os.path.isdir(output_directory): 56 | os.mkdir(output_directory) 57 | 58 | # Load model. 59 | print('Loading 12ECG model...') 60 | model = load_12ECG_model(model_input) 61 | 62 | # Iterate over files. 63 | print('Extracting 12ECG features...') 64 | num_files = len(input_files) 65 | 66 | for i, f in enumerate(input_files): 67 | print(' {}/{}...'.format(i+1, num_files)) 68 | tmp_input_file = os.path.join(input_directory,f) 69 | data,header_data = load_challenge_data(tmp_input_file) 70 | current_label, current_score,classes = run_12ECG_classifier(data,header_data, model) 71 | # Save results. 
72 | save_challenge_predictions(output_directory,f,current_score,current_label,classes) 73 | 74 | 75 | print('Done.') 76 | -------------------------------------------------------------------------------- /unofficial_phase_legacy/run_12ECG_classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import joblib 5 | from keras.models import load_model 6 | from get_12ECG_features import get_12ECG_features 7 | 8 | 9 | def run_12ECG_classifier(data,header_data,classes,model): 10 | 11 | num_classes = len(classes) 12 | current_label = np.zeros(num_classes, dtype=int) 13 | current_score = np.zeros(num_classes) 14 | 15 | all_labels = ['Normal', 'AF', 'I-AVB', 'LBBB', 'RBBB', 'PAC', 'PVC', 'STD', 'STE'] 16 | 17 | valid_label_indices = [i for i,l in enumerate(all_labels) if l in classes] 18 | valid_labels = [l for i,l in enumerate(all_labels) if l in classes] 19 | 20 | class_map = { 21 | l: classes.index(l) for l in valid_labels 22 | } 23 | 24 | # Use your classifier here to obtain a label and score for each class. 25 | features = get_12ECG_features(data, header_data) 26 | pred_score = model.predict(features)[...,valid_label_indices] 27 | pred_score = np.mean(pred_score, axis=0) # or np.max? 28 | 29 | threshold = 0.5 30 | pred_labels = np.where(pred_score>=threshold)[0] 31 | if len(pred_labels) == 0: 32 | pred_labels = np.array([np.argmax(pred_score)], dtype=int) 33 | 34 | for l in pred_labels: 35 | ln = valid_labels[l] 36 | current_label[class_map[ln]] = 1 37 | 38 | for i in range(num_classes): 39 | current_score[class_map[valid_labels[i]]] = pred_score[i] 40 | 41 | return current_label, current_score 42 | 43 | def load_12ECG_model(): 44 | # load the model from disk 45 | filename='weights-0.22loss.hdf5' 46 | loaded_model = load_model(filename) 47 | 48 | return loaded_model 49 | 50 | 51 | def run_12ECG_classifier_old(data,header_data,classes,model): 52 | 53 | num_classes = len(classes) 54 | current_label = np.zeros(num_classes, dtype=int) 55 | current_score = np.zeros(num_classes) 56 | 57 | # Use your classifier here to obtain a label and score for each class. 
58 | features=np.asarray(get_12ECG_features(data,header_data)) 59 | feats_reshape = features.reshape(1,-1) 60 | label = model.predict(feats_reshape) 61 | score = model.predict_proba(feats_reshape) 62 | 63 | current_label[label] = 1 64 | 65 | for i in range(num_classes): 66 | current_score[i] = np.array(score[0][i]) 67 | 68 | return current_label, current_score 69 | -------------------------------------------------------------------------------- /unofficial_phase_legacy/train_legacy.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | References: 4 | ----------- 5 | [1] https://www.tensorflow.org/tutorials/structured_data/imbalanced_data 6 | """ 7 | import os 8 | import argparse 9 | 10 | import numpy as np 11 | np.set_printoptions(precision=5, suppress=True) 12 | from keras import backend as K 13 | from keras.optimizers import Adam, SGD 14 | from keras.callbacks import ModelCheckpoint 15 | 16 | from models.legacy import get_model 17 | from cfg import TrainCfg 18 | 19 | 20 | def train(model, config, train_x, train_y, test_x, test_y): 21 | """ 22 | """ 23 | model.compile(loss='binary_crossentropy', optimizer=Adam(0.0001)) 24 | 25 | checkpointer = ModelCheckpoint( 26 | filepath=os.path.join(config.checkpoints,'weights.{epoch:04d}-{val_loss:.3f}.hdf5'), 27 | verbose=2, 28 | monitor='val_acc', 29 | save_best_only=False, 30 | ) 31 | 32 | model.fit(train_x, train_y, batch_size=config.batch_size, epochs=config.n_epochs, verbose=2, validation_data=(test_x, test_y), callbacks=[checkpointer]) 33 | 34 | 35 | def get_args(**kwargs): 36 | """ 37 | """ 38 | cfg = deepcopy(kwargs) 39 | parser = argparse.ArgumentParser( 40 | description='Train the Model on CINC2020', 41 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 42 | # parser.add_argument( 43 | # '-l', '--learning-rate', 44 | # metavar='LR', type=float, nargs='?', default=0.001, 45 | # help='Learning rate', 46 | # dest='learning_rate') 47 | # parser.add_argument( 48 | # '-g', '--gpu', 49 | # metavar='G', type=str, default='0', 50 | # help='GPU', 51 | # dest='gpu') 52 | parser.add_argument( 53 | '-t', '--tranches', 54 | type=str, default='', 55 | help='the tranches for training', 56 | dest='tranches_for_training') 57 | parser.add_argument( 58 | '-c', '--cnn-name', 59 | type=str, default='resnet', 60 | help='choice of cnn feature extractor', 61 | dest='cnn_name') 62 | parser.add_argument( 63 | '-r', '--rnn-name', 64 | type=str, default='lstm', 65 | help='choice of rnn structures', 66 | dest='rnn_name') 67 | parser.add_argument( 68 | '--keep-checkpoint-max', type=int, default=100, 69 | help='maximum number of checkpoints to keep. 
If set 0, all checkpoints will be kept', 70 | dest='keep_checkpoint_max') 71 | parser.add_argument( 72 | '--optimizer', type=str, default='adam', 73 | help='training optimizer', 74 | dest='train_optimizer') 75 | parser.add_argument( 76 | '--debug', type=str2bool, default=False, 77 | help='train with more debugging information', 78 | dest='debug') 79 | 80 | args = vars(parser.parse_args()) 81 | 82 | cfg.update(args) 83 | 84 | return ED(cfg) 85 | 86 | 87 | DAS = True # JD DAS platform 88 | 89 | if __name__ == "__main__": 90 | config = get_args(**TrainCfg) 91 | 92 | print(f"\n{'*'*20} Start Training {'*'*20}\n") 93 | print(f"GPU status: {K.tensorflow_backend._get_available_gpus()}") 94 | print(f"Using keras of version {keras.__version__}") 95 | print(f'with configuration {config}') 96 | 97 | model = get_model(config) 98 | 99 | train_ratio = int(config.train_ratio*100) 100 | test_ratio = 100 - train_ratio 101 | 102 | tranches = config.tranches_for_training or "ABEF" 103 | 104 | # not finished 105 | train_x, train_y, test_x, test_y = [], [], [], [] 106 | for t in tranches: 107 | train_x.append(np.load(os.path.join(config.db_dir, f"train_X_tranches_{t}_ratio_{train_ratio}_siglen_{config.input_len}.npy"))) 108 | train_y.append(np.load(os.path.join(config.db_dir, f"train_y_tranches_{t}_ratio_{train_ratio}_siglen_{config.input_len}.npy"))) 109 | test_x.append(np.load(os.path.join(config.db_dir, f"test_X_tranches_{t}_ratio_{test_ratio}_siglen_{config.input_len}.npy"))) 110 | test_y.append(np.load(os.path.join(config.db_dir, f"test_y_tranches_{t}_ratio_{test_ratio}_siglen_{config.input_len}.npy"))) 111 | 112 | train_x = np.concatenate(train_x, axis=0).transpose(0,2,1) 113 | train_y = np.concatenate(train_y, axis=0) 114 | test_x = np.concatenate(test_x, axis=0).transpose(0,2,1) 115 | test_y = np.concatenate(test_y, axis=0) 116 | 117 | train(model, config, train_x, train_y, test_x, test_y) 118 | -------------------------------------------------------------------------------- /unofficial_phase_legacy/weights-0.22loss.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepPSP/cinc2020/5d0d704299b50b3e4631be44c3c1891e92011ac9/unofficial_phase_legacy/weights-0.22loss.hdf5 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import os, sys 4 | 5 | _BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 6 | --------------------------------------------------------------------------------
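A closing note on the legacy run_12ECG_classifier shown above: after the per-segment scores are averaged, every class whose score reaches the 0.5 threshold is marked positive, and if no class reaches it the single highest-scoring class is taken as a fallback, so each recording always receives at least one label. Below is a minimal, self-contained sketch of that decision rule only; the function name and the example scores are illustrative and are not taken from the repository.

import numpy as np

def threshold_with_argmax_fallback(pred_score, threshold=0.5):
    # Binarize per-class scores; fall back to argmax so at least one label is predicted.
    pred_score = np.asarray(pred_score, dtype=float)
    labels = np.zeros(len(pred_score), dtype=int)
    positive = np.where(pred_score >= threshold)[0]
    if len(positive) == 0:
        positive = np.array([np.argmax(pred_score)], dtype=int)
    labels[positive] = 1
    return labels

# Example with the 9-class label set used by the legacy code and hypothetical scores
# ('Normal', 'AF', 'I-AVB', 'LBBB', 'RBBB', 'PAC', 'PVC', 'STD', 'STE'):
print(threshold_with_argmax_fallback([0.12, 0.81, 0.05, 0.02, 0.55, 0.08, 0.11, 0.03, 0.01]))
# -> [0 1 0 0 1 0 0 0 0], i.e. 'AF' and 'RBBB' exceed the threshold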