├── ODIR ├── M3Care.ipynb └── utils │ ├── __pycache__ │ ├── common_utils.cpython-36.pyc │ ├── common_utils.cpython-37.pyc │ ├── feature_extractor.cpython-36.pyc │ ├── feature_extractor.cpython-37.pyc │ ├── logging.cpython-37.pyc │ ├── metrics.cpython-36.pyc │ ├── metrics.cpython-37.pyc │ ├── preprocessing.cpython-36.pyc │ ├── preprocessing.cpython-37.pyc │ ├── readers.cpython-36.pyc │ ├── readers.cpython-37.pyc │ ├── utils.cpython-36.pyc │ └── utils.cpython-37.pyc │ ├── common_utils.py │ ├── decomp_normalizer │ ├── feature_extractor.py │ ├── logging.py │ ├── metrics.py │ ├── preprocessing.py │ ├── readers.py │ ├── resources │ ├── channel_info.json │ ├── discretizer_config.json │ └── valset.csv │ └── utils.py ├── OV ├── M3Care.ipynb └── utils │ ├── __pycache__ │ ├── common_utils.cpython-36.pyc │ ├── common_utils.cpython-37.pyc │ ├── feature_extractor.cpython-36.pyc │ ├── feature_extractor.cpython-37.pyc │ ├── logging.cpython-37.pyc │ ├── metrics.cpython-36.pyc │ ├── metrics.cpython-37.pyc │ ├── preprocessing.cpython-36.pyc │ ├── preprocessing.cpython-37.pyc │ ├── readers.cpython-36.pyc │ ├── readers.cpython-37.pyc │ ├── utils.cpython-36.pyc │ └── utils.cpython-37.pyc │ ├── common_utils.py │ ├── decomp_normalizer │ ├── feature_extractor.py │ ├── logging.py │ ├── metrics.py │ ├── preprocessing.py │ ├── readers.py │ ├── resources │ ├── channel_info.json │ ├── discretizer_config.json │ └── valset.csv │ └── utils.py └── README.md /ODIR/utils/__pycache__/common_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/common_utils.cpython-36.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/common_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/common_utils.cpython-37.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/feature_extractor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/feature_extractor.cpython-36.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/feature_extractor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/feature_extractor.cpython-37.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/logging.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/logging.cpython-37.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/metrics.cpython-36.pyc 
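--------------------------------------------------------------------------------
A minimal usage sketch (not one of the repository's files) of how the ODIR/utils modules dumped below fit together, assuming MIMIC-III-benchmark-style data and Python 3 run from inside ODIR/: a reader streams raw per-stay CSV events, Discretizer bins them into fixed timesteps and one-hot encodes categorical channels using resources/discretizer_config.json, Normalizer rescales columns from a pickled means/stds state, utils.load_data ties the steps together, and metrics.print_metrics_binary scores predictions. The dataset directory, the normalizer state path, and the model producing `pred` are assumptions; only classes and functions that appear below are used.

# Hypothetical wiring of the utilities below for the in-hospital mortality task.
from utils.readers import InHospitalMortalityReader
from utils.preprocessing import Discretizer, Normalizer
from utils.utils import load_data

# The reader yields dicts with "X" (raw events), "t", "y", "header" and "name";
# its listfile defaults to <dataset_dir>/listfile.csv.
reader = InHospitalMortalityReader(dataset_dir='data/in-hospital-mortality/train',  # assumed path
                                   period_length=48.0)

# The discretizer bins irregular events into fixed timesteps, one-hot encodes categorical
# channels and (with store_masks=True) appends per-channel observation masks.
discretizer = Discretizer(timestep=1.0, store_masks=True,
                          impute_strategy='previous', start_time='zero')

# The normalizer rescales columns with means/stds loaded from a pickled state file
# (see Normalizer._save_params for the expected format); the path here is hypothetical.
normalizer = Normalizer(fields=None)
normalizer.load_params('ihm_normalizer')

# load_data pushes every example through discretizer.transform and normalizer.transform.
data, labels = load_data(reader, discretizer, normalizer, small_part=True)

# Once a model produces probabilities `pred` for `data`, results are reported with
# utils.metrics.print_metrics_binary(labels, pred).

The same wiring applies to the identical copy of these modules under OV/utils further down.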
-------------------------------------------------------------------------------- /ODIR/utils/__pycache__/metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/metrics.cpython-37.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/preprocessing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/preprocessing.cpython-36.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/preprocessing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/preprocessing.cpython-37.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/readers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/readers.cpython-36.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/readers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/readers.cpython-37.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /ODIR/utils/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /ODIR/utils/common_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import os 6 | import json 7 | import random 8 | 9 | from .feature_extractor import extract_features 10 | 11 | 12 | def convert_to_dict(data, header, channel_info): 13 | """ convert data from readers output in to array of arrays format """ 14 | ret = [[] for i in range(data.shape[1] - 1)] 15 | for i in range(1, data.shape[1]): 16 | ret[i-1] = [(t, x) for (t, x) in zip(data[:, 0], data[:, i]) if x != ""] 17 | channel = header[i] 18 | if len(channel_info[channel]['possible_values']) != 0: 19 | ret[i-1] = list(map(lambda x: (x[0], channel_info[channel]['values'][x[1]]), ret[i-1])) 20 | ret[i-1] = list(map(lambda x: (float(x[0]), float(x[1])), ret[i-1])) 21 | return ret 22 | 23 | 24 | def extract_features_from_rawdata(chunk, header, period, features): 25 | with 
open(os.path.join(os.path.dirname(__file__), "resources/channel_info.json")) as channel_info_file: 26 | channel_info = json.loads(channel_info_file.read()) 27 | data = [convert_to_dict(X, header, channel_info) for X in chunk] 28 | return extract_features(data, period, features) 29 | 30 | 31 | def read_chunk(reader, chunk_size): 32 | data = {} 33 | for i in range(chunk_size): 34 | ret = reader.read_next() 35 | for k, v in ret.items(): 36 | if k not in data: 37 | data[k] = [] 38 | data[k].append(v) 39 | data["header"] = data["header"][0] 40 | return data 41 | 42 | 43 | def sort_and_shuffle(data, batch_size): 44 | """ Sort data by the length and then make batches and shuffle them. 45 | data is tuple (X1, X2, ..., Xn) all of them have the same length. 46 | Usually data = (X, y). 47 | """ 48 | assert len(data) >= 2 49 | data = list(zip(*data)) 50 | 51 | random.shuffle(data) 52 | 53 | old_size = len(data) 54 | rem = old_size % batch_size 55 | head = data[:old_size - rem] 56 | tail = data[old_size - rem:] 57 | data = [] 58 | 59 | head.sort(key=(lambda x: x[0].shape[0])) 60 | 61 | mas = [head[i: i+batch_size] for i in range(0, len(head), batch_size)] 62 | random.shuffle(mas) 63 | 64 | for x in mas: 65 | data += x 66 | data += tail 67 | 68 | data = list(zip(*data)) 69 | return data 70 | 71 | 72 | def add_common_arguments(parser): 73 | """ Add all the parameters which are common across the tasks 74 | """ 75 | parser.add_argument('--network', type=str, required=True) 76 | parser.add_argument('--dim', type=int, default=256, 77 | help='number of hidden units') 78 | parser.add_argument('--depth', type=int, default=1, 79 | help='number of bi-LSTMs') 80 | parser.add_argument('--epochs', type=int, default=100, 81 | help='number of chunks to train') 82 | parser.add_argument('--load_state', type=str, default="", 83 | help='state file path') 84 | parser.add_argument('--mode', type=str, default="train", 85 | help='mode: train or test') 86 | parser.add_argument('--batch_size', type=int, default=64) 87 | parser.add_argument('--l2', type=float, default=0, help='L2 regularization') 88 | parser.add_argument('--l1', type=float, default=0, help='L1 regularization') 89 | parser.add_argument('--save_every', type=int, default=1, 90 | help='save state every x epoch') 91 | parser.add_argument('--prefix', type=str, default="", 92 | help='optional prefix of network name') 93 | parser.add_argument('--dropout', type=float, default=0.0) 94 | parser.add_argument('--rec_dropout', type=float, default=0.0, 95 | help="dropout rate for recurrent connections") 96 | parser.add_argument('--batch_norm', type=bool, default=False, 97 | help='batch normalization') 98 | parser.add_argument('--timestep', type=float, default=1.0, 99 | help="fixed timestep used in the dataset") 100 | parser.add_argument('--imputation', type=str, default='previous') 101 | parser.add_argument('--small_part', dest='small_part', action='store_true') 102 | parser.add_argument('--whole_data', dest='small_part', action='store_false') 103 | parser.add_argument('--optimizer', type=str, default='adam') 104 | parser.add_argument('--lr', type=float, default=0.001, help='learning rate') 105 | parser.add_argument('--beta_1', type=float, default=0.9, 106 | help='beta_1 param for Adam optimizer') 107 | parser.add_argument('--verbose', type=int, default=2) 108 | parser.add_argument('--size_coef', type=float, default=4.0) 109 | parser.add_argument('--normalizer_state', type=str, default=None, 110 | help='Path to a state file of a normalizer. 
Leave none if you want to ' 111 | 'use one of the provided ones.') 112 | parser.set_defaults(small_part=False) 113 | 114 | 115 | class DeepSupervisionDataLoader: 116 | r""" 117 | Data loader for decompensation and length of stay task. 118 | Reads all the data for one patient at once. 119 | 120 | Parameters 121 | ---------- 122 | dataset_dir : str 123 | Directory where timeseries files are stored. 124 | listfile : str 125 | Path to a listfile. If this parameter is left `None` then 126 | `dataset_dir/listfile.csv` will be used. 127 | """ 128 | def __init__(self, dataset_dir, listfile=None, small_part=False): 129 | 130 | self._dataset_dir = dataset_dir 131 | if listfile is None: 132 | listfile_path = os.path.join(dataset_dir, "listfile.csv") 133 | else: 134 | listfile_path = listfile 135 | with open(listfile_path, "r") as lfile: 136 | self._data = lfile.readlines()[1:] # skip the header 137 | 138 | self._data = [line.split(',') for line in self._data] 139 | self._data = [(x, float(t), y) for (x, t, y) in self._data] 140 | self._data = sorted(self._data) 141 | 142 | mas = {"X": [], 143 | "ts": [], 144 | "ys": [], 145 | "name": []} 146 | i = 0 147 | while i < len(self._data): 148 | j = i 149 | cur_stay = self._data[i][0] 150 | cur_ts = [] 151 | cur_labels = [] 152 | while j < len(self._data) and self._data[j][0] == cur_stay: 153 | cur_ts.append(self._data[j][1]) 154 | cur_labels.append(self._data[j][2]) 155 | j += 1 156 | 157 | cur_X, header = self._read_timeseries(cur_stay) 158 | mas["X"].append(cur_X) 159 | mas["ts"].append(cur_ts) 160 | mas["ys"].append(cur_labels) 161 | mas["name"].append(cur_stay) 162 | 163 | i = j 164 | if small_part and len(mas["name"]) == 256: 165 | break 166 | 167 | self._data = mas 168 | 169 | def _read_timeseries(self, ts_filename): 170 | ret = [] 171 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 172 | header = tsfile.readline().strip().split(',') 173 | assert header[0] == "Hours" 174 | for line in tsfile: 175 | mas = line.strip().split(',') 176 | ret.append(np.array(mas)) 177 | return (np.stack(ret), header) 178 | 179 | 180 | def create_directory(directory): 181 | if not os.path.exists(directory): 182 | os.makedirs(directory) 183 | 184 | 185 | def pad_zeros(arr, min_length=None): 186 | """ 187 | `arr` is an array of `np.array`s 188 | 189 | The function appends zeros to every `np.array` in `arr` 190 | to equalize their first axis lenghts. 
191 | """ 192 | dtype = arr[0].dtype 193 | max_len = max([x.shape[0] for x in arr]) 194 | ret = [np.concatenate([x, np.zeros((max_len - x.shape[0],) + x.shape[1:], dtype=dtype)], axis=0) 195 | for x in arr] 196 | if (min_length is not None) and ret[0].shape[0] < min_length: 197 | ret = [np.concatenate([x, np.zeros((min_length - x.shape[0],) + x.shape[1:], dtype=dtype)], axis=0) 198 | for x in ret] 199 | return np.array(ret) 200 | -------------------------------------------------------------------------------- /ODIR/utils/decomp_normalizer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/ODIR/utils/decomp_normalizer -------------------------------------------------------------------------------- /ODIR/utils/feature_extractor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | from scipy.stats import skew 6 | 7 | all_functions = [min, max, np.mean, np.std, skew, len] 8 | 9 | functions_map = { 10 | "all": all_functions, 11 | "len": [len], 12 | "all_but_len": all_functions[:-1] 13 | } 14 | 15 | periods_map = { 16 | "all": (0, 0, 1, 0), 17 | "first4days": (0, 0, 0, 4 * 24), 18 | "first8days": (0, 0, 0, 8 * 24), 19 | "last12hours": (1, -12, 1, 0), 20 | "first25percent": (2, 25), 21 | "first50percent": (2, 50) 22 | } 23 | 24 | sub_periods = [(2, 100), (2, 10), (2, 25), (2, 50), 25 | (3, 10), (3, 25), (3, 50)] 26 | 27 | 28 | def get_range(begin, end, period): 29 | # first p % 30 | if period[0] == 2: 31 | return (begin, begin + (end - begin) * period[1] / 100.0) 32 | # last p % 33 | if period[0] == 3: 34 | return (end - (end - begin) * period[1] / 100.0, end) 35 | 36 | if period[0] == 0: 37 | L = begin + period[1] 38 | else: 39 | L = end + period[1] 40 | 41 | if period[2] == 0: 42 | R = begin + period[3] 43 | else: 44 | R = end + period[3] 45 | 46 | return (L, R) 47 | 48 | 49 | def calculate(channel_data, period, sub_period, functions): 50 | if len(channel_data) == 0: 51 | return np.full((len(functions, )), np.nan) 52 | 53 | L = channel_data[0][0] 54 | R = channel_data[-1][0] 55 | L, R = get_range(L, R, period) 56 | L, R = get_range(L, R, sub_period) 57 | 58 | data = [x for (t, x) in channel_data 59 | if L - 1e-6 < t < R + 1e-6] 60 | 61 | if len(data) == 0: 62 | return np.full((len(functions, )), np.nan) 63 | return np.array([fn(data) for fn in functions], dtype=np.float32) 64 | 65 | 66 | def extract_features_single_episode(data_raw, period, functions): 67 | global sub_periods 68 | extracted_features = [np.concatenate([calculate(data_raw[i], period, sub_period, functions) 69 | for sub_period in sub_periods], 70 | axis=0) 71 | for i in range(len(data_raw))] 72 | return np.concatenate(extracted_features, axis=0) 73 | 74 | 75 | def extract_features(data_raw, period, features): 76 | period = periods_map[period] 77 | functions = functions_map[features] 78 | return np.array([extract_features_single_episode(x, period, functions) 79 | for x in data_raw]) 80 | -------------------------------------------------------------------------------- /ODIR/utils/logging.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import logging 4 | 5 | 6 | def init_log(output_dir): 7 | logging.basicConfig(level=logging.DEBUG, 8 | format='%(asctime)s %(message)s', 9 | datefmt='%Y%m%d-%H:%M:%S', 10 | 
filename=os.path.join(output_dir, 'log.log'), 11 | filemode='w') 12 | console = logging.StreamHandler() 13 | console.setLevel(logging.INFO) 14 | logging.getLogger('').addHandler(console) 15 | return logging 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /ODIR/utils/metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | from sklearn import metrics 6 | 7 | 8 | # for decompensation, in-hospital mortality 9 | 10 | def print_metrics_binary(y_true, predictions, verbose=1): 11 | predictions = np.array(predictions) 12 | if len(predictions.shape) == 1: 13 | predictions = np.stack([1 - predictions, predictions]).transpose((1, 0)) 14 | 15 | cf = metrics.confusion_matrix(y_true, predictions.argmax(axis=1)) 16 | if verbose: 17 | print("confusion matrix:") 18 | print(cf) 19 | cf = cf.astype(np.float32) 20 | 21 | acc = (cf[0][0] + cf[1][1]) / np.sum(cf) 22 | prec0 = cf[0][0] / (cf[0][0] + cf[1][0]) 23 | prec1 = cf[1][1] / (cf[1][1] + cf[0][1]) 24 | rec0 = cf[0][0] / (cf[0][0] + cf[0][1]) 25 | rec1 = cf[1][1] / (cf[1][1] + cf[1][0]) 26 | auroc = metrics.roc_auc_score(y_true, predictions[:, 1]) 27 | 28 | (precisions, recalls, thresholds) = metrics.precision_recall_curve(y_true, predictions[:, 1]) 29 | auprc = metrics.auc(recalls, precisions) 30 | minpse = np.max([min(x, y) for (x, y) in zip(precisions, recalls)]) 31 | f1_score=2*prec1*rec1/(prec1+rec1) 32 | if verbose: 33 | print("accuracy = {}".format(acc)) 34 | print("precision class 0 = {}".format(prec0)) 35 | print("precision class 1 = {}".format(prec1)) 36 | print("recall class 0 = {}".format(rec0)) 37 | print("recall class 1 = {}".format(rec1)) 38 | print("AUC of ROC = {}".format(auroc)) 39 | print("AUC of PRC = {}".format(auprc)) 40 | print("min(+P, Se) = {}".format(minpse)) 41 | print("f1_score = {}".format(f1_score)) 42 | 43 | return {"acc": acc, 44 | "prec0": prec0, 45 | "prec1": prec1, 46 | "rec0": rec0, 47 | "rec1": rec1, 48 | "auroc": auroc, 49 | "auprc": auprc, 50 | "minpse": minpse, 51 | "f1_score":f1_score} 52 | 53 | 54 | # for phenotyping 55 | 56 | def print_metrics_multilabel(y_true, predictions, verbose=1): 57 | y_true = np.array(y_true) 58 | predictions = np.array(predictions) 59 | 60 | auc_scores = metrics.roc_auc_score(y_true, predictions, average=None) 61 | ave_auc_micro = metrics.roc_auc_score(y_true, predictions, 62 | average="micro") 63 | ave_auc_macro = metrics.roc_auc_score(y_true, predictions, 64 | average="macro") 65 | ave_auc_weighted = metrics.roc_auc_score(y_true, predictions, 66 | average="weighted") 67 | 68 | predictions2 = np.zeros_like(predictions) 69 | for i in range(len(predictions2)): 70 | for j in range(len(predictions2[i])): 71 | if predictions[i][j]>=0.5: 72 | predictions2[i][j] = 1 73 | # print(predictions[:10]) 74 | # print(predictions2[:10]) 75 | 76 | # print(y_true[:,0][:10]) 77 | # print(predictions2[:,0][:10]) 78 | f1_0 = metrics.f1_score(y_true[:,0], predictions2[:,0]) 79 | # print(f1_0) 80 | f1_1 = metrics.f1_score(y_true[:,1], predictions2[:,1]) 81 | f1_2 = metrics.f1_score(y_true[:,2], predictions2[:,2]) 82 | 83 | total_labels = np.array(list(y_true[:,0])+list(y_true[:,1])+list(y_true[:,2])) 84 | total_preds = np.array(list(predictions2[:,0])+list(predictions2[:,1])+list(predictions2[:,2])) 85 | 86 | ave_f1_micro = metrics.f1_score(total_labels, total_preds) 87 | ave_f1_macro = (f1_0+f1_1+f1_2)/3 88 
| 89 | # ave_f1_micro = metrics.f1_score(y_true, predictions2, 90 | # average="micro") 91 | # ave_f1_macro = metrics.f1_score(y_true, predictions2, 92 | # average="macro") 93 | 94 | coverage_error = metrics.coverage_error(y_true, predictions) 95 | label_ranking_loss = metrics.label_ranking_loss(y_true, predictions) 96 | 97 | if verbose: 98 | print("ROC AUC scores for labels:", auc_scores) 99 | print("ave_auc_micro = {}".format(ave_auc_micro)) 100 | print("ave_auc_macro = {}".format(ave_auc_macro)) 101 | print("ave_auc_weighted = {}".format(ave_auc_weighted)) 102 | 103 | return {"auc_scores": auc_scores, 104 | "ave_auc_micro": ave_auc_micro, 105 | "ave_auc_macro": ave_auc_macro, 106 | "ave_auc_weighted": ave_auc_weighted, 107 | "ave_f1_micro": ave_f1_micro, 108 | "ave_f1_macro": ave_f1_macro, 109 | "coverage_error": coverage_error, 110 | "label_ranking_loss": label_ranking_loss,} 111 | 112 | 113 | # for length of stay 114 | 115 | def mean_absolute_percentage_error(y_true, y_pred): 116 | return np.mean(np.abs((y_true - y_pred) / (y_true + 0.1))) * 100 117 | 118 | 119 | def print_metrics_regression(y_true, predictions, verbose=1): 120 | predictions = np.array(predictions) 121 | predictions = np.maximum(predictions, 0).flatten() 122 | y_true = np.array(y_true) 123 | 124 | y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true] 125 | prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions] 126 | cf = metrics.confusion_matrix(y_true_bins, prediction_bins) 127 | if verbose: 128 | print("Custom bins confusion matrix:") 129 | print(cf) 130 | 131 | kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins, 132 | weights='linear') 133 | mad = metrics.mean_absolute_error(y_true, predictions) 134 | mse = metrics.mean_squared_error(y_true, predictions) 135 | mape = mean_absolute_percentage_error(y_true, predictions) 136 | 137 | if verbose: 138 | print("Mean absolute deviation (MAD) = {}".format(mad)) 139 | print("Mean squared error (MSE) = {}".format(mse)) 140 | print("Mean absolute percentage error (MAPE) = {}".format(mape)) 141 | print("Cohen kappa score = {}".format(kappa)) 142 | 143 | return {"mad": mad, 144 | "mse": mse, 145 | "mape": mape, 146 | "kappa": kappa} 147 | 148 | 149 | class LogBins: 150 | nbins = 10 151 | means = [0.611848, 2.587614, 6.977417, 16.465430, 37.053745, 152 | 81.816438, 182.303159, 393.334856, 810.964040, 1715.702848] 153 | 154 | 155 | def get_bin_log(x, nbins, one_hot=False): 156 | binid = int(np.log(x + 1) / 8.0 * nbins) 157 | if binid < 0: 158 | binid = 0 159 | if binid >= nbins: 160 | binid = nbins - 1 161 | 162 | if one_hot: 163 | ret = np.zeros((LogBins.nbins,)) 164 | ret[binid] = 1 165 | return ret 166 | return binid 167 | 168 | 169 | def get_estimate_log(prediction, nbins): 170 | bin_id = np.argmax(prediction) 171 | return LogBins.means[bin_id] 172 | 173 | 174 | def print_metrics_log_bins(y_true, predictions, verbose=1): 175 | y_true_bins = [get_bin_log(x, LogBins.nbins) for x in y_true] 176 | prediction_bins = [get_bin_log(x, LogBins.nbins) for x in predictions] 177 | cf = metrics.confusion_matrix(y_true_bins, prediction_bins) 178 | if verbose: 179 | print("LogBins confusion matrix:") 180 | print(cf) 181 | return print_metrics_regression(y_true, predictions, verbose) 182 | 183 | 184 | class CustomBins: 185 | inf = 1e18 186 | bins = [(-inf, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 14), (14, +inf)] 187 | nbins = len(bins) 188 | means = [11.450379, 35.070846, 59.206531, 83.382723, 107.487817, 189 | 131.579534, 
155.643957, 179.660558, 254.306624, 585.325890] 190 | 191 | 192 | def get_bin_custom(x, nbins, one_hot=False): 193 | for i in range(nbins): 194 | a = CustomBins.bins[i][0] * 24.0 195 | b = CustomBins.bins[i][1] * 24.0 196 | if a <= x < b: 197 | if one_hot: 198 | ret = np.zeros((CustomBins.nbins,)) 199 | ret[i] = 1 200 | return ret 201 | return i 202 | return None 203 | 204 | 205 | def get_estimate_custom(prediction, nbins): 206 | bin_id = np.argmax(prediction) 207 | assert 0 <= bin_id < nbins 208 | return CustomBins.means[bin_id] 209 | 210 | 211 | def print_metrics_custom_bins(y_true, predictions, verbose=1): 212 | return print_metrics_regression(y_true, predictions, verbose) 213 | -------------------------------------------------------------------------------- /ODIR/utils/preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import platform 6 | import pickle 7 | import json 8 | import os 9 | 10 | 11 | class Discretizer: 12 | def __init__(self, timestep=0.8, store_masks=True, impute_strategy='zero', start_time='zero', 13 | config_path=os.path.join(os.path.dirname(__file__), 'resources/discretizer_config.json')): 14 | 15 | with open(config_path) as f: 16 | config = json.load(f) 17 | self._id_to_channel = config['id_to_channel'] 18 | self._channel_to_id = dict(zip(self._id_to_channel, range(len(self._id_to_channel)))) 19 | self._is_categorical_channel = config['is_categorical_channel'] 20 | self._possible_values = config['possible_values'] 21 | self._normal_values = config['normal_values'] 22 | 23 | self._header = ["Hours"] + self._id_to_channel 24 | self._timestep = timestep 25 | self._store_masks = store_masks 26 | self._start_time = start_time 27 | self._impute_strategy = impute_strategy 28 | 29 | # for statistics 30 | self._done_count = 0 31 | self._empty_bins_sum = 0 32 | self._unused_data_sum = 0 33 | 34 | def transform(self, X, header=None, end=None): 35 | if header is None: 36 | header = self._header 37 | assert header[0] == "Hours" 38 | eps = 1e-6 39 | 40 | N_channels = len(self._id_to_channel) 41 | ts = [float(row[0]) for row in X] 42 | for i in range(len(ts) - 1): 43 | assert ts[i] < ts[i+1] + eps 44 | 45 | if self._start_time == 'relative': 46 | first_time = ts[0] 47 | elif self._start_time == 'zero': 48 | first_time = 0 49 | else: 50 | raise ValueError("start_time is invalid") 51 | 52 | if end is None: 53 | max_hours = max(ts) - first_time 54 | else: 55 | max_hours = end - first_time 56 | 57 | N_bins = int(max_hours / self._timestep + 1.0 - eps) 58 | 59 | cur_len = 0 60 | begin_pos = [0 for i in range(N_channels)] 61 | end_pos = [0 for i in range(N_channels)] 62 | for i in range(N_channels): 63 | channel = self._id_to_channel[i] 64 | begin_pos[i] = cur_len 65 | if self._is_categorical_channel[channel]: 66 | end_pos[i] = begin_pos[i] + len(self._possible_values[channel]) 67 | else: 68 | end_pos[i] = begin_pos[i] + 1 69 | cur_len = end_pos[i] 70 | 71 | data = np.zeros(shape=(N_bins, cur_len), dtype=float) 72 | mask = np.zeros(shape=(N_bins, N_channels), dtype=int) 73 | original_value = [["" for j in range(N_channels)] for i in range(N_bins)] 74 | total_data = 0 75 | unused_data = 0 76 | 77 | def write(data, bin_id, channel, value, begin_pos): 78 | channel_id = self._channel_to_id[channel] 79 | if self._is_categorical_channel[channel]: 80 | category_id = self._possible_values[channel].index(value) 81 | N_values = 
len(self._possible_values[channel]) 82 | one_hot = np.zeros((N_values,)) 83 | one_hot[category_id] = 1 84 | for pos in range(N_values): 85 | data[bin_id, begin_pos[channel_id] + pos] = one_hot[pos] 86 | else: 87 | data[bin_id, begin_pos[channel_id]] = float(value) 88 | 89 | for row in X: 90 | t = float(row[0]) - first_time 91 | if t > max_hours + eps: 92 | continue 93 | bin_id = int(t / self._timestep - eps) 94 | assert 0 <= bin_id < N_bins 95 | 96 | for j in range(1, len(row)): 97 | if row[j] == "": 98 | continue 99 | channel = header[j] 100 | channel_id = self._channel_to_id[channel] 101 | 102 | total_data += 1 103 | if mask[bin_id][channel_id] == 1: 104 | unused_data += 1 105 | mask[bin_id][channel_id] = 1 106 | 107 | write(data, bin_id, channel, row[j], begin_pos) 108 | original_value[bin_id][channel_id] = row[j] 109 | 110 | # impute missing values 111 | 112 | if self._impute_strategy not in ['zero', 'normal_value', 'previous', 'next']: 113 | raise ValueError("impute strategy is invalid") 114 | 115 | if self._impute_strategy in ['normal_value', 'previous']: 116 | prev_values = [[] for i in range(len(self._id_to_channel))] 117 | for bin_id in range(N_bins): 118 | for channel in self._id_to_channel: 119 | channel_id = self._channel_to_id[channel] 120 | if mask[bin_id][channel_id] == 1: 121 | prev_values[channel_id].append(original_value[bin_id][channel_id]) 122 | continue 123 | if self._impute_strategy == 'normal_value': 124 | imputed_value = self._normal_values[channel] 125 | if self._impute_strategy == 'previous': 126 | if len(prev_values[channel_id]) == 0: 127 | imputed_value = self._normal_values[channel] 128 | else: 129 | imputed_value = prev_values[channel_id][-1] 130 | write(data, bin_id, channel, imputed_value, begin_pos) 131 | 132 | if self._impute_strategy == 'next': 133 | prev_values = [[] for i in range(len(self._id_to_channel))] 134 | for bin_id in range(N_bins-1, -1, -1): 135 | for channel in self._id_to_channel: 136 | channel_id = self._channel_to_id[channel] 137 | if mask[bin_id][channel_id] == 1: 138 | prev_values[channel_id].append(original_value[bin_id][channel_id]) 139 | continue 140 | if len(prev_values[channel_id]) == 0: 141 | imputed_value = self._normal_values[channel] 142 | else: 143 | imputed_value = prev_values[channel_id][-1] 144 | write(data, bin_id, channel, imputed_value, begin_pos) 145 | 146 | empty_bins = np.sum([1 - min(1, np.sum(mask[i, :])) for i in range(N_bins)]) 147 | self._done_count += 1 148 | self._empty_bins_sum += empty_bins / (N_bins + eps) 149 | self._unused_data_sum += unused_data / (total_data + eps) 150 | 151 | if self._store_masks: 152 | data = np.hstack([data, mask.astype(np.float32)]) 153 | 154 | # create new header 155 | new_header = [] 156 | for channel in self._id_to_channel: 157 | if self._is_categorical_channel[channel]: 158 | values = self._possible_values[channel] 159 | for value in values: 160 | new_header.append(channel + "->" + value) 161 | else: 162 | new_header.append(channel) 163 | 164 | if self._store_masks: 165 | for i in range(len(self._id_to_channel)): 166 | channel = self._id_to_channel[i] 167 | new_header.append("mask->" + channel) 168 | 169 | new_header = ",".join(new_header) 170 | 171 | return (data, new_header) 172 | 173 | def print_statistics(self): 174 | print("statistics of discretizer:") 175 | print("\tconverted {} examples".format(self._done_count)) 176 | print("\taverage unused data = {:.2f} percent".format(100.0 * self._unused_data_sum / self._done_count)) 177 | print("\taverage empty bins = {:.2f} 
percent".format(100.0 * self._empty_bins_sum / self._done_count)) 178 | 179 | 180 | class Normalizer: 181 | def __init__(self, fields=None): 182 | self._means = None 183 | self._stds = None 184 | self._fields = None 185 | if fields is not None: 186 | self._fields = [col for col in fields] 187 | 188 | self._sum_x = None 189 | self._sum_sq_x = None 190 | self._count = 0 191 | 192 | def _feed_data(self, x): 193 | x = np.array(x) 194 | self._count += x.shape[0] 195 | if self._sum_x is None: 196 | self._sum_x = np.sum(x, axis=0) 197 | self._sum_sq_x = np.sum(x**2, axis=0) 198 | else: 199 | self._sum_x += np.sum(x, axis=0) 200 | self._sum_sq_x += np.sum(x**2, axis=0) 201 | 202 | def _save_params(self, save_file_path): 203 | eps = 1e-7 204 | with open(save_file_path, "wb") as save_file: 205 | N = self._count 206 | self._means = 1.0 / N * self._sum_x 207 | self._stds = np.sqrt(1.0/(N - 1) * (self._sum_sq_x - 2.0 * self._sum_x * self._means + N * self._means**2)) 208 | self._stds[self._stds < eps] = eps 209 | pickle.dump(obj={'means': self._means, 210 | 'stds': self._stds}, 211 | file=save_file, 212 | protocol=2) 213 | 214 | def load_params(self, load_file_path): 215 | with open(load_file_path, "rb") as load_file: 216 | if platform.python_version()[0] == '2': 217 | dct = pickle.load(load_file) 218 | else: 219 | dct = pickle.load(load_file, encoding='latin1') 220 | self._means = dct['means'] 221 | self._stds = dct['stds'] 222 | 223 | def transform(self, X): 224 | if self._fields is None: 225 | fields = range(X.shape[1]) 226 | else: 227 | fields = self._fields 228 | ret = 1.0 * X 229 | for col in fields: 230 | ret[:, col] = (X[:, col] - self._means[col]) / self._stds[col] 231 | return ret 232 | -------------------------------------------------------------------------------- /ODIR/utils/readers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import os 5 | import numpy as np 6 | import random 7 | 8 | 9 | class Reader(object): 10 | def __init__(self, dataset_dir, listfile=None): 11 | self._dataset_dir = dataset_dir 12 | self._current_index = 0 13 | if listfile is None: 14 | listfile_path = os.path.join(dataset_dir, "listfile.csv") 15 | else: 16 | listfile_path = listfile 17 | with open(listfile_path, "r") as lfile: 18 | self._data = lfile.readlines() 19 | self._listfile_header = self._data[0] 20 | self._data = self._data[1:] 21 | 22 | def get_number_of_examples(self): 23 | return len(self._data) 24 | 25 | def random_shuffle(self, seed=None): 26 | if seed is not None: 27 | random.seed(seed) 28 | random.shuffle(self._data) 29 | 30 | def read_example(self, index): 31 | raise NotImplementedError() 32 | 33 | def read_next(self): 34 | to_read_index = self._current_index 35 | self._current_index += 1 36 | if self._current_index == self.get_number_of_examples(): 37 | self._current_index = 0 38 | return self.read_example(to_read_index) 39 | 40 | 41 | class DecompensationReader(Reader): 42 | def __init__(self, dataset_dir, listfile=None): 43 | """ Reader for decompensation prediction task. 44 | :param dataset_dir: Directory where timeseries files are stored. 45 | :param listfile: Path to a listfile. If this parameter is left `None` then 46 | `dataset_dir/listfile.csv` will be used. 
47 | """ 48 | Reader.__init__(self, dataset_dir, listfile) 49 | self._data = [line.split(',') for line in self._data] 50 | self._data = [(x, float(t), int(y)) for (x, t, y) in self._data] 51 | 52 | def _read_timeseries(self, ts_filename, time_bound): 53 | ret = [] 54 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 55 | header = tsfile.readline().strip().split(',') 56 | assert header[0] == "Hours" 57 | for line in tsfile: 58 | mas = line.strip().split(',') 59 | t = float(mas[0]) 60 | if t > time_bound + 1e-6: 61 | break 62 | ret.append(np.array(mas)) 63 | return (np.stack(ret), header) 64 | 65 | def read_example(self, index): 66 | """ Read the example with given index. 67 | 68 | :param index: Index of the line of the listfile to read (counting starts from 0). 69 | :return: Directory with the following keys: 70 | X : np.array 71 | 2D array containing all events. Each row corresponds to a moment. 72 | First column is the time and other columns correspond to different 73 | variables. 74 | t : float 75 | Length of the data in hours. Note, in general, it is not equal to the 76 | timestamp of last event. 77 | y : int (0 or 1) 78 | Mortality within next 24 hours. 79 | header : array of strings 80 | Names of the columns. The ordering of the columns is always the same. 81 | name: Name of the sample. 82 | """ 83 | if index < 0 or index >= len(self._data): 84 | raise ValueError("Index must be from 0 (inclusive) to number of examples (exclusive).") 85 | 86 | name = self._data[index][0] 87 | t = self._data[index][1] 88 | y = self._data[index][2] 89 | (X, header) = self._read_timeseries(name, t) 90 | 91 | return {"X": X, 92 | "t": t, 93 | "y": y, 94 | "header": header, 95 | "name": name} 96 | 97 | 98 | class InHospitalMortalityReader(Reader): 99 | def __init__(self, dataset_dir, listfile=None, period_length=48.0): 100 | """ Reader for in-hospital moratality prediction task. 101 | 102 | :param dataset_dir: Directory where timeseries files are stored. 103 | :param listfile: Path to a listfile. If this parameter is left `None` then 104 | `dataset_dir/listfile.csv` will be used. 105 | :param period_length: Length of the period (in hours) from which the prediction is done. 106 | """ 107 | Reader.__init__(self, dataset_dir, listfile) 108 | self._data = [line.split(',') for line in self._data] 109 | self._data = [(x, int(y)) for (x, y) in self._data] 110 | self._period_length = period_length 111 | 112 | def _read_timeseries(self, ts_filename): 113 | ret = [] 114 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 115 | header = tsfile.readline().strip().split(',') 116 | assert header[0] == "Hours" 117 | for line in tsfile: 118 | mas = line.strip().split(',') 119 | ret.append(np.array(mas)) 120 | return (np.stack(ret), header) 121 | 122 | def read_example(self, index): 123 | """ Reads the example with given index. 124 | 125 | :param index: Index of the line of the listfile to read (counting starts from 0). 126 | :return: Dictionary with the following keys: 127 | X : np.array 128 | 2D array containing all events. Each row corresponds to a moment. 129 | First column is the time and other columns correspond to different 130 | variables. 131 | t : float 132 | Length of the data in hours. Note, in general, it is not equal to the 133 | timestamp of last event. 134 | y : int (0 or 1) 135 | In-hospital mortality. 136 | header : array of strings 137 | Names of the columns. The ordering of the columns is always the same. 138 | name: Name of the sample. 
139 | """ 140 | if index < 0 or index >= len(self._data): 141 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 142 | 143 | name = self._data[index][0] 144 | t = self._period_length 145 | y = self._data[index][1] 146 | (X, header) = self._read_timeseries(name) 147 | 148 | return {"X": X, 149 | "t": t, 150 | "y": y, 151 | "header": header, 152 | "name": name} 153 | 154 | 155 | class LengthOfStayReader(Reader): 156 | def __init__(self, dataset_dir, listfile=None): 157 | """ Reader for length of stay prediction task. 158 | 159 | :param dataset_dir: Directory where timeseries files are stored. 160 | :param listfile: Path to a listfile. If this parameter is left `None` then 161 | `dataset_dir/listfile.csv` will be used. 162 | """ 163 | Reader.__init__(self, dataset_dir, listfile) 164 | self._data = [line.split(',') for line in self._data] 165 | self._data = [(x, float(t), float(y)) for (x, t, y) in self._data] 166 | 167 | def _read_timeseries(self, ts_filename, time_bound): 168 | ret = [] 169 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 170 | header = tsfile.readline().strip().split(',') 171 | assert header[0] == "Hours" 172 | for line in tsfile: 173 | mas = line.strip().split(',') 174 | t = float(mas[0]) 175 | if t > time_bound + 1e-6: 176 | break 177 | ret.append(np.array(mas)) 178 | return (np.stack(ret), header) 179 | 180 | def read_example(self, index): 181 | """ Reads the example with given index. 182 | 183 | :param index: Index of the line of the listfile to read (counting starts from 0). 184 | :return: Dictionary with the following keys: 185 | X : np.array 186 | 2D array containing all events. Each row corresponds to a moment. 187 | First column is the time and other columns correspond to different 188 | variables. 189 | t : float 190 | Length of the data in hours. Note, in general, it is not equal to the 191 | timestamp of last event. 192 | y : float 193 | Remaining time in ICU. 194 | header : array of strings 195 | Names of the columns. The ordering of the columns is always the same. 196 | name: Name of the sample. 197 | """ 198 | if index < 0 or index >= len(self._data): 199 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 200 | 201 | name = self._data[index][0] 202 | t = self._data[index][1] 203 | y = self._data[index][2] 204 | (X, header) = self._read_timeseries(name, t) 205 | 206 | return {"X": X, 207 | "t": t, 208 | "y": y, 209 | "header": header, 210 | "name": name} 211 | 212 | 213 | class PhenotypingReader(Reader): 214 | def __init__(self, dataset_dir, listfile=None): 215 | """ Reader for phenotype classification task. 216 | 217 | :param dataset_dir: Directory where timeseries files are stored. 218 | :param listfile: Path to a listfile. If this parameter is left `None` then 219 | `dataset_dir/listfile.csv` will be used. 220 | """ 221 | Reader.__init__(self, dataset_dir, listfile) 222 | self._data = [line.split(',') for line in self._data] 223 | self._data = [(mas[0], float(mas[1]), list(map(int, mas[2:]))) for mas in self._data] 224 | 225 | def _read_timeseries(self, ts_filename): 226 | ret = [] 227 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 228 | header = tsfile.readline().strip().split(',') 229 | assert header[0] == "Hours" 230 | for line in tsfile: 231 | mas = line.strip().split(',') 232 | ret.append(np.array(mas)) 233 | return (np.stack(ret), header) 234 | 235 | def read_example(self, index): 236 | """ Reads the example with given index. 
237 | 238 | :param index: Index of the line of the listfile to read (counting starts from 0). 239 | :return: Dictionary with the following keys: 240 | X : np.array 241 | 2D array containing all events. Each row corresponds to a moment. 242 | First column is the time and other columns correspond to different 243 | variables. 244 | t : float 245 | Length of the data in hours. Note, in general, it is not equal to the 246 | timestamp of last event. 247 | y : array of ints 248 | Phenotype labels. 249 | header : array of strings 250 | Names of the columns. The ordering of the columns is always the same. 251 | name: Name of the sample. 252 | """ 253 | if index < 0 or index >= len(self._data): 254 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 255 | 256 | name = self._data[index][0] 257 | t = self._data[index][1] 258 | y = self._data[index][2] 259 | (X, header) = self._read_timeseries(name) 260 | 261 | return {"X": X, 262 | "t": t, 263 | "y": y, 264 | "header": header, 265 | "name": name} 266 | 267 | 268 | class MultitaskReader(Reader): 269 | def __init__(self, dataset_dir, listfile=None): 270 | """ Reader for multitask learning. 271 | 272 | :param dataset_dir: Directory where timeseries files are stored. 273 | :param listfile: Path to a listfile. If this parameter is left `None` then 274 | `dataset_dir/listfile.csv` will be used. 275 | """ 276 | Reader.__init__(self, dataset_dir, listfile) 277 | self._data = [line.split(',') for line in self._data] 278 | 279 | def process_ihm(x): 280 | return list(map(int, x.split(';'))) 281 | 282 | def process_los(x): 283 | x = x.split(';') 284 | if x[0] == '': 285 | return ([], []) 286 | return (list(map(int, x[:len(x)//2])), list(map(float, x[len(x)//2:]))) 287 | 288 | def process_ph(x): 289 | return list(map(int, x.split(';'))) 290 | 291 | def process_decomp(x): 292 | x = x.split(';') 293 | if x[0] == '': 294 | return ([], []) 295 | return (list(map(int, x[:len(x)//2])), list(map(int, x[len(x)//2:]))) 296 | 297 | self._data = [(fname, float(t), process_ihm(ihm), process_los(los), 298 | process_ph(pheno), process_decomp(decomp)) 299 | for fname, t, ihm, los, pheno, decomp in self._data] 300 | 301 | def _read_timeseries(self, ts_filename): 302 | ret = [] 303 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 304 | header = tsfile.readline().strip().split(',') 305 | assert header[0] == "Hours" 306 | for line in tsfile: 307 | mas = line.strip().split(',') 308 | ret.append(np.array(mas)) 309 | return (np.stack(ret), header) 310 | 311 | def read_example(self, index): 312 | """ Reads the example with given index. 313 | 314 | :param index: Index of the line of the listfile to read (counting starts from 0). 315 | :return: Return dictionary with the following keys: 316 | X : np.array 317 | 2D array containing all events. Each row corresponds to a moment. 318 | First column is the time and other columns correspond to different 319 | variables. 320 | t : float 321 | Length of the data in hours. Note, in general, it is not equal to the 322 | timestamp of last event. 323 | ihm : array 324 | Array of 3 integers: [pos, mask, label]. 325 | los : array 326 | Array of 2 arrays: [masks, labels]. 327 | pheno : array 328 | Array of 25 binary integers (phenotype labels). 329 | decomp : array 330 | Array of 2 arrays: [masks, labels]. 331 | header : array of strings 332 | Names of the columns. The ordering of the columns is always the same. 333 | name: Name of the sample. 
334 | """ 335 | if index < 0 or index >= len(self._data): 336 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 337 | 338 | name = self._data[index][0] 339 | (X, header) = self._read_timeseries(name) 340 | 341 | return {"X": X, 342 | "t": self._data[index][1], 343 | "ihm": self._data[index][2], 344 | "los": self._data[index][3], 345 | "pheno": self._data[index][4], 346 | "decomp": self._data[index][5], 347 | "header": header, 348 | "name": name} 349 | -------------------------------------------------------------------------------- /ODIR/utils/resources/channel_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "Glucose": { 3 | "possible_values": [] 4 | }, 5 | "Systolic blood pressure": { 6 | "possible_values": [] 7 | }, 8 | "Glascow coma scale verbal response": { 9 | "possible_values": ["1 No Response", "1.0 ET/Trach", "2 Incomp sounds", "3 Inapprop words", "4 Confused", "5 Oriented", "Confused", "Inappropriate Words", "Incomprehensible sounds", "No Response", "No Response-ETT", "Oriented"], 10 | "values": { 11 | "No Response-ETT": 1, 12 | "No Response": 1, 13 | "1 No Response": 1, 14 | "1.0 ET/Trach": 1, 15 | "2 Incomp sounds": 2, 16 | "Incomprehensible sounds": 2, 17 | "3 Inapprop words": 3, 18 | "Inappropriate Words": 3, 19 | "4 Confused": 4, 20 | "Confused": 4, 21 | "5 Oriented": 5, 22 | "Oriented": 5 23 | } 24 | }, 25 | "Temperature": { 26 | "possible_values": [] 27 | }, 28 | "Weight": { 29 | "possible_values": [] 30 | }, 31 | "Diastolic blood pressure": { 32 | "possible_values": [] 33 | }, 34 | "Fraction inspired oxygen": { 35 | "possible_values": [] 36 | }, 37 | "Glascow coma scale total": { 38 | "possible_values": ["10", "11", "12", "13", "14", "15", "3", "4", "5", "6", "7", "8", "9"], 39 | "values": { 40 | "3": 3, 41 | "4": 4, 42 | "5": 5, 43 | "6": 6, 44 | "7": 7, 45 | "8": 8, 46 | "9": 9, 47 | "10": 10, 48 | "11": 11, 49 | "12": 12, 50 | "13": 13, 51 | "14": 14, 52 | "15": 15 53 | } 54 | }, 55 | "Capillary refill rate": { 56 | "possible_values": ["0.0", "1.0"], 57 | "values": { 58 | "0.0": 0, 59 | "1.0": 1 60 | } 61 | }, 62 | "Mean blood pressure": { 63 | "possible_values": [] 64 | }, 65 | "Heart Rate": { 66 | "possible_values": [] 67 | }, 68 | "Oxygen saturation": { 69 | "possible_values": [] 70 | }, 71 | "pH": { 72 | "possible_values": [] 73 | }, 74 | "Height": { 75 | "possible_values": [] 76 | }, 77 | "Glascow coma scale eye opening": { 78 | "possible_values": ["1 No Response", "2 To pain", "3 To speech", "4 Spontaneously", "None", "Spontaneously", "To Pain", "To Speech"], 79 | "values": { 80 | "None": 0, 81 | "1 No Response": 1, 82 | "2 To pain": 2, 83 | "To Pain": 2, 84 | "3 To speech": 3, 85 | "To Speech": 3, 86 | "4 Spontaneously": 4, 87 | "Spontaneously": 4 88 | } 89 | }, 90 | "Respiratory rate": { 91 | "possible_values": [] 92 | }, 93 | "Glascow coma scale motor response": { 94 | "possible_values": ["1 No Response", "2 Abnorm extensn", "3 Abnorm flexion", "4 Flex-withdraws", "5 Localizes Pain", "6 Obeys Commands", "Abnormal Flexion", "Abnormal extension", "Flex-withdraws", "Localizes Pain", "No response", "Obeys Commands"], 95 | "values": { 96 | "1 No Response": 1, 97 | "No response": 1, 98 | "2 Abnorm extensn": 2, 99 | "Abnormal extension": 2, 100 | "3 Abnorm flexion": 3, 101 | "Abnormal Flexion": 3, 102 | "4 Flex-withdraws": 4, 103 | "Flex-withdraws": 4, 104 | "5 Localizes Pain": 5, 105 | "Localizes Pain": 5, 106 | "6 Obeys Commands": 6, 107 | "Obeys Commands": 6 108 | } 109 | } 
110 | } 111 | -------------------------------------------------------------------------------- /ODIR/utils/resources/discretizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "id_to_channel": [ 3 | "Capillary refill rate", 4 | "Diastolic blood pressure", 5 | "Fraction inspired oxygen", 6 | "Glascow coma scale eye opening", 7 | "Glascow coma scale motor response", 8 | "Glascow coma scale total", 9 | "Glascow coma scale verbal response", 10 | "Glucose", 11 | "Heart Rate", 12 | "Height", 13 | "Mean blood pressure", 14 | "Oxygen saturation", 15 | "Respiratory rate", 16 | "Systolic blood pressure", 17 | "Temperature", 18 | "Weight", 19 | "pH" 20 | ], 21 | "is_categorical_channel": { 22 | "Capillary refill rate": true, 23 | "Diastolic blood pressure": false, 24 | "Fraction inspired oxygen": false, 25 | "Glascow coma scale eye opening": true, 26 | "Glascow coma scale motor response": true, 27 | "Glascow coma scale total": true, 28 | "Glascow coma scale verbal response": true, 29 | "Glucose": false, 30 | "Heart Rate": false, 31 | "Height": false, 32 | "Mean blood pressure": false, 33 | "Oxygen saturation": false, 34 | "Respiratory rate": false, 35 | "Systolic blood pressure": false, 36 | "Temperature": false, 37 | "Weight": false, 38 | "pH": false 39 | }, 40 | "possible_values": { 41 | "Capillary refill rate": [ 42 | "0.0", 43 | "1.0" 44 | ], 45 | "Diastolic blood pressure": [ 46 | 47 | ], 48 | "Fraction inspired oxygen": [ 49 | 50 | ], 51 | "Glascow coma scale eye opening": [ 52 | "To Pain", 53 | "3 To speech", 54 | "1 No Response", 55 | "4 Spontaneously", 56 | "None", 57 | "To Speech", 58 | "Spontaneously", 59 | "2 To pain" 60 | ], 61 | "Glascow coma scale motor response": [ 62 | "1 No Response", 63 | "3 Abnorm flexion", 64 | "Abnormal extension", 65 | "No response", 66 | "4 Flex-withdraws", 67 | "Localizes Pain", 68 | "Flex-withdraws", 69 | "Obeys Commands", 70 | "Abnormal Flexion", 71 | "6 Obeys Commands", 72 | "5 Localizes Pain", 73 | "2 Abnorm extensn" 74 | ], 75 | "Glascow coma scale total": [ 76 | "11", 77 | "10", 78 | "13", 79 | "12", 80 | "15", 81 | "14", 82 | "3", 83 | "5", 84 | "4", 85 | "7", 86 | "6", 87 | "9", 88 | "8" 89 | ], 90 | "Glascow coma scale verbal response": [ 91 | "1 No Response", 92 | "No Response", 93 | "Confused", 94 | "Inappropriate Words", 95 | "Oriented", 96 | "No Response-ETT", 97 | "5 Oriented", 98 | "Incomprehensible sounds", 99 | "1.0 ET/Trach", 100 | "4 Confused", 101 | "2 Incomp sounds", 102 | "3 Inapprop words" 103 | ], 104 | "Glucose": [ 105 | 106 | ], 107 | "Heart Rate": [ 108 | 109 | ], 110 | "Height": [ 111 | 112 | ], 113 | "Mean blood pressure": [ 114 | 115 | ], 116 | "Oxygen saturation": [ 117 | 118 | ], 119 | "Respiratory rate": [ 120 | 121 | ], 122 | "Systolic blood pressure": [ 123 | 124 | ], 125 | "Temperature": [ 126 | 127 | ], 128 | "Weight": [ 129 | 130 | ], 131 | "pH": [ 132 | 133 | ] 134 | }, 135 | "normal_values": { 136 | "Capillary refill rate": "0.0", 137 | "Diastolic blood pressure": "59.0", 138 | "Fraction inspired oxygen": "0.21", 139 | "Glascow coma scale eye opening": "4 Spontaneously", 140 | "Glascow coma scale motor response": "6 Obeys Commands", 141 | "Glascow coma scale total": "15", 142 | "Glascow coma scale verbal response": "5 Oriented", 143 | "Glucose": "128.0", 144 | "Heart Rate": "86", 145 | "Height": "170.0", 146 | "Mean blood pressure": "77.0", 147 | "Oxygen saturation": "98.0", 148 | "Respiratory rate": "19", 149 | "Systolic blood pressure": "118.0", 150 | 
"Temperature": "36.6", 151 | "Weight": "81.0", 152 | "pH": "7.4" 153 | } 154 | } -------------------------------------------------------------------------------- /ODIR/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | from . import common_utils 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(reader, discretizer, normalizer, small_part=False, return_names=False): 10 | N = reader.get_number_of_examples() 11 | if small_part: 12 | N = 1000 13 | ret = common_utils.read_chunk(reader, N) 14 | data = ret["X"] 15 | ts = ret["t"] 16 | labels = ret["y"] 17 | names = ret["name"] 18 | data = [discretizer.transform(X, end=t)[0] for (X, t) in zip(data, ts)] 19 | if normalizer is not None: 20 | data = [normalizer.transform(X) for X in data] 21 | whole_data = (np.array(data), labels) 22 | if not return_names: 23 | return whole_data 24 | return {"data": whole_data, "names": names} 25 | 26 | 27 | def save_results(names, pred, y_true, path): 28 | common_utils.create_directory(os.path.dirname(path)) 29 | with open(path, 'w') as f: 30 | f.write("stay,prediction,y_true\n") 31 | for (name, x, y) in zip(names, pred, y_true): 32 | f.write("{},{:.6f},{}\n".format(name, x, y)) 33 | -------------------------------------------------------------------------------- /OV/utils/__pycache__/common_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/common_utils.cpython-36.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/common_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/common_utils.cpython-37.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/feature_extractor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/feature_extractor.cpython-36.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/feature_extractor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/feature_extractor.cpython-37.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/logging.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/logging.cpython-37.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/metrics.cpython-36.pyc -------------------------------------------------------------------------------- 
/OV/utils/__pycache__/metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/metrics.cpython-37.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/preprocessing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/preprocessing.cpython-36.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/preprocessing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/preprocessing.cpython-37.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/readers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/readers.cpython-36.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/readers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/readers.cpython-37.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /OV/utils/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /OV/utils/common_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import os 6 | import json 7 | import random 8 | 9 | from .feature_extractor import extract_features 10 | 11 | 12 | def convert_to_dict(data, header, channel_info): 13 | """ convert data from readers output in to array of arrays format """ 14 | ret = [[] for i in range(data.shape[1] - 1)] 15 | for i in range(1, data.shape[1]): 16 | ret[i-1] = [(t, x) for (t, x) in zip(data[:, 0], data[:, i]) if x != ""] 17 | channel = header[i] 18 | if len(channel_info[channel]['possible_values']) != 0: 19 | ret[i-1] = list(map(lambda x: (x[0], channel_info[channel]['values'][x[1]]), ret[i-1])) 20 | ret[i-1] = list(map(lambda x: (float(x[0]), float(x[1])), ret[i-1])) 21 | return ret 22 | 23 | 24 | def extract_features_from_rawdata(chunk, header, period, features): 25 | with open(os.path.join(os.path.dirname(__file__), "resources/channel_info.json")) as channel_info_file: 26 | channel_info = 
json.loads(channel_info_file.read()) 27 | data = [convert_to_dict(X, header, channel_info) for X in chunk] 28 | return extract_features(data, period, features) 29 | 30 | 31 | def read_chunk(reader, chunk_size): 32 | data = {} 33 | for i in range(chunk_size): 34 | ret = reader.read_next() 35 | for k, v in ret.items(): 36 | if k not in data: 37 | data[k] = [] 38 | data[k].append(v) 39 | data["header"] = data["header"][0] 40 | return data 41 | 42 | 43 | def sort_and_shuffle(data, batch_size): 44 | """ Sort data by the length and then make batches and shuffle them. 45 | data is tuple (X1, X2, ..., Xn) all of them have the same length. 46 | Usually data = (X, y). 47 | """ 48 | assert len(data) >= 2 49 | data = list(zip(*data)) 50 | 51 | random.shuffle(data) 52 | 53 | old_size = len(data) 54 | rem = old_size % batch_size 55 | head = data[:old_size - rem] 56 | tail = data[old_size - rem:] 57 | data = [] 58 | 59 | head.sort(key=(lambda x: x[0].shape[0])) 60 | 61 | mas = [head[i: i+batch_size] for i in range(0, len(head), batch_size)] 62 | random.shuffle(mas) 63 | 64 | for x in mas: 65 | data += x 66 | data += tail 67 | 68 | data = list(zip(*data)) 69 | return data 70 | 71 | 72 | def add_common_arguments(parser): 73 | """ Add all the parameters which are common across the tasks 74 | """ 75 | parser.add_argument('--network', type=str, required=True) 76 | parser.add_argument('--dim', type=int, default=256, 77 | help='number of hidden units') 78 | parser.add_argument('--depth', type=int, default=1, 79 | help='number of bi-LSTMs') 80 | parser.add_argument('--epochs', type=int, default=100, 81 | help='number of chunks to train') 82 | parser.add_argument('--load_state', type=str, default="", 83 | help='state file path') 84 | parser.add_argument('--mode', type=str, default="train", 85 | help='mode: train or test') 86 | parser.add_argument('--batch_size', type=int, default=64) 87 | parser.add_argument('--l2', type=float, default=0, help='L2 regularization') 88 | parser.add_argument('--l1', type=float, default=0, help='L1 regularization') 89 | parser.add_argument('--save_every', type=int, default=1, 90 | help='save state every x epoch') 91 | parser.add_argument('--prefix', type=str, default="", 92 | help='optional prefix of network name') 93 | parser.add_argument('--dropout', type=float, default=0.0) 94 | parser.add_argument('--rec_dropout', type=float, default=0.0, 95 | help="dropout rate for recurrent connections") 96 | parser.add_argument('--batch_norm', type=bool, default=False, 97 | help='batch normalization') 98 | parser.add_argument('--timestep', type=float, default=1.0, 99 | help="fixed timestep used in the dataset") 100 | parser.add_argument('--imputation', type=str, default='previous') 101 | parser.add_argument('--small_part', dest='small_part', action='store_true') 102 | parser.add_argument('--whole_data', dest='small_part', action='store_false') 103 | parser.add_argument('--optimizer', type=str, default='adam') 104 | parser.add_argument('--lr', type=float, default=0.001, help='learning rate') 105 | parser.add_argument('--beta_1', type=float, default=0.9, 106 | help='beta_1 param for Adam optimizer') 107 | parser.add_argument('--verbose', type=int, default=2) 108 | parser.add_argument('--size_coef', type=float, default=4.0) 109 | parser.add_argument('--normalizer_state', type=str, default=None, 110 | help='Path to a state file of a normalizer. 
Leave none if you want to ' 111 | 'use one of the provided ones.') 112 | parser.set_defaults(small_part=False) 113 | 114 | 115 | class DeepSupervisionDataLoader: 116 | r""" 117 | Data loader for decompensation and length of stay task. 118 | Reads all the data for one patient at once. 119 | 120 | Parameters 121 | ---------- 122 | dataset_dir : str 123 | Directory where timeseries files are stored. 124 | listfile : str 125 | Path to a listfile. If this parameter is left `None` then 126 | `dataset_dir/listfile.csv` will be used. 127 | """ 128 | def __init__(self, dataset_dir, listfile=None, small_part=False): 129 | 130 | self._dataset_dir = dataset_dir 131 | if listfile is None: 132 | listfile_path = os.path.join(dataset_dir, "listfile.csv") 133 | else: 134 | listfile_path = listfile 135 | with open(listfile_path, "r") as lfile: 136 | self._data = lfile.readlines()[1:] # skip the header 137 | 138 | self._data = [line.split(',') for line in self._data] 139 | self._data = [(x, float(t), y) for (x, t, y) in self._data] 140 | self._data = sorted(self._data) 141 | 142 | mas = {"X": [], 143 | "ts": [], 144 | "ys": [], 145 | "name": []} 146 | i = 0 147 | while i < len(self._data): 148 | j = i 149 | cur_stay = self._data[i][0] 150 | cur_ts = [] 151 | cur_labels = [] 152 | while j < len(self._data) and self._data[j][0] == cur_stay: 153 | cur_ts.append(self._data[j][1]) 154 | cur_labels.append(self._data[j][2]) 155 | j += 1 156 | 157 | cur_X, header = self._read_timeseries(cur_stay) 158 | mas["X"].append(cur_X) 159 | mas["ts"].append(cur_ts) 160 | mas["ys"].append(cur_labels) 161 | mas["name"].append(cur_stay) 162 | 163 | i = j 164 | if small_part and len(mas["name"]) == 256: 165 | break 166 | 167 | self._data = mas 168 | 169 | def _read_timeseries(self, ts_filename): 170 | ret = [] 171 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 172 | header = tsfile.readline().strip().split(',') 173 | assert header[0] == "Hours" 174 | for line in tsfile: 175 | mas = line.strip().split(',') 176 | ret.append(np.array(mas)) 177 | return (np.stack(ret), header) 178 | 179 | 180 | def create_directory(directory): 181 | if not os.path.exists(directory): 182 | os.makedirs(directory) 183 | 184 | 185 | def pad_zeros(arr, min_length=None): 186 | """ 187 | `arr` is an array of `np.array`s 188 | 189 | The function appends zeros to every `np.array` in `arr` 190 | to equalize their first axis lenghts. 
191 | """ 192 | dtype = arr[0].dtype 193 | max_len = max([x.shape[0] for x in arr]) 194 | ret = [np.concatenate([x, np.zeros((max_len - x.shape[0],) + x.shape[1:], dtype=dtype)], axis=0) 195 | for x in arr] 196 | if (min_length is not None) and ret[0].shape[0] < min_length: 197 | ret = [np.concatenate([x, np.zeros((min_length - x.shape[0],) + x.shape[1:], dtype=dtype)], axis=0) 198 | for x in ret] 199 | return np.array(ret) 200 | -------------------------------------------------------------------------------- /OV/utils/decomp_normalizer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choczhang/M3Care/3036c425ec25665c6fe19175b6f37a6dc4fe3900/OV/utils/decomp_normalizer -------------------------------------------------------------------------------- /OV/utils/feature_extractor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | from scipy.stats import skew 6 | 7 | all_functions = [min, max, np.mean, np.std, skew, len] 8 | 9 | functions_map = { 10 | "all": all_functions, 11 | "len": [len], 12 | "all_but_len": all_functions[:-1] 13 | } 14 | 15 | periods_map = { 16 | "all": (0, 0, 1, 0), 17 | "first4days": (0, 0, 0, 4 * 24), 18 | "first8days": (0, 0, 0, 8 * 24), 19 | "last12hours": (1, -12, 1, 0), 20 | "first25percent": (2, 25), 21 | "first50percent": (2, 50) 22 | } 23 | 24 | sub_periods = [(2, 100), (2, 10), (2, 25), (2, 50), 25 | (3, 10), (3, 25), (3, 50)] 26 | 27 | 28 | def get_range(begin, end, period): 29 | # first p % 30 | if period[0] == 2: 31 | return (begin, begin + (end - begin) * period[1] / 100.0) 32 | # last p % 33 | if period[0] == 3: 34 | return (end - (end - begin) * period[1] / 100.0, end) 35 | 36 | if period[0] == 0: 37 | L = begin + period[1] 38 | else: 39 | L = end + period[1] 40 | 41 | if period[2] == 0: 42 | R = begin + period[3] 43 | else: 44 | R = end + period[3] 45 | 46 | return (L, R) 47 | 48 | 49 | def calculate(channel_data, period, sub_period, functions): 50 | if len(channel_data) == 0: 51 | return np.full((len(functions, )), np.nan) 52 | 53 | L = channel_data[0][0] 54 | R = channel_data[-1][0] 55 | L, R = get_range(L, R, period) 56 | L, R = get_range(L, R, sub_period) 57 | 58 | data = [x for (t, x) in channel_data 59 | if L - 1e-6 < t < R + 1e-6] 60 | 61 | if len(data) == 0: 62 | return np.full((len(functions, )), np.nan) 63 | return np.array([fn(data) for fn in functions], dtype=np.float32) 64 | 65 | 66 | def extract_features_single_episode(data_raw, period, functions): 67 | global sub_periods 68 | extracted_features = [np.concatenate([calculate(data_raw[i], period, sub_period, functions) 69 | for sub_period in sub_periods], 70 | axis=0) 71 | for i in range(len(data_raw))] 72 | return np.concatenate(extracted_features, axis=0) 73 | 74 | 75 | def extract_features(data_raw, period, features): 76 | period = periods_map[period] 77 | functions = functions_map[features] 78 | return np.array([extract_features_single_episode(x, period, functions) 79 | for x in data_raw]) 80 | -------------------------------------------------------------------------------- /OV/utils/logging.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import logging 4 | 5 | 6 | def init_log(output_dir): 7 | logging.basicConfig(level=logging.DEBUG, 8 | format='%(asctime)s %(message)s', 9 | datefmt='%Y%m%d-%H:%M:%S', 10 | 
filename=os.path.join(output_dir, 'log.log'), 11 | filemode='w') 12 | console = logging.StreamHandler() 13 | console.setLevel(logging.INFO) 14 | logging.getLogger('').addHandler(console) 15 | return logging 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /OV/utils/metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | from sklearn import metrics 6 | 7 | 8 | # for decompensation, in-hospital mortality 9 | 10 | def print_metrics_binary(y_true, predictions, verbose=1): 11 | predictions = np.array(predictions) 12 | if len(predictions.shape) == 1: 13 | predictions = np.stack([1 - predictions, predictions]).transpose((1, 0)) 14 | 15 | cf = metrics.confusion_matrix(y_true, predictions.argmax(axis=1)) 16 | if verbose: 17 | print("confusion matrix:") 18 | print(cf) 19 | cf = cf.astype(np.float32) 20 | 21 | acc = (cf[0][0] + cf[1][1]) / np.sum(cf) 22 | prec0 = cf[0][0] / (cf[0][0] + cf[1][0]) 23 | prec1 = cf[1][1] / (cf[1][1] + cf[0][1]) 24 | rec0 = cf[0][0] / (cf[0][0] + cf[0][1]) 25 | rec1 = cf[1][1] / (cf[1][1] + cf[1][0]) 26 | auroc = metrics.roc_auc_score(y_true, predictions[:, 1]) 27 | 28 | (precisions, recalls, thresholds) = metrics.precision_recall_curve(y_true, predictions[:, 1]) 29 | auprc = metrics.auc(recalls, precisions) 30 | minpse = np.max([min(x, y) for (x, y) in zip(precisions, recalls)]) 31 | f1_score=2*prec1*rec1/(prec1+rec1) 32 | if verbose: 33 | print("accuracy = {}".format(acc)) 34 | print("precision class 0 = {}".format(prec0)) 35 | print("precision class 1 = {}".format(prec1)) 36 | print("recall class 0 = {}".format(rec0)) 37 | print("recall class 1 = {}".format(rec1)) 38 | print("AUC of ROC = {}".format(auroc)) 39 | print("AUC of PRC = {}".format(auprc)) 40 | print("min(+P, Se) = {}".format(minpse)) 41 | print("f1_score = {}".format(f1_score)) 42 | 43 | return {"acc": acc, 44 | "prec0": prec0, 45 | "prec1": prec1, 46 | "rec0": rec0, 47 | "rec1": rec1, 48 | "auroc": auroc, 49 | "auprc": auprc, 50 | "minpse": minpse, 51 | "f1_score":f1_score} 52 | 53 | 54 | # for phenotyping 55 | 56 | def print_metrics_multilabel(y_true, predictions, verbose=1): 57 | y_true = np.array(y_true) 58 | predictions = np.array(predictions) 59 | 60 | auc_scores = metrics.roc_auc_score(y_true, predictions, average=None) 61 | ave_auc_micro = metrics.roc_auc_score(y_true, predictions, 62 | average="micro") 63 | ave_auc_macro = metrics.roc_auc_score(y_true, predictions, 64 | average="macro") 65 | ave_auc_weighted = metrics.roc_auc_score(y_true, predictions, 66 | average="weighted") 67 | 68 | coverage_error = metrics.coverage_error(y_true, predictions) 69 | label_ranking_loss = metrics.label_ranking_loss(y_true, predictions) 70 | 71 | if verbose: 72 | print("ROC AUC scores for labels:", auc_scores) 73 | print("ave_auc_micro = {}".format(ave_auc_micro)) 74 | print("ave_auc_macro = {}".format(ave_auc_macro)) 75 | print("ave_auc_weighted = {}".format(ave_auc_weighted)) 76 | 77 | return {"auc_scores": auc_scores, 78 | "ave_auc_micro": ave_auc_micro, 79 | "ave_auc_macro": ave_auc_macro, 80 | "ave_auc_weighted": ave_auc_weighted, 81 | "coverage_error": coverage_error, 82 | "label_ranking_loss": label_ranking_loss} 83 | 84 | 85 | # for length of stay 86 | 87 | def mean_absolute_percentage_error(y_true, y_pred): 88 | return np.mean(np.abs((y_true - y_pred) / (y_true + 0.1))) * 100 89 | 90 | 91 | def 
print_metrics_regression(y_true, predictions, verbose=1): 92 | predictions = np.array(predictions) 93 | predictions = np.maximum(predictions, 0).flatten() 94 | y_true = np.array(y_true) 95 | 96 | y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true] 97 | prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions] 98 | cf = metrics.confusion_matrix(y_true_bins, prediction_bins) 99 | if verbose: 100 | print("Custom bins confusion matrix:") 101 | print(cf) 102 | 103 | kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins, 104 | weights='linear') 105 | mad = metrics.mean_absolute_error(y_true, predictions) 106 | mse = metrics.mean_squared_error(y_true, predictions) 107 | mape = mean_absolute_percentage_error(y_true, predictions) 108 | 109 | if verbose: 110 | print("Mean absolute deviation (MAD) = {}".format(mad)) 111 | print("Mean squared error (MSE) = {}".format(mse)) 112 | print("Mean absolute percentage error (MAPE) = {}".format(mape)) 113 | print("Cohen kappa score = {}".format(kappa)) 114 | 115 | return {"mad": mad, 116 | "mse": mse, 117 | "mape": mape, 118 | "kappa": kappa} 119 | 120 | 121 | class LogBins: 122 | nbins = 10 123 | means = [0.611848, 2.587614, 6.977417, 16.465430, 37.053745, 124 | 81.816438, 182.303159, 393.334856, 810.964040, 1715.702848] 125 | 126 | 127 | def get_bin_log(x, nbins, one_hot=False): 128 | binid = int(np.log(x + 1) / 8.0 * nbins) 129 | if binid < 0: 130 | binid = 0 131 | if binid >= nbins: 132 | binid = nbins - 1 133 | 134 | if one_hot: 135 | ret = np.zeros((LogBins.nbins,)) 136 | ret[binid] = 1 137 | return ret 138 | return binid 139 | 140 | 141 | def get_estimate_log(prediction, nbins): 142 | bin_id = np.argmax(prediction) 143 | return LogBins.means[bin_id] 144 | 145 | 146 | def print_metrics_log_bins(y_true, predictions, verbose=1): 147 | y_true_bins = [get_bin_log(x, LogBins.nbins) for x in y_true] 148 | prediction_bins = [get_bin_log(x, LogBins.nbins) for x in predictions] 149 | cf = metrics.confusion_matrix(y_true_bins, prediction_bins) 150 | if verbose: 151 | print("LogBins confusion matrix:") 152 | print(cf) 153 | return print_metrics_regression(y_true, predictions, verbose) 154 | 155 | 156 | class CustomBins: 157 | inf = 1e18 158 | bins = [(-inf, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 14), (14, +inf)] 159 | nbins = len(bins) 160 | means = [11.450379, 35.070846, 59.206531, 83.382723, 107.487817, 161 | 131.579534, 155.643957, 179.660558, 254.306624, 585.325890] 162 | 163 | 164 | def get_bin_custom(x, nbins, one_hot=False): 165 | for i in range(nbins): 166 | a = CustomBins.bins[i][0] * 24.0 167 | b = CustomBins.bins[i][1] * 24.0 168 | if a <= x < b: 169 | if one_hot: 170 | ret = np.zeros((CustomBins.nbins,)) 171 | ret[i] = 1 172 | return ret 173 | return i 174 | return None 175 | 176 | 177 | def get_estimate_custom(prediction, nbins): 178 | bin_id = np.argmax(prediction) 179 | assert 0 <= bin_id < nbins 180 | return CustomBins.means[bin_id] 181 | 182 | 183 | def print_metrics_custom_bins(y_true, predictions, verbose=1): 184 | return print_metrics_regression(y_true, predictions, verbose) 185 | -------------------------------------------------------------------------------- /OV/utils/preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import platform 6 | import pickle 7 | import json 8 | import os 9 | 10 | 11 | class Discretizer: 
12 | def __init__(self, timestep=0.8, store_masks=True, impute_strategy='zero', start_time='zero', 13 | config_path=os.path.join(os.path.dirname(__file__), 'resources/discretizer_config.json')): 14 | 15 | with open(config_path) as f: 16 | config = json.load(f) 17 | self._id_to_channel = config['id_to_channel'] 18 | self._channel_to_id = dict(zip(self._id_to_channel, range(len(self._id_to_channel)))) 19 | self._is_categorical_channel = config['is_categorical_channel'] 20 | self._possible_values = config['possible_values'] 21 | self._normal_values = config['normal_values'] 22 | 23 | self._header = ["Hours"] + self._id_to_channel 24 | self._timestep = timestep 25 | self._store_masks = store_masks 26 | self._start_time = start_time 27 | self._impute_strategy = impute_strategy 28 | 29 | # for statistics 30 | self._done_count = 0 31 | self._empty_bins_sum = 0 32 | self._unused_data_sum = 0 33 | 34 | def transform(self, X, header=None, end=None): 35 | if header is None: 36 | header = self._header 37 | assert header[0] == "Hours" 38 | eps = 1e-6 39 | 40 | N_channels = len(self._id_to_channel) 41 | ts = [float(row[0]) for row in X] 42 | for i in range(len(ts) - 1): 43 | assert ts[i] < ts[i+1] + eps 44 | 45 | if self._start_time == 'relative': 46 | first_time = ts[0] 47 | elif self._start_time == 'zero': 48 | first_time = 0 49 | else: 50 | raise ValueError("start_time is invalid") 51 | 52 | if end is None: 53 | max_hours = max(ts) - first_time 54 | else: 55 | max_hours = end - first_time 56 | 57 | N_bins = int(max_hours / self._timestep + 1.0 - eps) 58 | 59 | cur_len = 0 60 | begin_pos = [0 for i in range(N_channels)] 61 | end_pos = [0 for i in range(N_channels)] 62 | for i in range(N_channels): 63 | channel = self._id_to_channel[i] 64 | begin_pos[i] = cur_len 65 | if self._is_categorical_channel[channel]: 66 | end_pos[i] = begin_pos[i] + len(self._possible_values[channel]) 67 | else: 68 | end_pos[i] = begin_pos[i] + 1 69 | cur_len = end_pos[i] 70 | 71 | data = np.zeros(shape=(N_bins, cur_len), dtype=float) 72 | mask = np.zeros(shape=(N_bins, N_channels), dtype=int) 73 | original_value = [["" for j in range(N_channels)] for i in range(N_bins)] 74 | total_data = 0 75 | unused_data = 0 76 | 77 | def write(data, bin_id, channel, value, begin_pos): 78 | channel_id = self._channel_to_id[channel] 79 | if self._is_categorical_channel[channel]: 80 | category_id = self._possible_values[channel].index(value) 81 | N_values = len(self._possible_values[channel]) 82 | one_hot = np.zeros((N_values,)) 83 | one_hot[category_id] = 1 84 | for pos in range(N_values): 85 | data[bin_id, begin_pos[channel_id] + pos] = one_hot[pos] 86 | else: 87 | data[bin_id, begin_pos[channel_id]] = float(value) 88 | 89 | for row in X: 90 | t = float(row[0]) - first_time 91 | if t > max_hours + eps: 92 | continue 93 | bin_id = int(t / self._timestep - eps) 94 | assert 0 <= bin_id < N_bins 95 | 96 | for j in range(1, len(row)): 97 | if row[j] == "": 98 | continue 99 | channel = header[j] 100 | channel_id = self._channel_to_id[channel] 101 | 102 | total_data += 1 103 | if mask[bin_id][channel_id] == 1: 104 | unused_data += 1 105 | mask[bin_id][channel_id] = 1 106 | 107 | write(data, bin_id, channel, row[j], begin_pos) 108 | original_value[bin_id][channel_id] = row[j] 109 | 110 | # impute missing values 111 | 112 | if self._impute_strategy not in ['zero', 'normal_value', 'previous', 'next']: 113 | raise ValueError("impute strategy is invalid") 114 | 115 | if self._impute_strategy in ['normal_value', 'previous']: 116 | prev_values = [[] for 
i in range(len(self._id_to_channel))] 117 | for bin_id in range(N_bins): 118 | for channel in self._id_to_channel: 119 | channel_id = self._channel_to_id[channel] 120 | if mask[bin_id][channel_id] == 1: 121 | prev_values[channel_id].append(original_value[bin_id][channel_id]) 122 | continue 123 | if self._impute_strategy == 'normal_value': 124 | imputed_value = self._normal_values[channel] 125 | if self._impute_strategy == 'previous': 126 | if len(prev_values[channel_id]) == 0: 127 | imputed_value = self._normal_values[channel] 128 | else: 129 | imputed_value = prev_values[channel_id][-1] 130 | write(data, bin_id, channel, imputed_value, begin_pos) 131 | 132 | if self._impute_strategy == 'next': 133 | prev_values = [[] for i in range(len(self._id_to_channel))] 134 | for bin_id in range(N_bins-1, -1, -1): 135 | for channel in self._id_to_channel: 136 | channel_id = self._channel_to_id[channel] 137 | if mask[bin_id][channel_id] == 1: 138 | prev_values[channel_id].append(original_value[bin_id][channel_id]) 139 | continue 140 | if len(prev_values[channel_id]) == 0: 141 | imputed_value = self._normal_values[channel] 142 | else: 143 | imputed_value = prev_values[channel_id][-1] 144 | write(data, bin_id, channel, imputed_value, begin_pos) 145 | 146 | empty_bins = np.sum([1 - min(1, np.sum(mask[i, :])) for i in range(N_bins)]) 147 | self._done_count += 1 148 | self._empty_bins_sum += empty_bins / (N_bins + eps) 149 | self._unused_data_sum += unused_data / (total_data + eps) 150 | 151 | if self._store_masks: 152 | data = np.hstack([data, mask.astype(np.float32)]) 153 | 154 | # create new header 155 | new_header = [] 156 | for channel in self._id_to_channel: 157 | if self._is_categorical_channel[channel]: 158 | values = self._possible_values[channel] 159 | for value in values: 160 | new_header.append(channel + "->" + value) 161 | else: 162 | new_header.append(channel) 163 | 164 | if self._store_masks: 165 | for i in range(len(self._id_to_channel)): 166 | channel = self._id_to_channel[i] 167 | new_header.append("mask->" + channel) 168 | 169 | new_header = ",".join(new_header) 170 | 171 | return (data, new_header) 172 | 173 | def print_statistics(self): 174 | print("statistics of discretizer:") 175 | print("\tconverted {} examples".format(self._done_count)) 176 | print("\taverage unused data = {:.2f} percent".format(100.0 * self._unused_data_sum / self._done_count)) 177 | print("\taverage empty bins = {:.2f} percent".format(100.0 * self._empty_bins_sum / self._done_count)) 178 | 179 | 180 | class Normalizer: 181 | def __init__(self, fields=None): 182 | self._means = None 183 | self._stds = None 184 | self._fields = None 185 | if fields is not None: 186 | self._fields = [col for col in fields] 187 | 188 | self._sum_x = None 189 | self._sum_sq_x = None 190 | self._count = 0 191 | 192 | def _feed_data(self, x): 193 | x = np.array(x) 194 | self._count += x.shape[0] 195 | if self._sum_x is None: 196 | self._sum_x = np.sum(x, axis=0) 197 | self._sum_sq_x = np.sum(x**2, axis=0) 198 | else: 199 | self._sum_x += np.sum(x, axis=0) 200 | self._sum_sq_x += np.sum(x**2, axis=0) 201 | 202 | def _save_params(self, save_file_path): 203 | eps = 1e-7 204 | with open(save_file_path, "wb") as save_file: 205 | N = self._count 206 | self._means = 1.0 / N * self._sum_x 207 | self._stds = np.sqrt(1.0/(N - 1) * (self._sum_sq_x - 2.0 * self._sum_x * self._means + N * self._means**2)) 208 | self._stds[self._stds < eps] = eps 209 | pickle.dump(obj={'means': self._means, 210 | 'stds': self._stds}, 211 | file=save_file, 212 | 
protocol=2) 213 | 214 | def load_params(self, load_file_path): 215 | with open(load_file_path, "rb") as load_file: 216 | if platform.python_version()[0] == '2': 217 | dct = pickle.load(load_file) 218 | else: 219 | dct = pickle.load(load_file, encoding='latin1') 220 | self._means = dct['means'] 221 | self._stds = dct['stds'] 222 | 223 | def transform(self, X): 224 | if self._fields is None: 225 | fields = range(X.shape[1]) 226 | else: 227 | fields = self._fields 228 | ret = 1.0 * X 229 | for col in fields: 230 | ret[:, col] = (X[:, col] - self._means[col]) / self._stds[col] 231 | return ret 232 | -------------------------------------------------------------------------------- /OV/utils/readers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import os 5 | import numpy as np 6 | import random 7 | 8 | 9 | class Reader(object): 10 | def __init__(self, dataset_dir, listfile=None): 11 | self._dataset_dir = dataset_dir 12 | self._current_index = 0 13 | if listfile is None: 14 | listfile_path = os.path.join(dataset_dir, "listfile.csv") 15 | else: 16 | listfile_path = listfile 17 | with open(listfile_path, "r") as lfile: 18 | self._data = lfile.readlines() 19 | self._listfile_header = self._data[0] 20 | self._data = self._data[1:] 21 | 22 | def get_number_of_examples(self): 23 | return len(self._data) 24 | 25 | def random_shuffle(self, seed=None): 26 | if seed is not None: 27 | random.seed(seed) 28 | random.shuffle(self._data) 29 | 30 | def read_example(self, index): 31 | raise NotImplementedError() 32 | 33 | def read_next(self): 34 | to_read_index = self._current_index 35 | self._current_index += 1 36 | if self._current_index == self.get_number_of_examples(): 37 | self._current_index = 0 38 | return self.read_example(to_read_index) 39 | 40 | 41 | class DecompensationReader(Reader): 42 | def __init__(self, dataset_dir, listfile=None): 43 | """ Reader for decompensation prediction task. 44 | :param dataset_dir: Directory where timeseries files are stored. 45 | :param listfile: Path to a listfile. If this parameter is left `None` then 46 | `dataset_dir/listfile.csv` will be used. 47 | """ 48 | Reader.__init__(self, dataset_dir, listfile) 49 | self._data = [line.split(',') for line in self._data] 50 | self._data = [(x, float(t), int(y)) for (x, t, y) in self._data] 51 | 52 | def _read_timeseries(self, ts_filename, time_bound): 53 | ret = [] 54 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 55 | header = tsfile.readline().strip().split(',') 56 | assert header[0] == "Hours" 57 | for line in tsfile: 58 | mas = line.strip().split(',') 59 | t = float(mas[0]) 60 | if t > time_bound + 1e-6: 61 | break 62 | ret.append(np.array(mas)) 63 | return (np.stack(ret), header) 64 | 65 | def read_example(self, index): 66 | """ Read the example with given index. 67 | 68 | :param index: Index of the line of the listfile to read (counting starts from 0). 69 | :return: Directory with the following keys: 70 | X : np.array 71 | 2D array containing all events. Each row corresponds to a moment. 72 | First column is the time and other columns correspond to different 73 | variables. 74 | t : float 75 | Length of the data in hours. Note, in general, it is not equal to the 76 | timestamp of last event. 77 | y : int (0 or 1) 78 | Mortality within next 24 hours. 79 | header : array of strings 80 | Names of the columns. The ordering of the columns is always the same. 
81 | name: Name of the sample. 82 | """ 83 | if index < 0 or index >= len(self._data): 84 | raise ValueError("Index must be from 0 (inclusive) to number of examples (exclusive).") 85 | 86 | name = self._data[index][0] 87 | t = self._data[index][1] 88 | y = self._data[index][2] 89 | (X, header) = self._read_timeseries(name, t) 90 | 91 | return {"X": X, 92 | "t": t, 93 | "y": y, 94 | "header": header, 95 | "name": name} 96 | 97 | 98 | class InHospitalMortalityReader(Reader): 99 | def __init__(self, dataset_dir, listfile=None, period_length=48.0): 100 | """ Reader for in-hospital moratality prediction task. 101 | 102 | :param dataset_dir: Directory where timeseries files are stored. 103 | :param listfile: Path to a listfile. If this parameter is left `None` then 104 | `dataset_dir/listfile.csv` will be used. 105 | :param period_length: Length of the period (in hours) from which the prediction is done. 106 | """ 107 | Reader.__init__(self, dataset_dir, listfile) 108 | self._data = [line.split(',') for line in self._data] 109 | self._data = [(x, int(y)) for (x, y) in self._data] 110 | self._period_length = period_length 111 | 112 | def _read_timeseries(self, ts_filename): 113 | ret = [] 114 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 115 | header = tsfile.readline().strip().split(',') 116 | assert header[0] == "Hours" 117 | for line in tsfile: 118 | mas = line.strip().split(',') 119 | ret.append(np.array(mas)) 120 | return (np.stack(ret), header) 121 | 122 | def read_example(self, index): 123 | """ Reads the example with given index. 124 | 125 | :param index: Index of the line of the listfile to read (counting starts from 0). 126 | :return: Dictionary with the following keys: 127 | X : np.array 128 | 2D array containing all events. Each row corresponds to a moment. 129 | First column is the time and other columns correspond to different 130 | variables. 131 | t : float 132 | Length of the data in hours. Note, in general, it is not equal to the 133 | timestamp of last event. 134 | y : int (0 or 1) 135 | In-hospital mortality. 136 | header : array of strings 137 | Names of the columns. The ordering of the columns is always the same. 138 | name: Name of the sample. 139 | """ 140 | if index < 0 or index >= len(self._data): 141 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 142 | 143 | name = self._data[index][0] 144 | t = self._period_length 145 | y = self._data[index][1] 146 | (X, header) = self._read_timeseries(name) 147 | 148 | return {"X": X, 149 | "t": t, 150 | "y": y, 151 | "header": header, 152 | "name": name} 153 | 154 | 155 | class LengthOfStayReader(Reader): 156 | def __init__(self, dataset_dir, listfile=None): 157 | """ Reader for length of stay prediction task. 158 | 159 | :param dataset_dir: Directory where timeseries files are stored. 160 | :param listfile: Path to a listfile. If this parameter is left `None` then 161 | `dataset_dir/listfile.csv` will be used. 
162 | """ 163 | Reader.__init__(self, dataset_dir, listfile) 164 | self._data = [line.split(',') for line in self._data] 165 | self._data = [(x, float(t), float(y)) for (x, t, y) in self._data] 166 | 167 | def _read_timeseries(self, ts_filename, time_bound): 168 | ret = [] 169 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 170 | header = tsfile.readline().strip().split(',') 171 | assert header[0] == "Hours" 172 | for line in tsfile: 173 | mas = line.strip().split(',') 174 | t = float(mas[0]) 175 | if t > time_bound + 1e-6: 176 | break 177 | ret.append(np.array(mas)) 178 | return (np.stack(ret), header) 179 | 180 | def read_example(self, index): 181 | """ Reads the example with given index. 182 | 183 | :param index: Index of the line of the listfile to read (counting starts from 0). 184 | :return: Dictionary with the following keys: 185 | X : np.array 186 | 2D array containing all events. Each row corresponds to a moment. 187 | First column is the time and other columns correspond to different 188 | variables. 189 | t : float 190 | Length of the data in hours. Note, in general, it is not equal to the 191 | timestamp of last event. 192 | y : float 193 | Remaining time in ICU. 194 | header : array of strings 195 | Names of the columns. The ordering of the columns is always the same. 196 | name: Name of the sample. 197 | """ 198 | if index < 0 or index >= len(self._data): 199 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 200 | 201 | name = self._data[index][0] 202 | t = self._data[index][1] 203 | y = self._data[index][2] 204 | (X, header) = self._read_timeseries(name, t) 205 | 206 | return {"X": X, 207 | "t": t, 208 | "y": y, 209 | "header": header, 210 | "name": name} 211 | 212 | 213 | class PhenotypingReader(Reader): 214 | def __init__(self, dataset_dir, listfile=None): 215 | """ Reader for phenotype classification task. 216 | 217 | :param dataset_dir: Directory where timeseries files are stored. 218 | :param listfile: Path to a listfile. If this parameter is left `None` then 219 | `dataset_dir/listfile.csv` will be used. 220 | """ 221 | Reader.__init__(self, dataset_dir, listfile) 222 | self._data = [line.split(',') for line in self._data] 223 | self._data = [(mas[0], float(mas[1]), list(map(int, mas[2:]))) for mas in self._data] 224 | 225 | def _read_timeseries(self, ts_filename): 226 | ret = [] 227 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 228 | header = tsfile.readline().strip().split(',') 229 | assert header[0] == "Hours" 230 | for line in tsfile: 231 | mas = line.strip().split(',') 232 | ret.append(np.array(mas)) 233 | return (np.stack(ret), header) 234 | 235 | def read_example(self, index): 236 | """ Reads the example with given index. 237 | 238 | :param index: Index of the line of the listfile to read (counting starts from 0). 239 | :return: Dictionary with the following keys: 240 | X : np.array 241 | 2D array containing all events. Each row corresponds to a moment. 242 | First column is the time and other columns correspond to different 243 | variables. 244 | t : float 245 | Length of the data in hours. Note, in general, it is not equal to the 246 | timestamp of last event. 247 | y : array of ints 248 | Phenotype labels. 249 | header : array of strings 250 | Names of the columns. The ordering of the columns is always the same. 251 | name: Name of the sample. 
252 | """ 253 | if index < 0 or index >= len(self._data): 254 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 255 | 256 | name = self._data[index][0] 257 | t = self._data[index][1] 258 | y = self._data[index][2] 259 | (X, header) = self._read_timeseries(name) 260 | 261 | return {"X": X, 262 | "t": t, 263 | "y": y, 264 | "header": header, 265 | "name": name} 266 | 267 | 268 | class MultitaskReader(Reader): 269 | def __init__(self, dataset_dir, listfile=None): 270 | """ Reader for multitask learning. 271 | 272 | :param dataset_dir: Directory where timeseries files are stored. 273 | :param listfile: Path to a listfile. If this parameter is left `None` then 274 | `dataset_dir/listfile.csv` will be used. 275 | """ 276 | Reader.__init__(self, dataset_dir, listfile) 277 | self._data = [line.split(',') for line in self._data] 278 | 279 | def process_ihm(x): 280 | return list(map(int, x.split(';'))) 281 | 282 | def process_los(x): 283 | x = x.split(';') 284 | if x[0] == '': 285 | return ([], []) 286 | return (list(map(int, x[:len(x)//2])), list(map(float, x[len(x)//2:]))) 287 | 288 | def process_ph(x): 289 | return list(map(int, x.split(';'))) 290 | 291 | def process_decomp(x): 292 | x = x.split(';') 293 | if x[0] == '': 294 | return ([], []) 295 | return (list(map(int, x[:len(x)//2])), list(map(int, x[len(x)//2:]))) 296 | 297 | self._data = [(fname, float(t), process_ihm(ihm), process_los(los), 298 | process_ph(pheno), process_decomp(decomp)) 299 | for fname, t, ihm, los, pheno, decomp in self._data] 300 | 301 | def _read_timeseries(self, ts_filename): 302 | ret = [] 303 | with open(os.path.join(self._dataset_dir, ts_filename), "r") as tsfile: 304 | header = tsfile.readline().strip().split(',') 305 | assert header[0] == "Hours" 306 | for line in tsfile: 307 | mas = line.strip().split(',') 308 | ret.append(np.array(mas)) 309 | return (np.stack(ret), header) 310 | 311 | def read_example(self, index): 312 | """ Reads the example with given index. 313 | 314 | :param index: Index of the line of the listfile to read (counting starts from 0). 315 | :return: Return dictionary with the following keys: 316 | X : np.array 317 | 2D array containing all events. Each row corresponds to a moment. 318 | First column is the time and other columns correspond to different 319 | variables. 320 | t : float 321 | Length of the data in hours. Note, in general, it is not equal to the 322 | timestamp of last event. 323 | ihm : array 324 | Array of 3 integers: [pos, mask, label]. 325 | los : array 326 | Array of 2 arrays: [masks, labels]. 327 | pheno : array 328 | Array of 25 binary integers (phenotype labels). 329 | decomp : array 330 | Array of 2 arrays: [masks, labels]. 331 | header : array of strings 332 | Names of the columns. The ordering of the columns is always the same. 333 | name: Name of the sample. 
334 | """ 335 | if index < 0 or index >= len(self._data): 336 | raise ValueError("Index must be from 0 (inclusive) to number of lines (exclusive).") 337 | 338 | name = self._data[index][0] 339 | (X, header) = self._read_timeseries(name) 340 | 341 | return {"X": X, 342 | "t": self._data[index][1], 343 | "ihm": self._data[index][2], 344 | "los": self._data[index][3], 345 | "pheno": self._data[index][4], 346 | "decomp": self._data[index][5], 347 | "header": header, 348 | "name": name} 349 | -------------------------------------------------------------------------------- /OV/utils/resources/channel_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "Glucose": { 3 | "possible_values": [] 4 | }, 5 | "Systolic blood pressure": { 6 | "possible_values": [] 7 | }, 8 | "Glascow coma scale verbal response": { 9 | "possible_values": ["1 No Response", "1.0 ET/Trach", "2 Incomp sounds", "3 Inapprop words", "4 Confused", "5 Oriented", "Confused", "Inappropriate Words", "Incomprehensible sounds", "No Response", "No Response-ETT", "Oriented"], 10 | "values": { 11 | "No Response-ETT": 1, 12 | "No Response": 1, 13 | "1 No Response": 1, 14 | "1.0 ET/Trach": 1, 15 | "2 Incomp sounds": 2, 16 | "Incomprehensible sounds": 2, 17 | "3 Inapprop words": 3, 18 | "Inappropriate Words": 3, 19 | "4 Confused": 4, 20 | "Confused": 4, 21 | "5 Oriented": 5, 22 | "Oriented": 5 23 | } 24 | }, 25 | "Temperature": { 26 | "possible_values": [] 27 | }, 28 | "Weight": { 29 | "possible_values": [] 30 | }, 31 | "Diastolic blood pressure": { 32 | "possible_values": [] 33 | }, 34 | "Fraction inspired oxygen": { 35 | "possible_values": [] 36 | }, 37 | "Glascow coma scale total": { 38 | "possible_values": ["10", "11", "12", "13", "14", "15", "3", "4", "5", "6", "7", "8", "9"], 39 | "values": { 40 | "3": 3, 41 | "4": 4, 42 | "5": 5, 43 | "6": 6, 44 | "7": 7, 45 | "8": 8, 46 | "9": 9, 47 | "10": 10, 48 | "11": 11, 49 | "12": 12, 50 | "13": 13, 51 | "14": 14, 52 | "15": 15 53 | } 54 | }, 55 | "Capillary refill rate": { 56 | "possible_values": ["0.0", "1.0"], 57 | "values": { 58 | "0.0": 0, 59 | "1.0": 1 60 | } 61 | }, 62 | "Mean blood pressure": { 63 | "possible_values": [] 64 | }, 65 | "Heart Rate": { 66 | "possible_values": [] 67 | }, 68 | "Oxygen saturation": { 69 | "possible_values": [] 70 | }, 71 | "pH": { 72 | "possible_values": [] 73 | }, 74 | "Height": { 75 | "possible_values": [] 76 | }, 77 | "Glascow coma scale eye opening": { 78 | "possible_values": ["1 No Response", "2 To pain", "3 To speech", "4 Spontaneously", "None", "Spontaneously", "To Pain", "To Speech"], 79 | "values": { 80 | "None": 0, 81 | "1 No Response": 1, 82 | "2 To pain": 2, 83 | "To Pain": 2, 84 | "3 To speech": 3, 85 | "To Speech": 3, 86 | "4 Spontaneously": 4, 87 | "Spontaneously": 4 88 | } 89 | }, 90 | "Respiratory rate": { 91 | "possible_values": [] 92 | }, 93 | "Glascow coma scale motor response": { 94 | "possible_values": ["1 No Response", "2 Abnorm extensn", "3 Abnorm flexion", "4 Flex-withdraws", "5 Localizes Pain", "6 Obeys Commands", "Abnormal Flexion", "Abnormal extension", "Flex-withdraws", "Localizes Pain", "No response", "Obeys Commands"], 95 | "values": { 96 | "1 No Response": 1, 97 | "No response": 1, 98 | "2 Abnorm extensn": 2, 99 | "Abnormal extension": 2, 100 | "3 Abnorm flexion": 3, 101 | "Abnormal Flexion": 3, 102 | "4 Flex-withdraws": 4, 103 | "Flex-withdraws": 4, 104 | "5 Localizes Pain": 5, 105 | "Localizes Pain": 5, 106 | "6 Obeys Commands": 6, 107 | "Obeys Commands": 6 108 | } 109 | } 110 
| } 111 | -------------------------------------------------------------------------------- /OV/utils/resources/discretizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "id_to_channel": [ 3 | "Capillary refill rate", 4 | "Diastolic blood pressure", 5 | "Fraction inspired oxygen", 6 | "Glascow coma scale eye opening", 7 | "Glascow coma scale motor response", 8 | "Glascow coma scale total", 9 | "Glascow coma scale verbal response", 10 | "Glucose", 11 | "Heart Rate", 12 | "Height", 13 | "Mean blood pressure", 14 | "Oxygen saturation", 15 | "Respiratory rate", 16 | "Systolic blood pressure", 17 | "Temperature", 18 | "Weight", 19 | "pH" 20 | ], 21 | "is_categorical_channel": { 22 | "Capillary refill rate": true, 23 | "Diastolic blood pressure": false, 24 | "Fraction inspired oxygen": false, 25 | "Glascow coma scale eye opening": true, 26 | "Glascow coma scale motor response": true, 27 | "Glascow coma scale total": true, 28 | "Glascow coma scale verbal response": true, 29 | "Glucose": false, 30 | "Heart Rate": false, 31 | "Height": false, 32 | "Mean blood pressure": false, 33 | "Oxygen saturation": false, 34 | "Respiratory rate": false, 35 | "Systolic blood pressure": false, 36 | "Temperature": false, 37 | "Weight": false, 38 | "pH": false 39 | }, 40 | "possible_values": { 41 | "Capillary refill rate": [ 42 | "0.0", 43 | "1.0" 44 | ], 45 | "Diastolic blood pressure": [ 46 | 47 | ], 48 | "Fraction inspired oxygen": [ 49 | 50 | ], 51 | "Glascow coma scale eye opening": [ 52 | "To Pain", 53 | "3 To speech", 54 | "1 No Response", 55 | "4 Spontaneously", 56 | "None", 57 | "To Speech", 58 | "Spontaneously", 59 | "2 To pain" 60 | ], 61 | "Glascow coma scale motor response": [ 62 | "1 No Response", 63 | "3 Abnorm flexion", 64 | "Abnormal extension", 65 | "No response", 66 | "4 Flex-withdraws", 67 | "Localizes Pain", 68 | "Flex-withdraws", 69 | "Obeys Commands", 70 | "Abnormal Flexion", 71 | "6 Obeys Commands", 72 | "5 Localizes Pain", 73 | "2 Abnorm extensn" 74 | ], 75 | "Glascow coma scale total": [ 76 | "11", 77 | "10", 78 | "13", 79 | "12", 80 | "15", 81 | "14", 82 | "3", 83 | "5", 84 | "4", 85 | "7", 86 | "6", 87 | "9", 88 | "8" 89 | ], 90 | "Glascow coma scale verbal response": [ 91 | "1 No Response", 92 | "No Response", 93 | "Confused", 94 | "Inappropriate Words", 95 | "Oriented", 96 | "No Response-ETT", 97 | "5 Oriented", 98 | "Incomprehensible sounds", 99 | "1.0 ET/Trach", 100 | "4 Confused", 101 | "2 Incomp sounds", 102 | "3 Inapprop words" 103 | ], 104 | "Glucose": [ 105 | 106 | ], 107 | "Heart Rate": [ 108 | 109 | ], 110 | "Height": [ 111 | 112 | ], 113 | "Mean blood pressure": [ 114 | 115 | ], 116 | "Oxygen saturation": [ 117 | 118 | ], 119 | "Respiratory rate": [ 120 | 121 | ], 122 | "Systolic blood pressure": [ 123 | 124 | ], 125 | "Temperature": [ 126 | 127 | ], 128 | "Weight": [ 129 | 130 | ], 131 | "pH": [ 132 | 133 | ] 134 | }, 135 | "normal_values": { 136 | "Capillary refill rate": "0.0", 137 | "Diastolic blood pressure": "59.0", 138 | "Fraction inspired oxygen": "0.21", 139 | "Glascow coma scale eye opening": "4 Spontaneously", 140 | "Glascow coma scale motor response": "6 Obeys Commands", 141 | "Glascow coma scale total": "15", 142 | "Glascow coma scale verbal response": "5 Oriented", 143 | "Glucose": "128.0", 144 | "Heart Rate": "86", 145 | "Height": "170.0", 146 | "Mean blood pressure": "77.0", 147 | "Oxygen saturation": "98.0", 148 | "Respiratory rate": "19", 149 | "Systolic blood pressure": "118.0", 150 | "Temperature": 
"36.6", 151 | "Weight": "81.0", 152 | "pH": "7.4" 153 | } 154 | } -------------------------------------------------------------------------------- /OV/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | from . import common_utils 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(reader, discretizer, normalizer, small_part=False, return_names=False): 10 | N = reader.get_number_of_examples() 11 | if small_part: 12 | N = 1000 13 | ret = common_utils.read_chunk(reader, N) 14 | data = ret["X"] 15 | ts = ret["t"] 16 | labels = ret["y"] 17 | names = ret["name"] 18 | data = [discretizer.transform(X, end=t)[0] for (X, t) in zip(data, ts)] 19 | if normalizer is not None: 20 | data = [normalizer.transform(X) for X in data] 21 | whole_data = (np.array(data), labels) 22 | if not return_names: 23 | return whole_data 24 | return {"data": whole_data, "names": names} 25 | 26 | 27 | def save_results(names, pred, y_true, path): 28 | common_utils.create_directory(os.path.dirname(path)) 29 | with open(path, 'w') as f: 30 | f.write("stay,prediction,y_true\n") 31 | for (name, x, y) in zip(names, pred, y_true): 32 | f.write("{},{:.6f},{}\n".format(name, x, y)) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # M$^3$Care: Learning with Missing Modalities in Multimodal Healthcare Data 2 | 3 | The source code for *M$^3$Care: Learning with Missing Modalities in Multimodal Healthcare Data* 4 | 5 | Thanks for your interest in our work. 6 | 7 | Due to the limitation of upload file size, **a more detailed version** including the trained model saved file, code can be obtained through the Github repo [here](https://github.com/choczhang/M3Care) and Google drive [here](https://drive.google.com/drive/folders/1C95YymB3fOXsZ78Uk0iVQYXyo8Hu_Iqz?usp=sharing) 8 | 9 | ## Requirements 10 | 11 | * Install python, pytorch. We use Python 3.7.3, Pytorch 1.5.1. 12 | * If you plan to use GPU computation, install CUDA 13 | 14 | 15 | ## Run the model 16 | 17 | All the hyper-parameters and steps are included in the `.ipynb` file. 18 | --------------------------------------------------------------------------------