├── 1.png
├── README.md
├── data
│   ├── data.csv
│   ├── feature_table.xlsx
│   ├── testing_data.csv
│   └── training_data.csv
└── exp
    └── depression_detection.py
/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isrugeek/depression_detection/3b064313a530151d2d891b8ff7442ff932d19641/1.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # depression_detection
2 | Depression is a leading cause of disability worldwide.
3 | In clinical diagnosis, psychiatrists typically conduct face-to-face interviews guided by the widely used Diagnostic and Statistical Manual of Mental Disorders (DSM) criteria, which define nine classes of depression symptoms.
4 |
5 | Problem: this process is effective but not proactive.
6 | More than 70% of people in the early stages of depression do not consult a psychiatrist.
7 |
8 | Popular social media platforms such as Facebook and Twitter offer a complementary signal:
9 | - User-generated content (UGC)
10 | - Emotions and moods
11 | - Daily lives & mental states
12 |
13 | Advantages:
14 | - Proactive care
15 | - Observable depression behaviors
16 | - etc.
17 | # Results
18 |
--------------------------------------------------------------------------------
/data/feature_table.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isrugeek/depression_detection/3b064313a530151d2d891b8ff7442ff932d19641/data/feature_table.xlsx
--------------------------------------------------------------------------------
/exp/depression_detection.py:
--------------------------------------------------------------------------------
1 | # Hint: you should refer to the API in https://github.com/tensorflow/tensorflow/tree/r1.0/tensorflow/contrib
2 | # Use print(xxx) instead of print xxx
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import tensorflow as tf
8 | import numpy as np
9 | import shutil
10 | import os
11 | import random
12 |
13 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # valid levels are '0'-'3'; '3' hides INFO, WARNING, and ERROR logs
14 |
15 |
16 | # Global config, please don't modify
17 | config = tf.ConfigProto()
18 | config.gpu_options.allow_growth = True
19 | config.gpu_options.per_process_gpu_memory_fraction = 0.20
20 | sess = tf.Session(config=config)
21 | model_dir = r'../model'
22 |
23 | # Dataset location
24 | DEPRESSION_DATASET = '../data/data.csv'
25 | DEPRESSION_TRAIN = '../data/training_data.csv'
26 | DEPRESSION_TEST = '../data/testing_data.csv'
27 |
28 | # Delete the existing model directory so training starts fresh
29 | if os.path.exists(model_dir):
30 | shutil.rmtree(model_dir)
31 |
32 |
33 |
34 | # TODO: 1. Split data (5%)
35 |
36 | # Split data: split DEPRESSION_DATASET into DEPRESSION_TRAIN and DEPRESSION_TEST with a ratio of about 0.6:0.4.
37 | # Hint: first read DEPRESSION_DATASET, then write each line to DEPRESSION_TRAIN or DEPRESSION_TEST using
38 | # random.random() to draw a random real number between 0 and 1.
39 |
40 |
41 | # Reference https://docs.python.org/2/library/random.html
42 | #https://stackoverflow.com/questions/17412439/how-to-split-data-into-trainset-and-testset-randomly
43 | #https://cs230-stanford.github.io/train-dev-test-split.html
44 |
45 | datafile = open(DEPRESSION_DATASET)
46 | train_data = open(DEPRESSION_TRAIN, 'w')
47 | test_data = open(DEPRESSION_TEST, 'w')
48 |
49 | '''
50 | # Method 1: read all lines, shuffle, then cut at a fixed 0.6 split point
51 | with open(DEPRESSION_DATASET) as f:
52 |     data = f.read().split('\n')
53 |
54 | random.shuffle(data)
55 |
56 | split = int(0.6 * len(data))
57 | train_lines = data[:split]
58 | test_lines = data[split:]
59 | '''
60 |
61 | # Method 2: deterministic shuffle of all lines, then a 0.6/0.4 cut
62 | '''
63 | lines = datafile.readlines()
64 | lines.sort()  # fix the line order before shuffling
65 | random.seed(230)
66 | random.shuffle(lines)  # shuffles the ordering of lines (deterministic given the chosen seed)
67 | split = int(0.6 * len(lines))
68 | train_data.writelines(lines[:split])
69 | test_data.writelines(lines[split:])
70 | '''
78 |
79 | # Method 3 (used here): send each line to the training file with probability
80 | # 0.6, otherwise to the testing file, giving an expected 0.6:0.4 split
81 | train_ratio = 0.6
82 | for raw in datafile:
83 |     if random.random() < train_ratio:
84 |         train_data.write(raw)
85 |     else:
86 |         test_data.write(raw)
87 | datafile.close()
88 | train_data.close()
89 | test_data.close()
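
# Optional sanity check (an added sketch, not part of the original assignment):
# random.random() only yields 0.6:0.4 in expectation, so report the realized split.
with open(DEPRESSION_TRAIN) as f:
    n_train = sum(1 for _ in f)
with open(DEPRESSION_TEST) as f:
    n_test = sum(1 for _ in f)
print("Split: %d train / %d test (train fraction %.2f)"
      % (n_train, n_test, n_train / (n_train + n_test)))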
94 |
95 | # Reference https://www.tensorflow.org/versions/r1.1/get_started/tflearn
96 |
97 | # TODO: 2. Load data (5%)
98 |
99 | training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
100 | filename=DEPRESSION_TRAIN,
101 | target_dtype=np.int32,
102 | features_dtype=np.float32)
103 |
104 | test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
105 | filename=DEPRESSION_TEST,
106 | target_dtype=np.int32,
107 | features_dtype=np.float32)
108 |
109 | features_train = tf.constant(training_set.data)
110 | features_test = tf.constant(test_set.data)
111 | labels_train = tf.constant(training_set.target)
112 | labels_test = tf.constant(test_set.target)
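
# Note on the expected CSV layout (an assumption, based on the default
# target_column=-1 of load_csv_without_header): each row holds the feature
# values first, with the 0/1 depression label in the last column.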
113 |
114 | # TODO: 3. Normalize data (15%)
115 |
116 | # Stack training and testing features so they are normalized together
117 | normalize = tf.concat(axis=0, values=[features_train, features_test])
118 | '''
119 | Reference: https://www.tensorflow.org/api_docs/python/tf/nn/l2_normalize
120 | tf.nn.l2_normalize(
121 |     x,
122 |     axis=None,
123 |     epsilon=1e-12,
124 |     name=None,
125 |     dim=None
126 | )
127 | '''
128 | normalize = tf.nn.l2_normalize(x=normalize, dim=0)  # unit L2 norm per feature column
129 | # Slice the normalized matrix back apart: training rows first, then testing rows
130 | features_train = tf.slice(normalize, [0, 0], [len(training_set.data), -1])
131 | features_test = tf.slice(normalize, [len(training_set.data), 0], [len(test_set.data), -1])
135 |
136 |
137 | # Hint:
138 | # We must normalize all the data at the same time, so we combine the training
139 | # and testing sets first, and split them apart again after normalization. After
140 | # this step, features_train and features_test are the new feature tensors.
141 | # Some functions you may need: tf.nn.l2_normalize, tf.concat, tf.slice
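
# Quick numeric check (an added sketch, assuming the dataset fits in memory):
# after tf.nn.l2_normalize with dim=0, every feature column of the stacked
# matrix has unit L2 norm, so train and test really share one normalization.
_col_norms = sess.run(tf.sqrt(tf.reduce_sum(tf.square(normalize), axis=0)))
# all-zero columns keep norm ~0; every other column should be unit-norm
assert np.allclose(_col_norms[_col_norms > 1e-6], 1.0, atol=1e-5)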
142 |
143 | # TODO: 4. Build linear classifier with `tf.contrib.learn` (5%)
144 | # The feature dimensionality can be read from the CSV file
145 | dim = 112  # how many dimensions our features have
146 | feature_columns = [tf.contrib.layers.real_valued_column("", dimension=dim)]
147 |
148 | # You should fill in the arguments of LinearClassifier
149 | # Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/LinearClassifier
150 | # (kept commented out; the DNN classifier below is used instead)
151 | #classifier = tf.contrib.learn.LinearClassifier(feature_columns=feature_columns, model_dir=model_dir, n_classes=2, optimizer=tf.train.AdamOptimizer(0.01))
157 | # TODO: 5. Build DNN classifier with `tf.contrib.learn` (5%)
158 |
159 | # You should fill in the argument of DNNClassifier
160 | # Five hidden layers; two output classes (depressed / not depressed)
161 | classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
162 |                                             hidden_units=[64, 32, 16, 8, 64],
163 |                                             n_classes=2,
164 |                                             model_dir=model_dir)
168 | # Define the training inputs
169 | def get_train_inputs():
170 |     # Evaluate the normalized tensors via sess, then rewrap them as constants
171 |     # so they live in the estimator's own graph
172 |     x = tf.constant(features_train.eval(session=sess))
173 |     y = tf.constant(labels_train.eval(session=sess))
174 |     return x, y
174 |
175 | # Define the test inputs
176 | def get_test_inputs():
177 | x = tf.constant(features_test.eval(session=sess))
178 | y = tf.constant(labels_test.eval(session=sess))
179 |
180 | return x, y
181 |
182 | # TODO: 6. Fit model. (5%)
183 |
184 |
185 | classifier.fit(input_fn=get_train_inputs, steps=400)
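
# get_train_inputs returns the whole training set as constant tensors, so each
# step is a full-batch gradient update; steps=400 means 400 such updates.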
186 |
187 |
188 |
189 | validation_metrics = {
190 | "true_negatives":
191 | tf.contrib.learn.MetricSpec(
192 | metric_fn=tf.contrib.metrics.streaming_true_negatives,
193 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
194 | ),
195 | "true_positives":
196 | tf.contrib.learn.MetricSpec(
197 | metric_fn=tf.contrib.metrics.streaming_true_positives,
198 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
199 | ),
200 | "false_negatives":
201 | tf.contrib.learn.MetricSpec(
202 | metric_fn=tf.contrib.metrics.streaming_false_negatives,
203 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
204 | ),
205 | "false_positives":
206 | tf.contrib.learn.MetricSpec(
207 | metric_fn=tf.contrib.metrics.streaming_false_positives,
208 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
209 | ),
210 | }
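
# The streaming_* metrics keep running counts across evaluation batches, and
# PredictionKey.CLASSES compares predicted class labels (not probabilities)
# against the targets.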
211 |
212 | # TODO: 7. Make Evaluation (10%)
213 |
214 | # Evaluate the model and collect TN, FN, TP, FP
215 | result = classifier.evaluate(input_fn=get_test_inputs,
216 |                              steps=1,
217 |                              metrics=validation_metrics)
218 |
219 | TN = result["true_negatives"]
220 | FN = result["false_negatives"]
221 | TP = result["true_positives"]
222 | FP = result["false_positives"]
223 |
224 | # You should evaluate your model on the following metrics and print the results:
225 | # Accuracy
226 |
227 | # Precision in macro-average
228 |
229 | # Recall in macro-average
230 |
231 |
232 | acc = (TN + TP) / (TN + FN + TP + FP)
233 | print("Accuracy:", acc)
234 |
235 | # Per-class precision, then the unweighted (macro) average over both classes
236 | pr_pos = TP / (TP + FP)
237 | pr_neg = TN / (TN + FN)
238 | pre_mac = (pr_pos + pr_neg) / 2
239 | print("Precision in macro-average:", pre_mac)
240 |
241 | # Per-class recall, then macro average
242 | re_pos = TP / (TP + FN)
243 | re_neg = TN / (TN + FP)
244 | re_mac = (re_pos + re_neg) / 2
245 | print("Recall in macro-average:", re_mac)
246 |
247 | # Per-class F1, then macro average
248 | f1_score_pos = 2 * pr_pos * re_pos / (pr_pos + re_pos)
249 | f1_score_neg = 2 * pr_neg * re_neg / (pr_neg + re_neg)
250 | f1_score_macro = (f1_score_pos + f1_score_neg) / 2
251 | print("F1-score in macro-average:", f1_score_macro)
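
# Worked check with made-up counts (illustrative numbers only, not results):
# TP=30, TN=50, FP=5, FN=15 gives
#   accuracy        = (50 + 30) / 100      = 0.800
#   macro precision = (30/35 + 50/65) / 2  ~ 0.813
#   macro recall    = (30/45 + 50/55) / 2  ~ 0.788
#   macro F1        = (0.750 + 0.833) / 2  ~ 0.792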
254 |
--------------------------------------------------------------------------------