├── 1.png
├── README.md
├── data
│   ├── data.csv
│   ├── feature_table.xlsx
│   ├── testing_data.csv
│   └── training_data.csv
└── exp
    └── depression_detection.py
/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isrugeek/depression_detection/3b064313a530151d2d891b8ff7442ff932d19641/1.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # depression_detection
2 | Depression is a leading cause of disability worldwide.
3 | In clinical diagnosis, psychiatrists typically conduct face-to-face interviews guided by the widely used Diagnostic and Statistical Manual of Mental Disorders (DSM) criteria, which define nine classes of depression symptoms.
4 |
5 | Problem: this process is effective but not proactive.
6 | More than 70% of people in the early stages of depression do not consult a psychiatrist.
7 |
8 | Popular social media platforms such as Facebook and Twitter offer a complementary signal:
9 | - User-generated content (UGC)
10 | - Emotions and moods
11 | - Daily lives & mental states
12 |
13 | Advantages:
14 | - Proactive care
15 | - Observable depression behaviors
16 | - etc.
17 | # Results
18 |
--------------------------------------------------------------------------------
/data/feature_table.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isrugeek/depression_detection/3b064313a530151d2d891b8ff7442ff932d19641/data/feature_table.xlsx
--------------------------------------------------------------------------------
/exp/depression_detection.py:
--------------------------------------------------------------------------------
1 | # Hint: you should refer to the API in https://github.com/tensorflow/tensorflow/tree/r1.0/tensorflow/contrib
2 | # Use print(xxx) instead of print xxx
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import tensorflow as tf
8 | import numpy as np
9 | import shutil
10 | import os
11 | import random
12 |
13 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # valid levels are '0'-'3'; '3' hides INFO, WARNING, and ERROR logs
14 |
15 |
16 | # Global config, please don't modify
17 | config = tf.ConfigProto()
18 | config.gpu_options.allow_growth = True
19 | config.gpu_options.per_process_gpu_memory_fraction = 0.20
20 | sess = tf.Session(config=config)
21 | model_dir = r'../model'
22 |
23 | # Dataset location
24 | DEPRESSION_DATASET = '../data/data.csv'
25 | DEPRESSION_TRAIN = '../data/training_data.csv'
26 | DEPRESSION_TEST = '../data/testing_data.csv'
27 |
28 | # Delete the existing model directory so training starts fresh
29 | if os.path.exists(model_dir):
30 | shutil.rmtree(model_dir)
31 |
32 |
33 |
34 | # TODO: 1. Split data (5%)
35 |
36 | # Split data: split DEPRESSION_DATASET into DEPRESSION_TRAIN and DEPRESSION_TEST with a ratio of about 0.6:0.4.
37 | # Hint: first read DEPRESSION_DATASET, then write each line to DEPRESSION_TRAIN or DEPRESSION_TEST using
38 | # random.random() to draw a random real number between 0 and 1.
39 |
40 |
41 | # Reference https://docs.python.org/2/library/random.html
42 | #https://stackoverflow.com/questions/17412439/how-to-split-data-into-trainset-and-testset-randomly
43 | #https://cs230-stanford.github.io/train-dev-test-split.html
44 |
45 | datafile = open(DEPRESSION_DATASET)
46 | train_data = open(DEPRESSION_TRAIN, 'w')
47 | test_data = open(DEPRESSION_TEST, 'w')
48 |
49 | '''
50 | # Method 1: read all lines, shuffle, then cut at a fixed 0.6 split point
51 | with open(DEPRESSION_DATASET) as f:
52 |     data = f.read().split('\n')
53 |
54 | random.shuffle(data)
55 |
56 | split = int(0.6 * len(data))
57 | train_lines = data[:split]
58 | test_lines = data[split:]
59 | '''
60 |
61 | # Method 2: deterministic shuffle of all lines, then a 0.6/0.4 cut
62 | '''
63 | lines = datafile.readlines()
64 | lines.sort()  # fix the line order before shuffling
65 | random.seed(230)
66 | random.shuffle(lines)  # shuffles the ordering of lines (deterministic given the chosen seed)
67 | split = int(0.6 * len(lines))
68 | train_data.writelines(lines[:split])
69 | test_data.writelines(lines[split:])
70 | '''
78 |
79 | # Method 3 (used here): send each line to the training file with probability
80 | # 0.6, otherwise to the testing file, giving an expected 0.6:0.4 split
81 | train_ratio = 0.6
82 | for raw in datafile:
83 |     if random.random() < train_ratio:
84 |         train_data.write(raw)
85 |     else:
86 |         test_data.write(raw)
87 | datafile.close()
88 | train_data.close()
89 | test_data.close()
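
# Optional sanity check (an added sketch, not part of the original assignment):
# random.random() only yields 0.6:0.4 in expectation, so report the realized split.
with open(DEPRESSION_TRAIN) as f:
    n_train = sum(1 for _ in f)
with open(DEPRESSION_TEST) as f:
    n_test = sum(1 for _ in f)
print("Split: %d train / %d test (train fraction %.2f)"
      % (n_train, n_test, n_train / (n_train + n_test)))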
94 |
95 | # Reference https://www.tensorflow.org/versions/r1.1/get_started/tflearn
96 |
97 | # TODO: 2. Load data (5%)
98 |
99 | training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
100 | filename=DEPRESSION_TRAIN,
101 | target_dtype=np.int32,
102 | features_dtype=np.float32)
103 |
104 | test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
105 | filename=DEPRESSION_TEST,
106 | target_dtype=np.int32,
107 | features_dtype=np.float32)
108 |
109 | features_train = tf.constant(training_set.data)
110 | features_test = tf.constant(test_set.data)
111 | labels_train = tf.constant(training_set.target)
112 | labels_test = tf.constant(test_set.target)
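
# Note on the expected CSV layout (an assumption, based on the default
# target_column=-1 of load_csv_without_header): each row holds the feature
# values first, with the 0/1 depression label in the last column.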
113 |
114 | # TODO: 3. Normalize data (15%)
115 |
116 | # Stack training and testing features so they are normalized together
117 | normalize = tf.concat(axis=0, values=[features_train, features_test])
118 | '''
119 | Reference: https://www.tensorflow.org/api_docs/python/tf/nn/l2_normalize
120 | tf.nn.l2_normalize(
121 |     x,
122 |     axis=None,
123 |     epsilon=1e-12,
124 |     name=None,
125 |     dim=None
126 | )
127 | '''
128 | normalize = tf.nn.l2_normalize(x=normalize, dim=0)  # unit L2 norm per feature column
129 | # Slice the normalized matrix back apart: training rows first, then testing rows
130 | features_train = tf.slice(normalize, [0, 0], [len(training_set.data), -1])
131 | features_test = tf.slice(normalize, [len(training_set.data), 0], [len(test_set.data), -1])
135 |
136 |
137 | # Hint:
138 | # We must normalize all the data at the same time, so we combine the training
139 | # and testing sets first, and split them apart again after normalization. After
140 | # this step, features_train and features_test are the new feature tensors.
141 | # Some functions you may need: tf.nn.l2_normalize, tf.concat, tf.slice
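
# Quick numeric check (an added sketch, assuming the dataset fits in memory):
# after tf.nn.l2_normalize with dim=0, every feature column of the stacked
# matrix has unit L2 norm, so train and test really share one normalization.
_col_norms = sess.run(tf.sqrt(tf.reduce_sum(tf.square(normalize), axis=0)))
# all-zero columns keep norm ~0; every other column should be unit-norm
assert np.allclose(_col_norms[_col_norms > 1e-6], 1.0, atol=1e-5)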
142 |
143 | # TODO: 4. Build linear classifier with `tf.contrib.learn` (5%)
144 | # The feature dimensionality can be read from the CSV file
145 | dim = 112  # how many dimensions our features have
146 | feature_columns = [tf.contrib.layers.real_valued_column("", dimension=dim)]
147 |
148 | # You should fill in the arguments of LinearClassifier
149 | # Reference: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/LinearClassifier
150 | # (kept commented out; the DNN classifier below is used instead)
151 | #classifier = tf.contrib.learn.LinearClassifier(feature_columns=feature_columns, model_dir=model_dir, n_classes=2, optimizer=tf.train.AdamOptimizer(0.01))
157 | # TODO: 5. Build DNN classifier with `tf.contrib.learn` (5%)
158 |
159 | # You should fill in the argument of DNNClassifier
160 | # Five hidden layers; two output classes (depressed / not depressed)
161 | classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
162 |                                             hidden_units=[64, 32, 16, 8, 64],
163 |                                             n_classes=2,
164 |                                             model_dir=model_dir)
168 | # Define the training inputs
169 | def get_train_inputs():
170 |     # Evaluate the normalized tensors via sess, then rewrap them as constants
171 |     # so they live in the estimator's own graph
172 |     x = tf.constant(features_train.eval(session=sess))
173 |     y = tf.constant(labels_train.eval(session=sess))
174 |     return x, y
174 |
175 | # Define the test inputs
176 | def get_test_inputs():
177 | x = tf.constant(features_test.eval(session=sess))
178 | y = tf.constant(labels_test.eval(session=sess))
179 |
180 | return x, y
181 |
182 | # TODO: 6. Fit model. (5%)
183 |
184 |
185 | classifier.fit(input_fn=get_train_inputs, steps=400)
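
# get_train_inputs returns the whole training set as constant tensors, so each
# step is a full-batch gradient update; steps=400 means 400 such updates.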
186 |
187 |
188 |
189 | validation_metrics = {
190 | "true_negatives":
191 | tf.contrib.learn.MetricSpec(
192 | metric_fn=tf.contrib.metrics.streaming_true_negatives,
193 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
194 | ),
195 | "true_positives":
196 | tf.contrib.learn.MetricSpec(
197 | metric_fn=tf.contrib.metrics.streaming_true_positives,
198 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
199 | ),
200 | "false_negatives":
201 | tf.contrib.learn.MetricSpec(
202 | metric_fn=tf.contrib.metrics.streaming_false_negatives,
203 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
204 | ),
205 | "false_positives":
206 | tf.contrib.learn.MetricSpec(
207 | metric_fn=tf.contrib.metrics.streaming_false_positives,
208 | prediction_key=tf.contrib.learn.PredictionKey.CLASSES
209 | ),
210 | }
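
# The streaming_* metrics keep running counts across evaluation batches, and
# PredictionKey.CLASSES compares predicted class labels (not probabilities)
# against the targets.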
211 |
212 | # TODO: 7. Make Evaluation (10%)
213 |
214 | # Evaluate the model and collect TN, FN, TP, FP
215 | result = classifier.evaluate(input_fn=get_test_inputs,
216 |                              steps=1,
217 |                              metrics=validation_metrics)
218 |
219 | TN = result["true_negatives"]
220 | FN = result["false_negatives"]
221 | TP = result["true_positives"]
222 | FP = result["false_positives"]
223 |
224 | # You should evaluate your model on the following metrics and print the results:
225 | # Accuracy
226 |
227 | # Precision in macro-average
228 |
229 | # Recall in macro-average
230 |
231 |
232 | acc = (TN + TP) / (TN + FN + TP + FP)
233 | print("Accuracy:", acc)
234 |
235 | # Per-class precision, then the unweighted (macro) average over both classes
236 | pr_pos = TP / (TP + FP)
237 | pr_neg = TN / (TN + FN)
238 | pre_mac = (pr_pos + pr_neg) / 2
239 | print("Precision in macro-average:", pre_mac)
240 |
241 | # Per-class recall, then macro average
242 | re_pos = TP / (TP + FN)
243 | re_neg = TN / (TN + FP)
244 | re_mac = (re_pos + re_neg) / 2
245 | print("Recall in macro-average:", re_mac)
246 |
247 | # Per-class F1, then macro average
248 | f1_score_pos = 2 * pr_pos * re_pos / (pr_pos + re_pos)
249 | f1_score_neg = 2 * pr_neg * re_neg / (pr_neg + re_neg)
250 | f1_score_macro = (f1_score_pos + f1_score_neg) / 2
251 | print("F1-score in macro-average:", f1_score_macro)
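
# Worked check with made-up counts (illustrative numbers only, not results):
# TP=30, TN=50, FP=5, FN=15 gives
#   accuracy        = (50 + 30) / 100      = 0.800
#   macro precision = (30/35 + 50/65) / 2  ~ 0.813
#   macro recall    = (30/45 + 50/55) / 2  ~ 0.788
#   macro F1        = (0.750 + 0.833) / 2  ~ 0.792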
254 |
--------------------------------------------------------------------------------