├── APMCM ├── .idea │ ├── Code.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── DB │ ├── Advertising.csv │ ├── ArtandgraphicsandAnimation design.csv │ ├── Banking.csv │ ├── Beauty and personal care.csv │ ├── Biologyandchemicalsandpharmaceuticalsandmedical equipment.csv │ ├── Communications technology.csv │ ├── Computer hardware.csv │ ├── Computer software .csv │ ├── Constructionandinfrastructureandgardening.csv │ ├── Counselandconsulting.csv │ ├── Customer service and technical support.csv │ ├── Education.csv │ ├── Electronicsandappliancesandsemiconductorandinstrumentation.csv │ ├── Engineeringandmachineryandenergy.csv │ ├── Fashionandtextileandfurs.csv │ ├── Financeandauditingandtax.csv │ ├── General merchandiseandchainsandretail.csv │ ├── Graduates.csv │ ├── HR.csv │ ├── Hospitalandmedicalandcare.csv │ ├── Hotelsandtourism.csv │ ├── IT-Management.csv │ ├── IT-QM, technical support and more.csv │ ├── Insurance.csv │ ├── Internet development and application.csv │ ├── Legal professionandlaw.csv │ ├── Literatureandscreenwritingandwriting.csv │ ├── Logisticsandwarehousing.csv │ ├── Movies, TV and recreation.csv │ ├── Office administrationandlogistics.csv │ ├── Other.csv │ ├── PRs and news media.csv │ ├── Procurement.csv │ ├── Productionandoperation.csv │ ├── Property management.csv │ ├── QMSandsafetyandenvironmental protection.csv │ ├── Real property.csv │ ├── Restaurants & recreation.csv │ ├── Sales .csv │ ├── Sales management.csv │ ├── Science & Technology.csv │ ├── Securitiesandfinanceandinvestment.csv │ ├── Securityandhousekeepingandother.csv │ ├── Senior management.csv │ ├── Technical work.csv │ ├── Trade.csv │ ├── Translation.csv │ ├── Transportation service.csv │ └── marketandmarketing.csv ├── data │ ├── 2015 │ │ ├── 09.xlsx │ │ ├── 10.xlsx │ │ ├── 11.xlsx │ │ └── 12.xlsx │ ├── 2016 │ │ ├── 01.xlsx │ │ ├── 02.xlsx │ │ ├── 03.xlsx │ │ ├── 04.xlsx │ │ ├── 05.xlsx │ │ ├── 06.xlsx │ │ ├── 07.xlsx │ │ ├── 08.xlsx │ │ ├── 09.xlsx │ │ ├── 10.xlsx │ │ ├── 
11.xlsx │ │ └── 12.xlsx │ ├── 2017 │ │ ├── 01.xlsx │ │ ├── 02.xlsx │ │ ├── 03.xlsx │ │ ├── 04.xlsx │ │ ├── 05.xlsx │ │ ├── 06.xlsx │ │ ├── 07.xlsx │ │ ├── 08.xlsx │ │ ├── 09.xlsx │ │ ├── 10.xlsx │ │ ├── 11.xlsx │ │ └── 12.xlsx │ └── 2018 │ │ ├── 01.xlsx │ │ ├── 02.xlsx │ │ ├── 03.xlsx │ │ ├── 04.xlsx │ │ ├── 05.xlsx │ │ ├── 06.xlsx │ │ ├── 07.xlsx │ │ └── 08.xlsx ├── dataVisual │ ├── Advertising.png │ ├── ArtandgraphicsandAnimation design.png │ ├── Banking.png │ ├── Beauty and personal care.png │ ├── Biologyandchemicalsandpharmaceuticalsandmedical equipment.png │ ├── Communications technology.png │ ├── Computer hardware.png │ ├── Computer software .png │ ├── Constructionandinfrastructureandgardening.png │ ├── Counselandconsulting.png │ ├── Customer service and technical support.png │ ├── Education.png │ ├── Electronicsandappliancesandsemiconductorandinstrumentation.png │ ├── Engineeringandmachineryandenergy.png │ ├── Fashionandtextileandfurs.png │ ├── Financeandauditingandtax.png │ ├── General merchandiseandchainsandretail.png │ ├── Graduates.png │ ├── HR.png │ ├── Hospitalandmedicalandcare.png │ ├── Hotelsandtourism.png │ ├── IT-Management.png │ ├── IT-QM, technical support and more.png │ ├── Insurance.png │ ├── Internet development and application.png │ ├── Legal professionandlaw.png │ ├── Literatureandscreenwritingandwriting.png │ ├── Logisticsandwarehousing.png │ ├── Movies, TV and recreation.png │ ├── Office administrationandlogistics.png │ ├── Other.png │ ├── PRs and news media.png │ ├── Procurement.png │ ├── Productionandoperation.png │ ├── Property management.png │ ├── QMSandsafetyandenvironmental protection.png │ ├── Real property.png │ ├── Restaurants & recreation.png │ ├── Sales .png │ ├── Sales management.png │ ├── Science & Technology.png │ ├── Securitiesandfinanceandinvestment.png │ ├── Securityandhousekeepingandother.png │ ├── Senior management.png │ ├── Technical work.png │ ├── Trade.png │ ├── Translation.png │ ├── Transportation service.png │ └── 
marketandmarketing.png ├── dataVisualize.py ├── professions │ └── pros.xlsx └── readData.py ├── CNN_SVM ├── .idea │ ├── CNN-SVM.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── digit_test.data ├── digit_test_0.data ├── digit_test_1.data ├── digit_test_2.data ├── digit_test_3.data ├── digit_test_4.data ├── digit_test_5.data ├── digit_test_6.data ├── digit_test_7.data ├── digit_test_8.data ├── digit_test_9.data ├── digit_train.data ├── digit_train_0.data ├── digit_train_1.data ├── digit_train_2.data ├── digit_train_3.data ├── digit_train_4.data ├── digit_train_5.data ├── digit_train_6.data ├── digit_train_7.data ├── digit_train_8.data ├── digit_train_9.data └── training.py ├── COCO ├── .idea │ ├── COCO.iml │ ├── encodings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── caption.py ├── data │ └── annotations │ │ ├── captions_val2017.json │ │ ├── instances_val2017.json │ │ └── person_keypoints_val2017.json ├── keypoints.py └── segmentation.py └── README.md /APMCM/.idea/Code.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /APMCM/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /APMCM/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /APMCM/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 55 | 56 | 57 | 79 | 80 | 81 | 85 | 86 
# coding=utf8
"""CNN feature extractor + linear SVM on 16x16 digit images (CNN_SVM/training.py).

For each of the ten dataset splits ``digit_train_<k>.data`` /
``digit_test_<k>.data`` the script trains a small two-layer CNN, prints the
CNN's test accuracy, then fits a linear SVM on the activations of the CNN's
256-unit fully connected layer and prints the SVM's test accuracy.

Written against the TensorFlow 1.x graph/session API. Running the file as a
script behaves as before; importing it no longer starts training.
"""
import random

import numpy as np
import tensorflow as tf
from sklearn import svm

NUM_DATASETS = 10     # digit_{train,test}_0.data ... digit_{train,test}_9.data
IMAGE_SIDE = 16       # images are 16 x 16 pixels
NUM_PIXELS = IMAGE_SIDE * IMAGE_SIDE
NUM_CLASSES = 10
FEATURE_DIM = 256     # width of the fully connected layer fed to the SVM
TRAIN_STEPS = 1000
BATCH_SIZE = 50
# Dropout keep probability used while training. Found empirically by the
# original author: larger values converge slowly, smaller ones overfit at once.
TRAIN_KEEP_PROB = 0.6


def load_dataset(path):
    """Parse one ``.data`` file.

    Each non-blank line holds space-separated values: the first 256 are the
    pixel values (parsed as float, then truncated to int) and the next 10 are
    the one-hot class label.

    Args:
        path: path of the data file to read.

    Returns:
        A tuple ``(pixels, one_hot, labels)`` where ``pixels`` and ``one_hot``
        are numpy arrays with one row per sample and ``labels`` is a list with
        the arg-max class index of every one-hot row.
    """
    pixels = []
    one_hot = []
    with open(path) as data_file:
        for line in data_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = line.split(' ')
            pixels.append([int(float(v)) for v in fields[:NUM_PIXELS]])
            one_hot.append(
                [int(v) for v in fields[NUM_PIXELS:NUM_PIXELS + NUM_CLASSES]])
    labels = [int(np.argmax(row)) for row in one_hot]
    return np.array(pixels), np.array(one_hot), labels


def weight_variable(shape):
    """Create a weight variable initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Create a bias variable initialised to the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


def conv2d(x, W):
    """2-D convolution with stride 1 and SAME padding (spatial size unchanged)."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """2x2 max pooling with stride 2 (halves each spatial dimension)."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')


def build_network():
    """Build the CNN in the current default graph.

    Returns:
        A dict with the tensors/ops the training loop needs: the placeholders
        ``x``, ``y_`` and ``keep_prob``, the 256-d ``features`` tensor, and
        the ``train_step`` and ``accuracy`` ops.
    """
    # Placeholders that are fed with data at run time.
    x = tf.placeholder("float", [None, NUM_PIXELS])
    y_ = tf.placeholder("float", [None, NUM_CLASSES])

    # First convolution layer: 5x5 kernels, 1 -> 32 channels.
    w_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    # Images are 16x16 with one channel; -1 lets the batch dimension adapt.
    x_image = tf.reshape(x, [-1, IMAGE_SIDE, IMAGE_SIDE, 1])
    h_conv1 = tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1)
    # Max pooling is used because the pixels are all 0/1, which (per the
    # original author) makes average pooling uninformative.
    h_pool1 = max_pool_2x2(h_conv1)

    # Second convolution layer: 5x5 kernels, 32 -> 64 channels.
    w_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer. Two 2x2 poolings reduce 16x16 to 4x4, with 64
    # channels. Its 256-d output is the feature vector later given to the SVM.
    w_fc1 = weight_variable([4 * 4 * 64, FEATURE_DIM])
    b_fc1 = bias_variable([FEATURE_DIM])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 4 * 4 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

    # Dropout, to limit over-fitting.
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer; only used to back-propagate through the CNN.
    w_fc2 = weight_variable([FEATURE_DIM, NUM_CLASSES])
    b_fc2 = bias_variable([NUM_CLASSES])
    logits = tf.matmul(h_fc1_drop, w_fc2) + b_fc2

    # Summed cross-entropy, computed from the logits so that a softmax output
    # of exactly 0 cannot produce log(0) = -inf / NaN.
    cross_entropy = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
    # Optimise the objective with Adam.
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    y_conv = tf.nn.softmax(logits)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    return {
        'x': x,
        'y_': y_,
        'keep_prob': keep_prob,
        'features': h_fc1,
        'train_step': train_step,
        'accuracy': accuracy,
    }


def run_experiment(file_num):
    """Train and evaluate CNN and CNN+SVM on dataset split ``file_num``.

    Prints the CNN test accuracy and the SVM test accuracy.

    Args:
        file_num: index of the ``digit_train_<n>.data`` /
            ``digit_test_<n>.data`` pair to use.

    Raises:
        ValueError: if the training file contains no samples.
    """
    print('testing NO.%d dataset.......' % file_num)
    X_train, y_train, y_train_labels = load_dataset(
        'digit_train_%d.data' % file_num)
    X_test, y_test, y_test_labels = load_dataset(
        'digit_test_%d.data' % file_num)

    num_train = len(X_train)
    if num_train == 0:
        raise ValueError(
            'training file for dataset %d contains no samples' % file_num)
    # Sample from however many rows the file really has instead of a
    # hard-coded count, and never ask for more rows than exist.
    batch_size = min(BATCH_SIZE, num_train)

    # A fresh graph per dataset keeps ops/variables of earlier runs from
    # piling up in the process-wide default graph.
    with tf.Graph().as_default():
        net = build_network()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Each step trains on a random mini-batch drawn without
            # replacement from the training samples.
            for _ in range(TRAIN_STEPS):
                picked = np.random.choice(num_train, batch_size, replace=False)
                sess.run(net['train_step'], feed_dict={
                    net['x']: X_train[picked],
                    net['y_']: y_train[picked],
                    net['keep_prob']: TRAIN_KEEP_PROB,
                })

            print("test accuracy %g" % sess.run(net['accuracy'], feed_dict={
                net['x']: X_test,
                net['y_']: y_test,
                net['keep_prob']: 1.0,
            }))

            # Push the raw inputs through the trained CNN and use the fully
            # connected layer's activations as SVM features (one batched run
            # per set instead of one run per sample).
            train_features = sess.run(net['features'],
                                      feed_dict={net['x']: X_train})
            test_features = sess.run(net['features'],
                                     feed_dict={net['x']: X_test})

    # Linear-kernel SVM with C = 0.9.
    clf = svm.SVC(C=0.9, kernel='linear')
    clf.fit(train_features, y_train_labels)
    print('svm testing accuracy:')
    print(clf.score(test_features, y_test_labels))


def main():
    """Run the experiment on the ten independent, randomly generated splits."""
    for file_num in range(NUM_DATASETS):
        run_experiment(file_num)


if __name__ == "__main__":
    main()