├── data_preprocess
    ├── __init__.py
    ├── text_process.py
    ├── make_dataset.py
    ├── make_power_data.py
    └── generate_npy.py
├── image
    ├── picture.png
    └── result.png
├── requirments.txt
├── README.md
├── evaluate.py
├── stop_words.py
├── fasion_utils.py
├── fasion_dataset.py
├── all_colors.py
├── utils.py
├── load_data.py
├── compare.py
├── main.py
├── train_model.py
└── draw.py


/data_preprocess/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/image/picture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redysky/multimodel/HEAD/image/picture.png


--------------------------------------------------------------------------------
/image/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redysky/multimodel/HEAD/image/result.png


--------------------------------------------------------------------------------
/requirments.txt:
--------------------------------------------------------------------------------
 1 | python~=3.8.1
 2 | numpy~=1.18.5
 3 | jieba~=0.42.1
 4 | matplotlib~=3.2.1
 5 | tqdm~=4.46.0
 6 | pandas~=1.0.3
 7 | opencv-python~=4.3.0.38
 8 | sklearn~=0.0
 9 | scikit-learn~=0.23.1
10 | imgaug~=0.4.0
11 | Pillow~=8.2.0
12 | scipy~=1.4.1
13 | tensorflow~=2.2.0
14 | gensim~=3.8.3
15 | Keras~=2.3.1
16 | chardet~=3.0.4
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Multimodal commodity image retrieval
 2 | ## 多模态商品图像检索
 3 | Not finished yet...
 4 | 
 5 | ## Introduction
 6 | Note: A detailed description of the project and of the product image dataset will be added later. Stars are welcome in the meantime.
 7 | 
 8 | ![image text](https://github.com/redysky/multimodel/blob/master/image/picture.png)
 9 | 
10 | 使用商品图像数据集的检索结果mAP
11 | ![image text](https://github.com/redysky/multimodel/blob/master/image/result.png)
12 | 
13 | ## CD 商品图像数据集 (https://cs.hrbcu.edu.cn/info/1267/1416.htm)
14 | 并提供二进制文件(https://drive.google.com/drive/folders/1Ch3Y9Tek5MQyXLYeJpWQ1oe_YcwNf5c_?usp=sharing)
15 | ## Fasion-200k
16 | 需要初始化path和label_path,运行fasion_dataset.py将会得到训练集和测试集的图片路径，所有过滤后的文本数据以及标签(https://www.kaggle.com/mayukh18/fashion200k-dataset)
17 | 
18 | ## in addition
19 | ```
20 | python main.py
21 | ```
22 | 
23 | 所有需要的包都在requirments.txt, 代码中包含了众多注释，你可以在其中发现他们
24 | All required packages are listed in requirments.txt. The code contains many comments that explain the details.
25 | 
26 | # 如果觉得还不错，欢迎star
27 | If you have any questions, please contact me
28 | 


--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy.spatial
 3 | 
 4 | 
 5 | # 结果数组中的第一行数据表示的是image数组中第一个元素点与image数组中各个元素点的距离，计算两点之间的距离
 6 | def fx_calc_map_label(image, label, k=10, dist_method='L2'):
 7 |     if dist_method == 'L2':
 8 |         dist = scipy.spatial.distance.cdist(image, image, 'euclidean')
 9 |     elif dist_method == 'COS':
10 |         dist = scipy.spatial.distance.cdist(image, image, 'cosine')
11 | 
12 |     ord = dist.argsort()
13 |     numcases = dist.shape[0]
14 |     res = []  # mAP,测试集100个样本的平均准确率
15 |     for i in range(numcases):  # 所有行的循环，待检索图像/文本的循环
16 |         order = ord[i]
17 |         p = 0.0  # 精度,分母为当前返回的图像个数，大白话 --> 返回的7张图像中有来自同一类的个数
18 |         r = 0.0  # 计数,在数据库中,与当前待检索数据库中当前待检索图像，同一类的图像个数
19 | 
20 |         for j in range(1, k+1):  # 被检索数据库的循环,这里的一个坑，不要将自身纳入待检索库中
21 |             if label[i] == label[order[j]]:
22 |                 r += 1
23 |                 p += (r / (j + 1))
24 |         if r > 0:
25 |             res += [p / r]
26 |         else:
27 |             res += [0]
28 |     return np.mean(res)
29 | 
30 | 
if __name__ == "__main__":
    # Smoke test on random data. There must be exactly one label per feature
    # row, and more rows than the default k so every query has k neighbours.
    img = np.random.randint(0, 100, (100, 3))
    label = np.random.randint(0, 4, 100)
    print(fx_calc_map_label(img, label))
35 | 


--------------------------------------------------------------------------------
/data_preprocess/text_process.py:
--------------------------------------------------------------------------------
 1 | #文本预处理的程序---去停用词,处理后转存到一个文件夹中--text
 2 | from collections import Counter
 3 | import jieba
 4 | import os
 5 | import chardet
 6 | 
 7 | 
 8 | def text_process():
 9 | 	cur_path = os.path.dirname(__file__)  # 获取当前文件路径
10 | 	parent_path = os.path.dirname(cur_path)  # 获取当前文件夹父目录
11 | 	stop_list = "./data/去停用词.txt"
12 | 	# 需要自己制作一个所有文本(txt)的集合，就是放在一个文件夹(o_path)中
13 | 	o_path = os.path.join(parent_path, r'data//Unprocessed_text/')
14 | 	# 最终的文本数据路径
15 | 	f_path=os.path.join(parent_path, 'data/text/')
16 | 	outstr=''
17 | 	stopwords=[' ','(',')','」','「',"'",'^','|']
18 | 	for line in open(stop_list,encoding='utf-8',errors='ignore').readlines():
19 | 		stopwords.append(line.strip())
20 | 
21 | 	list_text=[]
22 | 	for i in os.listdir(o_path):
23 | 		list_text.append(i)
24 | 	# print(list_text)
25 | 
26 | 	for i in range(len(list_text)):
27 | 		if chardet.detect(open(os.path.join(o_path+list_text[i]),'rb').read())['encoding']=='GB2312':
28 | 			for line in open(os.path.join(o_path+list_text[i]),encoding='ansi',errors='ignore'):
29 | 				for word in line:
30 | 					if word not in stopwords:
31 | 						if word !='\t':
32 | 							outstr += word
33 | 				open(os.path.join(f_path+list_text[i]),'w',encoding='utf-8-sig').write(outstr+'\n')
34 | 			outstr=''
35 | 		else:
36 | 			for line in open(os.path.join(o_path+list_text[i]),encoding='utf-8-sig',errors='ignore'):
37 | 				for word in line:
38 | 					if word not in stopwords:
39 | 						if word !='\t':
40 | 							outstr += word
41 | 				open(os.path.join(f_path+list_text[i]),'w',encoding='utf-8-sig').write(outstr+'\n')
42 | 			outstr=''
43 | 
44 | 
45 | text_process()


--------------------------------------------------------------------------------
/stop_words.py:
--------------------------------------------------------------------------------
# English stop words; fasion_dataset.Fashion_200k drops every caption word
# found in this list. Trim or extend it as needed.
stop_words = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself',
              'yourselves', 'he',
              'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their',
              'theirs',
              'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was',
              'were', 'be',
              'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and',
              'but', 'if',
              'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
              'into',
              'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on',
              'off', 'over',
              'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any',
              'both',
              'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so',
              'than', 'too',
              'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y',
              'ain', 'aren',
              'couldn', 'didn', 'doesn', 'hadn', 'hasn', 'haven', 'isn', 'ma', 'mightn', 'mustn', 'needn', 'shan',
              'shouldn', 'wasn',
              'weren', 'won', 'wouldn', "", "ab"]
# Digits and punctuation characters; fasion_dataset.Fashion_200k replaces each
# of them with a space before a caption is split into words.
symbol = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "/", "\\", "<", ">", "~", "@", "#", "$", "%", "^",
          "&", "*", ".", ":", "'", ";", "?", "-", "+", "=", "`", "!"]
25 | 


--------------------------------------------------------------------------------
/fasion_utils.py:
--------------------------------------------------------------------------------
 1 | from fasion_dataset import *
 2 | import numpy as np
 3 | import pandas as pd
 4 | import os
 5 | import matplotlib.pyplot as plt
 6 | from sklearn.utils import shuffle
 7 | import matplotlib.image as mpimg
 8 | from imgaug import augmenters as iaa
 9 | import cv2 as cv
10 | from sklearn.model_selection import train_test_split
11 | from PIL import Image
12 | from train_model import get_data
13 | from train_model import text_w2model
14 | from sklearn.model_selection import train_test_split
15 | from load_data import generate_method
16 | import matplotlib.image as mpimg
17 | 
# Number of CPU cores; initialization() passes it to Word2Vec as `workers`.
# `multiprocessing` is not imported in this file directly - presumably it
# arrives through `from fasion_dataset import *`; verify before refactoring.
cpu_count = multiprocessing.cpu_count()
19 | 
20 | 
def initialization():
    """Load Fashion-200k and prepare the inputs of the embedding layer.

    Returns:
        A tuple ``(img_path, text_data, label, n_symbols, embedding_weights)``:
        the image paths and labels come from ``Fashion_200k``, ``text_data``
        from ``text_w2model`` and the last two values from ``get_data``.
    """
    img_path, _, label, all_words = Fashion_200k(path, label_path)

    # Untrained Word2Vec model; the vector size of 500 matches the
    # vocab_dim handed to get_data below.
    w2v_settings = dict(
        size=500,           # word-vector dimension
        min_count=5,        # minimum word frequency
        window=3,           # context window size
        workers=cpu_count,
        iter=5,
    )
    model = Word2Vec(**w2v_settings)

    w2indx, w2vec, text_data, _ = text_w2model(model, all_words, max_len=15)
    n_symbols, embedding_weights = get_data(w2indx, w2vec, vocab_dim=500)
    return img_path, text_data, label, n_symbols, embedding_weights
31 | 
32 | 
def Processing(imgPath):
    """Read the image at ``imgPath``, resize it to 224x224 and divide by 255."""
    resized = cv.resize(mpimg.imread(imgPath), (224, 224))
    return resized / 255
38 | 
39 | 
def BatchGen(batch_size, image_path, text, label):
    """Endlessly yield random batches of ``((images, texts), labels)``.

    Each batch consists of ``batch_size`` samples drawn with replacement.
    ``image_path``, ``text`` and ``label`` are indexed with the same random
    index, so entry ``i`` of each must describe the same sample.
    """
    while True:
        imageBatch, textBatch, labelBatch = [], [], []
        for _ in range(batch_size):
            pick = np.random.randint(0, len(image_path))
            imageBatch.append(Processing(image_path[pick]))
            textBatch.append(text[pick])
            labelBatch.append(label[pick])

        # Shuffle the three lists with the same permutation by restoring the
        # identical random state before each shuffle.
        state = np.random.get_state()
        for batch in (imageBatch, textBatch, labelBatch):
            np.random.set_state(state)
            np.random.shuffle(batch)

        inputs = (np.asarray(imageBatch), np.asarray(textBatch))
        yield inputs, np.asarray(labelBatch)
58 | 


--------------------------------------------------------------------------------
/data_preprocess/make_dataset.py:
--------------------------------------------------------------------------------
 1 | # 数据集的图片处理---包括将所有图片排号和将所有图片转换为JPG格式
 2 | # 对原始数据集的处理，将其转变为这种形式  图片（1，2，3 ……total_img），
 3 | # 并存储到final_path中,得到的是原始图像集，和增强后的图像集
 4 | import os
 5 | from PIL import Image
 6 | import cv2 as cv
 7 | # from make_power_data import *
 8 | # from generate_npy import *
 9 | 
10 | 
def make_dataset():
    """Flatten the raw product dataset into numbered RGB JPEG files.

    Expected layout: ``<project>/商品数据集/<group>/<class>/图像/<number>.<ext>``.
    The first 100 images (in numeric file-name order) of every class are
    copied to ``<project>/data/img/`` as ``1.jpg``, ``2.jpg``, ..., so every
    consecutive run of 100 output files belongs to one class. The class order
    is printed at the end.

    Raises:
        ValueError: If a class folder holds fewer than 100 images.
    """
    images_per_class = 100

    cur_path = os.path.dirname(os.path.abspath(__file__))
    parent_path = os.path.dirname(cur_path)
    # Source folder; the dataset description .txt must be removed beforehand.
    f_path = os.path.join(parent_path, r'商品数据集')
    final_path = os.path.join(parent_path, r'data/img')
    os.makedirs(final_path, exist_ok=True)

    # Step 1: collect the source path of every image, class by class.
    image_paths = []
    total_list = []
    for group in os.listdir(f_path):
        group_dir = os.path.join(f_path, group)
        class_names = os.listdir(group_dir)  # listing order is arbitrary
        total_list += class_names
        for class_name in class_names:
            image_dir = os.path.join(group_dir, class_name, '图像')
            file_names = os.listdir(image_dir)
            # splitext instead of a fixed [:-4] so that extensions of any
            # length (e.g. ".jpeg") still sort numerically.
            file_names.sort(key=lambda x: int(os.path.splitext(x)[0]))
            if len(file_names) < images_per_class:
                raise ValueError(
                    "%s holds %d images, %d are required"
                    % (image_dir, len(file_names), images_per_class))
            image_paths.extend(
                os.path.join(image_dir, file_name)
                for file_name in file_names[:images_per_class])

    # Step 2: convert every image and save it under its running number.
    for number, source in enumerate(image_paths, start=1):
        target = final_path + '/' + str(number) + '.jpg'
        with Image.open(source) as opened:
            # JPEG cannot store an alpha channel or a palette, so the
            # converted image (not the original) has to be saved.
            rgb = opened if opened.mode == 'RGB' else opened.convert('RGB')
            rgb.save(target)
        print(target, '已保存')

    print("每个类的顺序:",total_list)
    # The printed class order corresponds to list_name in main.py, e.g.:
    # ['休闲裤', '半身裙', '女牛仔外套', '女牛仔裤', '女衬衫', '女西装', '文胸套装', '无帽卫衣', '棉衣棉服',
    # '毛呢大衣', '皮草', '睡袍', '背心吊带', '渔夫帽', '鸭舌帽', '卫衣', '棉衣', '牛仔外套', '牛仔裤', '短袖T恤',
    # '衬衫', '西装', '风衣', '马 甲', '单肩包', '双肩包', '手提包', '腰包', '钱包', '吊坠', '戒指', '手镯', '中长靴',
    # '商务鞋', '板鞋', '运动鞋', '雪地靴', '高跟鞋']
57 | 
58 | 
# Build the original image set (executed at module level, so also on import).
make_dataset()
# # Build the augmented image set
# make_data_power()
63 | 
64 | 


--------------------------------------------------------------------------------
/fasion_dataset.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from PIL import Image
 3 | import numpy as np
 4 | from stop_words import *
 5 | from tqdm import tqdm
 6 | from train_model import text_w2model
 7 | from gensim.models.word2vec import Word2Vec
 8 | from gensim.corpora.dictionary import Dictionary
 9 | import multiprocessing
10 | 
11 | 
path = r'E:\数据集\Fashion-200k'  # root directory of the dataset
label_path = "labels/labels"     # label directory, joined onto `path`
14 | 
15 | 
def Fashion_200k(path, label_path, split="train"):
    """Read the Fashion-200k label files of one split.

    Label files are taken from ``os.path.join(path, label_path)``; a file is
    used when the second ``_``-separated part of its name equals ``split``
    (e.g. ``dress_train_detect_all``). In every line the first field is the
    image path relative to ``path`` and the fields from the third on form the
    caption. Characters listed in ``symbol`` are replaced by spaces and words
    listed in ``stop_words`` are dropped.

    Args:
        path: Root directory of the dataset.
        label_path: Label directory, relative to ``path``.
        split: Split name to select, ``"train"`` by default.

    Returns:
        A tuple ``(all_img_path, all_txt, label_data, all_words)`` of four
        parallel lists: image paths, cleaned captions as strings, integer
        class indices (one index per label file, in sorted file-name order)
        and cleaned captions as word lists.
    """
    all_label_path = os.path.join(path, label_path)
    all_words = []     # cleaned caption of every sample as a list of words
    all_txt = []       # cleaned caption of every sample as one string
    label_data = []    # class index of every sample
    all_img_path = []  # image path of every sample
    stop_set = set(stop_words)  # O(1) membership tests inside the loop
    class_index = 0
    count = 1          # running line number, only used for progress output

    # Sorted so that class indices do not depend on the directory order.
    for name in sorted(os.listdir(all_label_path)):
        name_parts = name.split("_")
        # Ignore stray files whose name does not follow <class>_<split>_...
        if len(name_parts) < 2 or name_parts[1] != split:
            continue
        print(f"正在操作{name}...")
        with open(os.path.join(all_label_path, name), "r", encoding='utf-8') as f:
            for content in f:
                print(f"正在读取Fasion-200k 第 {count} 行...")
                count += 1
                value = content.split()
                if not value:  # blank line: nothing to read
                    continue
                txt = " ".join(value[2:])
                for j in symbol:
                    txt = txt.replace(j, " ")
                # Word order is preserved; empty strings left over from the
                # split are removed because "" is part of stop_words.
                tmp = [word for word in txt.split(" ") if word not in stop_set]
                all_words.append(tmp)
                all_txt.append(" ".join(tmp))
                label_data.append(class_index)
                all_img_path.append(os.path.join(path, value[0]))
        class_index += 1
    return all_img_path, all_txt, label_data, all_words
50 | 
51 | 
# Script entry point: parses the training split. The Word2Vec step and the
# size printouts below are disabled (kept as commented-out code).
if __name__ == "__main__":
    cpu_count = multiprocessing.cpu_count()
    img_path, txt_data, label, all_words = Fashion_200k(path, label_path)
    # model = Word2Vec(size=500,  # create an empty model object and set the word-vector size
    #                  min_count=5,  # minimum word frequency
    #                  window=3,  #
    #                  workers=cpu_count,
    #                  iter=5)
    # w2indx, w2vec, text_data, power_text_data = text_w2model(model, all_words, max_len=15)
    #
    # print("最大长度为", max_len)
    # print("img_path", len(img_path))
    # print(f"txt {len(txt_data)}")
    # print("label", len(label))
    # print("词", len(all_words))
    # print(all_words[:3])
68 | 


--------------------------------------------------------------------------------
/data_preprocess/make_power_data.py:
--------------------------------------------------------------------------------
 1 | # 功能：将原始数据集中的图片使用ImageDataGenerator增强，将增强后的图像保存至save_path中
 2 | from tensorflow.keras.preprocessing.image import ImageDataGenerator
 3 | import numpy as np
 4 | import os
 5 | from PIL import Image
 6 | from tqdm import tqdm
 7 | 
 8 | 
 9 | def make_data_power():
10 |     cur_path = os.path.dirname(__file__)  # 获取当前文件路径
11 |     parent_path = os.path.dirname(cur_path)  # 获取当前文件夹父目录
12 |     tmp_path = 'data/Temporary_folder'  # # 临时文件夹
13 |     train_path = os.path.join(parent_path,tmp_path)
14 |     if not os.path.exists(train_path):
15 |         os.makedirs(train_path)
16 |     start_file = os.path.join(parent_path,'data/img')  # 原始图片集文件夹
17 | 
18 |     number = 3  # 增强数目
19 |     save_path = os.path.join(parent_path, 'data/power_img')  # 增强图像集文件夹
20 | 
21 |     train_datagen = ImageDataGenerator(
22 |         rotation_range=20,
23 |         width_shift_range=0.2,
24 |         height_shift_range=0.3,
25 |         shear_range=0.2, zoom_range=0.25,
26 |         horizontal_flip=True, vertical_flip=True)
27 | 
28 |     # 由于库函数的特殊性（需要将待增强的图像放到文件夹里传入库函数），
29 |     # 所以需要保证数据集中的图片一个图片一个文件夹,结构为 文件夹（文件夹（图像））
30 |     # 将图片转存至临时文件夹中
31 |     def gen_file():
32 |         for i in tqdm(range(len(os.listdir(start_file)))):
33 |             save_file = train_path + '/' + str(i + 1) + '/' + str(i + 1) + '/' + str(i + 1)
34 |             os.makedirs(train_path + '/' + str(i + 1) + '/' + str(i + 1))
35 |             img = Image.open(start_file + '/' + str(i + 1) + '.jpg')
36 |             img.save(save_file + '.jpg')
37 | 
38 |     if len(os.listdir(train_path)) == 0:
39 |         gen_file()
40 |     n = len(os.listdir(train_path))
41 |     # 完成图像的增强操作，并保存至final_path,也即临时文件夹图像中增加到四张
42 |     for i in tqdm(range(n)):
43 |         start_path = train_path + '/' + str(i + 1)  # E/1
44 |         final_path = train_path + '/' + str(i + 1) + '/' + str(i + 1)  # E/1/1
45 |         for i in range(number):
46 |             _, _ = next(train_datagen.flow_from_directory(start_path, target_size=(224, 224),
47 |                                                           batch_size=1, shuffle=True, seed=4, save_to_dir=final_path,
48 |                                                           save_format='jpg', follow_links=True))
49 |     # 完成对临时文件夹的转存操作，将其存储为这种形式 power_data(1,2,3,4,...,total_img)
50 |     for i in tqdm(range(n)):
51 |         list_pic = os.listdir(
52 |             train_path + '/' + str(i + 1) + '/' + str(os.listdir(train_path + '/' + str(i + 1))[0]))  # E:/aaa/1/1
53 |         # 算上原始图像list_pic应该等于4
54 |         # 逐一打开每张图像并将其改名保存至save_path,jpg格式
55 |         for j in range(len(list_pic)):
56 |             img = Image.open(
57 |                 train_path + '/' + str(i + 1) + '/' + str(os.listdir(train_path +
58 |                                                                      '/' + str(i + 1))[0]) + '/' + list_pic[j])
59 |             img.save(save_path + '/' + str(j + i * len(list_pic) + 1) + '.jpg')
60 | 
61 | 


--------------------------------------------------------------------------------
/data_preprocess/generate_npy.py:
--------------------------------------------------------------------------------
 1 | # 将数据集处理成npy格式，此为网络输入的一种形式，优点是读取方便，缺点是一次性读取造成内存溢出，
 2 | # 解决方法是使用data_generate,批次读入内存
 3 | from PIL import Image
 4 | import numpy as np
 5 | import os
 6 | import matplotlib.pyplot as plt
 7 | 
 8 | 
 9 | def generate_npy(use_power_dataset=True):
10 |     cur_path = os.path.dirname(__file__)  # 获取当前文件路径
11 |     parent_path = os.path.dirname(cur_path)  # 获取当前文件夹父目录
12 |     if use_power_dataset:
13 |         # 指定图片和标签的路径
14 |         train_path = './data/power_img/'
15 |         train_txt = './data/labels.txt'
16 |         # 指定转换后的存储路径
17 |         x_train_save_path = './data/power_img_data_npy/x_train3.npy'
18 |         y_train_save_path = './data/power_img_data_npy/y_train3.npy'
19 |         # 数据集总图像数
20 |         total_img = len(os.listdir(train_path))
21 |         # 数据集每类图像数
22 |         class_img = 400
23 |     else:
24 |         # 指定图片和标签的路径
25 |         train_path = './data/img/'
26 |         train_txt = './data/labels.txt'
27 |         # 指定转换后的存储路径
28 |         x_train_save_path = './data/original_img_data_npy/x_train3.npy'
29 |         y_train_save_path = './data/original_img_data_npy/y_train3.npy'
30 |         # 数据集总图像数
31 |         total_img = len(os.listdir(train_path))
32 |         # 数据集每类图像数
33 |         class_img = 100
34 | 
35 |     # print(total_img)
36 |     # 打标签,该标签以0起始
37 | 
38 |     def get_label(train_txt, total_img, class_img):
39 |         j = 0
40 |         with open(train_txt, "w") as f:
41 |             for i in range(total_img):
42 |                 if i % class_img == 0:
43 |                     j += 1
44 |                 text = f.write(str(i + 1) + ".jpg" + " " + str(j - 1) + "\n")
45 |     if not os.path.exists(train_txt):
46 |         get_label(train_txt, total_img, class_img)
47 | 
48 |     def generated(path, txt):
49 |         f = open(txt, 'r')
50 |         contents = f.readlines()
51 |         f.close()
52 |         x, y_ = [], []
53 |         for content in contents:
54 |             value = content.split()
55 |             img_path = path + value[0]
56 |             img = Image.open(img_path)
57 |             img = img.resize((224, 224), Image.BILINEAR)
58 |             img = np.array(img)
59 |             x.append(img)
60 |             y_.append(value[1])
61 |             print('loading:' + content)
62 |         x = np.array(x)
63 |         # print(x)
64 |         y_ = np.array(y_)
65 |         # print(y_.shape)
66 |         y_ = y_.astype(np.int64)
67 |         return x, y_
68 | 
69 |     if os.path.exists(x_train_save_path) and os.path.exists(y_train_save_path):
70 |         print('-------Load Datasets---------')
71 |         x_train_save = np.load(x_train_save_path)
72 |         y_train_save = np.load(y_train_save_path)
73 | 
74 |     else:
75 |         print('------Generate Datasets--------')
76 |         x_train, y_train = generated(train_path, train_txt)
77 | 
78 |         print('-------Save Datasets--------')
79 |         np.save(x_train_save_path, x_train)
80 |         np.save(y_train_save_path, y_train)
81 | 


--------------------------------------------------------------------------------
/all_colors.py:
--------------------------------------------------------------------------------
# Lookup table from an integer index (0-139) to a hex colour string.
# Some colours occur twice (e.g. indices 2 and 20 are both '#00FFFF').
color_names = {0: '#F0F8FF', 1: '#FAEBD7', 2: '#00FFFF', 3: '#7FFFD4', 4: '#F0FFFF',
               5: '#F5F5DC', 6: '#FFE4C4', 7: '#000000', 8: '#FFEBCD', 9: '#0000FF',
               10: '#8A2BE2', 11: '#A52A2A', 12: '#DEB887', 13: '#5F9EA0', 14: '#7FFF00',
               15: '#D2691E', 16: '#FF7F50', 17: '#6495ED', 18: '#FFF8DC', 19: '#DC143C',
               20: '#00FFFF', 21: '#00008B', 22: '#008B8B', 23: '#B8860B', 24: '#A9A9A9',
               25: '#006400', 26: '#BDB76B', 27: '#8B008B', 28: '#556B2F', 29: '#FF8C00',
               30: '#9932CC', 31: '#8B0000', 32: '#E9967A', 33: '#8FBC8F', 34: '#483D8B',
               35: '#2F4F4F', 36: '#00CED1', 37: '#9400D3', 38: '#FF1493', 39: '#00BFFF',
               40: '#696969', 41: '#1E90FF', 42: '#B22222', 43: '#FFFAF0', 44: '#228B22',
               45: '#FF00FF', 46: '#DCDCDC', 47: '#F8F8FF', 48: '#FFD700', 49: '#DAA520',
               50: '#808080', 51: '#008000', 52: '#ADFF2F', 53: '#F0FFF0', 54: '#FF69B4',
               55: '#CD5C5C', 56: '#4B0082', 57: '#FFFFF0', 58: '#F0E68C', 59: '#E6E6FA',
               60: '#FFF0F5', 61: '#7CFC00', 62: '#FFFACD', 63: '#ADD8E6', 64: '#F08080',
               65: '#E0FFFF', 66: '#FAFAD2', 67: '#90EE90', 68: '#D3D3D3', 69: '#FFB6C1',
               70: '#FFA07A', 71: '#20B2AA', 72: '#87CEFA', 73: '#778899', 74: '#B0C4DE',
               75: '#FFFFE0', 76: '#00FF00', 77: '#32CD32', 78: '#FAF0E6', 79: '#FF00FF',
               80: '#800000', 81: '#66CDAA', 82: '#0000CD', 83: '#BA55D3', 84: '#9370DB',
               85: '#3CB371', 86: '#7B68EE', 87: '#00FA9A', 88: '#48D1CC', 89: '#C71585',
               90: '#191970', 91: '#F5FFFA', 92: '#FFE4E1', 93: '#FFE4B5', 94: '#FFDEAD',
               95: '#000080', 96: '#FDF5E6', 97: '#808000', 98: '#6B8E23', 99: '#FFA500',
               100: '#FF4500', 101: '#DA70D6', 102: '#EEE8AA', 103: '#98FB98', 104: '#AFEEEE',
               105: '#DB7093', 106: '#FFEFD5', 107: '#FFDAB9', 108: '#CD853F', 109: '#FFC0CB',
               110: '#DDA0DD', 111: '#B0E0E6', 112: '#800080', 113: '#FF0000', 114: '#BC8F8F',
               115: '#4169E1', 116: '#8B4513', 117: '#FA8072', 118: '#FAA460', 119: '#2E8B57',
               120: '#FFF5EE', 121: '#A0522D', 122: '#C0C0C0', 123: '#87CEEB', 124: '#6A5ACD',
               125: '#708090', 126: '#FFFAFA', 127: '#00FF7F', 128: '#4682B4', 129: '#D2B48C',
               130: '#008080', 131: '#D8BFD8', 132: '#FF6347', 133: '#40E0D0', 134: '#EE82EE',
               135: '#F5DEB3', 136: '#FFFFFF', 137: '#F5F5F5', 138: '#FFFF00', 139: '#9ACD32'}
29 | 
# The 38 colours stored under indices 0-37 of color_names, as a plain list
# (this is the list the __main__ block of this file prints).
color_name_38 = ['#F0F8FF', '#FAEBD7', '#00FFFF', '#7FFFD4', '#F0FFFF', '#F5F5DC', '#FFE4C4', '#000000', '#FFEBCD',
                 '#0000FF', '#8A2BE2', '#A52A2A', '#DEB887', '#5F9EA0', '#7FFF00', '#D2691E', '#FF7F50', '#6495ED',
                 '#FFF8DC', '#DC143C', '#00FFFF', '#00008B', '#008B8B', '#B8860B', '#A9A9A9', '#006400', '#BDB76B',
                 '#8B008B', '#556B2F', '#FF8C00', '#9932CC', '#8B0000', '#E9967A', '#8FBC8F', '#483D8B', '#2F4F4F',
                 '#00CED1', '#9400D3']
35 | 
if __name__ == "__main__":
    # Print the colours stored under indices 0-37, in dictionary order.
    res = [hex_code for index, hex_code in color_names.items() if index <= 37]
    print(res)
42 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import os
  4 | import matplotlib.pyplot as plt
  5 | from sklearn.utils import shuffle
  6 | import matplotlib.image as mpimg
  7 | from imgaug import augmenters as iaa
  8 | import cv2 as cv
  9 | from sklearn.model_selection import train_test_split
 10 | from PIL import Image
 11 | 
 12 | 
 13 | # 返回每一张图片的路径,并且与文本一一对应，并且还需要返回标签信息
 14 | def LoadData():
 15 |     cur_path = os.path.dirname(__file__)  # 'E:/github文件/my-project'
 16 |     img_path = "./data/img"
 17 |     ImgPath = []
 18 |     n = len(os.listdir(os.path.join(cur_path, img_path)))  # 图像总数　
 19 |     for i in range(1, n + 1):
 20 |         ImgPath.append(cur_path + "/" +
 21 |                        os.path.join(img_path, str(i)) + ".jpg")  # 图像路径 'E:/github文件/my-project/data/img\\3798.jpg'
 22 |     return ImgPath
 23 | 
 24 | 
 25 | def augmentImage(imgPath):
 26 |     img = mpimg.imread(imgPath)
 27 |     ## PAN
 28 |     if np.random.rand() <= 0.5:
 29 |         pan = iaa.Affine(translate_percent={'x': (-0.1, 0.1), 'y': (-0.1, 0.1)})
 30 |         img = pan.augment_image(img)
 31 |     ## ZOOM
 32 |     if np.random.rand() <= 0.5:
 33 |         zoom = iaa.Affine(scale=(1, 1.2))
 34 |         img = zoom.augment_image(img)
 35 |     # BRIGHTNESS
 36 |     if np.random.rand() <= 0.5:
 37 |         brightness = iaa.Multiply((0.4, 1.2))
 38 |         img = brightness.augment_image(img)
 39 |     # FLIP
 40 |     if np.random.rand() <= 0.5:
 41 |         img = cv.flip(img, 1)
 42 |     return img
 43 | 
 44 | 
 45 | def preProcessing(img):
 46 |     img = img[:, 20:400, :]  # (高,宽,通道)
 47 |     img = cv.resize(img, (224, 224))
 48 |     img = cv.cvtColor(img, cv.COLOR_BGR2YUV)
 49 |     img = cv.GaussianBlur(img, (3, 3), 0)
 50 |     img = img / 255
 51 |     return img
 52 | 
 53 | 
 54 | def batchGen(batch_size, image_path, text, label, original=True, multiple=4):
 55 |     """
 56 |     image_path:List() 待增强图像的路径
 57 |     text:经过word2vec处理过后的文本数据
 58 |     original:bool 是否增加原生态图像数量
 59 |     multiple:int 增加的数量
 60 |     """
 61 |     while True:
 62 |         imageBatch, textBatch, labelBatch = [], [], []
 63 |         for i in range(batch_size):
 64 |             ImglittleBatch, txtlittleBatch, labellittleBatch = [], [], []
 65 |             index = np.random.randint(0, len(image_path))
 66 |             if original:
 67 |                 image = augmentImage(image_path[index])
 68 |                 image = preProcessing(image)
 69 |                 imageBatch.append(image)
 70 |                 textBatch.append(text[index])
 71 |                 labelBatch.append(label[index])
 72 |             else:
 73 |                 for j in range(multiple):
 74 |                     image = augmentImage(image_path[index])
 75 |                     image = preProcessing(image)
 76 |                     ImglittleBatch.append(image)
 77 |                     txtlittleBatch.append(text[index])
 78 |                     labellittleBatch.append(label[index])
 79 |                 imageBatch += ImglittleBatch
 80 |                 textBatch += txtlittleBatch
 81 |                 labelBatch += labellittleBatch
 82 | 
 83 |         # 对列表中的元素加入随机性，打乱，固定打乱顺序
 84 |         state = np.random.get_state()
 85 |         np.random.shuffle(imageBatch)
 86 |         np.random.set_state(state)
 87 |         np.random.shuffle(textBatch)
 88 |         np.random.set_state(state)
 89 |         np.random.shuffle(labelBatch)
 90 |         yield (np.asarray(imageBatch), np.asarray(textBatch)), np.asarray(labelBatch)
 91 | 
 92 | 
 93 | def Img_Txt(imgTest, txtTest, start, end):
 94 |     imageBatch, textBatch = [], []
 95 |     for i in range(start, end):
 96 |         image = mpimg.imread(imgTest[i]) / 255
 97 |         image = cv.resize(image, (224, 224))
 98 |         text = txtTest[i]
 99 |         imageBatch.append(image)
100 |         textBatch.append(text)
101 |     return np.asarray(imageBatch), np.asarray(textBatch)
102 | 
103 | 


--------------------------------------------------------------------------------
/load_data.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | from sklearn.model_selection import train_test_split
  5 | import tensorflow as tf
  6 | # from data_preprocess.text_process import *
  7 | 
  8 | 
  9 | # 包括将所有text.csv文本组合成一个统一的csv文件，并制作文本标签
 10 | def get_loader(text_load_path, list_name):
 11 |     list_csv = []
 12 | 
 13 |     for i in range(len(list_name)):
 14 |         text_path = os.path.join(text_load_path, list_name[i])
 15 |         list_csv.append(text_path + '文本.csv')
 16 | 
 17 |     # 文本数据,返回的文本是与图片一一对应的
 18 |     list_text = np.array([])
 19 |     for i in range(len(list_csv)):
 20 |         list_text = np.concatenate(
 21 |             (list_text, np.array(pd.read_csv(list_csv[i], encoding='gb18030', header=None, index_col=None)[0])))
 22 | 
 23 |     # list_num为文本的标签，对应数据集文本排列，每间隔一百变成下一个类
 24 |     text_label = np.array([], dtype=np.int32)
 25 |     for i in range(len(list_csv)):
 26 |         text_label = np.concatenate((text_label, int(i) * np.ones(100, dtype=np.int32)))
 27 | 
 28 |     # list_num1增强版文本的标签
 29 |     text_labels = np.array([], dtype=np.int32)
 30 |     for i in range(len(list_csv)):
 31 |         text_labels = np.concatenate((text_labels, int(i) * np.ones(400, dtype=np.int32)))
 32 | 
 33 |     return list_text, text_label, text_labels
 34 | 
 35 | 
 36 | def load_data_set(data_path, power_data_path, text_data, power_text_data,
 37 |                   t_label, tt_label, num_classes=38, use_power_data=False, test_size=0.2):
 38 |     if use_power_data:
 39 |         # 数据集读取
 40 |         data_img = np.load(os.path.join(power_data_path, 'x_train3.npy')) / 255
 41 |         data_label = np.load(os.path.join(power_data_path, 'y_train3.npy'))
 42 | 
 43 |         # 分割训练集和测试集，按照7：3划分
 44 |         text_train, text_test, text_train_label, text_test_label = train_test_split(power_text_data, tt_label,
 45 |                                                                                     test_size=test_size,
 46 |                                                                                     random_state=5)
 47 |         image_train, image_test, image_train_label, image_test_label = train_test_split(data_img, data_label,
 48 |                                                                                         test_size=test_size,
 49 |                                                                                         random_state=5)
 50 |         # 标签独热化
 51 |         train_onehot_label = tf.keras.utils.to_categorical(image_train_label,
 52 |                                                            num_classes=num_classes)
 53 |         test_onehot_label = tf.keras.utils.to_categorical(image_test_label,
 54 |                                                            num_classes=num_classes)
 55 | 
 56 |         return text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label
 57 |     else:
 58 |         data_img = np.load(os.path.join(data_path, 'x_train3.npy')) / 255
 59 |         data_label = np.load(os.path.join(data_path, 'y_train3.npy'))
 60 | 
 61 |         text_train, text_test, text_train_label, text_test_label = train_test_split(text_data, t_label,
 62 |                                                                                     test_size=test_size,
 63 |                                                                                     random_state=5)
 64 |         image_train, image_test, image_train_label, image_test_label = train_test_split(data_img, data_label,
 65 |                                                                                         test_size=test_size,
 66 |                                                                                         random_state=5)
 67 |         train_onehot_label = tf.keras.utils.to_categorical(image_train_label,
 68 |                                                            num_classes=num_classes)
 69 |         test_onehot_label = tf.keras.utils.to_categorical(image_test_label,
 70 |                                                           num_classes=num_classes)
 71 | 
 72 |         return text_train, text_test, image_train, image_test, \
 73 |                train_onehot_label, test_onehot_label
 74 | 
 75 | 
 76 | # 需要划分训练集，验证集和测试集
 77 | def generate_method(image_path, text, label, test_size=0.01, val_size=0.1, fasion=False):
 78 |     # 划分训练集，测试集
 79 |     imgTrain, imgTest, label_img_Train, labe_img_Tst = train_test_split(image_path, label,
 80 |                                                                        test_size=test_size, random_state=5)
 81 | 
 82 |     txtTrain, txtTest, label_txt_Train, labe_txt_Tst = train_test_split(text, label,
 83 |                                                                        test_size=test_size, random_state=5)
 84 |     # 划分训练集，验证集
 85 |     imgTrain, imgVal, label_img_Train, labe_img_Val = train_test_split(imgTrain, label_img_Train,
 86 |                                                                        test_size=val_size, random_state=5)
 87 | 
 88 |     txtTrain, txtVal, label_txt_Train, labe_txt_Val = train_test_split(txtTrain, label_txt_Train,
 89 |                                                                        test_size=val_size, random_state=5)
 90 |     if fasion:
 91 |         Train_label_one_hot = tf.keras.utils.to_categorical(label_img_Train, num_classes=5)
 92 |         Val_label_one_hot = tf.keras.utils.to_categorical(labe_img_Val, num_classes=5)
 93 |         Tst_label_one_hot = tf.keras.utils.to_categorical(labe_img_Tst, num_classes=5)
 94 |     else:
 95 |         Train_label_one_hot = tf.keras.utils.to_categorical(label_img_Train, num_classes=38)
 96 |         Val_label_one_hot = tf.keras.utils.to_categorical(labe_img_Val, num_classes=38)
 97 |         Tst_label_one_hot = tf.keras.utils.to_categorical(labe_img_Tst, num_classes=38)
 98 | 
 99 |     return imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \
100 |         Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot


--------------------------------------------------------------------------------
/compare.py:
--------------------------------------------------------------------------------
  1 | from load_data import load_data_set
  2 | from train_model import text_model, image_model
  3 | from evaluate import fx_calc_map_label
  4 | import tensorflow as tf
  5 | from tensorflow.keras.callbacks import EarlyStopping
  6 | from utils import *
  7 | from tensorflow.keras.callbacks import TensorBoard
  8 | from load_data import *
  9 | import numpy as np
 10 | from tensorflow.keras.layers import *
 11 | from tensorflow.keras.models import Model
 12 | import pickle
 13 | 
 14 | 
 15 | def batchGenimg(batch_size, image_path, label, original=True, multiple=4):
 16 |     """
 17 |     image_path:List() 待增强图像的路径
 18 |     text:经过word2vec处理过后的文本数据
 19 |     original:bool 是否增加原生态图像数量
 20 |     multiple:int 增加的数量
 21 |     """
 22 |     while True:
 23 |         imageBatch, labelBatch = [], []
 24 |         for i in range(batch_size):
 25 |             ImglittleBatch, labellittleBatch = [], []
 26 |             index = np.random.randint(0, len(image_path))
 27 |             if original:
 28 |                 image = augmentImage(image_path[index])
 29 |                 image = preProcessing(image)
 30 |                 imageBatch.append(image)
 31 |                 labelBatch.append(label[index])
 32 |             else:
 33 |                 for j in range(multiple):
 34 |                     image = augmentImage(image_path[index])
 35 |                     image = preProcessing(image)
 36 |                     ImglittleBatch.append(image)
 37 |                     labellittleBatch.append(label[index])
 38 |                 imageBatch += ImglittleBatch
 39 |                 labelBatch += labellittleBatch
 40 | 
 41 |         # 对列表中的元素加入随机性，打乱，固定打乱顺序
 42 |         state = np.random.get_state()
 43 |         np.random.shuffle(imageBatch)
 44 |         np.random.set_state(state)
 45 |         np.random.shuffle(labelBatch)
 46 |         yield np.asarray(imageBatch), np.asarray(labelBatch)
 47 | 
 48 | 
 49 | # 只进行文本检索
 50 | def txt_result(text_train, text_test, train_onehot_label, test_onehot_label,
 51 |                vocab_dim, n_symbols, embedding_weights, input_length):
 52 |     txt_pre, img_out = text_model(vocab_dim, n_symbols, embedding_weights, input_length)
 53 |     txt_pre.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
 54 |     early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min')
 55 |     Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True)
 56 |     history_txt = txt_pre.fit(text_train, train_onehot_label, batch_size=32, epochs=30,
 57 |                               validation_split=0.2, verbose=1, callbacks=[early_stopping])
 58 |     lstm_feature = img_out.predict(text_test)
 59 |     f = open('./data/lstm_feature.pkl', 'wb')
 60 |     pickle.dump(lstm_feature, f)
 61 |     f.close()
 62 |     label = tf.argmax(test_onehot_label, axis=1)
 63 |     for R in [1, 5, 10]:
 64 |         result = fx_calc_map_label(lstm_feature, label, k=R)
 65 |         print(f'...只进行文本检索@{R} = MAP = {result}')
 66 | 
 67 | 
 68 | # 只进行图像检索
 69 | def img_result(text_data, t_label):
 70 |     imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \
 71 |     Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \
 72 |         generate_method(LoadData(), text_data, t_label)
 73 | 
 74 |     model_pre, model_out = image_model()
 75 |     model_pre.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
 76 |     early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min')
 77 |     history = model_pre.fit(batchGenimg(batch_size=2, image_path=imgTrain, label=Train_label_one_hot),
 78 |                             steps_per_epoch=2, epochs=20,
 79 |                             validation_data=batchGenimg(batch_size=1, image_path=imgVal,
 80 |                                                         label=Val_label_one_hot), validation_steps=1,
 81 |                             verbose=1, callbacks=[early_stopping])
 82 |     Batch_size = 3
 83 |     n = len(imgTest)
 84 |     img_feature = np.empty((n, 512), dtype=np.float32)
 85 |     num = n // Batch_size + 1  # 为了防止内存溢出
 86 |     for i in range(num):
 87 |         start = i * Batch_size
 88 |         end = (i + 1) * Batch_size
 89 |         end = min(end, n)
 90 |         print(f"正在进行{start} to {end}的预测...")
 91 |         feature = model_out.predict(Img_Txt(imgTest, txtTest, start, end)[0])
 92 |         img_feature[start:end] = feature
 93 |     label = tf.argmax(Tst_label_one_hot, axis=1)
 94 |     for R in [1, 5, 10]:
 95 |         result = fx_calc_map_label(img_feature, label, k=R)
 96 |         print(f'...只进行图像的检索@{R} = MAP = {result}')
 97 | 
 98 | 
 99 | # 经过word2vec后的检索精度
def word_vec_test_mAP(text_test, vocab_dim, n_symbols, embedding_weights, input_length, test_onehot_label):
    """Score retrieval using the raw word2vec embeddings of the test text.

    Runs ``text_test`` through a frozen embedding layer only, flattens each
    sample's embedded sequence into one vector, pickles the result to
    ``./data/txt_feature.pkl`` and prints the MAP.

    NOTE(review): the Input shape is fixed at 25 while the embedding uses
    ``input_length``; confirm the two are always equal.
    """
    inputs = tf.keras.layers.Input(shape=25, name="text_input")
    x = Embedding(output_dim=vocab_dim, input_dim=n_symbols, mask_zero=True,
                  weights=[embedding_weights],
                  input_length=input_length, trainable=False)(inputs)
    model = Model(inputs, x)
    txt_feature = model.predict(text_test)
    # reshape (not resize): one flat feature vector per sample.
    txt_feature = np.reshape(txt_feature, (len(text_test), -1))
    # Context manager guarantees the file is closed even if dumping fails.
    with open('./data/txt_feature.pkl', 'wb') as f:
        pickle.dump(txt_feature, f)
    label = tf.argmax(test_onehot_label, axis=1)
    result = fx_calc_map_label(txt_feature, label)
    print('...输入网络前的word2vec文本检索 MAP = {}'.format(result))
114 | 
115 | 
def Image_Only(test_onehot_label):
    """Print MAP@1/5/10 for the cached image-only features.

    Features are unpickled from ``./data/img_feature.pkl``; that file must be
    a trusted, locally generated artifact because unpickling executes
    arbitrary code.
    """
    # `with` closes the handle; the previous version leaked it.
    with open("./data/img_feature.pkl", "rb") as f:
        img_feature = pickle.load(f)
    label = tf.argmax(test_onehot_label, axis=1)
    for R in [1, 5, 10]:
        result = fx_calc_map_label(img_feature, label, k=R)
        print('...Image Only @{} MAP = {}'.format(R, result))
123 | 
124 | 
def Text_Only(test_onehot_label):
    """Print MAP@1/5/10 for the cached text-only (LSTM) features.

    Features are unpickled from ``./data/lstm_feature.pkl``; that file must be
    a trusted, locally generated artifact because unpickling executes
    arbitrary code.
    """
    # `with` closes the handle; the previous version leaked it.
    with open("./data/lstm_feature.pkl", "rb") as f:
        lstm_feature = pickle.load(f)
    label = tf.argmax(test_onehot_label, axis=1)
    for R in [1, 5, 10]:
        result = fx_calc_map_label(lstm_feature, label, k=R)
        print('...Text Only MAP @{} = {}'.format(R, result))
132 | 
133 | 
def cross_loss(test_onehot_label):
    """Print MAP@1/5/10 for the cached features stored in cross_loss.pkl.

    Features are unpickled from ``./data/cross_loss.pkl``; that file must be a
    trusted, locally generated artifact because unpickling executes arbitrary
    code.
    """
    # `with` closes the handle; the previous version leaked it. The local is
    # named `features` so it no longer shadows this function's own name.
    with open("./data/cross_loss.pkl", "rb") as f:
        features = pickle.load(f)
    label = tf.argmax(test_onehot_label, axis=1)
    for R in [1, 5, 10]:
        result = fx_calc_map_label(features, label, k=R)
        print('...cross loss @{} = {}'.format(R, result))
141 | 
142 | 
def Ours(test_onehot_label):
    """Print MAP@1/5/10 for the cached multimodal features.

    Features are unpickled from ``./data/multi_feature.pkl``; that file must
    be a trusted, locally generated artifact because unpickling executes
    arbitrary code.
    """
    # `with` closes the handle; the previous version leaked it.
    with open("./data/multi_feature.pkl", "rb") as f:
        multi_feature = pickle.load(f)
    label = tf.argmax(test_onehot_label, axis=1)
    for R in [1, 5, 10]:
        result = fx_calc_map_label(multi_feature, label, k=R)
        print('...Ours MAP @{} = {}'.format(R, result))
150 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import jieba
  3 | import random
  4 | import multiprocessing
  5 | from tqdm import tqdm
  6 | import sys
  7 | import matplotlib.pyplot as plt
  8 | from load_data import *
  9 | # from data_preprocess.make_dataset import *  # 制作数据集
 10 | from data_preprocess.generate_npy import *
 11 | from train_model import *
 12 | from evaluate import fx_calc_map_label
 13 | from utils import *
 14 | from compare import *
 15 | import pickle
 16 | from tensorflow.keras.callbacks import TensorBoard
 17 | 
 18 | 
 19 | np.set_printoptions(threshold=np.inf)
 20 | np.set_printoptions(suppress=True)
 21 | np.random.seed(1337)  # For Reproducibility
 22 | sys.setrecursionlimit(1000000)
 23 | cpu_count = multiprocessing.cpu_count()
 24 | parent_path = os.path.dirname(__file__)  # 获取当前文件路径
 25 | 
 26 | if __name__ == "__main__":
 27 |     CD = False   # 商品数据集
 28 |     Fasion_200k = False  # 因缺少硬件资源，所以未做实验,若切换此数据集需要将所有的38更改为5,You need to do it yourself
 29 |     vocab_dim = 100  # 词向量的维度
 30 |     n_iterations = 5  # ideally more..
 31 |     n_exposures = 3  # 所有频数超过3的词语
 32 |     window_size = 5
 33 |     input_length = 25  # 输入序列的长度
 34 |     max_len = 25  # 经过测试，每个句子的最大长度不超过21
 35 |     num_classes = 38  # 类别总数
 36 |     old_method = False  # 选择旧方法进行训练
 37 |     new_method = False  # 选择新方法训练
 38 |     draw = False
 39 |     compared = True  # 对比实验用
 40 |     out_dim = 512
 41 |     demonstration = True  # 演示用
 42 |     # weight_path = 'mnt/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
 43 |     text_load_path = './data/text'
 44 |     power_data_path = "./data/power_img_data_npy"
 45 |     data_path = "./data/original_img_data_npy"
 46 |     # 读入顺序，对应数据集每个类的顺序,同时对应文本的读取顺序
 47 |     list_name = ['休闲裤', '半身裙', '女牛仔外套', '女牛仔裤', '女衬衫', '女西装', '文胸套装', '无帽卫衣', '棉衣棉服', '毛呢大衣',
 48 |                  '皮草', '睡袍', '背心吊带', '渔夫帽', '鸭舌帽', '卫衣', '棉衣', '牛仔外套', '牛仔裤', '短袖T恤', '衬衫', '西装',
 49 |                  '风衣', '马甲', '单肩包', '双肩包', '手提包', '腰包', '钱包', '吊坠', '戒指', '手镯', '中长靴', '商务鞋', '板鞋', '运动鞋', '雪地靴', '高跟鞋']
 50 |     # generate_npy(use_power_dataset=True)  # 生成npy格式图像数据和标签
 51 | 
 52 |     text_pre, t_label, tt_label = get_loader(text_load_path, list_name)  # 纯文本， 数据集标签， 增强数据集标签
 53 | 
 54 |     # 文本分词
 55 |     text_after = [jieba.lcut(document.replace('\n', '')) for document in text_pre]
 56 | 
 57 |     model = Word2Vec(size=vocab_dim,  # 建立一个空的模型对象，设置词向量的维度为100
 58 |                      min_count=n_exposures,
 59 |                      window=window_size,
 60 |                      workers=cpu_count,
 61 |                      iter=n_iterations)
 62 |     # text_data原文本数据(词向量的索引)， power_text_data增强文本数据(词向量的索引)
 63 |     w2indx, w2vec, text_data, power_text_data = text_w2model(model, text_after, max_len)
 64 | 
 65 |     print('You will succeed...')
 66 |     n_symbols, embedding_weights = get_data(w2indx, w2vec, vocab_dim)
 67 | 
 68 |     # 需要进行对比试验吗
 69 |     if compared:
 70 |         text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label = \
 71 |             load_data_set(data_path, power_data_path, text_data, power_text_data,
 72 |                           t_label, tt_label)
 73 |         if demonstration:
 74 |             Image_Only(test_onehot_label)
 75 |             Text_Only(test_onehot_label)
 76 |             cross_loss(test_onehot_label)
 77 |             Ours(test_onehot_label)
 78 |         else:
 79 |             word_vec_test_mAP(text_test, vocab_dim, n_symbols, embedding_weights, input_length, test_onehot_label)
 80 |             txt_result(text_train, text_test, train_onehot_label, test_onehot_label,
 81 |                        vocab_dim, n_symbols, embedding_weights, input_length)
 82 |             img_result(text_data, t_label)
 83 | 
 84 |     if old_method:
 85 |         # 训练集和测试集的划分
 86 |         text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label = \
 87 |             load_data_set(data_path, power_data_path, text_data, power_text_data,
 88 |                           t_label, tt_label, use_power_data=False)
 89 | 
 90 |         # 构建多模态模型
 91 |         multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim)
 92 |         # run_eagerly指示模型是否应急切运行的可设置属性,这对于自定义的损失函数和张量的流动很有用
 93 |         # 急切地运行意味着您的模型将逐步运行，就像 Python 代码一样。您的模型可能运行得较慢，但您应该更容易通过进入各个层调用来调试它。
 94 |         # 默认情况下，我们会尝试将您的模型编译为静态图以提供最佳执行性能
 95 |         # 默认为False. 如果True，thisModel的逻辑将不会被包装在 a 中tf.function。建议将其保留为None除非您Model无法在 tf.function. 使用时不支持
 96 |         multi_model.compile(loss=Myloss(), optimizer='adam', run_eagerly=True)
 97 | 
 98 |         early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min')
 99 |         history = multi_model.fit([image_train, text_train], train_onehot_label, batch_size=64, epochs=3,
100 |                                   validation_split=0.3,
101 |                                   verbose=1, callbacks=[early_stopping])
102 |     if new_method:
103 |         # 训练集：测试集 = 8：2
104 |         # 训练集：验证集 = 9：1
105 |         if CD:
106 |             imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \
107 |             Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \
108 |                 generate_method(LoadData(), text_data, t_label)
109 | 
110 |             train_size = len(imgTrain)
111 |             val_size = len(imgVal)
112 | 
113 |             multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim)
114 | 
115 |             multi_model.compile(loss=Myloss(), optimizer='adam', metrics=[class_metric], run_eagerly=True)
116 |             Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True)
117 |         if Fasion_200k:
118 |             from fasion_utils import  *
119 | 
120 |             vocab_dim = 500
121 |             input_length = 15
122 |             num_classes = 5
123 | 
124 |             img_path, text_data, label, n_symbols, embedding_weights = initialization()
125 | 
126 |             imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \
127 |             Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \
128 |                 generate_method(img_path, text_data, label, 0.3, 0.3, True)
129 | 
130 |             multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim)
131 | 
132 |             multi_model.compile(loss=Myloss(), optimizer='adam', metrics=[class_metric], run_eagerly=True)
133 |             Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True)
134 | 
135 |             history = multi_model.fit(BatchGen(batch_size=3, image_path=imgTrain,
136 |                                                text=txtTrain, label=Train_label_one_hot),
137 |                                       steps_per_epoch=2, epochs=2,
138 |                                       validation_data=BatchGen(batch_size=1, image_path=imgVal,
139 |                                                                text=txtVal, label=Val_label_one_hot),
140 |                                       validation_steps=2,verbose=1)
141 |         if CD:
142 |             early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min')
143 |             history = multi_model.fit(batchGen(batch_size=3, image_path=imgTrain,
144 |                                                text=txtTrain, label=Train_label_one_hot),
145 |                                       steps_per_epoch=2, epochs=2,
146 |                                       validation_data=batchGen(batch_size=1, image_path=imgVal,
147 |                                                text = txtVal, label=Val_label_one_hot),validation_steps=2,
148 |                                       verbose=1, callbacks=[early_stopping])
149 | 
150 |     # 提取测试集特征
151 |     if old_method:
152 |         multi_feature = multi_model.predict([imgTest, txtTest], batch_size=32)
153 |         label = tf.argmax(test_onehot_label, axis=1)
154 |         result = fx_calc_map_label(multi_feature[:,num_classes:], label)
155 |         print('...多模态图像检索 MAP = {}'.format(result))
156 |     if new_method:
157 |         Batch_size = 3
158 |         n = len(imgTest)
159 |         multi_feature = np.empty((n, out_dim), dtype=np.float32)
160 |         num = n // Batch_size + 1 # 为了防止内存溢出
161 |         for i in range(num):
162 |             start = i * Batch_size
163 |             end = (i + 1) * Batch_size
164 |             end = min(end, n)
165 |             print(f"正在进行{start} to {end}的预测...")
166 |             feature = multi_model.predict(Img_Txt(imgTest, txtTest, start, end))
167 |             multi_feature[start:end] = feature[:, num_classes:]
168 |         # mAP计算
169 |         label = tf.argmax(Tst_label_one_hot, axis=1)
170 |         result = fx_calc_map_label(multi_feature, label)
171 |         print('...多模态图像检索 MAP = {}'.format(result))
172 | 
173 |     if draw:
174 |         # 绘制训练和验证的损失图像
175 |         plt.plot(history.history['loss'])
176 |         plt.plot(history.history['val_loss'])
177 |         plt.title('model loss')
178 |         plt.ylabel('loss')
179 |         plt.xlabel('epoch')
180 |         plt.legend(['train', 'test'], loc='upper left')
181 |         plt.show()
182 | 


--------------------------------------------------------------------------------
/train_model.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from tensorflow.keras.models import Model
  3 | from sklearn.model_selection import train_test_split
  4 | from gensim.models.word2vec import Word2Vec
  5 | from gensim.corpora.dictionary import Dictionary
  6 | from keras.preprocessing import sequence
  7 | from tensorflow.keras.applications.vgg16 import VGG16
  8 | from tensorflow.keras.layers import *
  9 | from keras import backend as K
 10 | from keras import regularizers
 11 | from tensorflow.keras.utils import plot_model
 12 | from tensorflow.keras.callbacks import EarlyStopping
 13 | from tensorflow.python.ops import math_ops
 14 | import numpy as np
 15 | import numpy
 16 | 
# Make float64 the default Keras float type for everything defined below.
tf.keras.backend.set_floatx('float64')
 18 | 
 19 | 
 20 | def text_w2model(model, text_after, max_len):
 21 |     data = []
 22 |     model.build_vocab(text_after)  # input: list遍历一次语料库建立词典
 23 |     model.train(text_after, epochs=20, total_examples=model.corpus_count)  # 第2次遍历语料库简建立神经网络模型
 24 |     gensim_dict = Dictionary()
 25 |     gensim_dict.doc2bow(model.wv.vocab.keys(), allow_update=True)
 26 |     w2indx = {v: k + 1 for k, v in gensim_dict.items()}  # {'T恤':1, '一':2, '一体':3, ...}1580
 27 |     # w2vec = {'T恤':array([-0.10420721, -0.50772285,...])} 1580=词库大小
 28 |     w2vec = {word: model.wv[word] for word in w2indx.keys()}
 29 | 
 30 |     for sentence in text_after:
 31 |         new_txt = []
 32 |         for word in sentence:
 33 |             try:
 34 |                 new_txt.append(w2indx[word])
 35 |             except:
 36 |                 new_txt.append(0)
 37 |         data.append(new_txt)
 38 | 
 39 |     # pad_sequences函数是将序列转化为经过填充以后的一个长度相同的新序列
 40 |     # (3800, 25) 所有文本的词索引  [0, 0, ... , 167, 139]
 41 |     text_data = sequence.pad_sequences(data, maxlen=max_len)  # 大于此长度的序列将被截短，小于此长度的序列将在后部填0，默认为pre
 42 |     # 增强后，将源文本扩充四倍
 43 |     power_text_data = np.repeat(text_data, 4, axis=0)
 44 |     return w2indx, w2vec, text_data, power_text_data
 45 | 
 46 | 
 47 | def get_data(index_dict, word_vectors, vocab_dim):
 48 |     n_symbols = len(index_dict) + 1  # 所有单词的索引数，频数小于3的词语索引为0，所以加1
 49 |     embedding_weights = np.zeros((n_symbols, vocab_dim))  # 初始化索引为0的词语
 50 |     for word, index in index_dict.items():  # 0索引都为0，从索引为1的词语开始，对每个词对应一个词向量
 51 |         embedding_weights[index, :] = word_vectors[word]
 52 |     return n_symbols, embedding_weights
 53 | 
 54 | 
class TextNet(tf.keras.Model):
    """Text branch: frozen embedding -> LSTM(64) -> Dense(512), with dropout.

    Args:
        vocab_dim: dimensionality of the word vectors (embedding output size).
        n_symbols: number of rows of the embedding matrix.
        embedding_weights: initial embedding matrix; the layer is not trainable.
        input_length: sequence length handed to the embedding layer.
    """
    def __init__(self, vocab_dim, n_symbols, embedding_weights, input_length):
        super(TextNet, self).__init__()
        # Pitfall: do not create an Input() inside the constructor, otherwise
        # Keras fails with: TypeError: Expected float64 passed to parameter 'y' of op 'Equal',
        # got 'collections' of type 'str' instead. Error: Expected float64, got 'collections' of type 'str' instead.
        # self.inputs = Input(shape=25, name="text_input")
        # The embedding layer turns each word index into its word vector, so a
        # batch becomes (batch_size, sentence length, word-vector dimension).
        self.embedding = Embedding(output_dim=vocab_dim, input_dim=n_symbols, mask_zero=True,
                                   weights=[embedding_weights],
                                   input_length=input_length, trainable=False)
        self.lstm = LSTM(64, activation='relu')
        self.dropout1 = Dropout(0.2)
        self.dense = Dense(512, activation='relu')
        self.dropout2 = Dropout(0.2)
    # Hidden pitfall: do not put calls in call() whose results are unused; Keras warns about them.
    def call(self, x, training=False, **kwargs):
        """Map a batch of word-index sequences to 512-d text features."""
        x = self.embedding(x)
        x = self.lstm(x)
        x = self.dropout1(x)
        x = self.dense(x)
        x = self.dropout2(x)
        return x
 78 | 
 79 | 
class ImgNet(tf.keras.Model):
    """Image branch: frozen VGG16 convolutional base -> Flatten -> Dense(512).

    Args:
        weight: value forwarded to ``VGG16(weights=...)``.
    """
    def __init__(self, weight="imagenet"):
        super(ImgNet, self).__init__()
        # self.inputs = Input(shape=(224, 224, 3))
        # VGG16() returns a Model; it is built without the classifier head.
        self.conv_base = VGG16(include_top=False, weights=weight, input_shape=(224, 224, 3))
        # Freeze every VGG16 layer so only the layers below are trained.
        for layer in self.conv_base.layers:
            layer.trainable = False
        self.flatten = Flatten()
        self.dense = Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001))
        self.dropout = Dropout(0.2)

    def call(self, inputs, **kwargs):
        """Map a batch of 224x224x3 images to 512-d image features."""
        x = self.conv_base(inputs)
        x = self.flatten(x)
        x = self.dense(x)
        x = self.dropout(x)
        return x
 98 | 
 99 | 
100 | # 一个问题：dense_pre需要手动设置成分类的类别，不然会出现调用错误
class MultiModel(tf.keras.Model):
    """Multimodal model that fuses image and text features.

    The output of ``call`` is a single tensor: the 38 class probabilities
    concatenated with the 512-d fused feature. Loss and metric code slice the
    two parts apart by column.

    NOTE(review): ``num_classes`` and ``out_dim`` are accepted but never read;
    the classifier width is hard-coded to 38 and the feature width to 512.
    """
    def __init__(self, vocab_dim, n_symbols, embedding_weights, input_length,
                 num_classes=38, weight="imagenet", out_dim=512):
        super(MultiModel, self).__init__()
        self.img_net = ImgNet(weight)
        self.text_net = TextNet(vocab_dim, n_symbols, embedding_weights, input_length)
        self.dense_256 = Dense(256, activation='relu')
        self.dense_512 = Dense(512, activation='softmax')
        self.dense_64 = Dense(64, activation='relu')
        self.dense_1 = Dense(1, activation='sigmoid')
        self.dense_2 = Dense(2, activation='softmax')
        self.dense_l2 = Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001))
        self.dense_pre = Dense(38, activation='softmax')
        self.dropout = Dropout(0.2)
        self.multiply = Multiply()
        self.add = Add()
        self.concat_2 = Concatenate()
        self.concat_1 = Concatenate(axis=1)
        self.reshape = Reshape((2, 1))
        self.permute = Permute((2, 1))

    # Channel-attention-like block with a residual connection. The same layer
    # instances are applied to both the image and the text features.
    def res_and_att_block(self, x):
        y = self.dense_256(x)
        y = self.dense_512(y)
        y = self.multiply([y,x])  # re-weight the input feature element-wise
        y = self.add([y, x])  # residual connection
        return y

    # Scoring block: reduces a feature vector to a single sigmoid score.
    def weight_block(self, x):
        y = self.dense_64(x)
        y = self.dense_1(y)
        return y

    # Turn the two scores into a pair of softmax weights, reshaped to (2, 1) per sample.
    def get_weight_block(self, x, y):
        w = self.concat_2([x, y])
        w = self.dense_2(w)
        w = self.reshape(w)
        return w

    # Apply the modality weights and join the two weighted features.
    def weighting(self, x, y, w):
        # Give each feature a leading length-1 axis so the two can be stacked.
        def trans_space(x_dim):
            x_dim = tf.expand_dims(x_dim, axis=-1)
            x_dim = self.permute(x_dim)
            return x_dim

        x = trans_space(x)
        y = trans_space(y)
        z = self.concat_1([x, y])
        # Weighting: (2, 1) * (2, 512) element-wise per sample; the batch
        # dimension does not affect the product.
        z = self.multiply([w, z])
        # After weighting, concatenate the image row and the text row.
        z = self.concat_2([z[:, 0], z[:, 1]])  # z is (batch_size, row, columns)
        return z

    # call() takes a single argument, but that argument may be a list, tuple
    # or dict. Inputs carry the batch dimension first: (batch_size, ...).
    def call(self, inputs, **kwargs):
        res = []
        x, y = inputs  # (images, text index sequences)
        img = self.img_net(x)
        txt = self.text_net(y)

        x = self.res_and_att_block(img)
        y = self.res_and_att_block(txt)

        img_y = self.weight_block(x)
        txt_y = self.weight_block(y)

        w = self.get_weight_block(img_y, txt_y)
        z = self.weighting(x, y, w)

        # Final fully connected layers for classification.
        z = self.dense_l2(z)
        t = self.dropout(z)
        pre = self.dense_pre(t)  # 38-d class probabilities
        # Output = [class probabilities | fused feature before dropout].
        c = self.concat_2([pre, z])

        return c
184 | # 我的损失函数制作成功：
185 | # y_pred为网络预测，并且网络中的call函数只能输出一个参数，
186 | # 这个参数表示成列表或者元组等都不可以，但是可以将模型中的其他特征，可以是好几项通过tf.Concatenate()函数组合成统一的形式输出
187 | # 在构造的Loss子类中通过矩阵切片分别访问各个特征
188 | # 所以在通过模型预测测试集时，不要忘了输出指定特征
189 | # 这里在训练时还有一个坑，就是accurary，loss改变后同时需要自己重写metrics方法,需要定义你是想输出分类准确率还是其他的
class Myloss(tf.keras.losses.Loss):
    """Cross-entropy on the class columns plus a triplet-style term on the features.

    ``y_pred`` is expected to hold the class probabilities in its first 38
    columns and the feature vector in the remaining columns.

    ``compute_loss`` calls ``.numpy()`` on its inputs, so the model must be
    compiled with ``run_eagerly=True``.
    """
    def __init__(self, name="Myloss"):
        super().__init__(name=name)
        self.cross_loss = tf.losses.CategoricalCrossentropy()
        self.multiply = Multiply()  # element-wise product; the matrix product would be tf.matmul()

    def call(self, y_true, y_pred):
        # Earlier variant that only pulled same-class samples together:
        # def compute_loss(y_true, y_pred):
        #     y_pred = y_pred.numpy()
        #     y_true = y_true.numpy()
        #     y_true = np.argmax(y_true, axis=1)
        #     d_i = []
        #     index = list(range(y_true.shape[0])) + list(range(y_true.shape[0]))
        #     for i in range(len(index)):
        #         for j in range(len(index)):
        #             if index[i] != index[j] and y_true[index[i]] == y_true[index[j]] and \
        #                     sorted([index[i], index[j]]) not in d_i:
        #                 d_i.append([index[i], index[j]])
        #         if len(d_i) == 0:
        #             d_i.append([index[i], index[i]])
        #     n = len(d_i)
        #     loss = 0
        #     for a, b in d_i:
        #         loss += 1 * np.log(K.sum(1+np.exp(K.square(y_pred[a] - y_pred[b]))))
        #     return loss
        def compute_loss(y_true, y_pred):
            """Soft triplet term over randomly sampled (anchor, same-class, other) index triples."""
            y_pred = y_pred.numpy()
            y_true = y_true.numpy()
            y_true = np.argmax(y_true, axis=1)  # one-hot -> class id
            d = []
            # Every batch position appears twice in the index list.
            index = list(range(y_true.shape[0])) + list(range(y_true.shape[0]))
            for i in range(len(index)):
                d_i = []
                for j in range(len(index)):
                    # j must be a different sample of the same class as i.
                    # NOTE(review): d_i only ever holds 3-element triples, so
                    # the 2-element `not in d_i` test is always true.
                    if index[i] != index[j] and y_true[index[i]] == y_true[index[j]] and \
                            sorted([index[i], index[j]]) not in d_i:
                        # NOTE(review): k is only required to differ from i;
                        # its class is not checked.
                        for k in range(len(index)):
                            if index[i] != index[k]:
                                d_i.append([index[i], index[j], index[k]])
                # Keep at most three random triples per anchor position.
                np.random.shuffle(d_i)
                d += d_i[:3]
            np.random.shuffle(d)
            n = len(d)
            loss = 0.0
            triplet_count = 1.0  # starts at 1, so the division below never hits zero
            for i, j, k in d:
                w = 1.0
                triplet_count += w
                # log(1 + exp(d(anchor, j) - d(anchor, k))); float128 is requested for the exp.
                # NOTE(review): np.float128 is not available on every platform.
                loss += w * np.log(1 +
                                   np.exp(pairwise_distances(y_pred[index[i]], y_pred[index[j]]) -
                                          pairwise_distances(y_pred[index[i]], y_pred[index[k]]), dtype=np.float128))
            loss /= triplet_count
            return loss

        def pairwise_distances(x, y):
            """Sigmoid of the squared Euclidean distance, clipped to be non-negative."""
            dist = sigmod(K.sum(K.square(x - y)))
            return tf.clip_by_value(dist, 0.0, np.inf)

        def calc_label_sim(label):
            """Label similarity matrix label @ label^T (not called by the active code)."""
            Sim = tf.matmul(label, tf.transpose(label))
            return Sim

        def sigmod(x):
            return tf.keras.activations.sigmoid(x)

        # First 38 columns: class probabilities; remaining columns: features.
        cross_loss = self.cross_loss(y_true, y_pred[:, :38])
        same_class = compute_loss(y_true, y_pred[:, 38:])
        # theta11 = tf.expand_dims(sigmod(K.sum(K.square(y_pred[:, 38:] - y_pred[:, 38:])
        #                                , axis=1)), axis=-1)  # (batch_size, 1)
        # print("theta11",theta11.shape)
        # # subtract the same-class term
        # losss = cross_loss - tf.matmul(calc_label_sim(y_true), theta11)

        return cross_loss+same_class
265 | 
def class_metric(y_true, y_pred):
    """Per-sample 0/1 match between the true class and the predicted class.

    The arg-max of ``y_true`` is compared with the arg-max of the first 38
    columns of ``y_pred``; the boolean result is cast to the Keras float type.
    """
    true_ids = math_ops.argmax(y_true, axis=-1)
    predicted_ids = math_ops.argmax(y_pred[:, :38], axis=-1)
    hits = math_ops.equal(true_ids, predicted_ids)
    return math_ops.cast(hits, K.floatx())
272 | 
def text_model(vocab_dim, n_symbols, embedding_weights, input_length):
    """Build the LSTM text classifier and its 512-unit feature extractor.

    Args:
        vocab_dim: Size of each word vector (embedding output dimension).
        n_symbols: Number of rows in the embedding matrix (embedding input
            dimension).
        embedding_weights: Pre-trained embedding matrix; loaded into the
            embedding layer, which is kept frozen.
        input_length: Number of token ids per input sequence.

    Returns:
        A tuple ``(model_pre, model_512)``: the 38-way softmax classifier and
        a model sharing the same layers that outputs the 512-unit dense
        activation.
    """
    # The input width follows ``input_length`` instead of a hard-coded 25 so
    # the input layer and the embedding layer can never disagree.
    tokens = tf.keras.layers.Input(shape=(input_length,), name="text_input")
    embedded = Embedding(output_dim=vocab_dim, input_dim=n_symbols, mask_zero=True,
                         weights=[embedding_weights], input_length=input_length,
                         trainable=False)(tokens)
    encoded = LSTM(64, activation='relu')(embedded)
    encoded = Dropout(0.2)(encoded)
    feature_512 = Dense(512, activation='relu')(encoded)
    dropped = Dropout(0.2)(feature_512)
    class_probs = Dense(38, activation="softmax")(dropped)
    model_pre = Model(inputs=tokens, outputs=class_probs)
    model_512 = Model(tokens, feature_512)
    return model_pre, model_512
285 | 
def image_model():
    """Build the VGG16-based image classifier and its 512-unit feature extractor.

    Every layer of the ImageNet-initialised VGG16 base is frozen. On top of it
    sit a flatten layer, an L2-regularised 512-unit dense layer, dropout, and a
    38-way softmax.

    Returns:
        A tuple ``(model_pre, model_512)``: the softmax classifier and a model
        sharing the same layers that outputs the 512-unit dense activation.
    """
    backbone = VGG16(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    flat = tf.keras.layers.Flatten()(backbone.output)
    feature_512 = tf.keras.layers.Dense(
        512, activation='relu', kernel_regularizer=regularizers.l2(0.001))(flat)
    dropped = tf.keras.layers.Dropout(0.2)(feature_512)
    class_probs = Dense(38, activation="softmax")(dropped)

    return (Model(inputs=backbone.input, outputs=class_probs),
            Model(backbone.input, feature_512))


--------------------------------------------------------------------------------
/draw.py:
--------------------------------------------------------------------------------
  1 | from gensim.models import Word2Vec
  2 | from sklearn.decomposition import PCA
  3 | import matplotlib.pyplot as plt
  4 | from sklearn.manifold import TSNE
  5 | import os
  6 | import multiprocessing
  7 | import matplotlib.cm as cm
  8 | from load_data import *
  9 | import numpy as np
 10 | import jieba
 11 | import matplotlib
 12 | from mpl_toolkits.mplot3d import Axes3D
 13 | from sklearn.model_selection import train_test_split
 14 | from utils import LoadData
 15 | from train_model import text_w2model
 16 | from all_colors import *
 17 | from time import time
 18 | from PIL import Image
 19 | import pickle
 20 | import wordcloud
 21 | 
plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign renders correctly

cpu_count = multiprocessing.cpu_count()
parent_path = os.path.dirname(__file__)

vocab_dim = 100  # dimensionality of the word vectors
n_iterations = 5  # ideally more..
n_exposures = 3  # word-frequency threshold of 3, passed to Word2Vec as min_count
window_size = 5
input_length = 25  # length of the input sequence
max_len = 25  # testing showed that no sentence is longer than 21
text_load_path = './data/text'
all_img = False
# Loading order: matches the order of the classes in the dataset and the order
# in which the texts are read.
list_name = ['休闲裤', '半身裙', '女牛仔外套', '女牛仔裤', '女衬衫', '女西装', '文胸套装', '无帽卫衣', '棉衣棉服', '毛呢大衣',
             '皮草', '睡袍', '背心吊带', '渔夫帽', '鸭舌帽', '卫衣', '棉衣', '牛仔外套', '牛仔裤', '短袖T恤', '衬衫', '西装',
             '风衣', '马甲', '单肩包', '双肩包', '手提包', '腰包', '钱包', '吊坠', '戒指', '手镯', '中长靴', '商务鞋', '板鞋', '运动鞋', '雪地靴', '高跟鞋']

# NOTE: everything below runs at import time (data loading, Word2Vec training
# and the train/validation/test splits).
text_pre, t_label, _ = get_loader(text_load_path, list_name)
text = [jieba.lcut(document.replace('\n', '')) for document in text_pre]
model = Word2Vec(size=vocab_dim,  # create an empty model object
                 min_count=n_exposures,
                 window=window_size,
                 workers=cpu_count,
                 iter=n_iterations)
model.build_vocab(text)  # input: list; the first pass over the corpus builds the vocabulary
model.train(text, epochs=40, total_examples=model.corpus_count)  # the second pass over the corpus trains the neural network model

# The same test_size and random_state are used for the image split and the
# text split below, presumably so that both select the same samples.
imgTrain, imgTest, label_img_Train, labe_img_Tst = train_test_split(LoadData(), t_label,
                                                                    test_size=0.2, random_state=5)
imgTrain, imgVal, label_img_Train, labe_img_Val = train_test_split(imgTrain, label_img_Train,
                                                                   test_size=0.1, random_state=5)
_, text_jieba, _, _ = train_test_split(text_pre, t_label,
                                       test_size=0.2, random_state=5)
# Commented-out snippet that counts the samples per class in the test labels:
# l = labe_img_Tst
# index = [None] * 38
# for i in range(38):
#     index[i] = np.where(i == l)[0].shape[0]
# print(index)
# Number of test-set samples in each class
test_sizes = [21, 20, 18, 18, 23, 22, 24, 16, 19, 16, 20, 16, 18, 15, 20, 21,
              20, 21, 22, 20, 21, 17, 18, 21, 15, 22, 24, 18, 16, 13, 29, 18, 21,
              30, 19, 20, 20, 28]
all_sizes = [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
             100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]
 68 | 
 69 | 
 70 | # 词的t-SNE显示
 71 | def t_SNE_2d():
 72 |     words_ak = []
 73 |     embeddings_ak = []
 74 |     for word in list(model.wv.vocab):
 75 |         embeddings_ak.append(model.wv[word])
 76 |         words_ak.append(word)
 77 | 
 78 |     tsne_ak_2d = TSNE(perplexity=50, n_components=2, init='pca', n_iter=3500, random_state=32)
 79 |     embeddings_ak_2d = tsne_ak_2d.fit_transform(embeddings_ak)
 80 | 
 81 |     def tsne_plot_2d(embeddings, words, a=1):
 82 |         plt.figure(figsize=(16, 9))
 83 |         colors = cm.rainbow(np.linspace(0, 1, 1))
 84 |         x = embeddings[:, 0]
 85 |         y = embeddings[:, 1]
 86 |         plt.scatter(x, y, c=colors, alpha=a, label="商品文本")
 87 |         for i, word in enumerate(words):
 88 |             plt.annotate(word, alpha=0.5, xy=(x[i], y[i]), xytext=(5, 2), color="black",
 89 |                          textcoords='offset points', ha='right', va='bottom', size=10, weight="medium")
 90 |         plt.legend(loc=4)
 91 |         plt.grid(True)
 92 |         plt.savefig("t_SNE_2d.png", format='png', dpi=150, bbox_inches='tight')
 93 |         plt.show()
 94 | 
 95 |     tsne_plot_2d(embeddings_ak_2d, words=words_ak)
 96 | 
 97 | 
 98 | # 词的t-SNE显示
 99 | def t_SNE_3d():
100 |     words_wp = []
101 |     embeddings_wp = []
102 |     for word in list(model.wv.vocab):
103 |         embeddings_wp.append(model.wv[word])
104 |         words_wp.append(word)
105 |     tsne_wp_3d = TSNE(perplexity=30, n_components=3, init='pca', n_iter=3500, random_state=12)
106 |     embeddings_wp_3d = tsne_wp_3d.fit_transform(embeddings_wp)
107 | 
108 |     def tsne_plot_3d(title, embeddings, a=1):
109 |         fig = plt.figure()
110 |         ax = Axes3D(fig)
111 |         colors = cm.rainbow(np.linspace(0, 1, 1))
112 |         plt.scatter(embeddings[:, 0], embeddings[:, 1], embeddings[:, 2], c=colors, alpha=a, label="文本词")
113 |         plt.legend(loc=4)
114 |         plt.title(title)
115 |         plt.show()
116 | 
117 |     tsne_plot_3d('商品文本', embeddings_wp_3d, a=0.1)
118 | 
119 | 
120 | # 词的t-SNE显示
def PCA():
    """Project the Word2Vec vocabulary to 2-D with PCA and plot it.

    Every word vector is reduced to two principal components, drawn as a
    scatter plot and annotated with its word.
    """
    # This function rebinds the module-level name ``PCA``, so the scikit-learn
    # class has to be imported under an alias; calling ``PCA(n_components=2)``
    # here would call this function again and raise TypeError.
    from sklearn.decomposition import PCA as SklearnPCA

    words = list(model.wv.vocab)
    # Index the KeyedVectors directly; indexing the model itself is deprecated.
    X = model.wv[words]
    result = SklearnPCA(n_components=2).fit_transform(X)

    # Visualise the projection.
    plt.scatter(result[:, 0], result[:, 1])
    for i, word in enumerate(words):
        plt.annotate(word, xy=(result[i, 0], result[i, 1]))
    plt.show()
132 | 
133 | 
134 | # 初始数据集的t-SNE显示，包括训练集和测试集全部数据集的显示也可以
def get_data(img_path, label, img_size=100):
    """Load images as flattened, [0, 1]-scaled RGB feature rows.

    Args:
        img_path: Sequence of image file paths.
        label: Labels belonging to ``img_path``; returned unchanged.
        img_size: Side length each image is resized to.

    Returns:
        A tuple ``(features, label)`` where ``features`` is a float64 array of
        shape ``(len(img_path), 3 * img_size * img_size)`` whose row ``i``
        holds the pixels of ``img_path[i]``.
    """
    features = np.empty((len(img_path), 3 * img_size * img_size), dtype=np.float64)
    for i, path in enumerate(img_path):
        # The context manager closes the file; convert("RGB") guarantees three
        # channels so grayscale/RGBA files still fit the row width.
        with Image.open(path) as img:
            resized = img.convert("RGB").resize((img_size, img_size))
        # Row i (not i - 1) keeps every feature row aligned with label[i].
        features[i] = np.asarray(resized, dtype=np.float64).reshape(-1) / 255.0
    return features, label
143 | 
144 | 
def plot_embedding(data, label, title):
    """Scatter-plot a 2-D embedding, coloured by class label.

    Args:
        data: Array whose first two columns are the point coordinates; each
            column is min-max scaled before plotting.
        label: Integer class label per row; the colour used for a point is
            ``color_names[label[i] + 40]``.
        title: Title of the figure.

    Returns:
        The matplotlib figure that was created.
    """
    lo, hi = np.min(data, 0), np.max(data, 0)
    span = hi - lo
    # A constant column would divide by zero; scale it by 1 instead.
    span = np.where(span == 0, 1, span)
    scaled = (data - lo) / span

    fig = plt.figure()
    ax = fig.add_subplot(111)
    point_colors = [color_names[label[i] + 40] for i in range(scaled.shape[0])]
    # One scatter call with a colour per point instead of one artist per point.
    ax.scatter(scaled[:, 0], scaled[:, 1], color=point_colors)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(title)
    return fig
160 | 
161 | 
def feature_t_sne(data, label):
    """Embed ``data`` in 2-D with t-SNE and show it coloured by ``label``.

    Args:
        data: Feature matrix passed to ``TSNE.fit_transform``.
        label: Class label per row, forwarded to ``plot_embedding``.
    """
    print('Computing t-SNE embedding')
    tsne = TSNE(n_components=2, init='pca', random_state=0)
    started = time()
    result = tsne.fit_transform(data)
    elapsed = time() - started
    plot_embedding(result, label, 'T-sne embedding of text (time %.2fs)' % elapsed)
    # pyplot.show() takes no figure; a positional argument is read as ``block``.
    plt.show()
171 | 
172 | 
173 | # 原始测试集t-SNE分布
def test_img_set_t_sne(imgTest, labe_img_Tst):
    """Show the t-SNE distribution of the raw test-set images.

    Args:
        imgTest: Image paths, loaded and flattened through ``get_data``.
        labe_img_Tst: Class label per image, used to colour the points.
    """
    data, label = get_data(imgTest, labe_img_Tst)
    print('Computing t-SNE embedding')
    tsne = TSNE(n_components=2, init='pca', random_state=0)
    started = time()
    result = tsne.fit_transform(data)
    elapsed = time() - started
    plot_embedding(result, label,
                   'T-sne embedding of test set images (time %.2fs)' % elapsed)
    # pyplot.show() takes no figure; a positional argument is read as ``block``.
    plt.show()
184 | 
185 | 
186 | # test_img_set_t_sne(imgTest, labe_img_Tst)
187 | 
188 | 
189 | # 测试集，图文特征的t-SNE
def test_multi_feature():
    """Show the t-SNE plot of the features stored in ``data/multi_feature.pkl``."""
    # The context manager closes the pickle file once it has been read.
    with open(os.path.join(parent_path, "data/multi_feature.pkl"), "rb") as f:
        multi_feature = pickle.load(f)
    feature_t_sne(multi_feature, labe_img_Tst)
194 | 
195 | 
196 | # test_multi_feature()
197 | 
def test_img_feature():
    """Show the t-SNE plot of the features stored in ``data/img_feature.pkl``."""
    # The context manager closes the pickle file once it has been read.
    with open(os.path.join(parent_path, "data/img_feature.pkl"), "rb") as f:
        img_feature = pickle.load(f)
    feature_t_sne(img_feature, labe_img_Tst)
202 | 
203 | 
204 | # test_img_feature()
205 | 
def test_txt_feature():
    """Show the t-SNE plot of the features stored in ``data/txt_feature.pkl``."""
    # The context manager closes the pickle file once it has been read.
    with open(os.path.join(parent_path, "data/txt_feature.pkl"), "rb") as f:
        txt_feature = pickle.load(f)
    feature_t_sne(txt_feature, labe_img_Tst)
210 | 
211 | 
212 | # test_txt_feature()
213 | 
def test_txt_lstm_feature():
    """Show the t-SNE plot of the features stored in ``data/lstm_feature.pkl``."""
    # The context manager closes the pickle file once it has been read.
    with open(os.path.join(parent_path, "data/lstm_feature.pkl"), "rb") as f:
        lstm_feature = pickle.load(f)
    feature_t_sne(lstm_feature, labe_img_Tst)
218 | 
219 | 
220 | # test_txt_lstm_feature()
221 | 
222 | 
223 | # 词云展示
def test_all_text_word_wordcloud():
    """Draw a word cloud from every file under ``data/txt``.

    All files are read as UTF-8, concatenated, segmented with jieba and fed to
    a 2500x1500 word cloud limited to 1000 words.
    """
    print("开始绘制词云...")
    txt_path = os.path.join(parent_path, "data/txt")
    chunks = []
    for file_name in os.listdir(txt_path):
        # Close each file as soon as it has been read.
        with open(os.path.join(txt_path, file_name), "r", encoding="utf-8") as f:
            chunks.append(f.read())
    # Join once instead of growing a string inside the loop.
    big_txt = "".join(chunks)
    txt = " ".join(jieba.lcut(big_txt))
    wc = wordcloud.WordCloud(font_path="msyh.ttc",
                             width=2500, height=1500, background_color="white", max_words=1000)
    wc.generate(txt)
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
244 | 
245 | 
246 | # test_all_text_word_wordcloud()
247 | 
def test_txt_word_cloud(text_jieba):
    """Draw a word cloud from the given texts.

    Args:
        text_jieba: Iterable of strings; each one is followed by a newline,
            the result is segmented with jieba and rendered as a 2500x1500
            word cloud limited to 300 words.
    """
    print("开始绘制词云...")
    joined = "".join(line + "\n" for line in text_jieba)
    tokens = jieba.lcut(joined)
    cloud = wordcloud.WordCloud(font_path="msyh.ttc", width=2500, height=1500,
                                background_color="white", max_words=300)
    cloud.generate(" ".join(tokens))
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
262 | # test_txt_word_cloud(text_jieba)
263 | 
264 | 
def test_set_pie_chart(sizes=test_sizes):
    """Draw a pie chart of the number of samples per class.

    Args:
        sizes: Sample count for each class, in the order of ``list_name``.
    """
    print("开始绘制饼状图...")
    # labeldistance: radial position of the class labels (1.1 x radius).
    # autopct: format of the percentage text drawn inside each wedge.
    # startangle: angle at which the first wedge starts.
    # pctdistance: distance of the percentage text from the centre.
    # Returns the wedges, the outer label texts and the percentage texts.
    _, l_text, p_text = plt.pie(sizes, labels=list_name, colors=color_name_38,
                                labeldistance=1.1, autopct='%2.0f%%', shadow=False,
                                startangle=90, pctdistance=0.8)

    # set_size is a method: assigning to it would overwrite the method and
    # leave the font size untouched.
    for t in l_text:
        t.set_size(30)
    for t in p_text:
        t.set_size(20)
    # Equal axis scaling so the pie is drawn as a circle.
    plt.axis('equal')
    # bbox_to_anchor moves the legend relative to the axes so that it does not
    # overlap the pie.
    plt.legend(loc='upper left', bbox_to_anchor=(-0.1, 1))
    plt.title('Data set pie chart', loc="right", fontsize="xx-large")
    plt.grid()
    # ``block`` has to be passed by keyword.
    plt.show(block=True)
293 | # test_set_pie_chart(sizes=all_sizes)
294 | 
295 | 
# Final mAP results of text retrieval
# NOTE(review): the plotting functions visible in this file define their own
# local copies of these numbers; confirm whether these lists are used anywhere.
text_mAP = [0.4092, 0.5894, 0.6357]
img_mAP = [0.4000, 0.5608, 0.6367]
cross_loss = [0.43605, 0.63707, 0.6769]
c_t_mAP = [0.4520, 0.6470, 0.7068]
301 | 
302 | 
def test_result_Histogram():
    """Draw a grouped bar chart of mAP at @1, @5 and @10 for four methods.

    Every bar is annotated with its own value. Bars and annotations are
    generated from the same series table, so they cannot drift apart.
    """
    mark = 0.15  # horizontal offset between neighbouring bars of one group
    width = 0.15
    R = ["@1", "@5", "@10"]
    # (legend label, mAP at each cut-off, bar colour), in left-to-right order.
    # NOTE(review): which of the first two rows is text and which is image
    # follows the legend labels used so far -- confirm against the experiments.
    series = [
        ("Text_only", [0.4092, 0.5894, 0.6357], "royalblue"),
        ("Image_only", [0.4000, 0.5608, 0.6367], "gray"),
        ("cross_loss", [0.4360, 0.6370, 0.6769], "burlywood"),
        ("cross_and_triplet_loss", [0.4520, 0.6470, 0.7068], "darkblue"),
    ]
    # A grouped bar chart needs explicitly managed x coordinates.
    xticks = np.arange(len(R))

    fig, ax = plt.subplots(figsize=(10, 9))

    for j, (name, values, color) in enumerate(series):
        xs = xticks + j * mark
        ax.bar(xs, values, width=width, label=name, color=color)
        for x, value in zip(xs, values):
            # The text is passed positionally because its keyword name differs
            # between matplotlib versions.
            ax.annotate(
                str(value),
                xy=(x, value * (1 + j / 200)),  # slightly above the bar top
                fontsize=10,
                color="black",
                ha="center",
                va="baseline",
            )
    ax.set_ylabel("mAP")
    ax.legend()
    ax.set_xticks(xticks + 0.2)
    ax.set_xticklabels(R)
    plt.show()
347 | # test_result_Histogram()
348 | 
349 | # 折线图
def Line_chart():
    """Draw one line per method showing its mAP at the three cut-offs.

    The three x positions correspond to the cut-offs @1, @5 and @10.
    """
    # (legend label, mAP at each cut-off), in plotting order.
    # NOTE(review): which of the first two rows is image and which is text
    # should be confirmed against the experiments.
    series = [
        ("Image_only", [0.400, 0.5608, 0.6367]),
        ("Text_only", [0.4092, 0.5894, 0.6357]),
        ("cross_loss", [0.4360, 0.6370, 0.6769]),
        ("cross_and_triplet_loss", [0.4520, 0.6470, 0.7068]),
    ]
    color = ['red', 'yellow', 'green', 'blue', 'black']

    plt.figure(figsize=(7, 4))
    for (name, values), line_color in zip(series, color):
        plt.plot(range(3), values, c=line_color, label=name)
    # Call legend(); assigning to plt.legend would replace the pyplot function
    # for the rest of the process and draw nothing.
    plt.legend(loc='upper left')
    # NOTE(review): the axis labels and title below do not obviously describe
    # mAP at @1/@5/@10 -- confirm the intended wording.
    plt.xlabel('Month')
    plt.ylabel('Rate')
    plt.title('Rate to Month')
    plt.tick_params(axis='both')
    plt.show()


--------------------------------------------------------------------------------