├── data_preprocess ├── __init__.py ├── text_process.py ├── make_dataset.py ├── make_power_data.py └── generate_npy.py ├── image ├── picture.png └── result.png ├── requirments.txt ├── README.md ├── evaluate.py ├── stop_words.py ├── fasion_utils.py ├── fasion_dataset.py ├── all_colors.py ├── utils.py ├── load_data.py ├── compare.py ├── main.py ├── train_model.py └── draw.py /data_preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /image/picture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redysky/multimodel/HEAD/image/picture.png -------------------------------------------------------------------------------- /image/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redysky/multimodel/HEAD/image/result.png -------------------------------------------------------------------------------- /requirments.txt: -------------------------------------------------------------------------------- 1 | python~=3.8.1 2 | numpy~=1.18.5 3 | jieba~=0.42.1 4 | matplotlib~=3.2.1 5 | tqdm~=4.46.0 6 | pandas~=1.0.3 7 | opencv-python~=4.3.0.38 8 | sklearn~=0.0 9 | scikit-learn~=0.23.1 10 | imgaug~=0.4.0 11 | Pillow~=8.2.0 12 | scipy~=1.4.1 13 | tensorflow~=2.2.0 14 | gensim~=3.8.3 15 | Keras~=2.3.1 16 | chardet~=3.0.4 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multimodal commodity image retrieval 2 | ## 多模态商品图像检索 3 | Not finished yet... 4 | 5 | ## introduce 6 | explain:The specific description of the project and the product image data set will be supplemented in the future. Welcome to star in advance 7 | 8 | ![image text](https://github.com/redysky/multimodel/blob/master/image/picture.png) 9 | 10 | 使用商品图像数据集的检索结果mAP 11 | ![image text](https://github.com/redysky/multimodel/blob/master/image/result.png) 12 | 13 | ## CD 商品图像数据集 (https://cs.hrbcu.edu.cn/info/1267/1416.htm) 14 | 并提供二进制文件(https://drive.google.com/drive/folders/1Ch3Y9Tek5MQyXLYeJpWQ1oe_YcwNf5c_?usp=sharing) 15 | ## Fasion-200k 16 | 需要初始化path和label_path,运行fasion_dataset.py将会得到训练集和测试集的图片路径,所有过滤后的文本数据以及标签(https://www.kaggle.com/mayukh18/fashion200k-dataset) 17 | 18 | ## in addition 19 | ``` 20 | python main.py 21 | ``` 22 | 23 | 所有需要的包都在requirments.txt, 代码中包含了众多注释,你可以在其中发现他们 24 | All required packages are in requirements.txt The code contains many comments, which you can find in them 25 | 26 | # 如果觉得还错欢迎star 27 | If you have any questions, please contact me 28 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.spatial 3 | 4 | 5 | # 结果数组中的第一行数据表示的是image数组中第一个元素点与image数组中各个元素点的距离,计算两点之间的距离 6 | def fx_calc_map_label(image, label, k=10, dist_method='L2'): 7 | if dist_method == 'L2': 8 | dist = scipy.spatial.distance.cdist(image, image, 'euclidean') 9 | elif dist_method == 'COS': 10 | dist = scipy.spatial.distance.cdist(image, image, 'cosine') 11 | 12 | ord = dist.argsort() 13 | numcases = dist.shape[0] 14 | res = [] # mAP,测试集100个样本的平均准确率 15 | for i in range(numcases): # 所有行的循环,待检索图像/文本的循环 16 | order = ord[i] 17 | p = 0.0 # 精度,分母为当前返回的图像个数,大白话 --> 返回的7张图像中有来自同一类的个数 18 | r = 0.0 # 计数,在数据库中,与当前待检索数据库中当前待检索图像,同一类的图像个数 19 | 20 | for j in range(1, k+1): # 被检索数据库的循环,这里的一个坑,不要将自身纳入待检索库中 21 | if label[i] == label[order[j]]: 22 | r += 1 23 | p += (r / (j + 1)) 24 | if r > 0: 25 | res += [p / r] 26 | else: 27 | res += [0] 28 | return np.mean(res) 29 | 30 | 31 | if __name__ == "__main__": 32 | img = np.random.randint(0, 100, (10, 3)) 33 | label = np.random.randint(0, 4, 100) 34 | print(fx_calc_map_label(img, label)) 35 | -------------------------------------------------------------------------------- /data_preprocess/text_process.py: -------------------------------------------------------------------------------- 1 | #文本预处理的程序---去停用词,处理后转存到一个文件夹中--text 2 | from collections import Counter 3 | import jieba 4 | import os 5 | import chardet 6 | 7 | 8 | def text_process(): 9 | cur_path = os.path.dirname(__file__) # 获取当前文件路径 10 | parent_path = os.path.dirname(cur_path) # 获取当前文件夹父目录 11 | stop_list = "./data/去停用词.txt" 12 | # 需要自己制作一个所有文本(txt)的集合,就是放在一个文件夹(o_path)中 13 | o_path = os.path.join(parent_path, r'data//Unprocessed_text/') 14 | # 最终的文本数据路径 15 | f_path=os.path.join(parent_path, 'data/text/') 16 | outstr='' 17 | stopwords=[' ','(',')','」','「',"'",'^','|'] 18 | for line in open(stop_list,encoding='utf-8',errors='ignore').readlines(): 19 | stopwords.append(line.strip()) 20 | 21 | list_text=[] 22 | for i in os.listdir(o_path): 23 | list_text.append(i) 24 | # print(list_text) 25 | 26 | for i in range(len(list_text)): 27 | if chardet.detect(open(os.path.join(o_path+list_text[i]),'rb').read())['encoding']=='GB2312': 28 | for line in open(os.path.join(o_path+list_text[i]),encoding='ansi',errors='ignore'): 29 | for word in line: 30 | if word not in stopwords: 31 | if word !='\t': 32 | outstr += word 33 | open(os.path.join(f_path+list_text[i]),'w',encoding='utf-8-sig').write(outstr+'\n') 34 | outstr='' 35 | else: 36 | for line in open(os.path.join(o_path+list_text[i]),encoding='utf-8-sig',errors='ignore'): 37 | for word in line: 38 | if word not in stopwords: 39 | if word !='\t': 40 | outstr += word 41 | open(os.path.join(f_path+list_text[i]),'w',encoding='utf-8-sig').write(outstr+'\n') 42 | outstr='' 43 | 44 | 45 | text_process() -------------------------------------------------------------------------------- /stop_words.py: -------------------------------------------------------------------------------- 1 | # 根据需要酌情删减 2 | stop_words = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 3 | 'yourselves', 'he', 4 | 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 5 | 'theirs', 6 | 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 7 | 'were', 'be', 8 | 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 9 | 'but', 'if', 10 | 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 11 | 'into', 12 | 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 13 | 'off', 'over', 14 | 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 15 | 'both', 16 | 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 17 | 'than', 'too', 18 | 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 19 | 'ain', 'aren', 20 | 'couldn', 'didn', 'doesn', 'hadn', 'hasn', 'haven', 'isn', 'ma', 'mightn', 'mustn', 'needn', 'shan', 21 | 'shouldn', 'wasn', 22 | 'weren', 'won', 'wouldn', "", "ab"] 23 | symbol = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "/", "\\", "<", ">", "~", "@", "#", "$", "%", "^", 24 | "&", "*", ".", ":", "'", ";", "?", "-", "+", "=", "`", "!"] 25 | -------------------------------------------------------------------------------- /fasion_utils.py: -------------------------------------------------------------------------------- 1 | from fasion_dataset import * 2 | import numpy as np 3 | import pandas as pd 4 | import os 5 | import matplotlib.pyplot as plt 6 | from sklearn.utils import shuffle 7 | import matplotlib.image as mpimg 8 | from imgaug import augmenters as iaa 9 | import cv2 as cv 10 | from sklearn.model_selection import train_test_split 11 | from PIL import Image 12 | from train_model import get_data 13 | from train_model import text_w2model 14 | from sklearn.model_selection import train_test_split 15 | from load_data import generate_method 16 | import matplotlib.image as mpimg 17 | 18 | cpu_count = multiprocessing.cpu_count() 19 | 20 | 21 | def initialization(): 22 | img_path, txt_data, label, all_words = Fashion_200k(path, label_path) 23 | model = Word2Vec(size=500, # 建立一个空的模型对象,设置词向量的维度为100 24 | min_count=5, # 频数 25 | window=3, # 窗口大小 26 | workers=cpu_count, 27 | iter=5) 28 | w2indx, w2vec, text_data, _ = text_w2model(model, all_words, max_len=15) 29 | n_symbols, embedding_weights = get_data(w2indx, w2vec, vocab_dim=500) 30 | return img_path, text_data, label, n_symbols, embedding_weights 31 | 32 | 33 | def Processing(imgPath): 34 | img = mpimg.imread(imgPath) 35 | img = cv.resize(img, (224, 224)) 36 | img = img / 255 37 | return img 38 | 39 | 40 | def BatchGen(batch_size, image_path, text, label): 41 | while True: 42 | imageBatch, textBatch, labelBatch = [], [], [] 43 | for _ in range(batch_size): 44 | index = np.random.randint(0, len(image_path)) 45 | image = Processing(image_path[index]) 46 | imageBatch.append(image) 47 | textBatch.append(text[index]) 48 | labelBatch.append(label[index]) 49 | 50 | # 对列表中的元素加入随机性,打乱,固定打乱顺序 51 | state = np.random.get_state() 52 | np.random.shuffle(imageBatch) 53 | np.random.set_state(state) 54 | np.random.shuffle(textBatch) 55 | np.random.set_state(state) 56 | np.random.shuffle(labelBatch) 57 | yield (np.asarray(imageBatch), np.asarray(textBatch)), np.asarray(labelBatch) 58 | -------------------------------------------------------------------------------- /data_preprocess/make_dataset.py: -------------------------------------------------------------------------------- 1 | # 数据集的图片处理---包括将所有图片排号和将所有图片转换为JPG格式 2 | # 对原始数据集的处理,将其转变为这种形式 图片(1,2,3 ……total_img), 3 | # 并存储到final_path中,得到的是原始图像集,和增强后的图像集 4 | import os 5 | from PIL import Image 6 | import cv2 as cv 7 | # from make_power_data import * 8 | # from generate_npy import * 9 | 10 | 11 | def make_dataset(): 12 | # 两步 13 | # 1.定位到每一张图片源路径 14 | # 2.遍历每一张图片转换格式并保存 15 | cur_path = os.path.dirname(__file__) # 获取当前文件路径 16 | parent_path = os.path.dirname(cur_path) # 获取当前文件夹父目录 17 | f_path = os.path.join(parent_path,r'商品数据集') # 源文件位置,使用时需要将 数据集描述.txt删除 18 | s_path = os.path.join(parent_path,r'E:/aaa') # mkdir() #按自己的要求实现的保存路径(不用管) 19 | final_path = os.path.join(parent_path,r'data/img') # 需要保存的路径 20 | list_1 = os.listdir(f_path) 21 | list_ = [] 22 | total_list = [] 23 | for i in range(len(list_1)): 24 | s_1 = os.path.join(f_path, list_1[i]) # 一级目录 25 | list_2 = os.listdir(s_1) # 这里定位到每个类顺序是随机的 26 | total_list += list_2 27 | for j in range(len(list_2)): 28 | s_2 = os.path.join(s_1, list_2[j]) 29 | s_3 = os.path.join(s_2, '图像') # 定位到图像 30 | list_3 = os.listdir(s_3) 31 | list_3.sort(key=lambda x: int(x[:-4])) 32 | for k in range(100): 33 | s_4 = os.path.join(s_3, list_3[k]) 34 | list_.append(s_4) ##定位到每一张图片的路径 35 | 36 | # s_5 = os.path.join(s_path,list_2[j]) #s_path的保存路径 37 | # os.makedirs(s_5) 38 | count = 0 39 | for s in range(len(total_list)): 40 | wait_save_path = os.path.join(s_path, total_list[s]) 41 | for a in range(s * 100, s * 100 + 100): 42 | img = Image.open(list_[a]) # 待保存图片的完整路径 43 | if Image.open(list_[a]).format == 'PNG': # 将所有的RGBA图片转换为RGB 44 | ss = Image.open(list_[a]).convert('RGB') 45 | img.save(final_path + '/' + str(a + 1) + '.jpg') # .save()的格式为路径+需要保存的图像的名称 46 | print(final_path + '/' + str(a + 1) + '.jpg', '已保存') 47 | else: 48 | pass 49 | img.save(final_path + '/' + str(a + 1) + '.jpg') 50 | print(final_path + '/' + str(a + 1) + '.jpg', '已保存') 51 | print("每个类的顺序:",total_list) 52 | # 这里打印存储的每个类的顺序,对应main.py中的list_name 53 | # ['休闲裤', '半身裙', '女牛仔外套', '女牛仔裤', '女衬衫', '女西装', '文胸套装', '无帽卫衣', '棉衣棉服', 54 | # '毛呢大衣', '皮草', '睡袍', '背心吊带', '渔夫帽', '鸭舌帽', '卫衣', '棉衣', '牛仔外套', '牛仔裤', '短袖T恤', 55 | # '衬衫', '西装', '风衣', '马 甲', '单肩包', '双肩包', '手提包', '腰包', '钱包', '吊坠', '戒指', '手镯', '中长靴', 56 | # '商务鞋', '板鞋', '运动鞋', '雪地靴', '高跟鞋'] 57 | 58 | 59 | # 生成原始图像集 60 | make_dataset() 61 | # # 生成增强图像集 62 | # make_data_power() 63 | 64 | -------------------------------------------------------------------------------- /fasion_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | import numpy as np 4 | from stop_words import * 5 | from tqdm import tqdm 6 | from train_model import text_w2model 7 | from gensim.models.word2vec import Word2Vec 8 | from gensim.corpora.dictionary import Dictionary 9 | import multiprocessing 10 | 11 | 12 | path = r'E:\数据集\Fashion-200k' # 数据集路径 13 | label_path = "labels/labels" # 标签路径 14 | 15 | 16 | def Fashion_200k(path, label_path, split="train"): 17 | all_label_path = os.path.join(path, label_path) 18 | all_name = os.listdir(all_label_path) 19 | all_words = [] # 所有文本的词 20 | all_txt = [] # 文本数据 21 | label_data = [] # 标签 22 | i = 0 # 总类 = 5 23 | all_img_path = [] # 所有图像的路径 24 | max_len = 0 # 训练集最大 = 13 ,测试集最大 = 12 25 | count = 1 # 计数用 26 | for name in all_name: 27 | if name.split("_")[1] == split: # dress_train_detect_all 28 | print(f"正在操作{name}...") 29 | with open(os.path.join(all_label_path, name), "r", encoding='utf-8') as f: 30 | contents = f.readlines() 31 | for content in contents: 32 | print(f"正在读取Fasion-200k 第 {count} 行...") 33 | value = content.split() 34 | tmp = [] # 需要先初始化,保存每一段去停用词后的文本 35 | # 先去停用词后将所有词保存至列表,并保存文本序列 36 | txt = " ".join(value[2:]) # 字符串 # .gray's delaney crochet sleeve dress 37 | for j in symbol: 38 | txt = txt.replace(j, " ") # 干净的字符串 39 | txt_list = txt.split(" ") # ['', 'gray', 's', 'delaney', 'crochet', 'sleeve', 'dress'] 40 | for word in txt_list: # 这里需要保证读取顺序 41 | if word not in stop_words: 42 | tmp.append(word) 43 | all_words.append(tmp) # 分词后的每一段文本一个列表,等待word2vec转换 44 | all_txt.append(" ".join(tmp)) 45 | label_data.append(i) 46 | all_img_path.append(os.path.join(path, value[0])) 47 | count += 1 48 | i += 1 49 | return all_img_path, all_txt, label_data, all_words 50 | 51 | 52 | if __name__ == "__main__": 53 | cpu_count = multiprocessing.cpu_count() 54 | img_path, txt_data, label, all_words = Fashion_200k(path, label_path) 55 | # model = Word2Vec(size=500, # 建立一个空的模型对象,设置词向量的维度为100 56 | # min_count=5, # 频数 57 | # window=3, # 58 | # workers=cpu_count, 59 | # iter=5) 60 | # w2indx, w2vec, text_data, power_text_data = text_w2model(model, all_words, max_len=15) 61 | # 62 | # print("最大长度为", max_len) 63 | # print("img_path", len(img_path)) 64 | # print(f"txt {len(txt_data)}") 65 | # print("label", len(label)) 66 | # print("词", len(all_words)) 67 | # print(all_words[:3]) 68 | -------------------------------------------------------------------------------- /data_preprocess/make_power_data.py: -------------------------------------------------------------------------------- 1 | # 功能:将原始数据集中的图片使用ImageDataGenerator增强,将增强后的图像保存至save_path中 2 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 3 | import numpy as np 4 | import os 5 | from PIL import Image 6 | from tqdm import tqdm 7 | 8 | 9 | def make_data_power(): 10 | cur_path = os.path.dirname(__file__) # 获取当前文件路径 11 | parent_path = os.path.dirname(cur_path) # 获取当前文件夹父目录 12 | tmp_path = 'data/Temporary_folder' # # 临时文件夹 13 | train_path = os.path.join(parent_path,tmp_path) 14 | if not os.path.exists(train_path): 15 | os.makedirs(train_path) 16 | start_file = os.path.join(parent_path,'data/img') # 原始图片集文件夹 17 | 18 | number = 3 # 增强数目 19 | save_path = os.path.join(parent_path, 'data/power_img') # 增强图像集文件夹 20 | 21 | train_datagen = ImageDataGenerator( 22 | rotation_range=20, 23 | width_shift_range=0.2, 24 | height_shift_range=0.3, 25 | shear_range=0.2, zoom_range=0.25, 26 | horizontal_flip=True, vertical_flip=True) 27 | 28 | # 由于库函数的特殊性(需要将待增强的图像放到文件夹里传入库函数), 29 | # 所以需要保证数据集中的图片一个图片一个文件夹,结构为 文件夹(文件夹(图像)) 30 | # 将图片转存至临时文件夹中 31 | def gen_file(): 32 | for i in tqdm(range(len(os.listdir(start_file)))): 33 | save_file = train_path + '/' + str(i + 1) + '/' + str(i + 1) + '/' + str(i + 1) 34 | os.makedirs(train_path + '/' + str(i + 1) + '/' + str(i + 1)) 35 | img = Image.open(start_file + '/' + str(i + 1) + '.jpg') 36 | img.save(save_file + '.jpg') 37 | 38 | if len(os.listdir(train_path)) == 0: 39 | gen_file() 40 | n = len(os.listdir(train_path)) 41 | # 完成图像的增强操作,并保存至final_path,也即临时文件夹图像中增加到四张 42 | for i in tqdm(range(n)): 43 | start_path = train_path + '/' + str(i + 1) # E/1 44 | final_path = train_path + '/' + str(i + 1) + '/' + str(i + 1) # E/1/1 45 | for i in range(number): 46 | _, _ = next(train_datagen.flow_from_directory(start_path, target_size=(224, 224), 47 | batch_size=1, shuffle=True, seed=4, save_to_dir=final_path, 48 | save_format='jpg', follow_links=True)) 49 | # 完成对临时文件夹的转存操作,将其存储为这种形式 power_data(1,2,3,4,...,total_img) 50 | for i in tqdm(range(n)): 51 | list_pic = os.listdir( 52 | train_path + '/' + str(i + 1) + '/' + str(os.listdir(train_path + '/' + str(i + 1))[0])) # E:/aaa/1/1 53 | # 算上原始图像list_pic应该等于4 54 | # 逐一打开每张图像并将其改名保存至save_path,jpg格式 55 | for j in range(len(list_pic)): 56 | img = Image.open( 57 | train_path + '/' + str(i + 1) + '/' + str(os.listdir(train_path + 58 | '/' + str(i + 1))[0]) + '/' + list_pic[j]) 59 | img.save(save_path + '/' + str(j + i * len(list_pic) + 1) + '.jpg') 60 | 61 | -------------------------------------------------------------------------------- /data_preprocess/generate_npy.py: -------------------------------------------------------------------------------- 1 | # 将数据集处理成npy格式,此为网络输入的一种形式,优点是读取方便,缺点是一次性读取造成内存溢出, 2 | # 解决方法是使用data_generate,批次读入内存 3 | from PIL import Image 4 | import numpy as np 5 | import os 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def generate_npy(use_power_dataset=True): 10 | cur_path = os.path.dirname(__file__) # 获取当前文件路径 11 | parent_path = os.path.dirname(cur_path) # 获取当前文件夹父目录 12 | if use_power_dataset: 13 | # 指定图片和标签的路径 14 | train_path = './data/power_img/' 15 | train_txt = './data/labels.txt' 16 | # 指定转换后的存储路径 17 | x_train_save_path = './data/power_img_data_npy/x_train3.npy' 18 | y_train_save_path = './data/power_img_data_npy/y_train3.npy' 19 | # 数据集总图像数 20 | total_img = len(os.listdir(train_path)) 21 | # 数据集每类图像数 22 | class_img = 400 23 | else: 24 | # 指定图片和标签的路径 25 | train_path = './data/img/' 26 | train_txt = './data/labels.txt' 27 | # 指定转换后的存储路径 28 | x_train_save_path = './data/original_img_data_npy/x_train3.npy' 29 | y_train_save_path = './data/original_img_data_npy/y_train3.npy' 30 | # 数据集总图像数 31 | total_img = len(os.listdir(train_path)) 32 | # 数据集每类图像数 33 | class_img = 100 34 | 35 | # print(total_img) 36 | # 打标签,该标签以0起始 37 | 38 | def get_label(train_txt, total_img, class_img): 39 | j = 0 40 | with open(train_txt, "w") as f: 41 | for i in range(total_img): 42 | if i % class_img == 0: 43 | j += 1 44 | text = f.write(str(i + 1) + ".jpg" + " " + str(j - 1) + "\n") 45 | if not os.path.exists(train_txt): 46 | get_label(train_txt, total_img, class_img) 47 | 48 | def generated(path, txt): 49 | f = open(txt, 'r') 50 | contents = f.readlines() 51 | f.close() 52 | x, y_ = [], [] 53 | for content in contents: 54 | value = content.split() 55 | img_path = path + value[0] 56 | img = Image.open(img_path) 57 | img = img.resize((224, 224), Image.BILINEAR) 58 | img = np.array(img) 59 | x.append(img) 60 | y_.append(value[1]) 61 | print('loading:' + content) 62 | x = np.array(x) 63 | # print(x) 64 | y_ = np.array(y_) 65 | # print(y_.shape) 66 | y_ = y_.astype(np.int64) 67 | return x, y_ 68 | 69 | if os.path.exists(x_train_save_path) and os.path.exists(y_train_save_path): 70 | print('-------Load Datasets---------') 71 | x_train_save = np.load(x_train_save_path) 72 | y_train_save = np.load(y_train_save_path) 73 | 74 | else: 75 | print('------Generate Datasets--------') 76 | x_train, y_train = generated(train_path, train_txt) 77 | 78 | print('-------Save Datasets--------') 79 | np.save(x_train_save_path, x_train) 80 | np.save(y_train_save_path, y_train) 81 | -------------------------------------------------------------------------------- /all_colors.py: -------------------------------------------------------------------------------- 1 | color_names = {0: '#F0F8FF', 1: '#FAEBD7', 2: '#00FFFF', 3: '#7FFFD4', 4: '#F0FFFF', 2 | 5: '#F5F5DC', 6: '#FFE4C4', 7: '#000000', 8: '#FFEBCD', 9: '#0000FF', 3 | 10: '#8A2BE2', 11: '#A52A2A', 12: '#DEB887', 13: '#5F9EA0', 14: '#7FFF00', 4 | 15: '#D2691E', 16: '#FF7F50', 17: '#6495ED', 18: '#FFF8DC', 19: '#DC143C', 5 | 20: '#00FFFF', 21: '#00008B', 22: '#008B8B', 23: '#B8860B', 24: '#A9A9A9', 6 | 25: '#006400', 26: '#BDB76B', 27: '#8B008B', 28: '#556B2F', 29: '#FF8C00', 7 | 30: '#9932CC', 31: '#8B0000', 32: '#E9967A', 33: '#8FBC8F', 34: '#483D8B', 8 | 35: '#2F4F4F', 36: '#00CED1', 37: '#9400D3', 38: '#FF1493', 39: '#00BFFF', 9 | 40: '#696969', 41: '#1E90FF', 42: '#B22222', 43: '#FFFAF0', 44: '#228B22', 10 | 45: '#FF00FF', 46: '#DCDCDC', 47: '#F8F8FF', 48: '#FFD700', 49: '#DAA520', 11 | 50: '#808080', 51: '#008000', 52: '#ADFF2F', 53: '#F0FFF0', 54: '#FF69B4', 12 | 55: '#CD5C5C', 56: '#4B0082', 57: '#FFFFF0', 58: '#F0E68C', 59: '#E6E6FA', 13 | 60: '#FFF0F5', 61: '#7CFC00', 62: '#FFFACD', 63: '#ADD8E6', 64: '#F08080', 14 | 65: '#E0FFFF', 66: '#FAFAD2', 67: '#90EE90', 68: '#D3D3D3', 69: '#FFB6C1', 15 | 70: '#FFA07A', 71: '#20B2AA', 72: '#87CEFA', 73: '#778899', 74: '#B0C4DE', 16 | 75: '#FFFFE0', 76: '#00FF00', 77: '#32CD32', 78: '#FAF0E6', 79: '#FF00FF', 17 | 80: '#800000', 81: '#66CDAA', 82: '#0000CD', 83: '#BA55D3', 84: '#9370DB', 18 | 85: '#3CB371', 86: '#7B68EE', 87: '#00FA9A', 88: '#48D1CC', 89: '#C71585', 19 | 90: '#191970', 91: '#F5FFFA', 92: '#FFE4E1', 93: '#FFE4B5', 94: '#FFDEAD', 20 | 95: '#000080', 96: '#FDF5E6', 97: '#808000', 98: '#6B8E23', 99: '#FFA500', 21 | 100: '#FF4500', 101: '#DA70D6', 102: '#EEE8AA', 103: '#98FB98', 104: '#AFEEEE', 22 | 105: '#DB7093', 106: '#FFEFD5', 107: '#FFDAB9', 108: '#CD853F', 109: '#FFC0CB', 23 | 110: '#DDA0DD', 111: '#B0E0E6', 112: '#800080', 113: '#FF0000', 114: '#BC8F8F', 24 | 115: '#4169E1', 116: '#8B4513', 117: '#FA8072', 118: '#FAA460', 119: '#2E8B57', 25 | 120: '#FFF5EE', 121: '#A0522D', 122: '#C0C0C0', 123: '#87CEEB', 124: '#6A5ACD', 26 | 125: '#708090', 126: '#FFFAFA', 127: '#00FF7F', 128: '#4682B4', 129: '#D2B48C', 27 | 130: '#008080', 131: '#D8BFD8', 132: '#FF6347', 133: '#40E0D0', 134: '#EE82EE', 28 | 135: '#F5DEB3', 136: '#FFFFFF', 137: '#F5F5F5', 138: '#FFFF00', 139: '#9ACD32'} 29 | 30 | color_name_38 = ['#F0F8FF', '#FAEBD7', '#00FFFF', '#7FFFD4', '#F0FFFF', '#F5F5DC', '#FFE4C4', '#000000', '#FFEBCD', 31 | '#0000FF', '#8A2BE2', '#A52A2A', '#DEB887', '#5F9EA0', '#7FFF00', '#D2691E', '#FF7F50', '#6495ED', 32 | '#FFF8DC', '#DC143C', '#00FFFF', '#00008B', '#008B8B', '#B8860B', '#A9A9A9', '#006400', '#BDB76B', 33 | '#8B008B', '#556B2F', '#FF8C00', '#9932CC', '#8B0000', '#E9967A', '#8FBC8F', '#483D8B', '#2F4F4F', 34 | '#00CED1', '#9400D3'] 35 | 36 | if __name__ == "__main__": 37 | res = [] 38 | for i, j in color_names.items(): 39 | if i <= 37: 40 | res.append(j) 41 | print(res) 42 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | import matplotlib.pyplot as plt 5 | from sklearn.utils import shuffle 6 | import matplotlib.image as mpimg 7 | from imgaug import augmenters as iaa 8 | import cv2 as cv 9 | from sklearn.model_selection import train_test_split 10 | from PIL import Image 11 | 12 | 13 | # 返回每一张图片的路径,并且与文本一一对应,并且还需要返回标签信息 14 | def LoadData(): 15 | cur_path = os.path.dirname(__file__) # 'E:/github文件/my-project' 16 | img_path = "./data/img" 17 | ImgPath = [] 18 | n = len(os.listdir(os.path.join(cur_path, img_path))) # 图像总数  19 | for i in range(1, n + 1): 20 | ImgPath.append(cur_path + "/" + 21 | os.path.join(img_path, str(i)) + ".jpg") # 图像路径 'E:/github文件/my-project/data/img\\3798.jpg' 22 | return ImgPath 23 | 24 | 25 | def augmentImage(imgPath): 26 | img = mpimg.imread(imgPath) 27 | ## PAN 28 | if np.random.rand() <= 0.5: 29 | pan = iaa.Affine(translate_percent={'x': (-0.1, 0.1), 'y': (-0.1, 0.1)}) 30 | img = pan.augment_image(img) 31 | ## ZOOM 32 | if np.random.rand() <= 0.5: 33 | zoom = iaa.Affine(scale=(1, 1.2)) 34 | img = zoom.augment_image(img) 35 | # BRIGHTNESS 36 | if np.random.rand() <= 0.5: 37 | brightness = iaa.Multiply((0.4, 1.2)) 38 | img = brightness.augment_image(img) 39 | # FLIP 40 | if np.random.rand() <= 0.5: 41 | img = cv.flip(img, 1) 42 | return img 43 | 44 | 45 | def preProcessing(img): 46 | img = img[:, 20:400, :] # (高,宽,通道) 47 | img = cv.resize(img, (224, 224)) 48 | img = cv.cvtColor(img, cv.COLOR_BGR2YUV) 49 | img = cv.GaussianBlur(img, (3, 3), 0) 50 | img = img / 255 51 | return img 52 | 53 | 54 | def batchGen(batch_size, image_path, text, label, original=True, multiple=4): 55 | """ 56 | image_path:List() 待增强图像的路径 57 | text:经过word2vec处理过后的文本数据 58 | original:bool 是否增加原生态图像数量 59 | multiple:int 增加的数量 60 | """ 61 | while True: 62 | imageBatch, textBatch, labelBatch = [], [], [] 63 | for i in range(batch_size): 64 | ImglittleBatch, txtlittleBatch, labellittleBatch = [], [], [] 65 | index = np.random.randint(0, len(image_path)) 66 | if original: 67 | image = augmentImage(image_path[index]) 68 | image = preProcessing(image) 69 | imageBatch.append(image) 70 | textBatch.append(text[index]) 71 | labelBatch.append(label[index]) 72 | else: 73 | for j in range(multiple): 74 | image = augmentImage(image_path[index]) 75 | image = preProcessing(image) 76 | ImglittleBatch.append(image) 77 | txtlittleBatch.append(text[index]) 78 | labellittleBatch.append(label[index]) 79 | imageBatch += ImglittleBatch 80 | textBatch += txtlittleBatch 81 | labelBatch += labellittleBatch 82 | 83 | # 对列表中的元素加入随机性,打乱,固定打乱顺序 84 | state = np.random.get_state() 85 | np.random.shuffle(imageBatch) 86 | np.random.set_state(state) 87 | np.random.shuffle(textBatch) 88 | np.random.set_state(state) 89 | np.random.shuffle(labelBatch) 90 | yield (np.asarray(imageBatch), np.asarray(textBatch)), np.asarray(labelBatch) 91 | 92 | 93 | def Img_Txt(imgTest, txtTest, start, end): 94 | imageBatch, textBatch = [], [] 95 | for i in range(start, end): 96 | image = mpimg.imread(imgTest[i]) / 255 97 | image = cv.resize(image, (224, 224)) 98 | text = txtTest[i] 99 | imageBatch.append(image) 100 | textBatch.append(text) 101 | return np.asarray(imageBatch), np.asarray(textBatch) 102 | 103 | -------------------------------------------------------------------------------- /load_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from sklearn.model_selection import train_test_split 5 | import tensorflow as tf 6 | # from data_preprocess.text_process import * 7 | 8 | 9 | # 包括将所有text.csv文本组合成一个统一的csv文件,并制作文本标签 10 | def get_loader(text_load_path, list_name): 11 | list_csv = [] 12 | 13 | for i in range(len(list_name)): 14 | text_path = os.path.join(text_load_path, list_name[i]) 15 | list_csv.append(text_path + '文本.csv') 16 | 17 | # 文本数据,返回的文本是与图片一一对应的 18 | list_text = np.array([]) 19 | for i in range(len(list_csv)): 20 | list_text = np.concatenate( 21 | (list_text, np.array(pd.read_csv(list_csv[i], encoding='gb18030', header=None, index_col=None)[0]))) 22 | 23 | # list_num为文本的标签,对应数据集文本排列,每间隔一百变成下一个类 24 | text_label = np.array([], dtype=np.int32) 25 | for i in range(len(list_csv)): 26 | text_label = np.concatenate((text_label, int(i) * np.ones(100, dtype=np.int32))) 27 | 28 | # list_num1增强版文本的标签 29 | text_labels = np.array([], dtype=np.int32) 30 | for i in range(len(list_csv)): 31 | text_labels = np.concatenate((text_labels, int(i) * np.ones(400, dtype=np.int32))) 32 | 33 | return list_text, text_label, text_labels 34 | 35 | 36 | def load_data_set(data_path, power_data_path, text_data, power_text_data, 37 | t_label, tt_label, num_classes=38, use_power_data=False, test_size=0.2): 38 | if use_power_data: 39 | # 数据集读取 40 | data_img = np.load(os.path.join(power_data_path, 'x_train3.npy')) / 255 41 | data_label = np.load(os.path.join(power_data_path, 'y_train3.npy')) 42 | 43 | # 分割训练集和测试集,按照7:3划分 44 | text_train, text_test, text_train_label, text_test_label = train_test_split(power_text_data, tt_label, 45 | test_size=test_size, 46 | random_state=5) 47 | image_train, image_test, image_train_label, image_test_label = train_test_split(data_img, data_label, 48 | test_size=test_size, 49 | random_state=5) 50 | # 标签独热化 51 | train_onehot_label = tf.keras.utils.to_categorical(image_train_label, 52 | num_classes=num_classes) 53 | test_onehot_label = tf.keras.utils.to_categorical(image_test_label, 54 | num_classes=num_classes) 55 | 56 | return text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label 57 | else: 58 | data_img = np.load(os.path.join(data_path, 'x_train3.npy')) / 255 59 | data_label = np.load(os.path.join(data_path, 'y_train3.npy')) 60 | 61 | text_train, text_test, text_train_label, text_test_label = train_test_split(text_data, t_label, 62 | test_size=test_size, 63 | random_state=5) 64 | image_train, image_test, image_train_label, image_test_label = train_test_split(data_img, data_label, 65 | test_size=test_size, 66 | random_state=5) 67 | train_onehot_label = tf.keras.utils.to_categorical(image_train_label, 68 | num_classes=num_classes) 69 | test_onehot_label = tf.keras.utils.to_categorical(image_test_label, 70 | num_classes=num_classes) 71 | 72 | return text_train, text_test, image_train, image_test, \ 73 | train_onehot_label, test_onehot_label 74 | 75 | 76 | # 需要划分训练集,验证集和测试集 77 | def generate_method(image_path, text, label, test_size=0.01, val_size=0.1, fasion=False): 78 | # 划分训练集,测试集 79 | imgTrain, imgTest, label_img_Train, labe_img_Tst = train_test_split(image_path, label, 80 | test_size=test_size, random_state=5) 81 | 82 | txtTrain, txtTest, label_txt_Train, labe_txt_Tst = train_test_split(text, label, 83 | test_size=test_size, random_state=5) 84 | # 划分训练集,验证集 85 | imgTrain, imgVal, label_img_Train, labe_img_Val = train_test_split(imgTrain, label_img_Train, 86 | test_size=val_size, random_state=5) 87 | 88 | txtTrain, txtVal, label_txt_Train, labe_txt_Val = train_test_split(txtTrain, label_txt_Train, 89 | test_size=val_size, random_state=5) 90 | if fasion: 91 | Train_label_one_hot = tf.keras.utils.to_categorical(label_img_Train, num_classes=5) 92 | Val_label_one_hot = tf.keras.utils.to_categorical(labe_img_Val, num_classes=5) 93 | Tst_label_one_hot = tf.keras.utils.to_categorical(labe_img_Tst, num_classes=5) 94 | else: 95 | Train_label_one_hot = tf.keras.utils.to_categorical(label_img_Train, num_classes=38) 96 | Val_label_one_hot = tf.keras.utils.to_categorical(labe_img_Val, num_classes=38) 97 | Tst_label_one_hot = tf.keras.utils.to_categorical(labe_img_Tst, num_classes=38) 98 | 99 | return imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \ 100 | Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot -------------------------------------------------------------------------------- /compare.py: -------------------------------------------------------------------------------- 1 | from load_data import load_data_set 2 | from train_model import text_model, image_model 3 | from evaluate import fx_calc_map_label 4 | import tensorflow as tf 5 | from tensorflow.keras.callbacks import EarlyStopping 6 | from utils import * 7 | from tensorflow.keras.callbacks import TensorBoard 8 | from load_data import * 9 | import numpy as np 10 | from tensorflow.keras.layers import * 11 | from tensorflow.keras.models import Model 12 | import pickle 13 | 14 | 15 | def batchGenimg(batch_size, image_path, label, original=True, multiple=4): 16 | """ 17 | image_path:List() 待增强图像的路径 18 | text:经过word2vec处理过后的文本数据 19 | original:bool 是否增加原生态图像数量 20 | multiple:int 增加的数量 21 | """ 22 | while True: 23 | imageBatch, labelBatch = [], [] 24 | for i in range(batch_size): 25 | ImglittleBatch, labellittleBatch = [], [] 26 | index = np.random.randint(0, len(image_path)) 27 | if original: 28 | image = augmentImage(image_path[index]) 29 | image = preProcessing(image) 30 | imageBatch.append(image) 31 | labelBatch.append(label[index]) 32 | else: 33 | for j in range(multiple): 34 | image = augmentImage(image_path[index]) 35 | image = preProcessing(image) 36 | ImglittleBatch.append(image) 37 | labellittleBatch.append(label[index]) 38 | imageBatch += ImglittleBatch 39 | labelBatch += labellittleBatch 40 | 41 | # 对列表中的元素加入随机性,打乱,固定打乱顺序 42 | state = np.random.get_state() 43 | np.random.shuffle(imageBatch) 44 | np.random.set_state(state) 45 | np.random.shuffle(labelBatch) 46 | yield np.asarray(imageBatch), np.asarray(labelBatch) 47 | 48 | 49 | # 只进行文本检索 50 | def txt_result(text_train, text_test, train_onehot_label, test_onehot_label, 51 | vocab_dim, n_symbols, embedding_weights, input_length): 52 | txt_pre, img_out = text_model(vocab_dim, n_symbols, embedding_weights, input_length) 53 | txt_pre.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 54 | early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min') 55 | Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True) 56 | history_txt = txt_pre.fit(text_train, train_onehot_label, batch_size=32, epochs=30, 57 | validation_split=0.2, verbose=1, callbacks=[early_stopping]) 58 | lstm_feature = img_out.predict(text_test) 59 | f = open('./data/lstm_feature.pkl', 'wb') 60 | pickle.dump(lstm_feature, f) 61 | f.close() 62 | label = tf.argmax(test_onehot_label, axis=1) 63 | for R in [1, 5, 10]: 64 | result = fx_calc_map_label(lstm_feature, label, k=R) 65 | print(f'...只进行文本检索@{R} = MAP = {result}') 66 | 67 | 68 | # 只进行图像检索 69 | def img_result(text_data, t_label): 70 | imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \ 71 | Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \ 72 | generate_method(LoadData(), text_data, t_label) 73 | 74 | model_pre, model_out = image_model() 75 | model_pre.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 76 | early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min') 77 | history = model_pre.fit(batchGenimg(batch_size=2, image_path=imgTrain, label=Train_label_one_hot), 78 | steps_per_epoch=2, epochs=20, 79 | validation_data=batchGenimg(batch_size=1, image_path=imgVal, 80 | label=Val_label_one_hot), validation_steps=1, 81 | verbose=1, callbacks=[early_stopping]) 82 | Batch_size = 3 83 | n = len(imgTest) 84 | img_feature = np.empty((n, 512), dtype=np.float32) 85 | num = n // Batch_size + 1 # 为了防止内存溢出 86 | for i in range(num): 87 | start = i * Batch_size 88 | end = (i + 1) * Batch_size 89 | end = min(end, n) 90 | print(f"正在进行{start} to {end}的预测...") 91 | feature = model_out.predict(Img_Txt(imgTest, txtTest, start, end)[0]) 92 | img_feature[start:end] = feature 93 | label = tf.argmax(Tst_label_one_hot, axis=1) 94 | for R in [1, 5, 10]: 95 | result = fx_calc_map_label(img_feature, label, k=R) 96 | print(f'...只进行图像的检索@{R} = MAP = {result}') 97 | 98 | 99 | # 经过word2vec后的检索精度 100 | def word_vec_test_mAP(text_test, vocab_dim, n_symbols, embedding_weights, input_length, test_onehot_label): 101 | inputs = tf.keras.layers.Input(shape=25, name="text_input") 102 | x = Embedding(output_dim=vocab_dim, input_dim=n_symbols, mask_zero=True, 103 | weights=[embedding_weights], 104 | input_length=input_length, trainable=False)(inputs) 105 | model = Model(inputs, x) 106 | txt_feature = model.predict(text_test) 107 | txt_feature = np.reshape(txt_feature, (len(text_test), -1)) # 注意resize和reshape的区别 108 | f = open('./data/txt_feature.pkl', 'wb') 109 | pickle.dump(txt_feature, f) 110 | f.close() 111 | label = tf.argmax(test_onehot_label, axis=1) 112 | result = fx_calc_map_label(txt_feature, label) 113 | print('...输入网络前的word2vec文本检索 MAP = {}'.format(result)) 114 | 115 | 116 | def Image_Only(test_onehot_label): 117 | f = open("./data/img_feature.pkl", "rb") 118 | img_feature = pickle.load(f) 119 | label = tf.argmax(test_onehot_label, axis=1) 120 | for R in [1, 5, 10]: 121 | result = fx_calc_map_label(img_feature, label, k=R) 122 | print('...Image Only @{} MAP = {}'.format(R, result)) 123 | 124 | 125 | def Text_Only(test_onehot_label): 126 | f = open("./data/lstm_feature.pkl", "rb") 127 | lstm_feature = pickle.load(f) 128 | label = tf.argmax(test_onehot_label, axis=1) 129 | for R in [1, 5, 10]: 130 | result = fx_calc_map_label(lstm_feature, label, k=R) 131 | print('...Text Only MAP @{} = {}'.format(R, result)) 132 | 133 | 134 | def cross_loss(test_onehot_label): 135 | f = open("./data/cross_loss.pkl", "rb") 136 | cross_loss = pickle.load(f) 137 | label = tf.argmax(test_onehot_label, axis=1) 138 | for R in [1, 5, 10]: 139 | result = fx_calc_map_label(cross_loss, label, k=R) 140 | print('...cross loss @{} = {}'.format(R, result)) 141 | 142 | 143 | def Ours(test_onehot_label): 144 | f = open("./data/multi_feature.pkl", "rb") 145 | multi_feature = pickle.load(f) 146 | label = tf.argmax(test_onehot_label, axis=1) 147 | for R in [1, 5, 10]: 148 | result = fx_calc_map_label(multi_feature, label, k=R) 149 | print('...Ours MAP @{} = {}'.format(R, result)) 150 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import jieba 3 | import random 4 | import multiprocessing 5 | from tqdm import tqdm 6 | import sys 7 | import matplotlib.pyplot as plt 8 | from load_data import * 9 | # from data_preprocess.make_dataset import * # 制作数据集 10 | from data_preprocess.generate_npy import * 11 | from train_model import * 12 | from evaluate import fx_calc_map_label 13 | from utils import * 14 | from compare import * 15 | import pickle 16 | from tensorflow.keras.callbacks import TensorBoard 17 | 18 | 19 | np.set_printoptions(threshold=np.inf) 20 | np.set_printoptions(suppress=True) 21 | np.random.seed(1337) # For Reproducibility 22 | sys.setrecursionlimit(1000000) 23 | cpu_count = multiprocessing.cpu_count() 24 | parent_path = os.path.dirname(__file__) # 获取当前文件路径 25 | 26 | if __name__ == "__main__": 27 | CD = False # 商品数据集 28 | Fasion_200k = False # 因缺少硬件资源,所以未做实验,若切换此数据集需要将所有的38更改为5,You need to do it yourself 29 | vocab_dim = 100 # 词向量的维度 30 | n_iterations = 5 # ideally more.. 31 | n_exposures = 3 # 所有频数超过3的词语 32 | window_size = 5 33 | input_length = 25 # 输入序列的长度 34 | max_len = 25 # 经过测试,每个句子的最大长度不超过21 35 | num_classes = 38 # 类别总数 36 | old_method = False # 选择旧方法进行训练 37 | new_method = False # 选择新方法训练 38 | draw = False 39 | compared = True # 对比实验用 40 | out_dim = 512 41 | demonstration = True # 演示用 42 | # weight_path = 'mnt/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5' 43 | text_load_path = './data/text' 44 | power_data_path = "./data/power_img_data_npy" 45 | data_path = "./data/original_img_data_npy" 46 | # 读入顺序,对应数据集每个类的顺序,同时对应文本的读取顺序 47 | list_name = ['休闲裤', '半身裙', '女牛仔外套', '女牛仔裤', '女衬衫', '女西装', '文胸套装', '无帽卫衣', '棉衣棉服', '毛呢大衣', 48 | '皮草', '睡袍', '背心吊带', '渔夫帽', '鸭舌帽', '卫衣', '棉衣', '牛仔外套', '牛仔裤', '短袖T恤', '衬衫', '西装', 49 | '风衣', '马甲', '单肩包', '双肩包', '手提包', '腰包', '钱包', '吊坠', '戒指', '手镯', '中长靴', '商务鞋', '板鞋', '运动鞋', '雪地靴', '高跟鞋'] 50 | # generate_npy(use_power_dataset=True) # 生成npy格式图像数据和标签 51 | 52 | text_pre, t_label, tt_label = get_loader(text_load_path, list_name) # 纯文本, 数据集标签, 增强数据集标签 53 | 54 | # 文本分词 55 | text_after = [jieba.lcut(document.replace('\n', '')) for document in text_pre] 56 | 57 | model = Word2Vec(size=vocab_dim, # 建立一个空的模型对象,设置词向量的维度为100 58 | min_count=n_exposures, 59 | window=window_size, 60 | workers=cpu_count, 61 | iter=n_iterations) 62 | # text_data原文本数据(词向量的索引), power_text_data增强文本数据(词向量的索引) 63 | w2indx, w2vec, text_data, power_text_data = text_w2model(model, text_after, max_len) 64 | 65 | print('You will succeed...') 66 | n_symbols, embedding_weights = get_data(w2indx, w2vec, vocab_dim) 67 | 68 | # 需要进行对比试验吗 69 | if compared: 70 | text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label = \ 71 | load_data_set(data_path, power_data_path, text_data, power_text_data, 72 | t_label, tt_label) 73 | if demonstration: 74 | Image_Only(test_onehot_label) 75 | Text_Only(test_onehot_label) 76 | cross_loss(test_onehot_label) 77 | Ours(test_onehot_label) 78 | else: 79 | word_vec_test_mAP(text_test, vocab_dim, n_symbols, embedding_weights, input_length, test_onehot_label) 80 | txt_result(text_train, text_test, train_onehot_label, test_onehot_label, 81 | vocab_dim, n_symbols, embedding_weights, input_length) 82 | img_result(text_data, t_label) 83 | 84 | if old_method: 85 | # 训练集和测试集的划分 86 | text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label = \ 87 | load_data_set(data_path, power_data_path, text_data, power_text_data, 88 | t_label, tt_label, use_power_data=False) 89 | 90 | # 构建多模态模型 91 | multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim) 92 | # run_eagerly指示模型是否应急切运行的可设置属性,这对于自定义的损失函数和张量的流动很有用 93 | # 急切地运行意味着您的模型将逐步运行,就像 Python 代码一样。您的模型可能运行得较慢,但您应该更容易通过进入各个层调用来调试它。 94 | # 默认情况下,我们会尝试将您的模型编译为静态图以提供最佳执行性能 95 | # 默认为False. 如果True,thisModel的逻辑将不会被包装在 a 中tf.function。建议将其保留为None除非您Model无法在 tf.function. 使用时不支持 96 | multi_model.compile(loss=Myloss(), optimizer='adam', run_eagerly=True) 97 | 98 | early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min') 99 | history = multi_model.fit([image_train, text_train], train_onehot_label, batch_size=64, epochs=3, 100 | validation_split=0.3, 101 | verbose=1, callbacks=[early_stopping]) 102 | if new_method: 103 | # 训练集:测试集 = 8:2 104 | # 训练集:验证集 = 9:1 105 | if CD: 106 | imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \ 107 | Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \ 108 | generate_method(LoadData(), text_data, t_label) 109 | 110 | train_size = len(imgTrain) 111 | val_size = len(imgVal) 112 | 113 | multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim) 114 | 115 | multi_model.compile(loss=Myloss(), optimizer='adam', metrics=[class_metric], run_eagerly=True) 116 | Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True) 117 | if Fasion_200k: 118 | from fasion_utils import * 119 | 120 | vocab_dim = 500 121 | input_length = 15 122 | num_classes = 5 123 | 124 | img_path, text_data, label, n_symbols, embedding_weights = initialization() 125 | 126 | imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \ 127 | Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \ 128 | generate_method(img_path, text_data, label, 0.3, 0.3, True) 129 | 130 | multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim) 131 | 132 | multi_model.compile(loss=Myloss(), optimizer='adam', metrics=[class_metric], run_eagerly=True) 133 | Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True) 134 | 135 | history = multi_model.fit(BatchGen(batch_size=3, image_path=imgTrain, 136 | text=txtTrain, label=Train_label_one_hot), 137 | steps_per_epoch=2, epochs=2, 138 | validation_data=BatchGen(batch_size=1, image_path=imgVal, 139 | text=txtVal, label=Val_label_one_hot), 140 | validation_steps=2,verbose=1) 141 | if CD: 142 | early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min') 143 | history = multi_model.fit(batchGen(batch_size=3, image_path=imgTrain, 144 | text=txtTrain, label=Train_label_one_hot), 145 | steps_per_epoch=2, epochs=2, 146 | validation_data=batchGen(batch_size=1, image_path=imgVal, 147 | text = txtVal, label=Val_label_one_hot),validation_steps=2, 148 | verbose=1, callbacks=[early_stopping]) 149 | 150 | # 提取测试集特征 151 | if old_method: 152 | multi_feature = multi_model.predict([imgTest, txtTest], batch_size=32) 153 | label = tf.argmax(test_onehot_label, axis=1) 154 | result = fx_calc_map_label(multi_feature[:,num_classes:], label) 155 | print('...多模态图像检索 MAP = {}'.format(result)) 156 | if new_method: 157 | Batch_size = 3 158 | n = len(imgTest) 159 | multi_feature = np.empty((n, out_dim), dtype=np.float32) 160 | num = n // Batch_size + 1 # 为了防止内存溢出 161 | for i in range(num): 162 | start = i * Batch_size 163 | end = (i + 1) * Batch_size 164 | end = min(end, n) 165 | print(f"正在进行{start} to {end}的预测...") 166 | feature = multi_model.predict(Img_Txt(imgTest, txtTest, start, end)) 167 | multi_feature[start:end] = feature[:, num_classes:] 168 | # mAP计算 169 | label = tf.argmax(Tst_label_one_hot, axis=1) 170 | result = fx_calc_map_label(multi_feature, label) 171 | print('...多模态图像检索 MAP = {}'.format(result)) 172 | 173 | if draw: 174 | # 绘制训练和验证的损失图像 175 | plt.plot(history.history['loss']) 176 | plt.plot(history.history['val_loss']) 177 | plt.title('model loss') 178 | plt.ylabel('loss') 179 | plt.xlabel('epoch') 180 | plt.legend(['train', 'test'], loc='upper left') 181 | plt.show() 182 | -------------------------------------------------------------------------------- /train_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.models import Model 3 | from sklearn.model_selection import train_test_split 4 | from gensim.models.word2vec import Word2Vec 5 | from gensim.corpora.dictionary import Dictionary 6 | from keras.preprocessing import sequence 7 | from tensorflow.keras.applications.vgg16 import VGG16 8 | from tensorflow.keras.layers import * 9 | from keras import backend as K 10 | from keras import regularizers 11 | from tensorflow.keras.utils import plot_model 12 | from tensorflow.keras.callbacks import EarlyStopping 13 | from tensorflow.python.ops import math_ops 14 | import numpy as np 15 | import numpy 16 | 17 | tf.keras.backend.set_floatx('float64') 18 | 19 | 20 | def text_w2model(model, text_after, max_len): 21 | data = [] 22 | model.build_vocab(text_after) # input: list遍历一次语料库建立词典 23 | model.train(text_after, epochs=20, total_examples=model.corpus_count) # 第2次遍历语料库简建立神经网络模型 24 | gensim_dict = Dictionary() 25 | gensim_dict.doc2bow(model.wv.vocab.keys(), allow_update=True) 26 | w2indx = {v: k + 1 for k, v in gensim_dict.items()} # {'T恤':1, '一':2, '一体':3, ...}1580 27 | # w2vec = {'T恤':array([-0.10420721, -0.50772285,...])} 1580=词库大小 28 | w2vec = {word: model.wv[word] for word in w2indx.keys()} 29 | 30 | for sentence in text_after: 31 | new_txt = [] 32 | for word in sentence: 33 | try: 34 | new_txt.append(w2indx[word]) 35 | except: 36 | new_txt.append(0) 37 | data.append(new_txt) 38 | 39 | # pad_sequences函数是将序列转化为经过填充以后的一个长度相同的新序列 40 | # (3800, 25) 所有文本的词索引 [0, 0, ... , 167, 139] 41 | text_data = sequence.pad_sequences(data, maxlen=max_len) # 大于此长度的序列将被截短,小于此长度的序列将在后部填0,默认为pre 42 | # 增强后,将源文本扩充四倍 43 | power_text_data = np.repeat(text_data, 4, axis=0) 44 | return w2indx, w2vec, text_data, power_text_data 45 | 46 | 47 | def get_data(index_dict, word_vectors, vocab_dim): 48 | n_symbols = len(index_dict) + 1 # 所有单词的索引数,频数小于3的词语索引为0,所以加1 49 | embedding_weights = np.zeros((n_symbols, vocab_dim)) # 初始化索引为0的词语 50 | for word, index in index_dict.items(): # 0索引都为0,从索引为1的词语开始,对每个词对应一个词向量 51 | embedding_weights[index, :] = word_vectors[word] 52 | return n_symbols, embedding_weights 53 | 54 | 55 | class TextNet(tf.keras.Model): 56 | def __init__(self, vocab_dim, n_symbols, embedding_weights, input_length): 57 | super(TextNet, self).__init__() 58 | # 一个重要的结论(坑):在构造器中不可以初始化Input函数,否则会报错 TypeError: Expected float64 passed to parameter 'y' of op 'Equal', 59 | # got 'collections' of type 'str' instead. Error: Expected float64, got 'collections' of type 'str' instead. 60 | # self.inputs = Input(shape=25, name="text_input") 61 | # 在embedding层中将会执行,将输入的文本索引转化成索引对应的词向量, 62 | # 组成完整的句子,维度为(batch_size, 句子的长度, 词向量的维度) 63 | self.embedding = Embedding(output_dim=vocab_dim, input_dim=n_symbols, mask_zero=True, 64 | weights=[embedding_weights], 65 | input_length=input_length, trainable=False) 66 | self.lstm = LSTM(64, activation='relu') 67 | self.dropout1 = Dropout(0.2) 68 | self.dense = Dense(512, activation='relu') 69 | self.dropout2 = Dropout(0.2) 70 | # 隐藏的坑,call函数中不要加入用不到的调用,会有警告 71 | def call(self, x, training=False, **kwargs): 72 | x = self.embedding(x) 73 | x = self.lstm(x) 74 | x = self.dropout1(x) 75 | x = self.dense(x) 76 | x = self.dropout2(x) 77 | return x 78 | 79 | 80 | class ImgNet(tf.keras.Model): 81 | def __init__(self, weight="imagenet"): 82 | super(ImgNet, self).__init__() 83 | # self.inputs = Input(shape=(224, 224, 3)) 84 | # VGG16函数返回Model 85 | self.conv_base = VGG16(include_top=False, weights=weight, input_shape=(224, 224, 3)) 86 | for layer in self.conv_base.layers: 87 | layer.trainable = False 88 | self.flatten = Flatten() 89 | self.dense = Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)) 90 | self.dropout = Dropout(0.2) 91 | 92 | def call(self, inputs, **kwargs): 93 | x = self.conv_base(inputs) 94 | x = self.flatten(x) 95 | x = self.dense(x) 96 | x = self.dropout(x) 97 | return x 98 | 99 | 100 | # 一个问题:dense_pre需要手动设置成分类的类别,不然会出现调用错误 101 | class MultiModel(tf.keras.Model): 102 | def __init__(self, vocab_dim, n_symbols, embedding_weights, input_length, 103 | num_classes=38, weight="imagenet", out_dim=512): 104 | super(MultiModel, self).__init__() 105 | self.img_net = ImgNet(weight) 106 | self.text_net = TextNet(vocab_dim, n_symbols, embedding_weights, input_length) 107 | self.dense_256 = Dense(256, activation='relu') 108 | self.dense_512 = Dense(512, activation='softmax') 109 | self.dense_64 = Dense(64, activation='relu') 110 | self.dense_1 = Dense(1, activation='sigmoid') 111 | self.dense_2 = Dense(2, activation='softmax') 112 | self.dense_l2 = Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)) 113 | self.dense_pre = Dense(38, activation='softmax') 114 | self.dropout = Dropout(0.2) 115 | self.multiply = Multiply() 116 | self.add = Add() 117 | self.concat_2 = Concatenate() 118 | self.concat_1 = Concatenate(axis=1) 119 | self.reshape = Reshape((2, 1)) 120 | self.permute = Permute((2, 1)) 121 | 122 | # 类通道注意力和残差模块 --> 文本和图像特征均适用 123 | def res_and_att_block(self, x): 124 | y = self.dense_256(x) 125 | y = self.dense_512(y) 126 | y = self.multiply([y,x]) 127 | y = self.add([y, x]) 128 | return y 129 | 130 | # 图像和文本特征的权值模块 131 | def weight_block(self, x): 132 | y = self.dense_64(x) 133 | y = self.dense_1(y) 134 | return y 135 | 136 | # 获取图像和文本的权重 137 | def get_weight_block(self, x, y): 138 | w = self.concat_2([x, y]) 139 | w = self.dense_2(w) 140 | w = self.reshape(w) 141 | return w 142 | 143 | # 加权 144 | def weighting(self, x, y, w): 145 | # 转换原始图像和文本特征维度 146 | def trans_space(x_dim): 147 | x_dim = tf.expand_dims(x_dim, axis=-1) 148 | x_dim = self.permute(x_dim) 149 | return x_dim 150 | 151 | x = trans_space(x) 152 | y = trans_space(y) 153 | z = self.concat_1([x, y]) 154 | # 加权 155 | z = self.multiply([w, z]) # 此处为(2, 1) • (2, 512) 点乘 ,此处batch_size不会影响乘积的结果 156 | # 加权后将图像特征和文本特征连接 157 | z = self.concat_2([z[:, 0], z[:, 1]]) # z(batch_size, 指定行, 所有列) 158 | return z 159 | 160 | # call函数只能接受一个参数,但是这个参数可以是列表或者元组或者字典等形式 161 | # 这里需要注意,默认在调用Model类时,输入model的参数为(batch_size,...) 162 | def call(self, inputs, **kwargs): 163 | res = [] 164 | x, y = inputs 165 | img = self.img_net(x) 166 | txt = self.text_net(y) 167 | 168 | x = self.res_and_att_block(img) 169 | y = self.res_and_att_block(txt) 170 | 171 | img_y = self.weight_block(x) 172 | txt_y = self.weight_block(y) 173 | 174 | w = self.get_weight_block(img_y, txt_y) 175 | z = self.weighting(x, y, w) 176 | 177 | # 最后走一遍全连接层进行分类 178 | z = self.dense_l2(z) 179 | t = self.dropout(z) 180 | pre = self.dense_pre(t) # 38维 181 | c = self.concat_2([pre, z]) 182 | 183 | return c 184 | # 我的损失函数制作成功: 185 | # y_pred为网络预测,并且网络中的call函数只能输出一个参数, 186 | # 这个参数表示成列表或者元组等都不可以,但是可以将模型中的其他特征,可以是好几项通过tf.Concatenate()函数组合成统一的形式输出 187 | # 在构造的Loss子类中通过矩阵切片分别访问各个特征 188 | # 所以在通过模型预测测试集时,不要忘了输出指定特征 189 | # 这里在训练时还有一个坑,就是accurary,loss改变后同时需要自己重写metrics方法,需要定义你是想输出分类准确率还是其他的 190 | class Myloss(tf.keras.losses.Loss): 191 | def __init__(self, name="Myloss"): 192 | super().__init__(name=name) 193 | self.cross_loss = tf.losses.CategoricalCrossentropy() 194 | self.multiply = Multiply() # 与此对应的是tf.matmul()叉乘 195 | 196 | def call(self, y_true, y_pred): 197 | # 可以拉近同类样本 198 | # def compute_loss(y_true, y_pred): 199 | # y_pred = y_pred.numpy() 200 | # y_true = y_true.numpy() 201 | # y_true = np.argmax(y_true, axis=1) 202 | # d_i = [] 203 | # index = list(range(y_true.shape[0])) + list(range(y_true.shape[0])) 204 | # for i in range(len(index)): 205 | # for j in range(len(index)): 206 | # if index[i] != index[j] and y_true[index[i]] == y_true[index[j]] and \ 207 | # sorted([index[i], index[j]]) not in d_i: 208 | # d_i.append([index[i], index[j]]) 209 | # if len(d_i) == 0: 210 | # d_i.append([index[i], index[i]]) 211 | # n = len(d_i) 212 | # loss = 0 213 | # for a, b in d_i: 214 | # loss += 1 * np.log(K.sum(1+np.exp(K.square(y_pred[a] - y_pred[b])))) 215 | # return loss 216 | def compute_loss(y_true, y_pred): 217 | y_pred = y_pred.numpy() 218 | y_true = y_true.numpy() 219 | y_true = np.argmax(y_true, axis=1) 220 | d = [] 221 | index = list(range(y_true.shape[0])) + list(range(y_true.shape[0])) 222 | for i in range(len(index)): 223 | d_i = [] 224 | for j in range(len(index)): 225 | if index[i] != index[j] and y_true[index[i]] == y_true[index[j]] and \ 226 | sorted([index[i], index[j]]) not in d_i: 227 | for k in range(len(index)): 228 | if index[i] != index[k]: 229 | d_i.append([index[i], index[j], index[k]]) 230 | np.random.shuffle(d_i) 231 | d += d_i[:3] 232 | np.random.shuffle(d) 233 | n = len(d) 234 | loss = 0.0 235 | triplet_count = 1.0 236 | for i, j, k in d: 237 | w = 1.0 238 | triplet_count += w 239 | loss += w * np.log(1 + 240 | np.exp(pairwise_distances(y_pred[index[i]], y_pred[index[j]]) - 241 | pairwise_distances(y_pred[index[i]], y_pred[index[k]]), dtype=np.float128)) 242 | loss /= triplet_count 243 | return loss 244 | 245 | def pairwise_distances(x, y): 246 | dist = sigmod(K.sum(K.square(x - y))) 247 | return tf.clip_by_value(dist, 0.0, np.inf) 248 | 249 | def calc_label_sim(label): 250 | Sim = tf.matmul(label, tf.transpose(label)) 251 | return Sim 252 | 253 | def sigmod(x): 254 | return tf.keras.activations.sigmoid(x) 255 | 256 | cross_loss = self.cross_loss(y_true, y_pred[:, :38]) 257 | same_class = compute_loss(y_true, y_pred[:, 38:]) 258 | # theta11 = tf.expand_dims(sigmod(K.sum(K.square(y_pred[:, 38:] - y_pred[:, 38:]) 259 | # , axis=1)), axis=-1) # (batch_size, 1) 260 | # print("theta11",theta11.shape) 261 | # # 减去相同类 262 | # losss = cross_loss - tf.matmul(calc_label_sim(y_true), theta11) 263 | 264 | return cross_loss+same_class 265 | 266 | def class_metric(y_true, y_pred): 267 | values = math_ops.cast( 268 | math_ops.equal( 269 | math_ops.argmax(y_true, axis=-1), math_ops.argmax(y_pred[:,:38], axis=-1)), 270 | K.floatx()) 271 | return values 272 | 273 | def text_model(vocab_dim, n_symbols, embedding_weights, input_length): 274 | x = tf.keras.layers.Input(shape=25, name="text_input") 275 | x1 = Embedding(output_dim=vocab_dim, input_dim=n_symbols, mask_zero=True, weights=[embedding_weights], 276 | input_length=input_length, trainable=False)(x) 277 | x2 = LSTM(64, activation='relu')(x1) 278 | x3 = Dropout(0.2)(x2) 279 | x4 = Dense(512, activation='relu')(x3) 280 | x5 = Dropout(0.2)(x4) 281 | x6 = Dense(38, activation="softmax")(x5) 282 | model_pre = Model(inputs=x, outputs=x6) 283 | model_512 = Model(x, x4) 284 | return model_pre, model_512 285 | 286 | def image_model(): 287 | conv_base = VGG16(include_top=False, weights="imagenet", input_shape=(224, 224, 3)) 288 | for layer in conv_base.layers: 289 | layer.trainable = False 290 | last = conv_base.output 291 | x = tf.keras.layers.Flatten()(last) 292 | x1 = tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x) 293 | x2 = tf.keras.layers.Dropout(0.2)(x1) 294 | x3 = Dense(38, activation="softmax")(x2) 295 | model_pre = Model(inputs=conv_base.input, outputs=x3) 296 | model_512 = Model(conv_base.input, x1) 297 | return model_pre, model_512 -------------------------------------------------------------------------------- /draw.py: -------------------------------------------------------------------------------- 1 | from gensim.models import Word2Vec 2 | from sklearn.decomposition import PCA 3 | import matplotlib.pyplot as plt 4 | from sklearn.manifold import TSNE 5 | import os 6 | import multiprocessing 7 | import matplotlib.cm as cm 8 | from load_data import * 9 | import numpy as np 10 | import jieba 11 | import matplotlib 12 | from mpl_toolkits.mplot3d import Axes3D 13 | from sklearn.model_selection import train_test_split 14 | from utils import LoadData 15 | from train_model import text_w2model 16 | from all_colors import * 17 | from time import time 18 | from PIL import Image 19 | import pickle 20 | import wordcloud 21 | 22 | plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 23 | plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 24 | 25 | cpu_count = multiprocessing.cpu_count() 26 | parent_path = os.path.dirname(__file__) 27 | 28 | vocab_dim = 100 # 词向量的维度 29 | n_iterations = 5 # ideally more.. 30 | n_exposures = 3 # 所有频数超过3的词语 31 | window_size = 5 32 | input_length = 25 # 输入序列的长度 33 | max_len = 25 # 经过测试,每个句子的最大长度不超过21 34 | text_load_path = './data/text' 35 | all_img = False 36 | # 读入顺序,对应数据集每个类的顺序,同时对应文本的读取顺序 37 | list_name = ['休闲裤', '半身裙', '女牛仔外套', '女牛仔裤', '女衬衫', '女西装', '文胸套装', '无帽卫衣', '棉衣棉服', '毛呢大衣', 38 | '皮草', '睡袍', '背心吊带', '渔夫帽', '鸭舌帽', '卫衣', '棉衣', '牛仔外套', '牛仔裤', '短袖T恤', '衬衫', '西装', 39 | '风衣', '马甲', '单肩包', '双肩包', '手提包', '腰包', '钱包', '吊坠', '戒指', '手镯', '中长靴', '商务鞋', '板鞋', '运动鞋', '雪地靴', '高跟鞋'] 40 | 41 | text_pre, t_label, _ = get_loader(text_load_path, list_name) 42 | text = [jieba.lcut(document.replace('\n', '')) for document in text_pre] 43 | model = Word2Vec(size=vocab_dim, # 建立一个空的模型对象 44 | min_count=n_exposures, 45 | window=window_size, 46 | workers=cpu_count, 47 | iter=n_iterations) 48 | model.build_vocab(text) # input: list遍历一次语料库建立词典 49 | model.train(text, epochs=40, total_examples=model.corpus_count) # 第2次遍历语料库简建立神经网络模型 50 | 51 | imgTrain, imgTest, label_img_Train, labe_img_Tst = train_test_split(LoadData(), t_label, 52 | test_size=0.2, random_state=5) 53 | imgTrain, imgVal, label_img_Train, labe_img_Val = train_test_split(imgTrain, label_img_Train, 54 | test_size=0.1, random_state=5) 55 | _, text_jieba, _, _ = train_test_split(text_pre, t_label, 56 | test_size=0.2, random_state=5) 57 | # l = labe_img_Tst 58 | # index = [None] * 38 59 | # for i in range(38): 60 | # index[i] = np.where(i == l)[0].shape[0] 61 | # print(index) 62 | # 测试集各个类别样本的数量 63 | test_sizes = [21, 20, 18, 18, 23, 22, 24, 16, 19, 16, 20, 16, 18, 15, 20, 21, 64 | 20, 21, 22, 20, 21, 17, 18, 21, 15, 22, 24, 18, 16, 13, 29, 18, 21, 65 | 30, 19, 20, 20, 28] 66 | all_sizes = [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 67 | 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100] 68 | 69 | 70 | # 词的t-SNE显示 71 | def t_SNE_2d(): 72 | words_ak = [] 73 | embeddings_ak = [] 74 | for word in list(model.wv.vocab): 75 | embeddings_ak.append(model.wv[word]) 76 | words_ak.append(word) 77 | 78 | tsne_ak_2d = TSNE(perplexity=50, n_components=2, init='pca', n_iter=3500, random_state=32) 79 | embeddings_ak_2d = tsne_ak_2d.fit_transform(embeddings_ak) 80 | 81 | def tsne_plot_2d(embeddings, words, a=1): 82 | plt.figure(figsize=(16, 9)) 83 | colors = cm.rainbow(np.linspace(0, 1, 1)) 84 | x = embeddings[:, 0] 85 | y = embeddings[:, 1] 86 | plt.scatter(x, y, c=colors, alpha=a, label="商品文本") 87 | for i, word in enumerate(words): 88 | plt.annotate(word, alpha=0.5, xy=(x[i], y[i]), xytext=(5, 2), color="black", 89 | textcoords='offset points', ha='right', va='bottom', size=10, weight="medium") 90 | plt.legend(loc=4) 91 | plt.grid(True) 92 | plt.savefig("t_SNE_2d.png", format='png', dpi=150, bbox_inches='tight') 93 | plt.show() 94 | 95 | tsne_plot_2d(embeddings_ak_2d, words=words_ak) 96 | 97 | 98 | # 词的t-SNE显示 99 | def t_SNE_3d(): 100 | words_wp = [] 101 | embeddings_wp = [] 102 | for word in list(model.wv.vocab): 103 | embeddings_wp.append(model.wv[word]) 104 | words_wp.append(word) 105 | tsne_wp_3d = TSNE(perplexity=30, n_components=3, init='pca', n_iter=3500, random_state=12) 106 | embeddings_wp_3d = tsne_wp_3d.fit_transform(embeddings_wp) 107 | 108 | def tsne_plot_3d(title, embeddings, a=1): 109 | fig = plt.figure() 110 | ax = Axes3D(fig) 111 | colors = cm.rainbow(np.linspace(0, 1, 1)) 112 | plt.scatter(embeddings[:, 0], embeddings[:, 1], embeddings[:, 2], c=colors, alpha=a, label="文本词") 113 | plt.legend(loc=4) 114 | plt.title(title) 115 | plt.show() 116 | 117 | tsne_plot_3d('商品文本', embeddings_wp_3d, a=0.1) 118 | 119 | 120 | # 词的t-SNE显示 121 | def PCA(): 122 | # 基于2d PCA拟合数据 123 | X = model[model.wv.vocab] 124 | pca = PCA(n_components=2) 125 | result = pca.fit_transform(X) 126 | # 可视化展示 127 | plt.scatter(result[:, 0], result[:, 1]) 128 | words = list(model.wv.vocab) 129 | for i, word in enumerate(words): 130 | plt.annotate(word, xy=(result[i, 0], result[i, 1])) 131 | plt.show() 132 | 133 | 134 | # 初始数据集的t-SNE显示,包括训练集和测试集全部数据集的显示也可以 135 | def get_data(img_path, label, img_size=100): 136 | Img = Img = np.empty((len(img_path), 3 * img_size * img_size), dtype=np.float64) 137 | for i in range(len(img_path)): 138 | img = Image.open(img_path[i]) 139 | img = img.resize((img_size, img_size)) 140 | img = np.reshape(img, (1, -1)) 141 | Img[i - 1] = img / 255.0 142 | return Img, label 143 | 144 | 145 | def plot_embedding(data, label, title): 146 | x_min, x_max = np.min(data, 0), np.max(data, 0) 147 | data = (data - x_min) / (x_max - x_min) 148 | 149 | fig = plt.figure() 150 | ax = plt.subplot(111) 151 | for i in range(data.shape[0]): 152 | plt.scatter(data[i, 0], data[i, 1], color=color_names[label[i] + 40]) 153 | # plt.text(data[i, 0], data[i, 1], str(label[i]), 154 | # color=plt.cm.Set1(label[i] / 10.), 155 | # fontdict={'weight': 'bold', 'size': 9}) 156 | plt.xticks([]) 157 | plt.yticks([]) 158 | plt.title(title) 159 | return fig 160 | 161 | 162 | def feature_t_sne(data, label): 163 | print('Computing t-SNE embedding') 164 | tsne = TSNE(n_components=2, init='pca', random_state=0) 165 | t0 = time() 166 | result = tsne.fit_transform(data) 167 | fig = plot_embedding(result, label, 168 | 'T-sne embedding of text (time %.2fs)' 169 | % (time() - t0)) 170 | plt.show(fig) 171 | 172 | 173 | # 原始测试集t-SNE分布 174 | def test_img_set_t_sne(imgTest, labe_img_Tst): 175 | data, label = get_data(imgTest, labe_img_Tst) 176 | print('Computing t-SNE embedding') 177 | tsne = TSNE(n_components=2, init='pca', random_state=0) 178 | t0 = time() 179 | result = tsne.fit_transform(data) 180 | fig = plot_embedding(result, label, 181 | 'T-sne embedding of test set images (time %.2fs)' 182 | % (time() - t0)) 183 | plt.show(fig) 184 | 185 | 186 | # test_img_set_t_sne(imgTest, labe_img_Tst) 187 | 188 | 189 | # 测试集,图文特征的t-SNE 190 | def test_multi_feature(): 191 | f = open(os.path.join(parent_path, "data/multi_feature.pkl"), "rb") 192 | multi_feature = pickle.load(f) 193 | feature_t_sne(multi_feature, labe_img_Tst) 194 | 195 | 196 | # test_multi_feature() 197 | 198 | def test_img_feature(): 199 | f = open(os.path.join(parent_path, "data/img_feature.pkl"), "rb") 200 | img_feature = pickle.load(f) 201 | feature_t_sne(img_feature, labe_img_Tst) 202 | 203 | 204 | # test_img_feature() 205 | 206 | def test_txt_feature(): 207 | f = open(os.path.join(parent_path, "data/txt_feature.pkl"), "rb") 208 | txt_feature = pickle.load(f) 209 | feature_t_sne(txt_feature, labe_img_Tst) 210 | 211 | 212 | # test_txt_feature() 213 | 214 | def test_txt_lstm_feature(): 215 | f = open(os.path.join(parent_path, "data/lstm_feature.pkl"), "rb") 216 | lstm_feature = pickle.load(f) 217 | feature_t_sne(lstm_feature, labe_img_Tst) 218 | 219 | 220 | # test_txt_lstm_feature() 221 | 222 | 223 | # 词云展示 224 | def test_all_text_word_wordcloud(): 225 | print("开始绘制词云...") 226 | # from scipy.misc import imread 227 | # mask = imread("fivestart.jpg")自动图片的显示 228 | txt_path = os.path.join(parent_path, "data/txt") 229 | big_txt = "" 230 | for i in os.listdir(txt_path): 231 | Path = os.path.join(txt_path, i) 232 | f = open(Path, "r", encoding="utf-8") 233 | txt = f.read() 234 | big_txt += txt 235 | ls = jieba.lcut(big_txt) 236 | txt = " ".join(ls) 237 | wc = wordcloud.WordCloud(font_path="msyh.ttc", \ 238 | width=2500, height=1500, background_color="white", max_words=1000) 239 | wc.generate(txt) 240 | # w.to_file("grwordcloud.png") 241 | plt.imshow(wc) 242 | plt.axis("off") 243 | plt.show() 244 | 245 | 246 | # test_all_text_word_wordcloud() 247 | 248 | def test_txt_word_cloud(text_jieba): 249 | print("开始绘制词云...") 250 | big_txt = "" 251 | for i in text_jieba: 252 | big_txt = big_txt + i + "\n" 253 | ls = jieba.lcut(big_txt) 254 | txt = " ".join(ls) 255 | wc = wordcloud.WordCloud(font_path="msyh.ttc", \ 256 | width=2500, height=1500, background_color="white", max_words=300) 257 | wc.generate(txt) 258 | # w.to_file("grwordcloud.png") 259 | plt.imshow(wc) 260 | plt.axis("off") 261 | plt.show() 262 | # test_txt_word_cloud(text_jieba) 263 | 264 | 265 | def test_set_pie_chart(sizes=test_sizes): 266 | print("开始绘制饼状图...") 267 | patches, l_text, p_text = plt.pie(sizes, labels=list_name, colors=color_name_38, 268 | labeldistance=1.1, autopct='%2.0f%%', shadow=False, 269 | startangle=90, pctdistance=0.8) 270 | 271 | # labeldistance,文本的位置离远点有多远,1.1指1.1倍半径的位置 272 | # autopct,圆里面的文本格式,%3.1f%%表示小数有三位,整数有一位的浮点数 273 | # shadow,饼是否有阴影 274 | # startangle,起始角度,0,表示从0开始逆时针转,为第一块。一般选择从90度开始比较好看 275 | # pctdistance,百分比的text离圆心的距离 276 | # patches, l_texts, p_texts,为了得到饼图的返回值,p_texts饼图内部文本的,l_texts饼图外label的文本 277 | 278 | # 改变文本的大小 279 | # 方法是把每一个text遍历。调用set_size方法设置它的属性 280 | for t in l_text: 281 | t.set_size = 30 282 | for t in p_text: 283 | t.set_size = 20 284 | # 设置x,y轴刻度一致,这样饼图才能是圆的 285 | plt.axis('equal') 286 | plt.legend(loc='upper left', bbox_to_anchor=(-0.1, 1)) 287 | # loc: 表示legend的位置,包括'upper right','upper left','lower right','lower left'等 288 | # bbox_to_anchor: 表示legend距离图形之间的距离,当出现图形与legend重叠时,可使用bbox_to_anchor进行调整legend的位置 289 | # 由两个参数决定,第一个参数为legend距离左边的距离,第二个参数为距离下面的距离 290 | plt.title('Data set pie chart', loc="right", fontsize="xx-large") 291 | plt.grid() 292 | plt.show(True) 293 | # test_set_pie_chart(sizes=all_sizes) 294 | 295 | 296 | # 文本检索mAP最终结果 297 | text_mAP = [0.4092, 0.5894, 0.6357] 298 | img_mAP = [0.4000, 0.5608, 0.6367] 299 | cross_loss = [0.43605, 0.63707, 0.6769] 300 | c_t_mAP = [0.4520, 0.6470, 0.7068] 301 | 302 | 303 | def test_result_Histogram(): 304 | mark = 0.15 305 | width = 0.15 306 | R = ["@1", "@5", "@10"] 307 | Image_only = [0.4092, 0.5894, 0.6357] 308 | Text_only = [0.4000, 0.5608, 0.6367] 309 | cross_loss = [0.4360, 0.6370, 0.6769] 310 | cross_and_triplet_loss = [0.4520, 0.6470, 0.7068] 311 | # 创建分组柱状图,需要自己控制x轴坐标 312 | xticks = np.arange(len(R)) 313 | 314 | fig, ax = plt.subplots(figsize=(10, 9)) 315 | 316 | ax.bar(xticks, Image_only, width=width, label="Text_only", color="royalblue") 317 | ax.bar(xticks + mark, Text_only, width=width, label="Image_only", color="gray") 318 | ax.bar(xticks + 2*mark, cross_loss, width=width, label="cross_loss", color="burlywood") 319 | ax.bar(xticks + 3*mark, cross_and_triplet_loss, width=width, label="cross_and_triplet_loss", color="darkblue") 320 | 321 | # 需要你将每个组的起始坐标写到coordinate中,并且将所有点以列表的形式重新添加到ret中 322 | coordinate = [0.0, 1.0, 2.0] 323 | ret = [[0.400, 0.5608, 0.6367], [0.4092, 0.5894, 0.6357], [0.4360, 0.6370, 0.6769], 324 | [0.4520, 0.6470, 0.7068]] 325 | 326 | for i in range(len(ret[0])): 327 | margin = 0 328 | for j in range(len(ret)): 329 | xy = (coordinate[i] + margin, ret[j][i] * (1 + j / 200)) 330 | s = str(ret[j][i]) 331 | ax.annotate( 332 | s=s, # 要添加的文本 333 | xy=xy, # 将文本添加到哪个位置 334 | fontsize=10, # 标签大小 335 | color="black", # 标签颜色 336 | ha="center", # 水平对齐 337 | va="baseline" # 垂直对齐 338 | ) 339 | margin += mark 340 | # ax.set_title("Grouped Bar plot", fontsize=15) 341 | ax.set_ylabel("mAP") 342 | # ax.set_xlabel("返回样本数") 343 | ax.legend() 344 | ax.set_xticks(xticks + 0.2) 345 | ax.set_xticklabels(R) 346 | plt.show() 347 | # test_result_Histogram() 348 | 349 | # 折线图 350 | def Line_chart(): 351 | R = ["@1", "@5", "@10"] 352 | Image_only = [0.4092, 0.5894, 0.6357] 353 | Text_only = [0.4000, 0.5608, 0.6367] 354 | cross_loss = [0.4360, 0.6370, 0.6769] 355 | cross_and_triplet_loss = [0.4520, 0.6470, 0.7068] 356 | ret = [[0.400, 0.5608, 0.6367], [0.4092, 0.5894, 0.6357], [0.4360, 0.6370, 0.6769], 357 | [0.4520, 0.6470, 0.7068]] 358 | 359 | color = ['red', 'yellow', 'green', 'blue', 'black'] 360 | fig = plt.figure(figsize=(7, 4)) 361 | for i in range(len(ret)): 362 | plt.plot(range(3), ret[i], c=color[i]) 363 | plt.legend = ('upper left') 364 | plt.xlabel('Month') 365 | plt.ylabel('Rate') 366 | plt.title('Rate to Month') 367 | plt.tick_params(axis='both') 368 | plt.show() --------------------------------------------------------------------------------