├── README.md ├── requirements.txt ├── Program_to_create_Google_forms ├── Modified_Program_to_create_Google_forms ├── Multimodal_baseline_Functions.py ├── WebScrapping.py └── BiLSTM_VGG16.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Multimodal Meme Classification: Identifying Offensive Content in Image and Text 2 | 3 | If you use the code or the dataset in your research, please cite our paper below: 4 | 5 | @inproceedings{suryawanshi-etal-2020-MultiOFF, 6 | 7 | title = "Multimodal Meme Dataset (MultiOFF) for Identifying Offensive Content in Image and Text", 8 | 9 | author = "Suryawanshi, Shardul and Chakravarthi, Bharathi Raja and Arcan, Mihael and Buitelaar, Paul", 10 | 11 | booktitle = "Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying ({TRAC}-2020)", 12 | 13 | month = May, 14 | 15 | year = "2020", 16 | 17 | publisher = "Association for Computational Linguistics",} 18 | 19 | This document gives step-by-step instructions to execute the code. 20 | 21 | (Pre-requisite: the Conda/Python environment should have the packages listed in requirements.txt installed before execution. 22 | 50-dimensional GloVe embeddings are used; they can be downloaded from "http://nlp.stanford.edu/data/glove.6B.zip") 23 | 24 | --> Use this Google Drive link to access the data: "https://drive.google.com/drive/folders/1hKLOtpVmF45IoBmJPwojgq6XraLtHmV6?usp=sharing" 25 | 26 | --> "Split dataset" has the train, test and validation data 27 | 28 | --> "Labelled Image" has the memes belonging to each of the above datasets 29 | 30 | --> Place this data locally and change the directory locations where the main code reads the data 31 | 32 | --> Once the setup above is done, execute the code in the sequence below: 33 | 34 | --> Stacked_LSTM_VGG16.ipynb 35 | --> BiLSTM_VGG16.ipynb 36 | --> CNN_VGG16.ipynb 37 | --> LR_NB_DNN.ipynb 38 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.7.1 2 | astor==0.8.0 3 | attrs==19.1.0 4 | backcall==0.1.0 5 | bleach==3.1.0 6 | certifi==2019.6.16 7 | colorama==0.4.1 8 | cycler==0.10.0 9 | decorator==4.4.0 10 | defusedxml==0.6.0 11 | entrypoints==0.3 12 | gast==0.2.2 13 | google-pasta==0.1.7 14 | grpcio==1.16.1 15 | h5py==2.9.0 16 | ipykernel==5.1.2 17 | ipython==7.7.0 18 | ipython-genutils==0.2.0 19 | ipywidgets==7.5.1 20 | jedi==0.15.1 21 | Jinja2==2.10.1 22 | joblib==0.13.2 23 | jsonschema==3.0.2 24 | jupyter==1.0.0 25 | jupyter-client==5.3.1 26 | jupyter-console==6.0.0 27 | jupyter-core==4.5.0 28 | Keras==2.2.4 29 | Keras-Applications==1.0.8 30 | Keras-Preprocessing==1.1.0 31 | kiwisolver==1.1.0 32 | Markdown==3.1.1 33 | MarkupSafe==1.1.1 34 | matplotlib==3.1.1 35 | mistune==0.8.4 36 | mkl-fft==1.0.14 37 | mkl-random==1.0.2 38 | mkl-service==2.0.2 39 | nbconvert==5.6.0 40 | nbformat==4.4.0 41 | nltk==3.4.4 42 | notebook==6.0.0 43 | numpy==1.17.0 44 | pandas==0.25.0 45 | pandocfilters==1.4.2 46 | parso==0.5.1 47 | pickleshare==0.7.5 48 | Pillow==6.1.0 49 | prometheus-client==0.7.1 50 | prompt-toolkit==2.0.9 51 | protobuf==3.9.1 52 | pydot==1.4.1 53 | Pygments==2.4.2 54 | pyparsing==2.4.2 55 | pyreadline==2.1 56 | pyrsistent==0.15.4 57 | python-dateutil==2.8.0 58 | pytz==2019.2 59 | pywinpty==0.5.5 60 | PyYAML==5.1.2 61 | pyzmq==18.1.0 62 | qtconsole==4.5.3 63 | scikit-learn==0.21.3 64 | scipy==1.3.1 65 | seaborn==0.9.0 66 | Send2Trash==1.5.0 67 | six==1.12.0 68 | sklearn==0.0 69 | tensorboard==1.14.0 70 | tensorflow==1.14.0 71 | tensorflow-estimator==1.14.0 72 | termcolor==1.1.0 73 | terminado==0.8.2 74 | testpath==0.4.2 75 | tornado==6.0.3 76 | traitlets==4.3.2 77 | wcwidth==0.1.7 78 | webencodings==0.5.1 79 | Werkzeug==0.15.4 80 | widgetsnbextension==3.5.1 81 | wincertstore==0.2 82 | wrapt==1.11.2 83 | 
-------------------------------------------------------------------------------- /Program_to_create_Google_forms: -------------------------------------------------------------------------------- 1 | function myFunction() { 2 | // Creating source and destination directory 3 | var folders_with_given_name = DriveApp.getFoldersByName('Trial_JS'); 4 | var destination_folder=DriveApp.getFoldersByName('Trial_JS') 5 | //var df=destination_folder.getId 6 | while (folders_with_given_name.hasNext()) { 7 | var folder = folders_with_given_name.next(); 8 | } 9 | // Checking whether folder is present 10 | while (destination_folder.hasNext()) { 11 | var d_folder = destination_folder.next(); 12 | } 13 | // Checking csv file on source directory 14 | var files = folder.getFilesByType(MimeType.CSV); 15 | 16 | while (files.hasNext()){ 17 | var file = files.next(); 18 | var filename = file.getName(); 19 | 20 | var spreadsheet = SpreadsheetApp.create(filename); 21 | 22 | var driveFile = DriveApp.getFileById(spreadsheet.getId()); 23 | d_folder.addFile(driveFile) 24 | DriveApp.removeFile(driveFile) 25 | 26 | //var sheet= spreadsheet.insertSheet(); 27 | var csvData = Utilities.parseCsv(file.getBlob().getDataAsString(),'\t'); 28 | for(var row_idx=0;row_idx 2: 142 | uni_label = list(set(labels)) 143 | count_label = [labels.count(lab) for lab in uni_label] 144 | lab_idx = count_label.index(max(count_label)) 145 | label = uni_label[lab_idx] 146 | return label 147 | 148 | # Takes in image and preprocess it 149 | def process_input(img): 150 | # Converting image to array 151 | img_data = image.img_to_array(img) 152 | # Adding one more dimension to array 153 | img_data = np.expand_dims(img_data, axis=0) 154 | # 155 | img_data = preprocess_input(img_data) 156 | return(img_data) 157 | 158 | 159 | # In[134]: 160 | 161 | 162 | # Function to generate the data 163 | def image_generator(files,label_file, batch_size = None): 164 | """ 165 | files: list of image paths 166 | label_file: labels of the 
observations 167 | batch_size: Number of observations to be selected at a time 168 | 169 | return: generator object of image data 170 | """ 171 | idxs = list(range(len(files))) 172 | idx = 0 173 | while True: 174 | batch_paths = files[idx:idx+batch_size] 175 | # batch_paths = np.random.choice(a = files, size = batch_size) 176 | batch_input = [] # Batch input initialization 177 | batch_output = [] # Batch output initialization 178 | 179 | # Read in each input, perform preprocessing and get labels 180 | for input_path in batch_paths: 181 | input = get_input(input_path ) # Load image 182 | output = get_output(input_path,label_file=label_file ) # Load label of the image 183 | input = process_input(img=input) # Process the image 184 | batch_input.append(input[0]) # Append the image 185 | batch_output.append(output) # Append the label 186 | 187 | # Return a tuple of (input,output) to feed the network 188 | batch_x = np.array( batch_input ) 189 | batch_y = np.array( batch_output ) 190 | if len(batch_x) < batch_size: 191 | idx = 0 192 | else: 193 | yield (batch_x, batch_y) 194 | 195 | 196 | # In[10]: 197 | 198 | 199 | def text_generator(padded_seq, y, batch_size=None): 200 | """ 201 | padded_seq: vectorized padded text sequence 202 | y: label of the text 203 | batch_size: Number of observations to be selected at a time 204 | 205 | return: generator object of text data 206 | """ 207 | idxs = list(range(len(y))) 208 | idx = 0 209 | while True: 210 | batch_idxs = idxs[idx:idx+batch_size] 211 | idx = idx + batch_size 212 | # batch_idxs = np.random.choice(a = list(range(len(padded_seq))), size=batch_size) #Selecting the random batch indexes 213 | batch_input = [] # Initializing batch input 214 | batch_output = [] # Initializing batch output 215 | 216 | # Traversing through the batch indexes 217 | for batch_idx in batch_idxs: 218 | input = padded_seq[batch_idx] # selecting padded sequences from the batch 219 | output = y[batch_idx] # Selecting label 220 | 
batch_input.append(input) # Appending the input (text vector) 221 | batch_output.append(output) # Appending the label 222 | 223 | # Return a tuple of (input,output) to feed the network 224 | batch_x = np.array( batch_input ) 225 | batch_y = np.array( batch_output ) 226 | if len(batch_x) < batch_size: 227 | idx = 0 228 | else: 229 | yield (batch_x, batch_y) 230 | 231 | 232 | # In[147]: 233 | 234 | 235 | #def img_text_generator(files, padded_seq, y, batch_size=None): 236 | # """ 237 | # padded_seq: vectorized padded text sequence 238 | # y: label of the text 239 | # batch_size: Number of observations to be selected at a time 240 | # 241 | # return: generator object of text data 242 | # """ 243 | # idxs = list(range(len(padded_seq))) 244 | # idx = 0 245 | # while True: 246 | # batch_idxs = idxs[idx:idx+batch_size] 247 | ## batch_idxs = np.random.choice(a = list(range(len(padded_seq))), size=batch_size) #Selecting the random batch indexes 248 | # batch_input_txt = [] # Initializing batch input text 249 | # batch_input_img = [] # Initializing batch input image 250 | # batch_output = [] # Initializing batch output 251 | # 252 | # # Traversing through the batch indexes 253 | # for batch_idx in batch_idxs: 254 | # input_txt = padded_seq[batch_idx] # selecting padded sequences from the batch 255 | # output = y[batch_idx] # Selecting label 256 | # input_img = get_input(files[batch_idx]) 257 | # input_img = process_input(input_img) 258 | # batch_input_txt.append(input_txt) # Appending the input (text vector) 259 | # batch_input_img.append(input_img[0]) 260 | # batch_output.append(output) # Appending the label 261 | # 262 | # # Return a tuple of (input,output) to feed the network 263 | # batch_x1 = np.array( batch_input_img ) 264 | # batch_x2 = np.array( batch_input_txt ) 265 | # batch_y = np.array( batch_output ) 266 | # if (len(batch_x1) < batch_size): 267 | # idx = 0 268 | # else: 269 | # yield ([batch_x1, batch_x2], batch_y) 270 | 271 | 272 | #def 
image_generator(files,label_file, batch_size = None): 273 | # while True: 274 | # # Select files (paths/indices) for the batch 275 | # batch_paths = np.random.choice(a = files, 276 | # size = batch_size) 277 | # batch_input = [] 278 | # batch_output = [] 279 | # 280 | # # Read in each input, perform preprocessing and get labels 281 | # for input_path in batch_paths: 282 | # input = get_input(input_path ) 283 | # output = get_output(input_path,label_file=label_file ) 284 | ## print(output) 285 | # input = process_input(img=input) 286 | # batch_input.append(input[0]) 287 | # batch_output.append(output) 288 | # # Return a tuple of (input,output) to feed the network 289 | # batch_x = np.array( batch_input ) 290 | # batch_y = np.array( batch_output ) 291 | # 292 | # yield( batch_x, batch_y ) 293 | 294 | def img_text_generator(files, padded_seq, y, batch_size=None): 295 | """ 296 | padded_seq: vectorized padded text sequence 297 | y: label of the text 298 | batch_size: Number of observations to be selected at a time 299 | 300 | return: generator object of text data 301 | """ 302 | while True: 303 | batch_idxs = np.random.choice(a = list(range(len(padded_seq))), size=batch_size) #Selecting the random batch indexes 304 | batch_input_txt = [] # Initializing batch input text 305 | batch_input_img = [] # Initializing batch input image 306 | batch_output = [] # Initializing batch output 307 | 308 | # Traversing through the batch indexes 309 | for batch_idx in batch_idxs: 310 | input_txt = padded_seq[batch_idx] # selecting padded sequences from the batch 311 | output = y[batch_idx] # Selecting label 312 | input_img = get_input(files[batch_idx]) 313 | input_img = process_input(input_img) 314 | batch_input_txt.append(input_txt) # Appending the input (text vector) 315 | batch_input_img.append(input_img[0]) 316 | batch_output.append(output) # Appending the label 317 | 318 | # Return a tuple of (input,output) to feed the network 319 | batch_x1 = np.array( batch_input_img ) 320 | 
batch_x2 = np.array( batch_input_txt ) 321 | batch_y = np.array( batch_output ) 322 | yield ([batch_x1, batch_x2], batch_y) 323 | -------------------------------------------------------------------------------- /WebScrapping.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[29]: 5 | 6 | 7 | #Importing necessary libraries 8 | import urllib, re 9 | #Selenium libraries 10 | from selenium import webdriver 11 | import os 12 | import urllib.request 13 | 14 | 15 | # In[125]: 16 | 17 | 18 | #Defining function to check if image is present on url 19 | def url_status(url): 20 | try: 21 | r = urllib.request.urlopen(url) 22 | return r.code 23 | except urllib.request.HTTPError as e: 24 | r = e 25 | return r.code 26 | except urllib.request.URLError as e: 27 | r=e 28 | return e.args 29 | 30 | 31 | # In[99]: 32 | 33 | 34 | # Testing 35 | # Uncomment only if needs to be tested 36 | # url_status('http://imgur.com/a/IazT5') 37 | 38 | 39 | # In[3]: 40 | 41 | 42 | # Function to get image urls from imgur 43 | def scrap_imgur(url, filename): 44 | url += '.png' 45 | filename += '.png' 46 | work_dir = os.getcwd() + '\\Meme Data\\imgur\\' 47 | # urllib.request.urlretrieve(url, work_dir + filename) 48 | return url 49 | 50 | 51 | # In[4]: 52 | 53 | 54 | # Testing 55 | # Uncomment if needs to be tested 56 | # scrap_imgur('http://imgur.com/LgPKrP1', 'filename') 57 | 58 | 59 | # In[5]: 60 | 61 | 62 | # Importing headless selenium driver to deal FB images 63 | from selenium.webdriver.chrome.options import Options 64 | from selenium import webdriver 65 | import pandas as pd 66 | 67 | 68 | # In[6]: 69 | 70 | 71 | # Headless drivers 72 | chrome_options = Options() 73 | chrome_options.add_argument("--headless") 74 | chrome_options.add_argument("--window-size=1920x1080") 75 | driver = webdriver.Chrome(options=chrome_options) 76 | 77 | 78 | # In[7]: 79 | 80 | 81 | #Function to scrap FB images 82 | def 
scrap_fb(url, filename): 83 | driver.get(url) 84 | images = driver.find_elements_by_class_name("scaledImageFitWidth") 85 | # Importing image url from page 86 | filename += '.png' 87 | work_dir = os.getcwd() + '\\Meme Data\\FB\\' 88 | try: 89 | url = images[1].get_attribute('data-src') 90 | # urllib.request.urlretrieve(url, work_dir + filename) 91 | # Avoiding index error 92 | except IndexError: 93 | url = 'null' 94 | # urllib.request.urlretrieve(url, work_dir + filename) 95 | return url 96 | 97 | 98 | # In[8]: 99 | 100 | 101 | # Testing 102 | # Uncomment if needs to tested 103 | # scrap_fb('https://facebook.com/1667496210185013/posts/1757866207814679', 'filename') 104 | 105 | 106 | # In[9]: 107 | 108 | 109 | # Importing beautiful soup to take out twitter data 110 | from bs4 import BeautifulSoup as Soup 111 | from urllib.request import urlopen as ure 112 | 113 | 114 | # In[10]: 115 | 116 | 117 | # Function to scrap images from twitter url 118 | def scrap_twitter(url, filename): 119 | soup = ure(url) 120 | temp_soup = Soup(soup) 121 | try: 122 | url = temp_soup.find_all('div', {'class':"AdaptiveMedia-photoContainer"})[0].find('img').get('src') 123 | except IndexError: 124 | url = 'null' 125 | #Importing image url from page 126 | filename += '.png' 127 | work_dir = os.getcwd() + '\\Meme Data\\Twitter\\' 128 | # urllib.request.urlretrieve(url, work_dir + filename) 129 | return url 130 | 131 | 132 | # In[11]: 133 | 134 | 135 | # Importing random for random sampling 136 | import random 137 | # Importing numpy to carry out array operations 138 | import numpy as np 139 | 140 | 141 | # In[12]: 142 | 143 | 144 | # Importing re for regex 145 | import re 146 | 147 | 148 | # In[13]: 149 | 150 | 151 | # Removing longer captions from the data 152 | def count_lines(line): 153 | split_sent = line.split('\\n') 154 | match_words = [re.match(' [0-9]*:[0-9]* PM',i) for i in split_sent] 155 | boo_list = [i is None for i in match_words] 156 | PreList = [a*b for a,b in 
zip(split_sent,boo_list)] 157 | PreList = [i for i in PreList if i != ''] 158 | return len(PreList) 159 | 160 | 161 | # In[15]: 162 | 163 | 164 | # Function to process dataframe 165 | def preprocess_data(dirct): 166 | # df_data = pd.read_csv(dirct, engine = 'python', sep = '\t') 167 | df_data = pd.read_csv(dirct, engine = 'python') 168 | # Selecting required columns 169 | df_data = df_data[['id','link','caption', 'network']] 170 | # Removing data related to instagram 171 | df_data = df_data[df_data['network'] != 'instagram'] 172 | # Creating empty column named status to keep check on the status of the urls i.e. modified 173 | df_data['status'] = "" 174 | # Creating cap_len column to store number of lines in an observation 175 | df_data['cap_len'] = [count_lines(sent) for sent in df_data['caption']] 176 | # Subsetting dataframe as per cap_len 177 | df_data = df_data[df_data['cap_len'] < 20] 178 | # Taking out all the id 179 | id_list = df_data['id'] 180 | # Making list of ids 181 | list_id = [ID for ID in id_list] 182 | random.seed(99) 183 | if len(id_list) > 500: 184 | # Taking out 500 random ids 185 | rand_ids = random.sample(list_id, 500) 186 | else: 187 | rand_ids = list_id 188 | # Creating new data frame according to ids 189 | df_new = pd.DataFrame() 190 | for i in range(len(rand_ids)): 191 | df_new = df_new.append(df_data[df_data['id'] == rand_ids[i]]) 192 | # Checking url 193 | true_urls = [(df_new.index[i], link) for i,link in enumerate(df_new['link']) if url_status(link) in (200, 401)] 194 | # Empty DataFrame to append the rows with working urls 195 | df_final = pd.DataFrame() 196 | # Comparing the url index of working urls with preprocessed dataframe earlier 197 | for i in range(len(true_urls)): 198 | df_final = df_final.append(df_new[df_new.index == true_urls[i][0]]) 199 | return df_final 200 | 201 | 202 | # In[17]: 203 | 204 | 205 | # Testing 206 | # Uncomment if needs to be tested 207 | # test_df = preprocess_data("E:\\MSc DA\\Sem 
# (cont.) 2\\Project\\2016electionmemes\\Feel_the_Bern.csv")


# In[21]:


# Storing base directory in variable
base_dir = 'E:\\MSc DA\\Sem 2\\Project\\2016electionmemes'


# In[22]:


# Creating list of csv files
# os.walk yields the top directory first; only that first entry is used so
# that paths_dir lists the CSVs directly inside base_dir.  Looping over every
# walked directory re-bound paths_dir on each iteration, so only the last
# directory visited survived, and the name was never bound at all when
# base_dir did not exist.  The default keeps paths_dir defined (empty) then.
root, dirs, files = next(os.walk(base_dir), (base_dir, [], []))
paths_dir = [root + '\\' + name for name in files if name.endswith(".csv")]


# In[24]:


# Testing
# Uncomment if needs to be tested
# paths_dir


# In[26]:


# Function to create url collection
def url_collection(DF):
    """Replace each row's page link with the direct image URL, in place.

    DF: DataFrame with 'id', 'link', 'network' and 'status' columns.

    Rows whose 'status' is already 'modified' are left untouched so that a
    second pass never rewrites an image URL.  Every other row is sent to
    scrap_imgur (network == 'imgur'), scrap_fb (network == 'facebook') or
    scrap_twitter (any other network), its 'link' is overwritten with the
    returned URL and its 'status' is set to 'modified'.

    return: the same DataFrame object that was passed in
    """
    # Positional access is used throughout: the incoming index is not
    # aligned (not 0..n-1), and writing through a chained lookup such as
    # DF['status'][label] = ... may update a temporary copy instead of DF.
    id_col = DF.columns.get_loc('id')
    link_col = DF.columns.get_loc('link')
    network_col = DF.columns.get_loc('network')
    status_col = DF.columns.get_loc('status')

    for pos in range(len(DF)):
        if DF.iat[pos, status_col] == 'modified':
            continue

        # The ID doubles as the file name the image could later be saved under
        filename = str(DF.iat[pos, id_col])
        url = DF.iat[pos, link_col]
        network = DF.iat[pos, network_col]

        if network == 'imgur':
            image_url = scrap_imgur(url, filename)
        elif network == 'facebook':
            image_url = scrap_fb(url, filename)
        else:
            image_url = scrap_twitter(url, filename)

        DF.iat[pos, link_col] = image_url
        DF.iat[pos, status_col] = 'modified'
    return DF


# In[27]:


# Storing images on local and
# creating working csv with image urls
def creating_op(dirct):
    """Build the refined CSV for one source CSV and write it to disk.

    dirct: path of a source csv; it must contain '2016electionmemes\\' so
           that the output path can be derived from it

    The file is run through preprocess_data and url_collection, rows whose
    link came back as the string 'null' are dropped, and the result is
    written tab-separated to the same file name under
    '2016electionmemes\\Refined\\'.

    return: None (the function only writes the file)
    """
    # Processing Dataframe
    processed_df = preprocess_data(dirct)
    # Collecting true urls
    refined_df = url_collection(processed_df)
    # Removing the null urls
    refined_df = refined_df[refined_df['link'] != 'null']
    # Defining output path (named out_path so it does not shadow the
    # module-level op_dir list built further down)
    out_path = dirct.replace('2016electionmemes\\', '2016electionmemes\\Refined\\')
    # Writing file at above location
    refined_df.to_csv(out_path, sep='\t', encoding='utf-8')


# In[202]:


# Uncomment only if needs to be run
# for path in paths_dir:
#     creating_op(path)


# In[63]:


# Creating output directory
base_dir = "E:\\MSc DA\\Sem 2\\Project\\2016electionmemes\\Refined"
# Only the first entry produced by os.walk (the Refined folder itself) is
# used.  Looping over every walked directory re-bound op_dir each time, so
# only the last directory visited survived, and op_dir was never bound when
# the folder was missing, which made the next cell fail with a NameError.
root, dirs, files = next(os.walk(base_dir), (base_dir, [], []))
op_dir = [root + '\\' + name for name in files if name.endswith(".csv")]


# In[64]:


# Number of rows in each refined csv
count_per_file_op = [len(pd.read_csv(j, engine='python', sep='\t')) for j in op_dir]
# len(pd.read_csv(op_dir[0], engine='python', sep='\t'))


# In[203]:


# Printing the max number of tokens
# sum(count_per_file_op)


# In[67]:


# Taking in all the dataframes in single list
combined_DF = [pd.read_csv(i, sep='\t', encoding='utf-8') for i in op_dir]


# In[204]:


# Checking if all the files are part of the list
# len(combined_DF)


# In[69]:


# appending all the lengths (one list of caption line counts per file)
Caption_len = []
for DF in combined_DF:
    Caption_len.append([count_lines(i) for i in DF['caption']])


# In[72]:


# taking guess of lengths in the data (files with no rows are dropped)
New_cap_len = [i for i in Caption_len if i != []]
# Maximum caption lengths in each file
[max(cap_len) for cap_len in New_cap_len]


#
In[73]: 348 | 349 | 350 | # concatenating all the dataframes 351 | con_Df = pd.concat(combined_DF) 352 | # con_Df['cap_len'] = [count_lines(i) for i in con_Df['caption']] 353 | 354 | 355 | # In[205]: 356 | 357 | 358 | # Number of rows in new Dataframe 359 | # Keeping caption lenght lower than 20 360 | len(con_Df[con_Df['cap_len'] < 20]) 361 | 362 | 363 | # In[126]: 364 | 365 | 366 | # Adding new column URL_status to the existing dataframe to store status of URL (200,404, 401) 367 | con_Df['URL_status'] = [url_status(con_Df.iloc[i]['link']) for i in range(len(con_Df['link']))] 368 | 369 | 370 | # In[165]: 371 | 372 | 373 | # url_status(con_Df['link'][0]) 374 | con_Df = con_Df[con_Df['URL_status'] != 404] 375 | 376 | 377 | # In[167]: 378 | 379 | 380 | # Writing csv output 381 | con_Df.to_csv("E:\MSc DA\\Sem 2\\Project\\2016electionmemes\\Refined\\Combined_Df.csv", sep='\t', encoding='utf-8') 382 | 383 | 384 | # In[143]: 385 | 386 | 387 | # Defining function to make list of dataframe 388 | def chunks(DF, n): 389 | n = max(1, n) 390 | return (DF.iloc[i:i+n, :] for i in range(0, len(DF), n)) 391 | 392 | 393 | # In[157]: 394 | 395 | 396 | # List of dataframe by using chunks() function 397 | list_con_Df = [i for i in chunks(con_Df,30)] 398 | 399 | 400 | # In[168]: 401 | 402 | 403 | # Writing csvs in the form 404 | for i in range(len(list_con_Df)): 405 | Filename = 'Memes_Data_Survey_'+ str(i) + '.csv' 406 | list_con_Df[i].to_csv('E:\\MSc DA\\Sem 2\\Project\\2016electionmemes\\Form csvs\\' + Filename, sep='\t', encoding='utf-8') 407 | 408 | 409 | # In[182]: 410 | 411 | 412 | # Creating directory of the forms 413 | form_dir = 'E:\\MSc DA\\Sem 2\\Project\\2016electionmemes\\Form csvs\\Forms\\' 414 | 415 | 416 | # In[208]: 417 | 418 | 419 | # form_dir 420 | 421 | 422 | # In[199]: 423 | 424 | 425 | # Creating csv compatible for google forms 426 | def comp_DF(DF_list): 427 | for i in range(len(DF_list)): 428 | with open(form_dir + 'Memes_Data_Survey_' + str(i) + 
'.csv','w',encoding='utf-8') as k: 429 | for index, row in DF_list[i].iterrows(): 430 | if (index % 10 )==0: 431 | print('IMAGE\t Choose the option\t',row.iloc[3],'\t', 432 | row.iloc[2]+'\t','offensive\t Non-offensiv', file=k) 433 | # Adding Page to limit the number of images on the page 434 | print('PAGE',file=k) 435 | else: 436 | print('IMAGE\t Choose the option\t',row.iloc[3],'\t', 437 | row.iloc[2]+'\t','offensive\t Non-offensiv', file=k) 438 | 439 | 440 | # In[201]: 441 | 442 | 443 | # Using function created above 444 | # Uncomment only if needs to executed 445 | # comp_DF(list_con_Df) 446 | 447 | -------------------------------------------------------------------------------- /BiLSTM_VGG16.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "# Importing necessary libraries\n", 20 | "import keras\n", 21 | "import h5py\n", 22 | "from keras import optimizers\n", 23 | "from keras.models import load_model\n", 24 | "from keras.layers import Bidirectional\n", 25 | "from Multimodal_baseline_Functions import *\n", 26 | "from keras.layers.core import Reshape, Dropout\n", 27 | "from keras.utils.vis_utils import plot_model\n", 28 | "import os\n", 29 | "# import keras_metrics\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "from keras.layers import Conv1D, MaxPooling1D, Flatten, GlobalAveragePooling3D\n", 32 | "from keras import regularizers\n", 33 | "import seaborn as sns\n", 34 | "import matplotlib.pyplot as plt \n", 35 | "from sklearn.metrics import confusion_matrix\n", 36 | "from keras import regularizers \n", 37 | "from keras.applications.inception_v3 import InceptionV3" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | 
"execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "class_weight = {1: 1.4,\n", 47 | " 0: 1.}" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "GLOVE_DIR = \"E:\\MSc DA\\Sem 2\\Project\\Code and Docmentation\\glove.6B\"\n", 57 | "EMBEDDING_DIM = 50" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Defining model with Adam optimizer\n", 67 | "adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)\n", 68 | "sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)\n", 69 | "adadelta = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def Image_model(base_model):\n", 79 | " # Freezing all the trainable layers\n", 80 | " for layer in base_model.layers:\n", 81 | " layer.trainable = False\n", 82 | "\n", 83 | " # Creating output layer\n", 84 | " x = base_model.output\n", 85 | " # Adding pooling layer before the output\n", 86 | " x = GlobalAveragePooling2D()(x) \n", 87 | " return x" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 6, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "def read_data(file_name):\n", 97 | " #Opening file\n", 98 | " with open(file_name,'r', encoding=\"utf8\") as f:\n", 99 | " #Creating empty set and dictonary for vocab and word respectively\n", 100 | " word_vocab = set() \n", 101 | " word2vector = {}\n", 102 | " #Iterating over each line of file\n", 103 | " for line in f:\n", 104 | " #Spliting lines\n", 105 | " line_ = line.strip() \n", 106 | " #Splitting words\n", 107 | " words_Vec = line_.split() \n", 108 | " word_vocab.add(words_Vec[0])\n", 109 | " word2vector[words_Vec[0]] = 
np.array(words_Vec[1:],dtype=float)\n", 110 | " print(\"Total Words in DataSet:\",len(word_vocab))\n", 111 | " return word_vocab,word2vector" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# Dividing data in test, train, validation\n", 121 | "training_DF, testing_DF, validation_DF = preprocess_text(Training_path,Validation_path, Testing_path)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Processing image and text for each set\n", 131 | "# Creating train, test and validation image path\n", 132 | "train_img_path = create_img_path(training_DF,'image_name', img_dir)\n", 133 | "test_img_path = create_img_path(testing_DF,'image_name', img_dir)\n", 134 | "val_img_path = create_img_path(validation_DF,'image_name', img_dir)\n", 135 | "\n", 136 | "# Processing the text\n", 137 | "training_DF['sentence'] = training_DF['sentence'].apply(clean_text)\n", 138 | "testing_DF['sentence'] = testing_DF['sentence'].apply(clean_text)\n", 139 | "validation_DF['sentence'] = validation_DF['sentence'].apply(clean_text)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 9, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# Vectorising text\n", 149 | "# process the whole observation into single list\n", 150 | "train_text_list=list(training_DF['sentence'])\n", 151 | "test_text_list = list(testing_DF['sentence'])\n", 152 | "val_text_list = list(validation_DF['sentence'])\n", 153 | "\n", 154 | "# Creating vectors for train, test, validation\n", 155 | "tokenizer = Tokenizer(num_words=1000)\n", 156 | "tokenizer.fit_on_texts(train_text_list)\n", 157 | "sequences_train = tokenizer.texts_to_sequences(train_text_list)\n", 158 | "sequences_test = tokenizer.texts_to_sequences(test_text_list)\n", 159 | "sequences_val = 
tokenizer.texts_to_sequences(val_text_list)\n", 160 | "\n", 161 | "x_train = preprocessing.sequence.pad_sequences(sequences_train, maxlen=maxlen)\n", 162 | "x_test = preprocessing.sequence.pad_sequences(sequences_test, maxlen=maxlen)\n", 163 | "x_val = preprocessing.sequence.pad_sequences(sequences_val, maxlen=maxlen)\n", 164 | "\n", 165 | "# encoding all the labels \n", 166 | "y_test = testing_DF['label']\n", 167 | "y_train = training_DF['label']\n", 168 | "y_val = validation_DF['label']" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 10, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "# Creating train, test, val, generator for meme\n", 178 | "img_txt_gen_train = img_text_generator(train_img_path, x_train, y_train, batch_size=32)\n", 179 | "img_txt_gen_test = img_text_generator(test_img_path, x_test, y_test, batch_size=1)\n", 180 | "img_txt_gen_val = img_text_generator(val_img_path, x_val, y_val, batch_size=1)\n", 181 | "\n", 182 | "# Creating train, test, val, generator for text\n", 183 | "txt_gen_train = text_generator(x_train, y_train, batch_size=32)\n", 184 | "txt_gen_test = text_generator(x_test, y_test, batch_size=1)\n", 185 | "txt_gen_val = text_generator(x_val, y_val, batch_size=1)\n", 186 | "\n", 187 | "# Creating train, test, val, generator for image\n", 188 | "img_gen_train = image_generator(train_img_path, training_DF, batch_size=32)\n", 189 | "img_gen_test = image_generator(test_img_path, testing_DF, batch_size=1)\n", 190 | "img_gen_val = image_generator(val_img_path, validation_DF, batch_size=1)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 11, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "Total Words in DataSet: 400000\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "vocab, w2v = read_data(os.path.join(GLOVE_DIR, \"glove.6B.50d.txt\"))" 208 | ] 209 | }, 210 | { 211 | "cell_type": 
"code", 212 | "execution_count": 12, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "word_index = tokenizer.word_index\n", 217 | "num_tokens = len(word_index)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 13, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "#Creating embeddding weight matrix\n", 227 | "embedding_matrix = np.zeros((num_tokens + 1, EMBEDDING_DIM))\n", 228 | "\n", 229 | "for word, i in word_index.items():\n", 230 | " embedding_vector = w2v.get(word)\n", 231 | " if embedding_vector is not None:\n", 232 | " embedding_matrix[i] = embedding_vector" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 14, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stderr", 242 | "output_type": "stream", 243 | "text": [ 244 | "WARNING: Logging before flag parsing goes to stderr.\n", 245 | "W0819 19:54:07.820795 8196 deprecation_wrapper.py:119] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n", 246 | "\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "#Creating embedded layer using embedded matrix as weight matrix\n", 252 | "embedding_layer = Embedding(num_tokens + 1, EMBEDDING_DIM, weights=[embedding_matrix], trainable = False)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 15, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stderr", 262 | "output_type": "stream", 263 | "text": [ 264 | "W0819 19:54:07.921015 8196 deprecation_wrapper.py:119] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:517: The name tf.placeholder is deprecated. 
Please use tf.compat.v1.placeholder instead.\n", 265 | "\n", 266 | "W0819 19:54:07.923027 8196 deprecation_wrapper.py:119] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n", 267 | "\n", 268 | "W0819 19:54:07.952036 8196 deprecation_wrapper.py:119] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n", 269 | "\n", 270 | "W0819 19:54:07.953037 8196 deprecation_wrapper.py:119] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n", 271 | "\n" 272 | ] 273 | } 274 | ], 275 | "source": [ 276 | "# Defining input layer\n", 277 | "main_input = Input(shape=(maxlen,), dtype='int32', name='main_input')\n", 278 | "\n", 279 | "# Defining embedding layer which will encode the input sequence\n", 280 | "embedded_sequences = embedding_layer(main_input)\n", 281 | "# x = Embedding(output_dim=512, input_dim=10000, input_length=maxlen)(main_input)\n", 282 | "\n", 283 | "# A LSTM will transform the vector sequence into a single vector,\n", 284 | "# containing information about the entire sequence\n", 285 | "lstm_out = (Bidirectional(LSTM(32)))(embedded_sequences)\n", 286 | "\n", 287 | "# Output of text model\n", 288 | "txt_out = Dense(1, activation='sigmoid')(lstm_out)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 16, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "txt_model = Model(inputs = [main_input], outputs=txt_out)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 17, 303 | "metadata": { 304 | "scrolled": true 305 | }, 306 | "outputs": [ 307 | { 308 | "name": "stderr", 309 | "output_type": "stream", 310 
| "text": [ 311 | "W0819 19:54:08.737271 8196 deprecation_wrapper.py:119] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\keras\\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", 312 | "\n", 313 | "W0819 19:54:08.748280 8196 deprecation.py:323] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\tensorflow\\python\\ops\\nn_impl.py:180: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", 314 | "Instructions for updating:\n", 315 | "Use tf.where in 2.0, which has the same broadcast rule as np.where\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "txt_model.compile(loss='binary_crossentropy', optimizer=adam, metrics = [\"accuracy\"])" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 18, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "plot_model(txt_model, to_file='BiLSTM_txt_model.png', show_shapes=True, show_layer_names=True)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 19, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "name": "stdout", 339 | "output_type": "stream", 340 | "text": [ 341 | "Epoch 1/7\n", 342 | "2/2 [==============================] - 27s 13s/step - loss: 0.8108 - acc: 0.5625 - val_loss: 0.7017 - val_acc: 0.4295\n", 343 | "Epoch 2/7\n", 344 | "2/2 [==============================] - 22s 11s/step - loss: 0.7976 - acc: 0.5156 - val_loss: 0.6986 - val_acc: 0.4497\n", 345 | "Epoch 3/7\n", 346 | "2/2 [==============================] - 23s 11s/step - loss: 0.7674 - acc: 0.6094 - val_loss: 0.6936 - val_acc: 0.4899\n", 347 | "Epoch 4/7\n", 348 | "2/2 [==============================] - 29s 15s/step - loss: 0.8249 - acc: 0.5469 - val_loss: 0.6906 - val_acc: 0.5503\n", 349 | "Epoch 5/7\n", 350 | "2/2 [==============================] - 30s 15s/step - loss: 0.8028 - acc: 0.5938 - val_loss: 0.6889 - val_acc: 0.5503\n", 351 | 
"Epoch 6/7\n", 352 | "2/2 [==============================] - 27s 13s/step - loss: 0.8064 - acc: 0.5312 - val_loss: 0.6875 - val_acc: 0.5638\n", 353 | "Epoch 7/7\n", 354 | "2/2 [==============================] - 29s 14s/step - loss: 0.8393 - acc: 0.5000 - val_loss: 0.6891 - val_acc: 0.5705\n" 355 | ] 356 | }, 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "" 361 | ] 362 | }, 363 | "execution_count": 19, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "# Training text model\n", 370 | "txt_model.fit_generator(txt_gen_train, epochs=7, validation_steps = 149, steps_per_epoch=2, validation_data=txt_gen_val, shuffle = False, class_weight=class_weight)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 20, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "txt_model.save('BiLSTM_txt_model.h5')" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 21, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "y_pred_txt = (txt_model.predict_generator(txt_gen_test,steps = 149))\n", 389 | "y_pred_txt = np.round(list(itertools.chain(*y_pred_txt)))\n", 390 | "y_true = y_test.values" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 22, 396 | "metadata": {}, 397 | "outputs": [ 398 | { 399 | "data": { 400 | "image/png": "\n", 401 | "text/plain": [ 402 | "
" 403 | ] 404 | }, 405 | "metadata": { 406 | "needs_background": "light" 407 | }, 408 | "output_type": "display_data" 409 | } 410 | ], 411 | "source": [ 412 | "labels = [1,0]\n", 413 | "cm = confusion_matrix(y_true, y_pred_txt, labels)\n", 414 | "ax= plt.subplot()\n", 415 | "sns.heatmap(cm, annot=True, ax = ax); #annot=True to annotate cells\n", 416 | "\n", 417 | "# labels, title and ticks\n", 418 | "ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels'); \n", 419 | "ax.set_title('Confusion Matrix'); \n", 420 | "ax.xaxis.set_ticklabels(['offensive', 'non-offensive']); ax.yaxis.set_ticklabels(['offensive', 'non-offensive']);" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 27, 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "name": "stderr", 430 | "output_type": "stream", 431 | "text": [ 432 | "W0819 19:57:44.541557 8196 deprecation_wrapper.py:119] From f:\\anaconda\\envs\\exp_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. 
Please use tf.nn.max_pool2d instead.\n", 433 | "\n" 434 | ] 435 | } 436 | ], 437 | "source": [ 438 | "# Loading pretrained image model from previous experiment\n", 439 | "img_model = load_model('VGG16_img_model.h5')" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 29, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "# Compiling model\n", 449 | "img_model.compile(loss='binary_crossentropy', optimizer=adam, metrics = [\"accuracy\"])" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 30, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "# Concatenating the output\n", 459 | "con_layer = keras.layers.concatenate([txt_model.output, img_model.output])\n", 460 | "out = Dense(1,activation='sigmoid')(con_layer)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 31, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "# Defining model input and output\n", 470 | "com_model = Model(inputs = [img_model.input, txt_model.input], outputs=out)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 32, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "# An SGD optimizer is defined here, but the model below is compiled with adam\n", 480 | "sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)\n", 481 | "com_model.compile(loss='binary_crossentropy', optimizer=adam, metrics = [\"accuracy\"])" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 33, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "# Plot the model\n", 491 | "plot_model(com_model, to_file='BiLSTM_VGG_mul_model.png', show_shapes=True, show_layer_names=True)" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 34, 497 | "metadata": { 498 | "scrolled": true 499 | }, 500 | "outputs": [ 501 | { 502 | "name": "stdout", 503 | "output_type": "stream", 504 | "text": [ 505 | "Epoch 1/7\n", 506 | "2/2 
[==============================] - 259s 129s/step - loss: 0.7969 - acc: 0.4062 - val_loss: 0.6986 - val_acc: 0.4765\n", 507 | "Epoch 2/7\n", 508 | "2/2 [==============================] - 251s 125s/step - loss: 0.7927 - acc: 0.5156 - val_loss: 0.6956 - val_acc: 0.5302\n", 509 | "Epoch 3/7\n", 510 | "2/2 [==============================] - 252s 126s/step - loss: 0.8089 - acc: 0.4688 - val_loss: 0.6950 - val_acc: 0.4497\n", 511 | "Epoch 4/7\n", 512 | "2/2 [==============================] - 253s 127s/step - loss: 0.8190 - acc: 0.4531 - val_loss: 0.6943 - val_acc: 0.5034\n", 513 | "Epoch 5/7\n", 514 | "2/2 [==============================] - 244s 122s/step - loss: 0.7993 - acc: 0.5000 - val_loss: 0.6937 - val_acc: 0.5503\n", 515 | "Epoch 6/7\n", 516 | "2/2 [==============================] - 246s 123s/step - loss: 0.8074 - acc: 0.4844 - val_loss: 0.6953 - val_acc: 0.4966\n", 517 | "Epoch 7/7\n", 518 | "2/2 [==============================] - 251s 125s/step - loss: 0.8170 - acc: 0.6094 - val_loss: 0.6927 - val_acc: 0.5302\n" 519 | ] 520 | }, 521 | { 522 | "data": { 523 | "text/plain": [ 524 | "" 525 | ] 526 | }, 527 | "execution_count": 34, 528 | "metadata": {}, 529 | "output_type": "execute_result" 530 | } 531 | ], 532 | "source": [ 533 | "# Training model\n", 534 | "com_model.fit_generator(img_txt_gen_train, epochs=7, validation_steps = 149, steps_per_epoch=2, validation_data=img_txt_gen_val, shuffle=False, class_weight=class_weight)" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 35, 540 | "metadata": {}, 541 | "outputs": [], 542 | "source": [ 543 | "# Saving the combined model\n", 544 | "com_model.save('BiLSTM_VGG_mul_model.h5')" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 36, 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [ 553 | "# Predicting the label using combined model\n", 554 | "y_pred_com = (com_model.predict_generator(img_txt_gen_test,steps = 149))\n", 555 | "y_pred_com = 
np.round(list(itertools.chain(*y_pred_com)))" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 37, 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "data": { 565 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWgAAAE4CAYAAAB2a2kiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZxcVZn/8c+3OwthDftEIoGwBBUhKCKCCkYmsosDCCMwbBKRGQT5qYiAAiOO6IAs4zIBBARBwjYiiAGBgCCyhISwhARJZMAwIlsMW5bu5/fHOR2LprurutO36nb19/163VfXvXXrnJPuylOnzj3nuYoIzMysfFoa3QAzM+uaA7SZWUk5QJuZlZQDtJlZSTlAm5mVlAO0mVlJOUDbCpM0QtKvJC2UdM0KlHOQpFv7s22NIOkWSYc2uh028DlADyKSPifpIUmvSXo+B5KP9kPR+wHrA2tHxP59LSQifh4RE/uhPW8jaWdJIen6Tse3zsen1VjOaZKuqHZeROwWEZf1sblmyzlADxKSTgDOBb5DCqYbAj8CPt0PxY8B5kbEsn4oqyh/BXaQtHbFsUOBuf1VgRL/n7J+4zfTICBpDeAM4F8j4vqIeD0ilkbEryLiq/mc4ZLOlbQgb+dKGp6f21nSc5L+n6QXcu/78Pzc6cA3gQNyz/zIzj1NSRvlnuqQvH+YpHmSFkmaL+mgiuP3VLxuB0kP5qGTByXtUPHcNEn/LuneXM6tktbp4dewBPgf4MD8+lbgs8DPO/2uzpP0rKS/SZou6WP5+K7ANyr+nY9UtONMSfcCbwBj87HP5+d/LOnaivLPknS7JNX8B7RBywF6cPgIsBJwQw/nnAxsD4wHtga2A06peP4fgDWADYAjgR9KWjMivkXqlV8dEatGxMU9NUTSKsD5wG4RsRqwAzCzi/PWAm7O564NnAPc3KkH/DngcGA9YBjwlZ7qBn4G/Et+/CngcWBBp3MeJP0O1gKuBK6RtFJE/KbTv3PritccAkwCVgOe6VTe/wO2yh8+HyP97g4N51iwGjhADw5rAy9WGYI4CDgjIl6IiL8Cp5MCT4el+fmlEfFr4DVgXB/b0w5sKWlERDwfEY93cc4ewFMRcXlELIuIq4Angb0qzrkkIuZGxJvAFFJg7VZE/B5YS9I4UqD+WRfnXBERL+U6zwaGU/3feWlEPJ5fs7RTeW8AB5M+YK4Ajo2I56qUZwY4QA8WLwHrdAwxdONdvL3390w+tryMTgH+DWDV3jYkIl4HDgCOBp6XdLOkLWpoT0ebNqjY/78+tOdy4N+AT9DFN4o8jDM7D6u8SvrW0NPQCcCzPT0ZEQ8A8wCRPkjMauIAPTjcB7wF7NPDOQtIF/s6bMg7v/7X6nVg5Yr9f6h8MiKmRsQ/AqNIveILa2hPR5v+3Mc2dbgcOAb4de7dLpeHIE4kjU2vGREjgYWkwArQ3bBEj8MVkv6V1BNfAHyt7023wcYBehCIiIWkC3k/lLSPpJUlDZW0m6Tv5dOuAk6RtG6+2PZN0lfyvpgJfFzShvkC5UkdT0haX9LeeSx6MWmopK2LMn4NbJ6nBg6RdADwXuCmPrYJgIiYD+xEGnPvbDVgGWnGxxBJ3wRWr3j+L8BGvZmpIWlz4NukYY5DgK9J6nEoxqyDA/QgERHnACeQLvz9lfS1/N9IMxsgBZGHgFnAo8DD+Vhf6roNuDqXNZ23B9UW0oWzBcDLpGB5TBdlvATsmc99idTz3DMiXuxLmzqVfU9EdPXtYCpwC2nq
3TOkbx2Vwxcdi3BekvRwtXrykNIVwFkR8UhEPEWaCXJ5xwwZs57IF5PNzMrJPejyeTdwJzCbNA3suIrnjgXm5OPfe+dLrYmtBDwAPEL6+5+ej1+cj80CrqUPF26tvNyDLp9ReXuYNCY6nXRxb33SuOkepLHb9YAXGtRGqz8Bq5DG7IcC95A+vJ8A/pbPOYf0nvhuIxpo/a+naVfWGM/nDWARqSe9AXAU6T/e4vycg/PgEqTgDClAD83HOoKzgBFUmVFiA0vhQxySxkjaJT8eIWm1outsIhsB2wD3A5sDH8uP7wI+1LhmWYO0kmbIvADcRnovAFxCmhO+BXBBY5pmRSg0QEs6ijQu9t/50Gj+PmvAerYqcB1wPKmXNARYk7Qc+6ukBQ/O5zC4tJFWS44mLcXfMh8/nLSwZzZpEZA1iULHoCXNJL2R7o+IbfKxRyPi/d2cP4mU0wC1rvHBlpZVCmtbmQ0ZMoQb/+cybr3tLs49bzIAN//qCr73/R9y1933ATBn9r3s+LG9ePHFlxvZ1Lo75l39kR114Nv1uP1Y8sZb3HHh32cwbvrh9zBh0l5MPnLwXT8+/09Xr3BnZemL82oOhkPXGVuXzlHRQxyLI2JJx06eF9rtLyEiJkfEthGx7WANzgAXTj6b2U/+cXlwBvjljVP5xCd2BGCzzcYybNiwQRecB7NV11qNEaunxZlDhw9l3I5b8sK8BawzZv3l57zvkx/kL0/3dfGn0d5W+1YnRV8kvEvSN4ARkv6RtCDhVwXXOaDtuMOHOOTg/Zj16BM89GC6ucipp36XSy79BRddeDYzZ9zOkiVLOeLI4xvcUqun1ddbk4PPPga1tKCWFmbefB+P3zGD4645nZVWHQESC2Y/w5RTLmp0UweuaG90C96h6CGOFlJ6xYmk8dKpwEW1pFocMmwDX422d/AQh3WlX4Y4np9d+xDHqPfUZYij6B70p4GfRURXyXDMzEojStiDLnoMem9grqTLJe1RJd2lmVnjtLfXvlUh6U+SHpU0U9JD+dhakm6T9FT+uWa1cgoN0BFxOLApKcnM54CnJXmQzMzKJ9pr32rziYgYHxHb5v2vA7dHxGbA7Xm/R4X3aCNiqaRbSLM3RpCGPT5fdL1mZr3StrT6OSvm08DO+fFlwDRS/vFuFb1QZVdJlwJ/BPYDLiLlmTAzK5d+HOIgdUhvzTcenpSPrR8RzwPkn+tVK6ToHvRhwC+AL0TE4irnmpk1TG8uElYuqssmR8Tkiv0dI2KBpPWA2yQ92Zc2FRqgI+LAIss3M+s3tfWMgbSoDpjcw/ML8s8XJN1AWlH9F0mjIuJ5SaOoIeFZIUMcku7JPxdJ+lvFtkjS36q93sys7vrpIqGkVTqSwuVbu00EHgNuBA7Npx0K/LJakwrpQUfER/NPZ64zs4Gh/y4Srg/cIAlSjL0yIn4j6UFgiqQjgf8F9q9WUKFDHJI2AZ6LiMWSdga2Ii1cebXIes3Meq0XQxw9iYh5wNZdHH8J+GRvyip6ocp1QJukTUm35tkYuLLgOs3Meq//50GvsKJncbRHxDJJnwHOjYgLJM0ouE4zs97rpx50fyo6QC+V9M+kAfG98rGhBddpZtZrEfVLI1qrooc4Dgc+ApwZEfMlbQxcUXCdZma9N9iGOCLiCeBLFfvz8R2HzayM2pY1ugXvUPQsjh2B04AxuS4BERFji6zXzKzX6ninlFoVPQZ9MfBlYDrphpdmZuVUwnzQRQfohRFxS8F1mJmtuEE4i+NOSd8HrgeWJ0uKiIcLrtfMrHcGYQ/6w/nnthXHAphQcL1mZr0z2HrQEfGJIss3M+svUXzC/l4rOmH/+pIuzndUQdJ7c6IQM7Ny6d+E/f2i6IUqlwJTgXfl/bnA8QXXaWbWeyVcqFJ0gF4nIqYA7QARsQxPtzOzMiphD7roi4SvS1qbdGEQSdsDCwuu08ys9wbhLI4TSHcR2ETSvcC6pJvHmpmVy2BZ6i1p/4i4BngF2AkY
R1rmPSciynep1MyshNPsihqDPin/vC4ilkXE4xHxmIOzmZXWIBqDflnSncDGkm7s/GRE7F1QvWZmfTOIxqB3Bz4AXA6cXVAdZmb9p4RDHEUF6Isj4hBJF0bEXQXVYWbWfwZRD/qDksYAB0m6kHSBcLmIeLmges3M+mawzOIAfgL8BhhLygXdQaQ50U7Yb2blMliGOCLifOB8ST8mBeuP56fujohHiqjTzGyFlDBAF73U+0nSTWLXIS1SuVzSsQXXaWbWexG1b3VS9ErCI4HtI+J1AElnAfcBFxRcr5lZ75SwB110gBZvT47URqcLhmZmpTAIA/QlwP2Sbsj7+5BuJGtmVi6DaBYHABFxjqRpwEdJPefDI2JGkXWamfVJHceWa1V0D7rjBrG+SayZldsgHOIwMxsYHKDNzEpqEC31NjMbUGJZ+e7G5wBtZgal7EEXvZLQzGxgaI/atxpIapU0Q9JNef9SSfMlzczb+GpluAdtZgZFXCQ8DpgNrF5x7KsRcW2tBbgHbWYG/XrLK0mjgT2Ai1akSQ7QZmbQ38mSzgW+BnSO5mdKmiXpB5KGVyvEAdrMDGBZW82bpEmSHqrYJnUUI2lP4IWImN6phpOALYAPAWsBJ1ZrksegzcygV7M4ImIyMLmbp3cE9pa0O7ASsLqkKyLi4Pz8YkmXAF+pVo970GZm0G+zOCLipIgYHREbAQcCd0TEwZJGAUgSKXHcY9Wa5B60mRkQxS/1/rmkdUmJ42YCR1d7gQO0mRnUPL+5NyJiGjAtP57Q29c7QJuZQSlXEjpAm5lBmqFRMg7QZmZQyBDHinKANjMDD3GYmZWWe9BmZuVUh2l2veYAbWYGsMwB2sysnDwGbWZWUh6DNjMrp3CANjMrKQdoM7OS8iwOM7OS8iwOM7NyitpuZVVXDtBmZuAxaDOz0nKANjMrJ0+zMzMrKwdoM7NyimUO0GZm5eQetJlZSZVvGrQDtJkZ+CKhmVl5uQdtZlZOvkhoZlZSJczX7wBtZgaUcoijpdoJkv5J0mr58dclTZE0vvimmZnVT7TXvtVL1QANnBYRiyTtAOwFXA38pNhmmZnVWXsvtjqpJUC35Z97Aj+KiOuA4cU1ycys/srYg65lDPp5ST8EdgW2lTSM2gK7mdmA0b6s0S14p1oC7WeBu4A9IuIVYB3g64W2ysys3kK1b3XSbQ9a0uoVu7+pOPYacG/B7TIzq6uBNs3ucSCAyo+Ljv0ANiywXWZmdRXt9esZ16rbAB0R765nQ8zMGmmg9aCXk3QgMDYiviNpNLB+REwvtmlmZvXT3la+HnQtC1X+C/gEcEg+9AaeB21mTSbaVfNWC0mtkmZIuinvbyzpfklPSbo6z4jrUS2zOHaIiC8AbwFExMtA1YLNzAaSiNq3Gh0HzK7YPwv4QURsBrwCHFmtgFoC9FJJLaQLg0ham1KuWjcz67v+7EHnoeA9gIvyvoAJwLX5lMuAfaqVU0uA/iFwHbCupNOBe0ifBGZmTaOfhzjOBb7G3zuzawOvRkTHcpjngA2qFVL1ImFE/EzSdGCXfGj/iHislhaamQ0UvRi6QNIkYFLFockRMTk/tyfwQkRMl7Rzx0u6qrJaPbWmG20FluYCvczbzJpOe1vtoS0H48ndPL0jsLek3YGVgNVJPeqRkobkXvRoYEG1emqZxXEycBXwrlzolZJOqulfYWY2QPRXsqSIOCkiRkfERsCBwB0RcRBwJ7BfPu1Q4JfV2lRLD/pg4IMR8QaApDOB6cB/1PBaM7MBob34HBsnAr+Q9G1gBnBxtRfUEqCf6XTeEGBen5pnZlZSUUCAjohpwLT8eB6wXW9e31OypB+QxpzfAB6XNDXvTyTN5DAzaxoDKhcH0DFT43Hg5orjfyiuOWZmjdGbWRz10lOypKrjI2ZmzaKtF7M46qXqGLSkTYAzgfeSpowAEBGbF9guM7O6KmIMekXV8pFxKXAJaaL1bsAU4BcFtsnMrO4KyMWxwmoJ0CtH
xFSAiHg6Ik4hZbczM2sa7aGat3qpZZrd4pzo42lJRwN/BtYrtllmZvVVxiGOWgL0l4FVgS+RxqLXAI4oslEAMzb4QNFV2AC0xYNeH2XFaBtg0+wAiIj788NF/D1pv5lZUxlQPWhJN9BDtqWI+KdCWmRm1gD1HFuuVU896P+qWyvMzBqshOtUelyocns9G2Jm1kgDrQdtZjZoDKgxaDOzwaSty5ueNFbNAVrS8IhYXGRjzMwapb2Eg9C13FFlO0mPAk/l/a0lXVB4y8zM6qgd1bzVSy1Lvc8H9gReAoiIR/BSbzNrMoFq3uqlliGOloh4Jq32Xq6toPaYmTVElVsNNkQtAfpZSdsBIakVOBaYW2yzzMzqq54941rVEqC/SBrm2BD4C/DbfMzMrGksa3QDulBLLo4XSLcONzNrWgOyBy3pQrpYBRkRkwppkZlZA5QwmV1NQxy/rXi8EvAZ4NlimmNm1hj1nD5Xq1qGOK6u3Jd0OXBbYS0yM2uAEq5T6dNS742BMf3dEDOzRlqmAdiDlvQKf/9waQFeBr5eZKPMzOptwPWg870ItybdhxCgPaKe97Q1M6uPMi5U6XGpdw7GN0REW94cnM2sKbWr9q1easnF8YAk38HVzJpaGZMl9XRPwiERsQz4KHCUpKeB1wGROtcO2mbWNMo4PNDTGPQDwAeAferUFjOzhllWvkkcPQZoAUTE03Vqi5lZwwy0HvS6kk7o7smIOKeA9piZNcRAW+rdCqwKJVz/aGbWz8o4za6nAP18RJxRt5aYmTVQfwVoSSsBdwPDSTH22oj4lqRLgZ2AhfnUwyJiZk9lVR2DNjMbDKL/It5iYEJEvCZpKHCPpFvyc1+NiGtrLainAP3JFWmhmdlA0l8J+/OCvtfy7tC89ekaZLcLVSLi5b4UaGY2EEUvtmoktUqaCbwA3BYR9+enzpQ0S9IPJA2vVk4tKwnNzJpeb5Z6S5ok6aGK7W03MMmpMcYDo4HtJG0JnARsAXwIWAs4sVqb+pJu1Mys6fTmImFETAYm13Deq5KmAbtGxH/mw4slXQJ8pdrr3YM2MyMF6Fq3nkhaV9LI/HgEsAvwpKRR+ZhIK7Qfq9Ym96DNzIC2/pvFMQq4TFIrqRM8JSJuknSHpHVJM+RmAkdXK8gB2syM/psHHRGzgG26OD6ht2U5QJuZMfBycZiZDRrtJQzRDtBmZgy8XBxmZoNG+frPDtBmZsDAS9hvZjZoeAzazKykyheeHaDNzABfJDQzKy0PcZiZlVRboxvQBQdoMzPcgzYzK63yhWcHaDMzwBcJzcxKK0rYh3aANjPDPWgzs9Jqcw/azKycPIvDzKykPMRhZlZSvkhoZlZS7kGbmZWUe9BmZiW1LBygzcxKqXzh2QHazAzwNDszs9LyGLSZWUl5FoeZWUm1lTBEO0CbmeEetJlZaYWn2ZmZlZNncZiZlZSHOMzMSsrT7MzMSqotyteHdoA2M8NDHGZmpVXGIY6WRjfAzKwM2omat55IWknSA5IekfS4pNPz8Y0l3S/pKUlXSxpWrU0O0GZmpHnQtW5VLAYmRMTWwHhgV0nbA2cBP4iIzYBXgCOrFeQAbWZG//WgI3kt7w7NWwATgGvz8cuAfaq1yWPQZmb07ywOSa3AdGBT4IfA08CrEbEsn/IcsEG1ctyDNjMjdXFr3SRNkvRQxTbpbWVFtEXEeGA0sB3wnm6q7JF70GZm9G6pd0RMBibXcN6rkqYB2wMjJQ3JvejRwIJqr3cP2syMfp3Fsa6kkfnxCGAXYDZwJ7BfPu1Q4JfV2uQetJkZ/ZrNbhRwWR6HbgGmRMRNkp4AfiHp28AM4OJqBTlAm5nRfwn7I2IWsE0Xx+eRxqNr5gBtZobzQZuZlZbzQZuZlZR70GZmJeUetJlZSZUxm50DtJkZTthvZlZa7R6DNjMrJw9xmJmVlHvQZmYl5R60mVlJuQdtZlZS7dHW6Ca8gwO0mRleqGJmVlpe6m1mVlLuQZuZlZR7
0GZmJeWl3mZmJeUetJlZSXkM2syspNyDNjMrKa8kNDMrKfegzcxKyrM4zMxKykMcZmYl5XSjVpWGDWXslO+iYUNRaysLb7mXF869klU+shWjvnEEGjqENx/7I8+deD60le8rmRVn4r6HssrKK9PS0kJraytTfno+U+/4HT+6+ArmPfMsV114Llu+Z/NGN3PAcg/aqoolS5n/uZNpf+MtGNLKJtecxWt3P8zo/zye+QefwpL5C1jvywex5r6f5JUptzW6uVZnP73gu6w5co3l+5uOHcO53zmV079/fgNb1RzKeJGwpdENsHdqf+MtADRkCBoyhGhvJ5YsZcn8BQC8ds8M1th1h0Y20Upik402ZOMxoxvdjKbQHu01b/VSaICWtLKkUyVdmPc3k7RnkXU2hZYWNr35PN7z0OW8ds8M3pw5Fw0dwoj3bwrAGrvtyNBR6zS4kVZvkpj05ZP57BHHcs0vf93o5jSdiKh5q5eihzguAaYDH8n7zwHXADcVXO/A1t7OH/c4jpbVVmHMf3+D4ZtvyLPHfo9Rp34eDRvKa7+bQbSV7+4PVqzLf3w26627Ni+98ipHHf8NNh7zbrYd//5GN6tplG+AA1Tkp4GkhyJiW0kzImKbfOyRiNi6m/MnAZPy7uSImFxY4waOb1133XVb7rvvvvtXHJsIfB74bIPaZA02bty40xYtWvS+BQsW7J/3pwFfmTNnzkONbZn1p6LHoJdIGkH+cJK0CbC4u5MjYnJEbJu3wRqc1wVG5scjgF2uvfba8cB6+dhw4ETgJw1omzXIuHHjVhk3btxqHY+BiYsWLeqyo2PNo+ge9ETgZOC9wK3AjsBhETGtsEoHvq2Ay4BW0gfoFEl7R8SdwJ752I+BcxvXRKu3cePGjQVuyLtDgCvnzp37mc033/xM4ALSB/urwMw5c+Z8qkHNtH5WaIAGkLQ2sD0g4A8R8WKhFTahjqGiRrfDysXvi+ZX6EVCSTcCVwE3RsTrRdbV5AbrcI/1zO+LJlf0EMdOwAHAHsADwNXATRHxVmGVmpk1icKHOAAktQITgKOAXSNi9cIrNTMb4ApfSZhncewLHA18iHQBbFCR9CVJsyX9XNJwSb+VNFPSAf1Yx+/7qywrH0lb5PfMDEmbVL6n+rGOMyTt0l/l2YoreojjauDDwG+AKcC0iBImXS2YpCeB3SJivqTtgbMiYqdGt8sGDklfB0ZExLfy/vL3VGNbZoXqzfLG3m7ArkBrkXWUbQNOAB7L2/Gk+cpLgEdJ85f/CCwEZgKbAB8E7iKtuJwKjMrlTAPOIo3dzwU+lo+/Lx+bCcwCNsvHX8s/rwZ2r2jPpaRvMK3A94EH8+u+0Ojf1UDegI2A2cCFwOOkaaQjgPHAH/Lv+AZgzZ7+nl2U+47XA7sD/wf8Gbiz03vqy8AqwE/z33YG8Olc1mHA9aQO0lPA9/Lx1vy+eKyjjIr3yn7AbsCUijbtDPwqP54I3Ac8TFoVvGqj/xbNvBX15p2Qf/5TV1uj/9GF/TJTsH00/4dZNf/H3Qb4E7BOPmdn0oVSgKHA74F18/4BwE/z42nA2fnx7sBv8+MLgIPy42GkXlVlgP4McFnF88/mwDEJOCUfHw48BGzc6N/ZQN1ygF4GjM/7U4CDc2DdKR87Azi3p79nF+V29/rTgK9UnFf5nvoOcHB+PJL0AbBKDtDzgDWAlYBngHfn9+ltFWWNzD87AvQQ4H+BVfLxH+d/2zrA3RXHTwS+2ei/RTNvRU2z2wm4A9iri+eC9KnejD4K3BB5SqGk64GP9XD+OGBL4DZJkHo2z1c83/F7mk4KCJB6LydLGg1cHxFPdSrzFuB8ScNJ32Dujog386KhrSTtl89bA9gM8FfkvpsfETPz4+mkb0QjI+KufOwyUi+zQ1d/z+UkrVHl9d2ZCOwt6St5fyVgw/z49ohYmMt/AhhD6jiMlXQBcDOp979cRCyT9BtgL0nXkmZhfY30//q9wL35/TqM9H60ghQS
oCOPk0XE4UWUX2Lqw/mPR8RHunm+Y1l8G/lvFRFXSrqf9J9mqqTPR8QdHS+IiLckTQM+ReqRX1VR17ERMbWXbbTuVaYtaOPvS/Srnb/87ynpEtK3rAXAP/exHQL2jYg5bzsofbiLNg6JiFckbU16j/wrKafLEZ3KvDo/9zLwYEQsUorKt0VEX9tpvVR0utHjJK2u5CJJD+eeXLO6G9gnp1ldhTTc8Lsezp8DrCvpIwCShkp6X08VSBoLzIuI84EbSUvDO/sFcDip994RkKcCX5Q0NJezeW6j9Z+FwCuSOr41HUK6vtCtiDg8IsZHxO65p9ur12dTgWNzAEXSNj2dLGkdoCUirgNOBT7QxWnT8vGjSMEa0tj4jpI2zeWsLMm3cClQ0elGj4iI8yR9ipTs53BSCtJbe37ZwBQRD0u6lHQhCOCiiJiR/990df6SPORwfv56O4SUY+PxHqo5ADhY0lLShaMzujjnVuBnpBWcSzraQvpa/XD+j/xXYJ9e/POsNocCP5G0Mmn8t7ffIvvy+n8nvW9m5b/tn0h5W7qzAXCJpI4O2kmdT4iINkk3kcaxD83H/irpMOCqPIQGcAppzNsKUPQ0u1kRsZWk80hT7G6oTD1qZmbdK3qhynRJt5KuWk+VtBow6OZBm5n1RdE96BbSvM55EfFqzmy3QUTMKqxSM7MmUegYdES0S/oL8F5JvoO4mVkvFJ1u9CzSRa0nSFN8IM2DvrvIes3MmkHRQxxzgK0iotvbXJmZWdeKvkg4j7Sc2QYQSW05c9pjkq7JU776WtbOeboWkvbOSX+6O3ekpGP6UMdpFavoqh7vdM6lFasra6lrI0mP9baNZn1R9LjwG8BMSbdTsaIpIr5UcL22Yt6MiPEAOZ3l0cA5HU/mubaKXmYmjIgbSYtrujMSOAb4Ua9bbNaEiu5B30iaRP97Uv6Bjs0Gjt8Bm+ae42xJPyJlMnu3pImS7ssrRK+RtCqApF0lPSnpHlKCLPLxwyT9V368vqQbJD2Stx2A7wKb5N779/N5X5X0oKRZkk6vKOtkSXMk/ZaU06RHko7K5Twi6bpO3wp2kfQ7SXMl7ZnPb5X0/Yq6v9BFme+T9EBu7yxJm/X+12vWvaJncVymlLB/w855Aqz88syb3UjpKiEFwsMj4pi8XPgUYJeIeF3SicAJkr5HSsE5gZRa9eouigY4H7grIj6jdMedVYGvA1tW9N4nkhI6bUfKN3GjpI8DrwMHknJYDCF9YFT74L8+Ii7M5X4bOJKUGRDSCsudSMmO7sxLmf8FWBgRH8qr5u7Nc/orL9ocDZwXET+XNIyU7Mqs3xQ9iyIZQ3MAAAQKSURBVGMv4D9JWa82ljQeOCMi9i6yXlthIyR1ZGn7HXAx8C7gmYj4Qz6+PV1nNtuClOXtKQBJV5BSnXY2gRQEiYg2YKGkNTudMzFvM/L+qqSAvRopa+AbuY6ehk06bJkD88hcTmXSqCl5uOYpSfPyv6G77H+Vy5qrZRY0WyFFj0GfRur9TAOIiJmSNi64Tltxy8egO+QgXHln9i4zm+UP4f6aGiTgPyLivzvVcXwf6rgU2CciHsn5JHaueK5zWUE32f8kbbT8pCqZBc1WVNFj0Ms6ctFWKP4utVYP3WU2e5L0bWmTfF53qSlvB76YX9sqaXVgEal33GEqcETF2PYGktYjzaP/jKQROX1AV3nHO1sNeF4pm99BnZ7bX1JLbvNYUpbBqtn/VFtmQbM+K7oH/ZikzwGt+QLKl0gXDG2A6y6zWUTMlTQJuFnSi8A9pJsSdHYcMFnSkaRFTF+MiPsk3Zunsd0SEV+V9B7gvtyDf41055CHle53OZN0l5CeUrp2OBW4P5//KG//IJhDSuu5PnB0zqldS/a/WjILmvVZ0QtVVgZOJo3nQeqVfDsi3iqsUjOzJlFIgJZ0eUQcIum4iDiv3yswMxsEigrQT5CmZ91Iuhjztoz1EfFyv1dqZtZkihqD/glp7uxY0vzUygAd
+biZmfWgqB70xhExX9KPI+KL/V6BmdkgUNQ0u2vzT99Q0sysj4oa4miR9C1gc0kndH4yIs7p4jVmZlahqB70gcBbpA+A1brYzMysiqLnQe8WEbcUVoGZWRMrOkCvAXwL+Hg+dBcpWVLn5d9mZtZJ0bk4fkrKr/DZvP0NuKTgOs3MmkLRPeiZXWRFe8cxMzN7p6J70G9K+mjHjqQdgTcLrtPMrCkU3YPeGvgZKdm5gJeBQyNiVmGVmpk1iUID9PJKUq5fIuJvhVdmZtYkih7iAJYH5ivrUZeZWbOoS4DONqhjXWZmA149A/SM6qeYmVmHuoxBm5lZ7xV6T8I8re40YEyuS0BEhPNBm5lVUfQ0uyeBL5OS9rd1HI+Ilwqr1MysSRR9V++FTpZkZtY3Rfegvwu0AtcDizuOR8TDhVVqZtYkig7Qd3ZxOCJiQmGVmpk1Cc/iMDMrqULnQUtaQ9I5kh7K29k5R7SZmVXhfNBmZiXlfNBmZiXlfNBmZiVVdA96PHAZKR80wCs4H7SZWU2KDtDDgf2ATYCRwELSNLszCqvUzKxJFL2S8JfAq8DDwJ8LrsvMrKkU3YN+LCK2LKwCM7MmVvRFwt9Len/BdZiZNaWie9BPAJsC80m5ODrSjW5VWKVmZk2i6AA9pqvjEfFMYZWamTUJ5+IwMyupet6T0MzMesEB2syspBygzcxKygHazKykHKDNzErq/wMe1tmS/m5IKAAAAABJRU5ErkJggg==\n", 566 | "text/plain": [ 567 | "
" 568 | ] 569 | }, 570 | "metadata": { 571 | "needs_background": "light" 572 | }, 573 | "output_type": "display_data" 574 | } 575 | ], 576 | "source": [ 577 | "# Block for confusion matrix\n", 578 | "labels = [1,0]\n", 579 | "cm = confusion_matrix(y_true, y_pred_com, labels)\n", 580 | "ax= plt.subplot()\n", 581 | "sns.heatmap(cm, annot=True, ax = ax); #annot=True to annotate cells\n", 582 | "\n", 583 | "# labels, title and ticks\n", 584 | "ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels'); \n", 585 | "ax.set_title('Confusion Matrix'); \n", 586 | "ax.xaxis.set_ticklabels(['offensive', 'non-offensive']); ax.yaxis.set_ticklabels(['offensive', 'non-offensive']);" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 38, 592 | "metadata": { 593 | "scrolled": true 594 | }, 595 | "outputs": [ 596 | { 597 | "data": { 598 | "image/png": "\n", 599 | "text/plain": [ 600 | "
" 601 | ] 602 | }, 603 | "metadata": { 604 | "needs_background": "light" 605 | }, 606 | "output_type": "display_data" 607 | } 608 | ], 609 | "source": [ 610 | "# training accuracy\n", 611 | "plt.plot(com_model.history.epoch, com_model.history.history['acc'])\n", 612 | "plt.plot(txt_model.history.epoch, txt_model.history.history['acc'])\n", 613 | "plt.gca().legend(('meme model acc', 'image model acc', 'text model acc'))\n", 614 | "plt.xlabel('epoch')\n", 615 | "plt.ylabel('training accuracy')\n", 616 | "plt.show()" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 39, 622 | "metadata": { 623 | "scrolled": true 624 | }, 625 | "outputs": [ 626 | { 627 | "data": { 628 | "image/png": "\n", 629 | "text/plain": [ 630 | "
" 631 | ] 632 | }, 633 | "metadata": { 634 | "needs_background": "light" 635 | }, 636 | "output_type": "display_data" 637 | } 638 | ], 639 | "source": [ 640 | "# Validation Accuracy\n", 641 | "plt.plot(com_model.history.epoch, com_model.history.history['val_acc'])\n", 642 | "plt.plot(txt_model.history.epoch, txt_model.history.history['val_acc'])\n", 643 | "plt.gca().legend(('meme model validation acc', 'image model validation acc', 'text model validation acc'))\n", 644 | "plt.show()" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": 40, 650 | "metadata": {}, 651 | "outputs": [ 652 | { 653 | "data": { 654 | "text/plain": [ 655 | "['loss', 'acc']" 656 | ] 657 | }, 658 | "execution_count": 40, 659 | "metadata": {}, 660 | "output_type": "execute_result" 661 | } 662 | ], 663 | "source": [ 664 | "com_model.metrics_names" 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": 41, 670 | "metadata": {}, 671 | "outputs": [ 672 | { 673 | "data": { 674 | "text/plain": [ 675 | "[0.706167459487915, 0.0]" 676 | ] 677 | }, 678 | "execution_count": 41, 679 | "metadata": {}, 680 | "output_type": "execute_result" 681 | } 682 | ], 683 | "source": [ 684 | "com_model.evaluate_generator(img_txt_gen_test, steps=5)" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": 42, 690 | "metadata": {}, 691 | "outputs": [ 692 | { 693 | "data": { 694 | "text/plain": [ 695 | "[0.024448825046420097, 1.0]" 696 | ] 697 | }, 698 | "execution_count": 42, 699 | "metadata": {}, 700 | "output_type": "execute_result" 701 | } 702 | ], 703 | "source": [ 704 | "img_model.evaluate_generator(img_gen_test, steps=5)" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 43, 710 | "metadata": {}, 711 | "outputs": [ 712 | { 713 | "data": { 714 | "text/plain": [ 715 | "[0.7561434626579284, 0.0]" 716 | ] 717 | }, 718 | "execution_count": 43, 719 | "metadata": {}, 720 | "output_type": "execute_result" 721 | } 722 | ], 723 | 
"source": [ 724 | "txt_model.evaluate_generator(txt_gen_test, steps=5)" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 44, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "from sklearn.metrics import precision_recall_fscore_support" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 45, 739 | "metadata": {}, 740 | "outputs": [ 741 | { 742 | "data": { 743 | "text/plain": [ 744 | "(array([0.61206897, 0.42424242]),\n", 745 | " array([0.78888889, 0.23728814]),\n", 746 | " array([0.68932039, 0.30434783]),\n", 747 | " array([90, 59], dtype=int64))" 748 | ] 749 | }, 750 | "execution_count": 45, 751 | "metadata": {}, 752 | "output_type": "execute_result" 753 | } 754 | ], 755 | "source": [ 756 | "# for txt\n", 757 | "precision_recall_fscore_support(y_true, y_pred_txt, beta=1.0, labels=None, pos_label=1, average=None)" 758 | ] 759 | }, 760 | { 761 | "cell_type": "code", 762 | "execution_count": 46, 763 | "metadata": {}, 764 | "outputs": [ 765 | { 766 | "data": { 767 | "text/plain": [ 768 | "(array([0.60714286, 0.4 ]),\n", 769 | " array([0.56666667, 0.44067797]),\n", 770 | " array([0.5862069 , 0.41935484]),\n", 771 | " array([90, 59], dtype=int64))" 772 | ] 773 | }, 774 | "execution_count": 46, 775 | "metadata": {}, 776 | "output_type": "execute_result" 777 | } 778 | ], 779 | "source": [ 780 | "# com model\n", 781 | "precision_recall_fscore_support(y_true, y_pred_com, beta=1.0, labels=None, pos_label=1, average=None)" 782 | ] 783 | } 784 | ], 785 | "metadata": { 786 | "kernelspec": { 787 | "display_name": "Python 3", 788 | "language": "python", 789 | "name": "python3" 790 | }, 791 | "language_info": { 792 | "codemirror_mode": { 793 | "name": "ipython", 794 | "version": 3 795 | }, 796 | "file_extension": ".py", 797 | "mimetype": "text/x-python", 798 | "name": "python", 799 | "nbconvert_exporter": "python", 800 | "pygments_lexer": "ipython3", 801 | "version": "3.7.4" 802 | } 803 | }, 804 | "nbformat": 4, 
805 | "nbformat_minor": 2 806 | } 807 | --------------------------------------------------------------------------------