├── Demo
│   ├── Tanul.PNG
│   ├── sa.PNG
│   └── saving.PNG
├── Humorous Sentence Completion
│   ├── config.py
│   ├── inference.py
│   └── training.py
├── Joke Generation
│   ├── Inference.py
│   ├── config.py
│   └── training.py
└── README.md

/Demo/Tanul.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanulsingh/Humour.ai-Language-model-that-can-crack-Jokes/cee5fddd0939705487b04ae9f96505d81c6e45d6/Demo/Tanul.PNG
--------------------------------------------------------------------------------
/Demo/sa.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanulsingh/Humour.ai-Language-model-that-can-crack-Jokes/cee5fddd0939705487b04ae9f96505d81c6e45d6/Demo/sa.PNG
--------------------------------------------------------------------------------
/Demo/saving.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanulsingh/Humour.ai-Language-model-that-can-crack-Jokes/cee5fddd0939705487b04ae9f96505d81c6e45d6/Demo/saving.PNG
--------------------------------------------------------------------------------
/Humorous Sentence Completion/config.py:
--------------------------------------------------------------------------------
from transformers import GPT2Tokenizer

BATCH_SIZE = 16
EPOCHS = 4
LEARNING_RATE = 3e-5
MAX_LEN = 64
TRAIN_PATH = "/content/gdrive/My Drive/shortjokes.csv"
MODEL_FOLDER = "/content/gdrive/My Drive/Colab Notebooks/trained_models"
Tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
--------------------------------------------------------------------------------
/Humorous Sentence Completion/inference.py:
--------------------------------------------------------------------------------
# Preliminaries
import os
import numpy as np
import pandas as pd

# Transformers
from transformers import GPT2LMHeadModel

# PyTorch
import torch
import torch.nn as nn

# Warnings
import warnings
warnings.filterwarnings('ignore')

# My Module
import config

# Helper Function
def choose_from_top(probs, n=5):
    ind = np.argpartition(probs, -n)[-n:]
    top_prob = probs[ind]
    top_prob = top_prob / np.sum(top_prob)  # Normalize
    choice = np.random.choice(n, 1, p=top_prob)
    token_id = ind[choice][0]
    return int(token_id)
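
# The block below rebuilds the fine-tuned model for inference: it loads pretrained
# GPT-2 medium, registers the same special tokens that were added during training,
# and resizes the embedding matrix so the new token ids are valid before the saved
# weights are loaded.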
# Model Loading
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
# Special-token strings follow the <|soq|>/<|sep|> format described in the README;
# the pad token just needs to be an otherwise unused string.
special_tokens_dict = {'pad_token': '<|pad|>', 'bos_token': '<|soq|>', 'sep_token': '<|sep|>'}
num_added_toks = config.Tokenizer.add_special_tokens(special_tokens_dict)
print('We have added', num_added_toks, 'tokens')
model.resize_token_embeddings(len(config.Tokenizer))

# Loading Model state
models_path = "/kaggle/input/fine-tuning-open-gp-2/trained_models/gpt2_medium_joker_0.pt"  # ADD PATH TO YOUR SAVED MODEL HERE
model.load_state_dict(torch.load(models_path))

device = 'cuda'
model.to(device)

def predict(start_of_joke, length_of_joke=96, number_of_jokes=2):
    joke_num = 0
    model.eval()
    with torch.no_grad():
        for joke_idx in range(number_of_jokes):

            joke_finished = False

            cur_ids = torch.tensor(config.Tokenizer.encode(start_of_joke)).unsqueeze(0).to(device)

            for i in range(length_of_joke):
                outputs = model(cur_ids, labels=cur_ids)
                loss, logits = outputs[:2]
                softmax_logits = torch.softmax(logits[0, -1], dim=0)  # Take the first (and only) batch and the last predicted position
                if i < 3:
                    n = 20
                else:
                    n = 3
                next_token_id = choose_from_top(softmax_logits.to('cpu').numpy(), n=n)  # Randomly select the next token from the top-n distribution
                cur_ids = torch.cat([cur_ids, torch.ones((1, 1)).long().to(device) * next_token_id], dim=1)  # Append the chosen token to the running sequence

                if next_token_id in config.Tokenizer.encode('<|endoftext|>'):
                    joke_finished = True
                    break

            if joke_finished:

                joke_num = joke_num + 1

                output_list = list(cur_ids.squeeze().to('cpu').numpy())
                output_text = config.Tokenizer.decode(output_list)

                print(output_text + '\n')

# Start Predicting
predict("How do you feel", 64, 1)
--------------------------------------------------------------------------------
/Humorous Sentence Completion/training.py:
--------------------------------------------------------------------------------
# Preliminaries
import os
import pandas as pd
import numpy as np

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# Transformers
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AdamW, get_linear_schedule_with_warmup

# Warnings
import warnings
warnings.filterwarnings('ignore')

# My Module
import config

# Processing Data
def process_jokes(raw_fp):
    df = pd.read_csv(raw_fp)

    # Keep only question-style jokes and wrap each one in the special tokens,
    # appending <|endoftext|> to mark the end of a joke

    what_jokes = df[df.Joke.str.lower().str.startswith("what")].Joke.str.split("?")
    how_jokes = df[df.Joke.str.lower().str.startswith("how")].Joke.str.split("?")
    why_jokes = df[df.Joke.str.lower().str.startswith("why")].Joke.str.split("?")
    when_jokes = df[df.Joke.str.lower().str.startswith("when")].Joke.str.split("?")
    where_jokes = df[df.Joke.str.lower().str.startswith("where")].Joke.str.split("?")

    jokes = []
    for joke_ in [what_jokes, how_jokes, why_jokes, when_jokes, where_jokes]:
        joke_df_ = pd.DataFrame(joke_.values.tolist()).iloc[:, :2].dropna()
        joke_df_.columns = ["questions", "answer"]
        jokes.append(joke_df_)

    jokes_df = pd.concat(jokes)
    jokes_df = (
        jokes_df[~(jokes_df.answer.isin([""]))].drop_duplicates().reset_index(drop=True)
    )

    riddle_jokes_list = (
        "<|soq|> " + jokes_df.questions + " <|sep|> " + jokes_df.answer + " <|endoftext|>"
    ).values.tolist()
    riddle_jokes = "\n".join(riddle_jokes_list)

    return riddle_jokes_list
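
# Each element of the returned list is one question-answer joke wrapped in the
# special tokens, e.g. (made-up joke, not necessarily from the dataset):
#   "<|soq|> Why did the chicken cross the road <|sep|> To get to the other side <|endoftext|>"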

# Creating Custom DataSet

class Jokesdataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        joke = self.data[idx]

        inputs = self.tokenizer.encode_plus(
            joke,
            None,
            add_special_tokens=True,
            max_length=config.MAX_LEN,
            pad_to_max_length=True
        )

        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]

        return {'ids': torch.tensor(ids, dtype=torch.long),
                'mask': torch.tensor(mask, dtype=torch.long),
                'target': torch.tensor(ids, dtype=torch.long)}


# Initializing Model and adding our special Tokens to model vocab

model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
# Special-token strings follow the <|soq|>/<|sep|> format described in the README;
# the pad token just needs to be an otherwise unused string.
special_tokens_dict = {'pad_token': '<|pad|>', 'bos_token': '<|soq|>', 'sep_token': '<|sep|>'}
num_added_toks = config.Tokenizer.add_special_tokens(special_tokens_dict)
print('We have added', num_added_toks, 'tokens')
model.resize_token_embeddings(len(config.Tokenizer))

# Training Function

def train_fn(data_loader, model, optimizer, device, scheduler, epoch):
    model.train()

    for bi, d in enumerate(data_loader):
        ids = d["ids"]
        mask = d["mask"]
        labels = d['target']

        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        labels = labels.to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(
            input_ids=ids,
            attention_mask=mask,
            labels=labels
        )

        loss, logits = outputs[:2]
        loss.backward()

        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        if (bi + 1) % 100 == 0:
            print('Epoch [{}/{}], bi[{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, config.EPOCHS, bi + 1, len(data_loader), loss.item()))

device = 'cuda'  # Selecting Device

# ENGINE

def run():
    joke_list = process_jokes(config.TRAIN_PATH)

    jokes_dataset = Jokesdataset(joke_list, config.Tokenizer)
    jokes_dataloader = DataLoader(jokes_dataset,
                                  batch_size=config.BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=4)

    model.to(device)

    # One optimizer step per batch per epoch
    num_train_steps = len(jokes_dataloader) * config.EPOCHS

    optimizer = AdamW(model.parameters(), lr=config.LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    for epoch in range(config.EPOCHS):
        print(f"EPOCH {epoch+1} started" + '=' * 30)
        train_fn(jokes_dataloader, model, optimizer, device, scheduler, epoch=epoch)

    models_folder = config.MODEL_FOLDER
    if not os.path.exists(models_folder):
        os.mkdir(models_folder)
    torch.save(model.state_dict(), os.path.join(models_folder, "gpt2_medium_joker_3.pt"))


# Begin Training
run()
--------------------------------------------------------------------------------
/Joke Generation/Inference.py:
--------------------------------------------------------------------------------
# Preliminaries
import os
import numpy as np
import pandas as pd

# Transformers
from transformers import GPT2LMHeadModel

# PyTorch
import torch
import torch.nn as nn

# Warnings
import warnings
warnings.filterwarnings('ignore')

# My Module
import config
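
# choose_from_top below implements simple top-n sampling: keep the n highest-probability
# tokens, renormalize their probabilities, and draw the next token from that distribution.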
# Helper Function
def choose_from_top(probs, n=5):
    ind = np.argpartition(probs, -n)[-n:]
    top_prob = probs[ind]
    top_prob = top_prob / np.sum(top_prob)  # Normalize
    choice = np.random.choice(n, 1, p=top_prob)
    token_id = ind[choice][0]
    return int(token_id)

# Model Loading
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
special_tokens_dict = {'pad_token': '<|pad|>'}  # the pad token just needs to be an otherwise unused string
num_added_toks = config.Tokenizer.add_special_tokens(special_tokens_dict)
print('We have added', num_added_toks, 'tokens')
model.resize_token_embeddings(len(config.Tokenizer))

# Loading Model State
models_path = "/kaggle/input/fine-tuning-open-gp-2/trained_models/gpt2_medium_joker_0.pt"  # ADD PATH TO YOUR SAVED MODEL HERE
model.load_state_dict(torch.load(models_path))

device = 'cuda'
model.to(device)

def predict(length_of_joke, number_of_jokes):
    joke_num = 0
    model.eval()
    with torch.no_grad():
        for joke_idx in range(number_of_jokes):

            joke_finished = False

            cur_ids = torch.tensor(config.Tokenizer.encode('JOKE')).unsqueeze(0).to(device)

            for i in range(length_of_joke):
                outputs = model(cur_ids, labels=cur_ids)
                loss, logits = outputs[:2]
                softmax_logits = torch.softmax(logits[0, -1], dim=0)  # Take the first (and only) batch and the last predicted position
                if i < 3:
                    n = 20
                else:
                    n = 3
                next_token_id = choose_from_top(softmax_logits.to('cpu').numpy(), n=n)  # Randomly select the next token from the top-n distribution
                cur_ids = torch.cat([cur_ids, torch.ones((1, 1)).long().to(device) * next_token_id], dim=1)  # Append the chosen token to the running sequence

                if next_token_id in config.Tokenizer.encode('<|endoftext|>'):
                    joke_finished = True
                    break

            if joke_finished:

                joke_num = joke_num + 1

                output_list = list(cur_ids.squeeze().to('cpu').numpy())
                output_text = config.Tokenizer.decode(output_list)

                print(output_text + '\n')

# Start Predicting
predict(64, 5)
--------------------------------------------------------------------------------
/Joke Generation/config.py:
--------------------------------------------------------------------------------
from transformers import GPT2Tokenizer


BATCH_SIZE = 16
EPOCHS = 4
LEARNING_RATE = 2e-5
MAX_LEN = 64
TRAIN_PATH = "/kaggle/input/short-jokes/shortjokes.csv"  # ADD PATH TO YOUR DATASET HERE
MODEL_FOLDER = "/kaggle/working/trained_models"  # ADD PATH TO WHERE YOU WANT TO SAVE YOUR MODEL
Tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
--------------------------------------------------------------------------------
/Joke Generation/training.py:
--------------------------------------------------------------------------------
'''
This file contains the training code for the Joke Generation model
'''
# Preliminaries
import os
import pandas as pd
import numpy as np

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# Transformers
from transformers import GPT2LMHeadModel
from transformers import AdamW, get_linear_schedule_with_warmup

# Warnings
import warnings
warnings.filterwarnings('ignore')

# My Module
import config

# INITIALIZING MODEL AND ADDING THE PAD TOKEN
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
special_tokens_dict = {'pad_token': '<|pad|>'}  # the pad token just needs to be an otherwise unused string
num_added_toks = config.Tokenizer.add_special_tokens(special_tokens_dict)
print('We have added', num_added_toks, 'tokens')
model.resize_token_embeddings(len(config.Tokenizer))
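
# Each training example is the raw joke from the CSV wrapped as "JOKE:<joke text><|endoftext|>",
# e.g. "JOKE:Why did the chicken cross the road? To get to the other side.<|endoftext|>"
# (made-up example); the Jokesdataset class below applies this wrapping.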

# Dataset
class Jokesdataset(Dataset):
    '''
    This class builds the custom dataset for the DataLoader
    '''
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer
        self.eos_tok = "<|endoftext|>"
        # Adding JOKE: at the start and the EOS token at the end of every joke
        self.data['Joke'] = self.data['Joke'].apply(lambda x: "JOKE:" + str(x) + self.eos_tok)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        joke = self.data.iloc[idx, 1]

        inputs = self.tokenizer.encode_plus(
            joke,
            None,
            add_special_tokens=True,
            max_length=config.MAX_LEN,
            pad_to_max_length=True
        )

        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]

        return {'ids': torch.tensor(ids, dtype=torch.long),
                'mask': torch.tensor(mask, dtype=torch.long),
                'target': torch.tensor(ids, dtype=torch.long)}


# Training Function

def train_fn(data_loader, model, optimizer, device, scheduler, epoch):
    model.train()
    for bi, d in enumerate(data_loader):
        ids = d["ids"]
        mask = d["mask"]
        labels = d['target']

        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        labels = labels.to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(
            input_ids=ids,
            attention_mask=mask,
            labels=labels
        )

        loss, logits = outputs[:2]
        loss.backward()

        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        if (bi + 1) % 500 == 0:
            print('Epoch [{}/{}], bi[{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, config.EPOCHS, bi + 1, len(data_loader), loss.item()))

device = 'cuda'  # Selecting Device

# ENGINE

def run():
    jokes = pd.read_csv(config.TRAIN_PATH)  # add the path to your dataset in the config file

    jokes_dataset = Jokesdataset(jokes, config.Tokenizer)
    jokes_dataloader = DataLoader(jokes_dataset,
                                  batch_size=config.BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=4)

    model.to(device)

    # One optimizer step per batch per epoch
    num_train_steps = len(jokes_dataloader) * config.EPOCHS

    optimizer = AdamW(model.parameters(), lr=config.LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

    for epoch in range(config.EPOCHS):
        print(f"EPOCH {epoch+1} started" + '=' * 30)
        train_fn(jokes_dataloader, model, optimizer, device, scheduler, epoch=epoch)

        models_folder = config.MODEL_FOLDER
        if not os.path.exists(models_folder):
            os.mkdir(models_folder)
        # Saving Model after each Epoch
        torch.save(model.state_dict(), os.path.join(models_folder, f"gpt2_joke_generator{epoch}.pt"))


# BEGINNING TRAINING
run()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Humour.ai

I have seen a lot of people do cool projects in Computer Vision (the more hyped field), but I have rarely seen something comparable in NLP. After learning about transformers, I thought I should build something in NLP myself, so I fine-tuned GPT-2 with a language-model head on short jokes scraped from Reddit.

**Humour.ai tries to complete a sentence in a humorous way, given some input words.**

I tested the model on unseen words and sentences and got some really cool and surprising results.

The first one is really hilarious, given that the model doesn't know my name 😂😂. A language model that can make you laugh!

![Image description](https://github.com/tanulsingh/Humour.ai/blob/master/Demo/sa.PNG)

![Image description](https://github.com/tanulsingh/Humour.ai/blob/master/Demo/Tanul.PNG)

![Image description](https://github.com/tanulsingh/Humour.ai/blob/master/Demo/saving.PNG)


# Data

The first challenge for any machine learning project is getting data that fits the task. Fortunately, I didn't have to do much here: I found this awesome dataset on [Kaggle](https://www.kaggle.com/abhinavmoudgil95/short-jokes). It consists of short jokes scraped from Reddit, laid out in a tidy DataFrame.

# Pre-Processing

GPT-2 is a transformer-type architecture that uses only the decoder part of the Transformer. It is well known for its language-modelling ability, which is why I used it to create Humour.ai.

**There are two ways the data can be presented to the model, depending on the objective you want to achieve:**

* Joke Generation
* Humorous Sentence Completion

Let's look at these two separately.

### Joke Generation

In this task the model simply tries to generate jokes, given the length and the number of jokes you want.
Here we prepend `JOKE:` to every joke in the DataFrame and append `<|endoftext|>` at the end of each joke, which tells the model that the joke has ended.
At inference time, we simply provide the number of jokes and the length of each joke, and the model prints out jokes based on what it has learned.

### Humorous Sentence Completion

This is something new: a simple tweak to the task above. Here the model tries to complete a sentence in a humorous way, given any input word or words it has never seen before.

For this task I kept only the question-answer style jokes in the dataset (the ones starting with Why, When, How, etc.) and processed the data into this format:

`<|soq|> question <|sep|> answer <|endoftext|>`

It looks like the input to a question-answering system, except that the whole string is treated as a single sequence instead of assigning different `token_type_ids` to the question and the answer.
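
To make this concrete, here is a minimal sketch of the formatting step on a couple of made-up jokes (the full preprocessing lives in `process_jokes` inside `Humorous Sentence Completion/training.py`, which also filters and de-duplicates the data):

```python
import pandas as pd

# Two illustrative question-answer jokes (the real data comes from shortjokes.csv)
df = pd.DataFrame({"Joke": [
    "Why did the chicken cross the road? To get to the other side.",
    "How do you make holy water? You boil the hell out of it.",
]})

# Split each joke into question and answer, then wrap it in the special tokens
qa = df["Joke"].str.split("?", n=1, expand=True)
formatted = "<|soq|> " + qa[0] + " <|sep|>" + qa[1] + " <|endoftext|>"

print(formatted.iloc[0])
# <|soq|> Why did the chicken cross the road <|sep|> To get to the other side. <|endoftext|>
```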

# Model

I have used the HuggingFace library for the GPT-2 model, and the whole codebase is written in PyTorch. I would be more than happy to share it here if someone takes this model and writes its equivalent in Keras/TF (that would be a good exercise). The modelling and inference code is easy to understand and largely self-explanatory if one reads the HuggingFace docs.

# HyperParameters

I have tested two batch sizes and two learning rates; the latter works better. Training the model for the second task (Humorous Sentence Completion) takes about 5 hours on GPUs; detailed timings for every configuration are in the table below.

| Task | Batch Size | Max Len | Epochs | Learning Rate | Train Time on GPUs | Train Time on TPUs |
|----------|-------------|-------------|-------------|-------------|----------|-----------|
| Humorous Sentence Completion | 32 | 64 | 4 | 3e-5 | 4.5 hours | 2.5 hours |
| Humorous Sentence Completion | 16 | 64 | 4 | 2e-5 | 5.5 hours | 3 hours |
| Joke Generation | 32 | 64 | 4 | 3e-5 | 6.5 hours | 2.5 hours |
| Joke Generation | 16 | 64 | 4 | 2e-5 | 7.5 hours | 3 hours |


# End Notes

* Feel free to fork, experiment, and play with the model. I have uploaded the code for the different tasks in different folders.
* **I will also be uploading trained weights so that anyone can load them and play with the model by just running the inference file.**
* I will be uploading the code for training on TPUs soon.

--------------------------------------------------------------------------------