├── LanguageModelling ├── README.md ├── codemix_data │ ├── lan_model_1.py │ ├── preprocess_1.py │ ├── preprocess_2.py │ ├── preprocess_3.py │ ├── test_1.txt │ ├── test_2.txt │ ├── test_3.txt │ ├── test_data.json │ ├── train_1.txt │ ├── train_2.txt │ ├── train_3.txt │ └── train_data.json ├── lan_model.py ├── lan_model_1.py ├── monolingual_data │ ├── lan_model_1.py │ ├── perplexity.png │ ├── preprocess.py │ └── preprocess1.py ├── perplexity.png ├── preprocess.py └── preprocess1.py ├── Perplexity_CMI ├── 1_train_data.json ├── 2_1_preprocess.py ├── 2_2_CMI.py ├── 2_3_dataToChunks.py ├── 3_CMI_values.txt ├── 3_lang_tagged_text.txt ├── 3_text.txt ├── CMIv2programme │ ├── cmi_corpus_reader.py │ ├── cmi_stats_v2.py │ └── cmi_tagsets.py ├── CMUToolkit.tar.gz ├── Chunks │ ├── 0_10.txt │ ├── 10_20.txt │ ├── 20_30.txt │ ├── 30_40.txt │ ├── 40_50.txt │ ├── 50_60.txt │ ├── 60_70.txt │ └── 70_80.txt ├── Commands_Readme ├── Observation ├── README.md ├── a.json └── codemix_train.txt └── README.md /LanguageModelling/README.md: -------------------------------------------------------------------------------- 1 | ## Language Modelling. 2 | - Found Unigram, Bigram, Trigram perplexities on codemix data. 3 | - Results can be seen [here](https://github.com/Abhishekmamidi123/Natural-Language-Processing/blob/master/LanguageModelling/perplexity.png) 4 | - Data used: [Twitter Codemix data](https://github.com/Abhishekmamidi123/Natural-Language-Processing/tree/master/LanguageModelling/codemix_data) 5 | -------------------------------------------------------------------------------- /LanguageModelling/codemix_data/lan_model_1.py: -------------------------------------------------------------------------------- 1 | # Goal: 2 | # Apply different language models like unigram, bigram, trigram on the given twitter corpus and codemixed corpus. 3 | # Find CMI and Perplexity for each of the above models. 4 | # Compare perplexity and analyse the best among them. 
# Steps:
# Preprocess the data (Apply tokenization and stemming).
# Store all the words(V) in a dictionary with unique id's and their frequencies in a list.
# Create a V*V matrix with all bigram counts.
# Apply add-one smoothing on the matrix.
# For every sentence in the corpus, find probabilities P( word(n)|word(n-1) ) of each word in the sequence and thereby find the perplexity of each sentence.
# Take the average of all the perplexities.
# Analyse the perplexities of different models.
#
# NOTE: this variant keeps the n-gram counts in dictionaries instead of a dense
# matrix and performs neither stemming nor smoothing itself (the stemmer is
# commented out below). The corpus is scored with unsmoothed counts collected
# from the very same file.

# Variables:
# wordDict: Dictionary which stores all the words.
# index: To give unique id's to every word in the dictionary.
# V: Vocabulary size.

# Code
import json
import math
import os
import re
import sys

import nltk
import numpy as np
# from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize, wordpunct_tokenize
# porter = PorterStemmer()

# Put words in dictionary
index = 0        # Next unused word id (equals the vocabulary size after putInDict)
totalLines = 0   # Total number of lines
tokens = 0       # Total number of words in the corpus
V = 0
V_tri = 0
matrix = {}      # Bigram counts keyed by (previous_word, word); "" marks the line start
triMatrix = {}   # Trigram counts keyed by (word1, word2, word3)
wordDict = {}    # word -> [unique id, frequency]
bigram_perplex = []
secondDict = {}  # Line-initial counts keyed by (first_word,) and (first_word, second_word)


def get_count():
    """Return the current value of the global word-id counter ``index``."""
    return index


def createBiMatrix():
    """Reset the global bigram count table ``matrix`` to an empty dict."""
    global matrix
    matrix = {}


def createTriMatrix():
    """Reset the global trigram count table ``triMatrix`` to an empty dict."""
    global triMatrix
    triMatrix = {}


def _tokenized_lines():
    """Yield the token list of every line of the corpus file ``filename``.

    ``filename`` is the module-level global assigned in the main section.
    """
    with open(filename) as corpus:
        for line in corpus:
            yield wordpunct_tokenize(line)


def _sentence_perplexity(prob):
    """Return the perplexity of one sentence given its per-token probabilities.

    The value is ``(1 / product(prob)) ** (1 / len(prob))``, evaluated in log
    space so that the product of many small probabilities cannot underflow
    to zero.

    Returns None for an empty list (a blank line has no perplexity) and
    ``inf`` when any probability is zero.
    """
    if not prob:
        return None
    if min(prob) <= 0:
        return float("inf")
    log_total = sum(math.log(p) for p in prob)
    return math.exp(-log_total / len(prob))


def _average(perplexities):
    """Return the arithmetic mean of ``perplexities``.

    Raises:
        ValueError: if the list is empty, i.e. the corpus held no tokens.
    """
    if not perplexities:
        raise ValueError("corpus contains no tokens; perplexity is undefined")
    return sum(perplexities) / float(len(perplexities))


def _corpus_perplexity(sentence_probabilities):
    """Average the perplexity of every non-empty sentence in the corpus.

    ``sentence_probabilities`` maps a token list to the list of per-token
    probabilities under the model being evaluated.
    """
    perplexities = []
    for words in _tokenized_lines():
        value = _sentence_perplexity(sentence_probabilities(words))
        if value is not None:
            perplexities.append(value)
    return _average(perplexities)


def putInDict(filename):
    """Read ``filename`` and fill the global unigram statistics.

    Updates ``wordDict`` (word -> [id, frequency]), ``totalLines``, ``tokens``
    and ``index`` (which ends up equal to the number of distinct words).
    """
    global totalLines, tokens, index
    with open(filename) as corpus:
        for line in corpus:
            totalLines += 1
            listOfWords = wordpunct_tokenize(line)
            tokens += len(listOfWords)
            for word in listOfWords:
                # word = porter.stem(word)
                if word in wordDict:
                    wordDict[word][1] += 1
                else:
                    wordDict[word] = [index, 1]
                    index += 1


def unigramPerplexity():
    """Return the mean per-sentence unigram perplexity of the corpus.

    Each word is scored as ``frequency / tokens``. Requires ``putInDict`` to
    have been run on the same file.
    """
    total = float(tokens)

    def probabilities(words):
        return [wordDict[word][1] / total for word in words]

    return _corpus_perplexity(probabilities)


def createBigram():
    """Count every bigram of the corpus into the global ``matrix``.

    The first word of a line is counted under the key ``("", word)``; the
    empty string stands in for the line start.
    """
    for words in _tokenized_lines():
        previous = ""
        for word in words:
            key = (previous, word)
            matrix[key] = matrix.get(key, 0) + 1
            previous = word


def bigramPerplexity():
    """Return the mean per-sentence bigram perplexity of the corpus.

    The first word is scored as ``count(line start, word) / totalLines`` and
    every later word as ``count(previous, word) / count(previous)``. Requires
    ``putInDict`` and ``createBigram`` to have been run on the same file.
    """
    def probabilities(words):
        prob = []
        if words:
            prob.append(matrix[("", words[0])] / float(totalLines))
        for previous, word in zip(words, words[1:]):
            prob.append(matrix[(previous, word)] / float(wordDict[previous][1]))
        return prob

    return _corpus_perplexity(probabilities)


def trigramDict():
    """Count line-initial words and line-initial word pairs into ``secondDict``.

    Also resets the global ``index`` to 0; nothing increments it afterwards,
    so ``get_count()`` returns 0 after this call.
    """
    global index
    index = 0
    for words in _tokenized_lines():
        if words:
            key = (words[0],)
            secondDict[key] = secondDict.get(key, 0) + 1
        if len(words) > 1:
            key = (words[0], words[1])
            secondDict[key] = secondDict.get(key, 0) + 1


def createTrigram():
    """Count every trigram of the corpus into the global ``triMatrix``."""
    for words in _tokenized_lines():
        for key in zip(words, words[1:], words[2:]):
            triMatrix[key] = triMatrix.get(key, 0) + 1


def trigramPerplexity():
    """Return the mean per-sentence trigram perplexity of the corpus.

    Scoring per position:
      * first word:  ``count(start, w1) / totalLines``
      * second word: ``count(start, w1, w2) / count(start, w1)``
      * later words: ``count(w1, w2, w3) / count(w1, w2)``

    The second-word denominator is the count of its context rather than
    ``totalLines``; dividing by ``totalLines`` again would multiply the
    line-start probability into the sentence twice.

    Requires ``trigramDict``, ``createTrigram`` and ``createBigram`` (for the
    bigram counts in ``matrix``) to have been run on the same file.
    """
    def probabilities(words):
        prob = []
        if words:
            first_count = secondDict[(words[0],)]
            prob.append(first_count / float(totalLines))
            if len(words) > 1:
                pair_count = secondDict[(words[0], words[1])]
                prob.append(pair_count / float(first_count))
        for word1, word2, word3 in zip(words, words[1:], words[2:]):
            num = triMatrix[(word1, word2, word3)]
            den = matrix[(word1, word2)]
            prob.append(float(num) / float(den))
        return prob

    return _corpus_perplexity(probabilities)


#########################################################################################

# Main
if __name__ == "__main__":
    filename = sys.argv[1]
    putInDict(filename)
    V = get_count()
    # Unigram
    unigramPP = unigramPerplexity()
    print("Unigram Perplexity = " + str(unigramPP))
    # Bigram
    createBiMatrix()
    createBigram()
    bigramPP = bigramPerplexity()
    print("Bigram Perplexity = " + str(bigramPP))
    print("===========================================================")
    # Trigram
    index = 0
    trigramDict()
    V_tri = get_count()
    createTriMatrix()
    createTrigram()
    trigramPP = trigramPerplexity()
    print("Trigram Perplexity = " + str(trigramPP))
    print("Found perplexity")
    print("Done.")
268 | 269 | # Just for printing 270 | #for word in wordDict: 271 | # print word, wordDict[word] 272 | -------------------------------------------------------------------------------- /LanguageModelling/codemix_data/preprocess_1.py: -------------------------------------------------------------------------------- 1 | # encoding=utf8 2 | import json 3 | import sys 4 | reload(sys) 5 | sys.setdefaultencoding('utf8') 6 | import preprocessor as p 7 | f = open("train_1.txt", "w") 8 | with open('train_data.json') as data_file: 9 | data = json.load(data_file) 10 | print data 11 | print "\n" 12 | for item in data: 13 | print item["text"] 14 | l = item["text"].encode('ascii','ignore') 15 | f.write(l) 16 | f.write("\n") 17 | print item["text"] 18 | print "\n" 19 | -------------------------------------------------------------------------------- /LanguageModelling/codemix_data/preprocess_2.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import preprocessor as p 5 | from nltk.tokenize import sent_tokenize, word_tokenize, wordpunct_tokenize 6 | from nltk.stem import PorterStemmer 7 | porter = PorterStemmer() 8 | f = open("train_2.txt","w") 9 | with open("/home/nlp/Desktop/LanguageModelling/codemix_data/train_1.txt") as file: 10 | cnt=1 11 | for line in file: 12 | print cnt 13 | cnt+=1 14 | words=line.split() 15 | l=[] 16 | for i in range(len(words)): 17 | if words[i][0]!='@': 18 | l.append(words[i]) 19 | f.write(" ".join(l)+"\n") 20 | f.close() 21 | -------------------------------------------------------------------------------- /LanguageModelling/codemix_data/preprocess_3.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import preprocessor as p 5 | from nltk.tokenize import sent_tokenize, word_tokenize, wordpunct_tokenize 6 | from nltk.stem import PorterStemmer 7 | 
porter = PorterStemmer() 8 | f = open("train_3.txt","w") 9 | with open("/home/nlp/Desktop/LanguageModelling/codemix_data/train_2.txt") as file: 10 | cnt=1 11 | for line in file: 12 | print cnt 13 | cnt+=1 14 | line=p.clean(line) 15 | # split to words: We've --> We have 16 | line=line.lower() 17 | wordList=wordpunct_tokenize(line) 18 | if len(wordList)!=0: 19 | line="" 20 | for i in wordList: 21 | i=porter.stem(i) 22 | if re.findall(r'\W',i)==[]: 23 | line=line+i+" " 24 | if line.strip()!='': 25 | f.write(line+"\n") 26 | f.close() 27 | -------------------------------------------------------------------------------- /LanguageModelling/lan_model.py: -------------------------------------------------------------------------------- 1 | # Goal: 2 | # Apply different language models like unigram, bigram, trigram on the given twitter corpus and codemixed corpus. 3 | # Find CMI and Perplexity for each of the above models. 4 | # Compare perplexity and analyse the best among them. 5 | 6 | # Steps: 7 | # Preprocess the data (Apply tokenization and stemming). 8 | # Store all the words(V) in a dictionary with unique id's and their frequencies in a list. 9 | # Create a V*V matrix with all bigram totalLiness. 10 | # Apply add-one smoothing on the matrix. 11 | # For every sentence in the corpus, find probabilities P( word(n)|word(n-1) ) of each word in the sequence and thereby find the perplexity of each sentence. 12 | # Take the average of all the perplexities. 13 | # Analyse the perplexities of different models. 14 | 15 | # Variables: 16 | # wordDict: Dictionary which stores all the words. 17 | # index: To give unique id's to every word in the dictionary. 18 | # V: Vocabulary size. 

# Code
import json
import math
import os
import re
import sys

import nltk
import numpy as np
from bs4 import BeautifulSoup
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize, wordpunct_tokenize

porter = PorterStemmer()

# Put words in dictionary
index = 0        # Next unused id (words in putInDict, bigram contexts in trigramDict)
totalLines = 0   # Total number of lines
tokens = 0       # Total number of words in the corpus
V = 0
V_tri = 0
matrix = np.zeros((1, 1))  # Dense count matrix, re-created by createMatrix()
wordDict = {}      # stemmed word -> [unique id, frequency]
bigram_perplex = []
trigram_dict = {}  # (word1, word2) context -> [unique id, frequency]
secondDict = {}    # Line-initial counts keyed by (first_word,) and (first_word, second_word)


def get_count():
    """Return the current value of the global id counter ``index``."""
    return index


def createMatrix(row, col):
    """Replace the global ``matrix`` with a zero matrix of shape (row, col)."""
    global matrix
    matrix = np.zeros((row, col))


def _stemmed_tokens(line):
    """Tokenize ``line`` and stem every token.

    Every pass over the corpus goes through this helper so that the words
    looked up in ``wordDict`` are exactly the stemmed forms ``putInDict``
    stored there.
    """
    return [porter.stem(word) for word in wordpunct_tokenize(line)]


def _tokenized_lines():
    """Yield the stemmed token list of every line of the corpus ``filename``.

    ``filename`` is the module-level global assigned in the main section.
    """
    with open(filename) as corpus:
        for line in corpus:
            yield _stemmed_tokens(line)


def _sentence_perplexity(prob):
    """Return the perplexity of one sentence given its per-token probabilities.

    The value is ``(1 / product(prob)) ** (1 / len(prob))``, evaluated in log
    space so that the product of many small probabilities cannot underflow
    to zero.

    Returns None for an empty list (a blank line has no perplexity) and
    ``inf`` when any probability is zero.
    """
    if not prob:
        return None
    if min(prob) <= 0:
        return float("inf")
    log_total = sum(math.log(p) for p in prob)
    return math.exp(-log_total / len(prob))


def _average(perplexities):
    """Return the arithmetic mean of ``perplexities``.

    Raises:
        ValueError: if the list is empty, i.e. the corpus held no tokens.
    """
    if not perplexities:
        raise ValueError("corpus contains no tokens; perplexity is undefined")
    return sum(perplexities) / float(len(perplexities))


def putInDict(filename):
    """Read ``filename`` and fill the global unigram statistics.

    Updates ``wordDict`` (stemmed word -> [id, frequency]), ``totalLines``,
    ``tokens`` and ``index`` (which ends up equal to the number of distinct
    stems), then prints ``wordDict``.
    """
    global totalLines, tokens, index
    with open(filename) as corpus:
        for line in corpus:
            totalLines += 1
            listOfWords = _stemmed_tokens(line)
            tokens += len(listOfWords)
            for word in listOfWords:
                if word in wordDict:
                    wordDict[word][1] += 1
                else:
                    wordDict[word] = [index, 1]
                    index += 1
    print(wordDict)


def unigramPerplexity():
    """Return the mean per-sentence unigram perplexity of the corpus.

    Each word is scored as ``frequency / tokens``. Requires ``putInDict`` to
    have been run on the same file.
    """
    total = float(tokens)
    perplexities = []
    for words in _tokenized_lines():
        prob = [wordDict[word][1] / total for word in words]
        value = _sentence_perplexity(prob)
        if value is not None:
            perplexities.append(value)
    return _average(perplexities)


def createBigram():
    """Count every bigram of the corpus into the global ``matrix``.

    ``matrix`` must have been created with shape (V + 1, V): row ``i`` holds
    the successors of the word with id ``i`` and the extra row ``V`` holds
    the counts of line-initial words. Prints ``wordDict`` and ``matrix``
    when done.
    """
    for words in _tokenized_lines():
        row = V  # the extra row stands for "start of line"
        for word in words:
            col = wordDict[word][0]
            matrix[row, col] += 1
            row = col
    print(wordDict)
    print(matrix)


def bigramPerplexity():
    """Return the mean per-sentence bigram perplexity of the corpus.

    The first word is scored as ``count(line start, word) / totalLines`` and
    every later word as ``count(previous, word) / count(previous)``. Requires
    ``putInDict`` and ``createBigram`` to have been run on the same file.
    """
    perplexities = []
    for words in _tokenized_lines():
        prob = []
        if words:
            prob.append(matrix[V, wordDict[words[0]][0]] / float(totalLines))
        for previous, word in zip(words, words[1:]):
            previous_id, previous_count = wordDict[previous]
            prob.append(matrix[previous_id, wordDict[word][0]] / float(previous_count))
        # Find perplexity
        print(prob)
        value = _sentence_perplexity(prob)
        if value is not None:
            perplexities.append(value)
    print(perplexities)
    return _average(perplexities)


def trigramDict():
    """Collect the context statistics needed by the trigram model.

    Fills ``secondDict`` with the counts of line-initial words and
    line-initial word pairs, and ``trigram_dict`` with an id and a frequency
    for every word pair that is followed by a third word. Resets ``index``
    first, so afterwards ``get_count()`` is the number of distinct contexts.
    """
    global index
    index = 0
    for words in _tokenized_lines():
        if words:
            key = (words[0],)
            secondDict[key] = secondDict.get(key, 0) + 1
        if len(words) > 1:
            key = (words[0], words[1])
            secondDict[key] = secondDict.get(key, 0) + 1
        for word1, word2, _ in zip(words, words[1:], words[2:]):
            context = (word1, word2)
            if context in trigram_dict:
                trigram_dict[context][1] += 1
            else:
                trigram_dict[context] = [index, 1]
                index += 1
    print("\n")
    print(trigram_dict)
    print("\n")
    print(secondDict)
    print("\n")


def createTrigram():
    """Count every trigram of the corpus into the global ``matrix``.

    ``matrix`` must have been created with shape (V_tri, V): the row is the
    id of the two-word context from ``trigram_dict`` and the column is the id
    of the third word. Prints ``matrix`` when done.
    """
    for words in _tokenized_lines():
        for word1, word2, word3 in zip(words, words[1:], words[2:]):
            matrix[trigram_dict[(word1, word2)][0], wordDict[word3][0]] += 1
    print(matrix)


def trigramPerplexity():
    """Return the mean per-sentence trigram perplexity of the corpus.

    Scoring per position:
      * first word:  ``count(start, w1) / totalLines``
      * second word: ``count(start, w1, w2) / count(start, w1)``
      * later words: ``count(w1, w2, w3) / count(w1, w2 followed by a word)``

    The second-word denominator is the count of its context rather than
    ``totalLines``; dividing by ``totalLines`` again would multiply the
    line-start probability into the sentence twice.

    Requires ``trigramDict`` and ``createTrigram`` to have been run on the
    same file.
    """
    perplexities = []
    for words in _tokenized_lines():
        prob = []
        if words:
            first_count = secondDict[(words[0],)]
            prob.append(first_count / float(totalLines))
            if len(words) > 1:
                pair_count = secondDict[(words[0], words[1])]
                prob.append(pair_count / float(first_count))
        for word1, word2, word3 in zip(words, words[1:], words[2:]):
            context_id, context_count = trigram_dict[(word1, word2)]
            num = matrix[context_id, wordDict[word3][0]]
            prob.append(float(num) / float(context_count))
        print(prob)
        value = _sentence_perplexity(prob)
        if value is not None:
            perplexities.append(value)
    print(perplexities)
    return _average(perplexities)


#########################################################################################

# Main
filename = sys.argv[1]
putInDict(filename)
V = get_count()
# Unigram
unigramPP = unigramPerplexity()
print("Unigram Perplexity = " + str(unigramPP))
# Bigram
createMatrix(V + 1, V)
createBigram()
bigramPP = bigramPerplexity()
print("Bigram Perplexity = " + str(bigramPP))
print("===========================================================")
# Trigram
index = 0
trigramDict()
V_tri = get_count()
createMatrix(V_tri,V) 236 | createTrigram() 237 | trigramPP = trigramPerplexity() 238 | print "Trigram Perplexity = "+str(trigramPP) 239 | print "Found perplexity" 240 | print "Done." 241 | 242 | # Just for printing 243 | #for word in wordDict: 244 | # print word, wordDict[word] 245 | -------------------------------------------------------------------------------- /LanguageModelling/lan_model_1.py: -------------------------------------------------------------------------------- 1 | # Goal: 2 | # Apply different language models like unigram, bigram, trigram on the given twitter corpus and codemixed corpus. 3 | # Find CMI and Perplexity for each of the above models. 4 | # Compare perplexity and analyse the best among them. 5 | 6 | # Steps: 7 | # Preprocess the data (Apply tokenization and stemming). 8 | # Store all the words(V) in a dictionary with unique id's and their frequencies in a list. 9 | # Create a V*V matrix with all bigram totalLiness. 10 | # Apply add-one smoothing on the matrix. 11 | # For every sentence in the corpus, find probabilities P( word(n)|word(n-1) ) of each word in the sequence and thereby find the perplexity of each sentence. 12 | # Take the average of all the perplexities. 13 | # Analyse the perplexities of different models. 14 | 15 | # Variables: 16 | # wordDict: Dictionary which stores all the words. 17 | # index: To give unique id's to every word in the dictionary. 18 | # V: Vocabulary size. 
19 | 20 | # Code 21 | import numpy as np 22 | import nltk 23 | import os 24 | import sys 25 | # from nltk.stem import PorterStemmer 26 | from nltk.tokenize import sent_tokenize, word_tokenize, wordpunct_tokenize 27 | import json 28 | import re 29 | # porter = PorterStemmer() 30 | 31 | # Put words in dictionary 32 | index=0 # Index of word in dictionary 33 | totalLines=0 # Total number of lines 34 | tokens=0 # Total number of words in the corpus 35 | V=0 36 | V_tri=0 37 | matrix={} 38 | triMatrix = {} 39 | wordDict = {} 40 | bigram_perplex=[] 41 | secondDict={} 42 | 43 | def get_count(): 44 | global index 45 | return index 46 | 47 | def createBiMatrix(): 48 | global matrix 49 | matrix = {} 50 | 51 | def createTriMatrix(): 52 | global triMatrix 53 | triMatrix = {} 54 | 55 | def putInDict(filename): 56 | global totalLines, tokens, index 57 | with open(filename) as file: 58 | for line in file: 59 | totalLines+=1 60 | # line = "My name is Abhishek and the name of the boy who was standing there is not Abhishek" 61 | listOfWords = wordpunct_tokenize(line) 62 | tokens = tokens + len(listOfWords) 63 | for word in listOfWords: 64 | # word = porter.stem(word) 65 | if word in wordDict: 66 | wordDict[word][1]+=1 67 | else: 68 | wordDict[word] = [index, 1] 69 | index+=1 70 | # print wordDict 71 | 72 | def unigramPerplexity(): 73 | global filename, totalLines, tokens, index 74 | with open(filename) as file: 75 | perplexities=[] 76 | for line in file: 77 | listOfWords = wordpunct_tokenize(line) 78 | l = len(listOfWords) 79 | prob=[] 80 | for i in range(l): 81 | word=listOfWords[i] 82 | prob.append(wordDict[word][1]/float(tokens)) 83 | per=1 84 | for p in prob: 85 | per = per*p 86 | if per!=0: 87 | per=1/float(per) 88 | perplexities.append(pow(per, 1/float(l))) 89 | PP=0 90 | for i in perplexities: 91 | PP=PP+i 92 | PP=PP/float(len(perplexities)) 93 | return PP 94 | 95 | def createBigram(): 96 | global filename, totalLines, tokens, index 97 | with open(filename) as file: 98 | for 
line in file: 99 | listOfWords = wordpunct_tokenize(line) 100 | l = len(listOfWords) 101 | if l!=0: 102 | word = listOfWords[0] 103 | key = str(["",word]) 104 | if key not in matrix: 105 | matrix[key] = 1 106 | else: 107 | matrix[key] += 1 108 | # matrix[V][wordDict[word][0]]+=1 109 | for i in range(l-1): 110 | word = listOfWords[i] 111 | next_word = listOfWords[i+1] 112 | key = str([word,next_word]) 113 | if key not in matrix: 114 | matrix[key] = 1 115 | else: 116 | matrix[key] += 1 117 | # matrix[wordDict[word][0]][wordDict[next_word][0]]+=1 118 | # print wordDict 119 | # print matrix 120 | 121 | def bigramPerplexity(): 122 | global filename, totalLines, tokens, index 123 | with open(filename) as file: 124 | perplexities=[] 125 | for line in file: 126 | listOfWords = wordpunct_tokenize(line) 127 | l = len(listOfWords) 128 | prob=[] 129 | if l!=0: 130 | word=listOfWords[0] 131 | prob.append(matrix[str(["", word])]/float(totalLines)) 132 | # prob.append(matrix[V][wordDict[word][0]]/float(totalLines)) 133 | for i in range(l-1): 134 | word=listOfWords[i] 135 | next_word = listOfWords[i+1] 136 | prob.append(matrix[str([word, next_word])]/float(wordDict[word][1])) 137 | # prob.append(matrix[wordDict[word][0]][wordDict[next_word][0]]/float(wordDict[word][1])) 138 | # Find perplexity 139 | # print prob 140 | per=1 141 | for p in prob: 142 | per = per*p 143 | if per!=0: 144 | per=1/float(per) 145 | perplexities.append(pow(per, 1/float(l))) 146 | # print perplexities 147 | PP=0 148 | for i in perplexities: 149 | PP=PP+i 150 | PP=PP/float(len(perplexities)) 151 | return PP 152 | 153 | def trigramDict(): 154 | global filename, totalLines, tokens, index 155 | index=0 156 | with open(filename) as file: 157 | for line in file: 158 | listOfWords = wordpunct_tokenize(line) 159 | l = len(listOfWords) 160 | if l!=0: 161 | word=listOfWords[0] 162 | if word in secondDict: 163 | secondDict[str(word)]+=1 164 | else: 165 | secondDict[str(word)]=1 166 | if l>1: 167 | word1=listOfWords[1] 
168 | s=str([word,word1]) 169 | if s in secondDict: 170 | secondDict[s]+=1 171 | else: 172 | secondDict[s]=1 173 | # for i in range(l-1): 174 | # s = str([listOfWords[i],listOfWords[i+1]]) 175 | # if s in trigram_dict: 176 | # trigram_dict[s][1]+=1 177 | # else: 178 | # trigram_dict[s]=[index, 1] 179 | # index+=1 180 | # print "\n" 181 | # print trigram_dict 182 | # print "\n" 183 | # print secondDict 184 | # print "\n" 185 | 186 | def createTrigram(): 187 | global filename, totalLines, tokens, index 188 | with open(filename) as file: 189 | for line in file: 190 | listOfWords = wordpunct_tokenize(line) 191 | l = len(listOfWords) 192 | for i in range(l-2): 193 | word1 = listOfWords[i] 194 | word2 = listOfWords[i+1] 195 | word3 = listOfWords[i+2] 196 | key = str([word1,word2,word3]) 197 | if key not in triMatrix: 198 | triMatrix[key] = 1 199 | else: 200 | triMatrix[key] += 1 201 | # matrix[trigram_dict[s][0]][wordDict[word3][0]]+=1 202 | # print triMatrix 203 | 204 | def trigramPerplexity(): 205 | global filename, totalLines, tokens, index 206 | with open(filename) as file: 207 | perplexities=[] 208 | for line in file: 209 | listOfWords = wordpunct_tokenize(line) 210 | l = len(listOfWords) 211 | prob=[] 212 | if l!=0: 213 | word=listOfWords[0] 214 | prob.append(secondDict[str(word)]/float(totalLines)) 215 | if l>1: 216 | word1=listOfWords[1] 217 | prob.append(secondDict[str([word,word1])]/float(totalLines)) 218 | for i in range(l-2): 219 | word1 = listOfWords[i] 220 | word2 = listOfWords[i+1] 221 | word3 = listOfWords[i+2] 222 | s = str([word1,word2]) 223 | num = triMatrix[str([word1,word2,word3])] 224 | # num = matrix[trigram_dict[s][0]][wordDict[word][0]] 225 | den = matrix[s] 226 | prob.append(float(num)/float(den)) 227 | per=1 228 | # print prob 229 | for p in prob: 230 | per = per*p 231 | if per!=0: 232 | per=1/float(per) 233 | perplexities.append(pow(per, 1/float(l))) 234 | PP=0 235 | # print perplexities 236 | for i in perplexities: 237 | PP=PP+i 238 | 
PP=PP/float(len(perplexities)) 239 | return PP 240 | 241 | ######################################################################################### 242 | 243 | # Main 244 | filename=sys.argv[1] 245 | putInDict(filename) 246 | V=get_count() 247 | # Unigram 248 | unigramPP = unigramPerplexity() 249 | print "Unigram Perplexity = "+str(unigramPP) 250 | # Bigram 251 | createBiMatrix() 252 | createBigram() 253 | bigramPP = bigramPerplexity() 254 | print "Bigram Perplexity = "+str(bigramPP) 255 | print "===========================================================" 256 | # Trigram 257 | index=0 258 | trigramDict() 259 | V_tri=get_count() 260 | createTriMatrix() 261 | createTrigram() 262 | trigramPP = trigramPerplexity() 263 | print "Trigram Perplexity = "+str(trigramPP) 264 | print "Found perplexity" 265 | print "Done." 266 | 267 | # Just for printing 268 | #for word in wordDict: 269 | # print word, wordDict[word] 270 | -------------------------------------------------------------------------------- /LanguageModelling/monolingual_data/lan_model_1.py: -------------------------------------------------------------------------------- 1 | # Goal: 2 | # Apply different language models like unigram, bigram, trigram on the given twitter corpus and codemixed corpus. 3 | # Find CMI and Perplexity for each of the above models. 4 | # Compare perplexity and analyse the best among them. 5 | 6 | # Steps: 7 | # Preprocess the data (Apply tokenization and stemming). 8 | # Store all the words(V) in a dictionary with unique id's and their frequencies in a list. 9 | # Create a V*V matrix with all bigram totalLiness. 10 | # Apply add-one smoothing on the matrix. 11 | # For every sentence in the corpus, find probabilities P( word(n)|word(n-1) ) of each word in the sequence and thereby find the perplexity of each sentence. 12 | # Take the average of all the perplexities. 13 | # Analyse the perplexities of different models. 
14 | 15 | # Variables: 16 | # wordDict: Dictionary which stores all the words. 17 | # index: To give unique id's to every word in the dictionary. 18 | # V: Vocabulary size. 19 | 20 | # Code 21 | import numpy as np 22 | import nltk 23 | import os 24 | import sys 25 | # from nltk.stem import PorterStemmer 26 | from nltk.tokenize import sent_tokenize, word_tokenize, wordpunct_tokenize 27 | import json 28 | import re 29 | # porter = PorterStemmer() 30 | 31 | # Put words in dictionary 32 | index=0 # Index of word in dictionary 33 | totalLines=0 # Total number of lines 34 | tokens=0 # Total number of words in the corpus 35 | V=0 36 | V_tri=0 37 | matrix={} 38 | triMatrix = {} 39 | wordDict = {} 40 | bigram_perplex=[] 41 | secondDict={} 42 | 43 | def get_count(): 44 | global index 45 | return index 46 | 47 | def createBiMatrix(): 48 | global matrix 49 | matrix = {} 50 | 51 | def createTriMatrix(): 52 | global triMatrix 53 | triMatrix = {} 54 | 55 | def putInDict(filename): 56 | global totalLines, tokens, index 57 | with open(filename) as file: 58 | for line in file: 59 | totalLines+=1 60 | # line = "My name is Abhishek and the name of the boy who was standing there is not Abhishek" 61 | listOfWords = wordpunct_tokenize(line) 62 | tokens = tokens + len(listOfWords) 63 | for word in listOfWords: 64 | # word = porter.stem(word) 65 | if word in wordDict: 66 | wordDict[word][1]+=1 67 | else: 68 | wordDict[word] = [index, 1] 69 | index+=1 70 | # print wordDict 71 | 72 | def unigramPerplexity(): 73 | global filename, totalLines, tokens, index 74 | with open(filename) as file: 75 | perplexities=[] 76 | for line in file: 77 | listOfWords = wordpunct_tokenize(line) 78 | l = len(listOfWords) 79 | prob=[] 80 | for i in range(l): 81 | word=listOfWords[i] 82 | prob.append(wordDict[word][1]/float(tokens)) 83 | per=1 84 | for p in prob: 85 | per = per*p 86 | if per!=0: 87 | per=1/float(per) 88 | perplexities.append(pow(per, 1/float(l))) 89 | PP=0 90 | for i in perplexities: 91 | PP=PP+i 
92 | PP=PP/float(len(perplexities)) 93 | return PP 94 | 95 | def createBigram(): 96 | global filename, totalLines, tokens, index 97 | with open(filename) as file: 98 | for line in file: 99 | listOfWords = wordpunct_tokenize(line) 100 | l = len(listOfWords) 101 | if l!=0: 102 | word = listOfWords[0] 103 | key = str(["",word]) 104 | if key not in matrix: 105 | matrix[key] = 1 106 | else: 107 | matrix[key] += 1 108 | # matrix[V][wordDict[word][0]]+=1 109 | for i in range(l-1): 110 | word = listOfWords[i] 111 | next_word = listOfWords[i+1] 112 | key = str([word,next_word]) 113 | if key not in matrix: 114 | matrix[key] = 1 115 | else: 116 | matrix[key] += 1 117 | # matrix[wordDict[word][0]][wordDict[next_word][0]]+=1 118 | # print wordDict 119 | # print matrix 120 | 121 | def bigramPerplexity(): 122 | global filename, totalLines, tokens, index 123 | with open(filename) as file: 124 | perplexities=[] 125 | for line in file: 126 | listOfWords = wordpunct_tokenize(line) 127 | l = len(listOfWords) 128 | prob=[] 129 | if l!=0: 130 | word=listOfWords[0] 131 | prob.append(matrix[str(["", word])]/float(totalLines)) 132 | # prob.append(matrix[V][wordDict[word][0]]/float(totalLines)) 133 | for i in range(l-1): 134 | word=listOfWords[i] 135 | next_word = listOfWords[i+1] 136 | prob.append(matrix[str([word, next_word])]/float(wordDict[word][1])) 137 | # prob.append(matrix[wordDict[word][0]][wordDict[next_word][0]]/float(wordDict[word][1])) 138 | # Find perplexity 139 | # print prob 140 | per=1 141 | for p in prob: 142 | per = per*p 143 | if per!=0: 144 | per=1/float(per) 145 | perplexities.append(pow(per, 1/float(l))) 146 | # print perplexities 147 | PP=0 148 | for i in perplexities: 149 | PP=PP+i 150 | PP=PP/float(len(perplexities)) 151 | return PP 152 | 153 | def trigramDict(): 154 | global filename, totalLines, tokens, index 155 | index=0 156 | with open(filename) as file: 157 | for line in file: 158 | listOfWords = wordpunct_tokenize(line) 159 | l = len(listOfWords) 160 | if 
def trigramDict():
    """Count sentence-initial contexts of ``filename`` into ``secondDict``.

    For every non-empty line, ``secondDict[str(first_word)]`` is incremented,
    and, when a second token exists, so is
    ``secondDict[str([first_word, second_word])]``.  The global ``index``
    counter is reset to 0 as a side effect.
    """
    global index
    index = 0
    with open(filename) as corpus:
        for line in corpus:
            words = wordpunct_tokenize(line)
            if not words:
                continue
            first = str(words[0])
            secondDict[first] = secondDict.get(first, 0) + 1
            if len(words) > 1:
                pair = str([words[0], words[1]])
                secondDict[pair] = secondDict.get(pair, 0) + 1


def createTrigram():
    """Fill the module-level ``triMatrix`` with trigram counts from ``filename``.

    Keys are ``str([word1, word2, word3])`` for every three consecutive
    tokens of a line.
    """
    with open(filename) as corpus:
        for line in corpus:
            words = wordpunct_tokenize(line)
            for triple in zip(words, words[1:], words[2:]):
                key = str(list(triple))
                triMatrix[key] = triMatrix.get(key, 0) + 1


def trigramPerplexity():
    """Return the mean per-sentence trigram perplexity of the global ``filename``.

    Probabilities used per sentence:

    * P(w0)          = count(line starts with w0) / ``totalLines``
    * P(w1 | w0)     = count(line starts with w0 w1) / count(line starts with w0)
    * P(w3 | w1 w2)  = count(w1 w2 w3) / count(w1 w2)

    The second term used to be divided by ``totalLines``, i.e. it was the
    joint probability of the first two words rather than the conditional one
    required by the chain rule; it is now conditioned on the first word.

    The computation is done in log space so that long sentences cannot
    underflow the probability product to 0.  Lines that contain no tokens
    are skipped (they used to raise ZeroDivisionError), and 0.0 is returned
    when the file has no non-empty line.
    """
    import math  # local import: the module header does not import math

    sentence_perplexities = []
    with open(filename) as corpus:
        for line in corpus:
            words = wordpunct_tokenize(line)
            if not words:
                continue
            first_count = secondDict[str(words[0])]
            log_prob = math.log(first_count / float(totalLines))
            if len(words) > 1:
                pair_count = secondDict[str([words[0], words[1]])]
                log_prob += math.log(pair_count / float(first_count))
            for w1, w2, w3 in zip(words, words[1:], words[2:]):
                num = triMatrix[str([w1, w2, w3])]
                den = matrix[str([w1, w2])]  # bigram count built by createBigram
                log_prob += math.log(float(num) / float(den))
            # PP = (prod p_i) ** (-1 / N) = exp(-sum(log p_i) / N)
            sentence_perplexities.append(math.exp(-log_prob / len(words)))
    if not sentence_perplexities:
        return 0.0
    return sum(sentence_perplexities) / float(len(sentence_perplexities))
# Main
# Usage: python lan_model_1.py <corpus-file>
# The perplexity functions read the corpus path from the module-level
# ``filename``, so it has to stay a global assigned here.
if len(sys.argv) < 2:
    sys.exit("usage: python lan_model_1.py <corpus-file>")
filename = sys.argv[1]
putInDict(filename)
V = get_count()

# Unigram
unigramPP = unigramPerplexity()
# print(...) with a single argument behaves the same on Python 2 and 3
print("Unigram Perplexity = " + str(unigramPP))

# Bigram
createBiMatrix()
createBigram()
bigramPP = bigramPerplexity()
print("Bigram Perplexity = " + str(bigramPP))
print("===========================================================")

# Trigram (needs the bigram counts built above as denominators)
index = 0
trigramDict()
V_tri = get_count()
createTriMatrix()
createTrigram()
trigramPP = trigramPerplexity()
print("Trigram Perplexity = " + str(trigramPP))
print("Found perplexity")
print("Done.")
# Clean a conversation corpus: strip tweet artefacts with ``p.clean``,
# lower-case, tokenize, stem, drop every token containing a non-word
# character, and write one cleaned sentence per line to data/vag.txt.
porter = PorterStemmer()

# Both files are managed by ``with`` so the output is flushed and closed even
# if processing fails, and the input is opened first so a missing input no
# longer truncates the output file.
with open("/home/abhishek/Desktop/SEM_5/H/LanguageModelling/data/conversations1.txt") as source, \
        open("data/vag.txt", "w") as out:
    for cnt, line in enumerate(source, 1):
        print(cnt)  # progress: 1-based line number
        tokens = wordpunct_tokenize(p.clean(line).lower())
        stems = [porter.stem(token) for token in tokens]
        # keep only stems made purely of word characters
        kept = [stem for stem in stems if not re.search(r'\W', stem)]
        # every kept stem is followed by a single space, as before
        text = "".join(stem + " " for stem in kept)
        if text.strip() != '':
            out.write(text + "\n")
# Strip the bookkeeping columns from the raw tweet file: for every input line
# drop the first whitespace-separated field and the last three, remove
# @-mentions, and write what is left to data/abhi.txt (one line per input
# line, possibly empty).
#
# Both files are managed by ``with`` so the output is flushed and closed even
# if processing fails, and the input is opened first so a missing input no
# longer truncates the output file.
with open("/home/abhishek/Desktop/SEM_5/H/LanguageModelling/data/test") as source, \
        open("data/abhi.txt", "w") as out:
    for cnt, line in enumerate(source, 1):
        print(cnt)  # progress: 1-based line number
        words = line.split()
        # words[1:-3] == fields 1 .. len(words)-4, same span as range(1, len(words)-3)
        kept = [word for word in words[1:-3] if not word.startswith('@')]
        out.write(" ".join(kept) + "\n")
def readJsonFile(filename):
    """Load the JSON document in ``filename`` into the global ``dataDict``.

    The parsed data is also returned, so callers do not have to go through
    the module-level global (the function used to return None).
    """
    global dataDict
    with open(filename) as f:
        dataDict = json.load(f)
    return dataDict
9 | # 10 | ################################################################################### 11 | 12 | from nltk.corpus.reader.tagged import * 13 | import cmi_tagsets as tagsets 14 | 15 | import io 16 | import codecs 17 | 18 | ######################################################################### 19 | # # 20 | # PREDEFINED CORPUS READERS # 21 | # # 22 | ######################################################################### 23 | 24 | ################################################################################### 25 | # 26 | # If defining a new corpus reader, it will need to (at least) specify: 27 | # a. The encoding scheme (e.g., utf8 or utf16). 28 | # b. The character separating words from tags. 29 | # c. How utterance boundaries are marked (e.g., by newline characters, '\n'). 30 | # d. Whether paragraph information is included in the corpus. 31 | # 32 | # Some corpora might not be in an easily processed format and hence could 33 | # require some preprocessing / cleaning. See the example preprocessing 34 | # routines for the FIRE anf EMNLP corpora given at the end of this file. 35 | # 36 | ################################################################################### 37 | 38 | ###################### Das & Gambäck #################################### 39 | 40 | class HindiCodeMixedCorpusReader(TaggedCorpusReader): 41 | """ 42 | A reader for Das & Gambäck's Hindi Code-Mixed corpus, using '$' as a separator. 43 | Utterance boundaries are marked by newline characters ('\n'). 44 | Paragraph information is not included in the corpus, so each paragraph 45 | returned by ``self.paras()`` and ``self.tagged_paras()`` contains 46 | a single sentence. 
class HindiCodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for Das & Gambäck's Hindi code-mixed corpus ('$' separates a word
    from its tag).

    Words are split on whitespace and every newline ends an utterance.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    """
    def __init__(self, root, fileids, encoding='utf8', tagset=None):
        # NOTE(review): ``encoding`` and ``tagset`` are accepted but not
        # forwarded; the tagset is always tagsets.dastags.
        super().__init__(
            root,
            fileids,
            sep='$',
            word_tokenizer=WhitespaceTokenizer(),
            sent_tokenizer=RegexpTokenizer('\n', gaps=True),
            para_block_reader=read_blankline_block,
            tagset=tagsets.dastags,
        )

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)


class BengaliCodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for Das & Gambäck's Bengali code-mixed corpus ('£' separates a
    word from its tag).

    Tokenizers and block reader are the TaggedCorpusReader defaults.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    """
    def __init__(self, root, fileids, encoding='utf8', tagset=None):
        # NOTE(review): ``encoding`` and ``tagset`` are accepted but not
        # forwarded; the tagset is always tagsets.dastags.
        super().__init__(root, fileids, sep='£', tagset=tagsets.dastags)

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)
class NITAHindiCodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for the NITA EN-HI corpora ('§' separates a word from its tag).

    Words are split on whitespace and every newline ends an utterance.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    """
    def __init__(self, root, fileids, encoding='utf16', tagset=None):
        # NOTE(review): the corpora are described as UTF-16, yet ``encoding``
        # (like ``tagset``) is accepted but not forwarded to the base class.
        super().__init__(
            root,
            fileids,
            sep='§',
            word_tokenizer=WhitespaceTokenizer(),
            sent_tokenizer=RegexpTokenizer('\n', gaps=True),
            para_block_reader=read_blankline_block,
            tagset=tagsets.nitatags,
        )

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)


class NITABengaliCodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for the NITA Bengali code-mixed corpus ('£' separates a word from
    its tag).

    Tokenizers and block reader are the TaggedCorpusReader defaults.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    """
    def __init__(self, root, fileids, encoding='utf16', tagset=None):
        # NOTE(review): the corpus is described as UTF-16, yet ``encoding``
        # (like ``tagset``) is accepted but not forwarded to the base class.
        super().__init__(root, fileids, sep='£', tagset=tagsets.nitatags)

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)
class DutchCodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for Nguyen & Dogruöz' Dutch-Turkish code-mixed chat corpus ('/'
    separates a word from its tag).

    Tokenizers and block reader are the TaggedCorpusReader defaults.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    The tagset marks words as Dutch or Turkish; everything else is 'skip'.
    """
    def __init__(self, root, fileids, encoding='utf8', tagset=None):
        # NOTE(review): ``encoding`` and ``tagset`` are accepted but not
        # forwarded; the tagset is always tagsets.ndtags.
        super().__init__(root, fileids, sep='/', tagset=tagsets.ndtags)

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)


class VyasHindiCodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for Vyas et al.'s Hindi code-mixed corpus ('/' separates a word
    from its tag).

    Words are split on whitespace and every newline ends an utterance.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    """
    def __init__(self, root, fileids, encoding='utf16', tagset=None):
        # NOTE(review): ``encoding`` and ``tagset`` are accepted but not
        # forwarded; the tagset is always tagsets.vyastags.
        super().__init__(
            root,
            fileids,
            sep='/',
            word_tokenizer=WhitespaceTokenizer(),
            sent_tokenizer=RegexpTokenizer('\n', gaps=True),
            para_block_reader=read_blankline_block,
            tagset=tagsets.vyastags,
        )

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)
class FIRECodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for the FIRE shared task code-mixed corpora (a backslash separates
    a word from its tag).

    Tokenizers and block reader are the TaggedCorpusReader defaults.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    The tagset marks words as English, as the Indian language of the specific
    corpus, or as affixed; everything else is tagged 'O' (other).
    """
    def __init__(self, root, fileids, encoding='utf8', tagset=None):
        # NOTE(review): ``encoding`` and ``tagset`` are accepted but not
        # forwarded; the tagset is always tagsets.firetags.
        super().__init__(root, fileids, sep='\\', tagset=tagsets.firetags)

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)
class CSWS14CodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for the EMNLP 2014 workshop shared task code-mixed corpora ('/'
    separates a word from its tag).

    Tokenizers and block reader are the TaggedCorpusReader defaults.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    The tagset marks words as English, as the language of the specific
    corpus, or as NE; everything else is tagged 'other'.
    """
    def __init__(self, root, fileids, encoding='utf8', tagset=None):
        # NOTE(review): ``encoding`` and ``tagset`` are accepted but not
        # forwarded; the tagset is always tagsets.csws14tags.
        super().__init__(root, fileids, sep='/', tagset=tagsets.csws14tags)

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)


class CSWS16CodeMixedCorpusReader(TaggedCorpusReader):
    """
    Reader for the EMNLP 2016 workshop shared task code-mixed corpora ('/'
    separates a word from its tag).

    Tokenizers and block reader are the TaggedCorpusReader defaults.  The
    corpus carries no paragraph information, so each paragraph returned by
    ``self.paras()`` and ``self.tagged_paras()`` holds a single sentence.
    The tagset marks words as English, as the language of the specific
    corpus, or as NE; everything else is tagged 'other'.
    """
    def __init__(self, root, fileids, encoding='utf8', tagset=None):
        # NOTE(review): ``encoding`` and ``tagset`` are accepted but not
        # forwarded; the tagset is always tagsets.csws16tags.
        super().__init__(root, fileids, sep='/', tagset=tagsets.csws16tags)

    def _read_block(self, stream):
        # Alternative block reader; not wired into __init__.
        start_re, end_re = r'.*', r'.*_\.'
        return read_regexp_block(stream, start_re, end_re)
#########################################################################
# Return a reader for a pre-defined corpus based on the language ID.
# The corpus file names are hardcoded in the table below.
#
def corpus_reader(lang):
    """Return the corpus reader registered for ``lang``.

    Every reader is created with an empty root directory and the hardcoded
    corpus file name.  For an unregistered ``lang`` the message
    'unknown language' is printed and None is returned.
    """
    known_corpora = (
        # small corpus for testing
        ('test', NITAHindiCodeMixedCorpusReader, 'hndtest.txt'),
        # Das and Gambäck's English-Bengali corpus
        ('bngtw', BengaliCodeMixedCorpusReader, 'en_bn_hi_lang-Final.txt'),
        # NITA English-Hindi corpora: in total, tweets only, facebook only
        ('hndtot', NITAHindiCodeMixedCorpusReader, '2583_Final_Gold__Lang_UB.txt'),
        ('hndtw', NITAHindiCodeMixedCorpusReader, '1181_TW_Final_Gold_Lang_UB.txt'),
        ('hndfb', NITAHindiCodeMixedCorpusReader, '1402_FB_Final_Gold_Lang_UB.txt'),
        # Nguyen & Dogruöz' Dutch-Turkish corpus
        ('ned', DutchCodeMixedCorpusReader, 'dong.txt'),
        # Vyas et al.'s English-Hindi corpus
        ('vyas', VyasHindiCodeMixedCorpusReader, 'Vyas.txt'),
        # FIRE English-Indian corpora: Bengali, Hindi, Gujarati and Kannada
        ('firebng', FIRECodeMixedCorpusReader, 'BanglaEnglish_LIonly_AnnotatedDev.txt'),
        ('firehnd', FIRECodeMixedCorpusReader, 'HindiEnglish_LIonly_AnnotatedDev.txt'),
        ('firegur', FIRECodeMixedCorpusReader, 'GujaratiEnglish_LIonly_AnnotatedDev.txt'),
        ('firekan', FIRECodeMixedCorpusReader, 'KannadaEnglish_LIonly_AnnotatedDev.txt'),
        # EMNLP 2014 workshop English-other corpora: Mandarin, Nepali, Spanish
        ('cswsman', CSWS14CodeMixedCorpusReader, 'mandarinTrain.txt'),
        ('cswsnep', CSWS14CodeMixedCorpusReader, 'nepali-english-final-training-data.txt'),
        ('cswsesp', CSWS14CodeMixedCorpusReader, 'en_es_training_offsets.txt'),
        # EMNLP 2014 workshop Arabic standard/dialectal corpus
        ('cswsarb', CSWS14CodeMixedCorpusReader, 'arabicTrain-clean.txt'),
        # EMNLP 2016 workshop English-Spanish corpora: training and development
        ('cswsest', CSWS16CodeMixedCorpusReader, 'emnlp16_enestrain.txt'),
        ('cswsesd', CSWS16CodeMixedCorpusReader, 'emnlp16_enesdev.txt'),
    )
    # Linear scan with == keeps the comparison semantics of the if/elif chain.
    for lang_id, reader_class, fileid in known_corpora:
        if lang == lang_id:
            return reader_class('', fileid)
    print('unknown language')
#########################################################################
# Preprocessing routines for some of the corpora.
# The processed corpus is output as "clean.txt" unless another path is given.

##########################
# for FIRE corpora containing the original (encoded) strings, too
#
def preprocess_fire(corpus, outpath='clean.txt', encoding=None):
    """Strip the '=<original string>' part from every token of a FIRE corpus.

    Each whitespace-separated token of ``corpus`` is cut at its first '=' and
    written to ``outpath`` followed by a single space; every input line
    yields exactly one output line.

    ``outpath`` and ``encoding`` are optional; the defaults reproduce the
    previous behaviour ('clean.txt' in the working directory, platform
    default encoding).  Both files are closed even when processing fails.
    """
    with open(corpus, 'r', encoding=encoding) as orgfile, \
            open(outpath, 'w', encoding=encoding) as cleanfile:
        for line in orgfile:
            heads = (word.split('=')[0] for word in line.split())
            cleanfile.write(''.join(head + ' ' for head in heads) + '\n')


##########################
# for EMNLP 2014 CS workshop corpora containing only offsets and annotation
#
def preprocess_csws14(corpus, outpath='clean.txt', encoding=None):
    """Regroup an EMNLP 2014 offset/annotation file into one line per key.

    Every input line is split on whitespace; field 0 is the grouping key and
    fields 2, 3 and 4 are emitted as ``key:field2-field3/field4`` followed by
    a space.  A newline is written whenever the key changes, which also
    means the output starts with an empty line and the last group is not
    newline-terminated (unchanged from the previous implementation).

    Blank input lines are skipped; they used to raise IndexError.
    ``outpath`` and ``encoding`` are optional; the defaults reproduce the
    previous behaviour.  Both files are closed even when processing fails.
    """
    with open(corpus, 'r', encoding=encoding) as orgfile, \
            open(outpath, 'w', encoding=encoding) as cleanfile:
        key = ''
        sentence = []
        for line in orgfile:
            items = line.split()
            if not items:
                continue
            if items[0] != key:
                # key changed: flush the group collected so far
                sentence.append('\n')
                cleanfile.write(''.join(sentence))
                sentence = []
                key = items[0]
            sentence.append(
                items[0] + ":" + items[2] + "-" + items[3] + "/" + items[4] + ' '
            )
        cleanfile.write(''.join(sentence))
##########################
# for EMNLP 2016 CS workshop corpora containing offsets, tokens and annotations
#
def preprocess_csws16(corpus, outpath='clean.txt'):
    """Regroup an EMNLP 2016 offset/token/annotation file into one line per key.

    Every input line is split on whitespace; field 0 is the grouping key and
    the last two fields are emitted as ``key:token/tag`` followed by a space.
    A newline is written whenever the key changes, which also means the
    output starts with an empty line and the last group is not
    newline-terminated (unchanged from the previous implementation).
    Lines that do not have exactly six fields are reported on stdout as
    ``key:field2``.

    Blank input lines are skipped; they used to raise IndexError.
    ``outpath`` is optional and defaults to 'clean.txt' as before.  Both
    files are read/written as UTF-8 and closed even when processing fails.
    """
    with open(corpus, 'r', encoding='utf-8') as orgfile, \
            open(outpath, 'w', encoding='utf-8') as cleanfile:
        key = ''
        sentence = []
        for line in orgfile:
            items = line.split()
            if not items:
                continue
            if items[0] != key:
                # key changed: flush the group collected so far
                sentence.append('\n')
                cleanfile.write(''.join(sentence))
                sentence = []
                key = items[0]
            if len(items) != 6:
                # diagnostic only; guard the index so it cannot crash itself
                print(items[0] + ":" + (items[2] if len(items) > 2 else ''))
            sentence.append(items[0] + ":" + items[-2] + "/" + items[-1] + ' ')
        cleanfile.write(''.join(sentence))
firetags used in the FIRE shared task 26 | # f. csws14tags used in the EMNLP 2014 CS workshop shared task 27 | # g. csws16tags used in the EMNLP 2016 CS workshop shared task 28 | # 29 | # 2. If introducing a new tagset, define 30 | # a. which tags are language tags in the langtags/1 mapping in cmi_tagsets.py. 31 | # b. which tags map to which language. See further maptags/3 below. 32 | # Also check if some tags/words need special treatment, see cmi_one_utterance/3. 33 | # 34 | # 3. Define a corpus reader class, unless it's one of the predefined corpus readers 35 | # given in the file cmi_corpus_reader.py. 36 | # 37 | # 4. Add the corpus file name and its language ID to corpus_reader/1 in cmi_corpus_reader.py. 38 | # (The corpora file names are currently hardcoded in that function. Sorry for that!) 39 | # 40 | # 5. Calculate the code-mixing of the entire corpus using cmi_stats/2. 41 | # A typical usage is: 42 | # 43 | # >>> cmi_stats('ned', tagsets.ndtags) 44 | # 45 | # where 'ned' says that the Dutch corpus should be the one processed and 46 | # tagsets.ndtags that it's annotated with the tagset of Nguyen & Dogruöz. 47 | # 48 | # Another example: 49 | # 50 | # >>> cmi_stats('cswsest', tagsets.csws16tags) 51 | # 52 | # where 'cswsest' is the 2016 EMNLP code-switching workshop English-Spanish 53 | # training corpus and tagsets.csws16tags that year's version of the tagset. 54 | # 55 | # The system output statistics come in seven groups, as follows: 56 | # 1. Cc = the overall CMI value for the entire corpus. 57 | # 2. The total number of utterances and the number of code-mixed utterances. 58 | # 3. The fraction (%) of mixed and non-mixed utterances, respectively. 59 | # 4. The average code-mixing per utterance (Cu) in the mixed utterances 60 | # and overall in the corpus. 61 | # 5. 
The number of utterance-internal code-switching points overall; 62 | # average number of switches inside the mixed utterances and 63 | # average for all utterances; the number of switches between utterances 64 | # and the fraction of switches between utterances. 65 | # 6. The fraction of mixed utterances in different Cu intervals together with 66 | # the average number of switch points in each of those intervals. 67 | # 7. The number and fraction of words annotated with each tag in the tagset. 68 | # 69 | ################################################################################### 70 | 71 | ################################################################################### 72 | # 73 | # The Code-Mixing Index is further described in the following two papers: 74 | # 75 | # [CMI v2] Gambäck, B. and Das, A.: Comparing the level of code-switching in corpora. 76 | # Proc. of the 10th International Conference on Language Resources and Evaluation (LREC). 77 | # Portoroz, Slovenia (May 2016), pp 1850–1855. 78 | # 79 | # [CMI v1] Gambäck, B. and Das, A.: On measuring the complexity of code-mixing. 80 | # Proc. of the 1st Workshop on Language Technologies for Indian Social Media (Social-India). 81 | # Goa, India (Dec 2014), pp. 1-7. 82 | # 83 | ################################################################################### 84 | 85 | import cmi_tagsets as tagsets 86 | import cmi_corpus_reader as creader 87 | 88 | 89 | ######################################################################### 90 | # # 91 | # MAPPING OF LANGUAGE TAGS # 92 | # FOR THE PREDEFINED TAGSETS # 93 | # (See the definitions of the predefined # 94 | # tagsets at the beginning of the file.) 
#########################################################################
# Calculate the number of words tagged by each language tag, including
# defining which language any mixed words belong to, for each tagset.
#
# Return the number of words tagged as belonging to any language
# and the number of non-language words in the utterance, as well as
# the number of words belonging to the utterance's matrix language
# (dominating language) and that matrix language itself.
#
def maptags(tags, tagset, prevmatrix):
    """Summarise the per-tag word counts of one utterance.

    Parameters:
        tags: per-tag word counts, indexed like ``tagset``.
        tagset: one of the predefined tagsets in ``cmi_tagsets``.
        prevmatrix: matrix language of the previous utterance; reused when
            the two candidate languages are tied in this utterance.

    Returns:
        ``(lang, nonlang, nummatrix, matrixlang)``: the number of
        language-tagged words, the number of non-language words, the number
        of words in the matrix language, and the matrix language itself
        ('lang1', 'lang2' or ``prevmatrix``).

    Raises:
        ValueError: if ``tagset`` is not one of the predefined tagsets.
            (This path used to print "Unknown tagset" and then fail with an
            UnboundLocalError.)
    """
    if tagset == tagsets.dastags:
        # EN + EN+{HI|BN}_SUFFIX, plus {NE|ACRO}+EN_SUFFIX
        eng = tags[0] + tags[1] + tags[2] + tags[8] + tags[12]
        # BN + BN+EN_SUFFIX, plus {NE|ACRO}+BN_SUFFIX
        bng = tags[3] + tags[4] + tags[9] + tags[13]
        # HN + HN+EN_SUFFIX, plus {NE|ACRO}+HI_SUFFIX
        hnd = tags[5] + tags[6] + tags[10] + tags[14]
        ne = tags[7]
        acro = tags[11]
        other = tags[15] + tags[16]
        nonlang = ne + acro + other
        lang = eng + bng + hnd
        lang1 = eng
        lang2 = max(bng, hnd)

    elif tagset == tagsets.nitatags:
        eng = tags[0]
        bng = tags[1]
        hnd = tags[2]
        mix = tags[3]
        nonlang = tags[4] + tags[5] + tags[6] + tags[7]
        lang = eng + bng + hnd + mix
        lang1 = eng
        lang2 = max(bng, hnd)

    elif tagset == tagsets.ndtags:
        lang1 = tags[0]  # ned
        lang2 = tags[1]  # tur
        nonlang = tags[2]
        lang = lang1 + lang2

    elif tagset == tagsets.vyastags:
        lang1 = tags[0]
        lang2 = tags[1]
        error = tags[2] + tags[3]
        nonlang = 0
        lang = lang1 + lang2 + error

    elif tagset == tagsets.firetags:
        eng = tags[0] + tags[1] + tags[2]
        bng = tags[3] + tags[4]
        hnd = tags[5] + tags[6]
        gur = tags[7]
        kan = tags[8]
        mix = tags[9]
        nonlang = tags[10]
        lang = eng + bng + hnd + gur + kan + mix
        lang1 = eng
        lang2 = max(bng, hnd, gur, kan)

    elif tagset == tagsets.csws14tags:
        lang1 = tags[0]
        lang2 = tags[1]
        mix = tags[2] + tags[3]
        nonlang = tags[4] + tags[5]
        lang = lang1 + lang2 + mix

    elif tagset == tagsets.csws16tags:
        lang1 = tags[0]
        lang2 = tags[1]
        fw = tags[2]
        mix = tags[3] + tags[4]
        # Was tags[5] + tags[6] + tags[6], which counted tag 6 twice.
        # NOTE(review): every tag from index 5 onwards is now summed once;
        # confirm against csws16tags in cmi_tagsets.py that all of them are
        # non-language tags.
        nonlang = sum(tags[5:])
        lang = lang1 + lang2 + fw + mix

    else:
        raise ValueError("Unknown tagset")

    # This isn't strictly correct for corpora mixing more than two languages,
    # since no inter-utterance switchpoint will be added in case the matrix
    # language of two utterances switches between two non-English languages.
    # (Since lang1 is here assumed always to be the number of English words,
    # while lang2 gives the most frequent other language of the utterance.)
    # Needs fixing! - BG 160322
    if lang1 > lang2:
        matrixlang = 'lang1'
        nummatrix = lang1
    elif lang2 > lang1:
        matrixlang = 'lang2'
        nummatrix = lang2
    else:
        # tie: keep the matrix language of the previous utterance
        matrixlang = prevmatrix
        nummatrix = lang1

    return lang, nonlang, nummatrix, matrixlang
#
# Increase P if the current word's tag is one of the language tags _and_
# the most recent preceding language-tagged word had another language tag
# (which is given by the value of the currlang argument).
#
def switchpoint(tag, tagset, P, currlang):
    """Update the switch-point counter for one tagged word.

    Args:
        tag: tag of the current word.
        tagset: tagset in use; its language tags come from
            ``tagsets.langtags``.
        P: number of switch points counted so far.
        currlang: tag of the most recent language-tagged word, or 0 if
            no language-tagged word has been seen yet.

    Returns:
        The pair ``(P, currlang)`` after taking ``tag`` into account.
    """
    if tag not in tagsets.langtags(tagset):
        # not a language tag: no change of P and currlang
        return P, currlang
    if currlang == 0:
        # first language tagged word: change currlang, but not P
        return P, tag
    if tag != currlang:
        # language change: increase P and change currlang
        return P + 1, tag
    # same language as before: nothing changes
    return P, currlang

#########################################################################
# Calculate Cu, the code-mixed index for one utterance.
#
# Insert an intra-utterance switch point, P, for each language change
# inside the utterance, as returned from the switchpoint/4 function.
# Add an inter-utterance switch, delta, if the utterance's matrix language
# differs from prevmatrix, the matrix language of the previous utterance.
#
# The relevant formula used to calculate Cu for an utterance x is:
#
#   Cu(x) = 100 * [N(x)- max{t}(x) + P(x)] / 2*N(x) : N(x) > 0
#   Cu(x) = 0                                       : N(x) = 0
#
# where N(x) is the number of tokens that belong to any of the languages in
# the utterance x (i.e., all the tokens except for language independent ones);
# max{t}(x) is the number of tokens in x belonging to the matrix language
# (i.e., the most frequent language in the utterance x); and
# P(x) the number of switching points inside the utterance x.
#
# The second clause defines Cu(x) to be 0 for utterances containing
# no words that belong to any of the languages in the corpus (N=0).
# Cu is also 0 for monolingual utterances (since then max{t}=N and P=0).
#
def cmi_one_utterance(utterance, tagset, prevmatrix):
    """Count the tags of one utterance and derive its code-mixing value.

    Args:
        utterance: sequence of ``(word, tag)`` pairs.
        tagset: list of tags; counts are kept in a list parallel to it.
        prevmatrix: matrix language of the previous utterance, or 0.

    Returns:
        A tuple ``(cmi, P, delta, tags, matrixlang)``: the utterance's
        mixing value ``1 - (nummatrix - P)/lang`` (0 when the utterance
        has no language-tagged words), the number of intra-utterance
        switch points, the inter-utterance switch (0 or 1), the per-tag
        counts and the matrix language to carry to the next utterance.
    """
    size = len(tagset)
    fallback = size - 1      # slot of the last tag in the tagset
    tags = [0] * size
    P = 0
    currlang = 0

    for word, tag in utterance:
        # special cases for words that contain unwanted symbols
        if word and word[0] == '[':
            # '[object' / '[img': for Nguyen & Dogruöz' NED-TUR corpus,
            # where html links sometimes are prefixed by these; such
            # superfluous prefixes are simply skipped.
            # anything else in brackets: for the FIRE corpora, where NEs
            # sometimes are included in brackets; counted on the last tag.
            if word not in ('[object', '[img'):
                tags[fallback] += 1
            continue

        if tag == '' or tag is None:
            print("No tag for word", word)
            continue

        if tag in tagset:
            tags[tagset.index(tag)] += 1
            P, currlang = switchpoint(tag, tagset, P, currlang)
            continue

        # for Das & Gambäck's ENG-BNG corpus, where suffix tags can
        # be prefixed by 'wlcm:'; that prefix needs to be stripped.
        tail = tag.partition(':')[2]
        if tail in tagset:
            tags[tagset.index(tail)] += 1
        else:
            print("Unknown tag", tag, "for word", word, "adding to UNDEF")
            tags[fallback] += 1

    lang, nonlang, nummatrix, matrixlang = maptags(tags, tagset, prevmatrix)

    # add an inter-utterance switch point if the matrix languages differ
    delta = 1 if (matrixlang != prevmatrix and prevmatrix != 0) else 0

    if lang == 0:
        # no language-tagged words: keep the previous matrix language
        return 0, P, delta, tags, prevmatrix
    return 1 - (nummatrix - P)/lang, P, delta, tags, matrixlang

#########################################################################
#                                                                       #
#                           MAIN ROUTINE                                #
#                   Cc = CODE-MIXING FOR A CORPUS                       #
#                                                                       #
#########################################################################

#########################################################################
# Calculate code-mixed index and tag usage for an entire corpus
#
# The relevant formula used to calculate Cc of an utterance x is:
#
#   Cc(x) = 100/U * [ 1/2 * Sum{1 - [max{t}(x)-P(x)]/N(x) + delta(x)} + [5/6]*S ]
#
# where U is the total number of utterances in the corpus
# and S the number of utterances that contain any switching.
#
# The Sum is over all the utterances in the corpus (so x = 1 to U);
# max{t}(x) is the number of tokens in each utterance x belonging to
# its matrix language (i.e., the most frequent language in the utterance);
# P(x) is the number of switching points inside each utterance x;
# N(x) is the number of tokens that belong to any of the languages in
# the utterance (i.e., all the tokens except for language independent ones);
# delta(x) is 1 if a switching point precedes the utterance and 0 otherwise.
#
# The 5/6 weighting of S (the number of utterances containing switching)
# comes from the "Reading Ease" readability score [Flesch 1948] which,
# based on psycho-linguistic experiments, similarly weights the frequency
# of words per sentence as 1.2 times the number of syllables per word.
#
def cmi_stats(lang, tagset):
    """Print code-mixing statistics for the corpus identified by *lang*.

    The corpus is loaded with ``creader.corpus_reader(lang)``; every
    utterance is scored with ``cmi_one_utterance`` and the totals are
    printed. Nothing is returned.

    Args:
        lang: corpus identifier passed on to ``creader.corpus_reader``.
        tagset: list of tags used to count words per tag.
    """

    # initialisation
    nonmix = 0
    mix = 0
    cmitot = 0
    Ptot = 0
    cmi10 = cmi20 = cmi30 = cmi40 = cmiinf = 0
    P10 = P20 = P30 = P40 = Pinf = 0
    inter = 0
    matrixlang = 0
    tagstot = [0] * len(tagset)

    corpus = creader.corpus_reader(lang)
    utterances = corpus.tagged_sents()
    num = len(utterances)
    if num == 0:
        print("Empty corpus")
        return

    # Calculate Cu, the CMI value for each utterance, as well as the
    # switch-points, P (intra-utterance) and delta (inter-utterance)
    for utterance in utterances:
        cmi, P, delta, tags, matrixlang = cmi_one_utterance(utterance, tagset, matrixlang)
        inter += delta
        tagstot = [total + count for total, count in zip(tagstot, tags)]
        if cmi == 0:
            nonmix += 1
        else:
            mix += 1
            # NOTE(review): delta is only accumulated for mixed utterances
            # here, while the documented Cc formula sums delta over all
            # utterances - confirm this is intended.
            cmitot += cmi + delta
            Ptot += P

            # to produce statistics for different CMI intervals
            cmi *= 50
            if cmi <= 10:
                cmi10 += 1
                P10 += P
            elif cmi <= 20:
                cmi20 += 1
                P20 += P
            elif cmi <= 30:
                cmi30 += 1
                P30 += P
            elif cmi <= 40:
                cmi40 += 1
                P40 += P
            else:
                cmiinf += 1
                Pinf += P

    # Calculate Cc, the mixing of the entire corpus
    cmitot = cmitot/2
    Cc = (cmitot + 5*mix/6) / num

    # Print CMI values and overall corpus statistics
    print("\n***********************************")
    print("Language / corpus:", lang)
    print()
    print("Cc: {:6.2f}".format(100 * Cc))
    print()
    print("Num of utterances: {:6d}".format(num))
    print("Num of mixed: {:6d}".format(mix))
    #print("Num of tokens: {:6d}".format(len(corpus.words())))
    #print("Num of unique tokens: {:6d}".format(len(set(corpus.words()))))
    print()
    print("Fraction non-mixed: {:6.2f}".format(100 * nonmix / num))
    print("Fraction mixed: {:6.2f}".format(100 * mix / num))
    print()
    if mix > 0:
        print("Average Cu mixed: {:6.2f}".format(100 * cmitot / mix))
    print("Average Cu total: {:6.2f}".format(100 * cmitot / num))
    print()
    print("Num of switches: {:6d}".format(Ptot))
    if mix > 0:
        print("Average P mixed: {:6.2f}".format(Ptot / mix))
    print("Average P total: {:6.2f}".format(Ptot / num))
    print("Num of interswitches: {:6d}".format(inter))
    print("Fraction interswitch: {:6.2f}".format(100 * inter / num))
    print()

    # Print statistics for different CMI intervals
    print("Fraction 0 < C <= 10: {:6.2f}".format(100 * cmi10 / num))
    if cmi10 > 0:
        print("Avg P for C = (0,10]: {:6.2f}".format(P10 / cmi10))
    print("Fraction 10 < C <= 20:{:6.2f}".format(100 * cmi20 / num))
    if cmi20 > 0:
        print("Avg P for C = (10,20]:{:6.2f}".format(P20 / cmi20))
    print("Fraction 20 < C <= 30:{:6.2f}".format(100 * cmi30 / num))
    if cmi30 > 0:
        print("Avg P for C = (20,30]:{:6.2f}".format(P30 / cmi30))
    print("Fraction 30 < C <= 40:{:6.2f}".format(100 * cmi40 / num))
    if cmi40 > 0:
        print("Avg P for C = (30,40]:{:6.2f}".format(P40 / cmi40))
    print("Fraction C > 40: {:6.2f}".format(100 * cmiinf / num))
    if cmiinf > 0:
        print("Avg P for C > 40: {:6.2f}".format(Pinf / cmiinf))
    print()

    # Print the number of words annotated with each tag
    print("\n******** Tags *********** % *****")
    w = sum(tagstot)
    for name, count in zip(tagset, tagstot):
        # w is 0 when no word was counted for any tag; report 0 percent
        # then instead of dividing by zero
        share = 100*count/w if w else 0.0
        print(name.ljust(15), repr(count).rjust(6), " {:6.2f}".format(share))
    print("Total:", repr(w).rjust(15))
    print("\n***********************************\n\n")


#########################################################################
#                                                                       #
#                           HELP ROUTINES                               #
#                                                                       #
#########################################################################

#########################################################################
# Help routines for debugging and printing
#
def count_tag(tag, utterances, corpus):
    """Count how often each word carries *tag* in the first utterances.

    Args:
        tag: the tag to look for.
        utterances: number of utterances (from the start) to inspect.
        corpus: object whose ``tagged_sents()`` returns an indexable
            sequence of utterances, each a sequence of (word, tag) pairs.

    Returns:
        A dict mapping each word tagged with *tag* to its frequency.
    """
    counts = {}
    # read the utterances once instead of once per loop iteration
    sents = corpus.tagged_sents()
    for i in range(utterances):
        for w, t in sents[i]:
            if t == tag:
                counts[w] = counts.get(w, 0) + 1
    return counts

### typical usage
# >>> print_dict(count_tag('UNIV', 100, codemix('bng')))
#
def print_dict(dict):
    """Print the keys of *dict* grouped by their value, for values 1 to 99.

    Keys whose value lies outside 1..99 are not printed.
    """
    for i in range(1, 100):
        print("***** {:3d} *****".format(i))
        for a, b in dict.items():
            if b == i:
                print(a)


# --------------------------------------------------------------------------------
# /Perplexity_CMI/CMIv2programme/cmi_tagsets.py:
# --------------------------------------------------------------------------------
###################################################################################
#
# Tagsets Used by Code-Mixing Corpus Readers
# Björn Gambäck, NTNU 2014-2016.
# contact:
#
###################################################################################

#########################################################################
#                                                                       #
#                       PREDEFINED TAGSETS                              #
#                                                                       #
#########################################################################

# Tagset used by Das & Gambäck
# * Das, A. and Gambäck, B. (2013).
#     "Code-Mixing in Social Media Text: The Last Language Identification Frontier?"
#     Traitement Automatique des Langues, 54(3):41–64.
# * Das, A. and Gambäck, B. (2014).
#     "Identifying languages at the word level in code-mixed Indian social media text."
#     Proc. of the 11th International Conference on Natural Language Processing (ICON),
#     pages 169–178, Goa, India.
# Layout: one line per base tag, followed by its suffixed variants.
dastags = [
    'EN', 'EN+BN_SUFFIX', 'EN+HI_SUFFIX',
    'BN', 'BN+EN_SUFFIX',
    'HI', 'HI+EN_SUFFIX',
    'NE', 'NE+EN_SUFFIX', 'NE+BN_SUFFIX', 'NE+HI_SUFFIX',
    'ACRO', 'ACRO+EN_SUFFIX', 'ACRO+BN_SUFFIX', 'ACRO+HI_SUFFIX',
    'UNIV',
    'UNDEF',
]

# Tagset used for the NITA annotated corpora:
# * Jamatia, A., Gambäck, B., and Das, A. (2015).
#     "Part-of-speech tagging for code-mixed English-Hindi Twitter and Facebook chat messages."
#     Proc. of the 10th International Conference on Recent Advances in
#     Natural Language Processing (RANLP), pages 239–248, Hissar, Bulgaria.
# * Rudrapal, D., Jamatia, A., Chakma, K., Das, A. and Gambäck, B. (2015).
#     "Sentence Boundary Detection for Social Media Text."
#     Proc. of the 12th International Conference on Natural Language Processing (ICON),
#     pages 91-97, Trivandrum, India.
# * Jamatia, A., Gambäck, B., and Das, A. (2016).
#     "Collecting and Annotating Indian Social Media Code-Mixed Corpora."
#     Proc. of the 17th International Conference on Intelligent Text Processing
#     and Computational Linguistics (CICLING), Konya, Turkey.
nitatags = [
    'EN', 'BN', 'HI',
    'MIXED',
    'NE', 'ACRO', 'UNIV', 'UNDEF',
]

# Tagset used by Nguyen, D. and Dogruöz, A. S. (2013).
#     "Word level language identification in online multilingual communication."
#     Proc. of the Conference on Empirical Methods in Natural Language Processing (EMNLP),
#     pages 857–862, Seattle, Washington.
ndtags = [
    'NL',
    'TR',
    'SKIP',
]

# Tagset used by Vyas Y., Gella S., Sharma J., Bali K., and Choudhury M. (2014).
#     "POS tagging of English-Hindi code-mixed social media content."
#     Proc. of the Conference on Empirical Methods in Natural Language Processing (EMNLP),
#     pages 974–979, Doha, Qatar.
vyastags = [
    'E',
    'H',
    'F',
    'O',
]

# Tagset used for the FIRE 2014 and 2015 shared task annotated corpora.
#     Sequiera, R., Choudhury, M., Gupta, P., Rosso, P., Kumar, S., Banerjee, S.,
#     Naskar, S.K., Bandyopadhyay, S., Chittaranjan, G., Das, A., and Chakma, K.
#     "Overview of FIRE-2015 shared task on mixed script information retrieval."
#     Proc. of the 7th Forum for Information Retrieval Evaluation (FIRE),
#     pages 21-27, Gandhinagar, India.
firetags = [
    'E',
    'E+BN_SUFFIX',
    'E+HI_SUFFIX',
    'B',
    'B+EN_SUFFIX',
    'H',
    'H+EN_SUFFIX',
    'G',
    'K',
    'MIX',
    'O',
]

# Tagsets used for the EMNLP code-switching workshop shared task annotation.
#     Solorio, T., Blair, E., Maharjan, S., Bethard, S., Diab, M., Gohneim, M.,
#     Hawwari, A., AlGhamdi, F., Hirschberg, J., Chang, A., and Fung, P.
#     "Overview for the first shared task on language identification in code-switched data."
#     Proc. of the 1st Workshop on Computational Approaches to Code Switching, pages 62–72.
#     At the 2014 Conference on Empirical Methods in Natural Language Processing, Doha, Qatar.
csws14tags = [
    'LANG1', 'LANG2',
    'MIXED', 'AMBIGUOUS',
    'NE', 'OTHER',
]

# The UNK and FW tags were added to the tagset for the 2016 CS workshop.
csws16tags = [
    'LANG1', 'LANG2', 'FW',
    'MIXED', 'AMBIGUOUS',
    'NE', 'OTHER', 'UNK',
]

#########################################################################
# Return the mono-lingual language tags, as defined by each tagset.
114 | # 115 | def langtags(tagset): 116 | if tagset == dastags: 117 | langs = tagset[0:7] 118 | elif tagset == nitatags: 119 | langs = tagset[0:3] 120 | elif tagset == ndtags: 121 | langs = tagset[0:2] 122 | elif tagset == vyastags: 123 | langs = tagset[0:2] 124 | elif tagset == firetags: 125 | langs = tagset[0:9] 126 | elif tagset == csws14tags: 127 | langs = tagset[0:2] 128 | elif tagset == csws16tags: 129 | langs = tagset[0:3] 130 | else: 131 | print("Unknown tagset") 132 | 133 | return langs 134 | -------------------------------------------------------------------------------- /Perplexity_CMI/CMUToolkit.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Abhishekmamidi123/Natural-Language-Processing/0fbb5e6e35046297563c1ffac4000fb2030a0b16/Perplexity_CMI/CMUToolkit.tar.gz -------------------------------------------------------------------------------- /Perplexity_CMI/Chunks/30_40.txt: -------------------------------------------------------------------------------- 1 | aur air hostesses aa sakengi train me?? #railbudget2015 2 | Cabg bookg karke chalenge 3 | Han or koi option b na$hi hai .. 4 | rt #maukamauka #indvssa hum bhartiya he...panga na liyo...!!! http://t.co/gsy4lhyrqx 5 | kachua sir maths aur english dono pdate hai ? =p 6 | Bhai movie main social message kya hai batao 7 | Bhai jldi aa jae 8 | are u in ny ? do u want freedom for khader adnan ? are u awesome ? if so , u better be at the rally in ny , feb 20 4:30-7:30 at israeli embassy 9 | khair facebook se to news mil hi jayegi si hve been stick wid my phone 10 | admission dedo please 11 | Kidar wait krna h ? 12 | worlds longest confssn .... GUINNEAS buk m bhijwa do is cnfssion ko ... 13 | Codebase .. Hume kya karna hoga .. How to improve yahi sab 14 | Sorry frndz salman sir thore bzy hain abi aaty hain 5 mint me. 15 | inter milan transfer rumours: 5 players linked to the san siro in january: inter milan have started their new . . . 
http/URL 16 | Dont be upset friend jiska koi nahi hota uska facebook hota he . 17 | In d same sense . . bharat mittal ek pco booth ka maalik hai . . birla ki cement shop hai . . and . . ratan tata is owner of a garrage 18 | M aapko mis kar rha bhaiya kab aayega big boss plz reply karna 19 | Acha acha .... happy holi 20 | hahahaha . . . 4 games aur unke bhi repeat :d 21 | Happyg holi tog u . . . Alsog maze kur rha hai na 22 | Namsthe sir jai gurudhev 23 | Koi freeg coupong olag ka 24 | chalk ka maza toh free period mai letay thay . . . . ek dusro ko chalk se maar k :-d 25 | my #mufc xi for super sunday: de gea , rafa , rio , evans , evra , clevz , nani , young , roo , chicha , rvp . what about you ? 26 | Meko need nahi hai 27 | Tune apna analysis nhi likha 28 | Apki PA team ki watch kharab h shayad...4 toh baj gaye..:-( 29 | mobile number or password daalke login kar do . . . 30 | abki bar bjp . . . last bar congress hari thi 31 | supw--socially useful productive work , jyadatar bachche hut hi bnate the . . . . 32 | jab factory hi aisi hogi to product ho aisa hi milega na :p 33 | kejriwal - ab puri delhi me free wifi denge! sisodia - bahot kharcha ayega.. kejriwal - are lekin password kisi ko nhi denge.. #delhidecides 34 | Baaki do grpg 35 | Sallu bhaai i am w8 4r bajrangi bhaijaan.. 36 | lekin uska last din last din ni hota 37 | Bohot tasty paneer tha ladke 38 | # msgincinemas hip hip hureee babu thanxx.. 39 | Rajiv/eng chiek 40 | What ' wrong vd diz page 2day ... kuch bi post kr de rahe hai ? Confessions kaha gae ? 41 | mene sirf suna h south aacha na h north iz best 42 | bola hai admin , aankhe kholo duites , know the all facts 43 | plan something small n cosy .. a candlelight dinner or smthing .. only u n she ... nt anything big bash where there ll b many ppl .. n ds tym dnt infrm any of ur frnz !! ) 44 | aur arvind uncle kaise ho?? kya loge?? arvind- sab kuch!! :p #aapsweep 45 | hahahaha . . 
wo summer vacation k model yaad aagye :d 46 | bade ummed ke saat pak fans aaj match dekhne aaye the honge na poor them, kambhakt their team lol #indvspak 47 | summer holidays ke last few days ka haal :d' 48 | Woh viswanath/eng mila ? 49 | dil to hai hindustani 50 | kutte , cooler pankhe aur heatr sv ne cmnt kr diya ho to lge hath mai v cmnt kr deta hu 51 | Eid mubark ho bhai jaan advance mai it's emerjancy plz 52 | Salman bhai kahan hooo ...... 6 baj gaye ab 1 ghanta gaya to iftar ka time hota please ek reply dedijiye,, please bhaijaan ........ 53 | are school n clg me chalk lane ka kaam mera hi rahta tha . ek 2 extra nikalti thi free period me teacher giri marne k liye :p 54 | Tera post aur madhur bhandarkar ki movies ek jaisi hai .. a good one but noone will apprehend ... 55 | Hi salman sir please apse marnese pehale ek echha he ki main pako samnese ekbar dekhu filmo me dakhahe lekin ajatak main apko reall me nahi dekha but apko aur apke family ko ramzan eid ki mubarakbad 56 | ofcourse i spprt #beefban gaay hamari mata hai 57 | Abhi to. company compare ki.hai 58 | Bs mn kiya ?? 59 | modi sir....you come bihar.because neccessary to bjp government in bihar.......har har modi ghar ghar modi..... 60 | Rajiiv/eng chowk metrog stationg bestg h na uske liye bhiv . . 45 ming lagenge usko 61 | 5 gurls in one time grt ... aise hi hoga fr to ........ p 62 | guddu tri series aur test series mein , kya lene gya tha ? ? 63 | kuch b news post krne se pahile socha kro then post kiya kro . 64 | salmaan sir hm aapse milna chahte hai plz sir... 65 | Sallu bhai aaj apka dil garden garden kyu ho rha h....??? Koi to locha h.... 66 | Kal hi payg karunga 67 | abi threats aa re hain logo ko locals se 68 | Aayega Gurgaon launde ? 69 | ek moka mile or nehi mile?! #cwc15 #indvssa 70 | #replacemovienamewithmuffler hum muffler de chuke sanam #aapsweep #aap #ndtvresults 71 | #blockbustermsg thnks papa g.. aapnu v bhut bhut mubarkan papa g.. 
'happy shivraatri' http://t.co/i3isa4t0in 72 | Aaj car book ki yaar 73 | modi ji namaskar . i am from upgzb . bhumi adigrahan bill ko clear kro . 74 | Hloo salu bhai...... 75 | rohan sikka this one is ultimate . . sharma ji toh mortal combat karne lage . . 76 | Abbe ye sem bahut heavyg hoga na 77 | #msgincinemas #msgincinemas mere echa thi pehla show dekhne ke pitaji excitement ko cantor karne ke sakti do g plz pitaji 78 | mummy pk movie dekhne chalein? "haan, kaunsi movie?" 79 | Bhaijaan kuch to bolo 80 | Okkk....bajrangi jaan 81 | Arre play boy thodi na hu 82 | Lambai lagbhag 6 foot 83 | Mummy meri white t . shrt to laana jo mai us din clg pahen kr gya tha ldki ko ragging se bchane ..!!! 84 | kya capacity hai yaar 85 | garbage bin gudu ki koi girlfriend bhi dikha do kabhi 86 | Asalam u alekom salman bhai. 87 | sir massage kahan se karna hai sir massage kahan se karna hai sir massage kahan se karna hai sir massage kahan se karna hai sir massage kahan se karna hai 88 | 6 miniute ago .....waiting indias super star salman khan 89 | yaar news mein aaya tha tu . . . ? ? ? ? huh . . story achhi h short film bnwaa le ispe 90 | ab to marker aa gaye school me bhi . . . . . 91 | Mechanical me to fir bhi chance h or engineers ka kya hoga ...... 92 | salman bhai sultan film ke shuts chalu hai abhi 93 | Html css php typeg ke projectg hai naa be ? ? 94 | Kya bhai hum jab Bussy ho tab hi aap online honge..... plz hamara bhi socho..... night me sab free hote he. 95 | KB spne m? ? 96 | Mujhe mailg aayi hai 97 | yahoo ke live score se ;) 98 | pura india st/sc kardo ( general quota ko alag se vardan mila h ????????? 99 | Gand fat ri h 100 | Mujhe mail$eng aayi hai 101 | supw se sirf duster bna ke dena yaad ata hai . . fir wahi duster se board mitao :p 102 | Sir baat kr lo plzzzzzzzzzzzzzzzzzzzzzzzzzzzzz 103 | saath me flowers bhi le jane hote the school . . flag k aaspas sajaane k lea . . :) 104 | hare toh hare ...apne ghar ke tv kyon break kar rahe ho ...loss pe loss... 
try again after 4 yrs #cantbeatindia #indvspak #wc2015 105 | Exm is coming? ? 106 | Bhijan mera no 9933742434 107 | expected hai ji. fir kis baat ki #aapsweep hain ji? #nautankiaap 108 | Bhaijaan main pakistan se ho main apka bohot bra fan ho 109 | 'namanya' apanya bar? program apo toh? pk movie maker tak bisa toh? rt itu ada programnya riz,ntar gue cek dulu namanya apaan?? 110 | Loan process k liye 111 | ab delhi aap k hatho me . vote for aap . 112 | modi: main bhi cricketer banna chahta tha. #maukamauka #indvssa 113 | arpit puranik ab tum aise marketing kar rahe ho apne products ki ? 114 | aam aadmi party congress ki b team hay isliye to kejriwal ne delhi may support liya tha . 115 | A 21st century ? o ab pta chala tu single kyu hai 116 | Girls par nahi 117 | Technology. H .. A 118 | Mil jayega projectg onlineg 119 | Kyunki kal toh restg dayg h 120 | Admin ... He has * yaar . Itna mast likha tha IITian+en_suffix ke against , fir ye galti kyu 121 | puuri class ghuurti thi . . aur sabse duur waali class se hi chalk laate the . . . . 122 | bhai yaha seen ulta tha , loki , gilki ki sabzi ka badla mummy se practice mai help kara k liya jata tha :p 123 | Aur kya plang hai ? 124 | hamara toh kv tha . . . vaha teachers ko sirf duster mangana padta tha . . . chalk toh vese b hamesa padi rehti thi . . . mam bolti thi jara apni seat k paas deko sab chalk padi h kya 125 | they could have won it , agar ladki and advertisement se time mile to . . . . ;) 126 | kachua sir angrezi bolta hai kyaa ? ? ? :p 127 | phir aapne jo mobile number daala tha us mobile number pe ek sms me password aayega . . . 128 | BJP youth ki skill niche video me dekhiye. 129 | Juniorsg ke saath nhi batchg ke saath hi 130 | Same here bhai 131 | Individualg projectg bhi ? ? 132 | tu na km bol 133 | 1 . 50 hrsg ke lite 134 | y u wana leav ua fmly'' ?. . or 2nd tmhare fmly membrz iz pge pr h kya ???? 135 | rt chalo bhai... aur koi jinx tudwana hai?? 
#indvssa 136 | Guyssss meri profile picture kesi leg rahi hai..??? 137 | Finalg 1 8 ko hoga 138 | But hw can u juss only watch serials.. 139 | virat ko dikhaoooo camera wale bhai pleeeaj #indvsuae 140 | toilet room abse saaf rahega ya nhi ?? #railbudget2015 141 | Aaj ki party meri trf se this eid 142 | lemme me marry uske bd dkhi jaegi ywl ... / 143 | waitingg for bhaijaan 144 | virar-dahanu third line + panvel karjat doubling in mutp 3 #railbudget2015 145 | ohho ky bat h 146 | pta nh kaunsa kidha lg gya h .... . meri sis ka 91% m b nh aya 1st list m .... . n ek toh gnrl h thts y she z too dsapntng .. 147 | Mujhe do min de 148 | admin ke mat maano ye birth se ab tak single h . 149 | bhai party ke liye abke sab frnd tayar hain...!! 150 | Thk h Sundayg chalna h phr 151 | Ha ha busy admo 152 | shadyantra to aapne rache h bechari delhi ka future khatre m h. 153 | Arya Janaki Hahahahah ... du k page per ... bygod ... khule aam besti h ye toh ! XD! '( . AnMol Dogra 154 | Apni 100 fake ids+en_suffix bna , apne hr status/pic ko lyk kr le , simple ! # hv dne da sm ! -P . . ya phr profile name chnge krke ldki bnja ! 155 | bhai , rulayega kya :( kuch yaad taazi ho gayi :d 156 | hi salman sir how are sir main apki bohot bohot bohot bari fan hu i love you salman sir ummmmmmmmmmmmmmmmmmmmmmmmh 157 | Ameen may u got a hottest figure girl ;) ) 158 | saare bhagwan laapta or baki ke dusare dharm ke pamphlet kyon nahi the pk movie mein? 159 | rt aaj sania-shoaib k ghar me khana nahin bana! sania party me gayi shoaib bhuka so gaya #indvspak 160 | guddu ko bhagwan bhi english mein translate kar diya . . . . . 161 | Kaisi link be ? 162 | tu pluto pe chali jaa ..... nobdy will follow u ..!! 163 | its not hindi its urdu and bolywood ager kamyab hai tu Urdu ki waja say 164 | kya news hai dost 165 | i am from iit and yahan bhi hostels mein chori hoti hai . . . ! 166 | out kro..jaldi game khatam kro india.. 
#maukamauka #indvssa #savsind 167 | bhai I mean confessor , yaar yaha pe talent waste mat karo kisi magazine or newspaper me likhna suru karo .. why should only DUC members suffer ? baki logon ka bhi bore hone ka haq hai / 168 | Oye tere github me sab files chal ri hain ? 169 | yeah...peeke pk movie dekha.... http://t.co/24ifhyrobl 170 | tym kbka ho gya...whr r u 171 | Aby yr kdr gum o gy ho rply tou kr do 172 | gaon ka name 173 | But Jo link bheji hai .. Usme crisp hai 174 | salman bhaijaan...ap ki new movie mein shoot mee mai 175 | baaki sabhi baato ko maaro goli meine butter chicken khaya hai 176 | acha acha kojju 177 | Hiiiiiii salman sir 178 | Modije mera request hai ke ap apna whatsapp no de take mai live pic apko dekha saku 179 | oh salman bhai 180 | bhag sala bakwas post update karke tym waste krta hai .. 181 | rt delhi me bjp full majority se haar gayi! #kiskidilli #aapsweep 182 | hmmm bechara cute shaan . . . . . serious ho gya how sweet their frndship <3 183 | Bhai ye f ko tu kahi double meaning mein to use ni kar raha 184 | Pata I also know 185 | main aap ka bohut bade fan hu aap to sab ka help karta hai mera bhi 1 help kardena bhai plz bhai mujhse baat karna i miss u bhai 186 | khachua sir hindi k bad maths b padhane lage ? 187 | #pk movie dekhne aaya hu. #respect 188 | 67/70 , itne to marks bhi nai aate #aapsweep #feb10witharnab #3daystomsg #replaceamovietitlewithgoat #ndtvresults 189 | Aj last date hai na ? 190 | Weat karna padega 191 | contra me s power achi thi l se , :-p 192 | Ye trance music kya hota hai ? 193 | Admin claps fr u ohhh na na slaps fr u ... Fatak fatak kyu paka lahe ho ( p 194 | admin us bandi ka name preeti hai wo still pata lg gaya 195 | Salman sir aap fir kb online rahoge plz bta do na... Wrna mai puri raat jgi rhungi or apka wait krungi. Plz bhaieya 1 reply..... 196 | I think SALMAN SIR is taking a nap... jago salman bhai jago, aapka fans kab se wait kar raha hain....! 
197 | kya tum ko pata hai ke tumhara kya religion hai neither muslim nor hindu 198 | rita bahuguna arguing still. amazed at the stubbornness of #congress. nikalo aham ki patti aankon se..#delhidecides 199 | okk bhai jaan 200 | at least koe jhagda nahi karega. sabko batting mili. #indvsuae 201 | Suicide kr lega DU ka chairman . . after reading ths ..... . Plz tell me . . Kya sch me du ki cut off 99 h ? o.O iska admsn kaise hua ? Itni gr8 english ! >_< 202 | aapne to bs gfts par e focus kra h , itta hisab laga rakha h gfts ka , o 203 | I am here baat kaha krni ha plzzzzzzzzzzz baat kr lo 204 | bhai ye grah shobha ke cover page ko back cover bana diya 205 | Jo thesisg de hai 206 | haha , , , true yaar , , bahot khel khela hai baaye hath se :p in practcals also 207 | #msgincinemas #msgincinemas akheer burai da the end ho gayea... akhir clouds sun nu kdo tak cover kr sakda... 208 | chalk ke tukdo se golliyaan . . nishaane baazi . . head shot :) 209 | Try kar liyo warns lyt h 210 | ab aaenge aur...good evening bol.k good bye kr k chale jaayenge 211 | rt rt wadhwahina: rt sonusachdeva07: #msgyouthicon mere ghr b kbi aya kro 212 | wallah wallah ya habibi, ek wicket already girti. #indvsuae #bleedblue 213 | lagta hai ab ak ki wagnor se hi kaam chalana padega bjp ko #delhidecides #aapkidilli 214 | #msgincinemas #isupportmsginuttrakhand papaji movie nahi dekhi abhi tak kuch karo plz http://t.co/jwaiexzxp6 215 | Bhai aisi English mein toh tera baap bhi NDA mein select naa ho sake ..!! Get a life man -/ 216 | kal hi yaad kia tha cover pic dekhke 217 | i think every friday im going to tweet: yay degrassi tonight . #getusedtoit 218 | suna hai k pk movie main bollywood ki longest kiss hogi 219 | Abe tune woh ifmrg debateg mung ka bhara 220 | 8527106761 plz Salman sir. Mujhe aapse milne ka moka den 221 | plz rpy jst wnt to meet u once hope u cme kolkata once 222 | Carg leni hai ? ? 
223 | rt bhavnainsan7: rt jasvirmanku: gurmeetramrahim #msgincinemas papa ji first show dekh ke pvr wicho bahar jan nu dil hi nahi karda c today 224 | shaan alwayz masterji ke eye-contact se bachne ki koshish me . . 225 | Salman g agar ye msg dekh rahe hoto reply karo plz 226 | fake news he ho ni sakta aisa . . . . . itni cell me konsa cell pe call aaya he jsko pata krna hi bhaut musqil ho jata he 227 | beta hum bhi engineering college mein baith ke yhi soch rahe h :p 228 | pakistan follow on bachane ke liye maidan per uteregi. #indvspak 229 | kone pe date likha hai par date nai likhi . . . :p ;) :d 230 | Dnt compromise ur career for ur so called gf ... coz agr kal ko tu kuch bna ni na to tujhe puchne wali b ni h wo ... so abhi smbhl ja nd stdy pe dhyn lga .. gf pe to bd me b dhyn lga skta h .... b practicle yr ... 231 | m musalman ladki se shadi krna chata hu mare age 25 hai 9818463635 plz col me shadi pka karuga ok plz col me 232 | #aaj #sabhi #superhits film.dekhane chale #bbthiseid #superdupar film 233 | tic toc ..tic toc...yaar ye time kyu nahi ja raha.. 234 | bhaio ye khabar afwah he Muslims ko challenge karne wale 235 | Tu kitne creditsg ke liye registerg kar rha h 236 | mast movie hai Bhai 237 | sabse best s power tha . . full goliyon ki bauchaar :) 238 | Salman sir. Me apse milna chahta hu plz mujhe apointment dijiye me realy aapse milna chahta hu plz 239 | i wish meri ye wish puri ho jaye.... 240 | plz rply kro naAAA:< 241 | kya bhai aapka ye movie bahubali record tod paey 242 | abhi ghr pe hu but thodi der baad going to ofc . . . :-( 243 | Han woi to .. ambigious 244 | Mera cmmnt like kro aur aap jeet skte h diamond ring , gold chain , maruti ertiga , a holiday to du 245 | garbage bin ye flag hosting luncher paaji ke pitaji kar rahe hai kya ! ! :p 246 | Yr salu bhai jaldi idhr pk m loadshafing hojaegi :D 247 | bharva bhindi ko pimp lady's finger kehte hai 248 | saari chalk to pt shoes ko white karne me lag jaati thi . . :p 249 | 6 mnth .??? 
Short duration ?? 3 mahine bd tujhe papa bolne wala ho sakta tha ... v 250 | i did it . . . pr pakda jata tha . . kyoki papa teachers ko puchh lete the . . . :-/ 251 | sir plz london aajoa plz 252 | bhai Black Friday ki deal mein mila Kaafi sastaa 253 | Awww very sad same story mne kal mne crime patrol dastak p dekhi thi 254 | Chacha traing timeg par h ? 255 | udbilaao . . wow bahut din baad yeh suna hai . . my grandma used to call me that 256 | Salman sir ne bs time btaya hai date nhi jb date bta denge to aa jaeyega aap sb comment krne okay.CLOSE IT 257 | rt karaminsan07: rt tejraminsan: gurmeetramrahim #msg100croreclub #blockbustermsg #mustwatchmsg eho mang mangaan mere satgur dyal g od 258 | sir aap ki movies ka asar hai 259 | " kon c madam nye mangvaeya hai chalk " 260 | feeling aani chaiye bhai :p 261 | Proud to be a bihari -) sale kilaste h sb bihario se qk hum h hi extraordinary -) 262 | Zomato type ekdum 263 | #blockbustermsg papa ji tusi chd aa jao.. plzzzzz 264 | kachua sir maths aur english dono pdate hai ? =p 265 | #aapkidilli #aapsweep #delhidecides photoshop ne aaj dum tod diya. 266 | To prove my MANkind 267 | champion trophy mai india pakistan kai match ke baad ek pakistani nai kaha no matter our team wins or loses , we still love green coz blue to harpic bhi hota h . esake jawab mai ek indian nai kaha tum jaise hare toilet virus ya keede ko khatam krne ke liye hm harpic hone ke liye bhi tayar hei . . :-) 268 | sobhi chata hai ki pk movie sobhi recod tor de. 269 | aisa ni hai .. sme1 cares for u so much .. Dhundo use .. 270 | aj bhi #india ne apna #rule nibhaya.... sabko betting mili :p #indvsuae #cwc15 271 | Pehle roze to rkh le fir eid manana... 272 | natural actor guddu 273 | rt me: abu g paisy chahye jutay (shoes) laynay hain. abu: beta kal kam pari thi :p #pakvsind #indvspak besti nae ho gayi ? 
274 | Aj b lunch nai 275 | 6X6 ka lele 276 | Uma/eng ki mailg dekhi 277 | Uski tensiong math le 278 | nvr d less cnfsn to shi h tera yr ladki ne lga di cls nw u trust no one in grl thtz wrong sb ek jaise thodi na hote h or free thi to tera cnfsn pad k tym paas achha ho gya 279 | maharashtra govt: dhoodh maangoge tho kheer denge beef maangoge tho jail mein daldenge #beefban 280 | mere tv mein durdarshan channel b aata h . . . :-d chabad chabad 281 | Number of rapes increase nahi hue bs ab news m ane lag gye h 282 | #blockbustermsg #msgincinemas pita g tohade vachan yad age"film chalegi nhi dodegi"... http://t.co/4jejmryeb3 283 | haha rocking bandaro k pinjare se nikal k aana . . . . huhhhh teacher kya jane students ka dard . . . . ;) :p :p =d 284 | Biceps 17 inchi banane ke liye kya kare.give me some tips. 285 | Nd yar admin ... Kal ki barish m purana admin b beh gya kya . 286 | pakistan can wait more more and more . . . aakhir pakistan wait karne me number 1 hai hahahahhahahaha 287 | garbage bin : lagta h ab humara guddu adult ho gya h . am i right ? 288 | But bhaijaan Ji , aapki movie Ki shooting ka kya ?? Prm rtn dn pyo! 289 | work harder ... oh god . aaah aaahh . harder oh my god .. aah p p 290 | Elephanta caves nhi Gaya ? 291 | Yaar mera vi kch aisa hi story bt i wont give up main to apni g.f ko lekar bhag jaunga ... 292 | am i the only one jiska bahar ka cover kho jata tha ? 293 | school me 2 laddu milte they :d 294 | GUD NYT BHAI 295 | May be Ien heard next week aayegi sale dubara. 296 | election ho gaya ho toh ab #indvspak pe focus karein? kitni memes banani hain abhi toh!!! 297 | ek to pakistani h . . . 298 | wide ball ki eint . 299 | Maine its start hi kaha kara 300 | #msgincinemas tujhe zndgi m pakr meri zndgi ne jeena sikha... http://t.co/r7v2qiv1vx 301 | aap evm mistake per dhyan do 302 | chip ke pack per concentrate karo 303 | Han org kya. . 304 | #indvsuae ab kisko support karu? 
:o #dharam_sankat 305 | Bhai bhai somabhai bohemia, 306 | new opera bhi install kiya tha maine abhi usme open kiya hai 307 | aap ka batla house encounter ki janch ka mutalba http://t.co/z0wwq2dwl8 #aapsweep #delhidecides #kiskidilli #congress #modi #azamgarh 308 | modi wanted a congress mukht bharat. bedi n shah give him a bjp mukht delhi -madamji badhai ho #delhidecides #ndtvresults 309 | apni class to peeche ki bench pe chalti thi . . . 310 | abke schools me to cod aur gta chlega 311 | chalk to ni late the . . . 312 | Net pack khatam ho gaya..paytm se recharge karo.. 313 | misbah, in dressing room : ye mat socho ki hum haare, ye socho ki batting sab ko mili. #indvspak #sixzero #cantbeatindia #bleedblue 314 | Collegeg kaisa hai 315 | Bhay ab mera net pack khatam hone aya he 316 | kabhi suna hai...cows ka population kam hua hai? :/ #beefban 317 | time agaya hai 318 | aaj khush to bahot honge #aap #aapsweep #delhidecides #aapstorm #kiskidilli #amitabhbachchan http://t.co/3bfdmncmdf 319 | Modi ji namashkarSorry to say but aap jab v kahin Facebook pe kuch share karte hain to status ko English me update karte hai...Khusi hogi jab aaapka updates hindi me 320 | kachua sir hawkins use karte hai ! :p 321 | Hlp me bhai plzz help me 322 | Sir uttrakhand se hai ! Yaha ke fans ko apka intezaar karna padta hai ! Plz come once to nanital ! 323 | sabme 1 talent hota h . . . . . . ye sabse different hai 324 | hi.... salman bhai my self mayur and m a litill singer or meri rqst hai aapse pls mujhe bhi ek chance dijiye bhai aapke liye gaane ka......plllssssss bhai 325 | rohit sharma to guddu k first time baithne se pehle hi out ho gaya tha . 326 | end wala paragraph mast h bss isi k bharose m apna revenge ni leti 327 | bheek me . . chalk bhi milta ha kya sir 328 | Thk h launde 329 | #kiranbedi loses krishna nagar seat, by 2508 votes http://t.co/vkacgfy4yd #delhidecides #kiskidilli #aapsweep http://t.co/flmhoxgbhm 330 | bahut hi zabardast shot !! 
virat kohli chaa gaaye #myteammypride #indvsuae #cwc2015 331 | Love uuu bhaiii 332 | kha ho aap shona cant wait...... :-) 333 | Salman bhai ki produce ki huvi movie achi na ho ehsa ho hi ni skta.... 334 | arey wah yrr 335 | " l ki power leke " contra end karna . . really credible . . . 336 | double dragon me flying kick kaise martey they etc etc :d 337 | iss din school to jana hota he , magar school bag ghar chor kar , kya maja ata tha cycle ki race lagate the . 338 | Modiji aapko hajaro salam.bahut pm aaya gaya karod lekhe gaya but aaptho imndar pm best of luck 339 | Wow kya baat h itne comment maza aagya re baba re from khushi hasan nd my real name ruby hasan 340 | Bajrangi bhaijaan is awsome 341 | aam admi party jindawad 342 | Salllluuuu i love u....plzzzz ek bar apne apse milne do...plzzzzzz 343 | awesome wap....bhai jaan.. 344 | shifting to delhi.. bijli, paani, wifi sab free ..lmao xd #aapsweep #delhidecides 345 | Emraan hashmi police dress pehen kar 346 | login karne ke baad aapke account me 150 ka free recharge add ho jayegaa . . , ! 347 | 14 feb 2011 - " happy valentine's day everybody . i'm going to buy cookies and stuff myself full . - 348 | rt insan1preeti: rt sweetykasta: "gurmeetramrahim #blockbustermsg movie dekhne ke baad sirf ek word hi nikalta h voh h "wow"" 349 | congrats....bjp govt banane ke 9 mahine baad aaj aap ki govt bhi ban gai.... #aapsweep 350 | last semi finals mein mere dad same bahane se saara aam papad kha gaye . . aam papad is good luck for team :p 351 | isse bi acha rasta agle din goli de do :p :p bhai kabhi ni kiya and proudly matric pas :p 352 | Nic salman bai 353 | Supar hit bahi 354 | ye post turant share karne yogya hai 355 | great modi ji 356 | Congrats modi jee 357 | leave or live !! bt aj teri galti maaf ... kyuki mater sahi laya h ... india progress ki jagah degrade ho ra h ..... i agree wd wat u'r sayng .... 
sahi h tu 358 | bhai lv u......aapne zindgi me ek baar milna h...plz bhai kbhi ye msg pdho to.....plz mujhe ek call krke.... meri life khushiyon se bhar dena... 359 | Assalaam o alaikum bhai 360 | sir delhi badlega....desh badalwayenge....ab gunjega har disha me aap ka naam....#pressjhaadubutton 361 | frame kara lo bhai . . :p 362 | bhai aap tou best he bajrangi bhaijaan ATBB hogi ;) 363 | mohabbt hai aur btana v nhi aata feel toh kra skte ho na .. # U know what I mean # Pure pleasure at best D 364 | bhai totlly phadu super hero h nagraj ... hollywood ki trh bollywood me bhi inki muvies bnni chahiye ... wese doga ki new muvi bn rhi h nxt year tk ajayegi .... 365 | Kha h aap pls kbhi aek msv kr dijiye plss I realy miss u 366 | hello :) nau lakhiya paridhaanon ko, do takiyaa muffler nigal gaya :) #jiyohazarosaalkv #delhidecides http://t.co/7ppweqnm5v 367 | give one more chance to kejriwal with full majority . vote for aap party 368 | ami salman ke valobasi...i love u SALMAN 369 | bhai . . craft ke exam ke din to papa ka scooter aur mummy ki craft . . bas ban gya craft a 370 | p + q +r = 51 . . solved ! gudoo yeh hai answer . kachua sir se fast solve kar liya 371 | Apke life mein miths ho Cdbury Silk jise 372 | GREAT SALMAN BAI 373 | are tension na lo . . . #wontgiveitback 374 | dhoni ko milegi na india ko world cup jeet kar milegi 375 | ab toh india me manegi fir diwali 376 | mario n contra khel khel k bade hue . . . . . seriously purane din yaad dila deta h . luv u garbage bin n tivu too guddu 377 | Itni energy vaala perhaps he koi p.m.raha ho india ka, aapkey liyye bhagvaan se prey kertey hai, sabka saath... Sabka vikas....jai ho 378 | garbage bin bhai (faisal bhai) is baar to emotional hi kar diya tune . . . vakai dad . . . dad hi hote hai chahe saala kuch bhi ho jae . . . missing him in every moment . . . . :) 379 | good evening Mr. khan ... mai apki bout bout bdi fan hu..i love u so much bhaijaan..... 
bhagwan apko hr buri najar se bchaye or aap hmesha healthy rhe... love u bhai..... 380 | Happy lohri kudiye.... Ty 381 | bhai ma kasam full telented h 382 | contra mein 30 lives ke liye sachme chit tha ? ? :( kabhi pata hi nahi tha :'( 383 | hello bhai ji 384 | feeling sorry for pakistan.. but v r defending champions.. aisehi thodi harenge..!! #indvspak #sixzero 385 | gajab ki films aati hai on friday and sunday 386 | Acchi hai mam 387 | school mai aur koi teacher nahi hai kya ? 388 | Mast hai machine 389 | best is sharma ji ka bete ki dhunayi wali pic isi laundey k karan mujhe mere parents aj bhi sunne hai' " sharma ji k bete ko dekho " :p . 390 | Aap ki new film ka mein besabri se intzar kar raha hu please jab film release hogi to website post kar dena ta jo ham aap ki super duper hit film dowload kar sake 391 | lol you know once salman khan asked in his show 10 ka dum " cricket match ke dauran kitne pratishat bhartiyon ko lagta hai ki wo hile to india gayi ? " " :d 392 | rt obama ko delhi bulakar elections jeetne ki sochne walo 393 | salman khan nice 394 | Har ek argument ka analysis bhi karna h 395 | 1- jan dhan account mein 1 laakh ka bima va 30 , 000 ka vober draft aaya 396 | Superb sallu jaan 397 | sallu is bst 398 | rt papa ji teno vachano ka pkaa bnaye rkhna papa ji....rhmat krna love u papa ji... #msgyouthicon 399 | ab world cup jeeto ya na jeeto india ka australia tour toh safal hogya 400 | i love you salman sir...aap ki movie dekh raha hu abhi judawa .. 401 | mai to itne games me b bas ninja turtles , street fighter , car race and mario khel kar khush tha baki games par dhyan dene ka tym kiske pas tha :-d 402 | ise acha toh india ko batting dete kuch entertainment he ho jata #indvsuae 403 | inhe kaho hindi hi theek hai . . aise to guddu ki english aur bigad jayegi :d 404 | 4- pahli bar ganga aur gau mantri banaya 405 | hey guys tomorrow is now national ground hog day , i hope to see you all post about it ! ! 
hahahaha 406 | #budgetspeech:sab kuch itna jaldi -2 btaya ja rha hai, shaam me debate sunenge or kal ki newspaper #budget2015 #budgetnamo #unionbudget2015 407 | guddu bhai captain ban jao ab tum . . . aur 2-4 ad campaign krlo 408 | aage se jab bhi wc mein ind- pak match schedule ho to ind ko waise hi walkover de diya jaanna chahiye #indvspak 409 | nice pic modi ji 410 | bhai ab to tu support kar parents ko . . . :/ economically nahi to mentally to kar sakta h . . . 411 | Thnx sir ji 412 | hi my cute bandar kha ho kya ho rha hai thik ho miss u sona miss u so much 413 | Mtlb Kaali pariyaan Black beauty 414 | Mera placement hone do 415 | lv u salman..kya kr rahe h ap sona.. 416 | salman khan ..................................being human ka aslee avtaar. admin aamir ahmed. 417 | ahaa voh dinnn ! ! ! chupke se paise le ke comics khareedna :p :p aweesomeeee days ! ! 418 | waah , kya attention to detail ! cycle tyre ke fork par balloons and wide ball marker ke liye tuti hui eet ! wah ! 419 | Modiji govt wonderful programs; jaadu apka safai aapka-swach Bharat,paisa apka account apka-jhan dhan,body apka mat apka-yoga day,beti apka responsibility apka-beti bachao beti padau,mobile apka beti apka aur selfie be apka-save girl child program.. Amazing fekuji.. 420 | maa and papa ne jab bhi english boli hai . . main bada khush hua hoon . . kyunki i feel top of the worlds . . . . :) 421 | mammi sir dard kar raha hai aritanjan laga do na please 422 | maza to tab aata jab india pehle batting karta !! #indvsuae 423 | Kch naya ni h kya Du admin k pass ...! nywz nyc story keep it up ;-) 424 | Abe callg kar jaldi 425 | shaan book se muh chupa raha hai and silently praying ki teacher na pakde . . . typical story in everyone's school life :d :d 426 | lov u sallu bhai 427 | Hansraj se hai tu ... Tab to c?ol rahega hi . 
Proud to be a hansarian 428 | jesus wala punch bohot kadak hai :') 429 | bas kuchh mb ka apps download kro aur earn karo jitna chaho 430 | blockbaster movie bajrangi bhaijaan... 431 | we love u very much bhaijann( bollywood ke sann india ke jaan sub ke bhaijaan SALMAN KHAN ) !!!!!!!!!!!!!! 432 | true story , teachers staff room main bina teacher ke class wale bachho se jyada shor karte hain :p 433 | Salman khan am exited 4 ur n3w film #bajrangi #bhaijan ..................... 434 | faisal bhai . . best part is " guddu ke mom ke expressions at last :d :p 435 | dashing salman khan .. 436 | sallu bhai rocks 437 | raina abi form m nhi h 438 | Tension nehi leneka vai,kuch bhi ho jaye,kuch bhi loog kahe hum hamesha aapke saath hay,ek aan,ek shaan ek hamara bhaai Jaan :*just waiting for Bajrangi Bhaaijaan :)Love and Respect From Bangladesh.............. 439 | Nicere vai sallu 440 | prestige se toh yehi sentence banta hai bhai ! 441 | very good bhai mujy thori bohat samj lg rhi hai 442 | mera manpasand channel dd national , desh ka apana channel , 443 | Happy new year bhai 444 | Apke life mein miths ho Cdbury Silk jise 445 | " sarah g . day " i love you ! ! ! stay sweet and simple , dude ! can't wait for your show on sunday ! 446 | hehehe . . hum to chalk ke bahane lunch kar ke aate the . . phir lunch time me masti 447 | that was absolutely brilliant . ghar ghar ka kahani . . . . :) 448 | dont be upset friend jiska koi nahi hota uska facebook hota he . 449 | nyxc trailer bhaijaan 450 | only namo not bhagoda 451 | Hey koi meri sister ko pareshan mat karo 452 | 0ko ara hun 453 | Bhaijan love uuu 454 | Respect to this Madam. No offence but Gujarat ko to kisi Mahapurush ne aisa change kr diya tha ki koi bhi insan slum me ni rehta. 
455 | Aapko bachpan se hi apna best hero mana.......aapne na aajtak kisi comment ka reply na likes..............aaj sad hoon main.......aap bahoot bhoot hit do....laken main ab sad hoo..aap mere favroute hero rahoge laken ek baar like ya comment de te to accha rehta ........thanku 456 | guddu ki mummy ko ek din phas gye obama ki english cls me daal do bhai 457 | sachin gaurav vikram dahiya abhishek kumar yaad hai hindi class mein kya haal hota tha :p hahahahhahaha 458 | rt bjp: arvind ne abhi se dhoka diya, jhadoo ki jagah vacuum cleaner chala diya :) #aapsweep #delhidecides #kiskidilli 459 | Salman bhai jai ho. Blessings to bajrangi bhaijann. .... 460 | love uuuu shalluuuu 461 | Asaalam aalay kumm Asaalam axelent bhai jan (y) 462 | Love u salmaan khan ji love u so much mithuuuuu...mmuuaahh 463 | Sacha bharat INDIA 464 | hahaha cant forget this dialogue . . . dusre section se laya hun . . . used to say that a lot 465 | 49 days ka sarkar, sirf ek trailer tha. picture abhi bakhi hain mere dost. hum honge kamyab. #aapsweep #delhidecides #aamaadmikisarkar. #aap 466 | woh twitter pe painting tu kia acha tha 467 | thank u faizal bhai is cameo k liye xd 468 | hmare yaha delhi me n . d . ki kunjuiyaan aati thi and kundrabawa guides . . . . :p and mujhe lagta tha sirf mere parents mera project banate h . . . . . :) 469 | hahahahhahahaha , guddu khoya hai mario or contra main lol :-d 470 | Tusi bast pm ho modi ji Sade I'm panjabi 471 | talented banda tha 472 | AJA SHAM HONE AYI....MOUSAM NE DI ANGRAIIII....???? Plz reply me BB.......LOVE U CHO MUCH...????......taktae rah te tujhko sanjh savere....love u my BIG BOSS....BB 473 | wow 2 gags in 2 days fazzi bhai sab teek tho hai na however love guddu k mummy oops mom k english :p 474 | guddu tv chod . . . . study pe dhyaan laga :p 475 | dil garden garden kar diya 476 | kasm khuda ki itna toh class12 ke result ke din v exitmnt ny tha. 
477 | Salman bhai tension mat lo....hum is ovaisi ki baaton me nhi aane wale.....movie super duper hit he 478 | oh to election se pahele mamtaji ko bhi prime minister dekhna chahate the . 479 | jis section ki ladkiyan mast hoti thi us section mai jate the chalk magne . . . . . . . . . . . ha ha ha 480 | Looking katilana andaaz 481 | loveeeeeee uuuuuuu yaar thak gai tum hi reply 482 | vote fr aap 483 | isme se " tanks " mera favourite tha . . kyuki baaki kuch tha hi nahi isme . . . :d 484 | most probably its always " boondi ke laddu " and we love it . 485 | Nice sllu baiiii 486 | jo bhi h uska mind deko good job 487 | #myteammypride #indvsuae aaj india ko fir mauka milega yeah #gobleedblue 488 | ise kehte hain method acting ! ! :d 489 | agar iss baar bhi delhi me kisiko bahumat nahi mile to jeetan ram manjhi ko delhi ke cm ka charge de dena... http://t.co/jmsrsueini 490 | actor hai boss guddu to 491 | multi telanted wah 492 | nic song . bhi jan 493 | aisa school hota toh mein humeisha topper hota 494 | Love uuuuuuuuuuuuuuuuuuuuuuuuuuuu bajrangi bhaijaan 495 | Har har modi ghar ghar modi all world modi ji gud work sir ji 496 | mario ki bhi variety hoti thi 497 | jab bf ko pta h n he is still wid u to ab konse future ka darr h tuje ... 498 | aur iss saal filmfare award for best actor jata hai guddu ko . . . 499 | Bhai jaan come plz 500 | Kaise ho ap? 501 | ye India hai Bo ...!!! ) . proud to be an Indian ;) 502 | dosto yai flim tho fulle hit pef hit hoge 503 | hi sir kesey hai aap n aap ki famliy i hope aap n aap ki famliy sab theek hon gey god blees u n best of luck for ur film bajrangi bhaijan super hit film this year ....... 504 | hi sallu bhai <3 505 | gud see vaaijaan 506 | Salman khan bolliwood ki shan banchuka back to back blockbuster superhit movi name dabbang. Redy. Bodyguard. Ek tha tiger . Dabbang2. Jai ho. Kick. 
Bajarangi bahizan superhit jayagi 300 fix 507 | 2 mb me ap sabka kya jayega bt ye 2 mb ka app apko 2 mb se jyada mb or paisa dilayega try kre it's really work 508 | rt #msgyouthicon #msgrevolution papaji bahut hi sunder lag rhe ho ji 509 | honest man arbind kejiriwal . . sory arbind i cant vote fr u bcoz im from nepal . . 510 | Salmam sir hmesha apko to hi follow krtee h hmm. Apko dekh kr hi sikha h ki dusro ki heLP krna kya hota h. Dil ko khusi milti hh ab jana hmneeee 511 | kai bande aaj chhutti maare hai , without knowing the suprise that office mein , cafeteria ke baahar bade parde pe live project kiya ja raha hai ye match . mast watching from floor balcony 512 | Gud morning bhaijaan 513 | i wish india jeet hi jata 514 | rahul gandhi ko #landacquisitionbill ka virodh karne ke samay bhagna ko is mudde pe samarthan dena hai ? #whereisrahul 515 | chalk ke bahane chakkar laga liye guddu ne . . hum to chalk lene jate the to class over hone ke baad hi aate the . . :p :p 516 | india god jai hind 517 | 56 ench ka seena thanda pad daya, 67 ench ke seene ke saamne !! retweet to banta hai....! #aapsweep http://t.co/tjvwmxmrsk 518 | all d best mere msd lv u vry mch yaar . 519 | oh teri . . . insan h ya robot 520 | Goodluck for bajarangi bhaijaan 521 | mere papa ne to aage khade hone wali jagah special seat lagwai thi waha baith ke scooter ko race dia karta tha :d :p 522 | chak de india ! we will rock n chase ! 523 | i agree with you kamal poddar . . . 524 | jabardust post hai buddy . 525 | super bajrangibhai jaan super hit movie 526 | sach mai school days yaad aa gaye yaar thanxxx 527 | eid mubarak love u salman 528 | Sir apse milna hi h mujhe kaise bhi karke mujhe bht tklif hoti h sir schme ab or nahi saha jata agr aap samjh skte ho to please mujhe jarur milna h...apki fan Geeta Rawat.... 529 | Koi ni bhai , apne dbc wale hosla ni haarte ... \ " think to score goals instead of thinking abt goalkeepers\ " 530 | ha ha ha . . guddu ka slip of tounge ho gya . . . 
lolz 531 | Pyar krna h to junior se kro , Senior to citizens v hote h p 532 | main superman salman ka fan 533 | lv u salman... schi m bat kroge kya sbse.. lv uuuuuuuuuuu sooooooooooo mch....... 534 | India vs Pak ka match world cup final match se b jyda maja deta h 535 | kudos to admin jo tune iski cnfession post nhi kie aj tak ... 536 | Heheh bye Happy lohri.. 537 | salamin alaykum salman khan jumma bubarakho how are you 538 | guddu tri series aur test series mein , kya lene gya tha ? ? 539 | Koi bhi yeh chutiyo Salman Khan picture nahi dekhne musalmano be care ful picture ko flop karne ha 540 | Tera project kya h ? 541 | Salman ji aapne iss bar koi song nahi gaya mujhe wait tha k iss bar b aap ke lovely voice main koi song gaye ......I love your voice ....hangover is my all time favourite song abe tak main vo song sunte ho I just love that song 542 | bandro ka pinjra . . . lol 543 | mom is mom :) yaade yaad dila di :'( awsm :) 544 | Mare favroute sallu vi... 545 | #aapsweep :: aap ko ab sirf perform karna h or woh other states me bhi strong ho sakti hai 546 | best acting ever . . . sabhi children ko aati hai ;) 547 | super cool cool bro 548 | are bhai " repidex " to rakh dete ek table pe :p 549 | reminded me of my college egd clsses . external viva wale din kaha ki sir raat ko sheets padhte hue galti se mombatti gir gayi aur 7 sheets jal gayi . . . jali hui sheets par 10 mai se 8 mile :d acting faad kari thi lekin :p 550 | Hi salman khan ! Plz kisi ko tou reply kare na....... 551 | omg hahahahahaha . . . lolz . . guddu ab kam se kam 3 mins baad hi ayega garbage bin ;) ;) 552 | uddhav thackeray ki mazey hain. free mein trending list mein cha gaya hai launda. #aapsweep #dillicminvitation 553 | sallu bhai ilove you 554 | hahahaha contra ki l power ! ! ! 555 | haan saala meri cassate par bhi likha tha 999 in 1 but hote 20 game he the 556 | haha . . . 
guddu u r busted ;) baithe rahiyo uthnaa nahi :v 557 | Healthy gurls cuTe lgti h pagal , and u r beauTiful in ur own way -) and bf tjhse pyar krta h ya teri physc se . -) mat ho patli h , jesi h bst h . 558 | bohot emotion jude hue hai in sab baatoon se 559 | it is one of the best gigs of grbg bin i hv evr encontrd . . . . smply owsm . . . . . 560 | bhai agar bahar gya to baingan (brinjal) ke sath he wapas aayega guddu 561 | love u maa 562 | Hiiii.am ur big fan.i wnt ur photograph wid ur autograph.mera qustn apse h ki apki khubsurti ka raj kya h 563 | Osm mubi super duper hit sir 5000000 ke kam se jada kamaygi ye film sir 564 | kal phir train mein wifi lagne ki baat hogi.. nayi bullet trains chalegi. achcha khana milne ki baat hogi. #railbudget2015 565 | bhai ap helpful prson ho sb ko pta h pr ap ki nazae me hlpful prsn kon h 566 | unhe na manda akhayee ...... jin janme rajan ..... prophetic words of Nanak dev ji . Be positive & hopeful . ) 567 | sir aap dil jeet liya kya movie ti 568 | bhai ur evergreen 569 | Luv u bhai jaan 570 | ab bina anoriod mobile ke bhi paye free recharge , 571 | yaar jb tere bf ko tune sb bta rkha h toh fr ab kyu darr rhi h it ws ur past let it remain dre only 572 | Salman ji aapki film super duper hit ho.sare record break kre.Ginnes book of record me naam ho jaye aapki film ka.Aap reply kro ,ya na kro ham to aapke liye pray krte hi rahenge.Amen!!!! 573 | luv u soooooooo much salman....aap ke fans hmesha aap ke saath he... 574 | vote to aap 575 | guddu rebel hai apna :) 576 | sir pls ek reply de do....main to ish khushi se hi pagal ho jaunga....pls sir,pls..i beg to you. 577 | maine pehle bhi 1-2 baar try kiya , advice dene ka . . bhai , hindi font mein likho . . mazaa aa jayega . . by god 578 | himalya pe tapasya karane gaye honge,god se wardan magne #rahulonleave 579 | sala . . . zilla top krte hum aise school me . . . lekin us time bhi aisi kalpna nhi kr paaye . . . kaise sochte ho ye sab . . . 
:d 580 | thnks alot sallu 581 | Awsome mere fauji 582 | love u bhai jii salman khan 583 | pahle bar sun raha hoon angreji aur hindi main ek sath commentary . . sahi hai . . . 584 | my salman khan 585 | aaj ke latest xbox aur ps se zyada bhokal uss time " udne walla mario " ka hota tha 586 | haaye ! ! maa ki yaad dila di . . :) love you mom . 587 | Hi Salman sir love u piz ek bar muje se bat Karo na sir piz piz piz 588 | rahat ke yahi toh 2 pal hote the school me 589 | Salman bhai .......no tansion hm tumhare sath h ...koi frk nhi pdta hme ...koi bhe kuch bole aapke killaf 590 | rt 2022 tak poverty khatam karne ka target hai. matlab utne saal tak zinda rehna padega. #unionbudget2015 591 | BiNa muslim Ki hit karke dikhyenge hm 592 | #msgincinemas danka mch gya re mere ram piya ka....... #msgincinemas .... luv u papaji... 593 | kucch naya kahaani nahi hai sirf action mai jayda focus kiya hai new action hero ........... superhit because of salman..... 594 | now news is congress party desh ko loot ke kha jao yojna :p 595 | vo board par chalk se likh kar mitana , fir duster ka white powder friend ko lagana :d bohot yaad aata hai :') best memories of almost every person is attached with school ^_^ 596 | jain book depot :d :d yayyyyyy bachpann ki yadein taza ho gayi . . . . the adorable most memorable 90s :) 597 | Na bas cousins 598 | apne sahi kaha . . vote fr app 599 | dekh yar reservation se cast system ko aur badhawa milta h . . . jise pehle ni jante k kaunsi cast ka h , sath uthte baithte , khate hain but jub admsn ka time ata h u know what happens . . . mentality apne ap change ho jati h general wale ki . . koi mane ya na mane this is what i feel 600 | Nice salu bhai 601 | duwa karo india ya phir dhoni jite world cup 602 | new add or bhi mast h 603 | oldest joke in the book , to sa: " kaha se fata-ke laaaye ho :p " 604 | Almost ho gyi 605 | Salman Khan sir koi kuch b bole hume koi. Frk nhi pdta. 
Ye sb ek chal he kisiki bt i lv u sr god bless u hm to dekhege movie or dikhayege b #BBthiseid becouse ye mere salman sir ki movie ka swal hai. 606 | kachua sir weekend par gaye hain kya ? 607 | Love u salman kbse wati kar rahe h. 608 | Salman khan bn kr yaha kyi logo ne fake msg kiya h.... Bt hm hmesha real hero my salman khan se hmesha pyar krenge 609 | car racing or star galaxy b milta tha :d mast game tha yar ;) 610 | arvind bhai zindabad 611 | Supar dupar hit sab rekod tutega aesi dil se duaa 612 | film me apki pose mast hai bacho ke sath me exclusive pose bhai jaan 613 | food security bill pass , . . acchi baat hai . . . . . congress k according desh k 80 crore logo k liye tha ye 614 | Bhaijaan of bollywod salman bhai 615 | i really used to do what guddu did ! ! 616 | Jis jis ko MH ki full form pta hai thoko like p D 617 | hazipur-bachhvada double rail line k liye 720 cr ka aavantan #railbudget2015 618 | woke up full night still watching #indvsaus . . . ye junun he kuch aisa h :-) 619 | loveee u saluuuu 620 | ye gag garbage bin k itihaas me best gags me se ek h . . . :) 621 | guddu ki to halat kharab hai . . . . . . use to ab itni garmi lag rhi hogi jitni ki delhi ki summer me lagti h . . . . . . . . :p :d 622 | i sapourt u saloo bhai i hole u r bajrangi bhaijan it's going 623 | #durex new delhi, bachav hi suraksha hai. #celebtrains #railbudget2015 624 | aur ye guddu chala game shop par :p :d 625 | seeing lunchar paaji after a long time :) from chabad chabad-->chomb chomb-->sudup sudup sahi hai 626 | ink pheka unhoney itihaas racha hume #thankyou #kejriwal #aapsweep #5saalkejriwal http://t.co/ovbdsc0usa 627 | ye tere mere beef mein kyaaa hai ? #beefsongs #beefban 628 | match wale din bhi office aana pada but koi na boss se chhupte chhupate online live streaming chalu hai . . . . . . go india go . 629 | koi mujhe yeh btao #maggi sasti hui ya mehengi #budget2015 #sabkabudget #superbudget #unionbudget2015 630 | rashtrawad led by kiran bedi ki jeet hogi ! 
631 | pehle pakistan ki taraf ek dal do . . . . sare terrorist khatam . . . 632 | kejriwal ji apkey man mein bahut kuch karney ka man hai desh ke liye , n . g . o ke madhyam se karey yakeen karein aap noble prize oa saktey hai . politics aap log gandi wali kartey hai 633 | sir plzzz visit comedy nights fr #shamitabh its always a pleasure to see u 634 | Unhone appoint kara ha Ohho.. Ye bat ha Wo CA wale students ko appoint karte hain ky@? 635 | bajrangi bhaijaan 1st day 1st show jaumga main 636 | the most funny line is . . . kahan khoya hai mungerilaal . . . aankhe khol aur problem solve kar 637 | wonderful show . . jaroor dekhiye 638 | Bhii jaan luv u 639 | mera vote aap ko 640 | anna hero hai aap 641 | kabhi to msg ka ans dy diya kro g 642 | atlast he is jawan 643 | eid baad hogi bhai party to wo bhi bajrangi bhaizan movie ke saath sbhi no tv no movie 644 | advanced eid mubarak 645 | #railbudget2015 prabhu hi janega ,aage kya hoga :p 646 | salman itni problem mei bhi desh ki duty ker rhe hai so why not.. 647 | feeling proud . jai hind 648 | Hum to apko apni jaan maante hai 649 | kejriwal ko live dikhao agar himmat hai 70/70seats confrm ho jayegi 650 | bajrangi bhaijan eid mubarak in advanc or apki movie 450cr ke par jaye kyuki....you are grate and smart hero of bollywood ke 651 | jab tak kuch free ka na mile, indians ko tasalli nahi hoti. #railbudget2015 652 | hiiiiiiiiiiii sallu bhai im big fan sallu bhai aapko twitter par roz mai hi massage karti ho im big fan sallu bhai reply plzzzzzz 653 | awesome sir awesome :d :d 654 | guddu ko koi centre fresh de do , hamesha bakar bakar ke chakkar mai pit jata hai . :d 655 | Super hai bhai suraj is rocking 656 | achi nature k boys hi frnd requst send kare 657 | rt kya khoob kaha,nice words,rahu ketu ko bagao,desh ko bachao,delhi ko world city banao. 658 | Mast hai bike.. 659 | u r great 660 | Awesomee bhai jaan 661 | gud job by media . aise logon ke baare mein news update karo jis se log inspired hon . 
662 | i appeiciate asha somashekar ji for ur great vision n select arvind not kiran . 663 | ahaa . . . jeet gye :d congratulations to all indians :) 664 | supw- socially useful productive work . . . . . . . . . . . . sirf mom ke liye extra production work ho jata tha ! ! hum log guddu ki tarah baith kar pankha on/off hii karte reh jaate the :) thank you mom for all your hard work :) :d 665 | Ati sundar sir sabse hatkar Vande~~~~~ sir 666 | well ab bjp supporters ko anna bhi nahi bhate . kuchh to sharam karo . this is why i support aap :) :) 667 | ahttp/URL i love u mummy ! ! ! thank u garbage bin 668 | friend bhot achha free recharge ka software h use kr k dekho . . . . bhot offers aate h . . . . . . . . . aap roj 100 rupees tak earn kr skte ho . . . . . . . friends achha na lge to besk unistall kr dena . 669 | oye yaar kasam se fvr8 mam yaad aa gyi . . . . or shaitan principal b :-d :-d 670 | " bheekh maang ke " was killer . lol :p 671 | bohot emotion jude hue hai in sab baatoon se 672 | acting ka baap hai ye . . jhut bolne or acting krne me sabse aage :) 673 | hahaha so true ! 674 | yaar news mein aaya tha tu . . . ? ? ? ? huh . . story achhi h short film bnwaa le ispe 675 | Luv u bhai...u r so cute....sbki jaan salman khan......... Anitha Parmar 676 | hum class k monitor the mje alag the raja babu :) 677 | Waoo.salman nice yar 678 | as usual . . . wide ball k lie adha eint use kiya hi . . . 679 | Hw r u now 680 | Toh kya decideg kiya 681 | hell yea lmfaoo , & see you tomorrow asia baby (; 682 | sapno mein aisa school hotha tha , cricket aur wrestling ke bhi fantasy dekhthe the din mein . . wah wah . . kya din the . . 683 | aur bol bade style mai rahi hogi actully aur by the way ko lamba sa khich ke :-) :-) 684 | Bharatmata lovely son modi jaihoo 685 | school jaake parade /p . t . krna , fir vo cultural dance , speech aur last me sweet ceremony(bundi k ladoo) . . . , mst the vo din bhi . 
:) 686 | love salo bhai 687 | Modi sir welcome aapko or hum aap jaise sir ki jarurat 688 | bhai bajrangi bhaijaan pendant mila mujhe...Aaj ki party meri taraf se song launching par in JW Marriott Hotel me... thnx bhai 689 | hahahhahha . . . . . guddu oops moment ka shikar ho gaya :d :d 690 | Super kota salluu 691 | ab world cup jeeto ya na jeeto india ka australia tour toh safal hogya 692 | Supar hai boss 693 | vote for aap 694 | salman khan rock's ss 695 | haha.... im selected... <3 mzaaa aa gya... umahhhhh bhai luvv uhh 696 | very dhamaka song pichli baar jumme ki raat &iss baar party ki raat wow..party aapki taraf se iss party mehi kick hai... 697 | Lov u modi ji 698 | hum to double bed k neeche rkh dte thee :d 699 | congratulation sir ji 700 | supw word padhte hi pehle post like ki . . fir baaki ki padhi ! ! faizal bhai you rock yar ! ! 701 | imaaa get it tomorrow . (; 702 | aaj wo din hai jab tv par programs aayenge :d 703 | Maine mail bheji h 704 | revolution . sampoorna kranti . change . 705 | owsome job bcpn me blackboard pr jta yehi sapne aate the . . 706 | lol . . . . har movie me hero ka ek sapna hota hai canada jake taxi chalane ka :d 707 | bhaijaan love yoy 708 | dnt compromise ur career for ur so called gf . . . coz agr kal ko tu kuch bna ni na to tujhe puchne wali b ni h wo . . . so abhi smbhl ja nd stdy pe dhyn lga . . gf pe to bd me b dhyn lga skta h . . . . b practicle yr . . . 709 | rt aise calls aa rahi hain jaise mai election jeet gayi :) #aapsweep #thankyoudelhi 710 | i love u salman vai.i always like u vaijan.khuda hamesha apki saath rahega.. 711 | Sallu bhaijan ki jai ho sallu bhai pure hindustan ka tiger 712 | arvind sir u will be winner . me aapke saath hu . 713 | khamoshiya , pk movie ki 4 kadam or dil darbader .... :)) 714 | salman khan aap pakistan aa rahan par kon se city aya ga plz plz humko bhi aapse milena han plz plz batyan na 715 | modi bedi , good combination 716 | Salmanu mashallah Bahut hamsome ho!!!!khush raho!!! 
717 | Nice song aaj ki patry meri tarf se 718 | kya yar , happy new year dekhao naa 719 | apki aane wali film bajrangi bhaijan suuuuuupppppppeeeeeeerrrrrrrrrr hit ho 720 | ab aap daily 150 rupe ka free recharge kar sakte hai , 721 | kachua sir bhi kaafi hi fi aadmi lagte hain . aisi tagdi definition ! 722 | ak sir well 723 | kya din they yaar . . . . . . . . mario me seedhi per battak ko fasa ker 1up banana . . . . . vo 8 bit vo 16 bit games k sapne aana ! ! 724 | mario ke hidden warp zones: level skip karne k liye ! ! ! 725 | Lubbbbb uuuu sallllluuuu 726 | i don't even dare to miss a post of garbage bin :) 727 | happy birthday mom ! the sun is shining bright today :) . i miss you always . love , daniela " rella " lol xo http/URL 728 | oh shit... 100% catch percent is gone... oh raina sirjee u r great... #indvsuae #cwc15 729 | Yar ab tu usko olx ya fir ebay yar quikr pe bech de P 730 | ab apko nice,gud,handsome etc bolke kya faida....Ei sab bohot chota word hai ap k liye.....ap mera fav actres ho pls Cl me on this nmbr....919593871431...<<>>..Pls...''' 731 | prestige se better hawkins khana banaye jhat se 732 | Bete sahi h 733 | nd ki kunjiyaan b helpful hoti thi :p 734 | BHAIJAAN ... KAVVI KOLKATA may aoo .. apni fanzzzz say milne ... already Banjarangi bhaijaan MEGA ... HIT ... 735 | bhai aap ki bhaijaan movi bhi 200 cr karegi 736 | sale crush ko dekhte he soccer skils aa gayi tere ander . . 737 | virat: koi nahi! well played shikhar! shikhar: bc, tu aa pavilion vaapas! batata hoon tujhe! #runout #indvspak 738 | gone amala the chipku #indvssa 739 | Mera stomach b limited h na 740 | rt zindagi tham si gyi yaaro... :p #aapsweep #aapstorm 741 | dil ke saath khelo, wallet ke saath nahi #unionbudget2015 742 | shastri bhee kaunsi kam panauti hai jab bolta hai " all india doesnt want to do at this stage is to loose a wicket " and koi out ho jata hai :) 743 | yaar aise problem toh main kabhi bhi solve na kar paau ? ? :p 744 | dekhna aaj pakka dhoni retirement le lega . . 
. . :-( 745 | ye to kuch b nai govt . school me to lady teacher's are like biggest pakau nd dimagkhau :-p i've been through this . . humare classrooms se jyaada shor to staffroom se aata tha lol :-p 746 | Salmaan khan bollywood rss or bjp ka naya kutta 747 | M promise nai karta 748 | hamare ghar me to cycle thi papa k paas . . kabi ye cheez feel he ni ki . . i am feeling sad 749 | no vote for bjp 10 lakh wala sut pante or khe te he ki me har garbi ke account me 15 lakh duga 750 | kon lagata sarat nhi jitegi india wald kap chahe karodo ki sarat lagalo mujhse nhi jitegi india 751 | arre diwali or holi k next day exam na ho to teachers ke nani mar jate the aisa lagta tha :p 752 | Lol -D or lee number . #Gal Good job !! ( y ) 753 | Sakshee SharmaSharma ... kuttiya saali .... still 3yrs more 4me ... huhhh 754 | #feb10witharnab #aapstorm raghav teri chaddi to abhi utregi..jo bola hey karo 755 | aur laughhing clrs k post pr bhi koi . . . tu tu me me nhi . . . . 756 | that batting power play is like jale pe namak or sone pe suhaga #indvsuae #cwc15 757 | no base for anna . publicity chahta he only 758 | prashant thapa vijay nainwal sala isi mario contra k chakker main xi mai grace mila tha . . . . 759 | chal be kutte , , drama baaj , , notanki karta he 760 | sach me chu banaya 761 | Aapne apna commitment nhi nibhaya Aapka dilouge wrong hai 762 | aa gaya is anna ka ghinona chehra samne . . highlight hona chahta he ye . kejri iske aandolan ki hi pedaish he . #dramaparty 763 | jo vi add karna chahta ho no kar sakta h 764 | Usse hi Hindi me translate Kara lena 765 | srinivasan ka chamcha tha . ek jail jayega toh doosra cement factory sambhalega 766 | jism meaning likh ke google pe search maro and result dekho tab pata chalega londa kitna ashleel hai 767 | sir meri tbiyt kharab thi . . cough thi khu khu khu khu khuuuuuuu khuuuuuuu . . . 768 | lagta hai aaj bhi bating nahi milega #indvssa 769 | fake , nakli . pahle se fix tha ye game . . . . 
770 | rajpal yadav ki train shuru hote hi khatam ho jayegi. #celebtrains #railbudget2015 771 | mere cmnt pe na koi like krta na rply aisa kyu :( 772 | variables solve karne ke liye sufficient equations hai hi nahi , bechara guddu kaise solve karega . . . 773 | class 2 me to 99% adhe class ko aate hai . . . tu sach me mature nahi hai 774 | Aabhe salman bhai ko suna mat lagaao vo kisi ki sunne vala nahi voto jo manme aaye vo karega bhai 775 | ek mms par itna lafda q yar . . . r u all crazy 776 | sabki same problem :( 777 | dhoni ke sath ke sare fixer hai 778 | rahul gandhi ka gujrat elections me prachaar karna is like rajpal yadav trying to win over angelina jolie from brad pitt . . . . 779 | just to let u know whats happening 780 | app sirf plan hi kerega . . . . success nehi . . . plan to me bhi kar sekta . . . 781 | pakistan mein kal #pk movie ka release hona boht mushkil hai, kuch log dekhne jarahe hain, agar release hogai,... http://t.co/egebazwnmn 782 | seriously yr ullu bukhey sab kch benefit scheduld cste ko huhhh ...... 783 | right ... DU's sytem sucks ... bc result hi ni aya abi tk 784 | I don't intend to hurt you but I can't help myself . I am not in any good situation to thik good about anyone . M feeling very pessimistic and maybe you are thiking bad about me but sorry I cant do any more bcoz mujhe aisa bana diya Gaya h . I have become like this , yes , very hard . I told you that day , I lost trust on many thigs . Mai kaise rah rahan hun rahne de . Akela nhi hun family hai saath mein . Sorry and please don't expect from me bcoz mera dimag sahi jagah par nhi h bilkul bhi . Sorry kyunki mujhe pata h tujhe bura lag rha h ki mai aisa behave kar rha hun . 785 | 100 bhi naa hopayenge :p #indvsuae #iccworldcup2015 786 | Rajvir singh bhai saaab apni comment pe hi likes . Hats of to u 787 | sidhe bachche courtesy vaale , staff room men jaayen to sare teachers ko ek saath good morning kaise wish karen ek samsya yah bhi thi ! 
788 | dil b to weight k hisab se bada hoga yr .. accpt kr leta .. gf aur gadde , dono ki feeling deti -P 789 | rt facts vs fiction #landacquisitionbill diyar #aaptards dont read tumse na ho payega ! http://t.co/zkoj8khdt2 790 | today is desperation day . if you know what i mean . 791 | #railbudget2015 happy that no new train announced..at lease pahle jo announce ki wo to chale..nahi to hawabazi jaari rahti ;) 792 | jo bhi ho arvind kejriwal ji , pure india ko apne comedian style se entertain bahut kar rahe hain . 793 | Lagta hai Admin metro me dusro ke armpits apni toungh se leak karta hai ... Achcha hai atleast kuch to rahat milegi Admin ki is samaj sewa se logon ko metro me ... D 794 | #unionbudget2015 sabse bada chutiya budget...gaand marane gayee #modisarkar 795 | yaha toh 10rs ka scam ho gaya 796 | i just got back from hawaii on monday and i'm feeling sick too . 797 | kobe in the 4th is like eli manning . #beware 798 | exam date jaanbhooj ke tabhi rakhi jaati thi jab matches hote the . aur maths ka paper theek match ke next day hota tha 799 | haan sunao ... humbhi dekhen ... confessions dalne ke ilawa .. wat else u do ? 800 | Kuch b install na hora 801 | well ab bjp supporters ko anna bhi nahi bhate . kuchh to sharm karo . this is why i support aap 802 | pahunch rha h rc 803 | Bhaijaan yeh msg , apke Muslim brothers ho ghuma rahe hai apko badnam karne ke liye 804 | subak subak :'( :p 805 | shan seems like an imaginary frnd guddu kii mom toh kbhi notice hii nhi karti usse 806 | kya yaar frst time koi cnfssn padha wo b bkwaas nikla :-( 807 | yaar totalg confusiong 808 | Modi ji aap ka sv asakeam fail hai... 809 | #indvsuae saala match mai break ki kya jaroorat thi. chhota mota match #iccworldcup2015 810 | ye kaisa problem diya hai kacchua sir ne last pic mein . . . ? its difficult than getting l power in contra 811 | already fixed by bb8 b'coz gautam gulati is best friend h arpita khan ka so ! ! 
812 | mechanical me toh fir bhi chance hai aur engineers ka kya hoga . . . . . . 813 | mat dekho brothers and sisters , dekha nahi jayega 814 | mario our conta nhi h ismai 815 | Ap late ho ap ne apne cometment pori nai ki 816 | ek hi question me 2x-2x ! ye zehreela question kachua sir hi bana sakte hai 817 | =d kachhua sir ko senti-mental kr diya . . . . . #bollywoodcalling :) 818 | bhaii fighting sort out kr rha hu 819 | are sale sb fix h yr 820 | and any vashikaran problem 821 | he is chor 822 | situatn itni buri ho gyi h k nw indian men r tagged as rapists 823 | Ghr Jake start kruga 824 | bhai agar b button se 30 player na aaein contra me toh re start kar lena game ! ! :d 825 | ye admi pagal ha 826 | nicesssssss ........... kise jamane mai bandariya hogi . tabhe she knows very well 827 | sabse saste rate electricity ke delhi main hi hai ab free main milegi delhi walo ko electricity ye hi hai na , agr main jhut bol raha hu to google par search kar lo , free ki aadat mat daalo nahi to delhi ko uttar pradesh bnane main tym nahi lgega 828 | bahut ganda msg tha 829 | Ye fake id h salman sir ki kyuki esa ho nhi skta ki salman sir ko sat shree akal likhna aata :-) 830 | ye board pe liki problem b sapno wali problem se kam nahi h . . . 831 | saale hote hi makkar hai aajkl ke teachers :@ 832 | Unko pics moti ko dikhaunga hain 833 | ye cat level ka problem kyu diya hai ? ! 834 | Ab diyo back ka exam next year chu saale . Exam khatm karke dekh leta bc 835 | arbaaz bhi thik kehte the aap less puntual hai 836 | Tu ek short temperd or ghamandi girl hai .. And chutia ladki tere neigbr ko pta nai hoga ki usne tujhe prpos kia hai .. or bc u fuck ur self ... like oggy does ... 837 | Saale ne heavy dose de diya 838 | flop movie kutta salman 839 | Ladka bs kamau hona chaiye badiya body to mere phn k bi h p 840 | teri purani bhabhi ki yaad agyi meko i swear :'( <3 841 | aur ye out x-( 842 | pahle ki sarkar ne isse kyun nahi kiya . . 843 | rt mera naya slogan... 
copyright lunga iska.. doobegi dilli hogi kangal... agar aaya delhi me dubara kejriwal 844 | Tera pyarr real hota to second time tujhe pyar nahi hota .. Think about it -> 845 | Are angry men wala doc kya hai u ? 846 | M rc hu.. 847 | situation itni buri ho gayi hai ki now indian men are tagged as rapists :( 848 | salo charas bhi ban h, to kya pina chor diya tumlogo ne? baat baat pe outrage karna band karo nhi to poore twitter ko jala dunga. #beefban 849 | rt lagta hai aapne aaj ka twitter trend nhi dekha. #rahulonleave n #whereisrahul show how caring ppl 850 | mee . . . india ki lag gyi aaj :/ 851 | admin ke mat maano ye birth se ab tak single h . 852 | Ha thk kha bt har ladki ek jaise ni hoti i known kuch girl aisi hoti h jo money par depend krti h bt sare ladke bhi loyal ni h 50% ladke ladkiya dhokebaj h 853 | gandhi hatao...congress bachao....varna museum mein jao.... #aapsweep 854 | Faila bhai wo b spcl offer h Badbu b bs kallupura walo k liye 855 | bhai realy hurt jab nahi aana tha toh kyu kaha tha u know me ana study chodhkar 1 hour s msg kar rahi mera exam hai kal phir bhi bye :( :( 856 | guddu ka haseen sapne :d :v 857 | salman flap ab 858 | Mere yeha ek ladka bhi nhi jayega movie dekhne 859 | are...same to you kutte! india jeet gayi !!!! #wontgiveitback #indvssa 860 | Tu project karle 861 | worng hai yaar 862 | #aapsweep modi:-hello kejriwal merapas 3 seat hai kya karu kejriwal : olx per bech de 863 | rohit sharma to guddu k first time baithne se pehle hi out ho gaya tha . . 864 | general catogry ki kahin koi aukat nai hai .. chahe engineering ho ya bcom -_- piste saale general wale hi hain / ( 865 | Abi tk chhar n bje k konsi ghadi lga rki h,, 866 | are wo ladka sex ka bhukha hoga yar just 4get hm love dil se kiya jata hai na ki physical relation se . agar bf gf dono sehmat ho then its ok otherwise its not 867 | india doing well - sale uae pe rehem khao. india struggling - sale ye world cup jeetenge? uae ko toh hara ni pa rahe. 
#indvsuae #fans 868 | ptaaa ni mere frd list mei ni h 869 | kachua sir ki laat seedha bum pe xd xd 870 | bf chnge kr chnge kr liya ab usko toy frnd ki tarh use kr k yehi copy paste mar diyo galat baat h bhen yeh 871 | chalo km se km ab to pak ke patake use ho gae honge;-) 872 | exactly :( ye to kuch b nhi hai . . koi 31st dec n 1st jan ko exam dene ka dukh kya samjhega :( bt the phase is gone nw :) :d 873 | in loverz ne to metro ko b ni choda / 874 | hey south dilli gals+en_suffix ... ek pgl ldki ko dek kr tume sare south gals+en_suffix ko judge kr lia .... how stupid u r 875 | Ek website h ... yahan sbko admission milta h .. www.doob k mar ja.com p general walo k liye ye wali website best h ... Yahan pr free mai bhi admission ho jata h ... p ;) 876 | Harami ki aulaad hai salman 877 | apni mummy ko bhi item bolta hai kya sale .... jerk ... 878 | guddu: kachua sir , bas 1 minute ruko , setting jama lene do . . sawal ko mere aane na aane se farak nahi padna , solve to mujhse hona nahi hai . . 879 | Salman khan criminal. 880 | ab mai wait kar raha hu ki wo chota sa farmer ( vadra ) kab sucide karega :/ 881 | & i think i m d only laziest kaam s jee churaane vaala , meet all d baap of aalasis , nikahtoo of india #rahulonleave 882 | hahahahahahaha guddu ki ek apni duniya jaha woh rehna chahta hai . . . :p :p :p :p 883 | Are wo ladka sex ka bhukha hoga yar just 4get hm love dil se kiya jata h na ki physcl relatn se agr bf gf dono ise sehmat ho then ts ok bt otherwise ts nt 884 | Ek suggestion or lele deo lgake jaaiyo . . bhar garmi bht h !! -P #OldMonk admin summers mei bullet proof jacket tu pehan k jata h kya date pe -D 885 | badi badi baatein , allegations lgaate , economy ki haalat khrab krte , conditions lgaate , fir patli gali se bhaag jaate 886 | lol really these chip games . . . sala ye backchodi b kitni ki h 887 | Movie ke postar fardege hum sale kutte ki 888 | sahi kaha . . 
izzt loot li dollar ne inr ko 69 pe lake :p 889 | batao ji sentence bhi theek phir bhi teacher ka ye bartav , very very bad hai ji . 890 | wht t f**k yr avehi kuch b post kar dete h 891 | bahut embarass karwaya hai papa ne kai baar . 892 | Class attendance waala 893 | they are promising moon right now to get the cm post . . . waade aise hone chahiye jo janta k welfare k liye ho . . . free wahi baantna chahta hai jo desperate ho kisi tarah ek baar bas kursi mil jaye . 894 | you'll remain forever alone agar aise hi lame excuses dete rahe toh P D 895 | :'( :'( :'( :'( :'( tum gande pati ho bhot gande I hate u I really hate u 896 | mt le pagal 897 | aur mujhe black money se aane wala 1500000 rs nhn chahiye 898 | iss prestige ne guddu ke prestige ki watt laga di . 899 | even the master ji dont like ab junior :p 900 | kejrival ka manifesto me red lait ka kya huva k me o nahi lunga o jikar bhi nahi he aam aadmi he to lait ka jikar kyu nahi kiya 901 | fixing me saja hone ka gift 902 | bc dusre section ki monitor itna ganda face banati thi jese chalk nhi uska hath maang liya ho 903 | bhohot rone or emotional black mail k baad cassette milti thi . . or fir poppat ho jata tha . . . :( 904 | d guy cmmtd suicide ..... ye nhi dikh raha kisi grl ko ...... thnk abt d crcmtances ..... 905 | aa gaya congress ka or ek mohara 906 | india me sbse bde chalte firte terror group rss ka member hai modi . . ! ghante ka development karayega wo . . . ! ! aur jo ye kahte h ki \ " we can't give congress chance again\ " to listen tum akele pura india nai bnate . . . ! ! india boht bda h . . . ! ! saare smjhdaar he lgte ho smjh he jaao baaki . . . . ! ! 907 | tum rhne do beta.. tumse na ho payega. ##twistedbollywooddialogues #rahulonleave. #whereisrahul" 908 | mujhey laga ki guddu scooter pe uncleji ke agey khada hokey jayega :) cold drink pee ley itti thand ho rahi hey :) 909 | sorry year... sara then salman khan kiy sathe mere thenb barbat... 
:'( 910 | iss prestige ne guddu ke prestige ki watt laga di . 911 | hon'ble #ak49 syd apko yd ho 49 days ki govt . jhn aap chillate rhe ki koi mere under(*delhi police speially) nhi h . . . . . to ab kya agr iss br #cm (hypthetical situation) bn gye to . . . sb under aa jyenge . . . . ? ? ? ? 912 | rail mantri ko or trains chalani chahie ye paresani sirf is train ki nahi h bahut traino ki kahi bhi jana ho har train me janral ke do hi dibe h 913 | Sala hrami muslmano ka dushman badnam kiya hai Muslman ko kalank hai kabrustan mai tera paisa kaam nahi aayega balki #aamaal aor sabse pahle #imaan #pakka hai ya nahi 914 | #indvspak #indiavspakistan lag ge ron pakistan wale 915 | Phli line padh kr mje lga ' Malaji+hi_suffix garvnirodhk pills n condoms ' ka add aa gya ! # 916 | 56 games he hote the saale mein . -_- 917 | lucky bhai ki frd list mei ho skti h wo 918 | itne to kbhi number nhi aaye bc exam m mere #aapsweep 919 | nw feelng lucky ke tv ni h match dekhne ke liye .. othrwise itni gaaliyan de rhi hoti 920 | Thnk u akki 921 | is it a confession ..? yr vase mera v nhe aya avi tak ( 922 | #landordinance #landbillinparl #landacquisitionbill #landacquisitionordinance yah bill modi govt ke liye yaha se modi sunami gayab kar dega 923 | Bhaijaan aap nahi aaoge toh, apke name pe high court mein case kardungi .... 924 | Han yar job ki hai both t tension 925 | i remember those ladoos nothing else . 926 | Han be dalle 927 | aur tata wala namak aur tea bechta hai . . infosys ek cyber cafe hai . . 928 | Groupg pe chala gya message 929 | #landacquisitionbill band karo saale kisaano ka minimum support price. aao saala market price pe sale lalchi kanjoos kisaan. 930 | right aisa humari economics waali mam karti hai kabhi usne kisi bhi student ko 10 se above nhi diye 931 | delhi ki janta nahin kuch dalal media aur news traders aap ke sath hain.. 932 | guddu pakka a seciton me hoga , aisi badmash prajati ke insaan a section me hi hote hai . 
933 | #railbudget2015 highlights dekhoon ya #afgvsco highlights. samajh nahin aa raha. 934 | es worldcup me v guddu purani tv dekh raha hai . . . kam se kam led tv to de dete garbage ji . . . 935 | fuck u ... AAP k khujli party wala h tu . 936 | #delhidecides ki humey to sabh kuch free chahiyae .. abh kuch paise nahin dengey :p 937 | CST se walking jaana Gateway of India .. Bahut mast h 938 | commentary: dono match accelerator par....kya bol rha hai bahenchod #indvspak #fucklogic 939 | Movies laptop par 940 | bdw 3rd yr m h to trix ka xam kaisa gaya .... p 941 | guddu ki to lottery lag gayi . . . yaha to 2 , 2 rupaye contri kar k 20 rupaye ka cricket match khelte the 942 | delhi bjp ab campaigning karne ki jagah opinion polls ko jhuthlane mai lagi hui hai.............bro ki juthi tassali 943 | modeling band karo . 944 | bhai i know . . . tabhi harami laundo ko hi tag kara tha 945 | wt hapen ab mt bolna 5bje krenge bat 946 | Missingg holi bhai 947 | #superbudget e susra budget bahute technical lagat h, cigarette iit, iim,tax chod deyo toh hazar crore ki baat lul lagat h #unionbudget2015 948 | kaun nahi chipka hua tv se :p 949 | Ya to assignment ya exam ya project 950 | chalk lane us section me jao jis section me ladki ho . . ;) ;) 951 | English ki maa behen ho gayi Bhai :v 952 | yeh questions ulta mommy log puchti hai . :-/ 953 | rt uae ke khiladiyo ko pottiyaan lagi. sab wicket chhod ke pavilion bhaag rahe hein. #indvsuae 954 | " dhabaaaaad " miss hua hai last frame me sound effect . . . 955 | meli choti si express-rahul chota bheem gandhi. #celebtrains #railbudget2015 956 | abp vale khareed rage hain . . . . hahaha majak baba rakha hai . . . . 
kabhi moi news bhi post kr lia karo 957 | modi ji flop ho gai ye bhi apki rally modi lehar delhi me nahi rahi hehehe 958 | feku 500 crore barbad karne wala hai apni 5 rallies main 959 | Koy musalmaan ye film nahi dekhe ga ye film flop hogi 960 | theater me ja k salman ka movie kabhi nahi dekhunga Internet pe 10 din k baad aayega download kar k dekhung!!! free me 961 | school ka wo bag firse thama de maa yah! jindagi ka booz huthana bohut muskil hai. #aapsweep #delhidecides #aapstorm http://t.co/n5m6syevh5 962 | jo ganda log hai woh ganda hi rahega according to pychology u cant change anyones behavior woh act karege par change nhi hote ! 963 | Already posted on DMCC and MEC / . Ab yaha bhi sympathy k liye agya / 964 | phone band kr de yaar idhar tera koi phone aata h udhar india ka wicket chala jata h 965 | Keh ke leli sbne tumhari .... P feel pity 4 u . D 966 | modi ki tyre puncture ho rahi hai . . . . 967 | bijlee rahegi toh hi bill aayega nah. many parts of delhi face power cuts. #aapsweep #aapstorm #aapkidilli #aap good luck delhities 968 | morning me thand me kaise nahata hai guddu . . ajkl to baraf nikal rhi hai nal se :p 969 | ek time tha jab laddu milte the . . . :) 970 | chutiye . . . sirf vade krne se vote milte to aaj mein bhi pm hota 971 | mere saath isse similar ghatna ghati . sunday ko mcg mein bahut der tak control karne ke baad toilet gaya . aur jaate hi kohli out :( 972 | Bs bhai yha b yhi haal h. Tnsn hoti h ab jobs nd all future 973 | #railbudget2015 saala kuch nhi hona is desh ka ^_^ 974 | I am from IIT and yahan bhi hostels+en_suffix mein chori hoti hai ...! 975 | kejriwal party to mahelao ki parehaniyo ko nahi hataegi balki kejriwal party delhi se mahelao ko hi hata degi islea desh ki tarakki k lea modi aur bjp ka saath de aur share kare 976 | Bhai tu delhi walo ka nam ku kharab kar raha itni tatti english likhi he gramaticl mistks kafi he yr eg . 
On ma byk ko tune in ma byk likh rakha he kya tu apni gf ko byk ke andr lkr ata tha 977 | and i am in office :'( 978 | looks like 'kaho na pyaar hai' phase ended for modi #aapsweep #aapkidilli #delhidecides 979 | kejriwal is such a deshdrohi . 980 | true story and ab normalization ne engineering colleges ki b maar li :( 981 | Code dkh ra hu 982 | fattu tu apne page k link bta dekh tere page ka kya hoga p Ye d.u k members ki unity h koi #Chu **yapa nhi h 983 | Or nonveg party rakhi bhaiyya ne 984 | mechanical me toh fir bhi chance hai aur engineers ka kya hoga . . . . . . 985 | bhai aj to chooza booza , old monk nd brainles kid ka threesome Zrur hoga -_- 986 | ye pschyo kbhi winner nahi hota agr salman & farah ka haath na hota . 987 | Admn tune lagta h fb k DoN se panga le liya h .... undrworld may halla ho essay pahle bat man le !! 988 | Bhai i'm so sad mje yh song site pe nhe mil rhamai kese sunu yh sng 989 | Kya yaar frst time koi cnfssn padha wo b bkwaas nikla -( 990 | horrible day for turncoats binny, krishna tirath, n of course 991 | salman khan fans is gaandu 992 | kamran akmal - bhai star sports baar baar replay kyun dikha rahe ho dropped catch ka. paise toh utne hi milne hain. #cwc15 #indvspak 993 | haha true . . . . wo 4 games hi bar bar repeat hote the . . 994 | kiski party bewakoof badtameez 995 | bhai aisi english mein toh tera baap bhi nda mein select na ho sake . . ! ! get a life man :-/ 996 | but u cn nvr b perfect itna pata chl gya tere cnfssn se 997 | Standard size gai 998 | kuchh bache toh bas khade rehkar games hi dekhte rehte they , kabhi khelte hi nahin they . chindi types . 
999 | mai chahthi hu kya ki bajrangi bhaijaan film flap hona pukka 1000 | #oldmonk bhai tabhi democracy or page dono ki chudi padi hai 1001 | chalo #beefban kar diya hai to karela kaddu aur capsicum bhi kardo 1002 | arre nhi yaar tera bf week kse ho skta h week me to 7 days hote h 1003 | isi liye hr saal cold hota :d :p >:( 1004 | palika bazar dilli se 250 rs ki laya tha1994 main . sare armaan toot gayen they . 1005 | jitni request dalo guddu ka no reply . . . abhi koi ladki kuch likhe to guddu phatak se reply denge . . . bahot na insafi 1006 | aunty ko jawan ladka chahiye bht confuse h . guys . . 1007 | Musalmano plz open ur eyes salman khan only naphrat ke layak hai 1008 | Chal jhuta....answer ek qustion ka b nhi diya 1009 | nhi yr kapil dev bhot mnhus bolta h hmesha saamne wali team ki tarf se comentry krta h 1010 | ye vo teachers hein jo engineering ke exams pass nahi kar payey or inki girlfriends engineers se shadi karkey chali gayin to sari frustration bacho pe nikaltey hein :@ :( 1011 | sir kya msg krtte kartte mar jau 1012 | srinivasan ka chamcha tha . ek jail jayega toh doosra cement factory sambhalega 1013 | padh lo mere "secular" bhaijaan...islam me bhi gau hatya mana hai #beefban http://t.co/fmf4usmdin 1014 | #corruption karney walon janta maaf nahi karegi #aap #aapstorm #delhi 1015 | kisi me bhi khoj life bbc . . iss blog ke selected statements . news par no one believe . . 1016 | pehle mai musalman hun bad mai bhai ka frnz hunn frenz hai bhajrangi movie flop hogi dekh lena 1017 | hhaha but fir bhi le lete the . . . game ka bhoot jo guddu ki tarh dimag mai rehta tha . . :p 9999999 in 1 <---- ka sticker dekh k hi khush ho jate the but lagane pe pta chalta tha ki kya jhamela hai andar lol . . . :p :) 1018 | to harne mein proud mehsus hota hai apko 1019 | are media walo duniya me isse bhi important news h batane ko . . 1020 | guddu ko laat marne wale kachhua sir . . . 
90s school kids kabhi maaf nhi karegi 1021 | ohh no ye case kab tak chalega or wase v clear h ke ayushi ke father dr . talwar ne uska murder kiya h so ayushi kya ese bht si ladkiyo ko mar dete h sirf chotaa sa point he kfi h 1022 | yadi main media hota to anna bhai ka poll kholta kyonki anna bikau hai . . . 1023 | ye aaiyena naaa wala character you will find in every school and college . har cheej me hoshiyari deta hai ye character :@ :@ :p 1024 | india ka log ka vote ka valu delhi me samach ayyena.india me avo modiji,idhar bhahuth problams he solve karne ka 1025 | l power leke ? sabse bakwas to l or f hi thi . :) 1026 | wait if the power is out tomorrow then i'm going to miss a new how i met your mother , nooooooooo 1027 | bhai garbage bin tune console likha to 1 ghadi ko me ghabda gya sala console hota kya hai , sala hamare govt school me to patti , slate etc bolte the , bhai hindi medium walon ka bhi dhyan rakha karo :p 1028 | if she ws frm SRCC mjhe to fir Oxford se hona chaiye tha ! 1029 | and that too the same game starting with different levels . yeh apne time ka sabse bada scam tha :p 1030 | holi ka koi post nhi hai 1031 | is suwar ko bulaya kis ne tha match ke beech me ! ! ! 1032 | mummy tea mein masala daal dena is cnfsr ne paka diya . . . #boaring 1033 | rajnigandha kahi bhi khayiye, thukiye #kanpur me aake. aapka apna rajnigandha kanpur station #brandstation #railbudget2015 1034 | bhag sala bakwas post update karke tym waste krta hai . . 1035 | mario 8-3 - yeh level pure mario game ka sabse hard level tha . . . . . final level se bhi . . . . . is level mein sabse zyada gussa tab aata tha jab dono green hathooda fekne waalo ko maaro do aur mushroom lene jaao aur miss ho jata tha . . . . . . . . . 1036 | v . true yt vry annoyn :/ bchpan ka dhoka yad agya :( 1037 | sudharega to tu kabhi nahi gaurav . . . :p 1038 | woe pass hogi .. Aur seat dete dte tu fail ...... / 1039 | baaki sab to line me khade h , , , , par guddu or shaan ko dekho . . . . 
khudki alag hi line bna li :p :d 1040 | bad luck to c bcoz of no power 1041 | are wo ladka sex ka bhukha hoga yar just 4get hm love dil se kiya jata hai na ki physical relation se . agar bf gf dono sehmat ho then its ok otherwise its not 1042 | Docks par tha 1043 | #indvsuae one sided booring match ! tweet karne ko man nehi kar raha yaar 1044 | bkwas . dhela ni jitna . 1045 | delhites is desh drohi mentality wale se bachiya , 1046 | and we get baba ji ka thullu :p 1047 | bhai ye sir konsa topic pada rhe hai maths me ? ? linear equation in 2 variables ? ? aisa to kabhi nahi dekha . . . . :o 1048 | jhut aisi koi dawai nhi hai ling nirdharan chrosome xx xy pr depend krta hai or girl boy male ke sperm se hote hai iske liye girl jimmedar nhi 1049 | tum log match fixer ho 1050 | saare teacher kuch zyaada hi masti karte hain . . . . :-p 1051 | tab to world cup har gai 1052 | superb gag . . . . but gussa on u , girls fail nhi hoti mene mario ka 8 level poora cross kiya tha , contra bhi saare levels . . . . koi game nhi bacha mere hath se duck hunt k alawa . . . . ye anyaay hum nhi sahenge 1053 | Enjoy krlo humare paise se ...world tour waw. ..... 1054 | Apne lips pe lipstik ki jagah poison chipka leti to maza ata .. 1055 | India ka hal aisa hote jaiega to india may koi educated person nehi nicklegi 1056 | supw ki aisi ki taisi :-d kavi marks add toh hue nahi 1057 | guddu k papa toh emotional ho gye . . . . 1058 | anna ne bhi aap join kar li public ko chuna laga ke 1059 | kachua sir ne to guddu ki prestige laal kar di . :p 1060 | ek mms par itna lafda q yar ... r u all crazy 1061 | 300:( india u r short of 30runs... kohli k jane k baad band baj gaya, fizul me sohail khan hero ban gaya..gurrrr #indvspak #cwc15 1062 | ek baar mere wrestling stickers chura liye the mere best friend ne . fir ek din uski book ke andar mile . . 
:p 1063 | bhai rehne de aaj mood garam ek ek ko pel dunga twitter pe :p 1064 | hum log(aap) liqor bantne wale ko redhanded pakdtey hain fir humhi log bante ge? z it psble? dn y #shantig sayin so? #aapstorm 1065 | hi is another option i guess .. wo shayad free hai . wo maine ek bar garmi me hyderabad me bhi try kiya tha .. but wo tab bhi kuch problem dikha raha tha 1066 | doni aasteliya me sala nak ktwa diya apni ak sister ke aat husband ko aasteliya lekr gya sale kisi ne bi run nhi bnaye hm bhi dekte h kese jitte h wc doni abi bhi time h tere papa sewag yuwraj jahir gotm ko lele team me nhi to 15frwri se hi bura taim suru ho rha h 1067 | l power waa bkwas . . . i alwez use s full on bauchhar hr trf . . . idiot gals mario to chorho mene contra tak last tk khela h . . . . precious child hood apna zmana zada accha tha 1068 | -------------------------------------------------------------------------------- /Perplexity_CMI/Chunks/40_50.txt: -------------------------------------------------------------------------------- 1 | lol admin atention seeker h jante bujhte post dala ki coment mile >_< 2 | mujhe politics me koi khas interest nahi 3 | is post ko like karne ka matlab hai accept karna ki ma baap ne assignments banaye the :p 4 | Chance bhut kam hai mai dekhsakti hoon ya nahi 5 | sir garbage bin wali book ka part 2 lao . . . ! 6 | Vishwanath/eng ne mail bheji hai 7 | abhe yeh really salman ki id h kya 8 | yo sir Ji mera tuition ka tym ho gaya...plz reply 9 | You better reply me Salman! twitter par nahi toh yahan toh pakka Reply karna padhega lol 10 | bina marne wala mario at red sunrise wale page par . . 11 | Han butg projectg wohg webg developmentg par nhi lunga . . 12 | sir itna to koi apni girlfriend ka b wait nae krta pta nae ap ans kyn nae d rhe 13 | so sad yr ... . prents supprt ni krnge t or kaun krega ... . dnt wry dea ... . god bless u eva .... 
14 | Aap koun saa phone use karte ho that is a useless phone......mera message aapko display nhi kar paa rha hoga...warna aap to reply karte hi karte.... 15 | Parse sirf 4ft me ate h 16 | #indwins aa gaya mauka 6th time phatake phodne ka #cwc15 #indvspak if u agree plz rt 17 | main to park mein see saw pe bhi 5 min wasool karta tha . . . 18 | #blockbustermsg oh papa ji maf krna #blockbustermsg lgana bhul gye ji 19 | modi ke 10lakh ke suit ko hamara 100 rupaye ka muflar nigal gaya .. #aapsweep #aapstorm 20 | ajkal subway surfur ka tym h . . apne tym to sega tha 21 | jab bhi annualy inspection hota tha copies ka ki last copy kab check hui hai . . . mai us time nayi copy bana k check kara deta tha nd bolta tha peechli wali khtm ho gayi . . . :-) 22 | Preet vihar 23 | bhaijaan jaldi aao fb par waiting 4 U 24 | oh aaj google kholkr i'm feeling lucky pr click karungi pkka D 25 | sir jo mere 15 lac kaha gaye 26 | cz wo up d ordr ata n out hota tb ye log khte k ab semis m chang krne k kya zrurt thi ctin ordr ko ! ! ! 27 | Hlp kr do mri plzz bhai 28 | per mummy hamesha list bana k rakhti hain . . . isse match wale din hi sare kaam karane hai . . 29 | #beefban shukr manao, sirf beef ban hua hai. adrak, kanda lasoon nahin. 30 | Bhai 2 minute 31 | Okays launde 32 | salman bhaiya aap shadi kab korogi? me apki shadi ki barat me jana chahta hu 33 | Rajai and nothig 34 | india kahe dill se worldcup fir se!! #indvspak 35 | log #indvspak match ke beech bhi politics pe tweet kyun karte hain? http://t.co/mzcrrrpyr2 36 | Han 37 | Sir kisko rply mat dena 38 | new delhi seat se hi banega 39 | vote to aam aadami party friend 40 | Salman je plz kbhi to rply kiya kro tbhi pta chal e ga ki aap bhi apne fens se pyar krte ho 41 | Gandalf toh mere doggy ka name hai ( woh bhi facebook par ) 42 | WhatsApp par 43 | english newspaper laga do :) 44 | Nahi mam 45 | ho ske to team srinivasan se b resign kar dena bhai . . 
;) :p :d 46 | Dp wale 47 | 7,600th like :) 48 | bhai 1 reply asssalamalaikum 49 | Kyuki lastg tymg toh 50 | #garbage bin to hmare childhood ki " yaadon ka pitara " h <3 ;) :p b-) :* 51 | kachhua sir to hindi ke teacher the english kab se padhane lage 52 | Party to tab hogi jab life mein pretty hogi and pretty life hogi and pretty hi my beloved wife hogi 53 | Plz mera dream true kardona bhaii 54 | mujhe politics me koi khas interest nahi 55 | OK Alia Bora mtmano sujao khoda Hafiz by Mafi mangne ki adat nhi samajjao byyyyyy.... 56 | 4.13 now 57 | suna ha pk movie terminator jesi ha :d 58 | ready 4 uh salman sir.... 59 | Ni yr .. Mujhe acidity ho rakhi h 60 | mast yaar 61 | Or bta kha h ?? 62 | Sana khan add karo mujhe 63 | cycle hoga to bhi chalega #aapsweep 64 | Dekhat Toc ? 65 | tu page ko borin bana raha hai admin koi competiton h organise krde hmare liye 66 | 8lez,ap fix tym bta dijiye sir ki kb are ho.ap fb p 67 | Call kar 68 | #blockbustermsg apko bhi bhot bhot mubaarak mhashivratri ki papa ji....thanx papa ji http://t.co/xxkfmh789a 69 | #rssquestionsteresa #arvindannaback #landacquisitionbill khuda khair kare india ka - are wo tau ambani kar raha hai http://t.co/ijf40j8y8o 70 | tum log mujhe jealous kar rahe ho 71 | Watch out kahi CIA na bta de govt ko !! 72 | khel ke end me south africa ke team ne indian team se kaha . bhai hum south africa se hai . hamare jersey bas green color ka hai . 73 | abey confession page h to confession bhi post kr diya kr -_- 74 | tym ho gya 4pm ho gaye where ru??? 75 | konsi city meh 76 | dd national kya tum sare match dhikha sakte ho world cup ke 2015 77 | bhai isme to apni phd hai . . . 78 | Salman sir hme pta h. W8 79 | ladkiyo ko non virgin hona chahiye . . . virgin to mobile bhi hota hain 80 | bai g kab aooo ge online 81 | kis country ka he or iska nam kya he ? 82 | salman bahi please hum ko bhi chance dena bt krne ka thanks. 
83 | keeping k waqt hi gloves pehente the :p 84 | office me star sports ki live streaming se 85 | Check kar 86 | lagta h aap test me fail ho gyi aunty -P 87 | Kal ghuma marine drive aur gateway 88 | khana itna costly kyun hota hai pantry car me yaar #railbudget2015 89 | koi engineer thali baitha ho to mujhe ye bata do bhai ki ye 10 wale recharge me kitne milenege #unionbudget2015 #sabkabudget 90 | tl g1: best rail budget ever #railbudget2015 g2: i agree babes :) dm g2: didi rail mantri kaun hai? g1: pagli narendra modi, aur kaun ! 91 | tumne muje msg kya tha plz batao please 92 | o aadmi mobile compniyo abba hoga 93 | Chl byee ?? 94 | DELHI METRO CONFESSION pe Bhejna tha na idiot , Yh DU CONFESSION h sayad u dnt knw p 95 | Salman khan me aapka wait kar rahi hu pliz 96 | for kritika vasishta .... is maur se cmpetition jitna h .... hard luck .. ;) 97 | Really aap aaye kya.....i can't beleive.... 98 | 4:5 hoge 99 | Submit maara propos 100 | in topics ki utni value nhi h jitni unemployment ... corruption ki h ... admin is a fool 101 | Sir pls kaise aapse contact kare batana pls ???? 102 | 4:04 over 103 | phir confession kis topic pe karenge :/ sare topic ban kar diye :| 104 | Arrre ... Gand Mara yaar 105 | admin yr tv hi nhi h . . . :( 106 | Abbe yar 107 | chips khane ki awaz chabad chabad ki baje crunch kuch honi chahiye na . . . rubber ke chips hain kya ? ? ? 108 | bhai decide kro 109 | koi muje btayega du colleges kke form r cutt off kab ayegi 110 | Lettr to jayega ghr pr .. n dn u have to cm to univrsty .. tmhare baki k xam b cncl ho skte h .. may b ... bt jisme umc bni usme to pkka h . 111 | are ye kachua sir kine subjects padha lete hai ? kabhi english , kabhi maths :o 112 | confessions n pjs me bht difference hta h dkbos+en_suffix ..... 113 | #railbudget2015 " hey prabhu" accha rail budget dena...:) 114 | Aap reply karenge....? 115 | Kis time kal? 116 | kahin mae to nahin hahhaha ! never dude ..... 
vse peechle confsn pr bna padhe comment kiya tab bhi 150 replies croSs hgaye p lolzz+en_suffix u never know how many chukkers are all arround p 117 | guddu world cup final tak baithe rehna 118 | Vishal ko message karo to koe response nahi 119 | Watching bajrangi bhaijaan 120 | mai mobile se chipka hu on cst station . . . 121 | Ya number de 122 | No.diziye whatsupp ka photo bej dete hai 123 | hello #salman #sir kya mujhe ek reply mila ga plzzzzzzzzzzz 124 | Isi week me 125 | i only see rashtriya samachar on black n white tv . 126 | mehnge suit par bhari muffler #mufflermanreturns #aapsweep 127 | amir kya aap pk movie ke apne banner ko bhi change karenge aaj ke episode ke baad. 128 | sir stars n cricketers ko bhulao . . . 129 | guddu ko smart phone ki jarurat hai :p 130 | hy bodegurd you my friende.love you.... 131 | bhai is best bhai aap ki film kamayab hogi only salman bhai 132 | rt mein toh bhul chali #indvsuae ka match, #ausvnz pyara lage... 133 | salman bhai jay ho 134 | salman ke haters ko koi comment hi mat karo..tottaly ignored..great salman 135 | awesome ! maza aa gaya ! thank you ! 136 | guddu apna pura english ka don hai 137 | development chahiye next generation ke liye . . . 138 | Salman Khan apka dil sch me pure water jaisa hai 139 | bahubali kar rhi h record tod collection 140 | Ap jesi jaha miljati he fir durghatna ghatjati he 141 | I love you salman khan my aap ki bhut bhut jada fan hu 142 | or btaa 143 | salman kaha good. film bajrahgi bhaijaan oldbasyt walkam 144 | Ram ram sir ji 145 | saath mein summary bhi likh deti . . . wahi pad leta :-/ :-| 146 | best thi 64 in 1 147 | gas bill bachaye . . prestige cooker ghar laaye 148 | fantastic banda . 149 | nyc...mene active kr liya... 150 | mere pass ab bhi kayi cassettes hain :-) 151 | final to 15 feb ko jeet gaye the . . uske baad to bonus tha . . . . 152 | its k . v . . aur village ka model to thik h . girls se to teddy bear banvaya tha . . 
:-p :-d 153 | iska throw main kholunga hahaha too good ;-) 154 | l power leke contra end karta hai . . . classic 155 | is course me to ham sb expert k b expert hai . . :d :p 156 | Gm ladke 157 | bhut hi shi brother . . . 158 | rumaal phek kar seat booking karne wali suvida ban karni chaiye #railbudget2015 159 | sabse happy luncher paa ji :d 160 | khejriwal plant lgayega 1000 walee note ka 161 | modi & parbhuji ko dhanybad ji hind #railbudget2015 162 | bodoland boy ki tarapse ak nomste . ak jindabad . 163 | Hi salman bai 164 | ab paaiye free recharges daily is link ko click karke install kare register kare offer me jaaye aur paaiye free talktime . 165 | bt . . . iske maximum games boor hote the 166 | my sallu bro always mindblowing salam sallu bhai 167 | delhi walo vote for bjp 168 | 2 2 creditg ho gye 169 | plz cal sallu bhai bangladesh big fan me plz cal +8801719447771 jai ho 170 | sign up kijiye or daily 150 ka free recharge kijiye ! 171 | y pik or b ache ho skte h without virat 172 | best line is: mujhey cold drink peeni hai . 173 | Direct baat salman khan sir ke sath wooooooooo 174 | Osm party salman bhai apka gift 175 | i lv gujiya 176 | free bijlee 177 | ak is my hero 178 | bc , kise ne notice kiya ? guddu ke sweater par 'apple' ka symbol bana hai 3:) 179 | wowwwww..burraahhhh #msgincinemas great papa ji...:) 180 | friends ap roj 500 rupees apko free milega 181 | Oy already has d schema 182 | Okay bhayya 183 | bhagwan esi gf sb ko de .. par start mjhse kre p 184 | Sir bhajrangi bhaijan....sabse achii film hogi.....boollywodd mein. ...I wish 185 | well ab bjp supporters ko anna bhi nahi bhate . kuchh to sharam karo . this is why i support aap :) :) 186 | Salman Bhaijaan ye sab un logoo ki chal hai jo ye chahtai hai ki app ki image ko nukshan ho 187 | arbind kejriwal vs bjp . 
what a great man puri bjp aur congress mil kr harane mein lgi hai iss one man army ko 188 | next screen iska exam hona chahiye 189 | kitchen garden me kheti karnewale log aaj media panel me farmers ko represent kar rahe. #landacquisitionbill 190 | Happy hindustaani Garv divas 191 | india ko world cup main harana mushkil hi nahi naamumkin hai . . . 192 | okk bhai bolo ku6.. 193 | ab aisa school kholna hi mera objective h life ka 194 | New game bheja hun ek mail check kar 195 | shaan ne pehle se protection kar rakhi hai k kachua sir se eye contact he na ho :p :d 196 | #msgincinemas #welovemsg superhit jyegi papa ki film 197 | sahi he :v . . pura same 198 | free rail 199 | dusroo ki ni khud ki suno.....koi na hatthii chalti h to kutte bahukktee h...... 200 | Bhai apki movie apka name pavan kumar ur mera real life m name h pawan kumar, toh m y movie dekhuga hi, motivate JO hona h, ek request h sir aap plz mere dp p like krdo plzz! Movie blockbuster hit toh pakki h! 201 | best bahana . . . sar woh meri copy le gaya and laya he ni ya absent hai 202 | #blockbustermsg same to u papa g nd thanksss for ur such a lovely wiahes happy mahashivratri papa g 203 | awesome yaar 204 | tuje sab he pata garbage bin 205 | well ab bjp supporters ko anna bhi nahi bhate . kuchh to sharam karo . this is why i support aap :) :) 206 | wowwwwww omg aaj kyu nahi ............love u salman 207 | Bhai waiting eagerly for you...Plsss jaldi aao na, 3.30pm se 4pm aur avi 5pm ho gaya hai.Phn haanth mei lekar, aapkia page mei aapka wait kar rhe hai. 208 | mujhe aapke comment padne me bhi bada maja aata hai :) :d 209 | wide ball ke liye tuti hui brick :) epic 210 | aur faayde me rahe apne lunchar paaji . . 
chips ka packet mil gaya :p :d 211 | Namastey mam 212 | pls all frendns wach an the movie bhai ne Yeh harare liya present kiya sab log pls Yeh film dekha yaaya 213 | kya imagination power hai ladke ki :p 214 | Bhai hum aap par poora trust karte hai aur humme pta hai kabhi galat nahi karoge 215 | guddu me talent to haiii :) 216 | kya strategy thi iski business spread karne ki 217 | maa ka dialogue :- ajji sune to ho ye school se hero ban krta hai 218 | un sabko maine or meri cid team ne hi ander dala ha :) 219 | is bar sahi decision . my fav gauti we love we love gauti 220 | devil apke pi6e...nice bhaijan 221 | Salman sir bahot acchi movie hai super hit blockbuster movie. 222 | 64 in one ke test bhi honge ! ! ! aur sath me sports period me track and field waale games bhi :d jo 100000 in one me repeat hoke aate the :d 223 | free petrol 224 | pandey ji always super hit mmmuuaaa 225 | rt #msgincinemas papaji movie dekh li mene awesome 226 | eid mubarak ho bhai jaan love u 227 | ye jhaadu nahi vaccum cleaner hai ;) #aapsweep 228 | ab kya but team india ne accha game kiya they played nice 229 | salman bahi uar best 230 | Hye meri cutey 231 | i think " l " ki power se jyada achchi " s " ki power thi . mil jaye to 8 stages paar ! 232 | offic walo ne hr floor pe screen lagai hai . . . araam se kaam kro n tv dekho 233 | hum to copy daily leke jaate the . . . (always bina homework) :d :v 234 | SALMAN bhai ap tenshan mt lo apki movei supar dupar hit hai kuch m k l or kuch b k l hai jo apni (G) mra rhey hai or jo mra rhey hai vo bhi apki movei dekey gey mgr adath hai na suleymani kida jo ghusa hai......:) 235 | hum paidaayshi actor hain boss . . 236 | #blockbustermsg uuuuuuuuuuuuummmmhhhhhhhhhaaaaaaaaaaaa guru pappppaaaaaaa jiiii http://t.co/yvmb56gfw1 237 | coz pappu cant dance saala . . . . gucci ka perfume lagata h sala :p :d 238 | shandar song hai! 239 | BAJRANGI BHAIJAAN 700 CRORE!! 
240 | handsome sallu <3 241 | lastly itne din baad garbage bin is back :) :d 242 | How's ur back pain ryt now????? I m tensed....btao na plz salman aap to khe rhe the ki 4 pm baat kroge but aap to 1 bhi cmnt ka reply nhi diya plz is waale ka de dena plz... a request 243 | oh aaj google kholkr i'm feeling lucky pr click karungi pkka 244 | Ohhhh myy salluuu 245 | i love salman sir mera no 7535964415 ap ka bahutt tagda fenss hu i love yy 246 | Aap to supar hit ho aor aapki film supar hit hai 247 | Mam namastey 248 | ooo teri climax to last me tha :) 249 | bhai totally faadu super hero hai naagraj . . . hollywood ki tarah bollywood mein bhi inki movies banni chahiye . 250 | coaching me hu #cricbuzz zindabaad , aaj ka topic to gaya , 251 | Love u jend actor wo jo har lock mein sweet beautiful guineas hot sohran lagey osy khtey hain. 252 | 100% free recharge ! :-) :-) 253 | 1 no trealir bhai 254 | Ekdum detail mein 255 | 300, crore insha Allah 256 | lol it's awesome ! mere bag me hmesa chalk rakhti thi morning me classes start hone se phle blackboard pe cartoons bnane k liye :v 257 | bhai unblock kr de unhe atleast vo jo fake nahi h 258 | Hehhe 259 | i thought yeh meri hi school mein hota tha . . i guess sab jagah same hi hai ! 260 | please sir apke rhte vito power dila dena ...................... 261 | guddu tere school ki chutti , jab tak world cup chalega ;-) 262 | nyc sallu bhai.. 263 | Good oooo 264 | Don't wary always be happy insaallhatala aap ke film sb se jada box office me kamane wale film hoge 265 | super 1 . . . ye scene frame karne yogya hai 266 | ek dum sahi post h (y) :-/ 267 | Hahahah rofl 268 | Salman realy good yar 269 | oye guddu papa se bol vo helmet lgana bhool gye . . . nhi to mummy ko police wale ko smbhalna pdega . . . ;-) 270 | thank god hm aisy jgh paida ni hue 271 | i know mai janta tha ke sallu is tarha ke bekar bat kbhi nhi bol sakta 272 | salman bhai zindabad...love u bhai... 
273 | abe kachua sir / kushwah sir to hindi pdhate they na 274 | 6- modi ji ne india ko global level ka desh bana diya hai 275 | har jagah gaming , comics and har cheez dairy milky . . . . :) 276 | bhut talented log h india m 277 | income base pr quota ? ? ? ? i thnk quota systm to hona hi ni chaiye , students ko unki abilities ke base pr admission milna chaiye 278 | haha muka muka , australia ne thoka thoka . world class rounduu team . 279 | hmmm .. ryt ... bt jo real minors hain wo benefit nhi le paate ..... aajkal abve minority fayda uthate hain 280 | Kya good betichod 281 | battle tank mein main i think main level 120 tak pahuncha tha (should go into my cv) and then ammunition power itne kha liye hote the that white bricks ke saath saath collateral damage mein eagle queen khud hi ude dete the . :-) 282 | Mere cmnt pe na koi like krta na rply aisa kyu ( 283 | BC porn site ki tarah advertise karo tum bas ..... !!!! male hoe .. 284 | really miss u mahi sir . ab match dekhne ka maja nhi aayega . 2014 me sab senior player chale gaye . so sad 285 | login karne ke baad aapke account me 150 ka free recharge add ho jayegaa 286 | aaaptards be like naacho bc #kiskidilli #delhidecides #ndtvresults 287 | You tube pe search kar le ?? 288 | moti baes 289 | raina and jadeja fight with each other aur tumhe kiss ki padi h P 290 | Uberg hai . . . . kabi useg nahi kiya 291 | salman sir aap kyo nai aiye online 292 | salman kya hai aam insan hai hamari tarha jo help manga hai allah se mango 293 | supw ki to kabhi class hi ni hui :-d :-d 294 | kachua sir ne pakda hota to pakka mummy ko phone lagate . . . :o 295 | upsc ka exam jo sala clear he ni hota .... 296 | It systemsg dekha ? 297 | agar ne is class me hoti to kissi b boy ko top na karnr deti . . . isme b toper me hi hoti . . . . :d 298 | amit ji south african ho gaye kya . . :o 299 | Gm jaaneman 300 | fake hai 301 | ab to nigerians ki khair nahi. 
#somnathbharti #delhidecides #kiskidilli 302 | Den lunch 303 | Yr ye DUC ka new Admin bht pakka rha hai .... / 304 | guddu pakka mere school se hoga . . . harkatein same to same hein ;p 305 | (gandhi)he iz a murderer. he killed subhas ch. bose 306 | l ki power ? mujhe laga s ki power best thi :p 307 | or bta 308 | jab pk movie chali toh ramdev aur swamy ki jali #indiastandswithpk 309 | #railbudget2015 rly ke breadwinner, freight ko competitive mat banana #heyprabhu song: jhoom jhoom kauwa arre wahwah http://t.co/s3aq7pv2pr 310 | 3g , coalgate se bhi bada scam ! ! 311 | guddu ki to lag gyi , band aakhon par hero tha , aankh khuli to zero :d 312 | Thodi c* 313 | 2014 me badla lena h Congress se , we r with Sikhs 314 | dehli walo apna phone check kr lo. halke halke wi-fi k signl aane lg gye honge password : muffler_khasi #aapsweep #aapstorm #delhidecides 315 | Surprise uhh 316 | yeah gulam but sbki matureness nikal jati hai tym ane par . . 317 | sala mera maths ka paper hamesha india pakistan ke match ke agle din hota tha . 318 | Kya problem h 319 | ab to guddu ke marks minus me aayenge :p 320 | Flop jaaigi movie teri....aehsan framosh.. 321 | Ol those jerks ... jo India me rhte h , India ka khate h ... fir b by heart Indian+en_suffix nhi h .... / 322 | Ae fake salman Khan he Salman khan aur tiger khan ek adme he jo 2 2 accout use ker rha he sassery ne time wast ker deya 323 | Bakwas actor ki bakwaas film 324 | Agr replay nhi diya to me maggy khalunga 325 | ...Hostel 326 | wrong number 327 | pakistani tweet . . . haar jeet game ka part h . . important cheez yeh hai ki sabko batting mili . . . 328 | bjp-cong-aap-others 3-0-67-0 delhi election k result kaam aur ishant sharma k bowling figures same hai. #aapsweep #delhidecides 329 | Dektha hun kaise release karega movie 330 | shubham teotia . . iske number final me judenge . . lol :) 331 | ye sala aabmi hai ya transister . 
332 | bhai apni toh lag gai march closing or india ka match 333 | saale raina catch chhodta he #indvsuae 334 | ghatiya family 335 | kal se guddu times of india ghotega 336 | har game ke 500 version hote the :) 337 | kejriwal is a feku 338 | sala pyr bi aajkal bahut type ka chl raha h .... causal type 339 | modi is the best , rahul is maha chore h . 340 | bhai aisi english mein toh tera baap bhi nda mein select na ho sake . . ! ! get a life man :-/ 341 | abd ka c katne wala hai #indvssa 342 | boss! agli baar, beefy sarkar. #beefban 343 | ye sali generation hi kharab thi . . :-/ shame on you rekha , sunita , kachhua and all :p :@ 344 | batao ji sentence bhi theek phir bhi teacher ka ye bartav , very 2 bad hai ji . 345 | pratiyogita darpan ki spelling mistake hai :) 346 | family me saath train mein phas gaya . . . 12 hrs late from delhi :( 347 | download hogi , toh fake toh ho hi nahi sakti 348 | Salman saale tu chorr h. Kamine kutte . 349 | isse bettr hota kuch motivatng post kiya hota for d welfar of country / ye sb faltu bakwaas post krne ka koi sense ni h 350 | supw class mein hum school garden ki jhaadiyan saaf karte thay :p 351 | saala hamaare school me hindi me baat karne pe 10 rs fine lete the . . . . 352 | Dekte hi kayse hit karta hi film Salman is kamina 353 | Kal match dekha ? 354 | second time bakra mat bano delhi 355 | haaaaa haaaa #bjpflopshow bjp kaa too delhi maii balatkar hoo gayaa #india #delhivotes #delhidecides 356 | light kati hui hai :) welcome to india :3 357 | board pe jo problem hain wo such main tough dikhta hain :p 358 | mujhe to lga tha papa ki band bjne wali h , bt yha to chkr hi ulta pd gya . . ;) :p 359 | ana bhi aap me samil hogeya . imandari koi nanhi sab politics me entry marke lutna chahate hai . an isme ana bhi samil hogeye . 360 | meri SITUATION BI KUCHH AISE HI THI BT HAME KAM TYM HUA THA SATH MOVE ON YR HE DNT WANT U IN HIS LYF 361 | 2500-3000 tak 362 | humara to sunday ko bi hai exam :( 363 | we got toffee instead of ladoo . 
kanjoos the sare 364 | Choro sir...ye country kbhi feelings ke respect krna ni seekh sakta... 365 | congress nahi aap nikli indian team . . . hahaha 366 | phanku man 367 | are results ko chod . . . admission to hua nhi abhi tak . . . cut off batani h to bta de 368 | class math ki hai ya english ki ? 369 | pakka frustated engineer hoga ! 370 | Han ji chacha 371 | *warna 372 | Hum fans ki koi kadar hi nahi h 373 | ek bhi dhang ka game ni hota tha aise casette mein . . . 374 | bjp chor 375 | abey confession page h to confession bhi post kr diya kr -_- 376 | rt desh virodhi log modi sarkar k #landacquisitionbill ka virodh karwa rahe hai http://t.co/8vxpoypghc 377 | india ko world cup jitana h to guddu ko tv se chipka k rakkho :p 378 | friend yahan internet bahat slow ..... hai./..... sorry 379 | staff room m party wo b bacho ke paiso se hhuh . . . :p 380 | bhag kg bhag 381 | love life jahanum ban jayega, restaurants afford karne ke liye ladkein 3 din dinner nahi khayenge bichare!! #unionbudget2015 #superbudget 382 | #railbudget2015 maan gaye tumko presstitutes kya hote hai u are prooving day by day gajab randipana gajab 383 | chor kamina kejriwala congress k corruption k bareme bolke 28seats kamaya aur congress k 8seat k support leka cm bangaya . . 384 | Tera interestg websiteg developmentg mein hai 385 | kch jyada hi bol gye , over confidence mei . . ! ! 386 | hamy apky real life se koi sarokaar nahi. ap actor hu aur apki mvz dekty hai thats it baqe ju karu ya na karu ur life 387 | Mera first kiss to Sunny Leone ke poster ke saath tha '( 388 | launde , , , nfs k jamane me contra p atke pace h . . . 389 | assalamualaikum ..... salman bhai me ye puchna chahti hu k jo baate sunne ko milri hei muje is film se related. kya aapne sach me esi koi baat boli hei ya ye sab rumors hei ....... plz tel bhai jaan 390 | sir jitni soch hogi utna hi bara manifesto banega na . and also that ls elections ke samay ka manifesto bhi to kya that . 
391 | yr ye duc ka new admin bht pakka rha hai . . . . :/ 392 | hilte hi next out ho jayega . . . :d 393 | kal se free bijli, wifi and water...woohoo! far dee #bjpigs ki. #aapsweep #aapkidilli #aapstorm 394 | Thk h 395 | abh lo aagaya budget jisko b problem hai ukhaad lena #unionbudget2015 396 | ye pm jhoontha hai . . . kisi angle se honrable nahin hai . . . 397 | Coz pappu cant dance saala .... Gucci ka perfume lagata h sala p D 398 | modi ke samne to kasab bhi khada ho jata to ye use vote de dete 399 | we know bhaijaaan kwo sab fake tha 400 | ab toh india karega thrill 401 | naam bade and darshan chhote!!! #delhidecides 402 | garbage bin ka pehle gag hai jispe ladkiyan door door tak comment karti ni dikh ri :d 403 | yaar problem toh kafi tough hai 404 | kachua sir ko itni english aati hai ? ? :o 405 | flop. thaki huee moviee. bigg flo[ 406 | true story and ab normalization ne engineering colleges ki b maar li :( 407 | K bole6e,..Tui ki janis anti religius film kake bole,.. 408 | agar me delhi ka citizen hota , i wouldn't vote for aap . . . 409 | Nenu noru moseskonu 410 | Kyaaa sach me ye salman khan ki id he.........!!!!!!shayad fake he ye 411 | player ko change kro nahi to khushi lete hi rah jawoge 412 | guddu itni paas se tv dekhega toh aankhe button ho jayengi ! 413 | india is bar haregi . . karon jo new khilari he bo log australia and newzilen ki krish may jayda sofol nehi he . . 414 | anna khud hell jhoge sath m jo media neutral nhi h wo b 415 | -------------------------------------------------------------------------------- /Perplexity_CMI/Chunks/50_60.txt: -------------------------------------------------------------------------------- 1 | kejiriwal ke free pani mai modi jee bathroom jayenge #delhidecides 2 | Hum fans ki feel ki koi kadar hi nahi h 3 | kuch log itne despo hain is page par reply karne wali girls ki profile open kar k friends request bhejte hain . 4 | salaam sir kaise ho sir plz inbox per message karo 5 | beef milega show pe? 
#beefban 6 | koi girl mujhe frnd add kre 7 | sir apka kya stand h #pk movie ko lekar??? 8 | #blockbustermsg papa g tension muket karo.. 9 | salman ne reply kiya kya guys 10 | koi gal nai mujhe pata hai aap bahut busy rahte hai all the best veerji 11 | Bs bdiy Oy kon c trphy?? 12 | guddu lucky charm ban gya indian team ke liye ;) ;) :d 13 | salman sir so rhe h....don't disturb 14 | mario k stages alag-2 me count hote the :d 15 | hahahaha tension bhagau page h yaaaar #garbagebin :d 16 | plz rahamat karo papa ji #msgincinemas 17 | guddu india ka lucky charm hai :d 18 | i wish aisa school sach mein hota . . . masters ki degree milti 19 | use to gate se bahar hote hi utarna compulsory tha . . . ;) :d 20 | guddu shocked . . uski acting skills pr doubt 21 | kaha new year yar , yaha github block kar rakha h govt ne . . apko new year ki padi . . . :/ 22 | Pakistan m yeh move nh chil sekte chair parr de hum cinema wallo ko lhr m tu kabhi nh 23 | khud ko fans bolte hai lekin kabhi fans ban nhi sakte kyuki kabhi kosis hee nhi ki samjhne ki bhai ko 24 | pakistan green ke go ko literally le rahe hain. #indvspak #indiavspakistan 25 | oye fake id #salman_khan tera to public #acount hai ... real hai to #page se reply mar 26 | Madarchod supar dupar flop ho gae teri movieBUHUBALI ROCXxx :) 27 | -------------------------------------------------------------------------------- /Perplexity_CMI/Chunks/60_70.txt: -------------------------------------------------------------------------------- 1 | pls mane call karo 2 | Abe hr nd accounts me checking 3 | 5 class me kitte projcts bnate the 4 | plz is page ko liked kare 5 | Vishwanath/eng ke underg 6 | thats true humaara chemistry ka exam hai monday ko 7 | 1 rply sir plzz 8 | bjp layo dilhi bachao . 9 | modi ka magic he ye 10 | aap jindabaad . jai ho . 11 | Jai ho jai ho jai ho. Bhajrangi bhaijaan 12 | guddu bada hi bhaukali he yr:-) 13 | game k mamle , me top . . . 
padhai me phussssss 14 | english me weak to english vatavaran or math me weak hota to . . . . . 15 | -------------------------------------------------------------------------------- /Perplexity_CMI/Chunks/70_80.txt: -------------------------------------------------------------------------------- 1 | 1 no bhau 2 | -------------------------------------------------------------------------------- /Perplexity_CMI/Commands_Readme: -------------------------------------------------------------------------------- 1 | 1. Download CMUToolkit, extract it, and place it in the Perplexity_CMI folder 2 | 2. Navigate to the Perplexity_CMI folder 3 | 3. cat codemix_train.txt | CMUToolkit/bin/./text2wfreq | CMUToolkit/bin/./wfreq2vocab -top 20000 > a.vocab 4 | 4. cat codemix_train.txt | CMUToolkit/bin/./text2idngram -vocab a.vocab | CMUToolkit/bin/./idngram2lm -vocab a.vocab -idngram - -binary a.binlm -spec_num 5000000 15000000 5 | 5. echo "perplexity -text Chunks/0_10.txt" | CMUToolkit/bin/./evallm -binary a.binlm 6 | 6. echo "perplexity -text Chunks/10_20.txt" | CMUToolkit/bin/./evallm -binary a.binlm 7 | 7. echo "perplexity -text Chunks/20_30.txt" | CMUToolkit/bin/./evallm -binary a.binlm 8 | 8. echo "perplexity -text Chunks/30_40.txt" | CMUToolkit/bin/./evallm -binary a.binlm 9 | 9. echo "perplexity -text Chunks/40_50.txt" | CMUToolkit/bin/./evallm -binary a.binlm 10 | 10. echo "perplexity -text Chunks/50_60.txt" | CMUToolkit/bin/./evallm -binary a.binlm 11 | 11. echo "perplexity -text Chunks/60_70.txt" | CMUToolkit/bin/./evallm -binary a.binlm 12 | 12. echo "perplexity -text Chunks/70_80.txt" | CMUToolkit/bin/./evallm -binary a.binlm 13 | 13. echo "perplexity -text Chunks/80_90.txt" | CMUToolkit/bin/./evallm -binary a.binlm 14 | 14. echo "perplexity -text Chunks/90_100.txt" | CMUToolkit/bin/./evallm -binary a.binlm 15 | 15. Note the perplexity reported in each step.
16 | -------------------------------------------------------------------------------- /Perplexity_CMI/Observation: -------------------------------------------------------------------------------- 1 | Observation: 2 | 3 | ############################################## 4 | # CMI range # Perplexity # 5 | ############################################## 6 | # 00-10 # 115.94 # 7 | # 10-20 # 92.94 # 8 | # 20-30 # 92.38 # 9 | # 30-40 # 93.92 # 10 | # 40-50 # 107.11 # 11 | # 50-60 # 138.43 # 12 | # 60-70 # 137.88 # 13 | # 70-80 # 1975.43 # 14 | ############################################## 15 | 16 | The maximum CMI in our data lies between 70 and 80. 17 | -------------------------------------------------------------------------------- /Perplexity_CMI/README.md: -------------------------------------------------------------------------------- 1 | # Objective: 2 | Find the CMI (Code Mixing Index) for every utterance and divide the data into chunks based on the CMI values. 3 | Finally, find the relation between CMI and perplexity. 4 | 5 | # Files: 6 | #### "1_train_data.json": 7 | Contains a JSON object. It has nearly 10k tweets with tags attached to each word. 8 | 9 | #### "2_1_preprocess.py": 10 | Preprocesses the data - retrieves the data from the 'text' and 'lang_tagged_text' attributes and stores them in the respective files (3_text.txt and 3_lang_tagged_text.txt). 11 | 12 | #### "2_2_CMI.py": 13 | Finds the CMI for every utterance (Cu) and stores the values in order in the file "3_CMI_values.txt", using the tags of each utterance that were stored in "3_lang_tagged_text.txt". 14 | 15 | #### "2_3_dataToChunks.py": 16 | Based on the CMI values stored in "3_CMI_values.txt", it divides the data in "3_text.txt" into 10 different chunks, one per CMI interval of width 10 from 0 to 100.
17 | 18 | # Folders: 19 | #### "Chunks": 20 | Contains 10 text files; each file holds the data whose CMI values fall in that file's range ("2_3_dataToChunks.py" does this job). 21 | 22 | #### "CMIv2programme": 23 | Contains pre-written files for calculating CMI values. 24 | -------------------------------------------------------------------------------- /Perplexity_CMI/a.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 0, 4 | "lang_tagged_text": "Eid\\HI ka\\HI zaakat\\HI fitra\\HI lene\\HI kab\\HI aa\\HI rha\\HI hai\\HI ", 5 | "matrix_language": "HI", 6 | "sentiment": 0, 7 | "text": "Eid ka zaakat fitra lene kab aa rha hai" 8 | }, 9 | { 10 | "id": 5, 11 | "lang_tagged_text": "@\\UN drkumarvishwas\\EN @\\UN hemraj302015\\EN @\\UN aaptivists\\EN @\\UN drrakeshparikh\\HI @\\UN ikumar7\\EN @_\\UN mahimagupta\\HI aapka\\HI kya\\HI vichaar\\HI hai\\HI pk\\UN movie\\EN ke\\HI baare\\HI main\\HI ?\\UN ", 12 | "matrix_language": "HI", 13 | "sentiment": 0, 14 | "text": "@drkumarvishwas @hemraj302015 @aaptivists @drrakeshparikh @ikumar7 @_mahimagupta aapka kya vichaar hai pk movie ke baare main?" 15 | } 16 | ] 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Natural Language Processing 2 | 3 | Dataset used: Twitter codemix data. 4 | 5 | 1. [Language Modelling](https://github.com/Abhishekmamidi123/Natural-Language-Processing/tree/master/LanguageModelling): 6 | - Calculated Trigram, Bigram, Unigram perplexities on codemix data. 7 | 8 | 2. [CMI vs Perplexity](https://github.com/Abhishekmamidi123/Natural-Language-Processing/tree/master/Perplexity_CMI): 9 | - Calculated the Code Mixing Index (CMI) for each tweet and separated the tweets into 10 sets based on the CMI values. For each set we computed the perplexity, and then examined the relation between CMI and perplexity on the data we collected.
10 | 11 | - Each folder contains a `README.md` describing what we have done there. 12 | 13 | ## Contributors: 14 | [M R Abhishek](https://github.com/Abhishekmamidi123) and [K Vagdevi](https://github.com/vagdevik) 15 | --------------------------------------------------------------------------------