├── .hgignore ├── Bengali_New.xml ├── Bengali_Primary.xml ├── Bengali_Vrinda.xml ├── Bengali_Xlit.xml ├── CART.py ├── CherryPy-3.2.2.tar.gz ├── EnglishPronouncingTrees.tar.bz2 ├── Gujarati_New.xml ├── Gujarati_Primary.xml ├── Gujarati_Shruti.xml ├── Gujarati_Xlit.xml ├── Hindi_Mangal.xml ├── Hindi_Mangal_Mobile.xml ├── Hindi_New.xml ├── Hindi_Primary.xml ├── Hindi_Xlit.xml ├── IndianPronouncingTrees.tar.bz2 ├── Kannada_New.xml ├── Kannada_Primary.xml ├── Kannada_Tunga.xml ├── Kannada_Tunga_Mobile.xml ├── Kannada_Xlit.xml ├── LICENSE.txt ├── Malayalam_Kartika.xml ├── Malayalam_New.xml ├── Malayalam_Primary.xml ├── Malayalam_Xlit.xml ├── Marathi_Mangal.xml ├── Marathi_New.xml ├── Marathi_Primary.xml ├── Marathi_Xlit.xml ├── Nepali_Mangal.xml ├── Nepali_New.xml ├── Nepali_Xlit.xml ├── Punjabi_New.xml ├── Punjabi_Raavi.xml ├── Punjabi_Xlit.xml ├── Python Cart ├── Cart.c ├── Cart.h └── python │ ├── QuillCNCart.c │ └── setup.py ├── QuillEngXlit.py ├── QuillLanguage.py ├── QuillManual.py ├── QuillManualHelper.py ├── QuillPrimary.py ├── QuillSourceProcessor.py ├── QuillTrainer.py ├── README.md ├── RingBuffers.py ├── Tamil_Latha.xml ├── Tamil_New.xml ├── Tamil_Primary.xml ├── Tamil_Xlit.xml ├── Telugu_New.xml ├── Telugu_Primary.xml ├── Telugu_Raavi.xml ├── Telugu_Xlit.xml ├── additional_text_files.zip ├── bengali.tar.bz2 ├── config.py ├── const.py ├── demjson.py ├── gujarati.tar.bz2 ├── hellocherry.py ├── hindi.tar.bz2 ├── kannada.tar.bz2 ├── logsystem.conf ├── malayalam.tar.bz2 ├── marathi.tar.bz2 ├── mysqlquill.py ├── nepali.tar.bz2 ├── primaryHelper.py ├── punjabi.tar.bz2 ├── quill_cherry8088.conf ├── quilljson.py ├── startquill_cherry.py ├── startquill_manual.py ├── tamil.tar.bz2 ├── telugu.tar.bz2 ├── unique_word_files.zip ├── wordCounter.py └── xlitGen.py /.hgignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | 3 | *.pyc 4 | *.txt 5 | *.so 6 | *.pyd 7 | cherrypy/* 8 | bengali/*.xml 9 | gujarati/*.xml 10 | 
hindi/*.xml 11 | hindiMobile/*.xml 12 | kannada/*.xml 13 | malayalam/*.xml 14 | marathi/*.xml 15 | nepali/*.xml 16 | punjabi/*.xml 17 | tamil/*.xml 18 | telugu/*.xml 19 | EnglishPronouncingTrees/* 20 | IndianPronouncingTrees/* 21 | logs/* 22 | dump/* 23 | *.zip 24 | *.bz2 25 | *.gz 26 | -------------------------------------------------------------------------------- /Bengali_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 'aa','vowel' 22 | 23 | 24 | 25 | 26 | 27 | 'ae','vowel' 28 | 29 | 30 | 31 | 32 | 33 | 34 | 'ao','vowel' 35 | 36 | 37 | 38 | 39 | 40 | 41 | 'ih','vowel','short' 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 'iy','vowel' 53 | 54 | 55 | 56 | 57 | 58 | 59 | 'uh','vowel' 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 'uw','vowel' 71 | 72 | 73 | 74 | 75 | 76 | 77 | 'eh','vowel','short' 78 | 79 | 80 | 81 | 82 | 83 | 84 | 'ey','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ay','vowel' 92 | 93 | 94 | 95 | 96 | 97 | 'oy','vowel' 98 | 99 | 100 | 101 | 102 | 103 | 104 | 'ow','vowel','short' 105 | 106 | 107 | 108 | 109 | 110 | 111 | 'ow1','vowel' 112 | 113 | 114 | 115 | 116 | 117 | 118 | 'aw','vowel' 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 'k','cons','ngroup' 127 | 128 | 129 | 130 | 'g','cons','ngroup' 131 | 132 | 133 | 134 | 'ng','cons' 135 | 136 | 137 | 138 | 139 | 140 | 'ch','cons','ngroup' 141 | 142 | 143 | 144 | 'jh','cons','ngroup' 145 | 146 | 147 | 148 | 149 | 't','cons','ngroup' 150 | 151 | 152 | 153 | 'd','cons','ngroup' 154 | 155 | 156 | 157 | 158 | 'th','cons','ngroup' 159 | 160 | 161 | 162 | 163 | 'dh','cons','ngroup' 164 | 165 | 166 | 167 | 'n','cons' 168 | 169 | 170 | 171 | 172 | 173 | 'p','cons','mgroup' 174 | 175 | 176 | 177 | 'f','cons','mgroup' 178 | 179 | 180 | 181 | 'b','cons','mgroup' 182 | 183 | 184 | 185 | 'm','cons' 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 'y','cons' 
# -*- coding: utf-8 -*-
# @Date    : Jan 25, 2013
# @Author  : Ram Prakash, Sharath Puranik
# @Version : 1

import sys
import math


class CARTWord(object):
    """One training / lookup sample: a word, the index of the letter under
    consideration (the focus), the class assigned to that letter and a weight.
    """
    __slots__ = ['word', 'focus', 'classID', 'count']

    def __init__(self, w, f, cID=u'\u0000', freq=1):
        self.word = w
        self.focus = f
        self.classID = cID
        self.count = freq

    def incCount(self, freq=1):
        """Add ``freq`` to the sample weight."""
        self.count += freq

    def getCount(self):
        """Return the sample weight."""
        return self.count

    def getKey(self, scope):
        """Return the window of ``scope`` letters either side of the focus,
        suffixed with the focus position inside that window.
        """
        start = max(self.focus - scope, 0)
        end = min(self.focus + scope, len(self.word))

        trimword = self.word[start:end + 1]
        trimfocus = self.focus - start

        return trimword + str(trimfocus)

    def trimToScope(self, scope):
        """Cut ``word`` down in place to ``scope + 1`` letters either side of
        the focus and re-base ``focus`` onto the shortened word.
        """
        start = max(self.focus - scope - 1, 0)
        end = min(self.focus + scope + 1, len(self.word))
        self.word = self.word[start:end + 1]
        self.focus = self.focus - start


class splitRule(object):
    """A node question: "does the letter at ``relativeIndex`` from the focus
    match one of the tokens of a feature?".

    The feature is referenced either by index into the owning node's feature
    list (``contextId``) or, when ``contextId`` is -1, directly through
    ``contextFeature``.
    """
    __slots__ = ['contextFeature', 'relativeIndex', 'contextId']

    def __init__(self, rel=0, contxtId=-1, contextFeature=None):
        self.relativeIndex = rel
        self.contextId = contxtId
        self.contextFeature = contextFeature

    def setRule(self, rel, contxtId):
        """Point the rule at feature number ``contxtId`` at offset ``rel``."""
        self.relativeIndex = rel
        self.contextId = contxtId

    def setContextFeature(self, feature):
        """Store the feature itself and drop the index reference."""
        self.contextFeature = feature
        self.contextId = -1


class CART(object):
    """Binary classification tree over the letter context around a focus
    letter. Words matching a node's rule go to ``leftCART``, the others to
    ``rightCART``; terminal nodes carry ``classId``, a list of
    ``(class, weight)`` pairs sorted by descending weight.
    """
    __slots__ = ['classId', 'wordList', 'leftCART', 'rightCART', 'nodeSplitRule', 'terminal',
                 'contextLen', 'splFeatures', 'treeFocus', 'nodeID', 'features', 'contextPrefOrder']

    def __init__(self, key, cartWords=None, contextLength=4, specialFeatures=None, contextPrefOrder=None):
        """Create an unbuilt node.

        key              -- the focus letter this tree is for
        cartWords        -- CARTWord samples for this node (default: none)
        contextLength    -- how many letters either side of the focus are used
        specialFeatures  -- extra features; each is a sequence of tokens
        contextPrefOrder -- offsets to try, in order; by default
                            0, -1, 1, -2, 2, ... up to +/- contextLength
        """
        # None instead of [] as default: a list default is shared by all calls.
        if cartWords is None:
            cartWords = []
        if specialFeatures is None:
            specialFeatures = []

        self.wordList = cartWords
        self.treeFocus = key
        self.leftCART = None
        self.rightCART = None
        self.nodeSplitRule = splitRule()
        self.contextLen = contextLength
        # Give the slots read by isTerminal()/letterToClassID() a value so an
        # unbuilt node reports "not terminal" instead of raising AttributeError.
        self.terminal = False
        self.classId = []
        self.features = specialFeatures[:]
        self.splFeatures = specialFeatures[:]
        self.assignFeatures()

        if contextPrefOrder is None:
            self.contextPrefOrder = []
            sign = 1
            for n in range(0, 2 * self.contextLen + 1):
                # Floor division keeps the offsets integral under both the
                # classic and the true-division meaning of "/".
                self.contextPrefOrder.append(((n + 1) // 2) * sign)
                sign = -sign
        else:
            self.contextPrefOrder = contextPrefOrder

    def setCARTNode(self, key, nodeId, cntxtLen, spltRule, terminalStatus, classes):
        """Fill in a node from already known values (no training involved)."""
        self.treeFocus = key
        self.nodeID = nodeId
        self.contextLen = cntxtLen
        self.nodeSplitRule = spltRule
        self.terminal = terminalStatus
        self.classId = classes

    def assignFeatures(self):
        """Append one single-letter feature per distinct character seen within
        ``contextLen`` of any sample's focus, then freeze every feature into a
        tuple. Words are padded as "#" + word + "_" first.
        """
        contextFeatures = {}
        for cWord in self.wordList:
            literal = "#" + cWord.word + "_"
            # +1 because of the leading "#" pad.
            minIndex = max(0, cWord.focus + 1 - self.contextLen)
            maxIndex = min(len(literal) - 1, cWord.focus + 1 + self.contextLen)
            for ch in literal[minIndex:maxIndex + 1]:
                contextFeatures[ch] = [ch, "Is letter " + ch + "?"]

        self.features.extend(contextFeatures.values())
        self.features = [tuple(li) for li in self.features]

    def assignClassID(self):
        """On a terminal node with samples, set ``classId`` to the
        ``(class, total weight)`` pairs, heaviest first.
        """
        wList = self.wordList
        if len(wList) > 0 and self.isTerminal():
            counter = {}
            for cWord in wList:
                cid = cWord.classID
                counter[cid] = counter.get(cid, 0) + cWord.count

            # Descending by weight; ties keep their first-seen order.
            self.classId = sorted(counter.items(), key=lambda item: item[1], reverse=True)

    def match(self, cWord, rule):
        """Return True when the padded word, read from the rule's offset
        relative to the focus, starts with one of the feature's tokens.
        """
        word = "#" + cWord.word + "_"
        realIndex = cWord.focus + 1 + rule.relativeIndex
        if realIndex < 0 or realIndex >= len(word):
            return False

        if rule.contextId != -1:
            features = self.features[rule.contextId]
        else:
            features = rule.contextFeature

        for f in features:
            if f == '':
                continue
            stop = min(len(word), realIndex + len(f))
            if f == word[realIndex:stop]:
                return True
        return False

    def nodeAccuracy(self):
        """Return sum(weight_of_class ** 2) / total_weight ** 2 for this node;
        1.0 means every sample has the same class. An empty node counts as 1.0.
        """
        counter = {}
        totalWords = 0
        for cWord in self.wordList:
            cid = cWord.classID
            totalWords += cWord.count
            counter[cid] = counter.get(cid, 0) + cWord.count

        if totalWords == 0:
            # Nothing to classify; avoid dividing by zero.
            return 1.0

        accuracy = 1.0 * sum(count * count for count in counter.values()) / totalWords
        return accuracy / totalWords

    def splitAccuracy(self, rule):
        """Return the weighted accuracy the node would have if it were split
        by ``rule`` (same measure as nodeAccuracy, summed over both sides).
        Returns 0.0 when there is no sample weight at all.
        """
        leftCounter = {}
        rightCounter = {}
        leftCount = 0
        rightCount = 0

        ruleMatch = self.match
        for cWord in self.wordList:
            if ruleMatch(cWord, rule):
                leftCount += cWord.count
                leftCounter[cWord.classID] = leftCounter.get(cWord.classID, 0) + cWord.count
            else:
                rightCount += cWord.count
                rightCounter[cWord.classID] = rightCounter.get(cWord.classID, 0) + cWord.count

        leftAccuracy = 0
        rightAccuracy = 0

        if leftCount != 0:
            leftAccuracy = 1.0 * sum([count * count for count in leftCounter.values()])
            leftAccuracy = leftAccuracy / leftCount

        if rightCount != 0:
            rightAccuracy = 1.0 * sum([count * count for count in rightCounter.values()])
            rightAccuracy = rightAccuracy / rightCount

        total = leftCount + rightCount
        if total == 0:
            return 0.0
        return (leftAccuracy + rightAccuracy) / total

    def bestSplit(self):
        """Try every (offset, feature) pair in preference order and return the
        first rule with the highest accuracy, or None when the node is already
        pure or no rule beats the unsplit accuracy.
        """
        currAccuracy = self.nodeAccuracy()
        if currAccuracy == 1:
            return None

        bestAccuracy = currAccuracy
        bestRule = splitRule()
        tempRule = splitRule()

        for delta in self.contextPrefOrder:
            for i in range(0, len(self.features)):
                tempRule.setRule(delta, i)
                newAccuracy = self.splitAccuracy(tempRule)

                if newAccuracy > bestAccuracy:
                    bestAccuracy = newAccuracy
                    bestRule.setRule(tempRule.relativeIndex, tempRule.contextId)

                # A perfect split cannot be improved on; stop searching.
                if bestAccuracy == 1:
                    return bestRule

        if bestAccuracy > currAccuracy:
            return bestRule
        return None

    def split(self, bestRule):
        """Make this node an inner node using ``bestRule`` and create the two
        child nodes from the matching / non-matching samples.
        """
        self.terminal = False
        self.nodeSplitRule = bestRule

        leftWords = []
        rightWords = []

        # Iterate rather than index so any iterable of samples is accepted.
        for cWord in self.wordList:
            if self.match(cWord, bestRule):
                leftWords.append(cWord)
            else:
                rightWords.append(cWord)

        self.leftCART = CART(self.treeFocus, leftWords, self.contextLen, self.splFeatures, self.contextPrefOrder)
        self.rightCART = CART(self.treeFocus, rightWords, self.contextLen, self.splFeatures, self.contextPrefOrder)

    def build(self):
        """Recursively grow the tree from ``wordList``. Does nothing for a
        node without samples. The training data (``wordList``,
        ``splFeatures``, ``features``) is deleted from every built node.
        """
        if len(self.wordList) == 0:
            return

        bestRule = self.bestSplit()

        if bestRule is not None:
            self.split(bestRule)
            self.leftCART.build()
            self.rightCART.build()
            # Store the feature inside the rule itself: the feature list it
            # was indexing is deleted below.
            nodeFeature = self.features[self.nodeSplitRule.contextId]
            self.nodeSplitRule.setContextFeature(nodeFeature)
        else:
            self.terminal = True
            self.assignClassID()

        del self.wordList
        del self.splFeatures
        del self.features

    def isTerminal(self):
        """Return True only when ``terminal`` compares equal to True."""
        return self.terminal == True

    def letterToClassLookup(self, word, focus):
        """Return the most likely class for the letter at ``focus`` in
        ``word``.
        """
        cartWord = CARTWord(word, focus)
        # Must go through self: there is no module-level letterToClassID.
        return self.letterToClassID(cartWord, False)

    def letterToClassID(self, cartWord, multiple=False):
        """Walk the tree for ``cartWord`` and return the class of the terminal
        node reached: the heaviest class, or all of them (heaviest first) when
        ``multiple`` is True.
        """
        node = self
        while not node.isTerminal():
            rule = node.nodeSplitRule
            if node.match(cartWord, rule):
                node = node.leftCART
            else:
                node = node.rightCART

        retValue = [c for (c, i) in node.classId]

        if multiple == True:
            return retValue
        return retValue[0]

    def getNodeLabel(self, node):
        """Return a display label for ``node``: its class list when terminal,
        otherwise the rule offset and the last token of the rule's feature.
        """
        if node.isTerminal():
            label = ", ".join(["(%s, %s)" % (x, y) for (x, y) in node.classId])
            terminalInfo = label.encode('utf-8')
            return terminalInfo
        return "At %d\\n%s" % (node.nodeSplitRule.relativeIndex, node.nodeSplitRule.contextFeature[-1])

    def getNodeClassRepr(self, node):
        """Return the class list of a terminal ``node`` as text, "[]" for an
        inner node.
        """
        if node.isTerminal():
            label = ", ".join(['("%s", %s)' % (x.encode('utf-8'), y) for (x, y) in node.classId])
            terminalInfo = "[%s]" % label
            return terminalInfo

        return "[]"

    def inOrderSetLabel(self, labelGen, node):
        """Assign ``nodeID`` to every node under ``node`` in in-order
        sequence, drawing the values from the iterator ``labelGen``.
        """
        if node.isTerminal():
            node.nodeID = next(labelGen)
            return
        self.inOrderSetLabel(labelGen, node.leftCART)
        node.nodeID = next(labelGen)
        self.inOrderSetLabel(labelGen, node.rightCART)

    def preOrderWrite(self, f, node):
        """Write ``node`` and then its subtrees to ``f`` in pre-order."""
        node.writeToFile(f)
        if node.isTerminal():
            return
        self.preOrderWrite(f, node.leftCART)
        self.preOrderWrite(f, node.rightCART)

    def storeCart(self, f, treeType='predictive'):
        """Number the nodes in-order (from 1) and write the tree to ``f``."""
        # NOTE(review): as it appears here this format string has no
        # conversion specifiers for its two arguments; presumably markup was
        # lost from the literal -- confirm against the repository copy.
        f.write('\t\n'%(self.treeFocus, treeType))

        def labelGen():
            i = 0
            while 1:
                i = i + 1
                yield i

        lblGen = labelGen()
        self.inOrderSetLabel(lblGen, self)
        self.preOrderWrite(f, self)
        f.write('\t\n')

    def writeToFile(self, f):
        """Write this node's id, split rule, context length, terminal flag and
        class list to ``f``.
        """
        # NOTE(review): several literals below look like they lost their
        # markup (the first has no specifier for nodeID) -- confirm against
        # the repository copy before relying on the output format.
        f.write('\t\t\n'%self.nodeID)
        relIndex = self.nodeSplitRule.relativeIndex
        f.write('\t\t\t\n')
        f.write('\t\t\t\t%d\n'%relIndex)
        contextId = self.nodeSplitRule.contextId
        f.write('\t\t\t\t%s\n'%contextId)
        contextFeature = repr(self.nodeSplitRule.contextFeature)
        f.write('\t\t\t\t%s\n'%contextFeature)
        f.write('\t\t\t\n')
        cLen = self.contextLen
        f.write('\t\t\t%s\n'%cLen)
        terminalStatus = self.isTerminal()
        f.write('\t\t\t%s\n'%terminalStatus)

        classAssigns = self.getNodeClassRepr(self)
        f.write('\t\t\t%s\n'%classAssigns)
        f.write('\t\t\n')

    def addBinaryNode(self, node):
        """Insert ``node`` below this one as in a binary search tree keyed on
        ``nodeID`` (smaller ids to the left).
        """
        cart = self
        while True:
            if node.nodeID < cart.nodeID:
                if cart.leftCART is None:
                    cart.leftCART = node
                    return
                cart = cart.leftCART
            else:
                if cart.rightCART is None:
                    cart.rightCART = node
                    return
                cart = cart.rightCART

    @staticmethod
    def prepareTrainingData(li, scope, freq):
        """Turn ``(literal, classes)`` pairs into ``{token: [CARTWord, ...]}``.

        For every position i of ``literal`` a CARTWord with focus i and class
        ``classes[i]`` is created and filed under the token at that position.
        Samples whose ``getKey(scope)`` was already seen for that token are
        dropped (the first one wins). Every sample gets weight ``freq``.
        """
        data = {}
        for (literal, classes) in li:
            word = "".join(literal)
            for i, c in enumerate(literal):
                cMap = data.setdefault(c, {})
                cWord = CARTWord(word, i, classes[i], freq)
                key = cWord.getKey(scope)
                if key not in cMap:
                    cMap[key] = cWord

        finalData = {}
        for (k, v) in data.items():
            # A real list, so callers can index it and take len().
            finalData[k] = list(v.values())

        return finalData


if __name__ == "__main__":
    words = [CARTWord("topi", 0, u'\u0c9f')]
    words.extend([CARTWord("pata", 2, u'\u0c9f')])
    words.extend([CARTWord("tande", 0, u'\u0ca4')])
    words.extend([CARTWord("hosatu", 4, u'\u0ca4')])
    words.extend([CARTWord("hosatana", 4, u'\u0ca4')])
    words.extend([CARTWord("vasati", 4, u'\u0ca4')])
    words.extend([CARTWord("vasanta", 5, u'\u0ca4')])
    words.extend([CARTWord("tagaru", 0, u'\u0c9f')])
    words.extend([CARTWord("takadi", 0, u'\u0ca4')])
    words.extend([CARTWord("vata", 2, u'\u0c9f')])
    words.extend([CARTWord("rata", 2, u'\u0ca4')])
    words.extend([CARTWord("virata", 4, u'\u0c9f')])
    words.extend([CARTWord("viratanagara", 4, u'\u0c9f')])
    words.extend([CARTWord("rajavirata", 8, u'\u0c9f')])
    words.extend([CARTWord("rati", 2, u'\u0ca4')])

    vowels = ['a', 'e', 'i', 'o', 'u', 'y', "Is letter a vowel ?"]
    cons1 = ['k', 'K', 'g', 'G', 'c', 'C', 'j', 'J', 't', 'T', 'd', 'D', 'n', '', "Is letter a G1 Cons ?"]
    cons2 = ['p', 'f', 'b', 'B', 'm', "Is letter a G2 Cons ?"]
    cons3 = ['y', 'r', 'l', 'v', 'w', 'S', 's', 'h', "Is letter a G3 Cons ?"]

    cons = []
    cons.extend(cons1[0:-1])
    cons.extend(cons2[0:-1])
    cons.extend(cons3[0:-1])
    cons.append('Is letter a Cons ?')

    splRules = []

    splRules.append(vowels)
    splRules.append(cons1)
    splRules.append(cons2)
    splRules.append(cons3)
    splRules.append(cons)

    myCart = CART('t', words, 4, splRules)
    myCart.build()

    # Single-argument print(...) is read the same way by the print statement.
    for word in words:
        print(myCart.letterToClassID(word).encode('utf-8'))

    word = CARTWord("soti", 2, u'\u0ca4')
    print(myCart.letterToClassID(word).encode('utf-8'))
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/EnglishPronouncingTrees.tar.bz2 -------------------------------------------------------------------------------- /Gujarati_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 'aa','vowel' 22 | 23 | 24 | 25 | 26 | 27 | 'ae','vowel' 28 | 29 | 30 | 31 | 32 | 33 | 34 | 'ao','vowel' 35 | 36 | 37 | 38 | 39 | 40 | 41 | 'ih','vowel','short' 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 'iy','vowel' 53 | 54 | 55 | 56 | 57 | 58 | 59 | 'uh','vowel' 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 'uw','vowel' 71 | 72 | 73 | 74 | 75 | 76 | 77 | 'eh','vowel','short' 78 | 79 | 80 | 81 | 82 | 83 | 84 | 'ey','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ay','vowel' 92 | 93 | 94 | 95 | 96 | 97 | 'oy','vowel' 98 | 99 | 100 | 101 | 102 | 103 | 104 | 'ow','vowel','short' 105 | 106 | 107 | 108 | 109 | 110 | 111 | 'ow1','vowel' 112 | 113 | 114 | 115 | 116 | 117 | 118 | 'aw','vowel' 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 'k','cons','ngroup' 127 | 128 | 129 | 130 | 'g','cons','ngroup' 131 | 132 | 133 | 134 | 'ng','cons' 135 | 136 | 137 | 138 | 139 | 140 | 'ch','cons','ngroup' 141 | 142 | 143 | 144 | 'jh','cons','ngroup' 145 | 146 | 147 | 148 | 149 | 't','cons','ngroup' 150 | 151 | 152 | 153 | 'd','cons','ngroup' 154 | 155 | 156 | 157 | 158 | 'th','cons','ngroup' 159 | 160 | 161 | 162 | 163 | 'dh','cons','ngroup' 164 | 165 | 166 | 167 | 'n','cons' 168 | 169 | 170 | 171 | 172 | 173 | 'p','cons','mgroup' 174 | 175 | 176 | 177 | 'f','cons','mgroup' 178 | 179 | 180 | 181 | 'b','cons','mgroup' 182 | 183 | 184 | 185 | 'm','cons' 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 'y','cons' 194 | 195 | 196 | 197 | 'r','cons' 198 | 199 | 200 | 201 | 'l','cons' 
202 | 203 | 204 | 205 | 'v','cons' 206 | 207 | 208 | 209 | 'w','cons' 210 | 211 | 212 | 213 | 214 | 'sh','cons' 215 | 216 | 217 | 218 | 219 | 'zh','cons' 220 | 221 | 222 | 223 | 's','cons' 224 | 225 | 226 | 227 | 'z','cons' 228 | 229 | 230 | 231 | 'hh','cons' 232 | 233 | 234 | 235 | 236 | '-' 237 | 238 | 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /Hindi_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 'aa','vowel' 22 | 23 | 24 | 25 | 26 | 27 | 'ae','vowel' 28 | 29 | 30 | 31 | 32 | 33 | 34 | 'ao','vowel' 35 | 36 | 37 | 38 | 39 | 40 | 41 | 'ih','vowel','short' 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 'iy','vowel' 53 | 54 | 55 | 56 | 57 | 58 | 59 | 'uh','vowel' 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 'uw','vowel' 71 | 72 | 73 | 74 | 75 | 76 | 77 | 'eh','vowel','short' 78 | 79 | 80 | 81 | 82 | 83 | 84 | 'ey','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ay','vowel' 92 | 93 | 94 | 95 | 96 | 97 | 'oy','vowel' 98 | 99 | 100 | 101 | 102 | 103 | 104 | 'ow','vowel','short' 105 | 106 | 107 | 108 | 109 | 110 | 111 | 'ow1','vowel' 112 | 113 | 114 | 115 | 116 | 117 | 118 | 'aw','vowel' 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 'k','cons','ngroup' 127 | 128 | 129 | 130 | 'g','cons','ngroup' 131 | 132 | 133 | 134 | 'ng','cons' 135 | 136 | 137 | 138 | 139 | 140 | 'ch','cons','ngroup' 141 | 142 | 143 | 144 | 'jh','cons','ngroup' 145 | 146 | 147 | 148 | 149 | 't','cons','ngroup' 150 | 151 | 152 | 153 | 'd','cons','ngroup' 154 | 155 | 156 | 157 | 158 | 'th','cons','ngroup' 159 | 160 | 161 | 162 | 163 | 'dh','cons','ngroup' 164 | 165 | 166 | 167 | 'n','cons' 168 | 169 | 170 | 171 | 172 | 173 | 'p','cons','mgroup' 174 | 175 | 176 | 177 | 'f','cons','mgroup' 178 | 179 | 180 | 181 | 'b','cons','mgroup' 182 | 183 | 184 | 185 | 'm','cons' 186 | 187 | 188 | 
189 | 190 | 191 | 192 | 193 | 'y','cons' 194 | 195 | 196 | 197 | 'r','cons' 198 | 199 | 200 | 201 | 'l','cons' 202 | 203 | 204 | 205 | 'v','cons' 206 | 207 | 208 | 209 | 'w','cons' 210 | 211 | 212 | 213 | 214 | 'sh','cons' 215 | 216 | 217 | 218 | 219 | 'zh','cons' 220 | 221 | 222 | 223 | 's','cons' 224 | 225 | 226 | 227 | 'z','cons' 228 | 229 | 230 | 231 | 'hh','cons' 232 | 233 | 234 | 235 | 236 | '-' 237 | 238 | 239 | 240 | 241 | -------------------------------------------------------------------------------- /IndianPronouncingTrees.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/IndianPronouncingTrees.tar.bz2 -------------------------------------------------------------------------------- /Kannada_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 'aa','vowel' 17 | 18 | 19 | 20 | 21 | 22 | 'ae','vowel' 23 | 24 | 25 | 26 | 27 | 28 | 29 | 'ao','vowel' 30 | 31 | 32 | 33 | 34 | 35 | 36 | 'ih','vowel','short' 37 | 38 | 39 | 40 | 41 | 42 | 43 | 'iy','vowel' 44 | 45 | 46 | 47 | 48 | 49 | 50 | 'uh','vowel' 51 | 52 | 53 | 54 | 55 | 56 | 57 | 'uw','vowel' 58 | 59 | 60 | 61 | 62 | 63 | 64 | 'eh','vowel','short' 65 | 66 | 67 | 68 | 69 | 70 | 71 | 'ey','vowel' 72 | 73 | 74 | 75 | 76 | 77 | 78 | 'ay','vowel' 79 | 80 | 81 | 82 | 83 | 84 | 'oy','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ow','vowel','short' 92 | 93 | 94 | 95 | 96 | 97 | 98 | 'ow1','vowel' 99 | 100 | 101 | 102 | 103 | 104 | 105 | 'aw','vowel' 106 | 107 | 108 | 109 | 110 | 111 | 112 | 117 | 118 | 119 | 'k','cons','ngroup' 120 | 121 | 122 | 123 | 'g','cons','ngroup' 124 | 125 | 126 | 127 | 'ng','cons' 128 | 129 | 130 | 131 | 132 | 133 | 'ch','cons','ngroup' 134 | 135 | 136 | 137 | 'jh','cons','ngroup' 138 | 139 | 140 | 141 | 142 | 't','cons','ngroup' 143 | 
144 | 145 | 146 | 'd','cons','ngroup' 147 | 148 | 149 | 150 | 151 | 'th','cons','ngroup' 152 | 153 | 154 | 155 | 156 | 'dh','cons','ngroup' 157 | 158 | 159 | 160 | 'n','cons' 161 | 162 | 163 | 164 | 165 | 166 | 'p','cons','mgroup' 167 | 168 | 169 | 170 | 'f','cons','mgroup' 171 | 172 | 173 | 174 | 'b','cons','mgroup' 175 | 176 | 177 | 178 | 'm','cons' 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 'y','cons' 187 | 188 | 189 | 190 | 'r','cons' 191 | 192 | 193 | 194 | 'l','cons' 195 | 196 | 197 | 198 | 'v','cons' 199 | 200 | 201 | 202 | 'w','cons' 203 | 204 | 205 | 206 | 207 | 'sh','cons' 208 | 209 | 210 | 211 | 'zh','cons' 212 | 213 | 214 | 215 | 's','cons' 216 | 217 | 218 | 219 | 'z','cons' 220 | 221 | 222 | 223 | 'hh','cons' 224 | 225 | 226 | 227 | 228 | '-' 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Tachyon Technologies Pvt Ltd 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | -------------------------------------------------------------------------------- /Malayalam_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 'aa','vowel' 17 | 18 | 19 | 20 | 21 | 22 | 'ae','vowel' 23 | 24 | 25 | 26 | 27 | 28 | 29 | 'ao','vowel' 30 | 31 | 32 | 33 | 34 | 35 | 36 | 'ih','vowel','short' 37 | 38 | 39 | 40 | 41 | 42 | 43 | 'iy','vowel' 44 | 45 | 46 | 47 | 48 | 49 | 50 | 'uh','vowel' 51 | 52 | 53 | 54 | 55 | 56 | 57 | 'uw','vowel' 58 | 59 | 60 | 61 | 62 | 63 | 64 | 'eh','vowel','short' 65 | 66 | 67 | 68 | 69 | 70 | 71 | 'ey','vowel' 72 | 73 | 74 | 75 | 76 | 77 | 78 | 'ay','vowel' 79 | 80 | 81 | 82 | 83 | 84 | 'oy','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ow','vowel','short' 92 | 93 | 94 | 95 | 96 | 97 | 98 | 'ow1','vowel' 99 | 100 | 101 | 102 | 103 | 104 | 105 | 'aw','vowel' 106 | 107 | 108 | 109 | 110 | 111 | 112 | 117 | 118 | 119 | 'k','cons','ngroup' 120 | 121 | 122 | 123 | 'g','cons','ngroup' 124 | 125 | 126 | 127 | 'ng','cons' 128 | 129 | 130 | 131 | 132 | 133 | 'ch','cons','ngroup' 134 | 135 | 136 | 137 | 'jh','cons','ngroup' 138 | 139 | 140 | 141 | 142 | 't','cons','ngroup' 
143 | 144 | 145 | 146 | 'd','cons','ngroup' 147 | 148 | 149 | 150 | 151 | 'th','cons','ngroup' 152 | 153 | 154 | 155 | 156 | 'dh','cons','ngroup' 157 | 158 | 159 | 160 | 'n','cons' 161 | 162 | 163 | 164 | 165 | 166 | 'p','cons','mgroup' 167 | 168 | 169 | 170 | 'f','cons','mgroup' 171 | 172 | 173 | 174 | 'b','cons','mgroup' 175 | 176 | 177 | 178 | 'm','cons' 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 'y','cons' 187 | 188 | 189 | 190 | 'r','cons' 191 | 192 | 193 | 194 | 'l','cons' 195 | 196 | 197 | 198 | 'v','cons' 199 | 200 | 201 | 202 | 'w','cons' 203 | 204 | 205 | 206 | 207 | 'sh','cons' 208 | 209 | 210 | 211 | 'zh','cons' 212 | 213 | 214 | 215 | 's','cons' 216 | 217 | 218 | 219 | 'z','cons' 220 | 221 | 222 | 223 | 'hh','cons' 224 | 225 | 226 | 227 | 228 | '-' 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /Marathi_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 'aa','vowel' 22 | 23 | 24 | 25 | 26 | 27 | 'ae','vowel' 28 | 29 | 30 | 31 | 32 | 33 | 34 | 'ao','vowel' 35 | 36 | 37 | 38 | 39 | 40 | 41 | 'ih','vowel','short' 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 'iy','vowel' 53 | 54 | 55 | 56 | 57 | 58 | 59 | 'uh','vowel' 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 'uw','vowel' 71 | 72 | 73 | 74 | 75 | 76 | 77 | 'eh','vowel','short' 78 | 79 | 80 | 81 | 82 | 83 | 84 | 'ey','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ay','vowel' 92 | 93 | 94 | 95 | 96 | 97 | 'oy','vowel' 98 | 99 | 100 | 101 | 102 | 103 | 104 | 'ow','vowel','short' 105 | 106 | 107 | 108 | 109 | 110 | 111 | 'ow1','vowel' 112 | 113 | 114 | 115 | 116 | 117 | 118 | 'aw','vowel' 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 'k','cons','ngroup' 127 | 128 | 129 | 130 | 'g','cons','ngroup' 131 | 132 | 133 | 134 | 'ng','cons' 135 | 136 | 137 | 138 | 139 | 140 | 
'ch','cons','ngroup' 141 | 142 | 143 | 144 | 'jh','cons','ngroup' 145 | 146 | 147 | 148 | 149 | 't','cons','ngroup' 150 | 151 | 152 | 153 | 'd','cons','ngroup' 154 | 155 | 156 | 157 | 158 | 'th','cons','ngroup' 159 | 160 | 161 | 162 | 163 | 'dh','cons','ngroup' 164 | 165 | 166 | 167 | 'n','cons' 168 | 169 | 170 | 171 | 172 | 173 | 'p','cons','mgroup' 174 | 175 | 176 | 177 | 'f','cons','mgroup' 178 | 179 | 180 | 181 | 'b','cons','mgroup' 182 | 183 | 184 | 185 | 'm','cons' 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 'y','cons' 194 | 195 | 196 | 197 | 'r','cons' 198 | 199 | 200 | 201 | 'l','cons' 202 | 203 | 204 | 205 | 'v','cons' 206 | 207 | 208 | 209 | 'w','cons' 210 | 211 | 212 | 213 | 214 | 'sh','cons' 215 | 216 | 217 | 218 | 219 | 'zh','cons' 220 | 221 | 222 | 223 | 's','cons' 224 | 225 | 226 | 227 | 'z','cons' 228 | 229 | 230 | 231 | 'hh','cons' 232 | 233 | 234 | 235 | 236 | '-' 237 | 238 | 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /Nepali_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 'aa','vowel' 22 | 23 | 24 | 25 | 26 | 27 | 'ae','vowel' 28 | 29 | 30 | 31 | 32 | 33 | 34 | 'ao','vowel' 35 | 36 | 37 | 38 | 39 | 40 | 41 | 'ih','vowel','short' 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 'iy','vowel' 53 | 54 | 55 | 56 | 57 | 58 | 59 | 'uh','vowel' 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 'uw','vowel' 71 | 72 | 73 | 74 | 75 | 76 | 77 | 'eh','vowel','short' 78 | 79 | 80 | 81 | 82 | 83 | 84 | 'ey','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ay','vowel' 92 | 93 | 94 | 95 | 96 | 97 | 'oy','vowel' 98 | 99 | 100 | 101 | 102 | 103 | 104 | 'ow','vowel','short' 105 | 106 | 107 | 108 | 109 | 110 | 111 | 'ow1','vowel' 112 | 113 | 114 | 115 | 116 | 117 | 118 | 'aw','vowel' 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 'k','cons','ngroup' 127 
| 128 | 129 | 130 | 'g','cons','ngroup' 131 | 132 | 133 | 134 | 'ng','cons' 135 | 136 | 137 | 138 | 139 | 140 | 'ch','cons','ngroup' 141 | 142 | 143 | 144 | 'jh','cons','ngroup' 145 | 146 | 147 | 148 | 149 | 't','cons','ngroup' 150 | 151 | 152 | 153 | 'd','cons','ngroup' 154 | 155 | 156 | 157 | 158 | 'th','cons','ngroup' 159 | 160 | 161 | 162 | 163 | 'dh','cons','ngroup' 164 | 165 | 166 | 167 | 'n','cons' 168 | 169 | 170 | 171 | 172 | 173 | 'p','cons','mgroup' 174 | 175 | 176 | 177 | 'f','cons','mgroup' 178 | 179 | 180 | 181 | 'b','cons','mgroup' 182 | 183 | 184 | 185 | 'm','cons' 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 'y','cons' 194 | 195 | 196 | 197 | 'r','cons' 198 | 199 | 200 | 201 | 'l','cons' 202 | 203 | 204 | 205 | 'v','cons' 206 | 207 | 208 | 209 | 'w','cons' 210 | 211 | 212 | 213 | 214 | 'sh','cons' 215 | 216 | 217 | 218 | 219 | 'zh','cons' 220 | 221 | 222 | 223 | 's','cons' 224 | 225 | 226 | 227 | 'z','cons' 228 | 229 | 230 | 231 | 'hh','cons' 232 | 233 | 234 | 235 | 236 | '-' 237 | 238 | 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /Punjabi_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 'aa','vowel' 22 | 23 | 24 | 25 | 26 | 27 | 'ae','vowel' 28 | 29 | 30 | 31 | 32 | 33 | 34 | 'ao','vowel' 35 | 36 | 37 | 38 | 39 | 40 | 41 | 'ih','vowel','short' 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 'iy','vowel' 53 | 54 | 55 | 56 | 57 | 58 | 59 | 'uh','vowel' 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 'uw','vowel' 71 | 72 | 73 | 74 | 75 | 76 | 77 | 'eh','vowel','short' 78 | 79 | 80 | 81 | 82 | 83 | 84 | 'ey','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ay','vowel' 92 | 93 | 94 | 95 | 96 | 97 | 'oy','vowel' 98 | 99 | 100 | 101 | 102 | 103 | 104 | 'ow','vowel','short' 105 | 106 | 107 | 108 | 109 | 110 | 111 | 'ow1','vowel' 112 | 113 | 
114 | 115 | 116 | 117 | 118 | 'aw','vowel' 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 'k','cons','ngroup' 127 | 128 | 129 | 130 | 'g','cons','ngroup' 131 | 132 | 133 | 134 | 'ng','cons' 135 | 136 | 137 | 138 | 139 | 140 | 'ch','cons','ngroup' 141 | 142 | 143 | 144 | 'jh','cons','ngroup' 145 | 146 | 147 | 148 | 149 | 't','cons','ngroup' 150 | 151 | 152 | 153 | 'd','cons','ngroup' 154 | 155 | 156 | 157 | 158 | 'th','cons','ngroup' 159 | 160 | 161 | 162 | 163 | 'dh','cons','ngroup' 164 | 165 | 166 | 167 | 'n','cons' 168 | 169 | 170 | 171 | 172 | 173 | 'p','cons','mgroup' 174 | 175 | 176 | 177 | 'f','cons','mgroup' 178 | 179 | 180 | 181 | 'b','cons','mgroup' 182 | 183 | 184 | 185 | 'm','cons' 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 'y','cons' 194 | 195 | 196 | 197 | 'r','cons' 198 | 199 | 200 | 201 | 'l','cons' 202 | 203 | 204 | 205 | 'v','cons' 206 | 207 | 208 | 209 | 'w','cons' 210 | 211 | 212 | 213 | 214 | 'sh','cons' 215 | 216 | 217 | 218 | 219 | 'zh','cons' 220 | 221 | 222 | 223 | 's','cons' 224 | 225 | 226 | 227 | 'z','cons' 228 | 229 | 230 | 231 | 'hh','cons' 232 | 233 | 234 | 235 | 236 | '-' 237 | 238 | 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /Python Cart/Cart.h: -------------------------------------------------------------------------------- 1 | #ifndef _Cart_H 2 | #define _Cart_H 3 | 4 | typedef char * FeatureID; 5 | typedef struct { 6 | FeatureID featureID; 7 | char **featureTokens; 8 | } Feature; 9 | 10 | typedef struct { 11 | Feature *features; 12 | int noOfFeatures; 13 | } FeatureGroup; 14 | 15 | typedef struct { 16 | const char *word; 17 | int focus; 18 | char *classID; 19 | int count; 20 | } CARTWord; 21 | 22 | typedef struct { 23 | int relativeIndex; 24 | FeatureID featureID; 25 | } SplitRule; 26 | 27 | typedef struct { 28 | char *utf8Class; 29 | int frequency; 30 | } NodeClass; 31 | 32 | typedef struct CART { 33 | int nodeID; 34 | // int contextLength; 35 | char terminal; 36 
| SplitRule splitRule; 37 | NodeClass *classes; 38 | struct CART *leftCART; 39 | struct CART *rightCART; 40 | } CART; 41 | 42 | CART *CARTNode( int nodeID, int contextLength, char *featureStr, char terminal, NodeClass *classes ); 43 | void SetNodeFeature( CART *node, FeatureGroup *featureGroup, const char *featureStr ); 44 | void SetNodeClasses( CART *node, const char *classesStr ); 45 | 46 | void AddBinaryNode( CART *root, CART *node ); 47 | int Match( FeatureGroup *featureGroup, CART *treeNode, const CARTWord *cartWord ); 48 | 49 | CART *BuildTree( const char *fileName, int *treeType, char *key, FeatureGroup *primaryFeatures, FeatureGroup *predictiveFeatures ); 50 | void LoadKnowledge( const char *fileName, CART **primaryCarts, CART **predictiveCarts, FeatureGroup *primaryFeatures, FeatureGroup *predictiveFeatures ); 51 | 52 | NodeClass *LetterToClassLookup( FeatureGroup *featureGroup, CART *tree, const char *word, int focus, int multiple ); 53 | 54 | void XMLElementRead( void *data, const char *element, const char *elementData ); 55 | void XMLAttrsRead( void *data, const char *element, const char **attr ); 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /Python Cart/python/QuillCNCart.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Cart.h" 3 | 4 | typedef struct { 5 | PyObject_HEAD 6 | CART **primaryCarts; 7 | CART **predictiveCarts; 8 | FeatureGroup *primaryFeatures; 9 | FeatureGroup *predictiveFeatures; 10 | } QuillCKnowledge; 11 | 12 | static PyTypeObject QuillCKnowledgeType = { 13 | PyObject_HEAD_INIT(NULL) 14 | 0, "QuillCKnowledge", sizeof(QuillCKnowledge), 0, 15 | }; 16 | 17 | static PyObject *QuillCKnowledgeGetClass( PyObject *_self, PyObject *args ) { 18 | const char *s = NULL; 19 | const char *type = NULL; 20 | int focus = -1; 21 | int multiple = -1; 22 | QuillCKnowledge *self = NULL; 23 | NodeClass *classes = NULL; 24 | int j = -1; 25 | 
PyObject *list; 26 | PyObject *resultStr; 27 | PyObject *frequency; 28 | PyObject *pair; 29 | CART **carts; 30 | FeatureGroup *featureGroup; 31 | 32 | if ( !PyArg_ParseTuple(args,"sisi:GetClass",&s,&focus,&type,&multiple) ) { 33 | return NULL; 34 | } 35 | self = (QuillCKnowledge *)_self; 36 | if ( strcmp(type,"predictive") == 0 ) { 37 | carts = self->predictiveCarts; 38 | featureGroup = self->predictiveFeatures; 39 | } 40 | else if ( strcmp(type,"primary") == 0 ) { 41 | carts = self->primaryCarts; 42 | featureGroup = self->primaryFeatures; 43 | } 44 | else { 45 | return NULL; 46 | } 47 | 48 | list = PyList_New( 0 ); 49 | if ( !list ) return NULL; 50 | 51 | if ( !carts[(unsigned char)s[focus]] ) { 52 | #if 0 53 | resultStr = PyUnicode_DecodeLatin1( s+focus, 1, NULL ); 54 | //resultStr = PyString_FromFormat( "%c", (unsigned int)s[focus] ); 55 | PyList_Append( list, resultStr ); 56 | Py_DECREF( resultStr ); 57 | #endif 58 | fprintf( stderr, "Cart not found for '%c' from '%s'\n", (unsigned char)s[focus], s ); 59 | } 60 | else { 61 | classes = LetterToClassLookup( featureGroup, carts[(unsigned char)s[focus]], s, focus, multiple ); 62 | assert( classes ); 63 | /*if ( !multiple ) { 64 | resultStr = PyUnicode_DecodeUTF8( classes[0], strlen(classes[0]), NULL ); 65 | for ( j = 0; classes[j]; ++j ) { 66 | free( classes[j] ); 67 | } 68 | free( classes ); 69 | return resultStr; 70 | }*/ 71 | for ( j = 0; classes[j].utf8Class; ++j ) { 72 | resultStr = PyUnicode_DecodeUTF8( classes[j].utf8Class, strlen(classes[j].utf8Class), NULL ); 73 | frequency = PyFloat_FromDouble((double)classes[j].frequency); 74 | if ( !resultStr ) { 75 | resultStr = PyUnicode_DecodeLatin1( s+focus, 1, NULL ); 76 | frequency = PyFloat_FromDouble(0.0); 77 | //resultStr = PyString_FromFormat( "%c", (unsigned int)s[focus] ); 78 | } 79 | pair = PyTuple_Pack(2, resultStr, frequency); 80 | 81 | PyList_Append( list, pair ); 82 | 83 | Py_DECREF( resultStr ); 84 | Py_DECREF( frequency ); 85 | Py_DECREF( pair ); 86 | 
} 87 | /* 88 | for ( j = 0; classes[j]; ++j ) { 89 | free( classes[j] ); 90 | } 91 | free( classes ); 92 | */ 93 | } 94 | return list; 95 | }; 96 | 97 | static void CollectClasses( CART *node, PyObject *classes ) { 98 | int i; 99 | int longValue; 100 | if ( node->terminal ) { 101 | for ( i = 0; node->classes[i].utf8Class; ++i ) { 102 | PyObject *value = PyDict_GetItemString( classes, node->classes[i].utf8Class ); 103 | if ( !value ) { 104 | PyDict_SetItemString( classes, node->classes[i].utf8Class, PyInt_FromLong(1) ); 105 | } 106 | else { 107 | longValue = PyInt_AsLong( value ); 108 | PyDict_SetItemString( classes, node->classes[i].utf8Class, PyInt_FromLong(longValue+1) ); 109 | } 110 | } 111 | } 112 | else { 113 | CollectClasses( node->leftCART, classes ); 114 | CollectClasses( node->rightCART, classes ); 115 | } 116 | } 117 | 118 | static PyObject *QuillCKnowledgeGetAllClasses( PyObject *_self, PyObject *args ) { 119 | QuillCKnowledge *self = NULL; 120 | unsigned char nodeKey = -1; 121 | PyObject *classesList; 122 | CART **carts; 123 | 124 | if ( !PyArg_ParseTuple(args,"B:GetAllClasses",&nodeKey) ) { 125 | return NULL; 126 | } 127 | self = (QuillCKnowledge *)_self; 128 | carts = self->predictiveCarts; 129 | 130 | if ( !carts[nodeKey] ) { 131 | fprintf( stderr, "Cart not found for %c\n", nodeKey ); 132 | return NULL; 133 | } 134 | 135 | classesList = PyDict_New(); 136 | if ( !classesList ) return NULL; 137 | CollectClasses( carts[nodeKey], classesList ); 138 | 139 | return classesList; 140 | }; 141 | 142 | static PyObject *QuillCKnowledgeGetAllCartKeys( PyObject *_self, PyObject *args ) { 143 | QuillCKnowledge *self = NULL; 144 | PyObject *list; 145 | CART **carts; 146 | int i; 147 | 148 | self = (QuillCKnowledge *)_self; 149 | carts = self->predictiveCarts; 150 | 151 | list = PyList_New( 0 ); 152 | if ( !list ) return NULL; 153 | 154 | for ( i = 0; i < 256; ++i ) { 155 | if ( !carts[i] ) continue; 156 | PyList_Append( list, PyInt_FromLong(i) ); 157 | } 158 | 
return list; 159 | }; 160 | 161 | static PyMethodDef quillCKnowledgeMethods[] = { 162 | { "GetClass", (PyCFunction)QuillCKnowledgeGetClass, METH_VARARGS }, 163 | { "GetAllClasses", (PyCFunction)QuillCKnowledgeGetAllClasses, METH_VARARGS }, 164 | { "GetAllCartKeys", (PyCFunction)QuillCKnowledgeGetAllCartKeys, METH_VARARGS }, 165 | { NULL, NULL } 166 | }; 167 | 168 | static PyObject *QuillCKnowledgeNew( PyTypeObject *type, PyObject *args, PyObject *kwargs ) { 169 | const char *fileName; 170 | QuillCKnowledge *knowledge; 171 | int i; 172 | 173 | if ( !PyArg_ParseTuple(args,"s",&fileName) ) { 174 | return NULL; 175 | } 176 | knowledge = (QuillCKnowledge *) type->tp_alloc( &QuillCKnowledgeType, 0 ); 177 | if ( !knowledge ) { 178 | return NULL; 179 | } 180 | knowledge->primaryCarts = (CART **) malloc( sizeof(CART *) *256 ); 181 | if ( !knowledge->primaryCarts ) { 182 | return NULL; 183 | } 184 | knowledge->predictiveCarts = (CART **) malloc( sizeof(CART *) *256 ); 185 | if ( !knowledge->predictiveCarts ) { 186 | return NULL; 187 | } 188 | for ( i = 0; i < 256; ++i ) { 189 | knowledge->primaryCarts[i] = NULL; 190 | knowledge->predictiveCarts[i] = NULL; 191 | } 192 | 193 | knowledge->primaryFeatures = (FeatureGroup *) malloc( sizeof(FeatureGroup) ); 194 | if ( !knowledge->primaryFeatures ) { 195 | return NULL; 196 | } 197 | knowledge->primaryFeatures->noOfFeatures = 0; 198 | knowledge->primaryFeatures->features = (Feature *) malloc( sizeof(Feature) ); 199 | if ( !knowledge->primaryFeatures->features ) { 200 | return NULL; 201 | } 202 | knowledge->predictiveFeatures = (FeatureGroup *) malloc( sizeof(FeatureGroup) ); 203 | if ( !knowledge->predictiveFeatures ) { 204 | return NULL; 205 | } 206 | knowledge->predictiveFeatures->noOfFeatures = 0; 207 | knowledge->predictiveFeatures->features = (Feature *) malloc( sizeof(Feature) ); 208 | if ( !knowledge->predictiveFeatures->features ) { 209 | return NULL; 210 | } 211 | 212 | Py_BEGIN_ALLOW_THREADS 213 | LoadKnowledge( fileName, 
knowledge->primaryCarts, knowledge->predictiveCarts, knowledge->primaryFeatures, knowledge->predictiveFeatures ); 214 | Py_END_ALLOW_THREADS 215 | 216 | return (PyObject *)knowledge; 217 | } 218 | 219 | static void QuillCKnowledgeDeAlloc( QuillCKnowledge *self ) { 220 | int i; 221 | for ( i = 0; i < 256; ++i ) { 222 | free( self->primaryCarts[i] ); 223 | free( self->predictiveCarts[i] ); 224 | } 225 | free( self->primaryCarts ); 226 | free( self->predictiveCarts ); 227 | free( self->primaryFeatures ); 228 | free( self->predictiveFeatures ); 229 | self->ob_type->tp_free( (PyObject *) self ); 230 | } 231 | 232 | static PyMethodDef quillCCartMethods[] = { 233 | { NULL, NULL, 0, NULL } 234 | }; 235 | 236 | static int init_type( PyTypeObject *type, void *f_dealloc, const char *doc, PyMethodDef *methods, void *f_new ) { 237 | type->tp_dealloc = (destructor)f_dealloc; 238 | type->tp_flags = Py_TPFLAGS_DEFAULT; 239 | type->tp_doc = (char *)doc; 240 | type->tp_methods = methods; 241 | type->tp_new = (newfunc)f_new; 242 | return PyType_Ready(type); 243 | } 244 | 245 | PyMODINIT_FUNC initQuillCCart( void ) { 246 | int result; 247 | PyObject *module = Py_InitModule( "QuillCCart", quillCCartMethods ); 248 | result = init_type( &QuillCKnowledgeType, QuillCKnowledgeDeAlloc, "Quill Knowledge object", quillCKnowledgeMethods, QuillCKnowledgeNew ); 249 | if ( result < 0 ) { 250 | return; 251 | } 252 | Py_INCREF( &QuillCKnowledgeType ); 253 | PyModule_AddObject( module, "QuillCKnowledge", (PyObject *)&QuillCKnowledgeType ); 254 | } 255 | -------------------------------------------------------------------------------- /Python Cart/python/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | import os, os.path 3 | 4 | QuillCCartModule = Extension( 5 | name = 'QuillCCart', 6 | sources = ['QuillCNCart.c', '../Cart.c'], 7 | # include_dirs = ['..'], 8 | libraries = ['expat'], 9 | include_dirs = [ 
# [repository-dump text, kept verbatim: tail of /Python Cart/python/setup.py] os.path.split(os.getcwd())[0], os.path.expanduser('/usr/include/'), os.path.expanduser('/usr/local/include/python2.4/') ], 10 | ) 11 | 12 | setup ( name = 'QuillCCart', 13 | version = '1.0', 14 | description = 'Quill C Normal Cart package', 15 | ext_modules = [QuillCCartModule] 16 | ) 17 | -------------------------------------------------------------------------------- /QuillEngXlit.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Date    : Jan 25, 2013
# @Author  : Ram Prakash, Sharath Puranik
# @Version : 1

import QuillLanguage
import QuillPrimary

import xml.etree.cElementTree as ET
import copy
import re

from optparse import OptionParser


class QuillEngXliterator(object):
    """Transliterate English-spelled words into an Indian script.

    A literal is first turned into a phoneme string (either directly, or via
    an English pronunciation that is then mapped to an Indian one), every
    phoneme is mapped to its candidate "primary" spellings using the rules of
    the ``*_Xlit.xml`` definition, and the primary strings are finally
    rendered to unicode by the rule-based primary engine.
    """

    def __init__(self,knowledgeDir1,knowledgeDir2,xlitDef):
        """Load the pronunciation engines and the transliteration rules.

        With both knowledge directories given, ``knowledgeDir1`` drives
        literal -> English pronunciation and ``knowledgeDir2`` drives
        English -> Indian pronunciation.  If either is None a single direct
        literal -> Indian pronunciation engine is built from
        ``knowledgeDir1``.  ``xlitDef`` is the path of the XML rule file.
        """
        self.directMode = False
        self.lit2engEngine = None
        self.eng2indEngine = None
        self.lit2indEngine = None

        if knowledgeDir1 is not None and knowledgeDir2 is not None:
            self.lit2engEngine = QuillLanguage.QuillLanguage(langKnowledgeInput=knowledgeDir1, useCCart = True)
            self.eng2indEngine = QuillLanguage.QuillLanguage(langKnowledgeInput=knowledgeDir2, useCCart = True)
        else:
            self.lit2indEngine = QuillLanguage.QuillLanguage(langKnowledgeInput=knowledgeDir1, useCCart = True)
            self.directMode = True

        self.primEngine = None
        self.xlitRules = None
        self.loadXlitRules(xlitDef)
        self.compileFeatureRes()

        self.debugLit = ''

    def codeGen(self):
        """Yield the two-character codes 'aA', 'aB', ... 'zZ' (676 in total).

        Lower-case + upper-case pairs guarantee that the end of one code and
        the start of the next can never form another valid code, which the
        position arithmetic in optionsChain() relies on.
        """
        codeChars = 'abcdefghijklmnopqrstuvwxyz'
        firstIndex = 0
        secondIndex = 0
        while firstIndex < len(codeChars) and secondIndex < len(codeChars):
            code = codeChars[firstIndex] + codeChars[secondIndex].upper()
            secondIndex += 1
            if secondIndex == len(codeChars):
                secondIndex = 0
                firstIndex += 1
            yield code

    def loadXlitRules(self, xlitDef):
        """Parse the XML rule file ``xlitDef`` into the phoneme tables.

        Fills ``self.alphabet`` (code -> features), ``self.phoneme2code`` and
        ``self.code2phoneme`` and creates ``self.primEngine`` from the root
        element's ``primary`` attribute.  If the file cannot be opened a
        message is printed and the tables are left empty.
        """
        # Start from empty tables so that the "print and return" path below
        # leaves a consistent object; previously __init__ then failed in
        # compileFeatureRes() because self.alphabet did not exist.
        self.alphabet = {}
        self.phoneme2code = {}
        self.code2phoneme = {}

        try:
            f = open(xlitDef,'r')
        except IOError:
            print("Can't load the language definition file")
            return

        try:
            xlitRoot = ET.parse(f).getroot()
        finally:
            # Close even when the XML is malformed.
            f.close()

        cGen = self.codeGen()
        self.primEngine = QuillPrimary.QuillRuleBased(xlitRoot.attrib['primary'])

        for tree in xlitRoot:
            if tree.tag != 'features':
                continue

            phoneme = tree.attrib['phoneme'].strip()
            code = next(cGen)

            self.phoneme2code[phoneme] = code
            self.code2phoneme[code] = phoneme

            featuresObj = features(code)
            for feature in tree:
                # SECURITY NOTE: the rule file is evaluated as Python source.
                # Only load definition files you trust.
                if feature.tag == 'properties':
                    props = eval(feature.text)
                    if isinstance(props, str):
                        props = [props]
                    else:
                        props = list(props)
                    featuresObj.setProps(props)
                elif feature.tag == 'producer':
                    regex = eval(feature.attrib['regex'])
                    options = eval(feature.attrib['value'])
                    featuresObj.addProducer(regex,options)
            self.alphabet[code] = featuresObj

    def getCodes(self,prop):
        """Returns list of alphabet letters that satisfy this property"""
        return [code for (code,feat) in self.alphabet.items() if prop in feat.getProps()]

    def compileRe(self,reStr):
        """Expand every ``_property_`` placeholder in ``reStr``.

        Each placeholder becomes a non-capturing alternation of the codes of
        all phonemes that have that property.
        """
        placeholder = re.compile(r'_([^_]+)_')
        result = placeholder.search(reStr)
        while result:
            orRegex = '(?:' + '|'.join(self.getCodes(result.group(1))) + ')'
            reStr = reStr.replace(result.group(), orRegex)
            result = placeholder.search(reStr)
        return reStr

    def compileFeatureRes(self):
        """Replace the producer patterns of every phoneme by their expanded form."""
        for f in self.alphabet.values():
            for (regexStr,options) in f.getAllProducers():
                f.changeProducerRe(regexStr,self.compileRe(regexStr))

    def optionsChain(self,phonemeRepStr):
        """Return, per phoneme of ``phonemeRepStr``, its candidate spellings.

        ``phonemeRepStr`` is a space separated phoneme string.  The result has
        one entry per phoneme: the options of the producer whose pattern gives
        the longest match with group 1 located at that phoneme (the first such
        producer wins on ties), or an empty list when none matches.
        ``[[]]`` is returned when a phoneme is unknown.
        """
        phonesList = [x.strip() for x in phonemeRepStr.split(' ') if len(x.strip()) > 0]
        try:
            codeList = [self.phoneme2code[ph] for ph in phonesList]
        except KeyError:
            return [[]]

        literal = ''.join(codeList)
        chain = [[] for _ in codeList]

        for (index,code) in enumerate(codeList):
            bestMatchLen = 0
            for (regStr,options) in self.alphabet[code].getAllProducers():
                for match in re.finditer(regStr,literal):
                    matchLen = len(match.group())
                    # Codes are two characters wide, so phoneme `index`
                    # starts at offset 2*index of the literal.  The strict
                    # '>' keeps the first match among equally long ones.
                    if matchLen > bestMatchLen and match.start(1) == 2*index:
                        chain[index] = options
                        bestMatchLen = matchLen

        return chain

    def getIndianPronunciations(self,literal):
        """Return the list of Indian phoneme strings for ``literal``."""
        if self.directMode == True:
            (indPronunciations,count) = self.lit2indEngine.literalToUnicode(list(literal),multiple = True)

            pronunciations = []
            for indPronunciation in indPronunciations:
                indPronunciationStr = ' '.join([x.strip() for x in indPronunciation if x.strip() != ''])
                indPronunciationStr = indPronunciationStr.replace('er','ah r')
                indPronunciationStr = indPronunciationStr.replace('_',' ')
                indPronunciationStr = indPronunciationStr.replace('#','')
                indPronunciationStr = indPronunciationStr.replace('$','')

                # Normalise syllable dashes to ' - ' and drop empty pieces.
                indPronunciationStr = ' - '.join([x.strip() for x in indPronunciationStr.split('-') if x.strip() != ''])

                pronunciations.append(indPronunciationStr)

            return pronunciations

        (engPronunciation,count) = self.lit2engEngine.literalToUnicode(list(literal))
        engPronunciationStr = ' '.join(engPronunciation)

        # The second engine takes one "<english phonemes>,<letter>" item per
        # input letter; '#' stands for "no phoneme", '_' for an inner space.
        for (cIndex,c) in enumerate(literal):
            if engPronunciation[cIndex] == '':
                engPronunciation[cIndex] = '#'
            engPronunciation[cIndex] = engPronunciation[cIndex].replace(' ','_')

        indPronounceInput = []
        for (cIndex,c) in enumerate(literal):
            indPronounceInput.append('%s,%s'%(engPronunciation[cIndex],c))

        (indPronunciation,count) = self.eng2indEngine.literalToUnicode(indPronounceInput)

        indPronunciationStr = ''
        indPronunciationParts = []
        dashAdded = False
        if count != -1:
            for (cIndex,c) in enumerate(engPronunciation):
                indPronunciation[cIndex] = indPronunciation[cIndex].replace('_',' ')
                part = indPronunciation[cIndex]
                if part.strip() == '':
                    continue
                elif part[0] == '-':
                    # Leading dash: emit one dash (never two in a row), then the phonemes.
                    if dashAdded == False:
                        indPronunciationParts.append('-')
                        dashAdded = True
                    if part[1:] != '#':
                        indPronunciationParts.append(part[1:])
                        dashAdded = False
                elif part[-1] == '-':
                    # Trailing dash: phonemes first, then one dash.
                    if part[:-1] != '#':
                        indPronunciationParts.append(part[:-1])
                        dashAdded = False
                    if dashAdded == False:
                        indPronunciationParts.append('-')
                        dashAdded = True
                else:
                    if part != '#':
                        indPronunciationParts.append(part)
                        dashAdded = False

            indPronunciationStr = ' '.join(indPronunciationParts)
        else:
            # The English -> Indian step failed: fall back to the English
            # pronunciation with its stress digits removed.  (The original
            # passed the still-empty indPronunciationStr here, so the fallback
            # always produced '' and engPronunciationStr was never used.)
            indPronunciationStr = self.ignoreStress(engPronunciationStr)

        indPronunciationStr = indPronunciationStr.replace('er','ah r')

        return [indPronunciationStr]

    def xliterate(self, literal):
        """Return the unicode transliteration candidates for ``literal``."""
        primStringsList = []
        for indPronunciationStr in self.getIndianPronunciations(literal):
            primStringsList.extend(self.getPrimaryStrings(indPronunciationStr))

        return self.xliterateInternal(primStringsList)

    def xliterateInternal(self, primaryStringsList):
        """Render every primary string to unicode with the primary engine."""
        return [self.primEngine.primaryToUnicode(primaryStr) for primaryStr in primaryStringsList]

    def getPrimaryStrings(self,indPronunciationStr):
        """Return every spelling obtainable from the phoneme string.

        The result is the cartesian product of the per-phoneme options, with
        the options of the *first* phoneme varying fastest.  When more than
        100 combinations exist only the first option of each phoneme is used.
        """
        optionsChain = self.optionsChain(indPronunciationStr)

        count = 1
        for options in optionsChain:
            count *= len(options)

        if count > 100:
            print("getPrimaryString: Permutations execeeded count. Count = %s" % count)
            return [''.join([litList[0] for litList in optionsChain])]

        primLitList = [[]]
        for options in optionsChain:
            # Same ordering as appending each option to a copy of the whole
            # partial list, without deep-copying it once per option.
            primLitList = [prefix + [eachOption] for eachOption in options for prefix in primLitList]

        return [''.join(litList) for litList in primLitList]

    def ignoreStress(self, pronunciation):
        """Strip the stress digits 0/1/2 that directly follow a phoneme.

        'ow' is special: 'ow0' becomes 'ow' while 'ow1' and 'ow2' become
        'ow1'.  Digits not preceded by a letter are kept unchanged.
        """
        kept = []
        removeChars = '012'
        for (i,c) in enumerate(pronunciation):
            if c in removeChars:
                if i > 1 and pronunciation[i-2:i] == 'ow':
                    if c != '0':
                        kept.append('1')
                    continue
                elif i > 0 and pronunciation[i-1].isalpha():
                    continue
            kept.append(c)

        return ''.join(kept)


class features(object):
    """Properties and spelling producers of one phoneme.

    A producer is a ``[regex, options]`` pair: when ``regex`` matches the
    coded phoneme string, ``options`` are the candidate spellings.
    """

    def __init__(self,keyUnichar):
        self.key = keyUnichar
        self.props = []
        self.literalProducers = []

    def addProducer(self,regexStr,outLiteralsList):
        """Register a producer pattern together with its options."""
        self.literalProducers.append([regexStr,outLiteralsList])

    def changeProducerRe(self,oldRe,newRe):
        """Replace the pattern of the first producer whose pattern is ``oldRe``."""
        for prods in self.literalProducers:
            if prods[0] == oldRe:
                prods[0] = newRe
                break

    def getAllProducers(self):
        """Return the producers as a list of ``(regex, options)`` tuples."""
        return [tuple(x) for x in self.literalProducers]

    def setProps(self,properties):
        self.props = properties

    def getProps(self):
        return self.props

    def getLiterals(self,type):
        """Return the distinct literals of all producers, in first-seen order.

        For ``type == 'predictive'`` the second entry of each producer's
        options is used, otherwise the first.
        """
        literals = []
        for v in self.literalProducers:
            if type == 'predictive':
                options = v[1][1]
            else:
                options = v[1][0]
            for lit in options:
                if lit not in literals:
                    literals.append(lit)
        return literals

    def getLiteralsForPattern(self,pattern,type):
        """Return the options entry of the first producer registered for ``pattern``.

        Uses the second options entry for ``type == 'predictive'`` and the
        first otherwise; returns an empty list when no producer matches.
        """
        for v in self.literalProducers:
            if v[0] == pattern:
                if type == 'predictive':
                    return v[1][1]
                return v[1][0]
        return []

    def getPatterns(self):
        """Return the patterns of all producers, in registration order."""
        return [v[0] for v in self.literalProducers]

    def isPropTrue(self,prop):
        return prop in self.props

    def allPropsTrue(self,propList):
        """Return True when every property in ``propList`` is set."""
        for prop in propList:
            if prop not in self.props:
                return False
        return True
# [repository-dump text, kept verbatim: head of /QuillManualHelper.py] -------------------------------------------------------------------------------- /QuillManualHelper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Jan 25, 2013 3 | # @Author : Ram Prakash 4 | # @Version : 1 5 | 6 | import QuillManual 7 | 8 | staticQuillManual = QuillManual.QuillManual() 9 | staticQuillManual.loadPrimaryDef() 10 | 11 | def LoadPrimaryDef( primaryDefFile ) : 12 | staticQuillManual.loadPrimaryDef( primaryDefFile ) 13 | 14 | def PrimaryToUnicode( literal ) : 15 | return staticQuillManual.primaryToUnicode( literal ) 16 | 17 | def UnicodeToPrimary( uStr ) : 18 | return staticQuillManual.unicodeToPrimary( uStr.decode('utf-8') ) 19 | 20 | def UnicodeToHelperStr( uStr ) : 21 | return staticQuillManual.unicodeToHelperStr(
uStr.decode('utf-8') ) 22 | 23 | def GetOptionsAt( currHelper, currUStr, pos ) : 24 | return staticQuillManual.getOptionsAt( currHelper, currUStr.decode('utf-8'), pos ) 25 | 26 | def GetInsertCorrections( currHelper, currUStr, pos, delta ) : 27 | corrections = staticQuillManual.getInsertCorrections( currHelper, currUStr.decode('utf-8'), pos, delta ) 28 | return corrections 29 | 30 | def GetDeleteCorrections( currHelper, currUStr, pos, delLen ) : 31 | corrections = staticQuillManual.getDeleteCorrections( currHelper, currUStr.decode('utf-8'), pos, delLen ) 32 | return corrections 33 | -------------------------------------------------------------------------------- /QuillSourceProcessor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Jul 13, 2016 3 | # @Author : Ram Prakash, Sharath Puranik 4 | # @Version : 1 5 | 6 | import QuillLanguage as qlang 7 | import QuillEngXlit as xlit 8 | import re 9 | import const 10 | import primaryHelper 11 | 12 | class QuillSourceProcessor(object): 13 | def __init__(self): 14 | useCCart=True 15 | 16 | bengaliDefFile='Bengali_Vrinda.xml' 17 | bengaliKnowledgeInput='bengali' 18 | 19 | gujaratiDefFile='Gujarati_Shruti.xml' 20 | gujaratiKnowledgeInput='gujarati' 21 | 22 | hindiDefFile='Hindi_Mangal.xml' 23 | hindiKnowledgeInput='hindi' 24 | 25 | hindiMobileDefFile='Hindi_Mangal_Mobile.xml' 26 | hindiMobileKnowledgeInput='hindiMobile' 27 | 28 | kannadaDefFile='Kannada_Tunga.xml' 29 | kannadaKnowledgeInput='kannada' 30 | 31 | kannadaMobileDefFile='Kannada_Tunga_Mobile.xml' 32 | kannadaMobileKnowledgeInput='kannada_list_mobile.txt' 33 | 34 | malayalamDefFile='Malayalam_Kartika.xml' 35 | malayalamKnowledgeInput='malayalam' 36 | 37 | malayalamMobileDefFile='Malayalam_Kartika_Mobile.xml' 38 | malayalamMobileKnowledgeInput='malayalam_list_mobile.txt' 39 | 40 | marathiDefFile='Marathi_Mangal.xml' 41 | marathiKnowledgeInput='marathi' 42 | 43 | 
marathiMobileDefFile='Marathi_Mangal_Mobile.xml' 44 | marathiMobileKnowledgeInput='marathi_list_mobile.txt' 45 | 46 | nepaliDefFile='Nepali_Mangal.xml' 47 | nepaliKnowledgeInput='nepali' 48 | 49 | punjabiDefFile='Punjabi_Raavi.xml' 50 | punjabiKnowledgeInput='punjabi' 51 | 52 | tamilDefFile='Tamil_Latha.xml' 53 | tamilKnowledgeInput='tamil' 54 | 55 | tamilMobileDefFile='Tamil_Latha_Mobile.xml' 56 | tamilMobileKnowledgeInput='tamil_list_mobile.txt' 57 | 58 | teluguDefFile='Telugu_Raavi.xml' 59 | teluguKnowledgeInput='telugu' 60 | 61 | teluguMobileDefFile='Telugu_Raavi_Mobile.xml' 62 | teluguMobileKnowledgeInput='telugu_list_mobile.txt' 63 | 64 | self.scriptEngines = {'english':None, 65 | 'bengali':qlang.QuillLanguage(bengaliDefFile,bengaliKnowledgeInput,useCCart), 66 | #'gujarati':qlang.QuillLanguage(gujaratiDefFile,gujaratiKnowledgeInput,useCCart), 67 | #'hindi':qlang.QuillLanguage(hindiDefFile,hindiKnowledgeInput,useCCart), 68 | #'hindiMobile':qlang.QuillLanguage(hindiMobileDefFile,hindiMobileKnowledgeInput,useCCart), 69 | #'kannada':qlang.QuillLanguage(kannadaDefFile,kannadaKnowledgeInput,useCCart), 70 | #'kannadaMobile':qlang.QuillLanguage(kannadaMobileDefFile,kannadaMobileKnowledgeInput,useCCart), 71 | #'malayalam':qlang.QuillLanguage(malayalamDefFile,malayalamKnowledgeInput,useCCart), 72 | #'malayalamMobile':qlang.QuillLanguage(malayalamMobileDefFile,malayalamMobileKnowledgeInput,useCCart), 73 | #'marathi':qlang.QuillLanguage(marathiDefFile,marathiKnowledgeInput,useCCart), 74 | #'marathiMobile':qlang.QuillLanguage(marathiMobileDefFile,marathiMobileKnowledgeInput,useCCart), 75 | #'nepali':qlang.QuillLanguage(nepaliDefFile,nepaliKnowledgeInput,useCCart), 76 | #'punjabi':qlang.QuillLanguage(punjabiDefFile,punjabiKnowledgeInput,useCCart), 77 | #'tamil':qlang.QuillLanguage(tamilDefFile,tamilKnowledgeInput,useCCart), 78 | #'tamilMobile':qlang.QuillLanguage(tamilMobileDefFile,tamilMobileKnowledgeInput,useCCart), 79 | 
#'telugu':qlang.QuillLanguage(teluguDefFile,teluguKnowledgeInput,useCCart), 80 | #'teluguMobile':qlang.QuillLanguage(teluguMobileDefFile,teluguMobileKnowledgeInput,useCCart) 81 | } 82 | 83 | self.xlitEngines = { 84 | 'kannada': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Kannada_Xlit.xml'), 85 | 'bengali': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Bengali_Xlit.xml'), 86 | 'gujarati': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Gujarati_Xlit.xml'), 87 | 'hindi': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Hindi_Xlit.xml'), 88 | 'marathi': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Marathi_Xlit.xml'), 89 | 'nepali': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Nepali_Xlit.xml'), 90 | 'punjabi': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Punjabi_Xlit.xml'), 91 | 'telugu': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Telugu_Xlit.xml'), 92 | 'tamil': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Tamil_Xlit.xml'), 93 | 'malayalam': xlit.QuillEngXliterator('EnglishPronouncingTrees','IndianPronouncingTrees','Malayalam_Xlit.xml') 94 | } 95 | 96 | self.clashMaps = { 97 | 'bengali': self.makeClashMap('bengaliClashList.txt'), 98 | 'gujarati': self.makeClashMap('gujaratiClash.txt'), 99 | 'hindi': self.makeClashMap('hindiClash.txt'), 100 | 'kannada': self.makeClashMap('kannadaClash.txt'), 101 | 'tamil': self.makeClashMap('tamilClash.txt'), 102 | 'marathi': self.makeClashMap('marathiClash.txt'), 103 | 'nepali': self.makeClashMap('nepaliClash.txt'), 104 | 'punjabi': self.makeClashMap('punjabiClash.txt'), 105 | 'telugu': self.makeClashMap('teluguClash.txt'), 106 | 'malayalam': self.makeClashMap('malayalamClash.txt') 107 | } 108 | 109 | self.modeTypes = ['predictive','xliterate','itrans'] 110 | 111 | 
# [repository-dump text, kept verbatim: truncated remainder of /QuillSourceProcessor.py] self.inputBuffer ='' 112 | self.outputBuffer='' 113 | 114 | self.scriptCommandRE = r"(? 0 and len(xlitWords[0]) > 0: 305 | xlitWord = xlitWords[0] 306 | 307 | if inString in self.engWords: 308 | if inString in self.clashMaps[lang]: 309 | if xlitWord not in options[:4]: 310 | options = options[:1] + [xlitWord] + options[1:] 311 | else: 312 | if xlitWord in options: 313 | options.remove(xlitWord) 314 | options = [xlitWord] + options 315 | else: 316 | if xlitWord not in options[:4]: 317 | options = options[:3] + [xlitWord] + options[3:] 318 | 319 | response["twords"].append({ 320 | "word": True, 321 | "options": options, 322 | "optmap": dict(map(lambda x: ("".join(x), x), convertedList)) 323 | }) 324 | 325 | return response 326 | 327 | def getCorrections(self, lang, currWord, userInput, pos): 328 | if self.scriptEngines.has_key(lang): 329 | engine = self.scriptEngines[lang] 330 | else: 331 | return ["".join(currWord)] 332 | 333 | return engine.getCorrections(currWord, userInput, pos) 334 | 335 | def getCorrectionsStr(self, lang, currWord, userInput, pos): 336 | if self.scriptEngines.has_key(lang): 337 | engine = self.scriptEngines[lang] 338 | else: 339 | return currWord 340 | 341 | return engine.getCorrectionsStr(currWord, userInput, pos) 342 | 343 | 344 | if __name__ == '__main__': 345 | inString = "raja-deepthi" 346 | 347 | proc = QuillSourceProcessor() 348 | proc.switchLanguage("hindi") 349 | out = proc.processText(inString); 350 | 351 | f = open('out.txt','w') 352 | utext= out.encode('utf-8') 353 | f.write(utext) 354 | f.close() 355 | -------------------------------------------------------------------------------- /QuillTrainer.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Date    : Jul 13, 2016
# @Author  : Ram Prakash, Sharath Puranik
# @Version : 1

import CART
from QuillLanguage import QuillLanguage
import pickle


class QuillTrainer(object):
    """Builds the per-key CART decision trees of a QuillLanguage from word lists."""

    def __init__(self,quillLang):
        """Wrap ``quillLang``; raises Exception if it is not a QuillLanguage."""
        if not isinstance(quillLang,QuillLanguage):
            raise Exception('Invalid parameter. Not of type QuillLanguage')
        self.language = quillLang

    def train(self,uWords,scope=4,splRulesFlag=True):
        """Train the primary and the predictive knowledge and return the language.

        ``uWords`` are the training words, ``scope`` is the context length
        handed to the CARTs and ``splRulesFlag`` enables the language's
        special rules.
        """
        self.language.setKnowledge(self.__buildKeyToCARTMap(uWords,scope,splRulesFlag,"primary"),"primary")
        self.language.setKnowledge(self.__buildKeyToCARTMap(uWords,scope,splRulesFlag,"predictive"),"predictive")
        return self.language

    def getLanguage(self):
        return self.language

    def store(self,fname=None):
        """Write the trained carts to ``fname`` (default ``<language>.qil``)."""
        if fname is None:
            fname = self.language.language+'.qil'

        keyToCARTMap = self.language.keyToCARTMap
        keyToCARTMapPrimary = self.language.keyToCARTMapPrimary
        with open(fname,'w') as f:
            # NOTE(review): this format string has no placeholders for its
            # four arguments (and the closing write below is empty) -- the
            # markup inside these literals appears to have been lost from
            # this copy of the file.  Restore from the upstream source.
            f.write('\n'%(self.language.language,self.language.script,self.language.default_font,self.language.epsilon.encode('utf-8')))
            for (key,keyCart) in keyToCARTMap.items():
                keyCart.storeCart(f,"predictive")
            for (key,keyCart) in keyToCARTMapPrimary.items():
                keyCart.storeCart(f,"primary")

            f.write('')

    def load(self, trainedData):
        pass

    def __buildKeyToCARTMap ( self, uWords,scope=4,splRulesFlag=True,type="predictive" ):
        """Return a dict mapping each key to a CART built from ``uWords``.

        Words for which the language raises KeyError while producing
        training pairs are skipped.
        """
        contextLen = scope
        splRules = []
        if splRulesFlag == True:
            splRules = self.language.getSpecialRules(type)

        data = {}
        for uWord in uWords:
            try:
                trainPairs = self.language.getTrainingPairs(uWord,type)
            except KeyError:
                trainPairs = None

            if trainPairs is not None:
                data1 = CART.CART.prepareTrainingData(trainPairs,contextLen,1)
                for key in data1:
                    if key in data:
                        data[key].extend( data1[key] )
                    else:
                        data[key] = data1[key]

        # Only the primary trees get an explicit context preference order.
        # The last two entries were written "-3-4" (a single value, -7); the
        # list is meant to cover the offsets 0, +/-1 ... +/-4.  Defaulting to
        # None also avoids a NameError for any other `type`.
        contextPrefOrder = None
        if type == "primary":
            contextPrefOrder = [0,1,2,-1,3,-2,4,-3,-4]

        keyToCARTMap = {}
        for key in data:
            keyCart = CART.CART(key,data[key],contextLen, splRules,contextPrefOrder)
            keyCart.build()
            keyToCARTMap[key] = keyCart

        return keyToCARTMap

    def createTrainingData( self, uWords,scope=4,splRulesType='predictive',fname = None ):
        """Dump the prepared training rows for ``uWords`` to ``fname``.

        ``fname`` defaults to ``<language>.data``.  ``splRulesType`` selects
        the special rules written to the header; pass None to write none.

        NOTE(review): as in store(), several format strings below have lost
        their markup in this copy (e.g. '\\n' % (five values) and
        '\\t\\n' % key cannot be formatted).  They are reproduced unchanged.
        """
        contextLen = scope

        splRules = []
        if splRulesType is not None:
            splRules = self.language.getSpecialRules(splRulesType)

        if fname is None:
            fname = self.language.language+'.data'

        with open(fname,'w') as f:
            f.write('\n'%(self.language.language,self.language.script,self.language.default_font,self.language.epsilon.encode('utf-8'),scope))

            f.write('\t\n')
            for eachRule in splRules:
                f.write('\t\t')
                f.write(repr(eachRule))
                f.write('')
                f.write('\n')
            f.write('\t\t\n')

            data = {}
            for uWord in uWords:
                try:
                    trainPairs = self.language.getTrainingPairs(uWord)
                except KeyError:
                    trainPairs = None

                if trainPairs is not None:
                    data1 = CART.CART.prepareTrainingData(trainPairs,contextLen,1)
                    for key in data1:
                        if key in data:
                            data[key].extend( data1[key] )
                        else:
                            data[key] = data1[key]

            for key in data:
                keyData = data[key]
                f.write('\t\n'%key)
                for cWord in keyData:
                    f.write('\t\t\n')
                    f.write('\t\t\t%s\n'%cWord.word)
                    f.write('\t\t\t%s\n'%cWord.focus)
                    f.write('\t\t\t%s\n'%cWord.classID.encode('utf-8'))
                    f.write('\t\t\t%s\n'%cWord.count)
                    f.write('\t\t\n')
                f.write('\t\n')
            f.write('\n')
# [repository-dump text, kept verbatim: head of /README.md] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### **For all further updates/language fixes please visit the fork by [Cognirel -
https://cognireltech.github.io/Quillpad-Server/](https://cognireltech.github.io/Quillpad-Server/). The original maintainers of Quillpad are maintaining the code there.** 2 | 3 | # Quillpad Transliteration Server 4 | 5 | Quillpad is an Indic language input technology that revolutionized the Indian language typing scene. It is one of the most popular Indic input technologies with more than a billion words typed on the website alone. 6 | 7 | > Quillpad pioneered the successful use of machine learning for 8 | > building a predictive language input technology. 9 | > Quillpad has been rated as the best by many organisations that have embraced Quillpad. 10 | 11 | ### Version 12 | 1.0.1 13 | 14 | ### Preparation 15 | 16 | There are several archive files in the repository which have to be extracted; these include trained transliteration models and additional text files necessary for the Quillpad Server. 17 | 18 | * CherryPy-3.2.2.tar.gz 19 | * EnglishPronouncingTrees.tar.bz2 20 | * IndianPronouncingTrees.tar.bz2 21 | * additional_text_files.zip 22 | * bengali.tar.bz2 23 | * gujarati.tar.bz2 24 | * hindi.tar.bz2 25 | * kannada.tar.bz2 26 | * malayalam.tar.bz2 27 | * marathi.tar.bz2 28 | * nepali.tar.bz2 29 | * punjabi.tar.bz2 30 | * tamil.tar.bz2 31 | * telugu.tar.bz2 32 | * unique_word_files.zip 33 | 34 | Kindly extract all of these archives into the repository folder itself. 35 | 36 | ### Installation 37 | 38 | Quillpad Server requires [Python 2.7](https://www.python.org/downloads/) to run. 
39 | 40 | First, we need to compile the Quillpad Model loader that will be used to load the trained transliteration models 41 | 42 | ```sh 43 | $ cd Python\ Cart/python 44 | $ python setup.py build_ext --inplace 45 | $ cp QuillCCart.so ../../ 46 | $ cd ../../ 47 | ``` 48 | 49 | Now, the Quillpad Server is ready to run 50 | 51 | ```sh 52 | $ python startquill_cherry.py 53 | ``` 54 | 55 | ### Additional Information 56 | 57 | * Quillpad runs on port number 8090 (Additional configuration parameters are in *quill_cherry8088.conf*) 58 | 59 | * *processWordJSON* and *processWord* are the API endpoints over which the transliteration server can be accessed. 60 | > Example: 61 | 62 | * localhost:8090/processWordJSON?inString=hello&lang=hindi 63 | * localhost:8090/processWordJSON?inString=hello&lang=kannada 64 | 65 | ### Development 66 | 67 | Additional Quillpad Documentation coming soon. Thanks for your patience. 68 | -------------------------------------------------------------------------------- /RingBuffers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Nov 22 2012 3 | # @Author : Sharath Puranik 4 | # @Version : 1 5 | 6 | class RingBuffer: 7 | def __init__(self,size_max, count): 8 | self.max = size_max 9 | self.data = [] 10 | self.cur = 0 11 | for i in range(self.max): 12 | self.append(count) 13 | def append(self,x): 14 | """append an element at the end of the buffer""" 15 | self.data.append(x) 16 | self.cur += 1 17 | if len(self.data) == self.max: 18 | self.cur=0 19 | self.__class__ = RingBufferFull 20 | def get(self): 21 | """ return a list of elements from the oldest to the newest""" 22 | return self.data 23 | def get_curr(self): 24 | return (self.cur-1) % self.max 25 | 26 | class RingBufferFull: 27 | def __init__(self,n): 28 | raise "use RingBuffer to create objects, this class will automatically be alloted as the buffer is filled" 29 | def append(self,x): 30 | self.data[self.cur]=x 31 | 
self.cur=(self.cur+1) % self.max 32 | def get(self): 33 | return self.data[self.cur:]+self.data[:self.cur] 34 | def get_curr(self): 35 | return (self.cur-1) % self.max 36 | -------------------------------------------------------------------------------- /Tamil_Latha.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 'zwnj' 30 | 31 | 32 | 33 | 'zwj' 34 | 35 | 36 | 37 | '.n','anusvara' 38 | 39 | 40 | 41 | 'H','visarga' 42 | 43 | 44 | 45 | 46 | 'a','vowel','short' 47 | 48 | 49 | 50 | 'A','vowel','long' 51 | 52 | 53 | 54 | 'i','vowel','short' 55 | 56 | 57 | 58 | 59 | 'I','vowel','long' 60 | 61 | 62 | 63 | 64 | 'u','vowel','short' 65 | 66 | 67 | 68 | 69 | 'U','vowel','long' 70 | 71 | 72 | 73 | 74 | 'e','vowel','short' 75 | 76 | 77 | 78 | 79 | 'E','vowel','long' 80 | 81 | 82 | 83 | 84 | 'ai','vowel','long' 85 | 86 | 87 | 88 | 'o','vowel','short' 89 | 90 | 91 | 92 | 'O','vowel','long' 93 | 94 | 95 | 96 | 'au','vowel','long' 97 | 98 | 99 | 100 | 'k','cons','cons1' 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | '~N','cons','nasal' 109 | 110 | 111 | 112 | 113 | 114 | 'ch','cons','cons2' 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 'j','cons','cons2' 127 | 128 | 129 | 130 | 131 | '~n','cons','cons2','nasal' 132 | 133 | 134 | 135 | 136 | 137 | 'T','cons','cons3' 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 'N','cons','cons3','nasal' 146 | 147 | 148 | 149 | 150 | 151 | 't','cons','cons4' 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 'n','cons','cons4','nasal' 161 | 162 | 163 | 164 | 165 | 'Nn','cons','nasal' 166 | 167 | 168 | 169 | 170 | 171 | 'p','cons','cons5','unaspirated' 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 'm','cons','cons5','nasal' 184 | 185 | 186 | 187 | 188 | 'y','cons','semi-vowel' 189 | 190 | 191 | 192 | 193 | 194 | 'r','cons','semi-vowel' 
195 | 196 | 197 | 198 | 199 | 'R','cons','semi-vowel' 200 | 201 | 202 | 203 | 204 | 205 | 'l','cons','semi-vowel' 206 | 207 | 208 | 209 | 210 | 'L','cons','semi-vowel' 211 | 212 | 213 | 214 | 215 | 'zh','cons','semi-vowel' 216 | 217 | 218 | 219 | 220 | 'v','cons','semi-vowel' 221 | 222 | 223 | 224 | 225 | 'sh','cons','cons6' 226 | 227 | 228 | 229 | 230 | 's','cons','cons6' 231 | 232 | 233 | 234 | 235 | 'h','cons','cons6' 236 | 237 | 238 | 239 | 240 | 'A0','mathra','long' 241 | 242 | 243 | 244 | 'i0','mathra','short' 245 | 246 | 247 | 248 | 249 | 250 | 'I0','mathra','long' 251 | 252 | 253 | 254 | 'u0','mathra','short' 255 | 256 | 257 | 258 | 259 | 'U0','mathra','long' 260 | 261 | 262 | 263 | 'e0','mathra','short' 264 | 265 | 266 | 267 | 'E0','mathra','long' 268 | 269 | 270 | 271 | 'ai0','mathra','long' 272 | 273 | 274 | 275 | 'o0','mathra','short' 276 | 277 | 278 | 279 | 'O0','mathra','long' 280 | 281 | 282 | 283 | 'au0','mathra','long' 284 | 285 | 286 | 287 | 'pulli' 288 | 289 | 290 | 291 | '1','digit' 292 | 293 | 294 | 295 | '2','digit' 296 | 297 | 298 | 299 | '3','digit' 300 | 301 | 302 | 303 | '4','digit' 304 | 305 | 306 | 307 | '5','digit' 308 | 309 | 310 | 311 | '6','digit' 312 | 313 | 314 | 315 | '7','digit' 316 | 317 | 318 | 319 | '8','digit' 320 | 321 | 322 | 323 | '9','digit' 324 | 325 | 326 | 327 | '10','digit' 328 | 329 | 330 | 331 | '100','digit' 332 | 333 | 334 | 335 | '1000','digit' 336 | 337 | 338 | 339 | -------------------------------------------------------------------------------- /Tamil_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 'aa','vowel' 17 | 18 | 19 | 20 | 21 | 22 | 'ae','vowel' 23 | 24 | 25 | 26 | 27 | 28 | 29 | 'ao','vowel' 30 | 31 | 32 | 33 | 34 | 35 | 36 | 'ih','vowel','short' 37 | 38 | 39 | 40 | 41 | 42 | 43 | 'iy','vowel' 44 | 45 | 46 | 47 | 48 | 49 | 50 | 'uh','vowel' 51 | 52 | 53 | 54 | 55 | 56 | 57 | 
'uw','vowel' 58 | 59 | 60 | 61 | 62 | 63 | 64 | 'eh','vowel','short' 65 | 66 | 67 | 68 | 69 | 70 | 71 | 'ey','vowel' 72 | 73 | 74 | 75 | 76 | 77 | 78 | 'ay','vowel' 79 | 80 | 81 | 82 | 83 | 84 | 'oy','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ow','vowel','short' 92 | 93 | 94 | 95 | 96 | 97 | 98 | 'ow1','vowel' 99 | 100 | 101 | 102 | 103 | 104 | 105 | 'aw','vowel' 106 | 107 | 108 | 109 | 110 | 111 | 112 | 117 | 118 | 119 | 'k','cons','ngroup' 120 | 121 | 122 | 123 | 'g','cons','ngroup' 124 | 125 | 126 | 127 | 'ng','cons' 128 | 129 | 130 | 131 | 132 | 133 | 'ch','cons','ngroup' 134 | 135 | 136 | 137 | 'jh','cons','ngroup' 138 | 139 | 140 | 141 | 142 | 't','cons','ngroup' 143 | 144 | 145 | 146 | 'd','cons','ngroup' 147 | 148 | 149 | 150 | 151 | 'th','cons','ngroup' 152 | 153 | 154 | 155 | 'dh','cons','ngroup' 156 | 157 | 158 | 159 | 'n','cons' 160 | 161 | 162 | 163 | 164 | 165 | 'p','cons','mgroup' 166 | 167 | 168 | 169 | 'f','cons','mgroup' 170 | 171 | 172 | 173 | 'b','cons','mgroup' 174 | 175 | 176 | 177 | 'm','cons' 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 'y','cons' 186 | 187 | 188 | 189 | 'r','cons' 190 | 191 | 192 | 193 | 'l','cons' 194 | 195 | 196 | 197 | 'v','cons' 198 | 199 | 200 | 201 | 'w','cons' 202 | 203 | 204 | 205 | 206 | 'sh','cons' 207 | 208 | 209 | 210 | 'zh','cons' 211 | 212 | 213 | 214 | 's','cons' 215 | 216 | 217 | 218 | 'z','cons' 219 | 220 | 221 | 222 | 'hh','cons' 223 | 224 | 225 | 226 | 227 | '-' 228 | 229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /Telugu_Xlit.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 'ah','vowel','short' 10 | 11 | 12 | 13 | 14 | 15 | 16 | 'aa','vowel' 17 | 18 | 19 | 20 | 21 | 22 | 'ae','vowel' 23 | 24 | 25 | 26 | 27 | 28 | 29 | 'ao','vowel' 30 | 31 | 32 | 33 | 34 | 35 | 36 | 'ih','vowel','short' 37 | 38 | 39 | 40 | 41 | 42 | 43 | 'iy','vowel' 44 | 45 | 46 | 47 | 48 | 49 | 50 | 'uh','vowel' 51 | 52 
| 53 | 54 | 55 | 56 | 57 | 'uw','vowel' 58 | 59 | 60 | 61 | 62 | 63 | 64 | 'eh','vowel','short' 65 | 66 | 67 | 68 | 69 | 70 | 71 | 'ey','vowel' 72 | 73 | 74 | 75 | 76 | 77 | 78 | 'ay','vowel' 79 | 80 | 81 | 82 | 83 | 84 | 'oy','vowel' 85 | 86 | 87 | 88 | 89 | 90 | 91 | 'ow','vowel','short' 92 | 93 | 94 | 95 | 96 | 97 | 98 | 'ow1','vowel' 99 | 100 | 101 | 102 | 103 | 104 | 105 | 'aw','vowel' 106 | 107 | 108 | 109 | 110 | 111 | 112 | 117 | 118 | 119 | 'k','cons','ngroup' 120 | 121 | 122 | 123 | 'g','cons','ngroup' 124 | 125 | 126 | 127 | 'ng','cons' 128 | 129 | 130 | 131 | 132 | 133 | 'ch','cons','ngroup' 134 | 135 | 136 | 137 | 'jh','cons','ngroup' 138 | 139 | 140 | 141 | 142 | 't','cons','ngroup' 143 | 144 | 145 | 146 | 'd','cons','ngroup' 147 | 148 | 149 | 150 | 151 | 'th','cons','ngroup' 152 | 153 | 154 | 155 | 156 | 'dh','cons','ngroup' 157 | 158 | 159 | 160 | 'n','cons' 161 | 162 | 163 | 164 | 165 | 166 | 'p','cons','mgroup' 167 | 168 | 169 | 170 | 'f','cons','mgroup' 171 | 172 | 173 | 174 | 'b','cons','mgroup' 175 | 176 | 177 | 178 | 'm','cons' 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 'y','cons' 187 | 188 | 189 | 190 | 'r','cons' 191 | 192 | 193 | 194 | 'l','cons' 195 | 196 | 197 | 198 | 'v','cons' 199 | 200 | 201 | 202 | 'w','cons' 203 | 204 | 205 | 206 | 207 | 'sh','cons' 208 | 209 | 210 | 211 | 'zh','cons' 212 | 213 | 214 | 215 | 's','cons' 216 | 217 | 218 | 219 | 'z','cons' 220 | 221 | 222 | 223 | 'hh','cons' 224 | 225 | 226 | 227 | 228 | '-' 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /additional_text_files.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/additional_text_files.zip -------------------------------------------------------------------------------- /bengali.tar.bz2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/bengali.tar.bz2 -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Jan 25, 2013 3 | # @Author : Ram Prakash 4 | # @Version : 1 5 | 6 | langMap = { 7 | "bengali": ("Bengali_New.xml", "unique_bengali_words.txt"), 8 | "gujarati": ("Gujarati_New.xml", "unique_gujarati_words.txt"), 9 | "hindi": ("Hindi_New.xml", "unique_hindi_words.txt"), 10 | "kannada": ("Kannada_New.xml", "unique_kannada_words.txt"), 11 | "telugu": ("Telugu_New.xml", "unique_telugu_words.txt"), 12 | "tamil": ("Tamil_New.xml", "unique_tamil_words.txt"), 13 | "malayalam": ("Malayalam_New.xml", "unique_malayalam_words.txt"), 14 | "marathi": ("Marathi_New.xml", "unique_marathi_words.txt"), 15 | "nepali": ("Nepali_New.xml", "unique_nepali_words.txt"), 16 | "punjabi": ("Punjabi_New.xml", "unique_punjabi_words.txt") 17 | } 18 | -------------------------------------------------------------------------------- /const.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Jan 25, 2013 3 | # @Author : Ram Prakash, Sharath Puranik 4 | # @Version : 1 5 | 6 | class _const(object): 7 | class ConstError(TypeError): pass 8 | def __setattr__(self, name, value): 9 | if name in self.__dict__: 10 | raise self.ConstError, "Can't rebind const(%s)" % name 11 | self.__dict__[name] = value 12 | def __delattr__(self, name): 13 | if name in self.__dict__: 14 | raise self.ConstError, "Can't unbind const(%s)" % name 15 | raise NameError, name 16 | 17 | import sys 18 | sys.modules[__name__] = _const( ) 19 | import const 20 | const.optionSeperator = '---!!---' 21 | const.langWordMark = '--WORD--' 22 | const.gendir = "sc" 
23 | 24 | const.SMTP_SERVER_URL = "localhost" 25 | const.SMTP_LOGIN_USER = "" 26 | const.SMTP_LOGIN_PASSWD = "" -------------------------------------------------------------------------------- /gujarati.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/gujarati.tar.bz2 -------------------------------------------------------------------------------- /hellocherry.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Jan 25, 2013 3 | # @Author : Ram Prakash, Sharath Puranik 4 | # @Version : 1 5 | 6 | import cherrypy 7 | 8 | class HelloWorld: 9 | def index(self): 10 | return "Hello World!" 11 | index.exposed = True 12 | 13 | cherrypy.root = HelloWorld() 14 | cherrypy.server.start() 15 | -------------------------------------------------------------------------------- /hindi.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/hindi.tar.bz2 -------------------------------------------------------------------------------- /kannada.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/kannada.tar.bz2 -------------------------------------------------------------------------------- /logsystem.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,datacollector,mailcollector 3 | 4 | [handlers] 5 | keys=default,datahandler,mailhandler 6 | 7 | [formatters] 8 | keys=default,onlymessage,mailformat 9 | 10 | [logger_root] 11 | level=INFO 12 | handlers=default 13 | 14 | [logger_datacollector] 15 | level=INFO 16 | handlers=datahandler 17 | 
propagate=0 18 | qualname=data.collector 19 | 20 | [logger_mailcollector] 21 | level=INFO 22 | handlers=mailhandler 23 | propagate=0 24 | qualname=mail.collector 25 | 26 | 27 | [handler_default] 28 | class=FileHandler 29 | level=INFO 30 | formatter=default 31 | args=('logs/quill.log', 'a') 32 | 33 | [handler_datahandler] 34 | class=handlers.TimedRotatingFileHandler 35 | level=INFO 36 | formatter=onlymessage 37 | args=('logs/data.log', 'midnight') 38 | 39 | [handler_mailhandler] 40 | class=handlers.TimedRotatingFileHandler 41 | level=INFO 42 | formatter=mailformat 43 | args=('logs/mail.log', 'midnight') 44 | 45 | 46 | [formatter_default] 47 | format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s 48 | datefmt=%m-%d %H:%M 49 | 50 | [formatter_onlymessage] 51 | format=%(message)s 52 | 53 | [formatter_mailformat] 54 | format=%(asctime)-8s %(message)s 55 | datefmt=%m-%d %H:%M 56 | 57 | -------------------------------------------------------------------------------- /malayalam.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/malayalam.tar.bz2 -------------------------------------------------------------------------------- /marathi.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/marathi.tar.bz2 -------------------------------------------------------------------------------- /mysqlquill.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Nov 22 2012 3 | # @Author : Ram Prakash 4 | # @Version : 1 5 | 6 | import cherrypy 7 | import MySQLdb 8 | 9 | def connect(thread_index): 10 | cherrypy.thread_data.db = MySQLdb.connect('127.0.0.1', 'quill', 'quill','quill') 11 | 12 | 
cherrypy.server.on_start_thread_list.append(connect) 13 | 14 | class Root: 15 | def index(self): 16 | c = cherrypy.thread_data.db.cursor() 17 | c.execute("select count(*) from error_log") 18 | res = c.fetchone() 19 | c.close() 20 | return "Hello, you have %d records in your table" % res[0] 21 | index.exposed = True 22 | 23 | cherrypy.root = Root() 24 | cherrypy.config.update(file='mysqlcherry.conf') 25 | cherrypy.config.update({'thread_pool':10}) 26 | cherrypy.server.start() 27 | -------------------------------------------------------------------------------- /nepali.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/nepali.tar.bz2 -------------------------------------------------------------------------------- /primaryHelper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Nov 22 2012 3 | # @Author : Ram Prakash 4 | # @Version : 1 5 | 6 | # This module can be run to generate the keyboard maps and itrans help 7 | # It can also be imported to get the dumped keyboard maps in string form 8 | 9 | import json 10 | import QuillPrimary 11 | import os 12 | import shutil 13 | import codecs 14 | import config 15 | import string 16 | try: 17 | import nevow.tags as T 18 | import nevow.flat 19 | import tidy 20 | except ImportError, e: 21 | print "Warning: Nevow not found... continuing" 22 | 23 | loadedMaps = {} 24 | loadedUniqueWords = {} 25 | 26 | def prettyXHTML(uglyXHTML): 27 | options = dict(input_xml=True, 28 | output_xhtml=True, 29 | add_xml_decl=False, 30 | doctype='omit', 31 | indent='auto', 32 | tidy_mark=False) 33 | return str(tidy.parseString(uglyXHTML, **options)) 34 | 35 | def genHelp_(lang, q, outfile): 36 | print "Attempting to generate", outfile, "... 
", 37 | help,examples = q.getSchemeHelp() 38 | 39 | def exLister(context, data): 40 | for input, output, note in data: 41 | context.tag [ T.tr [ 42 | T.td (class_ = "input") [input.encode('utf-8')], 43 | T.td (class_ = "output") [output.encode('utf-8')], 44 | T.td (class_ = "note") [note.encode('utf-8')] 45 | ] 46 | ] 47 | return context.tag 48 | 49 | def processEx(e): 50 | if len(e) < 1: 51 | return 52 | 53 | return "Examples", T.ul [ 54 | map(lambda x: T.li [x.encode('utf-8')], e) ] 55 | 56 | def processTR(l): 57 | if len(l) < 1: 58 | return 59 | 60 | return T.tr [ 61 | map(lambda x: T.td [ 62 | x[0].encode('utf-8'), T.em [ x[1].encode('utf-8') ] 63 | ], l) ] 64 | 65 | def processTable(t): 66 | if len(t) < 1: 67 | return 68 | 69 | # If list of lists 70 | if type(t[0]) == type([]): 71 | return T.table(cellspacing="3", cellpadding="4") [ map(lambda x: processTR(x), t) ] 72 | else: 73 | return T.table(cellspacing="3", cellpadding="4") [ processTR(t) ] 74 | 75 | def helpLister(context, data): 76 | ret = [] 77 | for label, noteex, eachList in help: 78 | ret.append(T.h4 [ label.encode('utf-8')] ) 79 | 80 | for i in noteex: 81 | if type(i) == type("") or type(i) is unicode: 82 | ret.append(T.p(class_="note") [ i.encode('utf-8') ]) 83 | else: 84 | ret.append(processEx(i)) 85 | 86 | ret.append(processTable(eachList)) 87 | 88 | return ret 89 | 90 | t = T.div (id="help") [ 91 | T.p (class_ = "heading") [ "QuillPad allows users to type freely without having to follow any strict typing rules. While QuillPad predicts most words correctly, there may be a few cases where the desired word may not appear in the predicted options. Such words can be composed by entering the words in an ITRANS like scheme." 
], 92 | T.p (style = "font-size: 12px") [ "The following examples demonstrate how to write words using the ITRANS like scheme" ], 93 | T.table (cellspacing="3", cellpadding="4") [ 94 | T.thead [ T.th ["Input"], T.th ["Output"], T.th ], 95 | T.tbody(render=exLister, data=examples) 96 | ], 97 | T.h3 ["Scheme Tables"], 98 | T.div(render=helpLister, data=help) 99 | ] 100 | 101 | ts = open('help_template.html').read() 102 | f = open(outfile, "w") 103 | lang = lang[0].upper() + lang[1:] 104 | f.write(string.Template(ts).substitute(lang = lang, content = prettyXHTML(nevow.flat.flatten(t)))) 105 | f.close() 106 | print "done" 107 | 108 | def genHelp(lang, q, outfile): 109 | print "Attempting to generate", outfile, "... ", 110 | help,examples = q.getSchemeHelp() 111 | 112 | s = '
\n' 113 | s += '

QuillPad allows users to type freely without having to follow any strict typing rules. While QuillPad predicts most words correctly, there may be a few cases where the desired word may not appear in the predicted options. Such words can be composed by entering the words in an ITRANS like scheme.

\n' 114 | s += '

The following examples demonstrate how to write words using the ITRANS like scheme

\n' 115 | s += '\n' 116 | s += '\n\n' 117 | for (input,output,note) in examples: 118 | outLine = ''%(input,output.encode('utf-8'),note.encode('utf-8')) 119 | s += '' + outLine + '' 120 | s += "
InputOutput
%s %s %s
\n" 121 | 122 | def processTR(l): 123 | if len(l) < 1: 124 | return 125 | 126 | return "\n%s\n" % (reduce(lambda x, y: x + '' + 127 | y[0].encode('utf-8') + " (" + 128 | y[1].encode('utf-8') + ")", l, "")) 129 | 130 | def processTable(t): 131 | if len(t) < 1: 132 | return 133 | 134 | out = '\n' 135 | # If list of lists 136 | if type(t[0]) == type([]): 137 | for i in t: 138 | out += processTR(i) 139 | else: 140 | out += processTR(t) 141 | return out + '
\n' 142 | 143 | def processEx(e): 144 | if len(e) < 1: 145 | return 146 | 147 | return "Examples\n" % (reduce(lambda x, y: x + "
  • " + 148 | y.encode('utf-8') + "
  • ", e, "")) 149 | 150 | s += '

    Scheme Table

    \n' 151 | for (label,noteex,eachList) in help: 152 | s += '

    %s

    \n'%label.encode('utf-8') 153 | for i in noteex: 154 | if type(i) == type("") or type(i) is unicode: 155 | s += '

    %s

    ' % (i.encode('utf-8')) 156 | else: 157 | s += processEx(i) 158 | 159 | s += processTable(eachList) 160 | 161 | s += "
    \n" 162 | print "done" 163 | 164 | ts = open('help_template.html').read() 165 | f = open(outfile, "w") 166 | lang = lang[0].upper() + lang[1:] 167 | f.write(string.Template(ts).substitute(lang = lang, content = s)) 168 | f.close() 169 | 170 | def processLang(lang, q, outfile): 171 | print "Attempting to generate", outfile, "... ", 172 | f = open(outfile, "w") 173 | f.write(lang + "_interfacemap = " + 174 | json.encode(q.virtualInterfaceMap).encode('utf-8') + ";\n") 175 | f.write(lang + "_keymap = " + 176 | json.encode({"map": q.getVirtualKB()}).encode('utf-8') + ";\n") 177 | f.write(lang + "_pattern = /" + 178 | repr(q.dumpAksharaPattern())[2:-1] + "/g ;\n") 179 | f.write(lang + "_zwnjmap = " + json.encode({ 180 | "zwjSignificant": q.zwjSignificant, 181 | "zwnjSignificant": q.zwnjSignificant, 182 | "zwjCode": repr(q.zwjCode)[2:-1], 183 | "zwnjCode": repr(q.zwnjCode)[2:-1], 184 | "halanth": repr(q.halanth)[2:-1], 185 | "nukta": repr(q.nukta)[2:-1] 186 | }) + ";") 187 | f.close() 188 | 189 | print "done" 190 | 191 | def getLangFile(lang): 192 | # Check if lang is valid, otherwise return empty string 193 | if loadedMaps.has_key(lang): 194 | return loadedMaps[lang] 195 | else: 196 | return "" 197 | 198 | def isDictWord(lang, word): 199 | try: 200 | return loadedUniqueWords[lang].has_key(word) 201 | except KeyError, e: 202 | return False 203 | 204 | def init(): 205 | """ Load the dump files into memory """ 206 | for i in config.langMap: 207 | try: 208 | #f = codecs.open("dump/" + i + "_map.js", "r", "utf-8") 209 | #loadedMaps[i] = f.read() 210 | #f.close() 211 | 212 | # Load the unique_lang_words.txt files as well 213 | loadedUniqueWords[i] = dict([(line.split('\t')[0].decode('utf-8'), 1) for line in open(config.langMap[i][1],'r').readlines()]) 214 | 215 | except IOError, e: 216 | print "Failed to load keyboard map file for %s. 
Exception %s" % (i, e) 217 | else: 218 | print "Loaded keyboard map for", i 219 | 220 | # Load the hindi dictionary 221 | d = loadedUniqueWords['hindi'] 222 | print "Loading hindi dictionary...", 223 | for line in open('HindiDictionary.txt').readlines(): 224 | d[line.strip().decode('utf-8')] = 1 225 | print "Done (%d words)" % (len(d.keys()),) 226 | 227 | init() 228 | -------------------------------------------------------------------------------- /punjabi.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/punjabi.tar.bz2 -------------------------------------------------------------------------------- /quill_cherry8088.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | server.socket_host = "0.0.0.0" 3 | server.socket_port = 8090 4 | server.thread_pool= 10 5 | environment = 'production' 6 | server.show_tracebacks = False 7 | request.show_tracebacks = False 8 | #decoding_filter.on = True 9 | #decoding_filter.encoding = 'utf-8' 10 | log.screen = True 11 | encoding_filter.on = True 12 | encoding_filter.encoding = 'utf-8' 13 | engine.autoreload.on = False 14 | -------------------------------------------------------------------------------- /quilljson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : Nov 22 2012 3 | # @Author : Ram Prakash 4 | # @Version : 1 5 | 6 | def dictToJSON(d): 7 | str = "{" 8 | for i in d: 9 | str += jsonmap[type(i)](i) + ": " 10 | str += jsonmap[type(d[i])](d[i]) + ", " 11 | if len(d.keys()) > 0: 12 | str = str[:-2] 13 | str += "}" 14 | return str 15 | 16 | def boolToJSON(b): 17 | if b: 18 | return "true" 19 | else: 20 | return "false" 21 | 22 | def listToJSON(b): 23 | str = "[" 24 | for i in b: 25 | str += jsonmap[type(i)](i) + ", " 26 | if len(b) > 0: 27 | str = str[:-2] 28 | 
str += "]" 29 | return str 30 | 31 | jsonmap = { 32 | type([]): listToJSON, 33 | type(()): listToJSON, 34 | type({}): dictToJSON, 35 | type(""): lambda x: '"%s"' % (x), 36 | type(0): lambda x: str(x), 37 | type(u""): lambda x: '"' + x + '"', 38 | type(0.1): lambda x: str(x), 39 | type(True): boolToJSON, 40 | type(None): lambda x: "null" 41 | } 42 | 43 | def encode(dict): 44 | if type(dict) != type({}): 45 | raise Exception("Expected a dictionary") 46 | 47 | return dictToJSON(dict) 48 | -------------------------------------------------------------------------------- /startquill_manual.py: -------------------------------------------------------------------------------- 1 | from aifc import Error 2 | import cherrypy 3 | from QuillManual import QuillManual 4 | 5 | class QuillManualCherry: 6 | 7 | @cherrypy.expose 8 | def primaryToUnicodeCherry(self, literal ) : 9 | try: 10 | print "Invoking primaryToUnicode on QuillManual..." 11 | return literal + "\n" + self.quillManual.primaryToUnicode( literal ) 12 | except Exception: 13 | print Exception 14 | return "-------------"; 15 | 16 | @cherrypy.expose 17 | def unicodeToPrimaryCherry(self, uStr ) : 18 | try: 19 | return uStr.decode('utf-8') + "\n" + self.quillManual.unicodeToPrimary( uStr.decode('utf-8') ) 20 | except Exception: 21 | print Exception 22 | return "-------------"; 23 | 24 | @cherrypy.expose 25 | def unicodeToHelperStrCherry(self, uStr ) : 26 | try: 27 | return uStr.decode('utf-8') + "\n" + self.quillManual.unicodeToHelperStr( uStr.decode('utf-8') ) 28 | except Exception: 29 | print Exception 30 | return "-------------"; 31 | 32 | @cherrypy.expose 33 | def getOptionsAtCherry(self, currHelper, currUStr, pos ) : 34 | try: 35 | return currUStr.decode('utf-8') + "\n" + "\n".join(self.quillManual.getOptionsAt( currHelper, currUStr.decode('utf-8'), int(pos) )) 36 | except Exception: 37 | print Exception 38 | return "-------------"; 39 | 40 | @cherrypy.expose 41 | def getInsertCorrectionsCherry(self, currHelper, 
currUStr, pos, delta ) : 42 | try: 43 | corrections = currUStr.decode('utf-8') + "\n" + "\n".join(self.quillManual.getInsertCorrections( currHelper, currUStr.decode('utf-8'), int(pos), delta )); 44 | return corrections 45 | except Exception: 46 | print Exception 47 | return "-------------"; 48 | 49 | @cherrypy.expose 50 | def getDeleteCorrectionsCherry(self, currHelper, currUStr, pos, delLen ) : 51 | try: 52 | return currUStr.decode('utf-8') + "\n" + "\n".join(self.quillManual.getDeleteCorrections( currHelper, currUStr.decode('utf-8'), int(pos), int(delLen) )) 53 | except Exception: 54 | print Exception 55 | return "-------------"; 56 | 57 | def __init__(self): 58 | self.quillManual = QuillManual() 59 | self.quillManual.loadPrimaryDef() 60 | 61 | def main() : 62 | cherrypy.root = QuillManualCherry() 63 | cherrypy.config.update( file='quill_manual.conf' ) 64 | cherrypy.server.start() 65 | 66 | if __name__ == '__main__' : 67 | main() 68 | -------------------------------------------------------------------------------- /tamil.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/tamil.tar.bz2 -------------------------------------------------------------------------------- /telugu.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/telugu.tar.bz2 -------------------------------------------------------------------------------- /unique_word_files.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamtachyon/Quillpad-Server/938671f65cdc02004c4bcf934e7085e1e0e13f72/unique_word_files.zip -------------------------------------------------------------------------------- /wordCounter.py: 
# ---------------------------------------------------------------------------
# wordCounter.py
# ---------------------------------------------------------------------------
from flask import Flask, request, render_template, redirect, url_for
import itertools, threading, json
import time

from RingBuffers import RingBuffer, RingBufferFull

app = Flask(__name__)

# Serialises writes to the 'wordcount' file.
lock = threading.Lock()

# Whole second in which the ring buffer was last advanced.  Stored as an int
# because processInput compares it with int(time.time()).
prevTime = int(time.time())

# Running word counter, resumed from the value persisted in 'wordcount'.
with open('wordcount') as countFile:
    wordCount = itertools.count(int(countFile.read()))

# NOTE(review): presumably a 10-slot buffer seeded with the first counter
# value; confirm against RingBuffers.RingBuffer.
timerRingBuffer = RingBuffer(10, next(wordCount))


@app.route("/processInput")
def processInput():
    """Count one word for requests carrying ``action=addWord``.

    The counter is persisted every 100th word.  The latest count is recorded
    in the ring buffer: a new entry is appended when the wall-clock second
    has changed since the previous update, otherwise the current entry is
    overwritten.

    Returns:
        An empty response body.  Requests with any other action are ignored
        and also get an empty body, since a Flask view must not return None.
    """
    global prevTime

    action = request.args.get('action')
    if action != 'addWord':
        return ""

    count = next(wordCount)
    if count % 100 == 0:
        updateCount(count)

    currTime = int(time.time())
    if currTime != prevTime:
        # Store the integer count, not the counter iterator itself.
        timerRingBuffer.append(count)
        prevTime = currTime
    else:
        timerRingBuffer.data[timerRingBuffer.get_curr()] = count

    return ""


@app.route("/processWordCounts")
def processWordCounts():
    """Return the ring buffer contents serialised as JSON.

    The stdlib ``json`` module imported by this file has no ``listToJSON``;
    ``json.dumps`` is used instead.
    """
    snapshot = timerRingBuffer.get()
    payload = json.dumps(snapshot)
    print('buffer is:  %s' % (snapshot,))
    print('response is %s' % (payload,))
    print('')
    return payload


def updateCount(count):
    """Persist ``count`` to the 'wordcount' file while holding the lock.

    Context managers guarantee the lock is released and the file is closed
    even if the write fails.
    """
    with lock:
        with open('wordcount', 'w') as countFile:
            countFile.write(str(count))


if __name__ == "__main__":
    app.run(debug=True)


# ---------------------------------------------------------------------------
# xlitGen.py
# ---------------------------------------------------------------------------
import sys
import QuillEngXlit


def main():
    """Aggregate word frequencies by their first transliteration.

    Command line arguments:
        argv[1]: input file with one ``word frequency`` pair per line.
        argv[2]: output file receiving ``xlit<TAB>summed frequency`` lines.
        argv[3]: output file receiving ``xlit<TAB>word<TAB>word...`` lines.

    Words for which the engine returns no transliteration are skipped, as
    are blank input lines.  A non-blank line that does not split into
    exactly two fields still raises ValueError so that bad data is noticed.
    """
    if len(sys.argv) < 4:
        print("Usage : xlitGen.py <wordFreqFile> <xlitFreqOut> <xlitMappingOut>")
        return

    xlitEngine = QuillEngXlit.QuillEngXliterator(
        'EnglishPronouncingTrees', 'IndianPronouncingTrees', 'Kannada_Xlit.xml')

    with open(sys.argv[1], 'rb') as source:
        lines = source.readlines()

    xlits = {}          # first transliteration -> summed frequency
    xlitMapping = {}    # first transliteration -> source words, in input order
    for line in lines:
        fields = line.strip().split()
        if not fields:
            continue
        (word, freq) = fields
        xlitWords = xlitEngine.xliterate(word)
        if len(xlitWords) == 0:
            continue
        best = xlitWords[0]
        xlits[best] = xlits.get(best, 0) + int(freq)
        xlitMapping.setdefault(best, []).append(word)

    with open(sys.argv[2], 'wb') as o1:
        for (xlit, freq) in xlits.items():
            o1.write(xlit.encode('utf-8') + '\t' + str(freq) + '\r\n')

    with open(sys.argv[3], 'wb') as o2:
        for (xlit, words) in xlitMapping.items():
            o2.write(xlit.encode('utf-8') + '\t' + '\t'.join(words) + '\r\n')


if __name__ == '__main__':
    main()