├── project ├── plugins.sbt ├── build.properties └── assembly.sbt ├── gui-config.txt ├── src ├── main │ └── scala │ │ └── jiayiwei │ │ ├── essay │ │ ├── ProjectSettings.scala │ │ ├── CommonPhrases.scala │ │ ├── Stemmer.scala │ │ ├── FSA.scala │ │ ├── LoadFile.scala │ │ ├── EssayParser.scala │ │ ├── EssayStat.scala │ │ └── PorterStemmerScala.scala │ │ └── essay_ui │ │ ├── package.scala │ │ ├── RxJComponent.scala │ │ ├── UIMain.scala │ │ ├── StatManager.scala │ │ ├── MarkerTextPane.scala │ │ └── MainFrame.scala └── test │ └── scala │ ├── StemmerTest.scala │ ├── FSATest.scala │ └── EssayParserTest.scala ├── simple-style.css ├── standard-data ├── trivial-words.txt ├── common-phrases.txt └── irregular-words.txt ├── LICENSE.txt ├── index_zh.html ├── README.md ├── index.html └── commonPhrases_irreg.txt /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.8 -------------------------------------------------------------------------------- /project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") -------------------------------------------------------------------------------- /gui-config.txt: -------------------------------------------------------------------------------- 1 | font-name: LucidaGrande 2 | font-size: 15 3 | editor-dimension: 800,600 -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/ProjectSettings.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | 4 | object ProjectSettings { 5 | val projectName = "textVary" 6 | } 7 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/CommonPhrases.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | /** 4 | * Created by weijiayi on 9/8/16. 5 | */ 6 | object CommonPhrases { 7 | def main(args: Array[String]) { 8 | val lines = LoadFile.load("commonPhrases_irreg.txt") 9 | for(line <- lines if line.nonEmpty && !line.contains("…") && !line.contains(".")){ 10 | println(line.trim) 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay_ui/package.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei 2 | 3 | import java.awt.event.{ActionEvent, ActionListener} 4 | 5 | /** 6 | * Created by weijiayi on 9/9/16. 7 | */ 8 | package object essay_ui { 9 | type CallBack = () => Unit 10 | 11 | def callback(action: =>Unit) = () => action 12 | 13 | def mkAction(action: =>Unit) = new ActionListener { 14 | override def actionPerformed(e: ActionEvent): Unit = action 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay_ui/RxJComponent.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay_ui 2 | 3 | import javax.swing.{JButton, JLabel} 4 | 5 | import rx.{Ctx, Rx} 6 | 7 | object RxJComponent { 8 | class RxJLabel(text: Rx[String], implicit val ctx: Ctx.Owner) extends JLabel{ 9 | text.trigger{ 10 | setText(text.now) 11 | } 12 | } 13 | 14 | class RxButton(text: Rx[String], implicit val ctx: Ctx.Owner) extends JButton{ 15 | text.trigger{ 16 | setText(text.now) 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay_ui/UIMain.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay_ui 2 | 3 | import javax.swing.{JLabel, JFrame} 4 | 5 | import jiayiwei.essay.{GUIConfig, EssayStat} 6 | 7 | /** 8 | * Created by weijiayi on 9/9/16. 9 | */ 10 | object UIMain { 11 | 12 | def main(args: Array[String]) { 13 | MainFrame.mkMainFrameByConfigFile("").start() 14 | } 15 | } 16 | 17 | object UITest{ 18 | def main(args: Array[String]) { 19 | MainFrame.mkMainFrameByConfigFile(EssayStat.sample).start() 20 | } 21 | } -------------------------------------------------------------------------------- /src/test/scala/StemmerTest.scala: -------------------------------------------------------------------------------- 1 | import jiayiwei.essay.SimpleStemmer 2 | import utest._ 3 | import utest.framework.{Test, Tree} 4 | 5 | 6 | object StemmerTest extends TestSuite{ 7 | override def tests: Tree[Test] = this { 8 | 9 | 'simple_stemmer_check { 10 | val s = SimpleStemmer.standard 11 | 12 | assert(s.stem("adapted") == "adapt") 13 | assert(s.stem("going") == "go") 14 | assert(s.stem("comes") == "come") 15 | assert(s.stem("it") == "it") 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /simple-style.css: -------------------------------------------------------------------------------- 1 | html{ 2 | font-family: sans-serif; 3 | } 4 | 5 | body { 6 | width: 800px; 7 | max-width: 90vw; 8 | margin: auto auto 100px; 9 | } 10 | 11 | .example { 12 | margin-left: 20px; 13 | margin-top: 30px; 14 | margin-bottom: 30px; 15 | } 16 | 17 | .example p{ 18 | max-width: 600px; 19 | } 20 | 21 | .example img{ 22 | border: solid black 1px; 23 | } 24 | 25 | .example video{ 26 | border: solid black 1px; 27 | } 28 | 29 | li{ 30 | margin-bottom: 14px; 31 | margin-top: 14px; 32 | } -------------------------------------------------------------------------------- /standard-data/trivial-words.txt: -------------------------------------------------------------------------------- 1 | // trivial ones 2 | and or not 3 | have 4 | it 5 | be get let 6 | a the that this 7 | who how what which when 8 | there here where 9 | 10 | // common Prepositions 11 | aboard 12 | about 13 | above 14 | across 15 | after 16 | against 17 | along 18 | amid 19 | among 20 | anti 21 | around 22 | as 23 | at 24 | before 25 | behind 26 | below 27 | beneath 28 | beside 29 | besides 30 | between 31 | beyond 32 | but 33 | by 34 | concerning 35 | considering 36 | despite 37 | down 38 | during 39 | except 40 | excepting 41 | excluding 42 | following 43 | for 44 | from 45 | in 46 | inside 47 | into 48 | like 49 | minus 50 | near 51 | of 52 | off 53 | on 54 | onto 55 | opposite 56 | outside 57 | over 58 | past 59 | per 60 | plus 61 | regarding 62 | round 63 | save 64 | since 65 | than 66 | through 67 | to 68 | toward 69 | towards 70 | under 71 | underneath 72 | unlike 73 | until 74 | up 75 | upon 76 | versus 77 | via 78 | with 79 | within 80 | without -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Wei Jiayi(wjydzh1@163.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/test/scala/FSATest.scala: -------------------------------------------------------------------------------- 1 | import jiayiwei.essay.{EssayStat, FSA} 2 | import jiayiwei.essay.FSA.Phrase 3 | import utest._ 4 | import utest.framework.{Test, Tree} 5 | 6 | 7 | object FSATest extends TestSuite { 8 | override def tests: Tree[Test] = this { 9 | val p1 = List("hello", "there","how","are","you") 10 | val p2 = List("hello","there","world") 11 | val p3 = List("something", "irrelevant") 12 | val ps3 = List(p1,p2,p3) 13 | 14 | 'single_phrase { 15 | val state = FSA.newRoot() 16 | state.addPhrase(p1) 17 | assert(state.subPhrases == List(p1)) 18 | } 19 | 20 | 'multi_phrase { 21 | assert(FSA.createFromPhrases(ps3).subPhrases.toSet == ps3.toSet) 22 | } 23 | 24 | 'longest_phrase_test { 25 | val fsa = FSA.createFromPhrases(ps3) 26 | val stat = new EssayStat(_=>false, fsa) 27 | assert(stat.longestPhraseInSentence(Seq("x", "y", "z")).isEmpty) 28 | assert(stat.longestPhraseInSentence(Seq("hello")).isEmpty) 29 | assert(stat.longestPhraseInSentence(p3).contains(2)) 30 | val a = stat.longestPhraseInSentence(Seq("hello", "there", "world", "extra")) 31 | assert(a == Some(3)) 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/scala/EssayParserTest.scala: -------------------------------------------------------------------------------- 1 | 2 | import jiayiwei.essay.EssayParser 3 | import utest._ 4 | import utest.framework.{Test, Tree} 5 | 6 | import scala.util.Random 7 | 8 | object EssayParserTest extends TestSuite{ 9 | override def tests: Tree[Test] = this { 10 | val parser = EssayParser.standard 11 | 12 | 'test_simple_text { 13 | val sample = "This was, really good! Am I right? End." 14 | val r = parser.parseText(sample) 15 | assert(r.length == 4) 16 | } 17 | 18 | 'random_text_test { 19 | val random = new Random() 20 | val charList = (('a' to 'g') ++ ('H' to 'N') ++ "@#$%^&*()-+=~/<>" ++ " \n\n,,,,....??!!;;\t").toArray 21 | val upper = charList.length 22 | def randomChar: Char = { 23 | val n = random.nextInt(upper) 24 | charList(n) 25 | } 26 | 27 | ((0 until 10) ++ (10 to 100 by 10) ++ (1000 to 2000 by 500)).foreach { i => 28 | val text = String.copyValueOf((0 until 10*i).map { _ => randomChar }.toArray) 29 | parser.parseText(text) 30 | } 31 | } 32 | 33 | 'sentence_test { 34 | val s1 = " \n\t texts fight a fights in addition great, in addition" 35 | val result = parser.parseText(s1) 36 | assert(result.length == 2) 37 | val raw = result.head.words.map(_.original) 38 | assert(raw == Seq("texts","fight","a","fights","in","addition","great")) 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /index_zh.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |17 | 在GRE/TOEFL写作中, 灵活多变的词汇通常是取得高分的关键. 而textVary便是为这个目的而生的--当你使用它练习写作时, 它的算法能发现你重复用过的单词和短语,并及时地提供视觉上的反馈, 帮助(或迫使)你养成使用不同表达的习惯. 18 |
19 | 20 |
29 | TextVary is written by Jiayi Wei(魏家一), here is his github homepage.
47 | 48 | 49 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/Stemmer.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | import scala.collection.mutable 4 | 5 | object SimpleStemmer{ 6 | 7 | def fileDefined(directory: String) = { 8 | val irregMap = new mutable.HashMap[String,String]() 9 | 10 | def addWord(root: String)(forms: String*): Unit = { 11 | forms.foreach{ 12 | f => irregMap += (f -> root) 13 | } 14 | } 15 | 16 | for { 17 | line <- LoadFile.load(s"$directory/irregular-words.txt") if !line.trim.startsWith("//") 18 | }{ 19 | val list = line.trim.split("\\s+") 20 | if(list.length >= 2){ 21 | addWord(list.head)(list.tail :_*) 22 | } 23 | } 24 | 25 | println(s"User defined stemmer with ${irregMap.size} irregular stemming rules loaded.") 26 | new SimpleStemmer(irregMap) 27 | } 28 | 29 | lazy val standard = SimpleStemmer.fileDefined(LoadFile.standard) 30 | } 31 | 32 | /** 33 | * A simple stemmer using a combination of stemming dictionary and Porter's algorithm 34 | * @param irregMap 35 | */ 36 | class SimpleStemmer(irregMap: mutable.Map[String,String]) extends Stemmer { 37 | val porter = PorterStemmer 38 | 39 | override def stem(wordText: String): String = { 40 | val lowerCase = wordText.toLowerCase 41 | irregMap.get(lowerCase) match { 42 | case Some(result) => result 43 | case None => 44 | porter.stem(lowerCase) 45 | } 46 | } 47 | } 48 | 49 | trait Stemmer{ 50 | def stem(word: String): String 51 | } 52 | 53 | object StemmerLib{ 54 | 55 | val stemmer = SimpleStemmer.standard 56 | def main(args: Array[String]) { 57 | while (true){ 58 | val word = Console.in.readLine() 59 | if(word.isEmpty) 60 | return 61 | println(stemmer.stem(word)) 62 | } 63 | } 64 | } -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/FSA.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | import FSA._ 4 | 5 | class FSAState(var acceptable: Boolean) { 6 | private var connections = Map[FSAWord, FSAState]() 7 | 8 | private def addConnection(word: FSAWord, state: FSAState) = { 9 | connections += (word -> state) 10 | } 11 | 12 | def nextState(word: FSAWord): Option[FSAState] = { 13 | connections.get(word) 14 | } 15 | 16 | def subPhrases: List[Phrase] = { 17 | val subs: List[Phrase] = connections.toList.flatMap{ 18 | case (w,s) => s.subPhrases.map(w::_) 19 | } 20 | if(acceptable) emptyPhrase :: subs else subs 21 | } 22 | 23 | def addPhrase(phrase: Phrase): Unit ={ 24 | phrase match { 25 | case Nil => 26 | acceptable = true 27 | case w::t => 28 | val next = nextState(w) match { 29 | case None => 30 | val s = new FSAState(acceptable = false) 31 | addConnection(w, s) 32 | s 33 | case Some(s) => s 34 | } 35 | next.addPhrase(t) 36 | } 37 | } 38 | } 39 | 40 | 41 | 42 | object FSA{ 43 | type FSAWord = String 44 | type Phrase = List[FSAWord] 45 | val emptyPhrase = List[FSAWord]() 46 | 47 | 48 | def newRoot() = new FSAState(acceptable = false) 49 | 50 | def createFromPhrases(phrases: Seq[Phrase]) = { 51 | val s = newRoot() 52 | phrases.foreach(p => s.addPhrase(p)) 53 | s 54 | } 55 | 56 | def fileDefined(dir: String, stemmer: Stemmer) = { 57 | val s = newRoot() 58 | var phraseNum = 0 59 | for{ 60 | line <- LoadFile.load(s"$dir/common-phrases.txt") if !line.trim.startsWith("//") 61 | words = line.split("\\s+") if words.length>=2 62 | }{ 63 | s.addPhrase(words.map(stemmer.stem).toList) 64 | phraseNum += 1 65 | } 66 | println(s"User defined phrase model with $phraseNum phrases loaded.") 67 | s 68 | } 69 | } -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay_ui/StatManager.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay_ui 2 | 3 | import jiayiwei.essay.EssayStat.PhraseUsage 4 | import jiayiwei.essay.FSA.Phrase 5 | import jiayiwei.essay.{TextRange, EssayParser, EssayStat} 6 | import rx.{Ctx, Rx, Var} 7 | 8 | 9 | 10 | class StatManager(markerPane: MarkerTextPane, statModel: EssayStat, essayParser: EssayParser, implicit val ctx: Ctx.Owner) { 11 | import StatManager._ 12 | 13 | val underlineThicknessVar = Var(3f) 14 | 15 | private val statDataVar = Var(getStat) 16 | val wordCountVar = Rx{ statDataVar().wordCount } 17 | 18 | private val caretDotVar = Var(currentCaretDot) 19 | private val markersToDisplay = Rx{ 20 | val colorBlocks = statDataVar().phraseMap.flatMap { 21 | case (p, ps) => 22 | val frequency = ps.length 23 | val line = ColorBlock(ColorBlock.colorFromFrequency(frequency)) 24 | ps.map { usage => 25 | val range = EssayStat.rangeOfPhraseUsage(usage) 26 | MarkRegion(range, line) 27 | } 28 | } 29 | 30 | val highlights = statDataVar().infoRegions.find(_.range.isNearTo(caretDotVar())) match { 31 | case Some(info) => 32 | val usageList = statDataVar().phraseMap(info.phrase) 33 | val frequency = usageList.length 34 | val block = ColorUnderline(ColorUnderline.colorFromFrequency(frequency), underlineThicknessVar()) 35 | usageList.map { usage => 36 | val range = EssayStat.rangeOfPhraseUsage(usage) 37 | MarkRegion(range, block) 38 | } 39 | case None => 40 | Seq() 41 | } 42 | 43 | colorBlocks ++ highlights 44 | } 45 | 46 | markersToDisplay.trigger{ 47 | markerPane.setMarkers(markersToDisplay.now.toSeq) 48 | } 49 | 50 | def getStat: StatData = { 51 | val text = markerPane.getText 52 | val result = statModel.stat(essayParser.parseText(text)) 53 | val phraseMap = result.stat.toMap.filter{case (p,ps) => ps.length>=2} 54 | val infoRegions = phraseMap.toArray.flatMap{ 55 | case (p, ps) => ps.map{usage => 56 | val range = EssayStat.rangeOfPhraseUsage(usage) 57 | InfoRegion(p, range) 58 | } 59 | }.sortBy(_.range.start) 60 | 61 | StatData(infoRegions,phraseMap, result.wordCount) 62 | } 63 | 64 | def currentCaretDot = markerPane.getCaret.getDot 65 | 66 | def editCallBack() = { 67 | statDataVar() = getStat 68 | } 69 | 70 | def caretCallBack() = { 71 | caretDotVar() = currentCaretDot 72 | } 73 | } 74 | 75 | object StatManager { 76 | case class InfoRegion(phrase: Phrase , range: TextRange) 77 | 78 | case class StatData(infoRegions: Array[InfoRegion], phraseMap: Map[Phrase, Vector[PhraseUsage]], wordCount: Int) 79 | } 80 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/LoadFile.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | import java.awt.{Dimension, Font} 4 | import java.io.FileWriter 5 | import javax.swing.{UIManager, JOptionPane} 6 | 7 | import scala.io.Source 8 | 9 | /** 10 | * Created by weijiayi on 9/8/16. 11 | */ 12 | object LoadFile { 13 | def load(fileName: String) = { 14 | loadOpt(fileName) match{ 15 | case Some(lines) => lines 16 | case None => 17 | JOptionPane.showMessageDialog(null, 18 | s"the file '$fileName' cannot be loaded.", 19 | "File not loaded", 20 | JOptionPane.ERROR_MESSAGE) 21 | throw new Exception(s"Can't load file '$fileName'") 22 | } 23 | } 24 | 25 | def loadOpt(fileName: String) = { 26 | try{ 27 | Some(Source.fromFile(fileName).getLines()) 28 | }catch { 29 | case _: Exception => 30 | None 31 | } 32 | } 33 | 34 | val standard = "standard-data" 35 | } 36 | 37 | 38 | case class GUIConfig(editorFont: Font, editorDimension: Dimension){ 39 | def configText = { 40 | Seq( 41 | s"font-name: ${editorFont.getFontName}", 42 | s"font-size: ${editorFont.getSize}", 43 | s"editor-dimension: ${editorDimension.width},${editorDimension.height}" 44 | ).mkString("\n") 45 | } 46 | } 47 | 48 | object GUIConfig{ 49 | val defaultConfig = GUIConfig( 50 | editorFont = UIManager.getDefaults.getFont("TextPane.font"), 51 | editorDimension = new Dimension(600,500) 52 | ) 53 | 54 | def mapFromLines(lines: Seq[String]) = { 55 | var map = Map[String, String]() 56 | for{ 57 | l <- lines if l.trim.nonEmpty 58 | line = l.trim 59 | } { 60 | val parts = line.split(":\\s*") 61 | map += (parts(0) -> parts(1)) 62 | } 63 | map 64 | } 65 | 66 | def loadFromFile(path: String = "gui-config.txt"): GUIConfig = { 67 | LoadFile.loadOpt(path) match{ 68 | case Some(lines) => 69 | try{ 70 | val map = mapFromLines(lines.toSeq) 71 | val font = new Font(map("font-name"),Font.PLAIN ,map("font-size").toInt) 72 | val d = map("editor-dimension").split(",") 73 | val dimension = new Dimension(d(0).toInt, d(1).toInt) 74 | GUIConfig(font, dimension) 75 | } catch{ 76 | case e: Exception => 77 | JOptionPane.showMessageDialog(null, 78 | s"the file '$path' cannot be parsed.", 79 | "File can't be parsed.", 80 | JOptionPane.ERROR_MESSAGE) 81 | throw e 82 | } 83 | 84 | case None => 85 | val text = defaultConfig.configText 86 | val fw = new FileWriter(path) 87 | fw.write(text) 88 | fw.close() 89 | JOptionPane.showMessageDialog(null, 90 | s"the file '$path' cannot be found, a default configuration is created.", 91 | "Config not found", 92 | JOptionPane.WARNING_MESSAGE) 93 | defaultConfig 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/EssayParser.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | import fastparse.core.Parsed.{Failure, Success} 4 | import jiayiwei.essay.WordWithRange.Root 5 | 6 | case class TextRange(start: Int, until: Int){ 7 | override def toString = s"[$start, $until]" 8 | 9 | def isNearTo(pos: Int) = start <= pos && pos <= until 10 | } 11 | 12 | object WordWithRange{ 13 | type Root = String 14 | } 15 | 16 | case class WordWithRange(original: String, root: Root, range: TextRange){ 17 | override def toString = s"$range($original->$root)" 18 | } 19 | case class SentencePart(words: Seq[WordWithRange], range: TextRange){ 20 | override def toString = s"$range{${words.mkString(" ")}}" 21 | } 22 | 23 | class EssayParser(stemmer: Stemmer) { 24 | import fastparse.all._ 25 | 26 | val capitalLetter = CharIn('A' to 'Z') 27 | val space = CharIn(" \n\t") 28 | 29 | val sentencePartDivider = space.rep ~ (CharIn(",;:!?()\"") ~ space.rep | CharIn(".-") ~ space.rep ~ &(capitalLetter|End)) 30 | 31 | val wordStopper = sentencePartDivider | space 32 | 33 | val wordParser = P(Index ~ !wordStopper ~ (CharPred(_ != ' ') ~ !(wordStopper|End)).rep.! ~ AnyChar.! ~ Index).map{ 34 | case (start,l,r, end) => 35 | val original = l+r.toString 36 | WordWithRange(original, stemmer.stem(original), TextRange(start, end)) 37 | } 38 | 39 | val sentencePartParser = P(Index ~ wordParser.rep(sep=space.rep(min=1), min = 1) ~ Index).map{ 40 | case (start, words, end) => SentencePart(words, TextRange(start, end)) 41 | } 42 | 43 | 44 | val essayParser = wordStopper.rep ~ sentencePartParser.rep(sep=sentencePartDivider.rep(min=1)) ~ wordStopper.rep ~ End 45 | 46 | def parseText(text: String) = { 47 | essayParser.parse(text) match{ 48 | case Success(parts, _) => 49 | parts 50 | case f:Failure => 51 | println("Fail to parse: ") 52 | println(text) 53 | throw new Exception(f.toString) 54 | } 55 | } 56 | } 57 | 58 | object EssayParser { 59 | lazy val standard = new EssayParser(SimpleStemmer.standard) 60 | 61 | val sampleText = 62 | """ 63 | |Horse and horses! 64 | |First, 1.5 years ago, some super-man was there. I believe the increasingly use of new innovations in means of transport will replace the utility of traditional ones, including the use of cars. Through the history of human, we've never stopped creating new methods of traveling. In ancient days, we tamed horses, invented boats and carriages. And about one hundred years ago, the first car was designed and put into use. So it's hard to believe that we will stop here and not invent some new means of transport. Those cool traveling machines you read or heard of from science fictions, like flying cars, personal mini airplanes or city transport belts, may well get popular in some near future. And when more people have adapted to those new ways of travel, they will never turn back again. Our ancestors were once used to riding on the backs of horses or sitting in carriages, but as soon as the appearance of modern cars and trains, the use of those old methods was quickly replaced. And you can even hardly see any horses in a city nowadays. Unexceptionally, this principle will apply to cars as well, so the people driving cars in that day may be as rare as those who rides a horse you see today. 65 | """.stripMargin 66 | 67 | def main(args: Array[String]) { 68 | standard.parseText("""H !,.a""".stripMargin).foreach(println) 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay_ui/MarkerTextPane.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay_ui 2 | 3 | import java.awt._ 4 | import javax.swing.JTextPane 5 | import javax.swing.event.{DocumentEvent, DocumentListener} 6 | import MarkType.withAlpha 7 | 8 | import jiayiwei.essay.{GUIConfig, TextRange} 9 | 10 | trait MarkType{ 11 | def mark(r1: Rectangle, r2: Rectangle, g: Graphics) 12 | } 13 | 14 | object MarkType{ 15 | def withAlpha(c: Color, a: Double): Color = new Color(c.getRed, c.getGreen, c.getBlue, (a*255).toInt) 16 | 17 | def IntInterpolate(y0: Int, y1: Int)(x: Double) = y0 + ((y1-y0)*x).toInt 18 | 19 | def colorInterpolate(c0: Color, c1: Color)(x: Double) = { 20 | new Color( 21 | IntInterpolate(c0.getRed,c1.getRed)(x), 22 | IntInterpolate(c0.getGreen,c1.getGreen)(x), 23 | IntInterpolate(c0.getBlue,c1.getBlue)(x), 24 | IntInterpolate(c0.getAlpha,c1.getAlpha)(x) 25 | ) 26 | } 27 | } 28 | 29 | case class ColorBlock(color: Color) extends MarkType{ 30 | override def mark(r1: Rectangle, r2: Rectangle, g: Graphics): Unit = { 31 | g.setColor(color) 32 | g.fillRect(r1.x,r2.y,r2.x-r1.x,r1.height) 33 | } 34 | } 35 | 36 | object ColorBlock { 37 | import MarkType._ 38 | 39 | // def colorFromFrequency(f: Int) = { 40 | // if(f<=3) withAlpha(Color.green, 0.15) 41 | // else if(f<=6) withAlpha(Color.yellow, 0.3) 42 | // else withAlpha(Color.orange,0.4) 43 | // } 44 | 45 | def colorFromFrequency(f: Int) = { 46 | val x = 1.0 - math.exp(-(f-2).toDouble/3) 47 | colorInterpolate( 48 | withAlpha(Color.yellow, 0.3), 49 | withAlpha(Color.red, 0.0))(x) 50 | } 51 | } 52 | 53 | case class ColorUnderline(color: Color, thickness: Float) extends MarkType{ 54 | override def mark(r1: Rectangle, r2: Rectangle, g: Graphics): Unit = { 55 | val g2d = g.asInstanceOf[Graphics2D] 56 | g2d.setColor(color) 57 | val y = r1.y+r1.height 58 | g2d.setStroke(new BasicStroke(thickness)) 59 | g2d.drawLine(r1.x, y, r2.x, y) 60 | } 61 | } 62 | 63 | object ColorUnderline{ 64 | def colorFromFrequency(f: Int) = { 65 | // if(f<=3) withAlpha(Color.green, 1.0) 66 | // else if(f<=6) withAlpha(Color.yellow, 1.0) 67 | // else withAlpha(Color.orange,1.0) 68 | Color.blue 69 | } 70 | } 71 | 72 | case class MarkRegion(range: TextRange, data: MarkType) 73 | 74 | /** 75 | * This is a special JTextPane which can draw various color regions (see `MarkRegion` case class) on top of its text. 76 | */ 77 | class MarkerTextPane() extends JTextPane{ 78 | private var currentMarkers = Seq[MarkRegion]() 79 | 80 | setFont(GUIConfig.loadFromFile().editorFont) 81 | 82 | def setMarkers(markers: Seq[MarkRegion]) = { 83 | currentMarkers = markers 84 | repaint() 85 | } 86 | 87 | override def paintComponent(g: Graphics): Unit = { 88 | super.paintComponent(g) 89 | 90 | currentMarkers.foreach{ 91 | case MarkRegion(range, markerT) => 92 | for{ 93 | r1 <- rectOfPos(range.start) 94 | r2 <- rectOfPos(range.until) 95 | }{ 96 | if(r1.y != r2.y){ 97 | val r1End = new Rectangle(getWidth, r1.y, 0, r1.height) 98 | val r2Start = new Rectangle(0, r2.y, 0, r2.height) 99 | markerT.mark(r1,r1End,g) 100 | markerT.mark(r2Start,r2,g) 101 | }else 102 | markerT.mark(r1,r2,g) 103 | } 104 | } 105 | } 106 | 107 | 108 | def rectOfPos(pos: Int): Option[Rectangle] = { 109 | try{ 110 | val r = modelToView(pos) 111 | Some(r) 112 | }catch{ 113 | case _: Exception => 114 | println(s"[Warn] Can't access position $pos of the document!") 115 | None 116 | } 117 | } 118 | 119 | def lowerLeft(r: Rectangle) = (r.x, r.y+r.height) 120 | } 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # textVary 2 | Helps you write better GRE/TOEFL essays by detecting your repeating phrases. 3 | 4 | ## What is textVary 5 | 6 | [Introduction in English (英文介绍)](https://mrvplusone.github.io/textVary/) 7 | 8 | [中文介绍 (Introduction in Chinese)](https://mrvplusone.github.io/textVary/index_zh.html) 9 | 10 | ## Download 11 | 12 | [textVary-1.0.zip](https://github.com/MrVPlusOne/textVary/releases/download/v1.0/textVary-1.0.zip) 13 | 14 | In order to run textVary, make sure you have [installed Java](https://www.java.com/en/download/) on your computer. After unzipping the downloaded zip file, simply click the `textVary.jar` file to run the application. 15 | 16 | ## How it works 17 | 18 | To get statistics of phrase usage from the input: 19 | 20 | * TextVary first parses the essay into a sequence of *SentencePart*s, each *SentencePart* is composed of many *WordUsage*s 21 | * A *WordUsage* contains information about a word's original form and position in the essay, as well as the stem of that word. The stem is used to match words and phrases in later stages. 22 | * The parser is written in the parser-combinator library [FastParse](https://github.com/lihaoyi/fastparse) 23 | * The stemming strategy makes use of a combination of Porter's algorithm and an irregular word list. The scala implementation of Porter's algorithm was copied from [here](https://github.com/scalanlp/chalk/blob/master/src/main/scala/chalk/text/analyze/PorterStemmer.scala) 24 | * TextVary stores a collection of set phrases as a Finite State Automaton(FSA). The words in those phrases are reduced to their stem forms. 25 | * To detect set phrases, textVary uses a greedy algorithm, trying to find the longest phrases accepted by the FSA. A set phrase can not span across the boundaries of *SentencePart*s. 26 | 27 | To turn the statistics into visual feedback: 28 | 29 | * The UI is implemented in a Reactive Programming (RP) style. 30 | * (The current input ~> usage statistics) + UI input information ~> visual data ~> draw visual elements on top of the input 31 | 32 | 33 | * As the frequency of a word or phrase increases, its background color varies from a translucent yellow to a transparent red. In this way, if a usage appears again and again in an essay, its highlight will eventually become unnoticeable. This is the desired behaviour because it helps the user focus on avoiding using the same expression twice or the third time, but ignore the repeating use of topic words or key words. 34 | *  35 | *  36 | *  37 | 38 | 39 | ## Licence 40 | 41 | The MIT License (MIT) 42 | 43 | Copyright (c) 2016 Jiayi Wei (wjydzh1@163.com) 44 | 45 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 46 | 47 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 48 | 49 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 50 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay_ui/MainFrame.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay_ui 2 | 3 | import java.awt.event.{AdjustmentEvent, AdjustmentListener, KeyAdapter} 4 | import java.awt._ 5 | import javax.swing.event.{CaretEvent, CaretListener, DocumentEvent, DocumentListener} 6 | import javax.swing._ 7 | import javax.swing.text.DefaultCaret 8 | 9 | import jiayiwei.essay.{GUIConfig, ProjectSettings, EssayParser, EssayStat} 10 | import jiayiwei.essay_ui.RxJComponent.{RxButton, RxJLabel} 11 | import rx._ 12 | import rx.Ctx.Owner.Unsafe._ 13 | 14 | /** 15 | * The graphical user interface for textVary 16 | */ 17 | class MainFrame(initContent: String, editSize: Dimension, font: Font) { 18 | import MainFrame._ 19 | 20 | val timeInSecVar = Var(0) 21 | val isTimeRunning = Var(false) 22 | 23 | new Timer(1000, mkAction{ 24 | if(isTimeRunning.now) 25 | timeInSecVar.synchronized{ 26 | timeInSecVar() = timeInSecVar.now + 1 27 | } 28 | }).start() 29 | 30 | val markerPane = new MarkerTextPane() { 31 | setText(initContent) 32 | setBorder(BorderFactory.createEmptyBorder(5,10,5,5)) 33 | } 34 | 35 | val markerManager = new StatManager(markerPane, EssayStat.standard, EssayParser.standard, implicitly) 36 | 37 | val wordCountLabel = new RxJLabel( 38 | markerManager.wordCountVar.map{wc => s" Word Count: $wc"}, implicitly) 39 | 40 | val timeLabel = new RxJLabel( 41 | timeInSecVar.map{ 42 | t => s" Time: ${displayTimeFromSec(t)}" 43 | }, implicitly 44 | ) 45 | 46 | val pauseButton = new RxButton( 47 | isTimeRunning.map{ r => if(r) "Pause" else "Resume"}, implicitly 48 | ) 49 | pauseButton.addActionListener(mkAction{isTimeRunning() = !isTimeRunning.now}) 50 | 51 | val resetButton = new JButton("Reset"){ 52 | addActionListener(mkAction{ 53 | timeInSecVar.synchronized{ 54 | timeInSecVar() = 0 55 | } 56 | }) 57 | } 58 | 59 | markerPane.getDocument.addDocumentListener(new DocumentListener { 60 | override def insertUpdate(e: DocumentEvent): Unit = markerManager.editCallBack() 61 | 62 | override def changedUpdate(e: DocumentEvent): Unit = markerManager.editCallBack() 63 | 64 | override def removeUpdate(e: DocumentEvent): Unit = markerManager.editCallBack() 65 | }) 66 | 67 | markerPane.addCaretListener(new CaretListener { 68 | override def caretUpdate(e: CaretEvent): Unit = markerManager.caretCallBack() 69 | }) 70 | 71 | /** 72 | * Call this method to start the JFrame 73 | * @return 74 | */ 75 | def start() = { 76 | val frame = new JFrame(ProjectSettings.projectName){ 77 | setContentPane( 78 | vContainer( 79 | hContainer( 80 | timeLabel, pauseButton, resetButton 81 | ), 82 | new JScrollPane(markerPane){ 83 | setPreferredSize(editSize) 84 | }, 85 | hContainer( 86 | wordCountLabel 87 | ) 88 | ) 89 | ) 90 | 91 | pack() 92 | setVisible(true) 93 | } 94 | 95 | frame.setDefaultCloseOperation(WindowConstants.DO_NOTHING_ON_CLOSE) 96 | 97 | import javax.swing.JOptionPane 98 | /*Some piece of code*/ 99 | frame.addWindowListener(new java.awt.event.WindowAdapter() { 100 | override def windowClosing(windowEvent: java.awt.event.WindowEvent ) { 101 | if (JOptionPane.showConfirmDialog(frame, 102 | "Are you sure to exit textVary?", "Confirm exit", 103 | JOptionPane.YES_NO_OPTION, 104 | JOptionPane.QUESTION_MESSAGE) == JOptionPane.YES_OPTION){ 105 | System.exit(0) 106 | } 107 | } 108 | }) 109 | 110 | frame 111 | } 112 | 113 | def hContainer(components: JComponent*) = { 114 | val box = Box.createHorizontalBox() 115 | components.foreach(box.add) 116 | box 117 | } 118 | 119 | def vContainer(components: JComponent*) = { 120 | val box = Box.createVerticalBox() 121 | components.foreach{c => 122 | box.add(c) 123 | c.setAlignmentX(Component.LEFT_ALIGNMENT) 124 | } 125 | box 126 | } 127 | 128 | def hSpring = { 129 | new JPanel(){ 130 | setMaximumSize(new Dimension(-1,1)) 131 | } 132 | } 133 | } 134 | 135 | object MainFrame{ 136 | def displayTimeFromSec(sec: Int): String = { 137 | "%02d:%02d:%02d".format(sec/3600, (sec/60)%60, sec%60) 138 | } 139 | 140 | def mkMainFrameByConfigFile(initContent: String) = { 141 | val config = GUIConfig.loadFromFile() 142 | new MainFrame(initContent, config.editorDimension ,config.editorFont) 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |17 | In GRE/TOEFL writing, a varied sentence structure and vocabulary often helps you a lot to receive a high score. And here comes textVary, a writing assistant program that is capable of detecting your repeating phrases and providing you with visual feedback, thus helping you form a habit of using diverse expressions. 18 |
19 | 20 |The following video demonstrates how textVary helps the user avoid a second use of the phrase 'as well as'.
22 | 23 |TextVary can also recognize various forms of the same word.
28 |
29 | In fact, textVary analyzes your essays carefully, it even understands the same punctuation may have different meaning in different contexts.
34 | 35 |To get statistics of phrase usage from the input:
46 |To turn the statistics into visual feedback:
58 |The UI is implemented in a Reactive Programming (RP) style.
60 |As the frequency of a word or phrase increases, its background color varies from a translucent yellow to a transparent red. In this way, if a usage appears again and again in an essay, its highlight will eventually become unnoticeable. This is the desired behaviour because it helps the user focus on avoiding using the same expression twice or the third time, but ignore the repeating use of topic words or key words.
65 | 66 |TextVary is written by Jiayi Wei(魏家一), here is his github homepage.
76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/EssayStat.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | import jiayiwei.essay.EssayStat.PhraseUsage 4 | import jiayiwei.essay.FSA.Phrase 5 | import jiayiwei.essay.WordWithRange.Root 6 | 7 | import scala.collection.mutable 8 | 9 | 10 | 11 | class StatResult{ 12 | val stat = new mutable.HashMap[Phrase, Vector[PhraseUsage]]() 13 | var wordCount = 0 14 | 15 | def addUsage(usage: PhraseUsage): Unit = { 16 | val phrase = EssayStat.getPhraseFromUsage(usage) 17 | val oldUse = stat.getOrElse(phrase, Vector()) 18 | stat(phrase) = oldUse :+ usage 19 | } 20 | 21 | def template(title: String, data: Seq[(Phrase, Vector[PhraseUsage])]) = { 22 | s"\n----$title----\n" + 23 | data.sortBy(_._2.length).map{ 24 | case (p, ps) => 25 | val detail = ps.map{p => p.map(_.original).mkString(" ")}.mkString(" | ") 26 | s"* ${p.mkString(" ")} : ${ps.length}\n\t${detail}" 27 | }.mkString("\n") 28 | } 29 | 30 | def phrasesStat = { 31 | template("Phrases Usage", stat.filter{case (p, _) => p.length>1}.toList) 32 | } 33 | 34 | override def toString = { 35 | template("Statistics", stat.toList) 36 | } 37 | } 38 | 39 | class EssayStat(isTrivialWord: WordWithRange => Boolean, phraseMap: FSAState){ 40 | def longestPhraseInSentence(words: Seq[Root]) = { 41 | def iterate(words: Seq[Root], currentState: FSAState, lastAccept: Option[Int], currentLen: Int): Option[Int] = { 42 | val accept = if(currentState.acceptable) Some(currentLen) else lastAccept 43 | if(words.isEmpty){ 44 | accept 45 | }else{ 46 | val w = words.head 47 | currentState.nextState(w) match{ 48 | case Some(n) => 49 | iterate(words.tail, n, accept, currentLen+1) 50 | case None => 51 | accept 52 | } 53 | } 54 | } 55 | iterate(words, phraseMap, None, 0) 56 | } 57 | 58 | 59 | def stat(sentenceParts: Seq[SentencePart]): StatResult = { 60 | val result = new StatResult 61 | 62 | def sentenceWordUse(words: Seq[WordWithRange]): Unit = { 63 | if(words.nonEmpty){ 64 | val roots = words.toStream.map(_.root) 65 | longestPhraseInSentence(roots) match { 66 | case Some(l) => 67 | val (p, left) = words.splitAt(l) 68 | result.addUsage(p.toList) 69 | sentenceWordUse(left) 70 | case None => 71 | if(!isTrivialWord(words.head)) 72 | result.addUsage(List(words.head)) 73 | 74 | sentenceWordUse(words.tail) 75 | } 76 | } 77 | } 78 | 79 | sentenceParts.foreach{ sentence => 80 | sentenceWordUse(sentence.words) 81 | result.wordCount += sentence.words.length 82 | } 83 | 84 | result 85 | } 86 | 87 | } 88 | 89 | object EssayStat { 90 | type PhraseUsage = List[WordWithRange] 91 | 92 | def getPhraseFromUsage(usage: PhraseUsage): Phrase = usage.map(_.root) 93 | 94 | def rangeOfPhraseUsage(usage: PhraseUsage) = { 95 | TextRange(usage.head.range.start, usage.last.range.until) 96 | } 97 | 98 | def fileDefined(dir: String, stemmer: Stemmer) = { 99 | val trivialSet = new mutable.HashSet[Root]() 100 | 101 | for{ 102 | line <- LoadFile.load(s"$dir/trivial-words.txt") if !line.trim.startsWith("//") 103 | word <- line.split("\\s+") 104 | }{ 105 | trivialSet += stemmer.stem(word) 106 | } 107 | 108 | new EssayStat(isTrivialWord = w => trivialSet.contains(w.root), FSA.fileDefined(dir,stemmer)) 109 | } 110 | 111 | lazy val standard = fileDefined(LoadFile.standard, SimpleStemmer.standard) 112 | 113 | val sample = 114 | """ 115 | |I believe the total number of cars twenty years later will be fewer than today. Although our industry is becoming more and more strong and there is still quite much need of cars in many developing countries today, there are three important reasons which I believe will reduce the amount of cars in the future: 116 | | 117 | |First, I believe the increasingly use of new innovations in means of transport will replace the utility of traditional ones, including the use of cars. Through the history of human, we've never stopped creating new methods of traveling. In ancient days, we tamed horses, invented boats and carriages. And about one hundred years ago, the first car was designed and put into use. So it's hard to believe that we will stop here and not invent some new means of transport. Those cool traveling machines you read or heard of from science fictions, like flying cars, personal mini airplanes or city transport belts, may well get popular in some near future. And when more people have adapted to those new ways of travel, they will never turn back again. Our ancestors were once used to riding on the backs of horses or sitting in carriages, but as soon as the appearance of modern cars and trains, the use of those old methods was quickly replaced. And you can even hardly see any horses in a city nowadays. Unexceptionally, this principle will apply to cars as well, so the people driving cars in that day may be as rare as those who rides a horse you see today. 118 | | 119 | |Second, besides those new inventions, the current trend in our traffic study is heading towards a direction of effectiveness. Congestion has become a serious problem of modern cities and many great minds are working on it. So I believe there will soon be some good solutions to this problem, like building more efficient public transportation system and make them more efficient. People's opinions may change as well, and people will realize the harm of too many cars and are more willing to use those alternatives instead. Just take the example of my surroundings, I can feel that fewer people I know like driving cars today than the past. 120 | | 121 | |Third, let's talk about something that is happening right around us. Auto-driven cars are beginning to catch the public's attention recently. There have been some auto-driven cars already put into use in Singapore, and Uber's auto car plan is about to start as well. And one major advantage of auto-driven cars is their outstanding responsiveness. Their use will drastically reduce the waiting time of passengers, and also results in a lower the idle time percentage than traditional taxis. So, in another word, there will be fewer taxis as well as fews cars, but the transporting capacity will be increased. And the auto-driven cars' ease of use will also help them soon becoming more popular, and as a result, more people will choose to rely on them instead of buying their own cars. These two factors together will naturally reduce the total amount of cars. 122 | | 123 | |So in conclusion, because the reasons I've given above, I come to the prediction that there will be fewer cars in the next twenty years. 124 | """.stripMargin 125 | 126 | 127 | def main(args: Array[String]) { 128 | 129 | val result = standard.stat(EssayParser.standard.parseText(sample)) 130 | println{ 131 | result.phrasesStat 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /standard-data/common-phrases.txt: -------------------------------------------------------------------------------- 1 | above all 2 | according to 3 | act upon each other 4 | adapt oneself to 5 | adapt to 6 | add up to 7 | address oneself to 8 | a few 9 | a great quantity of 10 | a good deal of 11 | a large amount of 12 | a little 13 | a lot of 14 | a number of 15 | above all 16 | after all 17 | ahead of 18 | all but 19 | all of a sudden 20 | all over 21 | all right 22 | in all 23 | all the same 24 | all the time 25 | and so on 26 | apart from 27 | as far as 28 | as follows 29 | as for 30 | ask for leave 31 | as long as 32 | as soon as 33 | as though 34 | as to 35 | as usual 36 | as well 37 | as well as 38 | ask for 39 | at a loss 40 | at all 41 | at all costs 42 | at all events 43 | at any rate 44 | at ease 45 | at first 46 | at hand 47 | at last 48 | at least 49 | at most 50 | at no time 51 | at once 52 | at present 53 | at the cost of 54 | at the moment 55 | at the same time 56 | at the sight of 57 | at the thought of 58 | back and forth 59 | back up 60 | be absorbed in 61 | be described as 62 | be in favor of 63 | be made up of 64 | be short of 65 | bear 66 | because of 67 | before long 68 | be worth doing sth 69 | beyond the question 70 | break down 71 | break in 72 | break into 73 | break off 74 | break one 75 | break out 76 | break through 77 | break up 78 | bring about 79 | bring forward 80 | bring in to effect 81 | bring out 82 | bring up 83 | build up 84 | but for 85 | by accident 86 | by air 87 | by all means 88 | by and by 89 | by chance 90 | by means of 91 | by mistake 92 | by no means 93 | by oneself 94 | by the way 95 | by way of 96 | call for 97 | call off 98 | call on 99 | call up 100 | cannot help doing 101 | capable of 102 | cannot 103 | care for 104 | carry forward 105 | carry off 106 | carry on 107 | carry out 108 | catch sight of 109 | catch up with 110 | cheat sb 111 | check in 112 | check out 113 | cheer up 114 | clear up 115 | come around 116 | come in handy for sth 117 | come on 118 | come out 119 | come through 120 | come to 121 | come to an end 122 | come true 123 | come up 124 | come up with 125 | compare 126 | count for little 127 | count on 128 | cut down 129 | cut in 130 | cut off 131 | cut out 132 | cut short 133 | deal with 134 | decide on 135 | die down 136 | die out 137 | do away with 138 | draw in 139 | araw up 140 | dream of 141 | dress up 142 | drop in 143 | drop out 144 | due to 145 | each other 146 | end up 147 | even if 148 | every now and then 149 | every other 150 | except for 151 | face to face 152 | fail to do 153 | fit a new suit on sb 154 | fit in with 155 | get across 156 | get along 157 | get down 158 | get into 159 | get off 160 | get on 161 | get on with 162 | get out 163 | get over 164 | get rid of 165 | get through 166 | get up 167 | give in 168 | give off 169 | give out 170 | give up 171 | give way to 172 | go after 173 | go ahead 174 | go along with 175 | go around 176 | go down 177 | go for 178 | go in for 179 | go into 180 | go into action 181 | go into effect 182 | go on 183 | go out 184 | go over 185 | go through 186 | go up 187 | go wrong 188 | grow up 189 | had better 190 | hand in 191 | hand out 192 | hand over 193 | hang on 194 | hang up 195 | have an advantage over 196 | have in mind 197 | have nothing to do with 198 | have to 199 | head for 200 | help oneself 201 | hold back 202 | hold sth back from sb 203 | hold on 204 | hold on to 205 | hold up 206 | hurry up 207 | hurt one 208 | if only 209 | in a hurry 210 | in a sense 211 | in a way 212 | in a word 213 | in addition 214 | in addition to 215 | in advance 216 | in any case 217 | in brief 218 | in case of 219 | in charge of 220 | in common 221 | in debt 222 | in detail 223 | in effect 224 | in fact 225 | in favour of 226 | in front of 227 | in general 228 | in half 229 | in honour of 230 | in no case 231 | in no time 232 | in no way 233 | in one 234 | in order 235 | in order to 236 | in other words 237 | in part 238 | in particular 239 | in person 240 | in proportion to 241 | in public 242 | in question 243 | in relation to 244 | in return 245 | in short 246 | in sight 247 | in spite of 248 | in store 249 | in that 250 | in the course of 251 | in the end 252 | in the face of 253 | in the future 254 | in the way 255 | in time 256 | in touch 257 | in turn 258 | in vain 259 | in stead of 260 | judging by 261 | just now 262 | join in 263 | keep an eye on 264 | keep in mind 265 | keep in touch with 266 | keep it up 267 | keep on 268 | keep one 269 | keep sth 270 | keep to 271 | keep up with 272 | kill off 273 | knock down 274 | knock out 275 | laugh at 276 | lay aside 277 | lay down 278 | lay out 279 | lead to 280 | 1earn by heart 281 | learn from 282 | leave behind 283 | leave off 284 | leave out 285 | 1et alone 286 | let off 287 | let out 288 | lie in 289 | line up 290 | little 291 | little by little 292 | live on 293 | live through 294 | live up to 295 | long before 296 | long for 297 | look after 298 | look at 299 | look back 300 | look down on 301 | look for 302 | look forward to 303 | look into 304 | look on 305 | look out 306 | look over 307 | look through 308 | lay out 309 | lead to 310 | 1earn by heart 311 | learn from 312 | leave behind 313 | leave off 314 | leave out 315 | 1et alone 316 | let off 317 | let out 318 | lie in 319 | line up 320 | little 321 | little by little 322 | live on 323 | live through 324 | live up to 325 | long before 326 | long for 327 | look after 328 | look at 329 | look back 330 | look down on 331 | look for 332 | look forward to 333 | look into 334 | look on 335 | look out 336 | look over 337 | look through 338 | make a Fire 339 | make for 340 | make fun of 341 | make one 342 | make out 343 | make sense 344 | make sure 345 | make up 346 | make up for 347 | make up one 348 | make use of 349 | masses of 350 | mix up 351 | more or less 352 | no doubt 353 | no less than 354 | no longer 355 | no more 356 | no more than 357 | nothing but 358 | now and then 359 | now that 360 | of course 361 | off duty 362 | on a small 363 | on account of 364 | on average 365 | on board 366 | on business 367 | on duty 368 | on earth 369 | on foot 370 | on guard 371 | on occasion 372 | on one 373 | on purpose 374 | on sale 375 | on the contrary 376 | on the one hand 377 | on the other hand 378 | on the spot 379 | on the whole 380 | on time 381 | once again 382 | once in a while 383 | once upon a time 384 | once more 385 | one another 386 | open fire 387 | or else 388 | or so 389 | other than 390 | out of 391 | out of breath 392 | out of control 393 | out of date 394 | out of doors 395 | out of order 396 | out of place 397 | our of sight 398 | out of the question 399 | out of work 400 | over and over 401 | pass away 402 | pass On 403 | pay attention to 404 | pay back 405 | pay for 406 | pay off 407 | pick out 408 | pick up 409 | play apart 410 | play fire 411 | plenty of 412 | point out 413 | prior to 414 | pull in 415 | pull into 416 | pull out 417 | put aside 418 | put away 419 | put down 420 | put forward 421 | put into practice 422 | put off 423 | put on 424 | put out 425 | put to use 426 | put up 427 | put up with 428 | quite a few 429 | rather than 430 | regardless of 431 | remind sb of sth 432 | result from 433 | result in 434 | right away 435 | ring off 436 | ring up 437 | rob sb 438 | run into 439 | run to 440 | run over 441 | run through 442 | see to 443 | send for 444 | send off 445 | set about 446 | set apart 447 | set aside 448 | set back 449 | set down 450 | Set free 451 | set off 452 | set out 453 | set up 454 | settle down 455 | show off 456 | show up 457 | side by side 458 | slow down 459 | so far 460 | so that 461 | so long as 462 | sooner or later 463 | stand for 464 | stand out 465 | stand up 466 | stick to 467 | such as 468 | suit well with 469 | sum up 470 | switch off 471 | take advantage of 472 | take after 473 | take apart 474 | take away 475 | take down 476 | take for 477 | take in 478 | take into account 479 | take off 480 | take on 481 | take one 482 | take out 483 | take over 484 | take part in 485 | take place 486 | take the place of 487 | take turns 488 | take up 489 | the moment 490 | thanks to 491 | think of 492 | think over 493 | throw away 494 | to a certain degree 495 | to the point 496 | touch on 497 | try on 498 | try one 499 | try out 500 | turn down 501 | turn in 502 | turn off 503 | turn on 504 | turn one 505 | turn out 506 | turn out to be 507 | turn over 508 | turn to 509 | turn up 510 | turn back 511 | under control 512 | undergo experiences 513 | under the circumstances 514 | up to 515 | up to date 516 | use up 517 | wait for 518 | wait on 519 | warm up 520 | wash up 521 | watch out for 522 | wear out 523 | wipe out 524 | with regard to 525 | within reach 526 | with respect to 527 | with the exception of 528 | without question 529 | work out 530 | would rather 531 | -------------------------------------------------------------------------------- /src/main/scala/jiayiwei/essay/PorterStemmerScala.scala: -------------------------------------------------------------------------------- 1 | package jiayiwei.essay 2 | 3 | /* 4 | Copyright 2009 David Hall, Daniel Ramage 5 | Licensed under the Apache License, Version 2.0 (the "License") 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | import scala.Some 17 | 18 | 19 | /** 20 | * Converts words to their stemmed form using the classic Porter stemming 21 | * algorithm. 22 | * 23 | * @author dlwh 24 | */ 25 | object PorterStemmer extends Stemmer{ 26 | 27 | 28 | override def stem(w: String) = { 29 | if (w.length < 3) w.toLowerCase 30 | else { 31 | val ret = { 32 | w.toLowerCase.replaceAll("([aeiou])y", "$1Y").replaceAll("^y", "Y") 33 | } 34 | step5(step4(step3(step2(step1(ret))))).toLowerCase 35 | } 36 | } 37 | 38 | private def step1(w: String) = step1c(step1b(step1a(w))) 39 | 40 | // get rid of s's 41 | private def step1a(w: String) = { 42 | if (w.endsWith("sses") || w.endsWith("ies")) 43 | w.substring(0, w.length - 2) 44 | else if (w.endsWith("s") && w.charAt(w.length - 2) != 's') 45 | w.substring(0, w.length - 1) 46 | else w 47 | } 48 | 49 | private def step1b(w: String) = { 50 | //println(w + " " + m(w)); 51 | def extra(w: String) = { 52 | if (w.endsWith("at") || w.endsWith("bl") || w.endsWith("iz")) w + 'e' 53 | // double consonant: 54 | else if (doublec(w) && !("lsz".contains(w.last))) w.substring(0, w.length - 1); 55 | else if (m(w) == 1 && cvc(w)) w + "e" 56 | else w 57 | } 58 | 59 | if (w.endsWith("eed")) { 60 | if (m(w.substring(0, w.length - 3)) > 0) 61 | w.substring(0, w.length - 1) 62 | else w 63 | } else if (w.endsWith("ed")) { 64 | if (w.indexWhere(isVowel) < (w.length - 2)) extra(w.substring(0, w.length - 2)) 65 | else w 66 | } else if (w.endsWith("ing")) { 67 | if (w.indexWhere(isVowel) < (w.length - 3)) extra(w.substring(0, w.length - 3)) 68 | else w 69 | } else w 70 | } 71 | 72 | def step1c(w: String) = { 73 | //println(w + " " + m(w)); 74 | if ((w.last == 'y' || w.last == 'Y') && w.indexWhere(isVowel) < w.length - 1) { 75 | w.substring(0, w.length - 1) + 'i' 76 | } else w 77 | } 78 | 79 | private def replaceSuffix(w: String, suffix: String, repl: String) = { 80 | if (w endsWith suffix) Some((w.substring(0, w.length - suffix.length), repl)) 81 | else None 82 | } 83 | 84 | private val mgt0 = { 85 | (w: (String, String)) => m(w._1) > 0 86 | } 87 | private val mgt1 = { 88 | (w: (String, String)) => m(w._1) > 1 89 | } 90 | 91 | private def step2(w: String) = { 92 | //println(w + " " + m(w)); 93 | if (w.length < 3) w 94 | else { 95 | val opt = w(w.length - 2) match { 96 | case 'a' => replaceSuffix(w, "ational", "ate").orElse(replaceSuffix(w, "tional", "tion")) 97 | case 'c' => 98 | replaceSuffix(w, "enci", "ence").orElse(replaceSuffix(w, "anci", "ance")) 99 | case 'e' => replaceSuffix(w, "izer", "ize") 100 | case 'g' => replaceSuffix(w, "logi", "log") 101 | case 'l' => replaceSuffix(w, "bli", "ble") orElse { 102 | replaceSuffix(w, "alli", "al") 103 | } orElse { 104 | replaceSuffix(w, "entli", "ent") 105 | } orElse { 106 | replaceSuffix(w, "eli", "e") 107 | } orElse { 108 | replaceSuffix(w, "ousli", "ous") 109 | } 110 | case 'o' => replaceSuffix(w, "ization", "ize") orElse { 111 | replaceSuffix(w, "ator", "ate") 112 | } orElse { 113 | replaceSuffix(w, "ation", "ate") 114 | } 115 | case 's' => replaceSuffix(w, "alism", "al") orElse { 116 | replaceSuffix(w, "iveness", "ive") 117 | } orElse { 118 | replaceSuffix(w, "fulness", "ful") 119 | } orElse { 120 | replaceSuffix(w, "ousness", "ous") 121 | } 122 | case 't' => 123 | replaceSuffix(w, "aliti", "al") orElse { 124 | replaceSuffix(w, "iviti", "ive") 125 | } orElse { 126 | replaceSuffix(w, "biliti", "ble") 127 | } 128 | case _ => None 129 | } 130 | opt.filter(mgt0).map { 131 | case (a, b) => a + b 132 | }.getOrElse(w) 133 | } 134 | } 135 | 136 | private def step3(w: String) = { 137 | //println(w + " " + m(w)); 138 | if (w.length < 3) w 139 | else { 140 | val opt = w.last match { 141 | case 'e' => 142 | replaceSuffix(w, "icate", "ic") orElse { 143 | replaceSuffix(w, "alize", "al") 144 | } orElse { 145 | replaceSuffix(w, "ative", "") 146 | } 147 | case 'i' => replaceSuffix(w, "iciti", "ic") 148 | case 'l' => replaceSuffix(w, "ical", "ic").orElse(replaceSuffix(w, "ful", "")) 149 | case 's' => replaceSuffix(w, "ness", "") 150 | case _ => None 151 | } 152 | opt.filter(mgt0).map { 153 | case (a, b) => a + b 154 | }.getOrElse(w) 155 | } 156 | } 157 | 158 | private def step4(w: String) = { 159 | //println(w + " " + m(w)); 160 | if (w.length < 3) 161 | w 162 | else { 163 | val opt = w(w.length - 2) match { 164 | case 'a' => replaceSuffix(w, "al", "") 165 | case 'c' => replaceSuffix(w, "ance", "").orElse(replaceSuffix(w, "ence", "")) 166 | case 'e' => replaceSuffix(w, "er", "") 167 | case 'i' => replaceSuffix(w, "ic", "") 168 | case 'l' => replaceSuffix(w, "able", "").orElse(replaceSuffix(w, "ible", "")) 169 | case 'n' => replaceSuffix(w, "ant", "") orElse { 170 | replaceSuffix(w, "ement", "") 171 | } orElse { 172 | //println("here") 173 | replaceSuffix(w, "ment", "") 174 | } orElse { 175 | //println("hereX") 176 | replaceSuffix(w, "ent", "") 177 | } 178 | case 'o' => replaceSuffix(w, "ion", "").filter(a => a._1.endsWith("t") || a._1.endsWith("s")). 179 | orElse(replaceSuffix(w, "ou", "")) 180 | case 's' => replaceSuffix(w, "ism", "") 181 | case 't' => replaceSuffix(w, "ate", "").orElse(replaceSuffix(w, "iti", "")) 182 | case 'u' => replaceSuffix(w, "ous", "") 183 | case 'v' => replaceSuffix(w, "ive", "") 184 | case 'z' => replaceSuffix(w, "ize", "") 185 | case _ => None 186 | } 187 | opt.filter(mgt1).map { 188 | case (a, b) => a + b 189 | }.getOrElse(w) 190 | } 191 | } 192 | 193 | private def step5(w: String) = { 194 | //println(w + " " + m(w)); 195 | if (w.length < 3) w 196 | else 197 | step5b(step5a(w)) 198 | } 199 | 200 | 201 | private def step5a(w: String) = { 202 | if (w.length < 3) w 203 | else 204 | if (w.last == 'e') { 205 | val n = m(w) 206 | if (n > 1) w.substring(0, w.length - 1) 207 | else if (n == 1 && !cvc(w.substring(0, w.length - 1))) w.substring(0, w.length - 1) 208 | else w 209 | } 210 | else { 211 | w 212 | } 213 | } 214 | 215 | private def step5b(w: String) = { 216 | if (w.last == 'l' && doublec(w) && m(w) > 1) w.substring(0, w.length - 1) 217 | else w 218 | } 219 | 220 | def m(w: String): Int = { 221 | val firstV = w.indexWhere(isVowel) 222 | if (firstV == -1) 0 223 | else { 224 | var m = 0 225 | var x: Seq[Char] = w.substring(firstV) 226 | if (x.isEmpty) m 227 | else { 228 | while (!x.isEmpty) { 229 | x = x.dropWhile(isVowel) 230 | if (x.isEmpty) return m 231 | m += 1 232 | if (m > 1) return m; // don't need anything bigger than this. 233 | x = x.dropWhile(isConsonant) 234 | } 235 | m 236 | } 237 | } 238 | } 239 | 240 | private def cvc(w: String) = ( 241 | w.length > 2 242 | && isConsonant(w.last) 243 | && !("wxY" contains w.last) 244 | && isVowel(w(w.length - 2)) 245 | && isConsonant(w.charAt(w.length - 3)) 246 | ) 247 | 248 | private def doublec(w: String) = { 249 | (w.length > 2 && w.last == w.charAt(w.length - 2) && isConsonant(w.last)) 250 | } 251 | 252 | def isConsonant(letter: Char) = !isVowel(letter) 253 | 254 | def isVowel(letter: Char) = "aeiouy" contains letter 255 | } 256 | -------------------------------------------------------------------------------- /commonPhrases_irreg.txt: -------------------------------------------------------------------------------- 1 | 2 | above all 3 | according to 4 | act upon each other 5 | adapt oneself to 6 | adapt to 7 | add…to… 8 | add up to 9 | address oneself to 10 | a few 11 | a great quantity of 12 | a good deal of 13 | a large amount of 14 | a little 15 | a lot of 16 | a number of 17 | above all 18 | after all 19 | ahead of 20 | all but 21 | all of a sudden 22 | all over 23 | all right 24 | in all 25 | all the same 26 | all the time 27 | and so on 28 | apart from 29 | as…as… 30 | as far as 31 | as follows 32 | as for 33 | ask for leave 34 | as long as 35 | as soon as 36 | as though 37 | as to 38 | as usual 39 | as well 40 | as well as 41 | ask for 42 | at a loss 43 | at all 44 | at all costs 45 | at all events 46 | at any rate 47 | at ease 48 | at first 49 | at hand 50 | at last 51 | at least 52 | at most 53 | at no time 54 | at once 55 | at present 56 | at the cost of 57 | at the moment 58 | at the same time 59 | at the sight of 60 | at the thought of 61 | 62 | back and forth 63 | back up 64 | be absorbed in 65 | be described as 66 | be in favor of 67 | be made up of 68 | be short of 69 | bear 70 | because of 71 | before long 72 | be worth doing sth 73 | beyond the question 74 | both...and 75 | break down 76 | break in 77 | break into 78 | break off 79 | break one 80 | break out 81 | break through 82 | break up 83 | bring about 84 | bring forward 85 | bring in to effect 86 | brmgin to operation … 87 | bring out 88 | bring up 89 | build up 90 | but for 91 | by accident 92 | by air 93 | by all means 94 | by and by 95 | by chance 96 | by far … 97 | by means of 98 | by mistake 99 | by no means 100 | by oneself 101 | by the way 102 | by way of 103 | 104 | 105 | call for 106 | call off 107 | call on 108 | call up 109 | cannot help doing 110 | capable of 111 | cannot 112 | care forr 113 | carry forward 114 | carry off 115 | carry on 116 | carry out 117 | catch sight of 118 | catch up with 119 | cheat sb 120 | check in 121 | check out 122 | cheer up 123 | clear up 124 | come around 125 | come in handy for sth 126 | come on 127 | come out 128 | come through 129 | come to 130 | come to an end 131 | come true 132 | come up 133 | come up with 134 | compare 135 | count for little 136 | count on 137 | cut down 138 | cut in 139 | cut off 140 | cut out 141 | cut short 142 | 143 | deal with 144 | decide on 145 | die down 146 | die out 147 | do away with 148 | do..a favour 149 | draw in 150 | araw up 151 | dream of 152 | dress up 153 | drop in 154 | drop out 155 | due to 156 | 157 | 158 | 159 | each other 160 | elther...or 161 | end up 162 | even if 163 | every now and then 164 | every other 165 | except for 166 | 167 | face to face 168 | fail to do 169 | fit a new suit on sb 170 | fit in with 171 | 172 | get across 173 | get along 174 | get down 175 | get into 176 | get off 177 | get on 178 | get on with 179 | get out 180 | get over 181 | get rid of 182 | get through 183 | get up 184 | give in 185 | give off 186 | give out 187 | give up 188 | give way to 189 | go after 190 | go ahead 191 | go along with 192 | go around 193 | go down 194 | go for 195 | go in for 196 | go into 197 | go into action 198 | go into effect 199 | go on 200 | go out 201 | go over 202 | go through 203 | go up 204 | go wrong 205 | grow up 206 | 207 | had better 208 | had rather...than 209 | hand in 210 | hand out 211 | hand over 212 | hang on 213 | hang up 214 | have an advantage over 215 | have in mind 216 | have nothing to do with 217 | have 218 | head for 219 | help oneself 220 | hold back 221 | hold sth back from sb 222 | hold on 223 | hold on to 224 | hold up 225 | hurry up 226 | hurt one 227 | 228 | 229 | if only 230 | in a hurry 231 | in a sense 232 | in a way 233 | in a word 234 | in addition to 235 | in advance 236 | in any case 237 | in brief 238 | in case of 239 | in charge of 240 | in common 241 | in debt 242 | in detail 243 | in effect 244 | in fact 245 | in favour of 246 | in front of 247 | in general 248 | in half 249 | in honour of 250 | in no case 251 | in no time 252 | in no way 253 | in one 254 | in order 255 | in order to 256 | in other words 257 | in part 258 | in particular 259 | in person 260 | in proportion to 261 | in public 262 | in question 263 | in relation to 264 | in return 265 | in short 266 | in sight 267 | in spite of 268 | in store 269 | in that 270 | in the course of 271 | in the end 272 | in the face of 273 | in the future 274 | in the way 275 | in time 276 | in touch 277 | in turn 278 | in vain 279 | in stead of 280 | 281 | judging by 282 | just now 283 | join in 284 | 285 | keep an eye on 286 | keep in mind 287 | keep in touch with 288 | keep it up 289 | keep on 290 | keep one 291 | keep sth 292 | keep to 293 | keep up with 294 | kill off 295 | knock down 296 | knock out 297 | 298 | laugh at 299 | lay aside 300 | lay down 301 | lay out 302 | lead to 303 | 1earn by heart 304 | learn from 305 | leave behind 306 | leave off 307 | leave out 308 | 1et alone 309 | let off 310 | let out 311 | lie in 312 | line up 313 | little 314 | little by little 315 | live on 316 | live through 317 | live up to 318 | long before 319 | long for 320 | look after 321 | look at 322 | look back 323 | look down on 324 | look for 325 | look forward to 326 | look into 327 | look on 328 | look out 329 | look over 330 | look through 331 | lay out 332 | lead to 333 | 1earn by heart 334 | learn from 335 | leave behind 336 | leave off 337 | leave out 338 | 1et alone 339 | let off 340 | let out 341 | lie in 342 | line up 343 | little 344 | little by little 345 | live on 346 | live through 347 | live up to 348 | long before 349 | long for 350 | look after 351 | look at 352 | look back 353 | look down on 354 | look for 355 | look forward to 356 | look into 357 | look on 358 | look out 359 | look over 360 | look through 361 | 362 | make a Fire 363 | make for 364 | make fun of 365 | make one 366 | make out 367 | make sense 368 | make sure 369 | make up 370 | make up for 371 | make up one 372 | make use of 373 | masses of 374 | mix up 375 | more or less 376 | 377 | neither...nor... 378 | no doubt 379 | no less than 380 | no longer 381 | no more 382 | no more than 383 | no sooner...than 384 | not only...but also 385 | nothing but 386 | now and then 387 | now that 388 | 389 | of course 390 | off duty 391 | on a small 392 | on account of 393 | on average 394 | on board 395 | on business 396 | on duty 397 | on earth 398 | on foot 399 | on guard 400 | on occasion 401 | on one 402 | on purpose 403 | on sale 404 | on the contrary 405 | on the one hand 406 | on the other hand 407 | on the spot 408 | on the whole 409 | on time 410 | once again 411 | once in a while 412 | once upon a time 413 | once more 414 | one another 415 | one...the other 416 | open fire 417 | or else 418 | or so 419 | other than 420 | out of 421 | out of breath 422 | out of control 423 | out of date 424 | out of doors 425 | out of order 426 | out of place 427 | our of sight 428 | out of the question 429 | out of work 430 | over and over 431 | 432 | 433 | pass away 434 | pass On 435 | pay attention to 436 | pay back 437 | pay for 438 | pay off 439 | pick out 440 | pick up 441 | play apart 442 | play fire 443 | plenty of 444 | point out 445 | prior to 446 | pull in 447 | pull into 448 | pull out 449 | put aside 450 | put away 451 | put down 452 | put forward 453 | put into practice 454 | put off 455 | put on 456 | put out 457 | put to use 458 | put up 459 | put up with 460 | 461 | quite a few 462 | 463 | rather than 464 | refer to...as 465 | regardless of 466 | remind sb of sth 467 | result from 468 | result in 469 | right away 470 | ring off 471 | ring up 472 | rob sb 473 | run into 474 | run to 475 | run over 476 | run through 477 | 478 | 479 | see...off 480 | see...through 481 | see to 482 | send for 483 | send off 484 | set about 485 | set a fire to… 486 | set apart 487 | set aside 488 | set back 489 | set down 490 | Set free 491 | set off 492 | set out 493 | set up 494 | settle down 495 | show off 496 | show up 497 | side by side 498 | slow down 499 | so...as to 500 | so far 501 | so 502 | so long as 503 | some…others... 504 | sooner or later 505 | no sooner...than 506 | stand for 507 | stand out 508 | stand up 509 | stick to 510 | such as 511 | suit well with 512 | sum up 513 | switch off 514 | 515 | take...for 516 | take advantage of 517 | take after 518 | take apart 519 | take away 520 | take down 521 | take for 522 | take… for granted 523 | take in 524 | take into account 525 | take off 526 | take on 527 | take one 528 | take out 529 | take over 530 | take part in 531 | take place 532 | take the place of 533 | take turns 534 | take up 535 | tell...from 536 | the moment 537 | thanks to 538 | think of 539 | think of...as 540 | think over 541 | throw away 542 | to a certain degree 543 | to the point 544 | touch on 545 | try on 546 | try one 547 | try out 548 | turn down 549 | turn in 550 | turn...into 551 | turn off 552 | turn on 553 | turn one 554 | turn out 555 | turn out to be 556 | turn over 557 | turn to 558 | turn up 559 | 560 | 561 | under control 562 | undergo experiences 563 | under the circumstances 564 | up to 565 | up to date 566 | use up 567 | 568 | wait for 569 | wait on 570 | warm up 571 | wash up 572 | watch out for 573 | wear out 574 | What about... 575 | What if... 576 | whether...or 577 | wipe out 578 | with regard to 579 | within reach 580 | with respect to 581 | with the exception of 582 | without question 583 | work out 584 | would rather -------------------------------------------------------------------------------- /standard-data/irregular-words.txt: -------------------------------------------------------------------------------- 1 | it its he his him she her hers they those them their we us our you your yours i me 2 | use use uses using used 3 | 4 | this this 5 | that that 6 | who whom 7 | 8 | have has had having 9 | 10 | // nouns 11 | one ones 12 | life lives 13 | news news 14 | 15 | // verbs 16 | arise arose arisen 17 | awake awakened awoke awakened awoken 18 | 19 | be was were been am is are 20 | backslide backslid backslidden backslid 21 | bear bore born borne 22 | beat beat beaten beat 23 | become became become 24 | begin began begun 25 | bend bent bent 26 | bet bet betted bet betted 27 | bid bid bade bidden 28 | bid bid bid 29 | bind bound bound 30 | bite bit bitten 31 | bleed bled bled 32 | blow blew blown 33 | break broke broken 34 | breed bred bred 35 | bring brought brought 36 | broadcast broadcast broadcasted broadcast broadcasted 37 | browbeat browbeat browbeaten browbeat 38 | build built built 39 | burn burned burnt burned burnt 40 | burst burst burst 41 | bust busted bust busted bust 42 | buy bought bought 43 | 44 | cast cast cast 45 | catch caught caught 46 | choose chose chosen 47 | cling clung clung 48 | clothe clothed clad clothed clad 49 | come came come 50 | cost cost cost 51 | creep crept crept 52 | crossbreed crossbred crossbred 53 | cut cut cut 54 | 55 | daydream daydreamed daydreamt daydreamed daydreamt 56 | deal dealt dealt 57 | dig dug dug 58 | disprove disproved disproved disproven 59 | dive dove dived dived 60 | dive dived dove dived 61 | do did done does 62 | draw drew drawn 63 | dream dreamed dreamt dreamed dreamt 64 | drink drank drunk 65 | drive drove driven 66 | dwell dwelt dwelled dwelt dwelled 67 | 68 | eat ate eaten 69 | 70 | fall fell fallen 71 | feed fed fed 72 | feel felt felt 73 | fight fought fought 74 | find found found 75 | fit fitted fit fitted fit 76 | fit fit fitted fit fitted 77 | flee fled fled 78 | fling flung flung 79 | fly flew flown 80 | forbid forbade forbidden 81 | forecast forecast forecast 82 | forego forewent foregone 83 | foresee foresaw foreseen 84 | foretell foretold foretold 85 | forget forgot forgotten forgot 86 | forgive forgave forgiven 87 | forsake forsook forsaken 88 | freeze froze frozen 89 | frostbite frostbit frostbitten 90 | 91 | get got gotten got 92 | give gave given 93 | go went gone goes 94 | grind ground ground 95 | grow grew grown 96 | 97 | hand-feed hand-fed hand-fed 98 | handwrite handwrote handwritten 99 | hang hung hung 100 | have had had 101 | hear heard heard 102 | hew hewed hewn hewed 103 | hide hid hidden 104 | hit hit hit 105 | hold held held 106 | hurt hurt hurt 107 | 108 | inbreed inbred inbred 109 | inlay inlaid inlaid 110 | input input inputted input inputted 111 | interbreed interbred interbred 112 | interweave interwove interweaved interwoven interweaved 113 | interwind interwound interwound 114 | 115 | jerry-build jerry-built jerry-built 116 | 117 | keep kept kept 118 | kneel knelt kneeled knelt kneeled 119 | knit knitted knit knitted knit 120 | know knew known 121 | 122 | lay laid laid 123 | lead led led 124 | lean leaned leant leaned leant 125 | leap leaped leapt leaped leapt 126 | learn learned learnt learned learnt 127 | leave left left 128 | lend lent lent 129 | let let let 130 | lie lay lain 131 | lie lied lied 132 | light lit lighted lit lighted 133 | lip-read lip-read lip-read 134 | lose lost lost 135 | 136 | make made made 137 | mean meant meant 138 | meet met met 139 | miscast miscast miscast 140 | misdeal misdealt misdealt 141 | misdo misdid misdone 142 | mishear misheard misheard 143 | mislay mislaid mislaid 144 | mislead misled misled 145 | mislearn mislearned mislearnt mislearned mislearnt 146 | misread misread misread 147 | misset misset misset 148 | misspeak misspoke misspoken 149 | misspell misspelled misspelt misspelled misspelt 150 | misspend misspent misspent 151 | mistake mistook mistaken 152 | misteach mistaught mistaught 153 | misunderstand misunderstood misunderstood 154 | miswrite miswrote miswritten 155 | mow mowed mowed mown 156 | 157 | offset offset offset 158 | outbid outbid outbid 159 | outbreed outbred outbred 160 | outdo outdid outdone 161 | outdraw outdrew outdrawn 162 | outdrink outdrank outdrunk 163 | outdrive outdrove outdriven 164 | outfight outfought outfought 165 | outfly outflew outflown 166 | outgrow outgrew outgrown 167 | outleap outleaped outleapt outleaped outleapt 168 | outlie outlied outlied 169 | outride outrode outridden 170 | outrun outran outrun 171 | outsell outsold outsold 172 | outshine outshined outshone outshined outshone 173 | outshoot outshot outshot 174 | outsing outsang outsung 175 | outsit outsat outsat 176 | outsleep outslept outslept 177 | outsmell outsmelled outsmelt outsmelled outsmelt 178 | outspeak outspoke outspoken 179 | outspeed outsped outsped 180 | outspend outspent outspent 181 | outswear outswore outsworn 182 | outswim outswam outswum 183 | outthink outthought outthought 184 | outthrow outthrew outthrown 185 | outwrite outwrote outwritten 186 | overbid overbid overbid 187 | overbreed overbred overbred 188 | overbuild overbuilt overbuilt 189 | overbuy overbought overbought 190 | overcome overcame overcome 191 | overdo overdid overdone 192 | overdraw overdrew overdrawn 193 | overdrink overdrank overdrunk 194 | overeat overate overeaten 195 | overfeed overfed overfed 196 | overhang overhung overhung 197 | overhear overheard overheard 198 | overlay overlaid overlaid 199 | overpay overpaid overpaid 200 | override overrode overridden 201 | overrun overran overrun 202 | oversee oversaw overseen 203 | oversell oversold oversold 204 | oversew oversewed oversewn oversewed 205 | overshoot overshot overshot 206 | oversleep overslept overslept 207 | overspeak overspoke overspoken 208 | overspend overspent overspent 209 | overspill overspilled overspilt overspilled overspilt 210 | overtake overtook overtaken 211 | overthink overthought overthought 212 | overthrow overthrew overthrown 213 | overwind overwound overwound 214 | overwrite overwrote overwritten 215 | 216 | partake partook partaken 217 | pay paid paid 218 | plead pleaded pled pleaded pled 219 | prebuild prebuilt prebuilt 220 | predo predid predone 221 | premake premade premade 222 | prepay prepaid prepaid 223 | presell presold presold 224 | preset preset preset 225 | preshrink preshrank preshrunk 226 | proofread proofread proofread 227 | prove proved proven proved 228 | put put put 229 | 230 | quick-freeze quick-froze quick-frozen 231 | quit quit quitted quit quitted 232 | 233 | read read read 234 | reawake reawoke reawaken 235 | rebid rebid rebid 236 | rebind rebound rebound 237 | rebroadcast rebroadcast rebroadcasted rebroadcast rebroadcasted 238 | rebuild rebuilt rebuilt 239 | recast recast recast 240 | recut recut recut 241 | redeal redealt redealt 242 | redo redid redone 243 | redraw redrew redrawn 244 | refit refit refitted refit refitted 245 | refit refitted refit refitted refit 246 | regrind reground reground 247 | regrow regrew regrown 248 | rehang rehung rehung 249 | rehear reheard reheard 250 | reknit reknitted reknit reknitted reknit 251 | relay relaid relaid 252 | relay relayed relayed 253 | relearn relearned relearnt relearned relearnt 254 | relight relit relighted relit relighted 255 | remake remade remade 256 | repay repaid repaid 257 | reread reread reread 258 | rerun reran rerun 259 | resell resold resold 260 | resend resent resent 261 | reset reset reset 262 | resew resewed resewn resewed 263 | retake retook retaken 264 | reteach retaught retaught 265 | retear retore retorn 266 | retell retold retold 267 | rethink rethought rethought 268 | retread retread retread 269 | retrofit retrofitted retrofit retrofitted retrofit 270 | rewake rewoke rewaked rewaken rewaked 271 | rewear rewore reworn 272 | reweave rewove reweaved rewoven reweaved 273 | rewed rewed rewedded rewed rewedded 274 | rewet rewet rewetted rewet rewetted 275 | rewin rewon rewon 276 | rewind rewound rewound 277 | rewrite rewrote rewritten 278 | rid rid rid 279 | ride rode ridden 280 | ring rang rung 281 | rise rose risen 282 | roughcast roughcast roughcast 283 | run ran run 284 | 285 | sand-cast sand-cast sand-cast 286 | saw sawed sawed sawn 287 | say said said 288 | see saw seen 289 | seek sought sought 290 | sell sold sold 291 | send sent sent 292 | set set set 293 | sew sewed sewn sewed 294 | shake shook shaken 295 | shave shaved shaved shaven 296 | shear sheared sheared shorn 297 | shed shed shed 298 | shine shined shone shined shone 299 | shit shit shat shitted shit shat shitted 300 | shoot shot shot 301 | show showed shown showed 302 | shrink shrank shrunk shrunk 303 | shut shut shut 304 | sight-read sight-read sight-read 305 | sing sang sung 306 | sink sank sunk sunk 307 | sit sat sat 308 | slay slew slayed slain slayed 309 | slay slayed slayed 310 | sleep slept slept 311 | slide slid slid 312 | sling slung slung 313 | slink slinked slunk slinked slunk 314 | slit slit slit 315 | smell smelled smelt smelled smelt 316 | sneak sneaked snuck sneaked snuck 317 | sow sowed sown sowed 318 | speak spoke spoken 319 | speed sped speeded sped speeded 320 | spell spelled spelt spelled spelt 321 | spend spent spent 322 | spill spilled spilt spilled spilt 323 | spin spun spun 324 | spit spit spat spit spat 325 | split split split 326 | spoil spoiled spoilt spoiled spoilt 327 | spoon-feed spoon-fed spoon-fed 328 | spread spread spread 329 | spring sprang sprung sprung 330 | stand stood stood 331 | steal stole stolen 332 | stick stuck stuck 333 | sting stung stung 334 | stink stunk stank stunk 335 | strew strewed strewn strewed 336 | stride strode stridden 337 | strike struck stricken 338 | strike struck struck stricken 339 | string strung strung 340 | strive strove strived striven strived 341 | sublet sublet sublet 342 | sunburn sunburned sunburnt sunburned sunburnt 343 | swear swore sworn 344 | sweat sweat sweated sweat sweated 345 | sweep swept swept 346 | swell swelled swollen swelled 347 | swim swam swum 348 | swing swung swung 349 | 350 | take took taken 351 | teach taught taught 352 | tear tore torn 353 | telecast telecast telecast 354 | tell told told 355 | test-drive test-drove test-driven 356 | test-fly test-flew test-flown 357 | think thought thought 358 | throw threw thrown 359 | thrust thrust thrust 360 | tread trod trodden trod 361 | typecast typecast typecast 362 | typeset typeset typeset 363 | typewrite typewrote typewritten 364 | 365 | unbend unbent unbent 366 | unbind unbound unbound 367 | unclothe unclothed unclad unclothed unclad 368 | underbid underbid underbid 369 | undercut undercut undercut 370 | underfeed underfed underfed 371 | undergo underwent undergone 372 | underlie underlay underlain 373 | undersell undersold undersold 374 | underspend underspent underspent 375 | understand understood understood 376 | undertake undertook undertaken 377 | underwrite underwrote underwritten 378 | undo undid undone 379 | unfreeze unfroze unfrozen 380 | unhang unhung unhung 381 | unhide unhid unhidden 382 | unknit unknitted unknit unknitted unknit 383 | unlearn unlearned unlearnt unlearned unlearnt 384 | unsew unsewed unsewn unsewed 385 | unsling unslung unslung 386 | unspin unspun unspun 387 | unstick unstuck unstuck 388 | unstring unstrung unstrung 389 | unweave unwove unweaved unwoven unweaved 390 | unwind unwound unwound 391 | uphold upheld upheld 392 | upset upset upset 393 | wake woke waked woken waked 394 | waylay waylaid waylaid 395 | wear wore worn 396 | weave wove weaved woven weaved 397 | wed wed wedded wed wedded 398 | weep wept wept 399 | wet wet wetted wet wetted 400 | whet whetted whetted 401 | win won won 402 | wind wound wound 403 | withdraw withdrew withdrawn 404 | withhold withheld withheld 405 | withstand withstood withstood 406 | wring wrung wrung 407 | write wrote written --------------------------------------------------------------------------------