├── .gitignore ├── LICENSE ├── README.md ├── pom.xml └── src ├── main ├── resources │ └── mystem-sources.conf └── scala │ └── ru │ └── stachek66 │ ├── nlp │ └── mystem │ │ ├── CLIRunner.scala │ │ ├── Properties.scala │ │ ├── holding │ │ ├── Communication.scala │ │ └── Factory.scala │ │ ├── model │ │ ├── GrammarInfo.scala │ │ ├── GrammarInfoParts.scala │ │ └── Info.scala │ │ ├── package.scala │ │ └── parsing │ │ ├── GrammarInfoParsing.scala │ │ └── JsonRepresentationParser.scala │ └── tools │ ├── Decompressor.scala │ ├── Downloader.scala │ ├── TarGz.scala │ ├── Tools.scala │ ├── Zip.scala │ └── external │ ├── ExternalProcessServer.scala │ ├── FailSafeExternalProcessServer.scala │ └── SyncServer.scala └── test ├── resources ├── res.txt ├── test.tar ├── test.tar.gz ├── test.txt └── test.zip └── scala └── ru └── stachek66 ├── nlp └── mystem │ ├── Properties$Test.scala │ ├── holding │ └── Holder.scala │ ├── model │ └── GrammarMapBuilder$Test.scala │ └── parsing │ └── GrammarInfoParsing$Test.scala └── tools ├── Downloader$Test.scala ├── Exec$Test.scala ├── TarGz$Test.scala └── Zip$Test.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .idea 3 | target 4 | *.zip 5 | *.gz 6 | *~ 7 | *log 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Anton Alekseev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Scala wrapper for morphological analyzer Yandex.MyStem 2 | 3 | ## Introduction 4 | 5 | Details about the algorithm can be found in [I. Segalovich «A fast morphological algorithm with unknown word guessing induced by a dictionary for a web search engine», MLMTA-2003, Las Vegas, Nevada, USA.](http://download.yandex.ru/company/iseg-las-vegas.pdf) 6 | 7 | The wrapper's code in under MIT license, but please remember that Yandex.MyStem is not open source and licensed under [conditions of the Yandex License](https://legal.yandex.ru/mystem/). 8 | 9 | ## System Requirements 10 | 11 | The wrapper should at least work on Ubuntu Linux 12.04+, Windows 7+ (+ people say it also works on OS X). 12 | 13 | ## Install 14 | 15 | ### Maven 16 | 17 | [Maven central](http://search.maven.org/#artifactdetails|ru.stachek66.nlp|mystem-scala|0.1.4|jar) 18 | 19 | ```xml 20 | 21 | ru.stachek66.nlp 22 | mystem-scala 23 | 0.1.6 24 | 25 | ``` 26 | 27 | ## Issues 28 | 29 | Only mystem 3.{0,1} are supported currently. 30 | Please [create issues for compatibility troubles and other requests.](https://github.com/alexeyev/mystem-scala/issues) 31 | 32 | ## Examples 33 | 34 | Probably the most important thing to remember when working with mystem-scala is 35 | that you should have just one MyStem instance per mystem/mystem.exe file in your application. 36 | 37 | ### Scala 38 | 39 | ```scala 40 | import java.io.File 41 | 42 | import ru.stachek66.nlp.mystem.holding.{Factory, MyStem, Request} 43 | 44 | object MystemSingletonScala { 45 | 46 | val mystemAnalyzer: MyStem = 47 | new Factory("-igd --eng-gr --format json --weight") 48 | .newMyStem( 49 | "3.0", 50 | Option(new File("/home/coolguy/coolproject/3dparty/mystem"))).get() 51 | } 52 | 53 | object AppExampleScala extends App { 54 | 55 | MystemSingletonScala 56 | .mystemAnalyzer 57 | .analyze(Request("Есть большие пассажиры мандариновой травы")) 58 | .info 59 | .foreach(info => println(info.initial + " -> " + info.lex)) 60 | } 61 | ``` 62 | 63 | ### Java 64 | 65 | ```java 66 | import ru.stachek66.nlp.mystem.holding.Factory; 67 | import ru.stachek66.nlp.mystem.holding.MyStem; 68 | import ru.stachek66.nlp.mystem.holding.MyStemApplicationException; 69 | import ru.stachek66.nlp.mystem.holding.Request; 70 | import ru.stachek66.nlp.mystem.model.Info; 71 | import scala.Option; 72 | import scala.collection.JavaConversions; 73 | 74 | import java.io.File; 75 | 76 | public class MyStemJavaExample { 77 | 78 | private final static MyStem mystemAnalyzer = 79 | new Factory("-igd --eng-gr --format json --weight") 80 | .newMyStem("3.0", Option.empty()).get(); 81 | 82 | public static void main(final String[] args) throws MyStemApplicationException { 83 | 84 | final Iterable result = 85 | JavaConversions.asJavaIterable( 86 | mystemAnalyzer 87 | .analyze(Request.apply("И вырвал грешный мой язык")) 88 | .info() 89 | .toIterable()); 90 | 91 | for (final Info info : result) { 92 | System.out.println(info.initial() + " -> " + info.lex() + " | " + info.rawResponse()); 93 | } 94 | } 95 | } 96 | ``` 97 | ## How to Cite 98 | 99 | The references to this repository are highly appreciated, if you use our work. 100 | 101 | ```bibtex 102 | @misc{alekseev2018mystemscala, 103 | author = {Anton Alekseev}, 104 | title = {mystem-scala}, 105 | year = {2018}, 106 | publisher = {GitHub}, 107 | journal = {GitHub repository}, 108 | howpublished = {\url{https://github.com/alexeyev/mystem-scala/}}, 109 | commit = {the latest commit of the codebase you have used} 110 | } 111 | ``` 112 | 113 | If you do cite it, please do not forget to cite [the original algorithm's author's paper](http://download.yandex.ru/company/iseg-las-vegas.pdf) as well. 114 | 115 | ## Contacts 116 | 117 | Anton Alekseev 118 | 119 | ## Thanks for reviews, reports and contributions 120 | 121 | * Vladislav Dolbilov, @darl 122 | * Mikhail Malchevsky 123 | * @anton-shirikov 124 | * Filipp Malkovsky 125 | * @dizzy7 126 | 127 | ## Also please see 128 | 129 | * https://tech.yandex.ru/mystem/ 130 | * https://nlpub.ru/Mystem 131 | * https://github.com/Digsolab/pymystem3 132 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | ru.stachek66.nlp 8 | mystem-scala 9 | 0.2.1 10 | jar 11 | 12 | ${project.groupId}:${project.artifactId} 13 | A Scala wrapper for morphological analyzer Yandex.MyStem 14 | https://github.com/alexeyev/mystem-scala 15 | 16 | 17 | 18 | MIT License 19 | http://www.opensource.org/licenses/mit-license.php 20 | 21 | 22 | 23 | 24 | 25 | Anton Alekseev 26 | anton.m.alexeyev@gmail.com 27 | https://alexeyev.github.io/ 28 | 29 | 30 | 31 | 32 | scm:git:git@github.com:alexeyev/mystem-scala.git 33 | scm:git:git@github.com:alexeyev/mystem-scala.git 34 | git@github.com:alexeyev/mystem-scala.git 35 | 36 | 37 | 38 | 3.0.2 39 | 40 | 41 | 42 | 2.13 43 | ${scala.base.version}.4 44 | 3.0.9 45 | 1.7.32 46 | UTF-8 47 | 1.2.13 48 | 4.5.6 49 | 1.2.1 50 | 2.15.1 51 | 4.13.1 52 | 2.7 53 | 1.4.11 54 | 1.26.0 55 | 56 | 57 | 58 | 59 | 60 | org.json 61 | json 62 | 20231013 63 | 64 | 65 | 66 | ch.qos.logback 67 | logback-classic 68 | ${logback.version} 69 | 70 | 71 | 72 | org.slf4j 73 | slf4j-api 74 | ${slf4j.version} 75 | 76 | 77 | 78 | org.scala-lang 79 | scala-library 80 | ${scala.version} 81 | 82 | 83 | 84 | org.apache.commons 85 | commons-compress 86 | ${compress.version} 87 | 88 | 89 | 90 | org.scalatest 91 | scalatest_${scala.base.version} 92 | ${scalatest.version} 93 | test 94 | 95 | 96 | org.scala-lang 97 | scala-library 98 | 99 | 100 | 101 | 102 | 103 | com.typesafe 104 | config 105 | ${typesafe.config.version} 106 | 107 | 108 | 109 | commons-io 110 | commons-io 111 | ${commons.version} 112 | 113 | 114 | 115 | junit 116 | junit 117 | ${junit.version} 118 | test 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | org.scoverage 128 | scoverage-maven-plugin 129 | ${scoverage.plugin.version} 130 | 131 | 132 | 133 | 134 | 135 | 136 | org.apache.maven.plugins 137 | maven-release-plugin 138 | 2.1 139 | 140 | 141 | org.apache.maven.plugins 142 | maven-resources-plugin 143 | 2.5 144 | 145 | 146 | net.alchim31.maven 147 | scala-maven-plugin 148 | ${scala.plugin.version} 149 | 150 | 151 | 152 | compile 153 | testCompile 154 | 155 | 156 | 157 | 158 | 159 | maven-compiler-plugin 160 | 2.3.2 161 | 162 | 1.8 163 | 1.8 164 | 165 | 166 | 167 | org.apache.maven.plugins 168 | maven-enforcer-plugin 169 | 1.4 170 | 171 | 172 | enforce-em-all 173 | package 174 | 175 | 176 | 177 | 178 | 179 | 180 | enforce 181 | 182 | 183 | 184 | 185 | 186 | org.scalatest 187 | scalatest-maven-plugin 188 | 1.0 189 | 190 | ${project.build.directory}/surefire-reports 191 | W 192 | 193 | 194 | 195 | scala-test 196 | 197 | test 198 | 199 | 200 | 201 | 202 | 203 | org.scoverage 204 | scoverage-maven-plugin 205 | 206 | 30 207 | true 208 | 209 | 210 | 211 | coverage 212 | test 213 | 214 | check 215 | 216 | 217 | 218 | 219 | 220 | org.apache.maven.plugins 221 | maven-source-plugin 222 | 3.2.1 223 | 224 | 225 | attach-sources 226 | 227 | jar 228 | 229 | 230 | 231 | 232 | 233 | org.apache.maven.plugins 234 | maven-javadoc-plugin 235 | 3.2.0 236 | 237 | ${project.build.directory} 238 | ${project.reporting.outputDirectory} 239 | 240 | 241 | 242 | attach-javadocs 243 | package 244 | 245 | jar 246 | 247 | 248 | 249 | 250 | 251 | org.apache.maven.plugins 252 | maven-gpg-plugin 253 | 1.6 254 | 255 | 256 | sign-artifacts 257 | verify 258 | 259 | sign 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | osssh 275 | https://oss.sonatype.org/content/repositories/snapshots 276 | 277 | 278 | ossr 279 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 280 | 281 | 282 | 283 | -------------------------------------------------------------------------------- /src/main/resources/mystem-sources.conf: -------------------------------------------------------------------------------- 1 | version { 2 | 3 | 3.1 { 4 | win64 = "http://download.cdn.yandex.net/mystem/mystem-3.1-win-64bit.zip" 5 | linux64 = "http://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz" 6 | osx = "http://download.cdn.yandex.net/mystem/mystem-3.1-macosx.tar.gz" 7 | } 8 | 9 | 3.0 { 10 | win32 = "http://download.cdn.yandex.net/mystem/mystem-3.0-win7-32bit.zip" 11 | win64 = "http://download.cdn.yandex.net/mystem/mystem-3.0-win7-64bit.zip" 12 | linux32 = "http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.5-32bit.tar.gz" 13 | linux64 = "http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz" 14 | freebsd64 = "http://download.cdn.yandex.net/mystem/mystem-3.0-freebsd9.0-64bit.tar.gz" 15 | osx = "http://download.cdn.yandex.net/mystem/mystem-3.0-macosx10.8.tar.gz" 16 | } 17 | 18 | 2.1 { 19 | 20 | } 21 | 22 | 2.0 { 23 | 24 | } 25 | 26 | 1.0 { 27 | 28 | } 29 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/CLIRunner.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem 2 | 3 | import org.slf4j._ 4 | 5 | /** 6 | * alexeyev 7 | * 31.08.14. 8 | */ 9 | object CLIRunner extends App { 10 | 11 | val log: Logger = LoggerFactory.getLogger(getClass) 12 | 13 | log.info("*************************************************************") 14 | log.info("Hello, this is MyStem-for-jvm-wrapper library, it has no CLI.") 15 | log.info("Please see \n* http://api.yandex.ru/mystem/doc/\n* https://github.com/alexeyev/mystem-scala ") 16 | 17 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/Properties.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem 2 | 3 | import java.net.URL 4 | 5 | import com.typesafe.config.ConfigFactory 6 | import org.slf4j.LoggerFactory 7 | 8 | /** 9 | * Various configs for interaction with outer world 10 | * alexeyev 11 | * 31.08.14. 12 | */ 13 | object Properties { 14 | 15 | private val log = LoggerFactory.getLogger(getClass) 16 | 17 | val BinDestination = System.getProperty("user.home") + "/.local/bin/" 18 | 19 | private val systemOsName = System.getProperty("os.name") 20 | private val systemOsArchitecture = System.getProperty("os.arch") 21 | val CurrentOs: String = os(systemOsName, systemOsArchitecture) 22 | 23 | log.debug(s"OS detected: $CurrentOs, system properties: $systemOsName | $systemOsArchitecture ") 24 | 25 | val BIN_FILE_NAME: String = CurrentOs match { 26 | case name if name.startsWith("win") => "mystem.exe" 27 | case name => "mystem" 28 | } 29 | 30 | private lazy val rootProp = ConfigFactory.load("mystem-sources.conf") 31 | private lazy val version = rootProp.getConfig("version") 32 | 33 | private val versionPattern = "\\d+\\.\\d+".r.pattern 34 | 35 | private def doOrDie[T](action: => T, message: String = "Unknown error"): T = 36 | try action 37 | catch { 38 | case e: Throwable => throw new Exception(message) 39 | } 40 | 41 | @throws(classOf[Exception]) 42 | def getUrl(versionRaw: String, os: String = CurrentOs): URL = { 43 | 44 | require(versionPattern.matcher(versionRaw).matches, 45 | "Troubles with version name, should match pattern .") 46 | 47 | val versionProps = 48 | doOrDie( 49 | version.getConfig(versionRaw), 50 | s"No binaries sources for version [$versionRaw] found") 51 | 52 | val url = 53 | doOrDie( 54 | versionProps.getString(os), 55 | s"Version number is correct, no binaries sources for OS [$os] found") 56 | 57 | doOrDie( 58 | new URL(url), 59 | s"URL configs troubles. If you see this message, please email anton.m.alexeyev@gmail.com") 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/holding/Communication.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.holding 2 | 3 | import ru.stachek66.nlp.mystem.model.Info 4 | import ru.stachek66.nlp.mystem.parsing.JsonRepresentationParser 5 | import ru.stachek66.tools.external.FailSafeExternalProcessServer 6 | 7 | import scala.util.{Failure, Success} 8 | 9 | /** 10 | * alexeyev 11 | * 16.10.14. 12 | */ 13 | case class Request(text: String) 14 | 15 | case class Response(info: Traversable[Info]) 16 | 17 | trait MyStem { 18 | 19 | def normalize(text: String): String = text.replaceAll("\n", " ") 20 | 21 | @throws(classOf[MyStemApplicationException]) 22 | def analyze(request: Request): Response 23 | } 24 | 25 | class MyStemApplicationException(e: Throwable) extends java.lang.Exception 26 | 27 | // We need this because mystem.v < 3.0 doesn't support json AFAIK 28 | class MyStem3 private[holding](s: FailSafeExternalProcessServer) extends MyStem { 29 | 30 | @throws(classOf[MyStemApplicationException]) 31 | override def analyze(request: Request): Response = { 32 | s.syncRequest(normalize(request.text)) match { 33 | case Failure(e) => throw new MyStemApplicationException(e) 34 | case Success(json) => Response(JsonRepresentationParser.toInfo(json)) 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/holding/Factory.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.holding 2 | 3 | import java.io.{File, IOException} 4 | import java.nio.file.Files 5 | import java.nio.file.attribute.PosixFilePermissions 6 | 7 | import org.slf4j.LoggerFactory 8 | import ru.stachek66.tools.external.FailSafeExternalProcessServer 9 | import ru.stachek66.tools.{Decompressor, Downloader, Tools} 10 | 11 | import scala.concurrent.duration._ 12 | import scala.sys.process._ 13 | import scala.util.Try 14 | 15 | /** 16 | * Provides fresh mystem binaries; a factory 17 | * alexeyev 18 | * 31.08.14. 19 | */ 20 | class Factory(parsingOptions: String = "-igd --eng-gr --format json --weight") { 21 | 22 | import ru.stachek66.nlp.mystem.Properties._ 23 | 24 | private val log = LoggerFactory.getLogger(getClass) 25 | 26 | /** 27 | * Creates a new instance of mystem server 28 | * Uses .local if customExecutable was not set 29 | */ 30 | def newMyStem(version: String, customExecutable: Option[File] = None): Try[MyStem] = Try { 31 | 32 | val ex = customExecutable match { 33 | case Some(exe) => exe 34 | case None => getExecutable(version) 35 | } 36 | 37 | version match { 38 | case "3.0" | "3.1" => 39 | new MyStem3( 40 | new FailSafeExternalProcessServer( 41 | ex.getAbsolutePath + (if (parsingOptions.nonEmpty) " " + parsingOptions else ""))) 42 | case _ => throw new NotImplementedError() 43 | } 44 | } 45 | 46 | @throws(classOf[Exception]) 47 | private[holding] def getExecutable(version: String): File = { 48 | 49 | val destFile = new File(BinDestination + BIN_FILE_NAME) 50 | val tempFile = new File(s"${BinDestination}tmp_${System.currentTimeMillis}.${Decompressor.select.traditionalExtension}") 51 | 52 | if (destFile.exists) { 53 | 54 | log.info("Old executable file found") 55 | 56 | try { 57 | val suggestedVersion = (destFile.getAbsolutePath + " -v").!! 58 | 59 | log.info("Version | " + suggestedVersion) 60 | // not scala-way stuff 61 | if (suggestedVersion.contains(version)) 62 | destFile 63 | else 64 | throw new Exception("Wrong version!") 65 | } catch { 66 | case e: Exception => 67 | log.warn("Removing old binary files...", e) 68 | destFile.delete 69 | getExecutable(version) 70 | } 71 | } else Tools.withAttempt(10, 1.second) { 72 | try { 73 | Decompressor.select.unpack( 74 | Downloader.downloadBinaryFile(getUrl(version), tempFile), destFile) 75 | } finally { 76 | tempFile.delete() 77 | try { 78 | Files.setPosixFilePermissions(destFile.toPath, PosixFilePermissions.fromString("r-xr-xr-x")).toFile 79 | } catch { 80 | case ioe: IOException => 81 | log.warn("Can't set POSIX permissions to file " + destFile.toPath) 82 | destFile 83 | } 84 | } 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/model/GrammarInfo.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.model 2 | 3 | /** 4 | * alexeyev 5 | * 01.09.14. 6 | */ 7 | case class GrammarInfo(pos: Set[POS.Value] = Set.empty, 8 | tense: Set[Tense.Value] = Set.empty, 9 | `case`: Set[Case.Value] = Set.empty, 10 | number: Set[Number.Value] = Set.empty, 11 | verbFormInfo: Set[VerbForms.Value] = Set.empty[VerbForms.Value], 12 | adjFormInfo: Set[AdjectiveForms.Value] = Set.empty[AdjectiveForms.Value], 13 | gender: Set[Gender.Value] = Set.empty, 14 | aspect: Set[Aspect.Value] = Set.empty, 15 | voice: Set[Voice.Value] = Set.empty, 16 | animacy: Set[Animacy.Value] = Set.empty, 17 | other: Set[Other.Value] = Set.empty[Other.Value]) 18 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/model/GrammarInfoParts.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.model 2 | 3 | /** 4 | * alexeyev 5 | * 31.08.14. 6 | */ 7 | object GrammarMapBuilder { 8 | 9 | //todo: make sure everything is covered 10 | 11 | lazy val tagToEnumMap: Map[String, Enumeration] = 12 | (tagToEnum(POS) ++ tagToEnum(Tense) ++ tagToEnum(Animacy) ++ 13 | tagToEnum(Aspect) ++ tagToEnum(VerbForms) ++ tagToEnum(Gender) ++ 14 | tagToEnum(Number) ++ tagToEnum(Voice) ++ tagToEnum(Other) ++ 15 | tagToEnum(AdjectiveForms) ++ tagToEnum(Person) ++ tagToEnum(Case) 16 | ).toMap 17 | 18 | private def tagToEnum(enum: Enumeration): Set[(String, Enumeration)] = enum.values.unsorted.map(value => value.toString -> enum) 19 | } 20 | 21 | object POS extends Enumeration { 22 | val A = Value("A") 23 | val ADV = Value("ADV") 24 | val CONJ = Value("CONJ") 25 | val INTJ = Value("INTJ") 26 | val NUM = Value("NUM") 27 | val PART = Value("PART") 28 | val PR = Value("PR") 29 | val S = Value("S") 30 | val V = Value("V") 31 | } 32 | 33 | object Tense extends Enumeration { 34 | val present = Value("praes") 35 | val inpraes = Value("inpraes") 36 | val past = Value("past") 37 | } 38 | 39 | object Case extends Enumeration { 40 | val nominative = Value("nom") 41 | val genitive = Value("gen") 42 | val dative = Value("dat") 43 | val accusative = Value("acc") 44 | val vocative = Value("voc") 45 | val instrumental = Value("ins") 46 | 47 | val ablative = Value("abl") 48 | val locative = Value("loc") 49 | val partitive = Value("part") 50 | } 51 | 52 | object Number extends Enumeration { 53 | val plural = Value("pl") 54 | val singular = Value("sg") 55 | } 56 | 57 | object VerbForms extends Enumeration { 58 | val transgressive = Value("ger") 59 | val infinitive = Value("inf") 60 | val participle = Value("partcp") 61 | 62 | val indicativeMood = Value("ind") 63 | val imperativeMood = Value("imper") 64 | 65 | val transitive = Value("tran") 66 | val intransitive = Value("intr") 67 | } 68 | 69 | object AdjectiveForms extends Enumeration { 70 | val brev = Value("brev") 71 | val plen = Value("plen") 72 | val possessive = Value("poss") 73 | val supreme = Value("supr") 74 | val comparative = Value("comp") 75 | } 76 | 77 | object Person extends Enumeration { 78 | val p1 = Value("1p") 79 | val p2 = Value("2p") 80 | val p3 = Value("3p") 81 | } 82 | 83 | object Gender extends Enumeration { 84 | val feminine = Value("f") 85 | val masculine = Value("m") 86 | val neuter = Value("n") 87 | } 88 | 89 | object Aspect extends Enumeration { 90 | val perfective = Value("pf") 91 | val imperfective = Value("ipf") 92 | } 93 | 94 | object Voice extends Enumeration { 95 | val active = Value("act") 96 | val passive = Value("pass") 97 | } 98 | 99 | object Animacy extends Enumeration { 100 | val animate = Value("anim") 101 | val inanimate = Value("inan") 102 | } 103 | 104 | object Other extends Enumeration { 105 | // вводное слово 106 | val parenth = Value("parenth") 107 | val geo = Value("geo") 108 | // образование формы затруднено 109 | val awkward = Value("awkw") 110 | val personal = Value("persn") 111 | val distorted = Value("dist") 112 | // общая форма мужского и женского рода 113 | val mf = Value("mf") 114 | val obscene = Value("obsc") 115 | val patrn = Value("patrn") 116 | // предикатив 117 | val praedicative = Value("praed") 118 | // разговорная форма 119 | val informal = Value("inform") 120 | val rare = Value("rare") 121 | val abbr = Value("abbr") 122 | val obsolete = Value("obsol") 123 | val familyName = Value("famn") 124 | } 125 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/model/Info.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.model 2 | 3 | /** 4 | * alexeyev 5 | * 31.08.14. 6 | */ 7 | case class Info(initial: String, lex: Option[String], rawResponse: String) -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/package.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp 2 | 3 | import org.slf4j.LoggerFactory 4 | 5 | /** 6 | * alexeyev 7 | * 11.09.14. 8 | */ 9 | package object mystem { 10 | 11 | private val log = LoggerFactory.getLogger(getClass) 12 | 13 | val os: Map[(String, String), String] = Map( 14 | ("Linux", "x86_64") -> "linux64", 15 | ("Linux", "amd64") -> "linux64", 16 | ("Linux", "x86") -> "linux32", 17 | ("Windows7", "x86") -> "win32", 18 | ("Windows7", "x86_64") -> "win64" 19 | ) withDefault { 20 | _ => 21 | log.warn("Getting OSX binaries!") 22 | "osx" 23 | } 24 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/parsing/GrammarInfoParsing.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.parsing 2 | 3 | import ru.stachek66.nlp.mystem.model._ 4 | 5 | /** 6 | * alexeyev 7 | * 31.08.14. 8 | */ 9 | object GrammarInfoParsing { 10 | 11 | /** 12 | * Grammar info parsing. 13 | */ 14 | def toGrammarInfo(commaSeparatedTags: String): GrammarInfo = { 15 | 16 | val mappedEnums = 17 | (commaSeparatedTags 18 | .split("[,=]") 19 | .map { 20 | case name: String => 21 | val obj: Enumeration = GrammarMapBuilder.tagToEnumMap(name) 22 | (obj, obj.withName(name)) 23 | } groupBy { 24 | case (obj: Enumeration, _) => obj 25 | } mapValues { 26 | case array => array.map(_._2) 27 | }).toMap 28 | 29 | def findByEnum[T <: scala.Enumeration](enum: T): Set[T#Value] = 30 | mappedEnums 31 | .get(enum) 32 | .map(_.map(_.asInstanceOf[T#Value]).toSet) 33 | .getOrElse(Set.empty[T#Value]) 34 | 35 | GrammarInfo( 36 | pos = findByEnum(POS), 37 | tense = findByEnum(Tense), 38 | `case` = findByEnum(Case), 39 | number = findByEnum(Number), 40 | verbFormInfo = findByEnum(VerbForms), 41 | adjFormInfo = findByEnum(AdjectiveForms), 42 | gender = findByEnum(Gender), 43 | aspect = findByEnum(Aspect), 44 | voice = findByEnum(Voice), 45 | animacy = findByEnum(Animacy), 46 | other = findByEnum(Other) 47 | ) 48 | } 49 | 50 | def toStringRepresentation(gi: GrammarInfo): String = 51 | (gi.`case` ++ gi.adjFormInfo ++ gi.animacy ++ gi.aspect ++ gi.gender ++ 52 | gi.number ++ gi.pos ++ gi.other ++ gi.tense ++ gi.verbFormInfo ++ gi.voice).mkString(",") 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/nlp/mystem/parsing/JsonRepresentationParser.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.parsing 2 | 3 | import org.json.JSONArray 4 | import ru.stachek66.nlp.mystem.model.Info 5 | 6 | /** 7 | * alexeyev 8 | * 31.08.14. 9 | */ 10 | object JsonRepresentationParser { 11 | 12 | def toInfo(json: String): Traversable[Info] = toInfo(new JSONArray(json)) 13 | 14 | private def toInfo(json: JSONArray): Traversable[Info] = { 15 | 16 | //todo: fix and enable GrammarInfo parsing 17 | 18 | val stuff: Traversable[Info] = 19 | for (i <- 0 until json.length) 20 | yield { 21 | val item = json.getJSONObject(i) 22 | val initial = item.getString("text") 23 | 24 | if (item.has("analysis")) { 25 | val analysis = item.getJSONArray("analysis") 26 | 27 | if (analysis.length() == 0) 28 | Info(initial, None, item.toString) 29 | else { 30 | val result = 31 | for (j <- 0 until analysis.length) 32 | yield { 33 | val anItem = analysis.getJSONObject(j) 34 | new Info(initial, Option(anItem.getString("lex")), item.toString) 35 | } 36 | result.head 37 | } 38 | } else { 39 | Info(initial, None, item.toString) 40 | } 41 | 42 | } 43 | stuff 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/Decompressor.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.{IOException, File, FileOutputStream} 4 | 5 | import org.apache.commons.compress.archivers.ArchiveInputStream 6 | import org.apache.commons.compress.archivers.ArchiveEntry 7 | import org.apache.commons.io.IOUtils 8 | import ru.stachek66.nlp.mystem.Properties 9 | 10 | /** 11 | * alexeyev 12 | * 11.09.14. 13 | */ 14 | trait Decompressor { 15 | 16 | def traditionalExtension: String 17 | 18 | def unpack(src: File, dst: File): File 19 | 20 | @throws(classOf[IOException]) 21 | private[tools] def copyUncompressedAndClose(stream: ArchiveInputStream[_ <: ArchiveEntry], dest: File): File = { 22 | 23 | // must be read 24 | val entry = stream.getNextEntry 25 | if (entry.isDirectory) 26 | throw new IOException("Decompressed entry is a directory (unexpectedly)") 27 | 28 | val os = new FileOutputStream(dest) 29 | 30 | try { 31 | IOUtils.copy(stream, os) 32 | } finally { 33 | os.close() 34 | stream.close() 35 | } 36 | dest 37 | } 38 | } 39 | 40 | object Decompressor { 41 | def select: Decompressor = 42 | if (Properties.CurrentOs.contains("win")) Zip else TarGz 43 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/Downloader.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.File 4 | import java.net.URL 5 | 6 | import org.apache.commons.io.FileUtils 7 | import org.slf4j.LoggerFactory 8 | 9 | /** 10 | * alexeyev 11 | * 31.08.14. 12 | */ 13 | object Downloader { 14 | 15 | private val log = LoggerFactory.getLogger(getClass) 16 | 17 | def downloadBinaryFile(url: URL, destination: File) = { 18 | log.debug(s"Getting binaries from $url, writing to $destination ") 19 | 20 | if (!destination.getAbsoluteFile.getParentFile.mkdirs && !destination.getAbsoluteFile.getParentFile.exists) 21 | throw new Exception("Could not create directory: " + destination.getParentFile) 22 | 23 | FileUtils.copyURLToFile(url, destination) 24 | log.debug("Downloading binaries done.") 25 | destination 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/TarGz.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.{IOException, File, FileInputStream} 4 | import java.util.zip.GZIPInputStream 5 | 6 | import org.apache.commons.compress.archivers.tar.TarArchiveInputStream 7 | import org.slf4j.LoggerFactory 8 | 9 | import scala.util.Try 10 | 11 | /** 12 | * *.tar.gz files decompression tool 13 | * alexeyev 14 | * 31.08.14. 15 | */ 16 | private object TarGz extends Decompressor { 17 | 18 | private val log = LoggerFactory.getLogger(getClass) 19 | 20 | def traditionalExtension: String = "tar.gz" 21 | 22 | /** 23 | * Untars -single- file 24 | */ 25 | @throws(classOf[IOException]) 26 | def unpack(src: File, dst: File): File = { 27 | 28 | log.debug(s"Unpacking $src to $dst...") 29 | 30 | val tarIn = 31 | new TarArchiveInputStream( 32 | new GZIPInputStream( 33 | new FileInputStream(src))) 34 | 35 | val result = copyUncompressedAndClose(tarIn, dst) 36 | log.debug(s"Done.") 37 | result 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/Tools.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import org.slf4j.LoggerFactory 4 | 5 | import scala.concurrent.duration._ 6 | 7 | /** 8 | * alexeyev 9 | * 02.09.14. 10 | */ 11 | object Tools { 12 | 13 | private val log = LoggerFactory.getLogger(getClass) 14 | 15 | @throws(classOf[Exception]) 16 | def withAttempt[T](n: Int, timeout: Duration = 0.millis)(action: => T): T = try { 17 | action 18 | } catch { 19 | case e: Exception if n > 1 => 20 | log.warn(s"${n - 1} attempts left", e) 21 | Thread.sleep(timeout.toMillis) 22 | withAttempt(n - 1)(action) 23 | case e: Exception => 24 | throw new Exception("No attempts left", e) 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/Zip.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.{IOException, BufferedInputStream, File, FileInputStream} 4 | 5 | import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream 6 | import org.slf4j.LoggerFactory 7 | 8 | /** 9 | * *.zip decompressor tool 10 | * alexeyev 11 | * 31.08.14. 12 | */ 13 | private object Zip extends Decompressor { 14 | 15 | private val log = LoggerFactory.getLogger(getClass) 16 | 17 | def traditionalExtension: String = "zip" 18 | 19 | /** 20 | * Unzips single file 21 | */ 22 | @throws(classOf[IOException]) 23 | def unpack(src: File, dst: File): File = { 24 | 25 | log.debug(s"Unpacking $src to $dst...") 26 | 27 | val zipIn = new ZipArchiveInputStream( 28 | new BufferedInputStream( 29 | new FileInputStream(src))) 30 | 31 | val res = copyUncompressedAndClose(zipIn, dst) 32 | 33 | log.debug("Done.") 34 | 35 | res 36 | } 37 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/external/ExternalProcessServer.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools.external 2 | 3 | import java.io.{BufferedReader, BufferedWriter, OutputStreamWriter} 4 | import java.nio.charset.Charset 5 | 6 | import org.slf4j.LoggerFactory 7 | 8 | import scala.util._ 9 | 10 | 11 | /** 12 | * Please be careful when using! No death handling. 13 | * alexeyev 14 | * 13.09.14. 15 | */ 16 | private[external] class ExternalProcessServer(starterCommand: String) extends SyncServer { 17 | 18 | private val log = LoggerFactory.getLogger(getClass) 19 | 20 | private val p = Runtime.getRuntime.exec(starterCommand) 21 | private val (in, out, err) = (p.getInputStream, p.getOutputStream, p.getErrorStream) 22 | 23 | private val writer = new BufferedWriter(new OutputStreamWriter(out, Charset.forName("utf-8")), 1) 24 | private val reader = io.Source.fromInputStream(in).reader() 25 | private val bufferedReader = new BufferedReader(reader) 26 | 27 | def syncRequest(request: String): Try[String] = Try { 28 | 29 | writer.write(request) 30 | writer.newLine() 31 | writer.flush() 32 | 33 | while (!bufferedReader.ready()) {} 34 | 35 | val builder = new StringBuilder() 36 | while (bufferedReader.ready) builder.append(bufferedReader.readLine()) 37 | builder.toString() 38 | } 39 | 40 | def isAlive: Boolean = { 41 | Try(p.exitValue()) match { 42 | case Success(_) => false 43 | case Failure(e: IllegalThreadStateException) => true 44 | case Failure(e) => throw new RuntimeException(e) // unknown exception 45 | } 46 | } 47 | 48 | def kill() { 49 | p.destroy() 50 | } 51 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/external/FailSafeExternalProcessServer.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools.external 2 | 3 | import java.util.concurrent.atomic.AtomicReference 4 | 5 | import ru.stachek66.tools.Tools 6 | 7 | import scala.util.Try 8 | 9 | /** 10 | * alexeyev 11 | * 16.10.14. 12 | */ 13 | class FailSafeExternalProcessServer(starterCommand: String, attempts: Int = 30) extends SyncServer { 14 | 15 | private val ps = new AtomicReference[ExternalProcessServer](new ExternalProcessServer(starterCommand)) 16 | 17 | override def syncRequest(request: String): Try[String] = this.synchronized { 18 | Tools.withAttempt(attempts) { 19 | if (!ps.get.isAlive) ps.set(new ExternalProcessServer(starterCommand)) 20 | ps.get.syncRequest(request) 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /src/main/scala/ru/stachek66/tools/external/SyncServer.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools.external 2 | 3 | import scala.util.Try 4 | 5 | /** 6 | * alexeyev 7 | * 16.10.14. 8 | */ 9 | trait SyncServer { 10 | 11 | /** 12 | * You give it a string, and you get either response string or nothing. 13 | */ 14 | def syncRequest(request: String): Try[String] 15 | 16 | } -------------------------------------------------------------------------------- /src/test/resources/res.txt: -------------------------------------------------------------------------------- 1 | Прозрачные зелёные идеи живут и побеждают! 2 | -------------------------------------------------------------------------------- /src/test/resources/test.tar: -------------------------------------------------------------------------------- 1 | test.txt0000664000175000017500000000011712404154521013023 0ustar alexeyevalexeyevПрозрачные зелёные идеи живут и побеждают! 2 | -------------------------------------------------------------------------------- /src/test/resources/test.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexeyev/mystem-scala/9e089eb0ae0c6adf91aff2167c553a74a140bf87/src/test/resources/test.tar.gz -------------------------------------------------------------------------------- /src/test/resources/test.txt: -------------------------------------------------------------------------------- 1 | Прозрачные зелёные идеи живут и побеждают! 2 | -------------------------------------------------------------------------------- /src/test/resources/test.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexeyev/mystem-scala/9e089eb0ae0c6adf91aff2167c553a74a140bf87/src/test/resources/test.zip -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/nlp/mystem/Properties$Test.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem 2 | 3 | import java.net.URL 4 | 5 | import org.junit.runner.RunWith 6 | import org.scalatest.FunSuite 7 | import org.scalatest.junit.JUnitRunner 8 | 9 | /** 10 | * alexeyev 11 | * 31.08.14. 12 | */ 13 | class Properties$Test extends FunSuite { 14 | 15 | test("getting-download-url") { 16 | 17 | assert(Properties.getUrl("3.0", "win32") === new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-win7-32bit.zip")) 18 | assert(Properties.getUrl("3.0", "linux64") === new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz")) 19 | assert(Properties.getUrl("3.1", "win64") === new URL("http://download.cdn.yandex.net/mystem/mystem-3.1-win-64bit.zip")) 20 | assert(Properties.getUrl("3.1", "linux64") === new URL("http://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz")) 21 | 22 | } 23 | } -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/nlp/mystem/holding/Holder.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.holding 2 | 3 | /** 4 | * alexeyev 5 | * 12.09.14. 6 | */ 7 | object HolderApp extends App { 8 | 9 | val h = new Factory() 10 | println("holder ready") 11 | val p = h.newMyStem("3.0").get 12 | println("raw process created") 13 | 14 | while (true) { 15 | println("asking") 16 | println(p.analyze(Request("леново"))) 17 | println("answer printed") 18 | Thread.sleep(math.round(math.random * 10000)) 19 | } 20 | } -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/nlp/mystem/model/GrammarMapBuilder$Test.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.model 2 | 3 | import org.junit.runner.RunWith 4 | import org.scalatest.FunSuite 5 | import org.scalatest.junit.JUnitRunner 6 | 7 | /** 8 | * alexeyev 9 | * 16.09.14. 10 | */ 11 | class GrammarMapBuilder$Test extends FunSuite { 12 | 13 | test("grammar") { 14 | println(GrammarMapBuilder.tagToEnumMap("ADV").withName("ADV")) 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/nlp/mystem/parsing/GrammarInfoParsing$Test.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.nlp.mystem.parsing 2 | 3 | import org.scalatest.FunSuite 4 | 5 | /** 6 | * alexeyev 7 | * 01.09.14. 8 | */ 9 | class GrammarInfoParsing$Test extends FunSuite { 10 | 11 | //todo 12 | // test("g-i-p-t-королевский") { 13 | // val testString = "A=acc,sg,plen,m,inan" 14 | // import ru.stachek66.nlp.mystem.parsing.GrammarInfoParsing._ 15 | // assert(toGrammarInfo(testString) === toGrammarInfo(toStringRepresentation(toGrammarInfo(testString)))) 16 | // } 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/tools/Downloader$Test.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.File 4 | import java.net.URL 5 | 6 | import org.junit.runner.RunWith 7 | import org.scalatest.{Ignore, FunSuite} 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | /** 11 | * alexeyev 12 | * 07.09.14. 13 | */ 14 | @Ignore 15 | class Downloader$Test extends FunSuite { 16 | 17 | test("downloading-something") { 18 | 19 | val hello = new File("hello-test.html") 20 | val mystem = new File("atmta.binary") 21 | 22 | Downloader.downloadBinaryFile(new URL("http://www.stachek66.ru/"), hello) 23 | 24 | Downloader.downloadBinaryFile( 25 | new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz"), 26 | mystem 27 | ) 28 | 29 | Downloader.downloadBinaryFile( 30 | new URL("http://download.cdn.yandex.net/mystem/mystem-3.1-win-64bit.zip"), 31 | mystem 32 | ) 33 | 34 | hello.delete 35 | mystem.delete 36 | } 37 | 38 | test("download-and-unpack") { 39 | val bin = new File("atmta.binary.tar.gz") 40 | val bin2 = new File("executable") 41 | 42 | Decompressor.select.unpack( 43 | Downloader.downloadBinaryFile( 44 | new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz"), 45 | bin), 46 | bin2 47 | ) 48 | 49 | bin.delete 50 | bin2.delete 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/tools/Exec$Test.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.File 4 | 5 | import org.junit.runner.RunWith 6 | import org.scalatest.FunSuite 7 | import org.scalatest.junit.JUnitRunner 8 | 9 | /** 10 | * alexeyev 11 | * 12.09.14. 12 | */ 13 | class Exec$Test extends FunSuite { 14 | 15 | 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/tools/TarGz$Test.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.{File, FileInputStream} 4 | 5 | import org.apache.commons.io.IOUtils 6 | import org.junit.runner.RunWith 7 | import org.scalatest.FunSuite 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | /** 11 | * alexeyev 12 | * 11.09.14. 13 | */ 14 | class TarGz$Test extends FunSuite { 15 | 16 | test("tgz-test") { 17 | val src = new File("src/test/resources/test.txt") 18 | TarGz.unpack( 19 | new File("src/test/resources/test.tar.gz"), 20 | new File("src/test/resources/res.txt")) match { 21 | case f => 22 | val content0 = IOUtils.toString(new FileInputStream(f)) 23 | val content1 = IOUtils.toString(new FileInputStream(src)) 24 | print(content0.trim + " vs " + content1.trim) 25 | // trimming thanks to line separators; should be more careful maybe 26 | assert(content0.trim === content1.trim) 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/ru/stachek66/tools/Zip$Test.scala: -------------------------------------------------------------------------------- 1 | package ru.stachek66.tools 2 | 3 | import java.io.{File, FileInputStream} 4 | 5 | import org.apache.commons.io.IOUtils 6 | import org.scalatest.FunSuite 7 | import org.scalatest.junit.JUnitRunner 8 | 9 | import org.junit.runner.RunWith 10 | 11 | /** 12 | * alexeyev 13 | * 11.09.14. 14 | */ 15 | class Zip$Test extends FunSuite { 16 | 17 | test("zip-test") { 18 | val src = new File("src/test/resources/test.txt") 19 | Zip.unpack( 20 | new File("src/test/resources/test.zip"), 21 | new File("src/test/resources/res.txt")) match { 22 | case f => 23 | val content0 = IOUtils.toString(new FileInputStream(f)) 24 | val content1 = IOUtils.toString(new FileInputStream(src)) 25 | print(content0.trim + " vs " + content1.trim) 26 | // trimming thanks to line separators; should be more careful maybe 27 | assert(content0.trim === content1.trim) 28 | } 29 | } 30 | 31 | } 32 | --------------------------------------------------------------------------------