├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
└── src
├── main
├── resources
│ └── mystem-sources.conf
└── scala
│ └── ru
│ └── stachek66
│ ├── nlp
│ └── mystem
│ │ ├── CLIRunner.scala
│ │ ├── Properties.scala
│ │ ├── holding
│ │ ├── Communication.scala
│ │ └── Factory.scala
│ │ ├── model
│ │ ├── GrammarInfo.scala
│ │ ├── GrammarInfoParts.scala
│ │ └── Info.scala
│ │ ├── package.scala
│ │ └── parsing
│ │ ├── GrammarInfoParsing.scala
│ │ └── JsonRepresentationParser.scala
│ └── tools
│ ├── Decompressor.scala
│ ├── Downloader.scala
│ ├── TarGz.scala
│ ├── Tools.scala
│ ├── Zip.scala
│ └── external
│ ├── ExternalProcessServer.scala
│ ├── FailSafeExternalProcessServer.scala
│ └── SyncServer.scala
└── test
├── resources
├── res.txt
├── test.tar
├── test.tar.gz
├── test.txt
└── test.zip
└── scala
└── ru
└── stachek66
├── nlp
└── mystem
│ ├── Properties$Test.scala
│ ├── holding
│ └── Holder.scala
│ ├── model
│ └── GrammarMapBuilder$Test.scala
│ └── parsing
│ └── GrammarInfoParsing$Test.scala
└── tools
├── Downloader$Test.scala
├── Exec$Test.scala
├── TarGz$Test.scala
└── Zip$Test.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | .idea
3 | target
4 | *.zip
5 | *.gz
6 | *~
7 | *log
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Anton Alekseev
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # A Scala wrapper for morphological analyzer Yandex.MyStem
2 |
3 | ## Introduction
4 |
5 | Details about the algorithm can be found in [I. Segalovich «A fast morphological algorithm with unknown word guessing induced by a dictionary for a web search engine», MLMTA-2003, Las Vegas, Nevada, USA.](http://download.yandex.ru/company/iseg-las-vegas.pdf)
6 |
7 | The wrapper's code is under the MIT license, but please remember that Yandex.MyStem is not open source and is licensed under [the conditions of the Yandex License](https://legal.yandex.ru/mystem/).
8 |
9 | ## System Requirements
10 |
11 | The wrapper should at least work on Ubuntu Linux 12.04+, Windows 7+ (+ people say it also works on OS X).
12 |
13 | ## Install
14 |
15 | ### Maven
16 |
17 | [Maven central](http://search.maven.org/#artifactdetails|ru.stachek66.nlp|mystem-scala|0.1.4|jar)
18 |
19 | ```xml
20 | <dependency>
21 |     <groupId>ru.stachek66.nlp</groupId>
22 |     <artifactId>mystem-scala</artifactId>
23 |     <version>0.1.6</version>
24 | </dependency>
25 | ```
26 |
27 | ## Issues
28 |
29 | Only mystem 3.{0,1} are supported currently.
30 | Please [create issues for compatibility troubles and other requests.](https://github.com/alexeyev/mystem-scala/issues)
31 |
32 | ## Examples
33 |
34 | Probably the most important thing to remember when working with mystem-scala is
35 | that you should have just one MyStem instance per mystem/mystem.exe file in your application.
36 |
37 | ### Scala
38 |
39 | ```scala
40 | import java.io.File
41 |
42 | import ru.stachek66.nlp.mystem.holding.{Factory, MyStem, Request}
43 |
44 | object MystemSingletonScala {
45 |
46 | val mystemAnalyzer: MyStem =
47 | new Factory("-igd --eng-gr --format json --weight")
48 | .newMyStem(
49 | "3.0",
50 | Option(new File("/home/coolguy/coolproject/3dparty/mystem"))).get
51 | }
52 |
53 | object AppExampleScala extends App {
54 |
55 | MystemSingletonScala
56 | .mystemAnalyzer
57 | .analyze(Request("Есть большие пассажиры мандариновой травы"))
58 | .info
59 | .foreach(info => println(info.initial + " -> " + info.lex))
60 | }
61 | ```
62 |
63 | ### Java
64 |
65 | ```java
66 | import ru.stachek66.nlp.mystem.holding.Factory;
67 | import ru.stachek66.nlp.mystem.holding.MyStem;
68 | import ru.stachek66.nlp.mystem.holding.MyStemApplicationException;
69 | import ru.stachek66.nlp.mystem.holding.Request;
70 | import ru.stachek66.nlp.mystem.model.Info;
71 | import scala.Option;
72 | import scala.collection.JavaConversions;
73 |
74 | import java.io.File;
75 |
76 | public class MyStemJavaExample {
77 |
78 | private final static MyStem mystemAnalyzer =
79 | new Factory("-igd --eng-gr --format json --weight")
80 | .newMyStem("3.0", Option.empty()).get();
81 |
82 | public static void main(final String[] args) throws MyStemApplicationException {
83 |
84 | final Iterable<Info> result =
85 | JavaConversions.asJavaIterable(
86 | mystemAnalyzer
87 | .analyze(Request.apply("И вырвал грешный мой язык"))
88 | .info()
89 | .toIterable());
90 |
91 | for (final Info info : result) {
92 | System.out.println(info.initial() + " -> " + info.lex() + " | " + info.rawResponse());
93 | }
94 | }
95 | }
96 | ```
97 | ## How to Cite
98 |
99 | If you use this work, a reference to this repository is highly appreciated.
100 |
101 | ```bibtex
102 | @misc{alekseev2018mystemscala,
103 | author = {Anton Alekseev},
104 | title = {mystem-scala},
105 | year = {2018},
106 | publisher = {GitHub},
107 | journal = {GitHub repository},
108 | howpublished = {\url{https://github.com/alexeyev/mystem-scala/}},
109 | commit = {the latest commit of the codebase you have used}
110 | }
111 | ```
112 |
113 | If you do cite it, please do not forget to cite [the original algorithm's author's paper](http://download.yandex.ru/company/iseg-las-vegas.pdf) as well.
114 |
115 | ## Contacts
116 |
117 | Anton Alekseev
118 |
119 | ## Thanks for reviews, reports and contributions
120 |
121 | * Vladislav Dolbilov, @darl
122 | * Mikhail Malchevsky
123 | * @anton-shirikov
124 | * Filipp Malkovsky
125 | * @dizzy7
126 |
127 | ## Also please see
128 |
129 | * https://tech.yandex.ru/mystem/
130 | * https://nlpub.ru/Mystem
131 | * https://github.com/Digsolab/pymystem3
132 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | ru.stachek66.nlp
8 | mystem-scala
9 | 0.2.1
10 | jar
11 |
12 | ${project.groupId}:${project.artifactId}
13 | A Scala wrapper for morphological analyzer Yandex.MyStem
14 | https://github.com/alexeyev/mystem-scala
15 |
16 |
17 |
18 | MIT License
19 | http://www.opensource.org/licenses/mit-license.php
20 |
21 |
22 |
23 |
24 |
25 | Anton Alekseev
26 | anton.m.alexeyev@gmail.com
27 | https://alexeyev.github.io/
28 |
29 |
30 |
31 |
32 | scm:git:git@github.com:alexeyev/mystem-scala.git
33 | scm:git:git@github.com:alexeyev/mystem-scala.git
34 | git@github.com:alexeyev/mystem-scala.git
35 |
36 |
37 |
38 | 3.0.2
39 |
40 |
41 |
42 | 2.13
43 | ${scala.base.version}.4
44 | 3.0.9
45 | 1.7.32
46 | UTF-8
47 | 1.2.13
48 | 4.5.6
49 | 1.2.1
50 | 2.15.1
51 | 4.13.1
52 | 2.7
53 | 1.4.11
54 | 1.26.0
55 |
56 |
57 |
58 |
59 |
60 | org.json
61 | json
62 | 20231013
63 |
64 |
65 |
66 | ch.qos.logback
67 | logback-classic
68 | ${logback.version}
69 |
70 |
71 |
72 | org.slf4j
73 | slf4j-api
74 | ${slf4j.version}
75 |
76 |
77 |
78 | org.scala-lang
79 | scala-library
80 | ${scala.version}
81 |
82 |
83 |
84 | org.apache.commons
85 | commons-compress
86 | ${compress.version}
87 |
88 |
89 |
90 | org.scalatest
91 | scalatest_${scala.base.version}
92 | ${scalatest.version}
93 | test
94 |
95 |
96 | org.scala-lang
97 | scala-library
98 |
99 |
100 |
101 |
102 |
103 | com.typesafe
104 | config
105 | ${typesafe.config.version}
106 |
107 |
108 |
109 | commons-io
110 | commons-io
111 | ${commons.version}
112 |
113 |
114 |
115 | junit
116 | junit
117 | ${junit.version}
118 | test
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 | org.scoverage
128 | scoverage-maven-plugin
129 | ${scoverage.plugin.version}
130 |
131 |
132 |
133 |
134 |
135 |
136 | org.apache.maven.plugins
137 | maven-release-plugin
138 | 2.1
139 |
140 |
141 | org.apache.maven.plugins
142 | maven-resources-plugin
143 | 2.5
144 |
145 |
146 | net.alchim31.maven
147 | scala-maven-plugin
148 | ${scala.plugin.version}
149 |
150 |
151 |
152 | compile
153 | testCompile
154 |
155 |
156 |
157 |
158 |
159 | maven-compiler-plugin
160 | 2.3.2
161 |
162 | 1.8
163 | 1.8
164 |
165 |
166 |
167 | org.apache.maven.plugins
168 | maven-enforcer-plugin
169 | 1.4
170 |
171 |
172 | enforce-em-all
173 | package
174 |
175 |
176 |
177 |
178 |
179 |
180 | enforce
181 |
182 |
183 |
184 |
185 |
186 | org.scalatest
187 | scalatest-maven-plugin
188 | 1.0
189 |
190 | ${project.build.directory}/surefire-reports
191 | W
192 |
193 |
194 |
195 | scala-test
196 |
197 | test
198 |
199 |
200 |
201 |
202 |
203 | org.scoverage
204 | scoverage-maven-plugin
205 |
206 | 30
207 | true
208 |
209 |
210 |
211 | coverage
212 | test
213 |
214 | check
215 |
216 |
217 |
218 |
219 |
220 | org.apache.maven.plugins
221 | maven-source-plugin
222 | 3.2.1
223 |
224 |
225 | attach-sources
226 |
227 | jar
228 |
229 |
230 |
231 |
232 |
233 | org.apache.maven.plugins
234 | maven-javadoc-plugin
235 | 3.2.0
236 |
237 | ${project.build.directory}
238 | ${project.reporting.outputDirectory}
239 |
240 |
241 |
242 | attach-javadocs
243 | package
244 |
245 | jar
246 |
247 |
248 |
249 |
250 |
251 | org.apache.maven.plugins
252 | maven-gpg-plugin
253 | 1.6
254 |
255 |
256 | sign-artifacts
257 | verify
258 |
259 | sign
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 | osssh
275 | https://oss.sonatype.org/content/repositories/snapshots
276 |
277 |
278 | ossr
279 | https://oss.sonatype.org/service/local/staging/deploy/maven2/
280 |
281 |
282 |
283 |
--------------------------------------------------------------------------------
/src/main/resources/mystem-sources.conf:
--------------------------------------------------------------------------------
1 | version {
2 |
3 | 3.1 {
4 | win64 = "http://download.cdn.yandex.net/mystem/mystem-3.1-win-64bit.zip"
5 | linux64 = "http://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz"
6 | osx = "http://download.cdn.yandex.net/mystem/mystem-3.1-macosx.tar.gz"
7 | }
8 |
9 | 3.0 {
10 | win32 = "http://download.cdn.yandex.net/mystem/mystem-3.0-win7-32bit.zip"
11 | win64 = "http://download.cdn.yandex.net/mystem/mystem-3.0-win7-64bit.zip"
12 | linux32 = "http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.5-32bit.tar.gz"
13 | linux64 = "http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz"
14 | freebsd64 = "http://download.cdn.yandex.net/mystem/mystem-3.0-freebsd9.0-64bit.tar.gz"
15 | osx = "http://download.cdn.yandex.net/mystem/mystem-3.0-macosx10.8.tar.gz"
16 | }
17 |
18 | 2.1 {
19 |
20 | }
21 |
22 | 2.0 {
23 |
24 | }
25 |
26 | 1.0 {
27 |
28 | }
29 | }
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/CLIRunner.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem
2 |
3 | import org.slf4j._
4 |
5 | /**
6 | * alexeyev
7 | * 31.08.14.
8 | */
object CLIRunner extends App {

  val log: Logger = LoggerFactory.getLogger(getClass)

  // The library has no command-line interface: running it only logs a banner
  // and pointers to the documentation, in this order.
  Seq(
    "*************************************************************",
    "Hello, this is MyStem-for-jvm-wrapper library, it has no CLI.",
    "Please see \n* http://api.yandex.ru/mystem/doc/\n* https://github.com/alexeyev/mystem-scala "
  ).foreach(message => log.info(message))

}
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/Properties.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem
2 |
3 | import java.net.URL
4 |
5 | import com.typesafe.config.ConfigFactory
6 | import org.slf4j.LoggerFactory
7 |
8 | /**
9 | * Various configs for interaction with outer world
10 | * alexeyev
11 | * 31.08.14.
12 | */
object Properties {

  private val log = LoggerFactory.getLogger(getClass)

  // Directory under the user's home where the mystem binary is stored.
  val BinDestination: String = System.getProperty("user.home") + "/.local/bin/"

  private val systemOsName = System.getProperty("os.name")
  private val systemOsArchitecture = System.getProperty("os.arch")

  // Key of the current platform as used in mystem-sources.conf (looked up in the package-level `os` map).
  val CurrentOs: String = os((systemOsName, systemOsArchitecture))

  log.debug(s"OS detected: $CurrentOs, system properties: $systemOsName | $systemOsArchitecture ")

  // Name of the executable: Windows keys start with "win" and need the .exe suffix.
  val BIN_FILE_NAME: String =
    if (CurrentOs.startsWith("win")) "mystem.exe" else "mystem"

  private lazy val rootProp = ConfigFactory.load("mystem-sources.conf")
  private lazy val version = rootProp.getConfig("version")

  private val versionPattern = "\\d+\\.\\d+".r.pattern

  /**
   * Runs `action`; any non-fatal failure is rethrown as an `Exception` carrying
   * `message`, with the original failure kept as the cause so it is not lost.
   */
  private def doOrDie[T](action: => T, message: String = "Unknown error"): T =
    try action
    catch {
      case scala.util.control.NonFatal(e) => throw new Exception(message, e)
    }

  /**
   * Looks up the download URL of the mystem binaries in mystem-sources.conf.
   *
   * @param versionRaw version string, must match `\d+\.\d+` (e.g. "3.0")
   * @param os         platform key inside the version section, defaults to [[CurrentOs]]
   * @return the configured URL
   * @throws IllegalArgumentException if `versionRaw` does not match the version pattern
   * @throws Exception                if the version or the platform is not configured,
   *                                  or the configured value is not a valid URL
   */
  @throws(classOf[Exception])
  def getUrl(versionRaw: String, os: String = CurrentOs): URL = {

    require(versionPattern.matcher(versionRaw).matches,
      s"Troubles with version name, should match pattern ${versionPattern.pattern()}")

    val versionProps =
      doOrDie(
        version.getConfig(versionRaw),
        s"No binaries sources for version [$versionRaw] found")

    val url =
      doOrDie(
        versionProps.getString(os),
        s"Version number is correct, no binaries sources for OS [$os] found")

    doOrDie(
      new URL(url),
      s"URL configs troubles. If you see this message, please email anton.m.alexeyev@gmail.com")
  }
}
62 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/holding/Communication.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.holding
2 |
3 | import ru.stachek66.nlp.mystem.model.Info
4 | import ru.stachek66.nlp.mystem.parsing.JsonRepresentationParser
5 | import ru.stachek66.tools.external.FailSafeExternalProcessServer
6 |
7 | import scala.util.{Failure, Success}
8 |
/**
 * A single analysis request.
 *
 * @param text the text to analyze
 *
 * alexeyev
 * 16.10.14.
 */
case class Request(text: String)
14 |
/**
 * Result of an analysis.
 *
 * @param info the parsed entries, one per item of the analyzer's JSON output
 */
case class Response(info: Traversable[Info])
16 |
trait MyStem {

  // Replaces every line feed in the text with a single space.
  def normalize(text: String): String = text.replace("\n", " ")

  /**
   * Analyzes the text of the request.
   *
   * @param request the text to analyze
   * @return the analysis result
   */
  @throws(classOf[MyStemApplicationException])
  def analyze(request: Request): Response
}
24 |
/**
 * Signals that a request to the mystem process failed.
 *
 * @param e the underlying failure; passed on to `Exception` so that
 *          `getCause` and stack traces expose it
 */
class MyStemApplicationException(e: Throwable) extends java.lang.Exception(e)
26 |
// We need this because mystem.v < 3.0 doesn't support json AFAIK
class MyStem3 private[holding](s: FailSafeExternalProcessServer) extends MyStem {

  /**
   * Sends the normalized request text to the process server and parses the JSON reply.
   * A failed exchange is rethrown as [[MyStemApplicationException]].
   */
  @throws(classOf[MyStemApplicationException])
  override def analyze(request: Request): Response = {
    val json = s.syncRequest(normalize(request.text)) match {
      case Success(payload) => payload
      case Failure(cause) => throw new MyStemApplicationException(cause)
    }
    Response(JsonRepresentationParser.toInfo(json))
  }
}
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/holding/Factory.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.holding
2 |
3 | import java.io.{File, IOException}
4 | import java.nio.file.Files
5 | import java.nio.file.attribute.PosixFilePermissions
6 |
7 | import org.slf4j.LoggerFactory
8 | import ru.stachek66.tools.external.FailSafeExternalProcessServer
9 | import ru.stachek66.tools.{Decompressor, Downloader, Tools}
10 |
11 | import scala.concurrent.duration._
12 | import scala.sys.process._
13 | import scala.util.Try
14 |
15 | /**
16 | * Provides fresh mystem binaries; a factory
17 | * alexeyev
18 | * 31.08.14.
19 | */
class Factory(parsingOptions: String = "-igd --eng-gr --format json --weight") {

  import ru.stachek66.nlp.mystem.Properties._

  private val log = LoggerFactory.getLogger(getClass)

  /**
   * Creates a new instance of mystem server
   * Uses .local if customExecutable was not set
   *
   * @param version          mystem version; only "3.0" and "3.1" are supported
   * @param customExecutable an existing mystem binary to use instead of the managed one
   * @return the analyzer, or a `Failure` if the binary could not be obtained
   *         or the version is not supported
   */
  def newMyStem(version: String, customExecutable: Option[File] = None): Try[MyStem] = Try {

    val ex = customExecutable.getOrElse(getExecutable(version))

    version match {
      case "3.0" | "3.1" =>
        // NOTE(review): the command is handed over as one string; a binary path containing
        // spaces is presumably split by the process server -- confirm.
        val command =
          if (parsingOptions.nonEmpty) ex.getAbsolutePath + " " + parsingOptions
          else ex.getAbsolutePath
        new MyStem3(new FailSafeExternalProcessServer(command))
      case _ => throw new NotImplementedError()
    }
  }

  /**
   * Returns the managed mystem binary of the requested version, downloading it when
   * it is absent and replacing it when the installed binary reports another version.
   *
   * @throws IOException if a stale binary can not be removed
   * @throws Exception   if downloading or unpacking fails after all attempts
   */
  @throws(classOf[Exception])
  private[holding] def getExecutable(version: String): File = {

    val destFile = new File(BinDestination + BIN_FILE_NAME)

    if (destFile.exists) {

      log.info("Old executable file found")

      try {
        // Argument list instead of a concatenated string: the path may contain spaces.
        val suggestedVersion = Seq(destFile.getAbsolutePath, "-v").!!

        log.info("Version | " + suggestedVersion)
        // not scala-way stuff
        if (suggestedVersion.contains(version))
          destFile
        else
          throw new Exception("Wrong version!")
      } catch {
        case e: Exception =>
          log.warn("Removing old binary files...", e)
          // Without this guard an undeletable file would make the retry below recurse forever.
          if (!destFile.delete() && destFile.exists)
            throw new IOException("Can't remove old binary file " + destFile, e)
          getExecutable(version)
      }
    } else downloadExecutable(version, destFile)
  }

  // Downloads the archive for `version`, unpacks it to `destFile` and marks the result executable.
  private def downloadExecutable(version: String, destFile: File): File = {

    val tempFile =
      new File(s"${BinDestination}tmp_${System.currentTimeMillis}.${Decompressor.select.traditionalExtension}")

    Tools.withAttempt(10, 1.second) {
      val unpacked =
        try {
          Decompressor.select.unpack(
            Downloader.downloadBinaryFile(getUrl(version), tempFile), destFile)
        } finally {
          tempFile.delete()
        }
      // Only reached after a successful unpack, so a failed attempt is not masked.
      makeExecutable(unpacked)
      unpacked
    }
  }

  // Best effort: file systems without POSIX attributes (e.g. on Windows) only get a warning.
  private def makeExecutable(file: File): Unit =
    try {
      Files.setPosixFilePermissions(file.toPath, PosixFilePermissions.fromString("r-xr-xr-x"))
    } catch {
      case _: IOException | _: UnsupportedOperationException =>
        log.warn("Can't set POSIX permissions to file " + file.toPath)
    }
}
88 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/model/GrammarInfo.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.model
2 |
3 | /**
4 | * alexeyev
5 | * 01.09.14.
6 | */
// Grammatical description of a word form: one set of values per category,
// every set being empty unless given explicitly.
case class GrammarInfo(
  pos: Set[POS.Value] = Set.empty,
  tense: Set[Tense.Value] = Set.empty,
  `case`: Set[Case.Value] = Set.empty,
  number: Set[Number.Value] = Set.empty,
  verbFormInfo: Set[VerbForms.Value] = Set.empty,
  adjFormInfo: Set[AdjectiveForms.Value] = Set.empty,
  gender: Set[Gender.Value] = Set.empty,
  aspect: Set[Aspect.Value] = Set.empty,
  voice: Set[Voice.Value] = Set.empty,
  animacy: Set[Animacy.Value] = Set.empty,
  other: Set[Other.Value] = Set.empty
)
18 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/model/GrammarInfoParts.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.model
2 |
3 | /**
4 | * alexeyev
5 | * 31.08.14.
6 | */
object GrammarMapBuilder {

  //todo: make sure everything is covered

  // Maps every known tag string to the enumeration that declares it.
  lazy val tagToEnumMap: Map[String, Enumeration] = {
    val enumerations: Seq[Enumeration] = Seq(
      POS, Tense, Animacy,
      Aspect, VerbForms, Gender,
      Number, Voice, Other,
      AdjectiveForms, Person, Case)
    enumerations.flatMap(e => tagToEnum(e)).toMap
  }

  // Pairs the string form of each value with its owning enumeration.
  private def tagToEnum(enumeration: Enumeration): Set[(String, Enumeration)] =
    enumeration.values.unsorted.map { tag =>
      (tag.toString, enumeration)
    }
}
20 |
// Part-of-speech tags; the name of each value is the tag string itself.
object POS extends Enumeration {
  val A = Value("A")
  val ADV = Value("ADV")
  val CONJ = Value("CONJ")
  val INTJ = Value("INTJ")
  val NUM = Value("NUM")
  val PART = Value("PART")
  val PR = Value("PR")
  val S = Value("S")
  val V = Value("V")

  // Tags from the MyStem grammeme list that were missing here. They are appended
  // after the original values so that the ids of the existing values do not change.
  val ADVPRO = Value("ADVPRO")
  val ANUM = Value("ANUM")
  val APRO = Value("APRO")
  val COM = Value("COM")
  val SPRO = Value("SPRO")
}
32 |
// Verb tense tags; the name of each value is the tag string itself.
// NOTE(review): MyStem's documented past-tense tag appears to be "praet" rather than "past" -- confirm.
object Tense extends Enumeration {
  val present: Value = Value("praes")
  val inpraes: Value = Value("inpraes")
  val past: Value = Value("past")
}
38 |
// Grammatical case tags; the name of each value is the tag string itself.
object Case extends Enumeration {
  val nominative: Value = Value("nom")
  val genitive: Value = Value("gen")
  val dative: Value = Value("dat")
  val accusative: Value = Value("acc")
  val vocative: Value = Value("voc")
  val instrumental: Value = Value("ins")

  val ablative: Value = Value("abl")
  val locative: Value = Value("loc")
  val partitive: Value = Value("part")
}
51 |
// Grammatical number tags; the name of each value is the tag string itself.
object Number extends Enumeration {
  val plural: Value = Value("pl")
  val singular: Value = Value("sg")
}
56 |
// Verb form, mood and transitivity tags; the name of each value is the tag string itself.
// NOTE(review): MyStem's documented indicative-mood tag appears to be "indic" rather than "ind" -- confirm.
object VerbForms extends Enumeration {
  val transgressive: Value = Value("ger")
  val infinitive: Value = Value("inf")
  val participle: Value = Value("partcp")

  val indicativeMood: Value = Value("ind")
  val imperativeMood: Value = Value("imper")

  val transitive: Value = Value("tran")
  val intransitive: Value = Value("intr")
}
68 |
// Adjective form tags; the name of each value is the tag string itself.
object AdjectiveForms extends Enumeration {
  val brev: Value = Value("brev")
  val plen: Value = Value("plen")
  val possessive: Value = Value("poss")
  val supreme: Value = Value("supr")
  val comparative: Value = Value("comp")
}
76 |
// Grammatical person tags; the name of each value is the tag string itself.
object Person extends Enumeration {
  val p1: Value = Value("1p")
  val p2: Value = Value("2p")
  val p3: Value = Value("3p")
}
82 |
// Grammatical gender tags; the name of each value is the tag string itself.
object Gender extends Enumeration {
  val feminine: Value = Value("f")
  val masculine: Value = Value("m")
  val neuter: Value = Value("n")
}
88 |
// Verb aspect tags; the name of each value is the tag string itself.
object Aspect extends Enumeration {
  val perfective: Value = Value("pf")
  val imperfective: Value = Value("ipf")
}
93 |
// Voice tags; the name of each value is the tag string itself.
object Voice extends Enumeration {
  val active: Value = Value("act")
  val passive: Value = Value("pass")
}
98 |
// Animacy tags; the name of each value is the tag string itself.
object Animacy extends Enumeration {
  val animate: Value = Value("anim")
  val inanimate: Value = Value("inan")
}
103 |
// Miscellaneous tags that belong to no other category; the name of each value is the tag string itself.
object Other extends Enumeration {
  // parenthetical word
  val parenth: Value = Value("parenth")
  val geo: Value = Value("geo")
  // the form is difficult to build
  val awkward: Value = Value("awkw")
  val personal: Value = Value("persn")
  val distorted: Value = Value("dist")
  // common form for masculine and feminine gender
  val mf: Value = Value("mf")
  val obscene: Value = Value("obsc")
  val patrn: Value = Value("patrn")
  // predicative
  val praedicative: Value = Value("praed")
  // colloquial form
  val informal: Value = Value("inform")
  val rare: Value = Value("rare")
  val abbr: Value = Value("abbr")
  val obsolete: Value = Value("obsol")
  val familyName: Value = Value("famn")
}
125 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/model/Info.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.model
2 |
/**
 * Analysis result for one item of the analyzer's JSON output.
 *
 * @param initial     the item's "text" field
 * @param lex         "lex" of the item's first analysis, if the item has any
 * @param rawResponse the whole JSON item serialized to a string
 *
 * alexeyev
 * 31.08.14.
 */
case class Info(initial: String, lex: Option[String], rawResponse: String)
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/package.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp
2 |
3 | import org.slf4j.LoggerFactory
4 |
5 | /**
6 | * alexeyev
7 | * 11.09.14.
8 | */
package object mystem {

  private val log = LoggerFactory.getLogger(getClass)

  /**
   * Maps a pair of `os.name` / `os.arch` system property values to the platform key
   * used in mystem-sources.conf.
   *
   * Pairs missing from the table are resolved by the default function: any OS whose
   * name starts with "windows" (case-insensitively, e.g. "Windows 10") gets "win64"
   * when the architecture mentions 64 bits and "win32" otherwise; everything else
   * falls back to "osx" with a warning.
   */
  val os: Map[(String, String), String] = Map(
    ("Linux", "x86_64") -> "linux64",
    ("Linux", "amd64") -> "linux64",
    ("Linux", "x86") -> "linux32",
    ("Linux", "i386") -> "linux32",
    ("Windows7", "x86") -> "win32",
    ("Windows7", "x86_64") -> "win64"
  ) withDefault {
    // Locale.ROOT: the default locale must not affect the prefix check (e.g. Turkish dotless i).
    case (name, arch) if name != null && name.toLowerCase(java.util.Locale.ROOT).startsWith("windows") =>
      if (arch != null && arch.contains("64")) "win64" else "win32"
    case _ =>
      log.warn("Getting OSX binaries!")
      "osx"
  }
}
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/parsing/GrammarInfoParsing.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.parsing
2 |
3 | import ru.stachek66.nlp.mystem.model._
4 |
5 | /**
6 | * alexeyev
7 | * 31.08.14.
8 | */
object GrammarInfoParsing {

  /**
   * Grammar info parsing.
   *
   * Splits the description on ',' and '=', ignores blank tokens (so an empty
   * string yields an empty [[GrammarInfo]]) and sorts the remaining tags into
   * the categories of [[GrammarInfo]].
   *
   * @param commaSeparatedTags tags separated by ',' or '=', e.g. "S,m,anim=nom,sg"
   * @return the tags grouped by category; tags of enumerations that have no field
   *         in [[GrammarInfo]] (such as Person) are recognized but not stored
   * @throws NoSuchElementException if a tag is not known to [[GrammarMapBuilder]]
   */
  def toGrammarInfo(commaSeparatedTags: String): GrammarInfo = {

    val valuesByEnum: Map[Enumeration, List[Enumeration#Value]] =
      commaSeparatedTags
        .split("[,=]")
        .iterator
        .map(_.trim)
        .filter(_.nonEmpty)
        .map[(Enumeration, Enumeration#Value)] { name =>
          val owner: Enumeration = GrammarMapBuilder.tagToEnumMap(name)
          (owner, owner.withName(name))
        }
        .toList
        .groupMap(_._1)(_._2)

    // The values stored under an enumeration were produced by that very enumeration,
    // so narrowing them back to its value type is safe.
    def findByEnum[T <: scala.Enumeration](enumeration: T): Set[T#Value] =
      valuesByEnum
        .getOrElse(enumeration, Nil)
        .map(_.asInstanceOf[T#Value])
        .toSet

    GrammarInfo(
      pos = findByEnum(POS),
      tense = findByEnum(Tense),
      `case` = findByEnum(Case),
      number = findByEnum(Number),
      verbFormInfo = findByEnum(VerbForms),
      adjFormInfo = findByEnum(AdjectiveForms),
      gender = findByEnum(Gender),
      aspect = findByEnum(Aspect),
      voice = findByEnum(Voice),
      animacy = findByEnum(Animacy),
      other = findByEnum(Other)
    )
  }

  // Joins all tags of the grammar info with commas; the order follows the concatenation below.
  def toStringRepresentation(gi: GrammarInfo): String =
    (gi.`case` ++ gi.adjFormInfo ++ gi.animacy ++ gi.aspect ++ gi.gender ++
      gi.number ++ gi.pos ++ gi.other ++ gi.tense ++ gi.verbFormInfo ++ gi.voice).mkString(",")
}
54 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/nlp/mystem/parsing/JsonRepresentationParser.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.parsing
2 |
3 | import org.json.JSONArray
4 | import ru.stachek66.nlp.mystem.model.Info
5 |
6 | /**
7 | * alexeyev
8 | * 31.08.14.
9 | */
object JsonRepresentationParser {

  // Parses a JSON array given as text into one Info per array item.
  def toInfo(json: String): Traversable[Info] = toInfo(new JSONArray(json))

  /**
   * Builds one [[Info]] per item of the array. An item without an "analysis" field,
   * or with an empty one, gets no lexeme; otherwise the "lex" of the first analysis
   * is used (every analysis entry is still read).
   */
  private def toInfo(json: JSONArray): Traversable[Info] = {

    //todo: fix and enable GrammarInfo parsing

    (0 until json.length).map { index =>
      val item = json.getJSONObject(index)
      val initial = item.getString("text")

      if (!item.has("analysis"))
        Info(initial, None, item.toString)
      else {
        val analysis = item.getJSONArray("analysis")
        val candidates = (0 until analysis.length).map { position =>
          val variant = analysis.getJSONObject(position)
          Info(initial, Option(variant.getString("lex")), item.toString)
        }
        candidates.headOption.getOrElse(Info(initial, None, item.toString))
      }
    }
  }
}
46 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/Decompressor.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.{IOException, File, FileOutputStream}
4 |
5 | import org.apache.commons.compress.archivers.ArchiveInputStream
6 | import org.apache.commons.compress.archivers.ArchiveEntry
7 | import org.apache.commons.io.IOUtils
8 | import ru.stachek66.nlp.mystem.Properties
9 |
10 | /**
11 | * alexeyev
12 | * 11.09.14.
13 | */
trait Decompressor {

  // File extension of the archives this decompressor handles (without a leading dot).
  def traditionalExtension: String

  /**
   * Unpacks the archive `src` into the file `dst`.
   *
   * @return the unpacked file
   */
  def unpack(src: File, dst: File): File

  /**
   * Writes the first entry of the archive to `dest` and closes the archive stream,
   * whether or not the copy succeeds.
   *
   * @param stream archive stream positioned before its first entry
   * @param dest   file to write the entry to
   * @return `dest`
   * @throws IOException if the archive has no entries, the first entry is a
   *                     directory, or reading/writing fails
   */
  @throws(classOf[IOException])
  private[tools] def copyUncompressedAndClose(stream: ArchiveInputStream[_ <: ArchiveEntry], dest: File): File =
    try {
      // must be read
      Option(stream.getNextEntry) match {
        case None =>
          throw new IOException("Archive contains no entries")
        case Some(entry) if entry.isDirectory =>
          throw new IOException("Decompressed entry is a directory (unexpectedly)")
        case Some(_) =>
          val os = new FileOutputStream(dest)
          try {
            IOUtils.copy(stream, os)
          } finally {
            os.close()
          }
          dest
      }
    } finally {
      // The outer finally guarantees the archive stream is released on every path.
      stream.close()
    }
}
39 |
object Decompressor {
  // Zip when the current OS key contains "win", tar.gz otherwise.
  def select: Decompressor = Properties.CurrentOs match {
    case current if current.contains("win") => Zip
    case _ => TarGz
  }
}
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/Downloader.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.File
4 | import java.net.URL
5 |
6 | import org.apache.commons.io.FileUtils
7 | import org.slf4j.LoggerFactory
8 |
9 | /**
10 | * alexeyev
11 | * 31.08.14.
12 | */
object Downloader {

  private val log = LoggerFactory.getLogger(getClass)

  // Timeouts keep a stalled connection from blocking the caller forever.
  private val ConnectionTimeoutMillis = 30000
  private val ReadTimeoutMillis = 60000

  /**
   * Downloads `url` into `destination`, creating the parent directories when needed.
   *
   * @param url         location of the file to download
   * @param destination file to write to
   * @return `destination`
   * @throws Exception           if the parent directory can not be created
   * @throws java.io.IOException if the download fails or times out
   */
  def downloadBinaryFile(url: URL, destination: File): File = {
    log.debug(s"Getting binaries from $url, writing to $destination ")

    // Resolve against the absolute path: a relative destination has no parent of its own.
    Option(destination.getAbsoluteFile.getParentFile).foreach { parent =>
      if (!parent.mkdirs && !parent.exists)
        throw new Exception("Could not create directory: " + parent)
    }

    FileUtils.copyURLToFile(url, destination, ConnectionTimeoutMillis, ReadTimeoutMillis)
    log.debug("Downloading binaries done.")
    destination
  }
}
28 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/TarGz.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.{IOException, File, FileInputStream}
4 | import java.util.zip.GZIPInputStream
5 |
6 | import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
7 | import org.slf4j.LoggerFactory
8 |
9 | import scala.util.Try
10 |
11 | /**
12 | * *.tar.gz files decompression tool
13 | * alexeyev
14 | * 31.08.14.
15 | */
private object TarGz extends Decompressor {

  private val log = LoggerFactory.getLogger(getClass)

  def traditionalExtension: String = "tar.gz"

  /**
   * Extracts the first entry of the gzipped tar archive `src` into `dst`.
   *
   * @return `dst`
   */
  @throws(classOf[IOException])
  def unpack(src: File, dst: File): File = {

    log.debug(s"Unpacking $src to $dst...")

    val gzipped = new GZIPInputStream(new FileInputStream(src))
    val unpacked = copyUncompressedAndClose(new TarArchiveInputStream(gzipped), dst)

    log.debug("Done.")
    unpacked
  }
}
40 |
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/Tools.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import org.slf4j.LoggerFactory
4 |
5 | import scala.concurrent.duration._
6 |
7 | /**
8 | * alexeyev
9 | * 02.09.14.
10 | */
object Tools {

  private val log = LoggerFactory.getLogger(getClass)

  /**
   * Runs `action`, retrying it when it throws an `Exception`.
   *
   * @param n       total number of attempts
   * @param timeout pause before every retry
   * @param action  computation to run; evaluated anew on each attempt
   * @return the result of the first successful attempt
   * @throws Exception "No attempts left", wrapping the last failure, once all attempts failed
   */
  @throws(classOf[Exception])
  def withAttempt[T](n: Int, timeout: Duration = 0.millis)(action: => T): T = try {
    action
  } catch {
    case e: Exception if n > 1 =>
      log.warn(s"${n - 1} attempts left", e)
      Thread.sleep(timeout.toMillis)
      // Pass the timeout on: otherwise every retry after the first uses the 0 ms default.
      withAttempt(n - 1, timeout)(action)
    case e: Exception =>
      throw new Exception("No attempts left", e)
  }
}
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/Zip.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.{IOException, BufferedInputStream, File, FileInputStream}
4 |
5 | import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream
6 | import org.slf4j.LoggerFactory
7 |
8 | /**
9 |  * Decompressor for `*.zip` archives.
10 |  *
11 |  * alexeyev
12 |  * 31.08.14.
13 |  */
14 | private object Zip extends Decompressor {
15 |
16 |   private val log = LoggerFactory.getLogger(getClass)
17 |
18 |   /** Extension (without the leading dot) conventionally carried by the archives handled here. */
19 |   def traditionalExtension: String = "zip"
20 |
21 |   /**
22 |    * Unzips a single file: opens `src` as a buffered zip stream and hands it,
23 |    * together with `dst`, to `copyUncompressedAndClose`.
24 |    *
25 |    * @param src the `*.zip` archive to read
26 |    * @param dst the destination passed on to `copyUncompressedAndClose`
27 |    * @return the value returned by `copyUncompressedAndClose`
28 |    */
29 |   @throws(classOf[IOException])
30 |   def unpack(src: File, dst: File): File = {
31 |     log.debug(s"Unpacking $src to $dst...")
32 |
33 |     val rawIn = new FileInputStream(src)
34 |     val archiveIn = new ZipArchiveInputStream(new BufferedInputStream(rawIn))
35 |     val unpacked = copyUncompressedAndClose(archiveIn, dst)
36 |
37 |     log.debug("Done.")
38 |     unpacked
39 |   }
40 | }
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/external/ExternalProcessServer.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools.external
2 |
3 | import java.io.{BufferedReader, BufferedWriter, OutputStreamWriter}
4 | import java.nio.charset.Charset
5 |
6 | import org.slf4j.LoggerFactory
7 |
8 | import scala.util._
9 |
10 |
11 | /**
12 |  * Starts an external process from `starterCommand` and exchanges text with it
13 |  * over the process' standard input and output.
14 |  *
15 |  * Please be careful when using! No death handling.
16 |  * alexeyev
17 |  * 13.09.14.
18 |  */
19 | private[external] class ExternalProcessServer(starterCommand: String) extends SyncServer {
20 |
21 |   private val log = LoggerFactory.getLogger(getClass)
22 |
23 |   private val p = Runtime.getRuntime.exec(starterCommand)
24 |   // NOTE(review): `err` is never read anywhere in this class.
25 |   private val (in, out, err) = (p.getInputStream, p.getOutputStream, p.getErrorStream)
26 |
27 |   private val utf8 = Charset.forName("utf-8")
28 |
29 |   private val writer = new BufferedWriter(new OutputStreamWriter(out, utf8), 1)
30 |   // Decode the replies with the same charset the requests are encoded with,
31 |   // rather than with whatever the platform default happens to be.
32 |   private val bufferedReader = new BufferedReader(new java.io.InputStreamReader(in, utf8))
33 |
34 |   /**
35 |    * Writes `request` followed by a line separator to the process and reads its reply.
36 |    *
37 |    * The call blocks until the process has produced one full line, then keeps appending
38 |    * further lines for as long as more output is immediately available. Lines are
39 |    * concatenated without separators.
40 |    *
41 |    * @param request text to send to the process
42 |    * @return `Success` with the concatenated reply, or `Failure` if writing or reading
43 |    *         threw, or if the process closed its output before replying
44 |    */
45 |   def syncRequest(request: String): Try[String] = Try {
46 |
47 |     writer.write(request)
48 |     writer.newLine()
49 |     writer.flush()
50 |
51 |     // Block on readLine() instead of spinning on ready(): no CPU is burnt while waiting,
52 |     // and a closed stream (readLine() returning null) is reported instead of looping forever.
53 |     val firstLine = Option(bufferedReader.readLine()).getOrElse(
54 |       throw new java.io.IOException("External process closed its output stream"))
55 |
56 |     val builder = new StringBuilder()
57 |     builder.append(firstLine)
58 |     while (bufferedReader.ready) {
59 |       Option(bufferedReader.readLine()).foreach(line => builder.append(line))
60 |     }
61 |     builder.toString()
62 |   }
63 |
64 |   /**
65 |    * Tells whether the process is still running.
66 |    *
67 |    * `Process.exitValue()` throws `IllegalThreadStateException` while the process has not
68 |    * terminated, which is what is used as the "alive" signal here.
69 |    */
70 |   def isAlive: Boolean =
71 |     Try(p.exitValue()) match {
72 |       case Success(_) => false
73 |       case Failure(_: IllegalThreadStateException) => true
74 |       case Failure(e) => throw new RuntimeException(e) // unknown exception
75 |     }
76 |
77 |   /** Destroys the underlying process. */
78 |   def kill(): Unit = {
79 |     p.destroy()
80 |   }
81 | }
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/external/FailSafeExternalProcessServer.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools.external
2 |
3 | import java.util.concurrent.atomic.AtomicReference
4 |
5 | import ru.stachek66.tools.Tools
6 |
7 | import scala.util.Try
8 |
9 | /**
10 |  * [[SyncServer]] backed by an [[ExternalProcessServer]] that is replaced with a fresh
11 |  * instance whenever it is found dead, and whose failed requests are retried.
12 |  *
13 |  * alexeyev
14 |  * 16.10.14.
15 |  *
16 |  * @param starterCommand command line used to start (and restart) the external process
17 |  * @param attempts       maximum number of attempts per request
18 |  */
19 | class FailSafeExternalProcessServer(starterCommand: String, attempts: Int = 30) extends SyncServer {
20 |
21 |   private val ps = new AtomicReference[ExternalProcessServer](new ExternalProcessServer(starterCommand))
22 |
23 |   /**
24 |    * Sends `request`, restarting the process if it is not alive and retrying on failure.
25 |    *
26 |    * @return `Success` with the response, or `Failure` once every attempt has failed
27 |    */
28 |   override def syncRequest(request: String): Try[String] = this.synchronized {
29 |     Try {
30 |       Tools.withAttempt(attempts) {
31 |         if (!ps.get.isAlive) ps.set(new ExternalProcessServer(starterCommand))
32 |         // The inner call reports errors as a Failure value, which withAttempt cannot see.
33 |         // Unwrapping with .get rethrows the error so that the request is actually retried.
34 |         ps.get.syncRequest(request).get
35 |       }
36 |     }
37 |   }
38 | }
--------------------------------------------------------------------------------
/src/main/scala/ru/stachek66/tools/external/SyncServer.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools.external
2 |
3 | import scala.util.Try
4 |
5 | /**
6 | * alexeyev
7 | * 16.10.14.
8 | */
9 | trait SyncServer {
10 |
11 | /**
12 | * Sends `request` and returns the outcome as a `Try`: a `Success` holding the response string, or a `Failure` otherwise.
13 | */
14 | def syncRequest(request: String): Try[String]
15 |
16 | }
--------------------------------------------------------------------------------
/src/test/resources/res.txt:
--------------------------------------------------------------------------------
1 | Прозрачные зелёные идеи живут и побеждают!
2 |
--------------------------------------------------------------------------------
/src/test/resources/test.tar:
--------------------------------------------------------------------------------
1 | test.txt 0000664 0001750 0001750 00000000117 12404154521 013023 0 ustar alexeyev alexeyev Прозрачные зелёные идеи живут и побеждают!
2 |
--------------------------------------------------------------------------------
/src/test/resources/test.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexeyev/mystem-scala/9e089eb0ae0c6adf91aff2167c553a74a140bf87/src/test/resources/test.tar.gz
--------------------------------------------------------------------------------
/src/test/resources/test.txt:
--------------------------------------------------------------------------------
1 | Прозрачные зелёные идеи живут и побеждают!
2 |
--------------------------------------------------------------------------------
/src/test/resources/test.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexeyev/mystem-scala/9e089eb0ae0c6adf91aff2167c553a74a140bf87/src/test/resources/test.zip
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/nlp/mystem/Properties$Test.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem
2 |
3 | import java.net.URL
4 |
5 | import org.junit.runner.RunWith
6 | import org.scalatest.FunSuite
7 | import org.scalatest.junit.JUnitRunner
8 |
9 | /**
10 |  * Checks the download URLs produced by `Properties.getUrl`.
11 |  *
12 |  * alexeyev
13 |  * 31.08.14.
14 |  */
15 | class Properties$Test extends FunSuite {
16 |
17 |   test("getting-download-url") {
18 |
19 |     // (version, platform) -> expected download address
20 |     val expectations = Seq(
21 |       ("3.0", "win32") -> "http://download.cdn.yandex.net/mystem/mystem-3.0-win7-32bit.zip",
22 |       ("3.0", "linux64") -> "http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz",
23 |       ("3.1", "win64") -> "http://download.cdn.yandex.net/mystem/mystem-3.1-win-64bit.zip",
24 |       ("3.1", "linux64") -> "http://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz"
25 |     )
26 |
27 |     expectations.foreach { case ((version, platform), address) =>
28 |       assert(Properties.getUrl(version, platform) === new URL(address))
29 |     }
30 |   }
31 | }
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/nlp/mystem/holding/Holder.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.holding
2 |
3 | /**
4 |  * Manual smoke run: obtains a mystem instance from [[Factory]] and keeps sending it
5 |  * the same request forever, pausing for a random time (under ten seconds) in between.
6 |  *
7 |  * alexeyev
8 |  * 12.09.14.
9 |  */
10 | object HolderApp extends App {
11 |
12 |   val h = new Factory()
13 |   println("holder ready")
14 |
15 |   val p = h.newMyStem("3.0").get
16 |   println("raw process created")
17 |
18 |   // Never terminates on its own; stop the application externally.
19 |   while (true) {
20 |     println("asking")
21 |     val answer = p.analyze(Request("леново"))
22 |     println(answer)
23 |     println("answer printed")
24 |
25 |     val pauseMillis = math.round(math.random * 10000)
26 |     Thread.sleep(pauseMillis)
27 |   }
28 | }
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/nlp/mystem/model/GrammarMapBuilder$Test.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.model
2 |
3 | import org.junit.runner.RunWith
4 | import org.scalatest.FunSuite
5 | import org.scalatest.junit.JUnitRunner
6 |
7 | /**
8 |  * Prints the value that `GrammarMapBuilder.tagToEnumMap` resolves for the "ADV" tag.
9 |  *
10 |  * alexeyev
11 |  * 16.09.14.
12 |  */
13 | class GrammarMapBuilder$Test extends FunSuite {
14 |
15 |   test("grammar") {
16 |     val tag = "ADV"
17 |     // Look the tag up first, then resolve the value of the same name within the result.
18 |     val holder = GrammarMapBuilder.tagToEnumMap(tag)
19 |     println(holder.withName(tag))
20 |   }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/nlp/mystem/parsing/GrammarInfoParsing$Test.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.nlp.mystem.parsing
2 |
3 | import org.scalatest.FunSuite
4 |
5 | /**
6 | * alexeyev
7 | * 01.09.14.
8 | */
9 | class GrammarInfoParsing$Test extends FunSuite {
10 |
11 | // TODO: enable this round-trip check (parse, render back to a string, parse again, compare with the first parse)
12 | // test("g-i-p-t-королевский") {
13 | // val testString = "A=acc,sg,plen,m,inan"
14 | // import ru.stachek66.nlp.mystem.parsing.GrammarInfoParsing._
15 | // assert(toGrammarInfo(testString) === toGrammarInfo(toStringRepresentation(toGrammarInfo(testString))))
16 | // }
17 | }
18 |
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/tools/Downloader$Test.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.File
4 | import java.net.URL
5 |
6 | import org.junit.runner.RunWith
7 | import org.scalatest.{Ignore, FunSuite}
8 | import org.scalatest.junit.JUnitRunner
9 |
10 | /**
11 |  * Checks for `Downloader` that download real files over the network; ignored by default.
12 |  *
13 |  * alexeyev
14 |  * 07.09.14.
15 |  */
16 | @Ignore
17 | class Downloader$Test extends FunSuite {
18 |
19 |   /** Runs `body` and then deletes `files`, whether or not `body` threw. */
20 |   private def cleaningUp(files: File*)(body: => Unit): Unit =
21 |     try body finally files.foreach(_.delete())
22 |
23 |   test("downloading-something") {
24 |
25 |     val hello = new File("hello-test.html")
26 |     val mystem = new File("atmta.binary")
27 |
28 |     // Clean up in a finally block so a failed download does not leave files behind.
29 |     cleaningUp(hello, mystem) {
30 |       Downloader.downloadBinaryFile(new URL("http://www.stachek66.ru/"), hello)
31 |
32 |       Downloader.downloadBinaryFile(
33 |         new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz"),
34 |         mystem
35 |       )
36 |
37 |       Downloader.downloadBinaryFile(
38 |         new URL("http://download.cdn.yandex.net/mystem/mystem-3.1-win-64bit.zip"),
39 |         mystem
40 |       )
41 |     }
42 |   }
43 |
44 |   test("download-and-unpack") {
45 |     val bin = new File("atmta.binary.tar.gz")
46 |     val bin2 = new File("executable")
47 |
48 |     cleaningUp(bin, bin2) {
49 |       Decompressor.select.unpack(
50 |         Downloader.downloadBinaryFile(
51 |           new URL("http://download.cdn.yandex.net/mystem/mystem-3.0-linux3.1-64bit.tar.gz"),
52 |           bin),
53 |         bin2
54 |       )
55 |     }
56 |   }
57 | }
58 |
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/tools/Exec$Test.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.File
4 |
5 | import org.junit.runner.RunWith
6 | import org.scalatest.FunSuite
7 | import org.scalatest.junit.JUnitRunner
8 |
9 | /**
10 | * alexeyev
11 | * 12.09.14.
12 | */
13 | class Exec$Test extends FunSuite {
14 |
15 | // No test cases are defined here yet.
16 |
17 | }
18 |
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/tools/TarGz$Test.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.{File, FileInputStream}
4 |
5 | import org.apache.commons.io.IOUtils
6 | import org.junit.runner.RunWith
7 | import org.scalatest.FunSuite
8 | import org.scalatest.junit.JUnitRunner
9 |
10 | /**
11 |  * Checks that `TarGz` unpacks the bundled `test.tar.gz` into the same text as `test.txt`.
12 |  *
13 |  * alexeyev
14 |  * 11.09.14.
15 |  */
16 | class TarGz$Test extends FunSuite {
17 |
18 |   /** Reads `file` fully as UTF-8 text, closing the stream even if reading fails. */
19 |   private def readUtf8(file: File): String = {
20 |     val in = new FileInputStream(file)
21 |     try IOUtils.toString(in, "UTF-8") finally in.close()
22 |   }
23 |
24 |   test("tgz-test") {
25 |     val src = new File("src/test/resources/test.txt")
26 |     val unpacked = TarGz.unpack(
27 |       new File("src/test/resources/test.tar.gz"),
28 |       new File("src/test/resources/res.txt"))
29 |
30 |     val content0 = readUtf8(unpacked)
31 |     val content1 = readUtf8(src)
32 |     print(content0.trim + " vs " + content1.trim)
33 |     // trimming thanks to line separators; should be more careful maybe
34 |     assert(content0.trim === content1.trim)
35 |   }
36 | }
37 |
--------------------------------------------------------------------------------
/src/test/scala/ru/stachek66/tools/Zip$Test.scala:
--------------------------------------------------------------------------------
1 | package ru.stachek66.tools
2 |
3 | import java.io.{File, FileInputStream}
4 |
5 | import org.apache.commons.io.IOUtils
6 | import org.scalatest.FunSuite
7 | import org.scalatest.junit.JUnitRunner
8 |
9 | import org.junit.runner.RunWith
10 |
11 | /**
12 |  * Checks that `Zip` unpacks the bundled `test.zip` into the same text as `test.txt`.
13 |  *
14 |  * alexeyev
15 |  * 11.09.14.
16 |  */
17 | class Zip$Test extends FunSuite {
18 |
19 |   /** Reads `file` fully as UTF-8 text, closing the stream even if reading fails. */
20 |   private def readUtf8(file: File): String = {
21 |     val in = new FileInputStream(file)
22 |     try IOUtils.toString(in, "UTF-8") finally in.close()
23 |   }
24 |
25 |   test("zip-test") {
26 |     val src = new File("src/test/resources/test.txt")
27 |     val unpacked = Zip.unpack(
28 |       new File("src/test/resources/test.zip"),
29 |       new File("src/test/resources/res.txt"))
30 |
31 |     val content0 = readUtf8(unpacked)
32 |     val content1 = readUtf8(src)
33 |     print(content0.trim + " vs " + content1.trim)
34 |     // trimming thanks to line separators; should be more careful maybe
35 |     assert(content0.trim === content1.trim)
36 |   }
37 |
38 | }
39 |
--------------------------------------------------------------------------------