├── .gitignore ├── LICENSE.txt ├── MANIFEST.in ├── README ├── README.rst ├── docs ├── Makefile ├── _build │ ├── doctrees │ │ ├── disqus_jnlp.html.doctree │ │ ├── environment.pickle │ │ └── index.doctree │ └── html │ │ ├── .buildinfo │ │ ├── _sources │ │ ├── disqus_jnlp.html.txt │ │ └── index.txt │ │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── basic.css │ │ ├── comment-bright.png │ │ ├── comment-close.png │ │ ├── comment.png │ │ ├── default.css │ │ ├── dialog-note.png │ │ ├── dialog-seealso.png │ │ ├── dialog-topic.png │ │ ├── dialog-warning.png │ │ ├── doctools.js │ │ ├── down-pressed.png │ │ ├── down.png │ │ ├── epub.css │ │ ├── file.png │ │ ├── footerbg.png │ │ ├── headerbg.png │ │ ├── ie6.css │ │ ├── jquery.js │ │ ├── middlebg.png │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── pyramid.css │ │ ├── searchtools.js │ │ ├── sidebar.js │ │ ├── transparent.gif │ │ ├── underscore.js │ │ ├── up-pressed.png │ │ ├── up.png │ │ └── websupport.js │ │ ├── disqus_jnlp.html.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── objects.inv │ │ ├── search.html │ │ └── searchindex.js ├── conf.py ├── disqus_jnlp.html.rst ├── index.rst └── make.bat ├── scripts └── vcabocha.py ├── setup.py └── src ├── jNlp ├── __init__.py ├── aquisition │ ├── OpenSubtitles.py │ ├── SubtitleDatabase.py │ ├── __init__.py │ ├── aquire.py │ ├── download_subs.xml │ └── movies.txt ├── callunix.py ├── data │ ├── JapaneseSentiWordNet.txt │ ├── __init__.py │ ├── chasen_pos.txt │ ├── hiraganaChart.txt │ └── katakanaChart.txt ├── eProcessing.py ├── edict_search_monash │ ├── __init__.py │ ├── ambiguous_words.p │ ├── edict_examples.p │ ├── edict_examples.py │ └── edict_search.py ├── jCabocha.py ├── jColor.py ├── jConvert.py ├── jProcessing.py ├── jSentiments.py ├── jTokenize.py ├── summarize.py ├── url2text.py └── vcabocha.py └── jProcessing.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt └── top_level.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.*[#~] 2 | #.*?# 3 | upload.sh 4 | push.sh 5 | *.pyc 6 | dist/ 7 | build/ 8 | src/jNlp/*.p 9 | src/jNlp/_dicts/ 10 | src/jNlp/classifiers/ 11 | runsetup.py 12 | src/jNlp/_corpora -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Pulkit Kathuria 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # 8 | # * Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # * Redistributions in binary form must reproduce the above 11 | # copyright notice, this list of conditions and the following 12 | # disclaimer in the documentation and/or other materials provided 13 | # with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 19 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 25 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | # POSSIBILITY OF SUCH DAMAGE. 27 | 28 | 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src 2 | prune src/jNlp/.git 3 | exclude src/jNlp/jnlp/upload.sh 4 | exclude push.sh 5 | exclude src/jNlp/*.p 6 | prune src/jNlp/_dicts 7 | prune src/jNlp/_corpora 8 | prune src/jNlp/classifiers 9 | exclude runsetup.py 10 | 11 | 12 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ==================== 2 | Japanese NLP Library 3 | ==================== 4 | 5 | 6 | Requirements 7 | ============ 8 | 9 | - Third Party Dependencies 10 | 11 | - Cabocha Japanese Morphological parser http://sourceforge.net/projects/cabocha/ 12 | 13 | - Python Dependencies 14 | 15 | - ``Python 2.6.*`` or above 16 | 17 | 18 | ``Links`` 19 | --------- 20 | 21 | - All code at jProcessing Repo GitHub_ 22 | 23 | .. _GitHub: https://github.com/kevincobain2000/jProcessing 24 | 25 | - Documentation_ and HomePage_ and Sphinx_ 26 | 27 | .. _Documentation: http://www.jaist.ac.jp/~s1010205/jnlp 28 | 29 | .. _HomePage: http://www.jaist.ac.jp/~s1010205/ 30 | 31 | .. _Sphinx: http://readthedocs.org/docs/jprocessing/en/latest/ 32 | 33 | 34 | - PyPi_ Python Package 35 | 36 | .. _PyPi: http://pypi.python.org/pypi/jProcessing/0.1 37 | 38 | :: 39 | 40 | clone git@github.com:kevincobain2000/jProcessing.git 41 | 42 | 43 | ``Install`` 44 | ----------- 45 | 46 | In ``Terminal`` :: 47 | 48 | >>>bash$ python setup.py install 49 | 50 | History 51 | ------- 52 | 53 | - ``0.2`` 54 | 55 | + Sentiment Analysis of Japanese Text 56 | 57 | - ``0.1`` 58 | + Morphologically Tokenize Japanese Sentence 59 | + Kanji / Hiragana / Katakana to Romaji Converter 60 | + Edict Dictionary Search - borrowed 61 | + Edict Examples Search - incomplete 62 | + Sentence Similarity between two JP Sentences 63 | + Run Cabocha(ISO--8859-1 configured) in Python. 64 | + Longest Common String between Sentences 65 | + Kanji to Katakana Pronunciation 66 | + Hiragana, Katakana Chart Parser 67 | 68 | Contacts 69 | ======== 70 | 71 | - ContactForm_ 72 | - BugReport_ 73 | - Contribute_ 74 | 75 | .. _ContactForm: http://www.jaist.ac.jp/~s1010205/styled-2/index.html 76 | .. _BugReport: http://www.jaist.ac.jp/~s1010205/styled/index.html 77 | .. _Contribute: https://github.com/kevincobain2000/jProcessing 78 | 79 | :Author: `pulkit[at]jaist.ac.jp` [change ``at`` with ``@``] 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 | 4 | 5 | 6 | 7 | .. raw:: html 8 | 9 |
Back to Home
10 |
11 | ====================
12 | Japanese NLP Library
13 | ====================
14 |
15 |
16 | .. sectnum::
17 | .. contents::
18 |
19 | Requirements
20 | ============
21 |
22 | - Third Party Dependencies
23 |
24 |   - Cabocha Japanese Morphological parser http://sourceforge.net/projects/cabocha/
25 |
26 | - Python Dependencies
27 |
28 |   - ``Python 2.6.*`` or above
29 |
30 |
31 | ``Links``
32 | ---------
33 |
34 | - All code at jProcessing Repo GitHub_
35 |
36 | .. _GitHub: https://github.com/kevincobain2000/jProcessing
37 |
38 | - Documentation_ and HomePage_ and Sphinx_
39 |
40 | .. _Documentation: http://www.jaist.ac.jp/~s1010205/jnlp
41 |
42 | .. _HomePage: http://www.jaist.ac.jp/~s1010205/
43 |
44 | .. _Sphinx: http://readthedocs.org/docs/jprocessing/en/latest/
45 |
46 |
47 | - PyPi_ Python Package
48 |
49 | .. _PyPi: http://pypi.python.org/pypi/jProcessing/0.1
50 |
51 | ::
52 |
53 |    clone git@github.com:kevincobain2000/jProcessing.git
54 |
55 |
56 | ``Install``
57 | -----------
58 |
59 | In ``Terminal`` ::
60 |
61 |    bash$ python setup.py install
62 |
63 | History
64 | -------
65 |
66 | - ``0.2``
67 |
68 |   + Sentiment Analysis of Japanese Text
69 |
70 | - ``0.1``
71 |   + Morphologically Tokenize Japanese Sentence
72 |   + Kanji / Hiragana / Katakana to Romaji Converter
73 |   + Edict Dictionary Search - borrowed
74 |   + Edict Examples Search - incomplete
75 |   + Sentence Similarity between two JP Sentences
76 |   + Run Cabocha (ISO-8859-1 configured) in Python.
77 |   + Longest Common String between Sentences
78 |   + Kanji to Katakana Pronunciation
79 |   + Hiragana, Katakana Chart Parser
80 |
81 | Libraries and Modules
82 | =====================
83 |
84 | Tokenize ``jTokenize.py``
85 | -------------------------
86 | In ``Python`` ::
87 |
88 |    >>> from jNlp.jTokenize import jTokenize
89 |    >>> input_sentence = u'私は彼を5日前、つまりこの前の金曜日に駅で見かけた'
90 |    >>> list_of_tokens = jTokenize(input_sentence)
91 |    >>> print list_of_tokens
92 |    >>> print '--'.join(list_of_tokens).encode('utf-8')
93 |
94 | Returns:
95 |
96 | ::
97 |
98 |    ... [u'\u79c1', u'\u306f', u'\u5f7c', u'\u3092', u'\uff15'...]
99 |    ... 私--は--彼--を--5--日--前--、--つまり--この--前--の--金曜日--に--駅--で--見かけ--た
100 |
101 | Katakana Pronunciation:
102 |
103 | ::
104 |
105 |    >>> print '--'.join(jReads(input_sentence)).encode('utf-8')
106 |    ... ワタシ--ハ--カレ--ヲ--ゴ--ニチ--マエ--、--ツマリ--コノ--マエ--ノ--キンヨウビ--ニ--エキ--デ--ミカケ--タ
107 |
108 |
109 | Cabocha ``jCabocha.py``
110 | -----------------------
111 |
112 | Run Cabocha_ with its original ``EUCJP`` or ``ISO-8859-1`` configured encoding, from ``utf8`` Python
113 |
114 | .. _Cabocha: http://code.google.com/p/cabocha/
115 |
116 | - If cabocha is configured as ``utf8`` then see this http://nltk.googlecode.com/svn/trunk/doc/book-jp/ch12.html#cabocha
117 |
118 | .. code-block:: python
119 |
120 |    >>> from jNlp.jCabocha import cabocha
121 |    >>> print cabocha(input_sentence).encode('utf-8')
122 |
123 | Output:
124 |
125 | .. code-block:: xml
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 | Kanji / Katakana / Hiragana to Tokenized Romaji ``jConvert.py``
146 | ---------------------------------------------------------------
147 |
148 | Uses ``data/katakanaChart.txt`` and parses the chart. See katakanaChart_. A rough sketch of the chart-parsing idea appears at the end of this section.
149 |
150 | .. code-block:: python
151 |
152 |    >>> from jNlp.jConvert import *
153 |    >>> input_sentence = u'気象庁が21日午前4時48分、発表した天気概況によると、'
154 |    >>> print ' '.join(tokenizedRomaji(input_sentence))
155 |    >>> print tokenizedRomaji(input_sentence)
156 |
157 | .. code-block:: python
158 |
159 |    ...kisyoutyou ga ni ichi nichi gozen yon ji yon hachi hun hapyou si ta tenki gaikyou ni yoru to
160 |    ...[u'kisyoutyou', u'ga', u'ni', u'ichi', u'nichi', u'gozen',...]
161 |
162 |
163 | **katakanaChart.txt**
164 |
165 |
166 | .. _katakanaChart:
167 |
168 | - katakanaChartFile_ and hiraganaChartFile_
169 |
170 | .. _katakanaChartFile: https://raw.github.com/kevincobain2000/jProcessing/master/src/jNlp/data/katakanaChart.txt
171 |
172 | .. _hiraganaChartFile: https://raw.github.com/kevincobain2000/jProcessing/master/src/jNlp/data/hiraganaChart.txt
173 |
174 |
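The chart parsing mentioned above can be pictured as follows. This is only a rough, hypothetical sketch under an assumed chart layout (one consonant per row, one kana per vowel column, ``-`` marking the plain-vowel row); the real ``jConvert.py`` parser, the actual layout of ``katakanaChart.txt``, and the helper names ``parse_kana_chart`` / ``to_romaji`` are not taken from the library.

.. code-block:: python

   # -*- coding: utf-8 -*-
   # Hypothetical sketch only; the real chart format and jConvert code may differ.
   import codecs

   def parse_kana_chart(path, vowels=(u'a', u'i', u'u', u'e', u'o')):
       """Assumed row layout: '<consonant> <kana_a> <kana_i> <kana_u> <kana_e> <kana_o>'."""
       kana2romaji = {}
       for line in codecs.open(path, 'r', 'utf-8'):
           cols = line.split()
           if len(cols) < 2:
               continue
           consonant = u'' if cols[0] == u'-' else cols[0]
           for kana, vowel in zip(cols[1:], vowels):
               kana2romaji[kana] = consonant + vowel
       return kana2romaji

   def to_romaji(readings, kana2romaji):
       # 'readings' are katakana strings such as the jReads() output shown earlier;
       # digraphs like キョ are ignored in this character-by-character sketch
       return [u''.join(kana2romaji.get(ch, ch) for ch in reading) for reading in readings]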
175 | Longest Common String Japanese ``jProcessing.py``
176 | --------------------------------------------------
177 |
178 | On English Strings ::
179 |
180 |    >>> from jNlp.jProcessing import long_substr
181 |    >>> a = 'Once upon a time in Italy'
182 |    >>> b = 'Thre was a time in America'
183 |    >>> print long_substr(a, b)
184 |
185 | Output ::
186 |
187 |    ...a time in
188 |
189 | On Japanese Strings ::
190 |
191 |    >>> a = u'これでアナタも冷え知らず'
192 |    >>> b = u'これでア冷え知らずナタも'
193 |    >>> print long_substr(a, b).encode('utf-8')
194 |
195 | Output ::
196 |
197 |    ...冷え知らず
198 |
199 | Similarity between two sentences ``jProcessing.py``
200 | ----------------------------------------------------
201 | Uses MinHash to estimate the token overlap (Jaccard similarity) between the two sentences, see http://en.wikipedia.org/wiki/MinHash. A rough sketch of the idea follows this section.
202 |
203 | :English Strings:
204 |
205 | >>> from jNlp.jProcessing import Similarities
206 | >>> s = Similarities()
207 | >>> a = 'There was'
208 | >>> b = 'There is'
209 | >>> print s.minhash(a,b)
210 | ...0.444444444444
211 |
212 | :Japanese Strings:
213 |
214 | >>> from jNlp.jProcessing import *
215 | >>> a = u'これは何ですか?'
216 | >>> b = u'これはわからないです'
217 | >>> print s.minhash(' '.join(jTokenize(a)), ' '.join(jTokenize(b)))
218 | ...0.210526315789
219 |
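The scores above can be read as estimates of the Jaccard overlap between the two token sets. The block below is a minimal, self-contained sketch of the MinHash idea only; it is not the actual ``Similarities.minhash`` implementation, and the function name, the seeded-hash scheme and the number of hash functions are assumptions.

.. code-block:: python

   # Illustration of MinHash-style set similarity (not jNlp's own code).
   import hashlib

   def minhash_similarity(tokens_a, tokens_b, num_hashes=100):
       """Approximate the Jaccard similarity of two token sets."""
       set_a, set_b = set(tokens_a), set(tokens_b)
       if not set_a or not set_b:
           return 0.0
       def signature(tokens):
           sig = []
           for seed in range(num_hashes):
               # the smallest hash value under each seeded hash is one signature component
               sig.append(min(int(hashlib.md5((u'%d:%s' % (seed, t)).encode('utf-8')).hexdigest(), 16)
                              for t in tokens))
           return sig
       matches = sum(1.0 for x, y in zip(signature(set_a), signature(set_b)) if x == y)
       # the fraction of matching components estimates the Jaccard overlap
       return matches / num_hashes

   # e.g. minhash_similarity(jTokenize(a), jTokenize(b)) for the Japanese sentences above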
220 | Edict Japanese Dictionary Search with Example sentences
221 | ========================================================
222 |
223 | Sample Output Demo
224 | ------------------
225 |
226 | .. raw:: html
227 |
228 |
244 |
245 |
246 |
247 | Edict dictionary and example sentences parser.
248 | ----------------------------------------------
249 |
250 | This package uses the EDICT_ and KANJIDIC_ dictionary files.
251 | These files are the property of the
252 | Electronic Dictionary Research and Development Group_ , and
253 | are used in conformance with the Group's licence_ .
254 |
255 | .. _EDICT: http://www.csse.monash.edu.au/~jwb/edict.html
256 | .. _KANJIDIC: http://www.csse.monash.edu.au/~jwb/kanjidic.html
257 | .. _Group: http://www.edrdg.org/
258 | .. _licence: http://www.edrdg.org/edrdg/licence.html
259 |
260 | Edict Parser by **Paul Goins**, see ``edict_search.py``
261 | Edict Example sentences Parser by query, **Pulkit Kathuria**, see ``edict_examples.py``
262 | Edict examples pickle files are provided but the latest example files can be downloaded from the links provided.
263 |
264 | Charset
265 | -------
266 | Two files:
267 |
268 | - ``utf8`` Charset example file if not using ``src/jNlp/data/edict_examples``
269 |
270 |   To convert ``EUCJP/ISO-8859-1`` to ``utf8`` ::
271 |
272 |      iconv -f EUCJP -t UTF-8 path/to/edict_examples > path/to/save_with_utf-8
273 |
274 | - ``ISO-8859-1`` edict_dictionary file
275 |
276 | Outputs example sentences for a query in Japanese only for ambiguous words.
277 |
278 |
279 | Links
280 | -----
281 |
282 | **Latest** Dictionary files can be downloaded here_
283 |
284 | .. _here: http://www.csse.monash.edu.au/~jwb/edict.html
285 |
286 | ``edict_search.py``
287 | -------------------
288 | :author: Paul Goins, `License included`, see linkToOriginal_
289 |
290 | .. _linkToOriginal: http://repo.or.cz/w/jbparse.git/blame/8e42831ca5f721c0320b27d7d83cb553d6e9c68f:/jbparse/edict.py
291 |
292 | For all entries and their sense definitions:
293 |
294 | >>> from jNlp.edict_search import *
295 | >>> query = u'認める'
296 | >>> edict_path = 'src/jNlp/data/edict-yy-mm-dd'
297 | >>> kp = Parser(edict_path)
298 | >>> for i, entry in enumerate(kp.search(query)):
299 | ...     print entry.to_string().encode('utf-8')
300 |
301 |
302 | ``edict_examples.py``
303 | ---------------------
304 | :`Note`: Only outputs example sentences for ambiguous words (words with more than one sense)
305 |
306 | :author: Pulkit Kathuria
307 |
308 | >>> from jNlp.edict_examples import *
309 | >>> query = u'認める'
310 | >>> edict_path = 'src/jNlp/data/edict-yy-mm-dd'
311 | >>> edict_examples_path = 'src/jNlp/data/edict_examples'
312 | >>> search_with_example(edict_path, edict_examples_path, query)
313 |
314 | Output ::
315 |
316 |    認める
317 |
318 |    Sense (1) to recognize;
319 |    EX:01 我々は彼の才能を*認*めている。We appreciate his talent.
320 |
321 |    Sense (2) to observe;
322 |    EX:01 x線写真で異状が*認*められます。We have detected an abnormality on your x-ray.
323 |
324 |    Sense (3) to admit;
325 |    EX:01 母は私の計画をよいと*認*めた。Mother approved my plan.
326 |    EX:02 母は決して私の結婚を*認*めないだろう。Mother will never approve of my marriage.
327 |    EX:03 父は決して私の結婚を*認*めないだろう。Father will never approve of my marriage.
328 |    EX:04 彼は女性の喫煙をいいものだと*認*めない。He doesn't approve of women smoking.
329 |    ...
330 |
331 | Sentiment Analysis of Japanese Text
332 | ===================================
333 |
334 | This section covers Sentiment Analysis on Japanese text using Word Sense Disambiguation, Wordnet-jp_ (Japanese WordNet, file name ``wnjpn-all.tab``) and SentiWordnet_ (English SentiWordNet, file name ``SentiWordNet_3.*.txt``).
335 |
336 | .. _Wordnet-jp: http://nlpwww.nict.go.jp/wn-ja/eng/downloads.html
337 | .. _SentiWordnet: http://sentiwordnet.isti.cnr.it/
338 |
339 | Wordnet files download links
340 | ----------------------------
341 |
342 | 1. http://nlpwww.nict.go.jp/wn-ja/eng/downloads.html
343 | 2. http://sentiwordnet.isti.cnr.it/
344 |
345 | How to Use
346 | ----------
347 |
348 | The following classifier is a baseline: it maps Japanese words to English senses via WordNet and classifies on the polarity scores from SentiWordNet. A sketch of one possible implementation follows the example below.
349 |
350 | - (Adnouns, nouns, verbs, ... all included)
351 | - No WSD module on the Japanese sentence
352 | - Uses a word's most common sense for its polarity score
353 |
354 | >>> from jNlp.jSentiments import *
355 | >>> jp_wn = '../../../../data/wnjpn-all.tab'
356 | >>> en_swn = '../../../../data/SentiWordNet_3.0.0_20100908.txt'
357 | >>> classifier = Sentiment()
358 | >>> classifier.train(en_swn, jp_wn)
359 | >>> text = u'監督、俳優、ストーリー、演出、全部最高!'
360 | >>> print classifier.baseline(text)
361 | ...Pos Score = 0.625 Neg Score = 0.125
362 | ...Text is Positive
363 |
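``classifier.baseline(text)`` itself is not spelled out in this document, so the following is only a sketch of how such a baseline could be assembled from the pieces that are shown (``jTokenize`` and the ``sentiwordnet`` / ``jpwordnet`` mappings returned by ``train``, where ``sentiwordnet[jpwordnet[word]]`` holds the positive and negative scores). The helper name ``baseline_polarity`` and the score aggregation are assumptions, not the actual ``jSentiments`` code.

.. code-block:: python

   # -*- coding: utf-8 -*-
   # Hypothetical sketch; the real classifier.baseline() may aggregate differently.
   from jNlp.jSentiments import *
   from jNlp.jTokenize import jTokenize

   def baseline_polarity(text, sentiwordnet, jpwordnet):
       """Sum the polarity scores of every token that has a WordNet mapping."""
       pos_score, neg_score = 0.0, 0.0
       for token in jTokenize(text):
           if token in jpwordnet:                              # Japanese word -> English sense key
               pos_score += sentiwordnet[jpwordnet[token]][0]  # positive score
               neg_score += sentiwordnet[jpwordnet[token]][1]  # negative score
       return 'Positive' if pos_score >= neg_score else 'Negative'

   jp_wn = '../../../../data/wnjpn-all.tab'
   en_swn = '../../../../data/SentiWordNet_3.0.0_20100908.txt'
   classifier = Sentiment()
   sentiwordnet, jpwordnet = classifier.train(en_swn, jp_wn)
   print baseline_polarity(u'監督、俳優、ストーリー、演出、全部最高!', sentiwordnet, jpwordnet)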
364 | Japanese Word Polarity Score
365 | ----------------------------
366 |
367 | >>> from jNlp.jSentiments import *
368 | >>> jp_wn = '_dicts/wnjpn-all.tab' #path to Japanese Word Net
369 | >>> en_swn = '_dicts/SentiWordNet_3.0.0_20100908.txt' #Path to SentiWordNet
370 | >>> classifier = Sentiment()
371 | >>> sentiwordnet, jpwordnet = classifier.train(en_swn, jp_wn)
372 | >>> positive_score = sentiwordnet[jpwordnet[u'全部']][0]
373 | >>> negative_score = sentiwordnet[jpwordnet[u'全部']][1]
374 | >>> print 'pos score = {0}, neg score = {1}'.format(positive_score, negative_score)
375 | ...pos score = 0.625, neg score = 0.0
376 |
377 |
378 | Contacts
379 | ========
380 |
381 | :Author: `pulkit[at]jaist.ac.jp` [change ``at`` with ``@``]
382 |
383 |
384 | .. include:: disqus_jnlp.html.rst
385 |
386 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # Internal variables.
11 | PAPEROPT_a4 = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | # the i18n builder cannot share the environment and doctrees with the others
15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
16 |
17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
18 |
19 | help:
20 | @echo "Please use \`make ' where is one of"
21 | @echo " html to make standalone HTML files"
22 | @echo " dirhtml to make HTML files named index.html in directories"
23 | @echo " singlehtml to make a single large HTML file"
24 | @echo " pickle to make pickle files"
25 | @echo " json to make JSON files"
26 | @echo " htmlhelp to make HTML files and a HTML help project"
27 | @echo " qthelp to make HTML files and a qthelp project"
28 | @echo " devhelp to make HTML files and a Devhelp project"
29 | @echo " epub to make an epub"
30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
31 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
32 | @echo " text to make text files"
33 | @echo " man to make manual pages"
34 | @echo " texinfo to make Texinfo files"
35 | @echo " info to make Texinfo files and run them through makeinfo"
36 | @echo " gettext to make PO message catalogs"
37 | @echo " changes to make an overview of all changed/added/deprecated items"
38 | @echo " linkcheck to check all external links for integrity"
39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
40 |
41 | clean:
42 | -rm -rf $(BUILDDIR)/*
43 |
44 | html:
45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
46 | @echo
47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
48 |
49 | dirhtml:
50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
51 | @echo
52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
53 |
54 | singlehtml:
55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
56 | @echo
57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/jProcessing.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/jProcessing.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/jProcessing" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/jProcessing" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 
149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/_build/doctrees/disqus_jnlp.html.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/doctrees/disqus_jnlp.html.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/_build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/_build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: c5db09488a4b62fff5c534b5b975854f 4 | tags: fbb0d17656682115ca4d033fb2f83ba1 5 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/disqus_jnlp.html.txt: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 |
4 | 15 | 16 | blog comments powered by Disqus 17 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/index.txt: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 | 4 | 5 | 6 | 7 | .. raw:: html 8 | 9 |
Back to Home 10 | 11 | ==================== 12 | Japanese NLP Library 13 | ==================== 14 | 15 | 16 | .. sectnum:: 17 | .. contents:: 18 | 19 | Requirements 20 | ============ 21 | 22 | - Third Party Dependencies 23 | 24 | - Cabocha Japanese Morphological parser http://sourceforge.net/projects/cabocha/ 25 | 26 | - Python Dependencies 27 | 28 | - ``Python 2.6.*`` or above 29 | 30 | 31 | ``Links`` 32 | --------- 33 | 34 | - All code at jProcessing Repo GitHub_ 35 | 36 | .. _GitHub: https://github.com/kevincobain2000/jProcessing 37 | 38 | - Documentation_ and HomePage_ and Sphinx_ 39 | 40 | .. _Documentation: http://www.jaist.ac.jp/~s1010205/jnlp 41 | 42 | .. _HomePage: http://www.jaist.ac.jp/~s1010205/ 43 | 44 | .. _Sphinx: http://readthedocs.org/docs/jprocessing/en/latest/ 45 | 46 | 47 | - PyPi_ Python Package 48 | 49 | .. _PyPi: http://pypi.python.org/pypi/jProcessing/0.1 50 | 51 | :: 52 | 53 | clone git@github.com:kevincobain2000/jProcessing.git 54 | 55 | 56 | ``Install`` 57 | ----------- 58 | 59 | In ``Terminal`` :: 60 | 61 | bash$ python setup.py install 62 | 63 | History 64 | ------- 65 | 66 | - ``0.2`` 67 | 68 | + Sentiment Analysis of Japanese Text 69 | 70 | - ``0.1`` 71 | + Morphologically Tokenize Japanese Sentence 72 | + Kanji / Hiragana / Katakana to Romaji Converter 73 | + Edict Dictionary Search - borrowed 74 | + Edict Examples Search - incomplete 75 | + Sentence Similarity between two JP Sentences 76 | + Run Cabocha(ISO--8859-1 configured) in Python. 77 | + Longest Common String between Sentences 78 | + Kanji to Katakana Pronunciation 79 | + Hiragana, Katakana Chart Parser 80 | 81 | Libraries and Modules 82 | ===================== 83 | 84 | Tokenize ``jTokenize.py`` 85 | ------------------------- 86 | In ``Python`` :: 87 | 88 | >>> from jNlp.jTokenize import jTokenize 89 | >>> input_sentence = u'私は彼を5日前、つまりこの前の金曜日に駅で見かけた' 90 | >>> list_of_tokens = jTokenize(input_sentence) 91 | >>> print list_of_tokens 92 | >>> print '--'.join(list_of_tokens).encode('utf-8') 93 | 94 | Returns: 95 | 96 | :: 97 | 98 | ... [u'\u79c1', u'\u306f', u'\u5f7c', u'\u3092', u'\uff15'...] 99 | ... 私--は--彼--を--5--日--前--、--つまり--この--前--の--金曜日--に--駅--で--見かけ--た 100 | 101 | Katakana Pronunciation: 102 | 103 | :: 104 | 105 | >>> print '--'.join(jReads(input_sentence)).encode('utf-8') 106 | ... ワタシ--ハ--カレ--ヲ--ゴ--ニチ--マエ--、--ツマリ--コノ--マエ--ノ--キンヨウビ--ニ--エキ--デ--ミカケ--タ 107 | 108 | 109 | Cabocha ``jCabocha.py`` 110 | ----------------------- 111 | 112 | Run Cabocha_ with original ``EUCJP`` or ``IS0-8859-1`` configured encoding, with ``utf8`` python 113 | 114 | .. _Cabocha: http://code.google.com/p/cabocha/ 115 | 116 | - If cobocha is configured as ``utf8`` then see this http://nltk.googlecode.com/svn/trunk/doc/book-jp/ch12.html#cabocha 117 | 118 | .. code-block:: python 119 | 120 | >>> from jNlp.jCabocha import cabocha 121 | >>> print cabocha(input_sentence).encode('utf-8') 122 | 123 | Output: 124 | 125 | .. code-block:: xml 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | Kanji / Katakana /Hiragana to Tokenized Romaji ``jConvert.py`` 146 | -------------------------------------------------------------- 147 | 148 | Uses ``data/katakanaChart.txt`` and parses the chart. See katakanaChart_. 149 | 150 | .. code-block:: python 151 | 152 | >>> from jNlp.jConvert import * 153 | >>> input_sentence = u'気象庁が21日午前4時48分、発表した天気概況によると、' 154 | >>> print ' '.join(tokenizedRomaji(input_sentence)) 155 | >>> print tokenizedRomaji(input_sentence) 156 | 157 | .. 
code-block:: python 158 | 159 | ...kisyoutyou ga ni ichi nichi gozen yon ji yon hachi hun hapyou si ta tenki gaikyou ni yoru to 160 | ...[u'kisyoutyou', u'ga', u'ni', u'ichi', u'nichi', u'gozen',...] 161 | 162 | 163 | **katakanaChart.txt** 164 | 165 | 166 | .. _katakanaChart: 167 | 168 | - katakanaChartFile_ and hiraganaChartFile_ 169 | 170 | .. _katakanaChartFile: https://raw.github.com/kevincobain2000/jProcessing/master/src/jNlp/data/katakanaChart.txt 171 | 172 | .. _hiraganaChartFile: https://raw.github.com/kevincobain2000/jProcessing/master/src/jNlp/data/hiraganaChart.txt 173 | 174 | 175 | Longest Common String Japanese ``jProcessing.py`` 176 | ------------------------------------------------- 177 | 178 | On English Strings :: 179 | 180 | >>> from jNlp.jProcessing import long_substr 181 | >>> a = 'Once upon a time in Italy' 182 | >>> b = 'Thre was a time in America' 183 | >>> print long_substr(a, b) 184 | 185 | Output :: 186 | 187 | ...a time in 188 | 189 | On Japanese Strings :: 190 | 191 | >>> a = u'これでアナタも冷え知らず' 192 | >>> b = u'これでア冷え知らずナタも' 193 | >>> print long_substr(a, b).encode('utf-8') 194 | 195 | Output :: 196 | 197 | ...冷え知らず 198 | 199 | Similarity between two sentences ``jProcessing.py`` 200 | --------------------------------------------------- 201 | Uses MinHash by checking the overlap http://en.wikipedia.org/wiki/MinHash 202 | 203 | :English Strings: 204 | 205 | >>> from jNlp.jProcessing import Similarities 206 | >>> s = Similarities() 207 | >>> a = 'There was' 208 | >>> b = 'There is' 209 | >>> print s.minhash(a,b) 210 | ...0.444444444444 211 | 212 | :Japanese Strings: 213 | 214 | >>> from jNlp.jProcessing import * 215 | >>> a = u'これは何ですか?' 216 | >>> b = u'これはわからないです' 217 | >>> print s.minhash(' '.join(jTokenize(a)), ' '.join(jTokenize(b))) 218 | ...0.210526315789 219 | 220 | Edict Japanese Dictionary Search with Example sentences 221 | ======================================================= 222 | 223 | Sample Ouput Demo 224 | ----------------- 225 | 226 | .. raw:: html 227 | 228 | 244 | 245 | 246 | 247 | Edict dictionary and example sentences parser. 248 | ---------------------------------------------- 249 | 250 | This package uses the EDICT_ and KANJIDIC_ dictionary files. 251 | These files are the property of the 252 | Electronic Dictionary Research and Development Group_ , and 253 | are used in conformance with the Group's licence_ . 254 | 255 | .. _EDICT: http://www.csse.monash.edu.au/~jwb/edict.html 256 | .. _KANJIDIC: http://www.csse.monash.edu.au/~jwb/kanjidic.html 257 | .. _Group: http://www.edrdg.org/ 258 | .. _licence: http://www.edrdg.org/edrdg/licence.html 259 | 260 | Edict Parser By **Paul Goins**, see ``edict_search.py`` 261 | Edict Example sentences Parse by query, **Pulkit Kathuria**, see ``edict_examples.py`` 262 | Edict examples pickle files are provided but latest example files can be downloaded from the links provided. 263 | 264 | Charset 265 | ------- 266 | Two files 267 | 268 | - ``utf8`` Charset example file if not using ``src/jNlp/data/edict_examples`` 269 | 270 | To convert ``EUCJP/ISO-8859-1`` to ``utf8`` :: 271 | 272 | iconv -f EUCJP -t UTF-8 path/to/edict_examples > path/to/save_with_utf-8 273 | 274 | - ``ISO-8859-1`` edict_dictionary file 275 | 276 | Outputs example sentences for a query in Japanese only for ambiguous words. 277 | 278 | 279 | Links 280 | ----- 281 | 282 | **Latest** Dictionary files can be downloaded here_ 283 | 284 | .. 
_here: http://www.csse.monash.edu.au/~jwb/edict.html 285 | 286 | ``edict_search.py`` 287 | ------------------- 288 | :author: Paul Goins `License included` linkToOriginal_: 289 | 290 | .. _linkToOriginal: http://repo.or.cz/w/jbparse.git/blame/8e42831ca5f721c0320b27d7d83cb553d6e9c68f:/jbparse/edict.py 291 | 292 | For all entries of sense definitions 293 | 294 | >>> from jNlp.edict_search import * 295 | >>> query = u'認める' 296 | >>> edict_path = 'src/jNlp/data/edict-yy-mm-dd' 297 | >>> kp = Parser(edict_path) 298 | >>> for i, entry in enumerate(kp.search(query)): 299 | ... print entry.to_string().encode('utf-8') 300 | 301 | 302 | ``edict_examples.py`` 303 | --------------------- 304 | :`Note`: Only outputs the examples sentences for ambiguous words (if word has one or more senses) 305 | 306 | :author: Pulkit Kathuria 307 | 308 | >>> from jNlp.edict_examples import * 309 | >>> query = u'認める' 310 | >>> edict_path = 'src/jNlp/data/edict-yy-mm-dd' 311 | >>> edict_examples_path = 'src/jNlp/data/edict_examples' 312 | >>> search_with_example(edict_path, edict_examples_path, query) 313 | 314 | Output :: 315 | 316 | 認める 317 | 318 | Sense (1) to recognize; 319 | EX:01 我々は彼の才能を*認*めている。We appreciate his talent. 320 | 321 | Sense (2) to observe; 322 | EX:01 x線写真で異状が*認*められます。We have detected an abnormality on your x-ray. 323 | 324 | Sense (3) to admit; 325 | EX:01 母は私の計画をよいと*認*めた。Mother approved my plan. 326 | EX:02 母は決して私の結婚を*認*めないだろう。Mother will never approve of my marriage. 327 | EX:03 父は決して私の結婚を*認*めないだろう。Father will never approve of my marriage. 328 | EX:04 彼は女性の喫煙をいいものだと*認*めない。He doesn't approve of women smoking. 329 | ... 330 | 331 | Sentiment Analysis Japanese Text 332 | ================================ 333 | 334 | This section covers (1) Sentiment Analysis on Japanese text using Word Sense Disambiguation, Wordnet-jp_ (Japanese Word Net file name ``wnjpn-all.tab``), SentiWordnet_ (English SentiWordNet file name ``SentiWordNet_3.*.txt``). 335 | 336 | .. _Wordnet-jp: http://nlpwww.nict.go.jp/wn-ja/eng/downloads.html 337 | .. _SentiWordnet: http://sentiwordnet.isti.cnr.it/ 338 | 339 | Wordnet files download links 340 | ---------------------------- 341 | 342 | 1. http://nlpwww.nict.go.jp/wn-ja/eng/downloads.html 343 | 2. http://sentiwordnet.isti.cnr.it/ 344 | 345 | How to Use 346 | ---------- 347 | 348 | The following classifier is baseline, which works as simple mapping of Eng to Japanese using Wordnet and classify on polarity score using SentiWordnet. 349 | 350 | - (Adnouns, nouns, verbs, .. all included) 351 | - No WSD module on Japanese Sentence 352 | - Uses word as its common sense for polarity score 353 | 354 | >>> from jNlp.jSentiments import * 355 | >>> jp_wn = '../../../../data/wnjpn-all.tab' 356 | >>> en_swn = '../../../../data/SentiWordNet_3.0.0_20100908.txt' 357 | >>> classifier = Sentiment() 358 | >>> classifier.train(en_swn, jp_wn) 359 | >>> text = u'監督、俳優、ストーリー、演出、全部最高!' 
360 | >>> print classifier.baseline(text) 361 | ...Pos Score = 0.625 Neg Score = 0.125 362 | ...Text is Positive 363 | 364 | Japanese Word Polarity Score 365 | ---------------------------- 366 | 367 | >>> from jNlp.jSentiments import * 368 | >>> jp_wn = '_dicts/wnjpn-all.tab' #path to Japanese Word Net 369 | >>> en_swn = '_dicts/SentiWordNet_3.0.0_20100908.txt' #Path to SentiWordNet 370 | >>> classifier = Sentiment() 371 | >>> sentiwordnet, jpwordnet = classifier.train(en_swn, jp_wn) 372 | >>> positive_score = sentiwordnet[jpwordnet[u'全部']][0] 373 | >>> negative_score = sentiwordnet[jpwordnet[u'全部']][1] 374 | >>> print 'pos score = {0}, neg score = {1}'.format(positive_score, negative_score) 375 | ...pos score = 0.625, neg score = 0.0 376 | 377 | 378 | Contacts 379 | ======== 380 | 381 | :Author: `pulkit[at]jaist.ac.jp` [change ``at`` with ``@``] 382 | 383 | 384 | .. include:: disqus_jnlp.html.rst 385 | 386 | -------------------------------------------------------------------------------- /docs/_build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/_build/html/_static/basic.css: -------------------------------------------------------------------------------- 1 | /* 2 | * basic.css 3 | * ~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- basic theme. 6 | * 7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | 12 | /* -- main layout ----------------------------------------------------------- */ 13 | 14 | div.clearer { 15 | clear: both; 16 | } 17 | 18 | /* -- relbar ---------------------------------------------------------------- */ 19 | 20 | div.related { 21 | width: 100%; 22 | font-size: 90%; 23 | } 24 | 25 | div.related h3 { 26 | display: none; 27 | } 28 | 29 | div.related ul { 30 | margin: 0; 31 | padding: 0 0 0 10px; 32 | list-style: none; 33 | } 34 | 35 | div.related li { 36 | display: inline; 37 | } 38 | 39 | div.related li.right { 40 | float: right; 41 | margin-right: 5px; 42 | } 43 | 44 | /* -- sidebar --------------------------------------------------------------- */ 45 | 46 | div.sphinxsidebarwrapper { 47 | padding: 10px 5px 0 10px; 48 | } 49 | 50 | div.sphinxsidebar { 51 | float: left; 52 | width: 230px; 53 | margin-left: -100%; 54 | font-size: 90%; 55 | } 56 | 57 | div.sphinxsidebar ul { 58 | list-style: none; 59 | } 60 | 61 | div.sphinxsidebar ul ul, 62 | div.sphinxsidebar ul.want-points { 63 | margin-left: 20px; 64 | list-style: square; 65 | } 66 | 67 | div.sphinxsidebar ul ul { 68 | margin-top: 0; 69 | margin-bottom: 0; 70 | } 71 | 72 | div.sphinxsidebar form { 73 | margin-top: 10px; 74 | } 75 | 76 | div.sphinxsidebar input { 77 | border: 1px solid #98dbcc; 78 | font-family: sans-serif; 79 | font-size: 1em; 80 | } 81 | 82 | div.sphinxsidebar input[type="text"] { 83 | width: 170px; 84 | } 85 | 86 | div.sphinxsidebar input[type="submit"] { 87 | width: 30px; 88 | } 89 | 90 | img { 91 | border: 0; 92 | } 93 | 94 | /* -- search page ----------------------------------------------------------- */ 95 | 96 | ul.search { 97 | margin: 10px 0 0 20px; 98 | padding: 0; 99 | } 100 | 101 | ul.search li { 102 | padding: 5px 0 5px 20px; 103 | background-image: url(file.png); 104 | background-repeat: no-repeat; 105 | 
background-position: 0 7px; 106 | } 107 | 108 | ul.search li a { 109 | font-weight: bold; 110 | } 111 | 112 | ul.search li div.context { 113 | color: #888; 114 | margin: 2px 0 0 30px; 115 | text-align: left; 116 | } 117 | 118 | ul.keywordmatches li.goodmatch a { 119 | font-weight: bold; 120 | } 121 | 122 | /* -- index page ------------------------------------------------------------ */ 123 | 124 | table.contentstable { 125 | width: 90%; 126 | } 127 | 128 | table.contentstable p.biglink { 129 | line-height: 150%; 130 | } 131 | 132 | a.biglink { 133 | font-size: 1.3em; 134 | } 135 | 136 | span.linkdescr { 137 | font-style: italic; 138 | padding-top: 5px; 139 | font-size: 90%; 140 | } 141 | 142 | /* -- general index --------------------------------------------------------- */ 143 | 144 | table.indextable { 145 | width: 100%; 146 | } 147 | 148 | table.indextable td { 149 | text-align: left; 150 | vertical-align: top; 151 | } 152 | 153 | table.indextable dl, table.indextable dd { 154 | margin-top: 0; 155 | margin-bottom: 0; 156 | } 157 | 158 | table.indextable tr.pcap { 159 | height: 10px; 160 | } 161 | 162 | table.indextable tr.cap { 163 | margin-top: 10px; 164 | background-color: #f2f2f2; 165 | } 166 | 167 | img.toggler { 168 | margin-right: 3px; 169 | margin-top: 3px; 170 | cursor: pointer; 171 | } 172 | 173 | div.modindex-jumpbox { 174 | border-top: 1px solid #ddd; 175 | border-bottom: 1px solid #ddd; 176 | margin: 1em 0 1em 0; 177 | padding: 0.4em; 178 | } 179 | 180 | div.genindex-jumpbox { 181 | border-top: 1px solid #ddd; 182 | border-bottom: 1px solid #ddd; 183 | margin: 1em 0 1em 0; 184 | padding: 0.4em; 185 | } 186 | 187 | /* -- general body styles --------------------------------------------------- */ 188 | 189 | a.headerlink { 190 | visibility: hidden; 191 | } 192 | 193 | h1:hover > a.headerlink, 194 | h2:hover > a.headerlink, 195 | h3:hover > a.headerlink, 196 | h4:hover > a.headerlink, 197 | h5:hover > a.headerlink, 198 | h6:hover > a.headerlink, 199 | dt:hover > a.headerlink { 200 | visibility: visible; 201 | } 202 | 203 | div.body p.caption { 204 | text-align: inherit; 205 | } 206 | 207 | div.body td { 208 | text-align: left; 209 | } 210 | 211 | .field-list ul { 212 | padding-left: 1em; 213 | } 214 | 215 | .first { 216 | margin-top: 0 !important; 217 | } 218 | 219 | p.rubric { 220 | margin-top: 30px; 221 | font-weight: bold; 222 | } 223 | 224 | img.align-left, .figure.align-left, object.align-left { 225 | clear: left; 226 | float: left; 227 | margin-right: 1em; 228 | } 229 | 230 | img.align-right, .figure.align-right, object.align-right { 231 | clear: right; 232 | float: right; 233 | margin-left: 1em; 234 | } 235 | 236 | img.align-center, .figure.align-center, object.align-center { 237 | display: block; 238 | margin-left: auto; 239 | margin-right: auto; 240 | } 241 | 242 | .align-left { 243 | text-align: left; 244 | } 245 | 246 | .align-center { 247 | text-align: center; 248 | } 249 | 250 | .align-right { 251 | text-align: right; 252 | } 253 | 254 | /* -- sidebars -------------------------------------------------------------- */ 255 | 256 | div.sidebar { 257 | margin: 0 0 0.5em 1em; 258 | border: 1px solid #ddb; 259 | padding: 7px 7px 0 7px; 260 | background-color: #ffe; 261 | width: 40%; 262 | float: right; 263 | } 264 | 265 | p.sidebar-title { 266 | font-weight: bold; 267 | } 268 | 269 | /* -- topics ---------------------------------------------------------------- */ 270 | 271 | div.topic { 272 | border: 1px solid #ccc; 273 | padding: 7px 7px 0 7px; 274 | margin: 10px 0 10px 
0; 275 | } 276 | 277 | p.topic-title { 278 | font-size: 1.1em; 279 | font-weight: bold; 280 | margin-top: 10px; 281 | } 282 | 283 | /* -- admonitions ----------------------------------------------------------- */ 284 | 285 | div.admonition { 286 | margin-top: 10px; 287 | margin-bottom: 10px; 288 | padding: 7px; 289 | } 290 | 291 | div.admonition dt { 292 | font-weight: bold; 293 | } 294 | 295 | div.admonition dl { 296 | margin-bottom: 0; 297 | } 298 | 299 | p.admonition-title { 300 | margin: 0px 10px 5px 0px; 301 | font-weight: bold; 302 | } 303 | 304 | div.body p.centered { 305 | text-align: center; 306 | margin-top: 25px; 307 | } 308 | 309 | /* -- tables ---------------------------------------------------------------- */ 310 | 311 | table.docutils { 312 | border: 0; 313 | border-collapse: collapse; 314 | } 315 | 316 | table.docutils td, table.docutils th { 317 | padding: 1px 8px 1px 5px; 318 | border-top: 0; 319 | border-left: 0; 320 | border-right: 0; 321 | border-bottom: 1px solid #aaa; 322 | } 323 | 324 | table.field-list td, table.field-list th { 325 | border: 0 !important; 326 | } 327 | 328 | table.footnote td, table.footnote th { 329 | border: 0 !important; 330 | } 331 | 332 | th { 333 | text-align: left; 334 | padding-right: 5px; 335 | } 336 | 337 | table.citation { 338 | border-left: solid 1px gray; 339 | margin-left: 1px; 340 | } 341 | 342 | table.citation td { 343 | border-bottom: none; 344 | } 345 | 346 | /* -- other body styles ----------------------------------------------------- */ 347 | 348 | ol.arabic { 349 | list-style: decimal; 350 | } 351 | 352 | ol.loweralpha { 353 | list-style: lower-alpha; 354 | } 355 | 356 | ol.upperalpha { 357 | list-style: upper-alpha; 358 | } 359 | 360 | ol.lowerroman { 361 | list-style: lower-roman; 362 | } 363 | 364 | ol.upperroman { 365 | list-style: upper-roman; 366 | } 367 | 368 | dl { 369 | margin-bottom: 15px; 370 | } 371 | 372 | dd p { 373 | margin-top: 0px; 374 | } 375 | 376 | dd ul, dd table { 377 | margin-bottom: 10px; 378 | } 379 | 380 | dd { 381 | margin-top: 3px; 382 | margin-bottom: 10px; 383 | margin-left: 30px; 384 | } 385 | 386 | dt:target, .highlighted { 387 | background-color: #fbe54e; 388 | } 389 | 390 | dl.glossary dt { 391 | font-weight: bold; 392 | font-size: 1.1em; 393 | } 394 | 395 | .field-list ul { 396 | margin: 0; 397 | padding-left: 1em; 398 | } 399 | 400 | .field-list p { 401 | margin: 0; 402 | } 403 | 404 | .refcount { 405 | color: #060; 406 | } 407 | 408 | .optional { 409 | font-size: 1.3em; 410 | } 411 | 412 | .versionmodified { 413 | font-style: italic; 414 | } 415 | 416 | .system-message { 417 | background-color: #fda; 418 | padding: 5px; 419 | border: 3px solid red; 420 | } 421 | 422 | .footnote:target { 423 | background-color: #ffa; 424 | } 425 | 426 | .line-block { 427 | display: block; 428 | margin-top: 1em; 429 | margin-bottom: 1em; 430 | } 431 | 432 | .line-block .line-block { 433 | margin-top: 0; 434 | margin-bottom: 0; 435 | margin-left: 1.5em; 436 | } 437 | 438 | .guilabel, .menuselection { 439 | font-family: sans-serif; 440 | } 441 | 442 | .accelerator { 443 | text-decoration: underline; 444 | } 445 | 446 | .classifier { 447 | font-style: oblique; 448 | } 449 | 450 | abbr, acronym { 451 | border-bottom: dotted 1px; 452 | cursor: help; 453 | } 454 | 455 | /* -- code displays --------------------------------------------------------- */ 456 | 457 | pre { 458 | overflow: auto; 459 | overflow-y: hidden; /* fixes display issues on Chrome browsers */ 460 | } 461 | 462 | td.linenos pre { 463 | padding: 5px 
0px; 464 | border: 0; 465 | background-color: transparent; 466 | color: #aaa; 467 | } 468 | 469 | table.highlighttable { 470 | margin-left: 0.5em; 471 | } 472 | 473 | table.highlighttable td { 474 | padding: 0 0.5em 0 0.5em; 475 | } 476 | 477 | tt.descname { 478 | background-color: transparent; 479 | font-weight: bold; 480 | font-size: 1.2em; 481 | } 482 | 483 | tt.descclassname { 484 | background-color: transparent; 485 | } 486 | 487 | tt.xref, a tt { 488 | background-color: transparent; 489 | font-weight: bold; 490 | } 491 | 492 | h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt { 493 | background-color: transparent; 494 | } 495 | 496 | .viewcode-link { 497 | float: right; 498 | } 499 | 500 | .viewcode-back { 501 | float: right; 502 | font-family: sans-serif; 503 | } 504 | 505 | div.viewcode-block:target { 506 | margin: -1px -10px; 507 | padding: 0 10px; 508 | } 509 | 510 | /* -- math display ---------------------------------------------------------- */ 511 | 512 | img.math { 513 | vertical-align: middle; 514 | } 515 | 516 | div.body div.math p { 517 | text-align: center; 518 | } 519 | 520 | span.eqno { 521 | float: right; 522 | } 523 | 524 | /* -- printout stylesheet --------------------------------------------------- */ 525 | 526 | @media print { 527 | div.document, 528 | div.documentwrapper, 529 | div.bodywrapper { 530 | margin: 0 !important; 531 | width: 100%; 532 | } 533 | 534 | div.sphinxsidebar, 535 | div.related, 536 | div.footer, 537 | #top-link { 538 | display: none; 539 | } 540 | } -------------------------------------------------------------------------------- /docs/_build/html/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/_build/html/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/comment-close.png -------------------------------------------------------------------------------- /docs/_build/html/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/comment.png -------------------------------------------------------------------------------- /docs/_build/html/_static/default.css: -------------------------------------------------------------------------------- 1 | /* 2 | * default.css_t 3 | * ~~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- default theme. 6 | * 7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 
9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | body { 17 | font-family: sans-serif; 18 | font-size: 100%; 19 | background-color: #11303d; 20 | color: #000; 21 | margin: 0; 22 | padding: 0; 23 | } 24 | 25 | div.document { 26 | background-color: #1c4e63; 27 | } 28 | 29 | div.documentwrapper { 30 | float: left; 31 | width: 100%; 32 | } 33 | 34 | div.bodywrapper { 35 | margin: 0 0 0 230px; 36 | } 37 | 38 | div.body { 39 | background-color: #ffffff; 40 | color: #000000; 41 | padding: 0 20px 30px 20px; 42 | } 43 | 44 | div.footer { 45 | color: #ffffff; 46 | width: 100%; 47 | padding: 9px 0 9px 0; 48 | text-align: center; 49 | font-size: 75%; 50 | } 51 | 52 | div.footer a { 53 | color: #ffffff; 54 | text-decoration: underline; 55 | } 56 | 57 | div.related { 58 | background-color: #133f52; 59 | line-height: 30px; 60 | color: #ffffff; 61 | } 62 | 63 | div.related a { 64 | color: #ffffff; 65 | } 66 | 67 | div.sphinxsidebar { 68 | } 69 | 70 | div.sphinxsidebar h3 { 71 | font-family: 'Trebuchet MS', sans-serif; 72 | color: #ffffff; 73 | font-size: 1.4em; 74 | font-weight: normal; 75 | margin: 0; 76 | padding: 0; 77 | } 78 | 79 | div.sphinxsidebar h3 a { 80 | color: #ffffff; 81 | } 82 | 83 | div.sphinxsidebar h4 { 84 | font-family: 'Trebuchet MS', sans-serif; 85 | color: #ffffff; 86 | font-size: 1.3em; 87 | font-weight: normal; 88 | margin: 5px 0 0 0; 89 | padding: 0; 90 | } 91 | 92 | div.sphinxsidebar p { 93 | color: #ffffff; 94 | } 95 | 96 | div.sphinxsidebar p.topless { 97 | margin: 5px 10px 10px 10px; 98 | } 99 | 100 | div.sphinxsidebar ul { 101 | margin: 10px; 102 | padding: 0; 103 | color: #ffffff; 104 | } 105 | 106 | div.sphinxsidebar a { 107 | color: #98dbcc; 108 | } 109 | 110 | div.sphinxsidebar input { 111 | border: 1px solid #98dbcc; 112 | font-family: sans-serif; 113 | font-size: 1em; 114 | } 115 | 116 | 117 | 118 | /* -- hyperlink styles ------------------------------------------------------ */ 119 | 120 | a { 121 | color: #355f7c; 122 | text-decoration: none; 123 | } 124 | 125 | a:visited { 126 | color: #355f7c; 127 | text-decoration: none; 128 | } 129 | 130 | a:hover { 131 | text-decoration: underline; 132 | } 133 | 134 | 135 | 136 | /* -- body styles ----------------------------------------------------------- */ 137 | 138 | div.body h1, 139 | div.body h2, 140 | div.body h3, 141 | div.body h4, 142 | div.body h5, 143 | div.body h6 { 144 | font-family: 'Trebuchet MS', sans-serif; 145 | background-color: #f2f2f2; 146 | font-weight: normal; 147 | color: #20435c; 148 | border-bottom: 1px solid #ccc; 149 | margin: 20px -20px 10px -20px; 150 | padding: 3px 0 3px 10px; 151 | } 152 | 153 | div.body h1 { margin-top: 0; font-size: 200%; } 154 | div.body h2 { font-size: 160%; } 155 | div.body h3 { font-size: 140%; } 156 | div.body h4 { font-size: 120%; } 157 | div.body h5 { font-size: 110%; } 158 | div.body h6 { font-size: 100%; } 159 | 160 | a.headerlink { 161 | color: #c60f0f; 162 | font-size: 0.8em; 163 | padding: 0 4px 0 4px; 164 | text-decoration: none; 165 | } 166 | 167 | a.headerlink:hover { 168 | background-color: #c60f0f; 169 | color: white; 170 | } 171 | 172 | div.body p, div.body dd, div.body li { 173 | text-align: justify; 174 | line-height: 130%; 175 | } 176 | 177 | div.admonition p.admonition-title + p { 178 | display: inline; 179 | } 180 | 181 | div.admonition p { 182 | margin-bottom: 5px; 183 | } 184 | 185 | div.admonition pre { 186 | margin-bottom: 5px; 187 | } 188 | 189 | 
div.admonition ul, div.admonition ol { 190 | margin-bottom: 5px; 191 | } 192 | 193 | div.note { 194 | background-color: #eee; 195 | border: 1px solid #ccc; 196 | } 197 | 198 | div.seealso { 199 | background-color: #ffc; 200 | border: 1px solid #ff6; 201 | } 202 | 203 | div.topic { 204 | background-color: #eee; 205 | } 206 | 207 | div.warning { 208 | background-color: #ffe4e4; 209 | border: 1px solid #f66; 210 | } 211 | 212 | p.admonition-title { 213 | display: inline; 214 | } 215 | 216 | p.admonition-title:after { 217 | content: ":"; 218 | } 219 | 220 | pre { 221 | padding: 5px; 222 | background-color: #eeffcc; 223 | color: #333333; 224 | line-height: 120%; 225 | border: 1px solid #ac9; 226 | border-left: none; 227 | border-right: none; 228 | } 229 | 230 | tt { 231 | background-color: #ecf0f3; 232 | padding: 0 1px 0 1px; 233 | font-size: 0.95em; 234 | } 235 | 236 | th { 237 | background-color: #ede; 238 | } 239 | 240 | .warning tt { 241 | background: #efc2c2; 242 | } 243 | 244 | .note tt { 245 | background: #d6d6d6; 246 | } 247 | 248 | .viewcode-back { 249 | font-family: sans-serif; 250 | } 251 | 252 | div.viewcode-block:target { 253 | background-color: #f4debf; 254 | border-top: 1px solid #ac9; 255 | border-bottom: 1px solid #ac9; 256 | } -------------------------------------------------------------------------------- /docs/_build/html/_static/dialog-note.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/dialog-note.png -------------------------------------------------------------------------------- /docs/_build/html/_static/dialog-seealso.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/dialog-seealso.png -------------------------------------------------------------------------------- /docs/_build/html/_static/dialog-topic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/dialog-topic.png -------------------------------------------------------------------------------- /docs/_build/html/_static/dialog-warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/dialog-warning.png -------------------------------------------------------------------------------- /docs/_build/html/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Sphinx JavaScript utilities for all documentation. 6 | * 7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 
9 | * 10 | */ 11 | 12 | /** 13 | * select a different prefix for underscore 14 | */ 15 | $u = _.noConflict(); 16 | 17 | /** 18 | * make the code below compatible with browsers without 19 | * an installed firebug like debugger 20 | if (!window.console || !console.firebug) { 21 | var names = ["log", "debug", "info", "warn", "error", "assert", "dir", 22 | "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace", 23 | "profile", "profileEnd"]; 24 | window.console = {}; 25 | for (var i = 0; i < names.length; ++i) 26 | window.console[names[i]] = function() {}; 27 | } 28 | */ 29 | 30 | /** 31 | * small helper function to urldecode strings 32 | */ 33 | jQuery.urldecode = function(x) { 34 | return decodeURIComponent(x).replace(/\+/g, ' '); 35 | } 36 | 37 | /** 38 | * small helper function to urlencode strings 39 | */ 40 | jQuery.urlencode = encodeURIComponent; 41 | 42 | /** 43 | * This function returns the parsed url parameters of the 44 | * current request. Multiple values per key are supported, 45 | * it will always return arrays of strings for the value parts. 46 | */ 47 | jQuery.getQueryParameters = function(s) { 48 | if (typeof s == 'undefined') 49 | s = document.location.search; 50 | var parts = s.substr(s.indexOf('?') + 1).split('&'); 51 | var result = {}; 52 | for (var i = 0; i < parts.length; i++) { 53 | var tmp = parts[i].split('=', 2); 54 | var key = jQuery.urldecode(tmp[0]); 55 | var value = jQuery.urldecode(tmp[1]); 56 | if (key in result) 57 | result[key].push(value); 58 | else 59 | result[key] = [value]; 60 | } 61 | return result; 62 | }; 63 | 64 | /** 65 | * small function to check if an array contains 66 | * a given item. 67 | */ 68 | jQuery.contains = function(arr, item) { 69 | for (var i = 0; i < arr.length; i++) { 70 | if (arr[i] == item) 71 | return true; 72 | } 73 | return false; 74 | }; 75 | 76 | /** 77 | * highlight a given string on a jquery object by wrapping it in 78 | * span elements with the given class name. 79 | */ 80 | jQuery.fn.highlightText = function(text, className) { 81 | function highlight(node) { 82 | if (node.nodeType == 3) { 83 | var val = node.nodeValue; 84 | var pos = val.toLowerCase().indexOf(text); 85 | if (pos >= 0 && !jQuery(node.parentNode).hasClass(className)) { 86 | var span = document.createElement("span"); 87 | span.className = className; 88 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 89 | node.parentNode.insertBefore(span, node.parentNode.insertBefore( 90 | document.createTextNode(val.substr(pos + text.length)), 91 | node.nextSibling)); 92 | node.nodeValue = val.substr(0, pos); 93 | } 94 | } 95 | else if (!jQuery(node).is("button, select, textarea")) { 96 | jQuery.each(node.childNodes, function() { 97 | highlight(this); 98 | }); 99 | } 100 | } 101 | return this.each(function() { 102 | highlight(this); 103 | }); 104 | }; 105 | 106 | /** 107 | * Small JavaScript module for the documentation. 108 | */ 109 | var Documentation = { 110 | 111 | init : function() { 112 | this.fixFirefoxAnchorBug(); 113 | this.highlightSearchWords(); 114 | this.initIndexTable(); 115 | }, 116 | 117 | /** 118 | * i18n support 119 | */ 120 | TRANSLATIONS : {}, 121 | PLURAL_EXPR : function(n) { return n == 1 ? 
0 : 1; }, 122 | LOCALE : 'unknown', 123 | 124 | // gettext and ngettext don't access this so that the functions 125 | // can safely bound to a different name (_ = Documentation.gettext) 126 | gettext : function(string) { 127 | var translated = Documentation.TRANSLATIONS[string]; 128 | if (typeof translated == 'undefined') 129 | return string; 130 | return (typeof translated == 'string') ? translated : translated[0]; 131 | }, 132 | 133 | ngettext : function(singular, plural, n) { 134 | var translated = Documentation.TRANSLATIONS[singular]; 135 | if (typeof translated == 'undefined') 136 | return (n == 1) ? singular : plural; 137 | return translated[Documentation.PLURALEXPR(n)]; 138 | }, 139 | 140 | addTranslations : function(catalog) { 141 | for (var key in catalog.messages) 142 | this.TRANSLATIONS[key] = catalog.messages[key]; 143 | this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); 144 | this.LOCALE = catalog.locale; 145 | }, 146 | 147 | /** 148 | * add context elements like header anchor links 149 | */ 150 | addContextElements : function() { 151 | $('div[id] > :header:first').each(function() { 152 | $('\u00B6'). 153 | attr('href', '#' + this.id). 154 | attr('title', _('Permalink to this headline')). 155 | appendTo(this); 156 | }); 157 | $('dt[id]').each(function() { 158 | $('\u00B6'). 159 | attr('href', '#' + this.id). 160 | attr('title', _('Permalink to this definition')). 161 | appendTo(this); 162 | }); 163 | }, 164 | 165 | /** 166 | * workaround a firefox stupidity 167 | */ 168 | fixFirefoxAnchorBug : function() { 169 | if (document.location.hash && $.browser.mozilla) 170 | window.setTimeout(function() { 171 | document.location.href += ''; 172 | }, 10); 173 | }, 174 | 175 | /** 176 | * highlight the search words provided in the url in the text 177 | */ 178 | highlightSearchWords : function() { 179 | var params = $.getQueryParameters(); 180 | var terms = (params.highlight) ? 
params.highlight[0].split(/\s+/) : []; 181 | if (terms.length) { 182 | var body = $('div.body'); 183 | window.setTimeout(function() { 184 | $.each(terms, function() { 185 | body.highlightText(this.toLowerCase(), 'highlighted'); 186 | }); 187 | }, 10); 188 | $('') 190 | .appendTo($('#searchbox')); 191 | } 192 | }, 193 | 194 | /** 195 | * init the domain index toggle buttons 196 | */ 197 | initIndexTable : function() { 198 | var togglers = $('img.toggler').click(function() { 199 | var src = $(this).attr('src'); 200 | var idnum = $(this).attr('id').substr(7); 201 | $('tr.cg-' + idnum).toggle(); 202 | if (src.substr(-9) == 'minus.png') 203 | $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); 204 | else 205 | $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); 206 | }).css('display', ''); 207 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { 208 | togglers.click(); 209 | } 210 | }, 211 | 212 | /** 213 | * helper function to hide the search marks again 214 | */ 215 | hideSearchWords : function() { 216 | $('#searchbox .highlight-link').fadeOut(300); 217 | $('span.highlighted').removeClass('highlighted'); 218 | }, 219 | 220 | /** 221 | * make the url absolute 222 | */ 223 | makeURL : function(relativeURL) { 224 | return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; 225 | }, 226 | 227 | /** 228 | * get the current relative url 229 | */ 230 | getCurrentURL : function() { 231 | var path = document.location.pathname; 232 | var parts = path.split(/\//); 233 | $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { 234 | if (this == '..') 235 | parts.pop(); 236 | }); 237 | var url = parts.join('/'); 238 | return path.substring(url.lastIndexOf('/') + 1, path.length - 1); 239 | } 240 | }; 241 | 242 | // quick alias for translations 243 | _ = Documentation.gettext; 244 | 245 | $(document).ready(function() { 246 | Documentation.init(); 247 | }); 248 | -------------------------------------------------------------------------------- /docs/_build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/_build/html/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/down.png -------------------------------------------------------------------------------- /docs/_build/html/_static/epub.css: -------------------------------------------------------------------------------- 1 | /* 2 | * default.css_t 3 | * ~~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- default theme. 6 | * 7 | * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 
9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | body { 17 | font-family: {{ theme_bodyfont }}; 18 | font-size: 100%; 19 | background-color: {{ theme_footerbgcolor }}; 20 | color: #000; 21 | margin: 0; 22 | padding: 0; 23 | } 24 | 25 | div.document { 26 | background-color: {{ theme_sidebarbgcolor }}; 27 | } 28 | 29 | div.documentwrapper { 30 | float: left; 31 | width: 100%; 32 | } 33 | 34 | div.bodywrapper { 35 | margin: 0 0 0 230px; 36 | } 37 | 38 | div.body { 39 | background-color: {{ theme_bgcolor }}; 40 | color: {{ theme_textcolor }}; 41 | padding: 0 20px 30px 20px; 42 | } 43 | 44 | {%- if theme_rightsidebar|tobool %} 45 | div.bodywrapper { 46 | margin: 0 230px 0 0; 47 | } 48 | {%- endif %} 49 | 50 | div.footer { 51 | color: {{ theme_footertextcolor }}; 52 | width: 100%; 53 | padding: 9px 0 9px 0; 54 | text-align: center; 55 | font-size: 75%; 56 | } 57 | 58 | div.footer a { 59 | color: {{ theme_footertextcolor }}; 60 | text-decoration: underline; 61 | } 62 | 63 | div.related { 64 | background-color: {{ theme_relbarbgcolor }}; 65 | line-height: 30px; 66 | color: {{ theme_relbartextcolor }}; 67 | } 68 | 69 | div.related a { 70 | color: {{ theme_relbarlinkcolor }}; 71 | } 72 | 73 | div.sphinxsidebar { 74 | {%- if theme_stickysidebar|tobool %} 75 | top: 30px; 76 | bottom: 0; 77 | margin: 0; 78 | position: fixed; 79 | overflow: auto; 80 | height: auto; 81 | {%- endif %} 82 | {%- if theme_rightsidebar|tobool %} 83 | float: right; 84 | {%- if theme_stickysidebar|tobool %} 85 | right: 0; 86 | {%- endif %} 87 | {%- endif %} 88 | } 89 | 90 | {%- if theme_stickysidebar|tobool %} 91 | /* this is nice, but it it leads to hidden headings when jumping 92 | to an anchor */ 93 | /* 94 | div.related { 95 | position: fixed; 96 | } 97 | 98 | div.documentwrapper { 99 | margin-top: 30px; 100 | } 101 | */ 102 | {%- endif %} 103 | 104 | div.sphinxsidebar h3 { 105 | font-family: {{ theme_headfont }}; 106 | color: {{ theme_sidebartextcolor }}; 107 | font-size: 1.4em; 108 | font-weight: normal; 109 | margin: 0; 110 | padding: 0; 111 | } 112 | 113 | div.sphinxsidebar h3 a { 114 | color: {{ theme_sidebartextcolor }}; 115 | } 116 | 117 | div.sphinxsidebar h4 { 118 | font-family: {{ theme_headfont }}; 119 | color: {{ theme_sidebartextcolor }}; 120 | font-size: 1.3em; 121 | font-weight: normal; 122 | margin: 5px 0 0 0; 123 | padding: 0; 124 | } 125 | 126 | div.sphinxsidebar p { 127 | color: {{ theme_sidebartextcolor }}; 128 | } 129 | 130 | div.sphinxsidebar p.topless { 131 | margin: 5px 10px 10px 10px; 132 | } 133 | 134 | div.sphinxsidebar ul { 135 | margin: 10px; 136 | padding: 0; 137 | color: {{ theme_sidebartextcolor }}; 138 | } 139 | 140 | div.sphinxsidebar a { 141 | color: {{ theme_sidebarlinkcolor }}; 142 | } 143 | 144 | div.sphinxsidebar input { 145 | border: 1px solid {{ theme_sidebarlinkcolor }}; 146 | font-family: sans-serif; 147 | font-size: 1em; 148 | } 149 | 150 | {% if theme_collapsiblesidebar|tobool %} 151 | /* for collapsible sidebar */ 152 | div#sidebarbutton { 153 | background-color: {{ theme_sidebarbtncolor }}; 154 | } 155 | {% endif %} 156 | 157 | /* -- hyperlink styles ------------------------------------------------------ */ 158 | 159 | a { 160 | color: {{ theme_linkcolor }}; 161 | text-decoration: none; 162 | } 163 | 164 | a:visited { 165 | color: {{ theme_visitedlinkcolor }}; 166 | text-decoration: none; 167 | } 168 | 169 | a:hover { 170 | text-decoration: underline; 171 | } 172 | 173 | {% if 
theme_externalrefs|tobool %} 174 | a.external { 175 | text-decoration: none; 176 | border-bottom: 1px dashed {{ theme_linkcolor }}; 177 | } 178 | 179 | a.external:hover { 180 | text-decoration: none; 181 | border-bottom: none; 182 | } 183 | 184 | a.external:visited { 185 | text-decoration: none; 186 | border-bottom: 1px dashed {{ theme_visitedlinkcolor }}; 187 | } 188 | {% endif %} 189 | 190 | /* -- body styles ----------------------------------------------------------- */ 191 | 192 | div.body h1, 193 | div.body h2, 194 | div.body h3, 195 | div.body h4, 196 | div.body h5, 197 | div.body h6 { 198 | font-family: {{ theme_headfont }}; 199 | background-color: {{ theme_headbgcolor }}; 200 | font-weight: normal; 201 | color: {{ theme_headtextcolor }}; 202 | border-bottom: 1px solid #ccc; 203 | margin: 20px -20px 10px -20px; 204 | padding: 3px 0 3px 10px; 205 | } 206 | 207 | div.body h1 { margin-top: 0; font-size: 200%; } 208 | div.body h2 { font-size: 160%; } 209 | div.body h3 { font-size: 140%; } 210 | div.body h4 { font-size: 120%; } 211 | div.body h5 { font-size: 110%; } 212 | div.body h6 { font-size: 100%; } 213 | 214 | a.headerlink { 215 | color: {{ theme_headlinkcolor }}; 216 | font-size: 0.8em; 217 | padding: 0 4px 0 4px; 218 | text-decoration: none; 219 | } 220 | 221 | a.headerlink:hover { 222 | background-color: {{ theme_headlinkcolor }}; 223 | color: white; 224 | } 225 | 226 | div.body p, div.body dd, div.body li { 227 | text-align: justify; 228 | line-height: 130%; 229 | } 230 | 231 | div.admonition p.admonition-title + p { 232 | display: inline; 233 | } 234 | 235 | div.admonition p { 236 | margin-bottom: 5px; 237 | } 238 | 239 | div.admonition pre { 240 | margin-bottom: 5px; 241 | } 242 | 243 | div.admonition ul, div.admonition ol { 244 | margin-bottom: 5px; 245 | } 246 | 247 | div.note { 248 | background-color: #eee; 249 | border: 1px solid #ccc; 250 | } 251 | 252 | div.seealso { 253 | background-color: #ffc; 254 | border: 1px solid #ff6; 255 | } 256 | 257 | div.topic { 258 | background-color: #eee; 259 | } 260 | 261 | div.warning { 262 | background-color: #ffe4e4; 263 | border: 1px solid #f66; 264 | } 265 | 266 | p.admonition-title { 267 | display: inline; 268 | } 269 | 270 | p.admonition-title:after { 271 | content: ":"; 272 | } 273 | 274 | pre { 275 | padding: 5px; 276 | background-color: {{ theme_codebgcolor }}; 277 | color: {{ theme_codetextcolor }}; 278 | line-height: 120%; 279 | border: 1px solid #ac9; 280 | border-left: none; 281 | border-right: none; 282 | } 283 | 284 | tt { 285 | background-color: #ecf0f3; 286 | padding: 0 1px 0 1px; 287 | font-size: 0.95em; 288 | } 289 | 290 | th { 291 | background-color: #ede; 292 | } 293 | 294 | .warning tt { 295 | background: #efc2c2; 296 | } 297 | 298 | .note tt { 299 | background: #d6d6d6; 300 | } 301 | 302 | .viewcode-back { 303 | font-family: {{ theme_bodyfont }}; 304 | } 305 | 306 | div.viewcode-block:target { 307 | background-color: #f4debf; 308 | border-top: 1px solid #ac9; 309 | border-bottom: 1px solid #ac9; 310 | } 311 | -------------------------------------------------------------------------------- /docs/_build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/file.png -------------------------------------------------------------------------------- /docs/_build/html/_static/footerbg.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/footerbg.png -------------------------------------------------------------------------------- /docs/_build/html/_static/headerbg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/headerbg.png -------------------------------------------------------------------------------- /docs/_build/html/_static/ie6.css: -------------------------------------------------------------------------------- 1 | * html img, 2 | * html .png{position:relative;behavior:expression((this.runtimeStyle.behavior="none")&&(this.pngSet?this.pngSet=true:(this.nodeName == "IMG" && this.src.toLowerCase().indexOf('.png')>-1?(this.runtimeStyle.backgroundImage = "none", 3 | this.runtimeStyle.filter = "progid:DXImageTransform.Microsoft.AlphaImageLoader(src='" + this.src + "',sizingMethod='image')", 4 | this.src = "_static/transparent.gif"):(this.origBg = this.origBg? this.origBg :this.currentStyle.backgroundImage.toString().replace('url("','').replace('")',''), 5 | this.runtimeStyle.filter = "progid:DXImageTransform.Microsoft.AlphaImageLoader(src='" + this.origBg + "',sizingMethod='crop')", 6 | this.runtimeStyle.backgroundImage = "none")),this.pngSet=true) 7 | );} 8 | -------------------------------------------------------------------------------- /docs/_build/html/_static/middlebg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/middlebg.png -------------------------------------------------------------------------------- /docs/_build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/_build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/_build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 8 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 9 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 10 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 11 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 12 | .highlight .ge { font-style: italic } /* Generic.Emph */ 13 | .highlight .gr { color: #FF0000 } 
/* Generic.Error */ 14 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 15 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 16 | .highlight .go { color: #303030 } /* Generic.Output */ 17 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 18 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 19 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 20 | .highlight .gt { color: #0040D0 } /* Generic.Traceback */ 21 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 22 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 23 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 24 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 25 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 26 | .highlight .kt { color: #902000 } /* Keyword.Type */ 27 | .highlight .m { color: #208050 } /* Literal.Number */ 28 | .highlight .s { color: #4070a0 } /* Literal.String */ 29 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 30 | .highlight .nb { color: #007020 } /* Name.Builtin */ 31 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 32 | .highlight .no { color: #60add5 } /* Name.Constant */ 33 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 34 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 35 | .highlight .ne { color: #007020 } /* Name.Exception */ 36 | .highlight .nf { color: #06287e } /* Name.Function */ 37 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 38 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 39 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 40 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 41 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 42 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 43 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 44 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 45 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 46 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 47 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 48 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 49 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 50 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 51 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 52 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 53 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 54 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 55 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 56 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 57 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 58 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 59 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 60 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 61 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 62 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/_build/html/_static/pyramid.css: 
-------------------------------------------------------------------------------- 1 | /* 2 | * pylons.css_t 3 | * ~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- pylons theme. 6 | * 7 | * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | body { 17 | font-family: "Nobile", sans-serif; 18 | font-size: 100%; 19 | background-color: #393939; 20 | color: #ffffff; 21 | margin: 0; 22 | padding: 0; 23 | } 24 | 25 | div.documentwrapper { 26 | float: left; 27 | width: 100%; 28 | } 29 | 30 | div.bodywrapper { 31 | margin: 0 0 0 230px; 32 | } 33 | 34 | hr { 35 | border: 1px solid #B1B4B6; 36 | } 37 | 38 | div.document { 39 | background-color: #eee; 40 | } 41 | 42 | div.header { 43 | width:100%; 44 | background: #f4ad32 url(headerbg.png) repeat-x 0 top; 45 | border-bottom: 2px solid #ffffff; 46 | } 47 | 48 | div.logo { 49 | text-align: center; 50 | padding-top: 10px; 51 | } 52 | 53 | div.body { 54 | background-color: #ffffff; 55 | color: #3E4349; 56 | padding: 0 30px 30px 30px; 57 | font-size: 1em; 58 | border: 2px solid #ddd; 59 | border-right-style: none; 60 | overflow: auto; 61 | } 62 | 63 | div.footer { 64 | color: #ffffff; 65 | width: 100%; 66 | padding: 13px 0; 67 | text-align: center; 68 | font-size: 75%; 69 | background: transparent; 70 | clear:both; 71 | } 72 | 73 | div.footer a { 74 | color: #ffffff; 75 | text-decoration: none; 76 | } 77 | 78 | div.footer a:hover { 79 | color: #e88f00; 80 | text-decoration: underline; 81 | } 82 | 83 | div.related { 84 | line-height: 30px; 85 | color: #373839; 86 | font-size: 0.8em; 87 | background-color: #eee; 88 | } 89 | 90 | div.related a { 91 | color: #1b61d6; 92 | } 93 | 94 | div.related ul { 95 | padding-left: 240px; 96 | } 97 | 98 | div.sphinxsidebar { 99 | font-size: 0.75em; 100 | line-height: 1.5em; 101 | } 102 | 103 | div.sphinxsidebarwrapper{ 104 | padding: 10px 0; 105 | } 106 | 107 | div.sphinxsidebar h3, 108 | div.sphinxsidebar h4 { 109 | font-family: "Neuton", sans-serif; 110 | color: #373839; 111 | font-size: 1.4em; 112 | font-weight: normal; 113 | margin: 0; 114 | padding: 5px 10px; 115 | border-bottom: 2px solid #ddd; 116 | } 117 | 118 | div.sphinxsidebar h4{ 119 | font-size: 1.3em; 120 | } 121 | 122 | div.sphinxsidebar h3 a { 123 | color: #000000; 124 | } 125 | 126 | 127 | div.sphinxsidebar p { 128 | color: #888; 129 | padding: 5px 20px; 130 | } 131 | 132 | div.sphinxsidebar p.topless { 133 | } 134 | 135 | div.sphinxsidebar ul { 136 | margin: 10px 20px; 137 | padding: 0; 138 | color: #373839; 139 | } 140 | 141 | div.sphinxsidebar a { 142 | color: #444; 143 | } 144 | 145 | div.sphinxsidebar input { 146 | border: 1px solid #ccc; 147 | font-family: sans-serif; 148 | font-size: 1em; 149 | } 150 | 151 | div.sphinxsidebar input[type=text]{ 152 | margin-left: 20px; 153 | } 154 | 155 | /* -- sidebars -------------------------------------------------------------- */ 156 | 157 | div.sidebar { 158 | margin: 0 0 0.5em 1em; 159 | border: 2px solid #c6d880; 160 | background-color: #e6efc2; 161 | width: 40%; 162 | float: right; 163 | border-right-style: none; 164 | border-left-style: none; 165 | padding: 10px 20px; 166 | } 167 | 168 | p.sidebar-title { 169 | font-weight: bold; 170 | } 171 | 172 | /* -- body styles ----------------------------------------------------------- */ 173 | 174 | a, a .pre { 175 | color: #1b61d6; 176 | text-decoration: none; 177 
| } 178 | 179 | a:hover, a:hover .pre { 180 | text-decoration: underline; 181 | } 182 | 183 | div.body h1, 184 | div.body h2, 185 | div.body h3, 186 | div.body h4, 187 | div.body h5, 188 | div.body h6 { 189 | font-family: "Neuton", sans-serif; 190 | background-color: #ffffff; 191 | font-weight: normal; 192 | color: #373839; 193 | margin: 30px 0px 10px 0px; 194 | padding: 5px 0; 195 | } 196 | 197 | div.body h1 { border-top: 20px solid white; margin-top: 0; font-size: 200%; } 198 | div.body h2 { font-size: 150%; background-color: #ffffff; } 199 | div.body h3 { font-size: 120%; background-color: #ffffff; } 200 | div.body h4 { font-size: 110%; background-color: #ffffff; } 201 | div.body h5 { font-size: 100%; background-color: #ffffff; } 202 | div.body h6 { font-size: 100%; background-color: #ffffff; } 203 | 204 | a.headerlink { 205 | color: #1b61d6; 206 | font-size: 0.8em; 207 | padding: 0 4px 0 4px; 208 | text-decoration: none; 209 | } 210 | 211 | a.headerlink:hover { 212 | text-decoration: underline; 213 | } 214 | 215 | div.body p, div.body dd, div.body li { 216 | line-height: 1.5em; 217 | } 218 | 219 | div.admonition p.admonition-title + p { 220 | display: inline; 221 | } 222 | 223 | div.highlight{ 224 | background-color: white; 225 | } 226 | 227 | div.note { 228 | border: 2px solid #7a9eec; 229 | border-right-style: none; 230 | border-left-style: none; 231 | padding: 10px 20px 10px 60px; 232 | background: #e1ecfe url(dialog-note.png) no-repeat 10px 8px; 233 | } 234 | 235 | div.seealso { 236 | background: #fff6bf url(dialog-seealso.png) no-repeat 10px 8px; 237 | border: 2px solid #ffd324; 238 | border-left-style: none; 239 | border-right-style: none; 240 | padding: 10px 20px 10px 60px; 241 | } 242 | 243 | div.topic { 244 | background: #eeeeee; 245 | border: 2px solid #C6C9CB; 246 | padding: 10px 20px; 247 | border-right-style: none; 248 | border-left-style: none; 249 | } 250 | 251 | div.warning { 252 | background: #fbe3e4 url(dialog-warning.png) no-repeat 10px 8px; 253 | border: 2px solid #fbc2c4; 254 | border-right-style: none; 255 | border-left-style: none; 256 | padding: 10px 20px 10px 60px; 257 | } 258 | 259 | p.admonition-title { 260 | display: none; 261 | } 262 | 263 | p.admonition-title:after { 264 | content: ":"; 265 | } 266 | 267 | pre { 268 | padding: 10px; 269 | background-color: #fafafa; 270 | color: #222; 271 | line-height: 1.2em; 272 | border: 2px solid #C6C9CB; 273 | font-size: 1.1em; 274 | margin: 1.5em 0 1.5em 0; 275 | border-right-style: none; 276 | border-left-style: none; 277 | } 278 | 279 | tt { 280 | background-color: transparent; 281 | color: #222; 282 | font-size: 1.1em; 283 | font-family: monospace; 284 | } 285 | 286 | .viewcode-back { 287 | font-family: "Nobile", sans-serif; 288 | } 289 | 290 | div.viewcode-block:target { 291 | background-color: #fff6bf; 292 | border: 2px solid #ffd324; 293 | border-left-style: none; 294 | border-right-style: none; 295 | padding: 10px 20px; 296 | } 297 | 298 | table.highlighttable { 299 | width: 100%; 300 | } 301 | 302 | table.highlighttable td { 303 | padding: 0; 304 | } 305 | 306 | a em.std-term { 307 | color: #007f00; 308 | } 309 | 310 | a:hover em.std-term { 311 | text-decoration: underline; 312 | } 313 | 314 | .download { 315 | font-family: "Nobile", sans-serif; 316 | font-weight: normal; 317 | font-style: normal; 318 | } 319 | 320 | tt.xref { 321 | font-weight: normal; 322 | font-style: normal; 323 | } -------------------------------------------------------------------------------- /docs/_build/html/_static/sidebar.js: 
-------------------------------------------------------------------------------- 1 | /* 2 | * sidebar.js 3 | * ~~~~~~~~~~ 4 | * 5 | * This script makes the Sphinx sidebar collapsible. 6 | * 7 | * .sphinxsidebar contains .sphinxsidebarwrapper. This script adds 8 | * in .sphixsidebar, after .sphinxsidebarwrapper, the #sidebarbutton 9 | * used to collapse and expand the sidebar. 10 | * 11 | * When the sidebar is collapsed the .sphinxsidebarwrapper is hidden 12 | * and the width of the sidebar and the margin-left of the document 13 | * are decreased. When the sidebar is expanded the opposite happens. 14 | * This script saves a per-browser/per-session cookie used to 15 | * remember the position of the sidebar among the pages. 16 | * Once the browser is closed the cookie is deleted and the position 17 | * reset to the default (expanded). 18 | * 19 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 20 | * :license: BSD, see LICENSE for details. 21 | * 22 | */ 23 | 24 | $(function() { 25 | // global elements used by the functions. 26 | // the 'sidebarbutton' element is defined as global after its 27 | // creation, in the add_sidebar_button function 28 | var bodywrapper = $('.bodywrapper'); 29 | var sidebar = $('.sphinxsidebar'); 30 | var sidebarwrapper = $('.sphinxsidebarwrapper'); 31 | 32 | // for some reason, the document has no sidebar; do not run into errors 33 | if (!sidebar.length) return; 34 | 35 | // original margin-left of the bodywrapper and width of the sidebar 36 | // with the sidebar expanded 37 | var bw_margin_expanded = bodywrapper.css('margin-left'); 38 | var ssb_width_expanded = sidebar.width(); 39 | 40 | // margin-left of the bodywrapper and width of the sidebar 41 | // with the sidebar collapsed 42 | var bw_margin_collapsed = '.8em'; 43 | var ssb_width_collapsed = '.8em'; 44 | 45 | // colors used by the current theme 46 | var dark_color = $('.related').css('background-color'); 47 | var light_color = $('.document').css('background-color'); 48 | 49 | function sidebar_is_collapsed() { 50 | return sidebarwrapper.is(':not(:visible)'); 51 | } 52 | 53 | function toggle_sidebar() { 54 | if (sidebar_is_collapsed()) 55 | expand_sidebar(); 56 | else 57 | collapse_sidebar(); 58 | } 59 | 60 | function collapse_sidebar() { 61 | sidebarwrapper.hide(); 62 | sidebar.css('width', ssb_width_collapsed); 63 | bodywrapper.css('margin-left', bw_margin_collapsed); 64 | sidebarbutton.css({ 65 | 'margin-left': '0', 66 | 'height': bodywrapper.height() 67 | }); 68 | sidebarbutton.find('span').text('»'); 69 | sidebarbutton.attr('title', _('Expand sidebar')); 70 | document.cookie = 'sidebar=collapsed'; 71 | } 72 | 73 | function expand_sidebar() { 74 | bodywrapper.css('margin-left', bw_margin_expanded); 75 | sidebar.css('width', ssb_width_expanded); 76 | sidebarwrapper.show(); 77 | sidebarbutton.css({ 78 | 'margin-left': ssb_width_expanded-12, 79 | 'height': bodywrapper.height() 80 | }); 81 | sidebarbutton.find('span').text('«'); 82 | sidebarbutton.attr('title', _('Collapse sidebar')); 83 | document.cookie = 'sidebar=expanded'; 84 | } 85 | 86 | function add_sidebar_button() { 87 | sidebarwrapper.css({ 88 | 'float': 'left', 89 | 'margin-right': '0', 90 | 'width': ssb_width_expanded - 28 91 | }); 92 | // create the button 93 | sidebar.append( 94 | '
«
' 95 | ); 96 | var sidebarbutton = $('#sidebarbutton'); 97 | light_color = sidebarbutton.css('background-color'); 98 | // find the height of the viewport to center the '<<' in the page 99 | var viewport_height; 100 | if (window.innerHeight) 101 | viewport_height = window.innerHeight; 102 | else 103 | viewport_height = $(window).height(); 104 | sidebarbutton.find('span').css({ 105 | 'display': 'block', 106 | 'margin-top': (viewport_height - sidebar.position().top - 20) / 2 107 | }); 108 | 109 | sidebarbutton.click(toggle_sidebar); 110 | sidebarbutton.attr('title', _('Collapse sidebar')); 111 | sidebarbutton.css({ 112 | 'color': '#FFFFFF', 113 | 'border-left': '1px solid ' + dark_color, 114 | 'font-size': '1.2em', 115 | 'cursor': 'pointer', 116 | 'height': bodywrapper.height(), 117 | 'padding-top': '1px', 118 | 'margin-left': ssb_width_expanded - 12 119 | }); 120 | 121 | sidebarbutton.hover( 122 | function () { 123 | $(this).css('background-color', dark_color); 124 | }, 125 | function () { 126 | $(this).css('background-color', light_color); 127 | } 128 | ); 129 | } 130 | 131 | function set_position_from_cookie() { 132 | if (!document.cookie) 133 | return; 134 | var items = document.cookie.split(';'); 135 | for(var k=0; k=e.computed&&(e={value:f,computed:g})});return e.value};b.min=function(a,c,d){if(!c&&b.isArray(a))return Math.min.apply(Math,a);var e={computed:Infinity};b.each(a,function(f,g,h){g=c?c.call(d,f,g,h):f;gf?1:0}),"value")};b.sortedIndex=function(a,c,d){d=d||b.identity;for(var e=0,f=a.length;e>1;d(a[g])=0})})};b.zip=function(){for(var a=b.toArray(arguments),c=b.max(b.pluck(a,"length")),d=new Array(c),e=0;e0?f-c:c-f)>=0)return e;e[g++]=f}};b.bind=function(a,c){var d=b.rest(arguments,2);return function(){return a.apply(c||j,d.concat(b.toArray(arguments)))}};b.bindAll=function(a){var c=b.rest(arguments);if(c.length==0)c=b.functions(a);b.each(c,function(d){a[d]=b.bind(a[d],a)}); 17 | return a};b.delay=function(a,c){var d=b.rest(arguments,2);return setTimeout(function(){return a.apply(a,d)},c)};b.defer=function(a){return b.delay.apply(b,[a,1].concat(b.rest(arguments)))};b.wrap=function(a,c){return function(){var d=[a].concat(b.toArray(arguments));return c.apply(c,d)}};b.compose=function(){var a=b.toArray(arguments);return function(){for(var c=b.toArray(arguments),d=a.length-1;d>=0;d--)c=[a[d].apply(this,c)];return c[0]}};b.keys=function(a){if(b.isArray(a))return b.range(0,a.length); 18 | var c=[];for(var d in a)q.call(a,d)&&c.push(d);return c};b.values=function(a){return b.map(a,b.identity)};b.functions=function(a){return b.select(b.keys(a),function(c){return b.isFunction(a[c])}).sort()};b.extend=function(a,c){for(var d in c)a[d]=c[d];return a};b.clone=function(a){if(b.isArray(a))return a.slice(0);return b.extend({},a)};b.tap=function(a,c){c(a);return a};b.isEqual=function(a,c){if(a===c)return true;var d=typeof a;if(d!=typeof c)return false;if(a==c)return true;if(!a&&c||a&&!c)return false; 19 | if(a.isEqual)return a.isEqual(c);if(b.isDate(a)&&b.isDate(c))return a.getTime()===c.getTime();if(b.isNaN(a)&&b.isNaN(c))return true;if(b.isRegExp(a)&&b.isRegExp(c))return a.source===c.source&&a.global===c.global&&a.ignoreCase===c.ignoreCase&&a.multiline===c.multiline;if(d!=="object")return false;if(a.length&&a.length!==c.length)return false;d=b.keys(a);var e=b.keys(c);if(d.length!=e.length)return false;for(var f in a)if(!b.isEqual(a[f],c[f]))return false;return true};b.isEmpty=function(a){return b.keys(a).length== 20 | 
0};b.isElement=function(a){return!!(a&&a.nodeType==1)};b.isArray=function(a){return!!(a&&a.concat&&a.unshift)};b.isArguments=function(a){return a&&b.isNumber(a.length)&&!b.isArray(a)&&!r.call(a,"length")};b.isFunction=function(a){return!!(a&&a.constructor&&a.call&&a.apply)};b.isString=function(a){return!!(a===""||a&&a.charCodeAt&&a.substr)};b.isNumber=function(a){return p.call(a)==="[object Number]"};b.isDate=function(a){return!!(a&&a.getTimezoneOffset&&a.setUTCFullYear)};b.isRegExp=function(a){return!!(a&& 21 | a.test&&a.exec&&(a.ignoreCase||a.ignoreCase===false))};b.isNaN=function(a){return b.isNumber(a)&&isNaN(a)};b.isNull=function(a){return a===null};b.isUndefined=function(a){return typeof a=="undefined"};b.noConflict=function(){j._=n;return this};b.identity=function(a){return a};b.breakLoop=function(){throw m;};var s=0;b.uniqueId=function(a){var c=s++;return a?a+c:c};b.template=function(a,c){a=new Function("obj","var p=[],print=function(){p.push.apply(p,arguments);};with(obj){p.push('"+a.replace(/[\r\t\n]/g, 22 | " ").replace(/'(?=[^%]*%>)/g,"\t").split("'").join("\\'").split("\t").join("'").replace(/<%=(.+?)%>/g,"',$1,'").split("<%").join("');").split("%>").join("p.push('")+"');}return p.join('');");return c?a(c):a};b.forEach=b.each;b.foldl=b.inject=b.reduce;b.foldr=b.reduceRight;b.filter=b.select;b.every=b.all;b.some=b.any;b.head=b.first;b.tail=b.rest;b.methods=b.functions;var l=function(a,c){return c?b(a).chain():a};b.each(b.functions(b),function(a){var c=b[a];i.prototype[a]=function(){var d=b.toArray(arguments); 23 | o.call(d,this._wrapped);return l(c.apply(b,d),this._chain)}});b.each(["pop","push","reverse","shift","sort","splice","unshift"],function(a){var c=Array.prototype[a];i.prototype[a]=function(){c.apply(this._wrapped,arguments);return l(this._wrapped,this._chain)}});b.each(["concat","join","slice"],function(a){var c=Array.prototype[a];i.prototype[a]=function(){return l(c.apply(this._wrapped,arguments),this._chain)}});i.prototype.chain=function(){this._chain=true;return this};i.prototype.value=function(){return this._wrapped}})(); 24 | -------------------------------------------------------------------------------- /docs/_build/html/_static/up-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/up-pressed.png -------------------------------------------------------------------------------- /docs/_build/html/_static/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/_static/up.png -------------------------------------------------------------------------------- /docs/_build/html/disqus_jnlp.html.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | <no title> — Japanese Natural Language Processing 11 | 12 | 13 | 14 | 15 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 33 | 34 | 35 | 36 | 37 | 43 | 44 |
45 |
46 |
47 |
48 | 49 |
50 | 61 | 62 | blog comments powered by Disqus 63 | 64 |
65 |
66 |
67 |
68 |
69 | 81 | 82 |
83 |
84 |
85 |
86 | 92 | 95 | 96 | -------------------------------------------------------------------------------- /docs/_build/html/genindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Index — Japanese Natural Language Processing 13 | 14 | 15 | 16 | 17 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 35 | 36 | 37 | 38 | 39 | 48 | 49 |
50 |
51 |
52 |
53 | 54 | 55 |

Index

56 | 57 |
58 | 59 |
60 | 61 | 62 |
63 |
64 |
65 |
66 |
67 | 68 | 69 | 70 | 82 | 83 |
84 |
85 |
86 |
87 | 96 | 100 | 101 | -------------------------------------------------------------------------------- /docs/_build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/docs/_build/html/objects.inv -------------------------------------------------------------------------------- /docs/_build/html/search.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Search — Japanese Natural Language Processing 11 | 12 | 13 | 14 | 15 | 24 | 25 | 26 | 27 | 28 | 29 | 32 | 33 | 34 | 35 | 38 | 39 | 40 | 41 | 42 | 43 | 49 | 50 |
51 |
52 |
53 |
54 | 55 |

Search

56 |
57 | 58 |

59 | Please activate JavaScript to enable the search 60 | functionality. 61 |

62 |
63 |

64 | From here you can search these documents. Enter your search 65 | words into the box below and click "search". Note that the search 66 | function will automatically search for all of the words. Pages 67 | containing fewer words won't appear in the result list. 68 |

69 |
70 | 71 | 72 | 73 |
74 | 75 |
76 | 77 |
78 | 79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 | 94 | 97 | 98 | -------------------------------------------------------------------------------- /docs/_build/html/searchindex.js: -------------------------------------------------------------------------------- 1 | Search.setIndex({objects:{},terms:{all:0,code:0,queri:0,follow:0,"\u3064\u307e\u308a\u3053\u306e\u524d\u306e\u91d1\u66dc\u65e5\u306b\u99c5\u3067\u898b\u304b\u3051\u305f":0,research:0,morpholog:0,iconv:0,depend:0,father:0,"\u4e00\u822c":0,contentwindow:0,cform:0,bugreport:[],sens:0,hiraganachartfil:0,"\u79c1":0,string:0,"\u30cb":0,electron:0,verb:0,join:0,hachi:0,wsd:0,iframe1:0,button:0,div:[0,1],"\u8aad\u70b9":0,pleas:[0,1],past:[0,1],"\uff15":0,download:0,appendchild:[0,1],neg:0,section:0,version:[],newheight:0,net:0,can:0,gozen:0,jconvert:0,never:0,nichi:0,bodi:[0,1],trunk:0,path:0,u5f7c:0,gaikyou:0,search:0,talent:0,forum:[0,1],"\u6c17\u8c61\u5e81\u304c\uff12\uff11\u65e5\u5348\u524d\uff14\u6642\uff14\uff18\u5206":0,"\u5f7c":0,smoke:0,chang:0,head:[0,1],search_with_exampl:0,scrollwidth:0,negative_scor:0,modul:0,"\u8a8d":0,"\u3081\u3066\u3044\u308b":0,href:[0,1],u79c1:0,instal:0,txt:0,"\u30ef\u30bf\u30b7":0,from:0,thre:0,"\u898b\u304b\u3051":0,two:0,kevincobain2000:0,stylesheet:0,minhash:0,disambigu:0,edict_search:0,edict_dictionari:0,more:0,src:[0,1],ctype:0,"\u4fc2\u52a9\u8a5e":0,train:0,women:0,"\u30cb\u30c1":0,word:0,setup:0,work:0,histori:0,disqus_thread:[0,1],tab:0,eng:0,charset:0,kisyouty:0,classifi:0,"\u30ab\u30ec":0,how:0,"\uff58\u7dda\u5199\u771f\u3067\u7570\u72b6\u304c":0,"\u540d\u8a5e":0,simpl:0,css:0,map:0,"\u6f14\u51fa":0,clone:0,"\u524d":0,befor:[0,1],date:0,marginheight:0,data:0,"\u30c4\u30de\u30ea":0,github:0,noun:0,third:0,author:0,ambigu:0,jcabocha:0,nltk:0,ouput:0,approv:0,jp_wn:0,nbsp:0,edict_examples_path:0,input_sent:0,katakanachart:0,group:0,hiragana:0,"return":0,"\u30c7":0,python:0,sentenc:0,"\u30df\u30ab\u30b1":0,"\u51b7\u3048\u77e5\u3089\u305a":0,hapyou:0,pulkit:0,"\u65e5":0,name:0,edit:[0,1],"\u91d1\u66dc\u65e5":0,token:0,"\u3053\u308c\u3067\u30a2\u30ca\u30bf\u3082\u51b7\u3048\u77e5\u3089\u305a":0,createel:[0,1],"\u76e3\u7763":0,replac:[0,1],chunk:0,ifram:0,recogn:0,baselin:0,variabl:[0,1],"\u6bcd\u306f\u6c7a\u3057\u3066\u79c1\u306e\u7d50\u5a5a\u3092":0,sentiment:0,is0:0,dsq:[0,1],content:0,rel:0,print:0,yoru:0,"\u5168\u90e8":0,base:0,dictionari:0,wnjpn:0,org:0,"\u30ce":0,bash:0,sentiwordnet_3:0,jsentiment:0,iso:0,getelementsbytagnam:[0,1],origin:0,onc:0,"\u3053\u308c\u306f\u308f\u304b\u3089\u306a\u3044\u3067\u3059":0,script:[0,1],licens:0,tok:0,long_substr:0,termin:0,licenc:0,disqu:[0,1],hun:0,pars:0,pronunci:0,provid:0,project:0,contactform:[],posit:0,analysi:0,jread:0,ichi:0,rai:0,packag:0,"\u63a5\u5c3e":0,have:0,kanjid:0,katakana:0,u306f:0,"150px":0,adnoun:0,isti:0,note:0,exampl:[0,1],which:0,"\u5f7c\u306f\u5973\u6027\u306e\u55ab\u7159\u3092\u3044\u3044\u3082\u306e\u3060\u3068":0,"\u8a18\u53f7":0,romaji:0,english:0,chart:0,plan:0,america:0,homepag:0,"class":[0,1],kathuria:0,don:[0,1],"\u52a9\u8a5e":0,doc:0,cover:0,"\u30f2":0,abnorm:0,shortnam:[0,1],"\u3064\u307e\u308a":0,"\u3081\u306a\u3044":0,cabocha:0,nlp:0,wikipedia:0,sentiwordnet:0,"_dict":0,onli:0,configur:[0,1],"\u4ff3\u512a":0,edict:0,"\u3081\u3089\u308c\u307e\u3059":0,contribut:[],pypi:0,"\u6bcd\u306f\u79c1\u306e\u8a08\u753b\u3092\u3088\u3044\u3068":0,repo:0,longest:0,"\u8a8d\u3081\u308b":0,requir:[0,1],enabl:[0,1],emb:[0,1],jprocess:0,borrow:0,common:0,view:[0,1],wiki:0,conform:0,"\u5168\u90e8\u6700\u9ad8":0,see:0,detect:0,enumer:0,en_swn:0,score:0,between:0,edict_search_app:0,"impo
rt":0,appreci:0,"\u683c\u52a9\u8a5e":0,javascript:[0,1],here:0,"\u6211\u3005\u306f\u5f7c\u306e\u624d\u80fd\u3092":0,ch12:0,"\u4ee3\u540d\u8a5e":0,admit:0,"\u79c1\u306f\u5f7c\u3092\uff15\u65e5\u524d":0,"\u526f\u8a5e\u53ef\u80fd":0,com:[0,1],disqus_shortnam:[0,1],comment:[0,1],height:0,jnlp:[0,1],list_of_token:0,ref_noscript:[0,1],convert:0,func:0,positive_scor:0,nlpwww:0,properti:0,sourceforg:0,cobocha:0,"\u6570":0,marriag:0,abov:0,"\u52a9\u6570\u8a5e":0,observ:0,demo:0,wordnet:0,develop:0,japanes:0,parti:0,"\u30de\u30a8":0,read:0,html:0,itali:0,document:[0,1],scrollheight:0,brlink:[0,1],http:[0,1],utf8:0,upon:0,"\u3081\u306a\u3044\u3060\u308d\u3046":0,jtoken:0,tokenizedromaji:0,"\u767a\u8868\u3057\u305f\u5929\u6c17\u6982\u6cc1\u306b\u3088\u308b\u3068":0,entri:0,getelementbyid:0,pickl:0,contact:0,thi:[0,1],mother:0,latest:0,paul:0,languag:0,noscript:[0,1],onload:0,blog:[0,1],framebord:0,"\u30bf":0,bin:0,"\u99c5":0,"\u3053\u308c\u3067\u30a2\u51b7\u3048\u77e5\u3089\u305a\u30ca\u30bf\u3082":0,format:0,webpag:[0,1],"\u30cf":0,uff15:0,output:0,tenki:0,www:0,edict_exampl:0,back:0,sampl:0,home:0,librari:0,cnr:0,definit:0,overlap:0,"\u30b4":0,"\u30ad\u30f3\u30e8\u30a6\u30d3":0,cgi:0,run:0,power:[0,1],usag:[],async:[0,1],"0_20100908":0,eucjp:0,edict_path:0,encod:0,"\u7236\u306f\u6c7a\u3057\u3066\u79c1\u306e\u7d50\u5a5a\u3092":0,your:[0,1],git:0,span:[0,1],s1010205:0,width:0,includ:0,newwidth:0,"var":[0,1],icon:0,"function":[0,1],jpwordnet:0,link:0,"\u30a8\u30ad":0,line:[0,1],"true":[0,1],utf:0,type:[0,1],googlecod:0,below:[0,1],linktoorigin:0,similar:0,parser:0,doesn:0,incomplet:0,"\u30b3\u30ce":0,file:0,logo:[0,1],check:0,yon:0,titl:1,save_with_utf:0,nict:0,book:0,katakanachartfil:0,polar:0,"\u30b9\u30c8\u30fc\u30ea\u30fc":0,u3092:0,"\u3053\u308c\u306f\u4f55\u3067\u3059\u304b":0,goin:0,svn:0,sphinx:0,kanji:0,to_str:0,text:[0,1],time:0,autores:0,jaist:0},objtypes:{},titles:["1   Japanese NLP Library","<no title>"],objnames:{},filenames:["index","disqus_jnlp.html"]}) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # jProcessing documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Mar 7 20:02:01 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = [] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 
34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'jProcessing' 44 | copyright = u'2012, Pulkit Kathuria' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '0.1' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '0.1' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'pyramid' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | html_title = "Japanese Natural Language Processing" 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 
123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | html_use_index = False 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | html_show_sourcelink = False 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | html_show_sphinx = False 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'jProcessingdoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'jProcessing.tex', u'jProcessing Documentation', 187 | u'Pulkit Kathuria', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'jprocessing', u'jProcessing Documentation', 217 | [u'Pulkit Kathuria'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 
221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'jProcessing', u'jProcessing Documentation', 231 | u'Pulkit Kathuria', 'jProcessing', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /docs/disqus_jnlp.html.rst: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 |
4 | 15 | 16 | blog comments powered by Disqus 17 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 | 4 | 5 | 6 | 7 | .. raw:: html 8 | 9 |
Back to Home 10 | 11 | ==================== 12 | Japanese NLP Library 13 | ==================== 14 | 15 | 16 | .. sectnum:: 17 | .. contents:: 18 | 19 | Requirements 20 | ============ 21 | 22 | - Third Party Dependencies 23 | 24 | - Cabocha Japanese Morphological parser http://sourceforge.net/projects/cabocha/ 25 | 26 | - Python Dependencies 27 | 28 | - ``Python 2.6.*`` or above 29 | 30 | 31 | ``Links`` 32 | --------- 33 | 34 | - All code at jProcessing Repo GitHub_ 35 | 36 | .. _GitHub: https://github.com/kevincobain2000/jProcessing 37 | 38 | - Documentation_ and HomePage_ and Sphinx_ 39 | 40 | .. _Documentation: http://www.jaist.ac.jp/~s1010205/jnlp 41 | 42 | .. _HomePage: http://www.jaist.ac.jp/~s1010205/ 43 | 44 | .. _Sphinx: http://readthedocs.org/docs/jprocessing/en/latest/ 45 | 46 | 47 | - PyPi_ Python Package 48 | 49 | .. _PyPi: http://pypi.python.org/pypi/jProcessing/0.1 50 | 51 | :: 52 | 53 | clone git@github.com:kevincobain2000/jProcessing.git 54 | 55 | 56 | ``Install`` 57 | ----------- 58 | 59 | In ``Terminal`` :: 60 | 61 | bash$ python setup.py install 62 | 63 | History 64 | ------- 65 | 66 | - ``0.2`` 67 | 68 | + Sentiment Analysis of Japanese Text 69 | 70 | - ``0.1`` 71 | + Morphologically Tokenize Japanese Sentence 72 | + Kanji / Hiragana / Katakana to Romaji Converter 73 | + Edict Dictionary Search - borrowed 74 | + Edict Examples Search - incomplete 75 | + Sentence Similarity between two JP Sentences 76 | + Run Cabocha(ISO--8859-1 configured) in Python. 77 | + Longest Common String between Sentences 78 | + Kanji to Katakana Pronunciation 79 | + Hiragana, Katakana Chart Parser 80 | 81 | Libraries and Modules 82 | ===================== 83 | 84 | Tokenize ``jTokenize.py`` 85 | ------------------------- 86 | In ``Python`` :: 87 | 88 | >>> from jNlp.jTokenize import jTokenize 89 | >>> input_sentence = u'私は彼を5日前、つまりこの前の金曜日に駅で見かけた' 90 | >>> list_of_tokens = jTokenize(input_sentence) 91 | >>> print list_of_tokens 92 | >>> print '--'.join(list_of_tokens).encode('utf-8') 93 | 94 | Returns: 95 | 96 | :: 97 | 98 | ... [u'\u79c1', u'\u306f', u'\u5f7c', u'\u3092', u'\uff15'...] 99 | ... 私--は--彼--を--5--日--前--、--つまり--この--前--の--金曜日--に--駅--で--見かけ--た 100 | 101 | Katakana Pronunciation: 102 | 103 | :: 104 | 105 | >>> print '--'.join(jReads(input_sentence)).encode('utf-8') 106 | ... ワタシ--ハ--カレ--ヲ--ゴ--ニチ--マエ--、--ツマリ--コノ--マエ--ノ--キンヨウビ--ニ--エキ--デ--ミカケ--タ 107 | 108 | 109 | Cabocha ``jCabocha.py`` 110 | ----------------------- 111 | 112 | Run Cabocha_ with original ``EUCJP`` or ``IS0-8859-1`` configured encoding, with ``utf8`` python 113 | 114 | .. _Cabocha: http://code.google.com/p/cabocha/ 115 | 116 | - If cabocha is configured as ``utf8`` then see this http://nltk.googlecode.com/svn/trunk/doc/book-jp/ch12.html#cabocha 117 | 118 | .. code-block:: python 119 | 120 | >>> from jNlp.jCabocha import cabocha 121 | >>> print cabocha(input_sentence).encode('utf-8') 122 | 123 | Output: 124 | 125 | .. code-block:: xml 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | Kanji / Katakana /Hiragana to Tokenized Romaji ``jConvert.py`` 146 | -------------------------------------------------------------- 147 | 148 | Uses ``data/katakanaChart.txt`` and parses the chart. See katakanaChart_. 149 | 150 | .. code-block:: python 151 | 152 | >>> from jNlp.jConvert import * 153 | >>> input_sentence = u'気象庁が21日午前4時48分、発表した天気概況によると、' 154 | >>> print ' '.join(tokenizedRomaji(input_sentence)) 155 | >>> print tokenizedRomaji(input_sentence) 156 | 157 | .. 
code-block:: python 158 | 159 | ...kisyoutyou ga ni ichi nichi gozen yon ji yon hachi hun hapyou si ta tenki gaikyou ni yoru to 160 | ...[u'kisyoutyou', u'ga', u'ni', u'ichi', u'nichi', u'gozen',...] 161 | 162 | 163 | **katakanaChart.txt** 164 | 165 | 166 | .. _katakanaChart: 167 | 168 | - katakanaChartFile_ and hiraganaChartFile_ 169 | 170 | .. _katakanaChartFile: https://raw.github.com/kevincobain2000/jProcessing/master/src/jNlp/data/katakanaChart.txt 171 | 172 | .. _hiraganaChartFile: https://raw.github.com/kevincobain2000/jProcessing/master/src/jNlp/data/hiraganaChart.txt 173 | 174 | 175 | Longest Common String Japanese ``jProcessing.py`` 176 | ------------------------------------------------- 177 | 178 | On English Strings :: 179 | 180 | >>> from jNlp.jProcessing import long_substr 181 | >>> a = 'Once upon a time in Italy' 182 | >>> b = 'There was a time in America' 183 | >>> print long_substr(a, b) 184 | 185 | Output :: 186 | 187 | ...a time in 188 | 189 | On Japanese Strings :: 190 | 191 | >>> a = u'これでアナタも冷え知らず' 192 | >>> b = u'これでア冷え知らずナタも' 193 | >>> print long_substr(a, b).encode('utf-8') 194 | 195 | Output :: 196 | 197 | ...冷え知らず 198 | 199 | Similarity between two sentences ``jProcessing.py`` 200 | --------------------------------------------------- 201 | Uses MinHash by checking the overlap; see http://en.wikipedia.org/wiki/MinHash 202 | 203 | :English Strings: 204 | 205 | >>> from jNlp.jProcessing import Similarities 206 | >>> s = Similarities() 207 | >>> a = 'There was' 208 | >>> b = 'There is' 209 | >>> print s.minhash(a,b) 210 | ...0.444444444444 211 | 212 | :Japanese Strings: 213 | 214 | >>> from jNlp.jProcessing import * 215 | >>> a = u'これは何ですか?' 216 | >>> b = u'これはわからないです' 217 | >>> print s.minhash(' '.join(jTokenize(a)), ' '.join(jTokenize(b))) 218 | ...0.210526315789 219 | 220 | Edict Japanese Dictionary Search with Example sentences 221 | ======================================================= 222 | 223 | Sample Output Demo 224 | ------------------ 225 | 226 | .. raw:: html 227 | 228 | 244 | 245 | 246 | 247 | Edict dictionary and example sentences parser. 248 | ---------------------------------------------- 249 | 250 | This package uses the EDICT_ and KANJIDIC_ dictionary files. 251 | These files are the property of the 252 | Electronic Dictionary Research and Development Group_ , and 253 | are used in conformance with the Group's licence_ . 254 | 255 | .. _EDICT: http://www.csse.monash.edu.au/~jwb/edict.html 256 | .. _KANJIDIC: http://www.csse.monash.edu.au/~jwb/kanjidic.html 257 | .. _Group: http://www.edrdg.org/ 258 | .. _licence: http://www.edrdg.org/edrdg/licence.html 259 | 260 | Edict parser by **Paul Goins**, see ``edict_search.py``. 261 | Edict example sentences search by query, by **Pulkit Kathuria**, see ``edict_examples.py``. 262 | Edict examples pickle files are provided, but the latest example files can be downloaded from the links provided. 263 | 264 | Charset 265 | ------- 266 | Two files are used: 267 | 268 | - ``utf8`` Charset example file if not using ``src/jNlp/data/edict_examples`` 269 | 270 | To convert ``EUCJP/ISO-8859-1`` to ``utf8`` :: 271 | 272 | iconv -f EUCJP -t UTF-8 path/to/edict_examples > path/to/save_with_utf-8 273 | 274 | - ``ISO-8859-1`` edict_dictionary file 275 | 276 | Outputs example sentences for a query in Japanese only for ambiguous words. 277 | 278 | 279 | Links 280 | ----- 281 | 282 | **Latest** Dictionary files can be downloaded here_ 283 | 284 | .. 
_here: http://www.csse.monash.edu.au/~jwb/edict.html 285 | 286 | ``edict_search.py`` 287 | ------------------- 288 | :author: Paul Goins (license included); original source: linkToOriginal_ 289 | 290 | .. _linkToOriginal: http://repo.or.cz/w/jbparse.git/blame/8e42831ca5f721c0320b27d7d83cb553d6e9c68f:/jbparse/edict.py 291 | 292 | To list all matching entries with their sense definitions: 293 | 294 | >>> from jNlp.edict_search import * 295 | >>> query = u'認める' 296 | >>> edict_path = 'src/jNlp/data/edict-yy-mm-dd' 297 | >>> kp = Parser(edict_path) 298 | >>> for i, entry in enumerate(kp.search(query)): 299 | ... print entry.to_string().encode('utf-8') 300 | 301 | 302 | ``edict_examples.py`` 303 | --------------------- 304 | :Note: Only outputs example sentences for ambiguous words (words with more than one sense) 305 | 306 | :author: Pulkit Kathuria 307 | 308 | >>> from jNlp.edict_examples import * 309 | >>> query = u'認める' 310 | >>> edict_path = 'src/jNlp/data/edict-yy-mm-dd' 311 | >>> edict_examples_path = 'src/jNlp/data/edict_examples' 312 | >>> search_with_example(edict_path, edict_examples_path, query) 313 | 314 | Output :: 315 | 316 | 認める 317 | 318 | Sense (1) to recognize; 319 | EX:01 我々は彼の才能を*認*めている。We appreciate his talent. 320 | 321 | Sense (2) to observe; 322 | EX:01 x線写真で異状が*認*められます。We have detected an abnormality on your x-ray. 323 | 324 | Sense (3) to admit; 325 | EX:01 母は私の計画をよいと*認*めた。Mother approved my plan. 326 | EX:02 母は決して私の結婚を*認*めないだろう。Mother will never approve of my marriage. 327 | EX:03 父は決して私の結婚を*認*めないだろう。Father will never approve of my marriage. 328 | EX:04 彼は女性の喫煙をいいものだと*認*めない。He doesn't approve of women smoking. 329 | ... 330 | 331 | Sentiment Analysis of Japanese Text 332 | ===================================== 333 | 334 | This section covers sentiment analysis of Japanese text through word sense mapping with Wordnet-jp_ (Japanese WordNet, file name ``wnjpn-all.tab``) and SentiWordnet_ (English SentiWordNet, file name ``SentiWordNet_3.*.txt``). 335 | 336 | .. _Wordnet-jp: http://nlpwww.nict.go.jp/wn-ja/eng/downloads.html 337 | .. _SentiWordnet: http://sentiwordnet.isti.cnr.it/ 338 | 339 | Wordnet files download links 340 | ---------------------------- 341 | 342 | 1. http://nlpwww.nict.go.jp/wn-ja/eng/downloads.html 343 | 2. http://sentiwordnet.isti.cnr.it/ 344 | 345 | How to Use 346 | ---------- 347 | 348 | The following classifier is a baseline: it uses a simple WordNet mapping between English and Japanese and classifies on polarity scores from SentiWordNet. 349 | 350 | - All parts of speech are included (adnouns, nouns, verbs, ...) 351 | - No WSD module is applied to the Japanese sentence 352 | - Each word's most common sense is used for its polarity score 353 | 354 | >>> from jNlp.jSentiments import * 355 | >>> jp_wn = '../../../../data/wnjpn-all.tab' 356 | >>> en_swn = '../../../../data/SentiWordNet_3.0.0_20100908.txt' 357 | >>> classifier = Sentiment() 358 | >>> classifier.train(en_swn, jp_wn) 359 | >>> text = u'監督、俳優、ストーリー、演出、全部最高!' 
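>>> # baseline(text) prints the text's positive and negative scores and an overall polarity judgement (see the output below)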
360 | >>> print classifier.baseline(text) 361 | ...Pos Score = 0.625 Neg Score = 0.125 362 | ...Text is Positive 363 | 364 | Japanese Word Polarity Score 365 | ---------------------------- 366 | 367 | >>> from jNlp.jSentiments import * 368 | >>> jp_wn = '_dicts/wnjpn-all.tab' #path to Japanese Word Net 369 | >>> en_swn = '_dicts/SentiWordNet_3.0.0_20100908.txt' #Path to SentiWordNet 370 | >>> classifier = Sentiment() 371 | >>> sentiwordnet, jpwordnet = classifier.train(en_swn, jp_wn) 372 | >>> positive_score = sentiwordnet[jpwordnet[u'全部']][0] 373 | >>> negative_score = sentiwordnet[jpwordnet[u'全部']][1] 374 | >>> print 'pos score = {0}, neg score = {1}'.format(positive_score, negative_score) 375 | ...pos score = 0.625, neg score = 0.0 376 | 377 | 378 | Contacts 379 | ======== 380 | 381 | :Author: `pulkit[at]jaist.ac.jp` [change ``at`` with ``@``] 382 | 383 | 384 | .. include:: disqus_jnlp.html.rst 385 | 386 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 
75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\jProcessing.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\jProcessing.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 
187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /scripts/vcabocha.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from jNlp.jCabocha import * 4 | from jNlp.jTokenize import * 5 | import argparse 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser(add_help = True) 9 | parser = argparse.ArgumentParser(description= 'No description sepecified') 10 | parser.add_argument('-a', action="store", dest="action", type=unicode, help='-a [cabocha, tokenize, base, read, pos]') 11 | parser.add_argument('-s', action="store", dest="sentence", type=str, help='-s Sentence') 12 | myarguments = parser.parse_args() 13 | sent = unicode(myarguments.sentence,'utf-8') 14 | print myarguments.action 15 | if myarguments.action == "cabocha": 16 | print cabocha(sent).encode('utf-8') 17 | elif myarguments.action == "tokenize": 18 | print 'Tokenized' 19 | print '=========' 20 | print '\n'.join(jTokenize(sent)) 21 | elif myarguments.action: 22 | tokenized = jTokenize(sent) 23 | info = jInfo(sent, infotype=myarguments.action) 24 | mxlen = len(max(max(tokenized, key=len), max(info, key=len))) + 30 25 | print '{0:{mx}}{1:}'.format('Sent',myarguments.action, mx = mxlen) 26 | print '{0:{mx}}{1:}'.format('====','='*len(myarguments.action), mx = mxlen) 27 | 28 | for i, j in zip(tokenized, info): 29 | i = i.encode('utf-8') 30 | j = j.encode('utf-8') 31 | print '{0:{mx}}{1:<}'.format(i,j, mx = mxlen) 32 | else: 33 | print cabocha(sent).encode('utf-8') 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | from setuptools import setup, find_packages 5 | def read(fname): 6 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 7 | setup( 8 | name = 'jProcessing', #First Level Dir 9 | version='0.1', 10 | author='KATHURIA Pulkit', 11 | author_email='pulkit@jaist.ac.jp', 12 | packages= find_packages('src'), 13 | scripts = ['scripts/vcabocha.py'], 14 | package_dir = {'':'src'}, 15 | package_data = {'': ['data/*'], 16 | }, 17 | include_package_data = True, 18 | exclude_package_data = {'': ['jNlp/*.p']}, 19 | url='http://www.jaist.ac.jp/~s1010205', 20 | license='LICENSE.txt', 21 | description='Japanese NLP Utilities', 22 | long_description=open('README').read(), 23 | classifiers=['Development Status :: 2 - Pre-Alpha','Natural Language :: Japanese', 24 | 'Topic :: Scientific/Engineering :: Artificial Intelligence'], 25 | 26 | ) 27 | 28 | """ 29 | File System 30 | =========== 31 | jNlp/ 32 | setup.py 33 | README 34 | LICENCE.txt 35 | scripts/ 36 | ... 37 | src/ 38 | jNlp/ 39 | __init__.py 40 | jCabocha.py #see foo.py to check how to access somefile.dat 41 | jTokenize.py 42 | jConvert.py 43 | jColor.py 44 | edict_search.py 45 | edict_examples.py 46 | jSentiments.py 47 | 48 | classifiers/ 49 | .. 50 | data/ 51 | katakanaChart.txt 52 | hiraganaChart.txt 53 | edict dictionary files *not included* 54 | jnlp/ 55 | *not with this package*#see MANIFEST.in 56 | ... 
57 | _dicts/ 58 | dict files *NA* 59 | """ 60 | -------------------------------------------------------------------------------- /src/jNlp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/src/jNlp/__init__.py -------------------------------------------------------------------------------- /src/jNlp/aquisition/OpenSubtitles.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # This file is part of periscope. 4 | # 5 | # periscope is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU Lesser General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # periscope is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public License 16 | # along with periscope; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 | 19 | import os, struct, xmlrpclib, commands, gzip, traceback, logging 20 | import socket # For timeout purposes 21 | 22 | import SubtitleDatabase 23 | 24 | log = logging.getLogger(__name__) 25 | 26 | OS_LANGS ={ "en": "eng", 27 | "fr" : "fre", 28 | "hu": "hun", 29 | "cs": "cze", 30 | "pl" : "pol", 31 | "sk" : "slo", 32 | "pt" : "por", 33 | "pt-br" : "pob", 34 | "es" : "spa", 35 | "el" : "ell", 36 | "ar":"ara", 37 | 'sq':'alb', 38 | "hy":"arm", 39 | "ay":"ass", 40 | "bs":"bos", 41 | "bg":"bul", 42 | "ca":"cat", 43 | "zh":"chi", 44 | "hr":"hrv", 45 | "da":"dan", 46 | "nl":"dut", 47 | "eo":"epo", 48 | "et":"est", 49 | "fi":"fin", 50 | "gl":"glg", 51 | "ka":"geo", 52 | "de":"ger", 53 | "he":"heb", 54 | "hi":"hin", 55 | "is":"ice", 56 | "id":"ind", 57 | "it":"ita", 58 | "ja":"jpn", 59 | "kk":"kaz", 60 | "ko":"kor", 61 | "lv":"lav", 62 | "lt":"lit", 63 | "lb":"ltz", 64 | "mk":"mac", 65 | "ms":"may", 66 | "no":"nor", 67 | "oc":"oci", 68 | "fa":"per", 69 | "ro":"rum", 70 | "ru":"rus", 71 | "sr":"scc", 72 | "sl":"slv", 73 | "sv":"swe", 74 | "th":"tha", 75 | "tr":"tur", 76 | "uk":"ukr", 77 | "vi":"vie"} 78 | 79 | class OpenSubtitles(SubtitleDatabase.SubtitleDB): 80 | url = "http://www.opensubtitles.org/" 81 | site_name = "OpenSubtitles" 82 | 83 | def __init__(self):#, config, cache_folder_path): 84 | super(OpenSubtitles, self).__init__(OS_LANGS) 85 | self.server_url = 'http://api.opensubtitles.org/xml-rpc' 86 | self.revertlangs = dict(map(lambda item: (item[1],item[0]), self.langs.items())) 87 | 88 | def process(self, filepath, langs): 89 | ''' main method to call on the plugin, pass the filename and the wished 90 | languages and it will query OpenSubtitles.org ''' 91 | if os.path.isfile(filepath): 92 | filehash = self.hashFile(filepath) 93 | log.debug(filehash) 94 | size = os.path.getsize(filepath) 95 | fname = self.getFileName(filepath) 96 | return self.query(moviehash=filehash, langs=langs, bytesize=size, filename=fname) 97 | else: 98 | fname = self.getFileName(filepath) 99 | return self.query(langs=langs, filename=fname) 100 | 101 | def createFile(self, subtitle): 102 | '''pass the URL of the sub and the file it 
matches, will unzip it 103 | and return the path to the created file''' 104 | suburl = subtitle["link"] 105 | videofilename = subtitle["filename"] 106 | srtbasefilename = videofilename.rsplit(".", 1)[0] 107 | self.downloadFile(suburl, srtbasefilename + ".srt.gz") 108 | f = gzip.open(srtbasefilename+".srt.gz") 109 | dump = open(srtbasefilename+".srt", "wb") 110 | dump.write(f.read()) 111 | dump.close() 112 | f.close() 113 | os.remove(srtbasefilename+".srt.gz") 114 | return srtbasefilename+".srt" 115 | 116 | def query(self, filename, imdbID=None, moviehash=None, bytesize=None, langs=None): 117 | ''' Makes a query on opensubtitles and returns info about found subtitles. 118 | Note: if using moviehash, bytesize is required. ''' 119 | log.debug('query') 120 | #Prepare the search 121 | search = {} 122 | sublinks = [] 123 | if moviehash: search['moviehash'] = moviehash 124 | if imdbID: search['imdbid'] = imdbID 125 | if bytesize: search['moviebytesize'] = str(bytesize) 126 | if langs: search['sublanguageid'] = ",".join([self.getLanguage(lang) for lang in langs]) 127 | if len(search) == 0: 128 | log.debug("No search term, we'll use the filename") 129 | # Let's try to guess what to search: 130 | guessed_data = self.guessFileData(filename) 131 | search['query'] = guessed_data['name'] 132 | log.debug(search['query']) 133 | 134 | #Login 135 | self.server = xmlrpclib.Server(self.server_url) 136 | socket.setdefaulttimeout(10) 137 | try: 138 | log_result = self.server.LogIn("","","eng","periscope") 139 | log.debug(log_result) 140 | token = log_result["token"] 141 | except Exception: 142 | log.error("Open subtitles could not be contacted for login") 143 | token = None 144 | socket.setdefaulttimeout(None) 145 | return [] 146 | if not token: 147 | log.error("Open subtitles did not return a token after logging in.") 148 | return [] 149 | 150 | # Search 151 | self.filename = filename #Used to order the results 152 | sublinks += self.get_results(token, search) 153 | 154 | # Logout 155 | try: 156 | self.server.LogOut(token) 157 | except: 158 | log.error("Open subtitles could not be contacted for logout") 159 | socket.setdefaulttimeout(None) 160 | return sublinks 161 | 162 | 163 | def get_results(self, token, search): 164 | log.debug("query: token='%s', search='%s'" % (token, search)) 165 | try: 166 | if search: 167 | results = self.server.SearchSubtitles(token, [search]) 168 | except Exception, e: 169 | log.error("Could not query the server OpenSubtitles") 170 | log.debug(e) 171 | return [] 172 | log.debug("Result: %s" %str(results)) 173 | 174 | sublinks = [] 175 | if results['data']: 176 | log.debug(results['data']) 177 | # OpenSubtitles hash function is not robust ... 
We'll use the MovieReleaseName to help us select the best candidate 178 | for r in sorted(results['data'], self.sort_by_moviereleasename): 179 | # Only added if the MovieReleaseName matches the file 180 | result = {} 181 | result["release"] = r['SubFileName'] 182 | result["link"] = r['SubDownloadLink'] 183 | result["page"] = r['SubDownloadLink'] 184 | result["lang"] = self.getLG(r['SubLanguageID']) 185 | if search.has_key("query") : #We are using the guessed file name, let's remove some results 186 | if r["MovieReleaseName"].startswith(self.filename): 187 | sublinks.append(result) 188 | else: 189 | log.debug("Removing %s because release '%s' has not right start %s" %(result["release"], r["MovieReleaseName"], self.filename)) 190 | else : 191 | sublinks.append(result) 192 | return sublinks 193 | 194 | def sort_by_moviereleasename(self, x, y): 195 | ''' sorts based on the movierelease name tag. More matching, returns 1''' 196 | #TODO add also support for subtitles release 197 | xmatch = x['MovieReleaseName'] and (x['MovieReleaseName'].find(self.filename)>-1 or self.filename.find(x['MovieReleaseName'])>-1) 198 | ymatch = y['MovieReleaseName'] and (y['MovieReleaseName'].find(self.filename)>-1 or self.filename.find(y['MovieReleaseName'])>-1) 199 | #print "analyzing %s and %s = %s and %s" %(x['MovieReleaseName'], y['MovieReleaseName'], xmatch, ymatch) 200 | if xmatch and ymatch: 201 | if x['MovieReleaseName'] == self.filename or x['MovieReleaseName'].startswith(self.filename) : 202 | return -1 203 | return 0 204 | if not xmatch and not ymatch: 205 | return 0 206 | if xmatch and not ymatch: 207 | return -1 208 | if not xmatch and ymatch: 209 | return 1 210 | return 0 211 | 212 | if __name__ == "__main__": 213 | subs = OpenSubtitles() 214 | print subs.query('Titanic') 215 | -------------------------------------------------------------------------------- /src/jNlp/aquisition/SubtitleDatabase.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # This file is part of periscope. 4 | # 5 | # periscope is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU Lesser General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # periscope is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public License 16 | # along with periscope; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 | 19 | import os, shutil, urllib2, sys, logging, traceback, zipfile 20 | import struct 21 | import socket # For timeout purposes 22 | import re 23 | 24 | log = logging.getLogger(__name__) 25 | 26 | USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.3)' 27 | 28 | class SubtitleDB(object): 29 | ''' Base (kind of abstract) class that represent a SubtitleDB, usually a website. 
Should be rewritten using abc module in Python 2.6/3K''' 30 | def __init__(self, langs, revertlangs = None): 31 | if langs: 32 | self.langs = langs 33 | self.revertlangs = dict(map(lambda item: (item[1],item[0]), self.langs.items())) 34 | if revertlangs: 35 | self.revertlangs = revertlangs 36 | self.langs = dict(map(lambda item: (item[1],item[0]), self.revertlangs.items())) 37 | self.tvshowRegex = re.compile('(?P.*)S(?P[0-9]{2})E(?P[0-9]{2}).(?P.*)', re.IGNORECASE) 38 | self.tvshowRegex2 = re.compile('(?P.*).(?P[0-9]{1,2})x(?P[0-9]{1,2}).(?P.*)', re.IGNORECASE) 39 | self.movieRegex = re.compile('(?P.*)[\.|\[|\(| ]{1}(?P(?:(?:19|20)[0-9]{2}))(?P.*)', re.IGNORECASE) 40 | 41 | def searchInThread(self, queue, filename, langs): 42 | ''' search subtitles with the given filename for the given languages''' 43 | try: 44 | subs = self.process(filename, langs) 45 | map(lambda item: item.setdefault("plugin", self), subs) 46 | map(lambda item: item.setdefault("filename", filename), subs) 47 | log.info("%s writing %s items to queue" % (self.__class__.__name__, len(subs))) 48 | except: 49 | log.exception("Error occured") 50 | subs = [] 51 | queue.put(subs, True) # Each plugin must write as the caller periscopy.py waits for an result on the queue 52 | 53 | def process(self, filepath, langs): 54 | ''' main method to call on the plugin, pass the filename and the wished 55 | languages and it will query the subtitles source ''' 56 | fname = self.getFileName(filepath) 57 | try: 58 | return self.query(fname, langs) 59 | except Exception, e: 60 | log.exception("Error occured") 61 | return [] 62 | 63 | def createFile(self, subtitle): 64 | '''pass the URL of the sub and the file it matches, will unzip it 65 | and return the path to the created file''' 66 | suburl = subtitle["link"] 67 | videofilename = subtitle["filename"] 68 | srtbasefilename = videofilename.rsplit(".", 1)[0] 69 | zipfilename = srtbasefilename +".zip" 70 | self.downloadFile(suburl, zipfilename) 71 | 72 | if zipfile.is_zipfile(zipfilename): 73 | log.debug("Unzipping file " + zipfilename) 74 | zf = zipfile.ZipFile(zipfilename, "r") 75 | for el in zf.infolist(): 76 | if el.orig_filename.rsplit(".", 1)[1] in ("srt", "sub", "txt"): 77 | outfile = open(srtbasefilename + "." + el.orig_filename.rsplit(".", 1)[1], "wb") 78 | outfile.write(zf.read(el.orig_filename)) 79 | outfile.flush() 80 | outfile.close() 81 | else: 82 | log.info("File %s does not seem to be valid " %el.orig_filename) 83 | # Deleting the zip file 84 | zf.close() 85 | os.remove(zipfilename) 86 | return srtbasefilename + ".srt" 87 | else: 88 | log.info("Unexpected file type (not zip)") 89 | os.remove(zipfilename) 90 | return None 91 | 92 | def downloadContent(self, url, timeout = None): 93 | ''' Downloads the given url and returns its contents.''' 94 | try: 95 | log.debug("Downloading %s" % url) 96 | req = urllib2.Request(url, headers={'Referer' : url, 'User-Agent' : USER_AGENT}) 97 | if timeout: 98 | socket.setdefaulttimeout(timeout) 99 | f = urllib2.urlopen(req) 100 | content = f.read() 101 | f.close() 102 | return content 103 | except urllib2.HTTPError, e: 104 | log.warning("HTTP Error: %s - %s" % (e.code, url)) 105 | except urllib2.URLError, e: 106 | log.warning("URL Error: %s - %s" % (e.reason, url)) 107 | 108 | def downloadFile(self, url, filename): 109 | ''' Downloads the given url to the given filename ''' 110 | content = self.downloadContent(url) 111 | dump = open(filename, "wb") 112 | dump.write(content) 113 | dump.close() 114 | log.debug("Download finished to file %s. 
Size : %s"%(filename,os.path.getsize(filename))) 115 | 116 | def getLG(self, language): 117 | ''' Returns the short (two-character) representation of the long language name''' 118 | try: 119 | return self.revertlangs[language] 120 | except KeyError, e: 121 | log.warn("Ooops, you found a missing language in the config file of %s: %s. Send a bug report to have it added." %(self.__class__.__name__, language)) 122 | 123 | def getLanguage(self, lg): 124 | ''' Returns the long naming of the language on a two character code ''' 125 | try: 126 | return self.langs[lg] 127 | except KeyError, e: 128 | log.warn("Ooops, you found a missing language in the config file of %s: %s. Send a bug report to have it added." %(self.__class__.__name__, lg)) 129 | 130 | def query(self, token): 131 | raise TypeError("%s has not implemented method '%s'" %(self.__class__.__name__, sys._getframe().f_code.co_name)) 132 | 133 | def fileExtension(self, filename): 134 | ''' Returns the file extension (without the dot)''' 135 | return os.path.splitext(filename)[1][1:].lower() 136 | 137 | def getFileName(self, filepath): 138 | if os.path.isfile(filepath): 139 | filename = os.path.basename(filepath) 140 | else: 141 | filename = filepath 142 | if filename.endswith(('.avi', '.wmv', '.mov', '.mp4', '.mpeg', '.mpg', '.mkv')): 143 | fname = filename.rsplit('.', 1)[0] 144 | else: 145 | fname = filename 146 | return fname 147 | 148 | def guessFileData(self, filename): 149 | filename = unicode(self.getFileName(filename).lower()) 150 | matches_tvshow = self.tvshowRegex.match(filename) 151 | if matches_tvshow: # It looks like a tv show 152 | (tvshow, season, episode, teams) = matches_tvshow.groups() 153 | tvshow = tvshow.replace(".", " ").strip() 154 | teams = teams.split('.') 155 | return {'type' : 'tvshow', 'name' : tvshow.strip(), 'season' : int(season), 'episode' : int(episode), 'teams' : teams} 156 | else: 157 | matches_tvshow = self.tvshowRegex2.match(filename) 158 | if matches_tvshow: 159 | (tvshow, season, episode, teams) = matches_tvshow.groups() 160 | tvshow = tvshow.replace(".", " ").strip() 161 | teams = teams.split('.') 162 | return {'type' : 'tvshow', 'name' : tvshow.strip(), 'season' : int(season), 'episode' : int(episode), 'teams' : teams} 163 | else: 164 | matches_movie = self.movieRegex.match(filename) 165 | if matches_movie: 166 | (movie, year, teams) = matches_movie.groups() 167 | teams = teams.split('.') 168 | part = None 169 | if "cd1" in teams : 170 | teams.remove('cd1') 171 | part = 1 172 | if "cd2" in teams : 173 | teams.remove('cd2') 174 | part = 2 175 | return {'type' : 'movie', 'name' : movie.strip(), 'year' : year, 'teams' : teams, 'part' : part} 176 | else: 177 | return {'type' : 'unknown', 'name' : filename, 'teams' : [] } 178 | 179 | def hashFile(self, name): 180 | ''' 181 | Calculates the Hash à-la Media Player Classic as it is the hash used by OpenSubtitles. 182 | By the way, this is not a very robust hash code. 
183 | ''' 184 | longlongformat = 'Q' # unsigned long long little endian 185 | bytesize = struct.calcsize(longlongformat) 186 | format= "<%d%s" % (65536//bytesize, longlongformat) 187 | 188 | f = open(name, "rb") 189 | filesize = os.fstat(f.fileno()).st_size 190 | hash = filesize 191 | 192 | if filesize < 65536 * 2: 193 | log.error('File is too small') 194 | return "SizeError" 195 | 196 | buffer= f.read(65536) 197 | longlongs= struct.unpack(format, buffer) 198 | hash+= sum(longlongs) 199 | 200 | f.seek(-65536, os.SEEK_END) # size is always > 131072 201 | buffer= f.read(65536) 202 | longlongs= struct.unpack(format, buffer) 203 | hash+= sum(longlongs) 204 | hash&= 0xFFFFFFFFFFFFFFFF 205 | 206 | f.close() 207 | returnedhash = "%016x" % hash 208 | return returnedhash 209 | 210 | 211 | class InvalidFileException(Exception): 212 | ''' Exception object to be raised when the file is invalid''' 213 | def __init__(self, filename, reason): 214 | self.filename = filename 215 | self.reason = reason 216 | def __str__(self): 217 | return (repr(filename), repr(reason)) 218 | -------------------------------------------------------------------------------- /src/jNlp/aquisition/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/src/jNlp/aquisition/__init__.py -------------------------------------------------------------------------------- /src/jNlp/aquisition/aquire.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from OpenSubtitles import * 4 | 5 | def get_movie_names(directory): 6 | movienames = [] 7 | for line in open('movies.txt').readlines(): 8 | if not line.strip():continue 9 | movienames.append(line.strip()) 10 | return movienames 11 | 12 | if __name__ == '__main__': 13 | opensubs = OpenSubtitles() 14 | out = open('download_subs.xml','wb') 15 | for moviename in get_movie_names('movies.txt'): 16 | avail_en = '' 17 | avail_jp = '' 18 | try: 19 | all_langs = opensubs.query(moviename) 20 | for info_dic in all_langs: 21 | if info_dic['lang'] == 'en': 22 | avail_en = 'en' 23 | down_en = info_dic['link'] 24 | if info_dic['lang'] == 'ja': 25 | avail_jp = 'jp' 26 | down_jp = info_dic['link'] 27 | 28 | if avail_en and avail_jp: 29 | print moviename 30 | output = ""%(moviename, down_en, down_jp) 31 | out.write(output) 32 | out.write('\n') 33 | except: pass 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/jNlp/aquisition/download_subs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/jNlp/callunix.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import subprocess 4 | from subprocess import call 5 | def _checklist(argument): 6 | if not type(argument) is list: 7 | return argument.split() 8 | else: return argument 9 | 10 | def shell_out(command): 11 | command = _checklist(command) 12 | process = subprocess.Popen(command, stdout=subprocess.PIPE) 13 | return process.communicate()[0] 14 | 15 | def shell_call(command): 16 | command = _checklist(command) 17 | subprocess.Popen(command, stdout=subprocess.PIPE) 18 | return '' 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/jNlp/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/src/jNlp/data/__init__.py -------------------------------------------------------------------------------- /src/jNlp/data/chasen_pos.txt: -------------------------------------------------------------------------------- 1 | 0 0 BOS/EOS 2 | 1 1 名詞 3 | 1 2 名詞-一般 4 | 1 3 名詞-固有名詞 5 | 1 4 名詞-固有名詞-一般 6 | 1 5 名詞-固有名詞-人名 7 | 1 6 名詞-固有名詞-人名-一般 8 | 1 7 名詞-固有名詞-人名-姓 9 | 1 8 名詞-固有名詞-人名-名 10 | 1 9 名詞-固有名詞-組織 11 | 1 10 名詞-固有名詞-地域 12 | 1 11 名詞-固有名詞-地域-一般 13 | 1 12 名詞-固有名詞-地域-国 14 | 1 13 名詞-代名詞 15 | 1 14 名詞-代名詞-一般 16 | 1 15 名詞-代名詞-縮約 17 | 1 16 名詞-副詞可能 18 | 1 17 名詞-サ変接続 19 | 1 18 名詞-形容動詞語幹 20 | 0 19 名詞-数 21 | 0 20 名詞-非自立 22 | 0 21 名詞-非自立-一般 23 | 0 22 名詞-非自立-副詞可能 24 | 0 23 名詞-非自立-助動詞語幹 25 | 0 24 名詞-非自立-形容動詞語幹 26 | 0 25 名詞-特殊 27 | 0 26 名詞-特殊-助動詞語幹 28 | 1 27 名詞-接尾 29 | 1 28 名詞-接尾-一般 30 | 1 29 名詞-接尾-人名 31 | 1 30 名詞-接尾-地域 32 | 1 31 名詞-接尾-サ変接続 33 | 0 32 名詞-接尾-助動詞語幹 34 | 1 33 名詞-接尾-形容動詞語幹 35 | 1 34 名詞-接尾-副詞可能 36 | 1 35 名詞-接尾-助数詞 37 | 0 36 名詞-接尾-特殊 38 | 0 37 名詞-接続詞的 39 | 0 38 名詞-動詞非自立的 40 | 0 39 名詞-引用文字列 41 | 1 40 名詞-ナイ形容詞語幹 42 | 0 41 接頭詞 43 | 0 42 接頭詞-名詞接続 44 | 0 43 接頭詞-動詞接続 45 | 0 44 接頭詞-形容詞接続 46 | 0 45 接頭詞-数接続 47 | 1 46 動詞 48 | 1 47 動詞-自立 49 | 0 48 動詞-非自立 50 | 0 49 動詞-接尾 51 | 1 50 形容詞 52 | 1 51 形容詞-自立 53 | 0 52 形容詞-非自立 54 | 1 53 形容詞-接尾 55 | 1 54 副詞 56 | 1 55 副詞-一般 57 | 1 56 副詞-助詞類接続 58 | 0 57 連体詞 59 | 0 58 接続詞 60 | 0 59 助詞 61 | 0 60 助詞-格助詞 62 | 0 61 助詞-格助詞-一般 63 | 0 62 助詞-格助詞-引用 64 | 0 63 助詞-格助詞-連語 65 | 0 64 助詞-接続助詞 66 | 0 65 助詞-係助詞 67 | 0 66 助詞-副助詞 68 | 0 67 助詞-間投助詞 69 | 0 68 助詞-並立助詞 70 | 0 69 助詞-終助詞 71 | 0 70 助詞-副助詞/並立助詞/終助詞 72 | 0 71 助詞-連体化 73 | 0 72 助詞-副詞化 74 | 0 73 助詞-特殊 75 | 0 74 助動詞 76 | 0 75 感動詞 77 | 0 76 記号 78 | 0 77 記号-一般 79 | 0 78 記号-句点 80 | 0 79 記号-読点 81 | 0 80 記号-空白 82 | 0 81 記号-アルファベット 83 | 0 82 記号-括弧開 84 | 0 83 記号-括弧閉 85 | 0 84 その他 86 | 0 85 その他-間投 87 | 0 86 フィラー 88 | 0 87 非言語音 89 | 0 88 語断片 90 | -------------------------------------------------------------------------------- /src/jNlp/data/hiraganaChart.txt: -------------------------------------------------------------------------------- 1 | a i u e o ya yu yo n 2 | X あ い う え お X X X ん 3 | k か き く け こ きゃ きゅ きょ X 4 | a さ し す せ そ しゃ しゅ しょ X 5 | t た X X て と ちゃ ちゅ ちょ X 6 | n な に ぬ ね の にゃ にゅ にょ X 7 | h は ひ ふ へ ほ ひゃ ひゅ ひょ X 8 | m ま み む め も みゃ みゅ みょ X 9 | y や X ゆ X よ X X X X 10 | r ら り る れ ろ りゃ りゅ りょ X 11 | w わ ゐ X ゑ を X X X X 12 | g が ぎ ぐ げ ご ぎゃ ぎゅ ぎょ X 13 | z ざ じ ず ぜ ぞ じゃ じゅ じょ X 14 | d だ ぢ づ で ど ぢゃ ぢゅ ぢょ X 15 | b ば び ぶ べ ぼ びゃ びゅ びょ X 16 | p ぱ ぴ ぷ ぺ ぽ ぴゃ ぴゅ ぴょ X 17 | ch X ち X X X X X X X 18 | ts X X つ X X X X X X 19 | -------------------------------------------------------------------------------- /src/jNlp/data/katakanaChart.txt: -------------------------------------------------------------------------------- 1 | a i u e o ya 
yu yo n 2 | X ア イ ウ エ オ X X X ン 3 | k カ キ ク ケ コ キャ キュ キョ X 4 | s サ シ ス セ ソ シャ シュ ショ X 5 | t タ X X テ ト チャ チュ チョ X 6 | n ナ ニ ヌ ネ ノ ニャ ニュ ニョ X 7 | h ハ ヒ フ ヘ ホ ヒャ ヒュ ヒョ X 8 | m マ ミ ム メ モ ミャ ミュ ミョ X 9 | y ヤ X ユ X ヨ X X X X 10 | r ラ リ ル レ ロ リャ リュ リョ X 11 | w ワ ヰ X ヱ ヲ X X X X 12 | g ガ ギ グ ゲ ゴ ギャ ギュ ギョ X 13 | z ザ X ズ ゼ ゾ X X X X 14 | d ダ ヂ ヅ デ ド ヂャ ヂュ ヂョ X 15 | b バ ビ ブ ベ ボ ビャ ビュ ビョ X 16 | p パ ピ プ ペ ポ ピャ ピュ ピョ X 17 | ch X チ X X X X X X X 18 | ts X X ツ X X X X X X 19 | j X ジ X X X ジャ ジュ ジョ X 20 | -------------------------------------------------------------------------------- /src/jNlp/eProcessing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from pkg_resources import resource_stream 4 | import sys, os, subprocess 5 | from subprocess import call 6 | import xml.etree.cElementTree as etree 7 | 8 | import nltk 9 | from nltk.stem.wordnet import WordNetLemmatizer 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | pass 15 | -------------------------------------------------------------------------------- /src/jNlp/edict_search_monash/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/src/jNlp/edict_search_monash/__init__.py -------------------------------------------------------------------------------- /src/jNlp/edict_search_monash/edict_examples.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevincobain2000/jProcessing/5ea303cc4bf6e8aaa4a3c5f9d023368191919f75/src/jNlp/edict_search_monash/edict_examples.p -------------------------------------------------------------------------------- /src/jNlp/edict_search_monash/edict_examples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | This package uses the EDICT_ and KANJIDIC_ dictionary files. 5 | These files are the property of the 6 | Electronic Dictionary Research and Development Group_ , and 7 | are used in conformance with the Group's licence_ . 8 | 9 | .. _EDICT: http://www.csse.monash.edu.au/~jwb/edict.html 10 | .. _KANJIDIC: http://www.csse.monash.edu.au/~jwb/kanjidic.html 11 | .. _Group: http://www.edrdg.org/ 12 | .. _licence: http://www.edrdg.org/edrdg/licence.html 13 | .. 14 | """ 15 | # Copyright (c) 2011, Pulkit Kathuria 16 | # All rights reserved. 17 | # 18 | # Redistribution and use in source and binary forms, with or without 19 | # modification, are permitted provided that the following conditions 20 | # are met: 21 | # 22 | # * Redistributions of source code must retain the above copyright 23 | # notice, this list of conditions and the following disclaimer. 24 | # * Redistributions in binary form must reproduce the above 25 | # copyright notice, this list of conditions and the following 26 | # disclaimer in the documentation and/or other materials provided 27 | # with the distribution. 28 | # 29 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 32 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 33 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 34 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 36 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 37 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 39 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 40 | # POSSIBILITY OF SUCH DAMAGE. 41 | 42 | """ 43 | Edict Parser By **Paul Goins**, see ``edict_search.py`` 44 | Edict Example sentences, by search query, **Pulkit Kathuria** 45 | Edict examples pickle files are provided but latest example files 46 | can be downloaded from the links provided. 47 | 48 | Charset: 49 | 50 | - utf-8 charset example file 51 | - ISO-8859-1 edict_dictionary file 52 | 53 | Outputs example sentences for a query in Japanese only for ambiguous words. 54 | """ 55 | 56 | import re, os, subprocess 57 | from jNlp.edict_search_monash.edict_search import Parser 58 | import cPickle as pickle 59 | 60 | 61 | def word_and_id(BSent): 62 | results = [] 63 | for item in BSent.split(): 64 | brackets = re.compile('\[.*?\]') 65 | flter = re.sub('\(.*?\)','',item) 66 | word = re.split('\[|\]', re.sub('\{.*?\}','',flter))[0] 67 | try: s_id = re.split('\[|\]', re.sub('\{.*?\}','',flter))[1] 68 | except: pass 69 | if re.search(brackets, flter): 70 | results.append((word, s_id)) 71 | return results 72 | 73 | def parse_examples(edict_examples_file): 74 | """ 75 | Edict examples format 76 | --------------------- 77 | :: 78 | 79 | A: 誰にでも長所と.. Everyone has....points.#ID=276471_4870 80 | B: 才[01]{歳} 以上[01] 生きる (こと){こと} は 決して .. 81 | 82 | ambiguous_words: @type = dictionary 83 | format: Kanji ==> id ==> [examples_sent_id, ..] 84 | 才 ==> 01 ==> [#ID=276471_4870, ...] 85 | call: 86 | >>> ambiguous_words[kanji][01] 87 | ...[#ID=276471_4870, ...] 88 | 89 | edict_examples: @type = dictionary 90 | format: 91 | ID ==> u'example_sentence' 92 | #ID=276471_4870 ==> u'誰にでも長所と.. 
Everyone has....points' 93 | 94 | """ 95 | ambiguous_words = {} 96 | edict_examples = {} 97 | for line in edict_examples_file.readlines(): 98 | line = unicode(line,'utf-8') 99 | if line.startswith('A:'): 100 | eg_sent = line.split('#ID=')[0] 101 | eg_sent_id = line.split('#ID=')[1] 102 | edict_examples[eg_sent_id] = eg_sent 103 | continue 104 | for item in word_and_id(line): 105 | word = item[0] 106 | s_id = int(item[1]) 107 | if not ambiguous_words.has_key(word): ambiguous_words[word] = {} 108 | if not ambiguous_words[word].has_key(s_id): ambiguous_words[word][s_id] = [] 109 | ambiguous_words[word][s_id].append(eg_sent_id) 110 | return ambiguous_words, edict_examples 111 | 112 | def edict_entry(edict_file_path, query): 113 | kp = Parser(edict_file_path) 114 | for entry in kp.search(query): 115 | if entry.to_string().split()[0] == query: 116 | entry = entry.to_string() 117 | glosses = re.findall('\(\d\).*?;',entry) 118 | s_ids = [int(re.search('\d',gloss).group(0)) for gloss in glosses] 119 | return s_ids, glosses 120 | return [],[] 121 | 122 | def check_pickles(edict_examples_path): 123 | f = open(edict_examples_path) 124 | __checkpickles__ = ['edict_examples.p','ambiguous_words.p'] 125 | for pickl in __checkpickles__: 126 | if not os.path.exists(pickl): 127 | ambiguous_words, edict_examples = parse_examples(f) 128 | pickle.dump(ambiguous_words, open("ambiguous_words.p",'wb')) 129 | pickle.dump(edict_examples, open("edict_examples.p",'wb')) 130 | else: 131 | ambiguous_words = pickle.load(open('ambiguous_words.p')) 132 | edict_examples = pickle.load(open('edict_examples.p')) 133 | return ambiguous_words, edict_examples 134 | 135 | def search_with_example(edict_path, edict_examples_path, query): 136 | ambiguous_words, edict_examples = check_pickles(edict_examples_path) 137 | s_ids, glosses = edict_entry(edict_path, query) 138 | print query.encode('utf-8') 139 | for s_id, gloss in enumerate(glosses): 140 | print 141 | print 'Sense', gloss 142 | if ambiguous_words.has_key(query) and ambiguous_words[query].has_key(s_ids[s_id]): 143 | for ex_num, ex_id in enumerate(ambiguous_words[query][s_ids[s_id]], 1): 144 | ex_sentence = edict_examples[ex_id].replace(query[0], '*'+query[0]+'*') 145 | print '\t', ex_sentence.replace('A:','EX:'+str(ex_num).zfill(2)).encode('utf-8') 146 | 147 | def _mime(f_path): 148 | command = ['file','--mime',f_path] 149 | process = subprocess.Popen(command, stdout=subprocess.PIPE) 150 | charset = process.communicate()[0].split('charset=')[1] 151 | return charset.strip() 152 | 153 | def _encoding_check(edict_path, edict_examples_path): 154 | if _mime(edict_path) <> 'iso-8859-1' or _mime(edict_examples_path) <>'utf-8': 155 | print _mime(edict_path) 156 | print 'examples file must utf-8 encoded' 157 | print 'edict dictionary must be iso-8859-1 encoded' 158 | print 'man iconv' 159 | return True 160 | 161 | if __name__ == '__main__': 162 | query = u'水' 163 | edict_path = '../_dicts/edict-2011-08-30' 164 | edict_examples_path = '../_dicts/edict_examples' 165 | search_with_example(edict_path, edict_examples_path, query) 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | -------------------------------------------------------------------------------- /src/jNlp/edict_search_monash/edict_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2009, Paul Goins 5 | # All rights reserved. 
6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above 14 | # copyright notice, this list of conditions and the following 15 | # disclaimer in the documentation and/or other materials provided 16 | # with the distribution. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 28 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | # POSSIBILITY OF SUCH DAMAGE. 30 | 31 | """A parser for EDICT. 32 | 33 | This version is intended to be a more-or-less complete EDICT parser, 34 | with the exception of not doing special parsing for loan word tags. 35 | If you require special handling for those, then you probably ought to 36 | be using JMdict instead. 37 | 38 | """ 39 | 40 | import sys, os, re, gzip, gettext 41 | gettext.install('pyjben', unicode=True) 42 | 43 | 44 | # Below follows the information codes sorted more-or-less as they are 45 | # on http://www.csse.monash.edu.au/~jwb/edict_doc.html, however more 46 | # up to date. These sets are accurate as of 2009-Jul-17. 
47 | 48 | # Part of speech codes 49 | valid_pos_codes = set(( 50 | "adj-i", "adj-na", "adj-no", "adj-pn", "adj-t", "adj-f", "adj", 51 | "adv", "adv-to", "aux", "aux-v", "aux-adj", "conj", "ctr", "exp", 52 | "int", "iv", "n", "n-adv", "n-suf", "n-pref", "n-t", "num", "pn", 53 | "pref", "prt", "suf", "v1", "v2a-s", "v4h", "v4r", "v5", "v5aru", 54 | "v5b", "v5g", "v5k", "v5k-s", "v5m", "v5n", "v5r", "v5r-i", "v5s", 55 | "v5t", "v5u", "v5u-s", "v5uru", "v5z", "vz", "vi", "vk", "vn", 56 | "vr", "vs", "vs-s", "vs-i", "vt", 57 | )) 58 | 59 | # Field of application codes 60 | valid_foa_codes = set(( 61 | "Buddh", "MA", "comp", "food", "geom", "ling", "math", "mil", 62 | "physics", "chem" 63 | )) 64 | 65 | # Miscellaneous marking codes 66 | valid_misc_codes = set(( 67 | "X", "abbr", "arch", "ateji", "chn", "col", "derog", "eK", "ek", 68 | "fam", "fem", "gikun", "hon", "hum", "iK", "id", "ik", "io", 69 | "m-sl", "male", "male-sl", "oK", "obs", "obsc", "ok", "on-mim", 70 | "poet", "pol", "rare", "sens", "sl", "uK", "uk", "vulg" 71 | )) 72 | 73 | # Dialect codes 74 | valid_dialect_codes = set(( 75 | "kyb", "osb", "ksb", "ktb", "tsb", "thb", "tsug", "kyu", "rkb", 76 | "nab" 77 | )) 78 | 79 | # Grab all ()'s before a gloss 80 | all_paren_match = re.compile("^(\([^)]*\)[ ]*)+") 81 | # Grab the first () data entry, with group(1) set to the contents 82 | paren_match = re.compile(u"^[ ]*\(([^)]+)\)[ ]*") 83 | 84 | def info_field_valid(i_field): 85 | """Returns whether a given info code is valid.""" 86 | 87 | # Validity is a sticky issue since there's so many fields: 88 | # 89 | # - Sense markers (1, 2, 3, ...) 90 | # - Part of speech markers (n, adv, v5r) 91 | # - Field of application markers (comp, math, mil) 92 | # - Miscellaneous meanings (X, abbr, arch, ateji, ..........) 93 | # - Word priority (P) 94 | # ? Okurigana variants (Maybe this is JMdict only?) 95 | # - Loan words, a.k.a. Gairaigo 96 | # - Regional Japanese words (Kansai-ben, etc.) 97 | # 98 | # Thankfully, this function should be reusable in the edict2 parser... 99 | 100 | if i_field in valid_pos_codes: return True 101 | if i_field == "P": return True 102 | if i_field in valid_misc_codes: return True 103 | if i_field in valid_foa_codes: return True 104 | if i_field[:-1] in valid_dialect_codes: return True 105 | # Check for (1), (2), etc. 106 | try: 107 | i = int(i_field) 108 | return True 109 | except: 110 | return False 111 | 112 | class EdictEntry(object): 113 | 114 | def __init__(self, raw_entry, quick_parsing=True): 115 | 116 | # Japanese - note, if only a kana reading is present, it's 117 | # stored as "japanese", and furigana is left as None. 118 | self.japanese = None 119 | self.furigana = None 120 | # Native language glosses 121 | self.glosses = [] 122 | # Info fields should be inserted here as "tags". 123 | self.tags = set() 124 | # Currently unhandled stuff goes here... 125 | self.unparsed = [] 126 | 127 | # Most people don't need ultra-fancy parsing and can happily 128 | # take glosses with keywords stuck in them. In this case, 129 | # they can save processing time by using parse_entry_quick. 130 | # However, this will mean that "J-Ben"-style entry sorting may 131 | # not work exactly as expected because of tags being appended 132 | # to the beginning or end. 133 | 134 | # Note: Even with full parsing, due to a few entries with tags 135 | # at the end of their glosses, there's a few entries which will not 136 | # successfully match on an "ends with" search. 137 | 138 | # ENABLE THIS once parse_entry_quick is implemented. 
139 | if quick_parsing: 140 | self.parse_entry_quick(raw_entry) 141 | else: 142 | self.parse_entry(raw_entry) 143 | 144 | def parse_entry(self, raw_entry): 145 | if not raw_entry: 146 | return None 147 | 148 | jdata, ndata = raw_entry.split(u'/', 1) 149 | 150 | # Get Japanese 151 | pieces = jdata.split(u'[', 1) 152 | self.japanese = pieces[0].strip() 153 | if len(pieces) > 1: 154 | # Store furigana without '[]' 155 | self.furigana = pieces[1].strip()[:-1] 156 | 157 | #if self.furigana: 158 | # print "JAPANESE: %s, FURIGANA: %s" % (self.japanese, self.furigana) 159 | #else: 160 | # print "JAPANESE: %s" % self.japanese 161 | 162 | # Get native language data 163 | glosses = ndata.split(u'/') 164 | for gloss in glosses: 165 | # For each gloss, we need to check for ()'s at the beginning. 166 | # Multiple such ()'s may be present. 167 | # The actual gloss does not begin until the last set (or 168 | # an unhandled one) is encountered. 169 | 170 | if not gloss: continue 171 | #print "Unparsed gloss: [%s]" % gloss 172 | 173 | info = None 174 | m = all_paren_match.match(gloss) 175 | if m: 176 | info = m.group(0) 177 | if info: 178 | gloss_start = m.span()[1] 179 | gloss = gloss[gloss_start:] 180 | #print "Info field captured: [%s]" % info 181 | 182 | while info: 183 | m = paren_match.match(info) 184 | #if not m: break # Shouldn't ever happen... 185 | i_field = m.group(1) 186 | #print "INFO FIELD FOUND:", i_field 187 | i_fields = i_field.split(u',') 188 | 189 | # Check that all i_fields are valid 190 | bools = map(info_field_valid, i_fields) 191 | ok = reduce(lambda x, y: x and y, bools) 192 | 193 | if not ok: 194 | #print "INVALID INFO FIELD FOUND, REVERTING" 195 | #print "INFO WAS %s, GLOSS WAS %s" % (info, gloss) 196 | print info 197 | gloss = info + gloss 198 | #print "RESTORED GLOSS:", gloss 199 | break 200 | 201 | for tag in i_fields: 202 | self.tags.add(tag.rstrip(':')) # Handles "ksb:" 203 | # and other 204 | # dialect codes 205 | #print "INFO FIELD FOUND:", i 206 | next_i = m.span()[1] 207 | info = info[next_i:] 208 | 209 | #print "APPENDING GLOSS:", gloss 210 | self.glosses.append(gloss) 211 | 212 | def parse_entry_quick(self, raw_entry): 213 | if not raw_entry: 214 | return None 215 | 216 | jdata, ndata = raw_entry.split(u'/', 1) 217 | 218 | # Get Japanese 219 | pieces = jdata.split(u'[', 1) 220 | self.japanese = pieces[0].strip() 221 | if len(pieces) > 1: 222 | # Store furigana without '[]' 223 | self.furigana = pieces[1].strip()[:-1] 224 | 225 | # Get native language data 226 | self.glosses = [g for g in ndata.split(u'/') if g] 227 | 228 | def to_string(self, **kwargs): 229 | if self.furigana: 230 | ja = _(u"%s [%s]") % (self.japanese, self.furigana) 231 | else: 232 | ja = self.japanese 233 | native = _(u"; ").join(self.glosses) 234 | return _(u"%s: %s") % (ja, native) 235 | 236 | def __unicode__(self): 237 | """Dummy string dumper""" 238 | return unicode(self.__repr__()) 239 | 240 | class Parser(object): 241 | def __init__(self, filename, use_cache=True, encoding="EUC-JP"): 242 | if not os.path.exists(filename): 243 | raise Exception("Dictionary file does not exist.") 244 | self.filename = filename 245 | self.encoding = encoding 246 | self.use_cache = use_cache 247 | self.cache = {} 248 | 249 | def search(self, query): 250 | """Returns a list of entries matching the query.""" 251 | results = [] 252 | 253 | def proc_entry(entry): 254 | if query in entry.japanese: 255 | results.append(entry) 256 | else: 257 | for gloss in entry.glosses: 258 | if query in gloss: 259 | results.append(entry) 
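# one matching gloss is enough; stop so the same entry is not added twice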
260 | break 261 | 262 | if self.use_cache and self.cache: 263 | # Read from cache 264 | for k, entry in self.cache.iteritems(): 265 | proc_entry(entry) 266 | else: 267 | # Read from file 268 | if len(self.filename) >= 3 and self.filename[-3:] == ".gz": 269 | f = gzip.open(self.filename) 270 | else: 271 | f = open(self.filename, "rb") 272 | fdata = f.read() 273 | f.close() 274 | fdata = fdata.decode(self.encoding) 275 | lines = fdata.splitlines() 276 | lines = [line for line in lines if line and (line[0] != u"#")] 277 | 278 | data = {} 279 | for line in lines: 280 | entry = EdictEntry(line) 281 | if self.use_cache: 282 | self.cache[entry.japanese] = entry 283 | proc_entry(entry) 284 | 285 | # Very simple sorting of results. 286 | # (Requires that (P) is left in glosses...) 287 | common = [] 288 | other = [] 289 | 290 | for item in results: 291 | is_common = False 292 | for gloss in item.glosses: 293 | if u'(P)' in gloss: 294 | is_common = True 295 | break 296 | if is_common: 297 | common.append(item) 298 | else: 299 | other.append(item) 300 | 301 | results = common 302 | results.extend(other) 303 | 304 | # Return results 305 | return results 306 | 307 | if __name__ == "__main__": 308 | kp = Parser('../_dicts/edict-2011-08-30') 309 | query = u'私' 310 | for i, entry in enumerate(kp.search(query)): 311 | print entry.to_string().encode('utf-8') 312 | 313 | -------------------------------------------------------------------------------- /src/jNlp/jCabocha.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import sys, subprocess, os 4 | from subprocess import call 5 | from tempfile import NamedTemporaryFile 6 | 7 | def formdamage(sent): 8 | rectify = [] 9 | for ch in sent: 10 | try: rectify.append(ch.encode('utf-8')) 11 | except: pass 12 | return ''.join(rectify) 13 | 14 | def cabocha(sent): 15 | if os.path.exists('/home_lab_local/s1010205/tmp/'): 16 | temp = NamedTemporaryFile(delete=False, dir='/home_lab_local/s1010205/tmp/') 17 | else: 18 | temp = NamedTemporaryFile(delete=False) 19 | try: sent = sent.encode('utf-8') 20 | except: sent = formdamage(sent) 21 | temp.write(sent) 22 | temp.close() 23 | command = ['cabocha', '-f', '3'] 24 | process = subprocess.Popen(command, stdin=open(temp.name,'r'), stdout=subprocess.PIPE) 25 | output = process.communicate()[0] 26 | os.unlink(temp.name) 27 | return unicode(output, 'utf-8') 28 | 29 | def main(): 30 | pass 31 | 32 | if __name__ == '__main__': 33 | input_sentence = u'私が五年前にこの団体を仲間たちと結成したのはマルコス疑惑などで日本のODA(政府開発援助)が問題になり、国まかせでなく、民間による国際協力が必要だと痛感したのが大きな理由です。' 34 | print cabocha(input_sentence).encode('utf-8') 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/jNlp/jColor.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | def color(raw_string, colour): 4 | """ 5 | @returns a bold font 6 | usage: color("raw string here", 'red') 7 | """ 8 | black = ('28', '1') 9 | red = ('31','1') 10 | green = ('32','1') 11 | return '\x1b[%sm%s\x1b[0m' % (';'.join(eval(colour)), raw_string) 12 | 13 | if __name__ == "__main__": 14 | print color("this string","black") 15 | -------------------------------------------------------------------------------- /src/jNlp/jConvert.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | from jNlp.jTokenize import jTokenize, jReads 5 | from jNlp.jCabocha import cabocha 6 | from pkg_resources import resource_stream 7 | 8 | class ChartParser(object): 9 | def __init__(self, chartFile): 10 | self.chart = resource_stream('jNlp', chartFile).read() 11 | def chartParse(self): 12 | """ 13 | @return chartDict 14 | ガ ==> g,a 15 | キ ==> k,i 16 | キャ ==> k,ya 17 | Similarily for Hiragana 18 | @setrofim : http://www.python-forum.org/pythonforum/viewtopic.php?f=3&t=31935 19 | """ 20 | lines = self.chart.split('\n') 21 | chartDict = {} 22 | output = {} 23 | col_headings = lines.pop(0).split() 24 | for line in lines: 25 | cells = line.split() 26 | for i, c in enumerate(cells[1:]): 27 | output[c] = (cells[0], col_headings[i]) 28 | for k in sorted(output.keys()): 29 | #@k = katakana 30 | #@r = first romaji in row 31 | #@c = concatinating romaji in column 32 | r, c = output[k] 33 | k, r, c = [unicode(item,'utf-8') for item in [k,r,c]] 34 | if k == 'X':continue 35 | romaji = ''.join([item.replace('X', '') for item in [r,c]]) 36 | chartDict[k] = romaji 37 | return chartDict 38 | 39 | def tokenizedRomaji(jSent): 40 | kataDict = ChartParser('data/katakanaChart.txt').chartParse() 41 | tokenizeRomaji = [] 42 | for kataChunk in jReads(jSent): 43 | romaji = '' 44 | for idx, kata in enumerate(kataChunk,1): 45 | if idx != len(kataChunk): 46 | doubles = kata+kataChunk[idx] 47 | if kataDict.has_key(doubles): 48 | romaji += kataDict[doubles] 49 | continue 50 | if kataDict.has_key(kata): 51 | romaji += kataDict[kata] 52 | else: 53 | pass 54 | #checkPunctuation(kata) 55 | tokenizeRomaji.append(romaji) 56 | return tokenizeRomaji 57 | 58 | if __name__ == '__main__': 59 | #kataDict = ChartParser('data/katakanaChart.txt').chartParse() 60 | sent = u'気象庁が21日午前4時48分、発表した天気概況によると、' 61 | print ' '.join(tokenizedRomaji(sent)).encode('utf-8') 62 | #print tokenizedRomaji(sent) 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /src/jNlp/jProcessing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from jNlp.jTokenize import * 4 | from pkg_resources import resource_stream 5 | import sys, os, subprocess, re 6 | from subprocess import call 7 | import xml.etree.cElementTree as etree 8 | 9 | def long_substr(str1, str2): 10 | data = [str1, str2] 11 | substr = '' 12 | if len(data) > 1 and len(data[0]) > 0: 13 | for i in range(len(data[0])): 14 | for j in range(len(data[0])-i+1): 15 | if j > len(substr) and all(data[0][i:i+j] in x for x in data): 16 | substr = data[0][i:i+j] 17 | return substr.strip() 18 | 19 | class Similarities(object): 20 | def minhash(self, *args): 21 | """ 22 | :*args: tokenized string like a nd b 23 | :Sentences: should be tokenized in string 24 | a = u"これ はな ん です" 25 | b = u"かこ れ何 です" 26 | """ 27 | score = 0.0 28 | tok_sent_1 = args[0] 29 | tok_sent_2 = args[1] 30 | shingles = lambda s: set(s[i:i+3] for i in range(len(s)-2)) 31 | try: 32 | jaccard_distance = lambda seta, setb: len(seta & setb)/float(len(seta | setb)) 33 | score = jaccard_distance(shingles(tok_sent_1), shingles(tok_sent_2)) 34 | return score 35 | except ZeroDivisionError: return score 36 | 37 | class Property(object): 38 | def __init__(self): 39 | pass 40 | def kanaChars(self): 41 | Chars = [] 42 | tables = ['hiraganaChart.txt', 
'katakanaChart.txt'] 43 | for table in tables: 44 | buff = resource_stream('jNlp', 'data/%s'%table).readlines() 45 | for line in buff: 46 | line = unicode(line, 'utf-8') 47 | Chars += line.split() 48 | return Chars 49 | 50 | def iscontent(self, pos): 51 | self.pos = pos 52 | self.file = resource_stream('jNlp', 'data/chasen_pos.txt').readlines() 53 | self.content = {} 54 | for line in self.file: 55 | if not line.strip(): continue 56 | line = unicode(line,'utf-8') 57 | pos = line.split()[2].strip() 58 | self.content[pos] = int(line.split()[0].strip()) 59 | if self.content.has_key(self.pos) and self.content[self.pos]: return True 60 | return False 61 | def tok_xml(self, sent, word): 62 | #Usage 63 | #tok_xml(u'これでアナタも冷え知らず', u'冷').get('pos') 64 | self.sent = sent.replace(word, '*'+word+'*') 65 | cTree = jCabocha_with_target(self.sent) 66 | for chunk in cTree.getchildren():#chunks 67 | for tok in chunk.getchildren(): 68 | if tok.get('target'):return tok 69 | return etree.fromstring(u'') 70 | def iskana(self, word): 71 | romaji = ['a', 'b', 'c', 'd', 'e', 'f', 'g', \ 72 | 'h', 'i', 'j', 'k', 'l', 'm', 'n', \ 73 | 'o', 'p', 'q', 'r', 's', 't', 'u', \ 74 | 'v', 'w', 'x', 'y', 'z'] 75 | if len(word) == 1 and word in self.kanaChars() and word not in romaji: 76 | return True 77 | else: return False 78 | 79 | 80 | 81 | if __name__ == '__main__': 82 | a = 'Once upon a time in Italy' 83 | b = 'Thre was a time in America' 84 | #print long_substr(a, b) 85 | a = u'これでアナタも冷え知らず' 86 | b = u'これでア冷え知らずナタも' 87 | #print long_substr(a, b).encode('utf-8') 88 | #similarity = Similarities() 89 | #print similarity.minhash(' '.join(jTokenize(a)), ' '.join(jTokenize(b))) 90 | pos = Property() 91 | #print pos.iscontent(u'地域') 92 | #print pos.tok_xml(u'これでアナタも冷え知らず', u'冷').get('pos') 93 | print pos.iskana(u'冷') 94 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /src/jNlp/jSentiments.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import division 4 | import sys, subprocess, argparse 5 | from subprocess import call 6 | from jNlp.jTokenize import jTokenize 7 | from jNlp.jColor import color 8 | 9 | class Sentiment(object): 10 | def train(self, senti_path, wnjpn_path): 11 | """ 12 | ``idSenti & idjWord = type `` :: 13 | 14 | idSenti[00004980] = [posScore, negScore] 15 | idjWord[u'kanji/jword'] = 00004980 16 | """ 17 | self.idSenti = {} 18 | self.idjWord = {} 19 | with open(senti_path) as senti_f: 20 | senti_text = senti_f.readlines() 21 | for line in senti_text: 22 | if line.startswith('#'): continue 23 | try: 24 | ID, pScore, nScore = line.split()[1:4] 25 | self.idSenti[ID] = [float(pScore), float(nScore)] 26 | except (IndexError, ValueError): pass 27 | with open(wnjpn_path) as jwn_f: 28 | jwn_text = jwn_f.readlines() 29 | for line in jwn_text: 30 | ID = line.split()[0].split('-')[0] 31 | jWord = unicode(line.split()[1].strip(), 'utf-8') 32 | self.idjWord[jWord] = ID 33 | return self.idSenti, self.idjWord 34 | 35 | def polarScores_word(self, word): 36 | """ 37 | returns pos, neg score for one kanji 38 | """ 39 | if not self.idjWord.has_key(word): return 0.0, 0.0 40 | pScore = self.idSenti[self.idjWord[word]][0] 41 | nScore = self.idSenti[self.idjWord[word]][1] 42 | return pScore, nScore 43 | 44 | def polarScores_text(self, text): 45 | pScore = 0.0 46 | nScore = 0.0 47 | for sent in text.split(u'。'): 48 | if len(sent.strip()) == 0: continue 49 | for word in jTokenize(sent): 50 | if not self.idjWord.has_key(word): continue 51 | pScore += self.idSenti[self.idjWord[word]][0] 52 | nScore += self.idSenti[self.idjWord[word]][1] 53 | return pScore, nScore 54 | 55 | def baseline(self, text): 56 | pScore, nScore = self.polarScores_text(text) 57 | print 'Pos Score = %.3f Neg Score = %.3f'%(pScore, nScore) 58 | if pScore == nScore: 59 | print 'Text is Neural or Cannot Determine' 60 | return '' 61 | if pScore > nScore: 62 | print 'Text is', color('Positive', "green") 63 | return '' 64 | else: 65 | print 'Text is', color('Negative',"red") 66 | return '' 67 | 68 | if __name__ == '__main__': 69 | parser = argparse.ArgumentParser(add_help = True) 70 | parser = argparse.ArgumentParser(description= 'Sentiment Classifier for Japanese Text') 71 | #parser.add_argument('-f', action="store", nargs = 2, dest="files", type=argparse.FileType('rt'), help='-f senti.txt jwn.txt') 72 | myarguments = parser.parse_args() 73 | 74 | jp_wn = '_dicts/wnjpn-all.tab' 75 | en_swn = '_dicts/SentiWordNet_3.0.0_20100908.txt' 76 | classifier = Sentiment() 77 | sentiwordnet, jpwordnet = classifier.train(en_swn, jp_wn) 78 | positive_score = sentiwordnet[jpwordnet[u'全部']][0] 79 | negative_score = sentiwordnet[jpwordnet[u'全部']][1] 80 | print 'pos score = {0}, neg score = {1}'.format(positive_score, negative_score) 81 | 82 | text = u'監督、俳優、ストーリー、演出、全部最高!' 83 | print classifier.baseline(text) 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /src/jNlp/jTokenize.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | import xml.etree.cElementTree as etree 5 | #Package imports 6 | from jNlp.jCabocha import * 7 | import argparse 8 | def add_target(jCabocha_tree,target_sent,**kwargs): 9 | """ 10 | Following is to mark a target word 11 | Not called 12 | See jCabocha_with_target() 13 | """ 14 | if kwargs.has_key('id'): attach_id = kwargs['id'] 15 | else: attach_id = 'unknown' 16 | start_pos = len(target_sent.split('*')[0]) 17 | tw = target_sent.split('*')[1] 18 | sent = u'' 19 | for chunk in jCabocha_tree.getchildren(): 20 | for tok in chunk: 21 | if tw in tok.text and len(sent) >= start_pos -3: 22 | tok.set("target", attach_id) 23 | return jCabocha_tree 24 | else: sent += tok.text 25 | return jCabocha_tree 26 | 27 | def jTokenize(target_sent): 28 | default_marker = '*' 29 | target = target_sent.replace(default_marker,'') 30 | sentence = etree.fromstring(cabocha(target).encode('utf-8')) 31 | jTokenized_sent = [] 32 | if default_marker in target_sent: 33 | added_target = add_target(sentence, target_sent) 34 | else: added_target = sentence 35 | for chunk in added_target.findall('chunk'): 36 | for tok in chunk.findall('tok'): 37 | if tok.get("target"): jTokenized_sent.append('*'+tok.text+'*') 38 | else: jTokenized_sent.append(tok.text) 39 | return jTokenized_sent 40 | 41 | def jReads(target_sent): 42 | sentence = etree.fromstring(cabocha(target_sent).encode('utf-8')) 43 | jReadsToks = [] 44 | for chunk in sentence: 45 | for tok in chunk.findall('tok'): 46 | if tok.get("feature"): 47 | read_tag = tok.get("feature").split(',')[-2] 48 | if read_tag == '*': read_tag = '' 49 | elif tok.get("read"): 50 | read_tag = tok.get("read") 51 | else: 52 | pass 53 | if read_tag: jReadsToks.append(read_tag) 54 | return jReadsToks 55 | 56 | def jCabocha_with_target(target_sent, *args): 57 | #target_sent has to be marked with * 58 | if '*' not in target_sent: return cabocha(target_sent) 59 | if args: attach_id = args[0] 60 | else: attach_id = "unknown" 61 | sent_plain = etree.fromstring(cabocha(target_sent.replace('*', '')).encode('utf-8')) 62 | return add_target(sent_plain, target_sent, id = attach_id) 63 | 64 | def jInfo(target_sent, infotype='base'): 65 | #return Info 66 | #Eg for base form do 67 | #>>>jInfo(target_sent, infotype='base') 68 | #...returns [word1baseform, word2baseform, ..] 
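#Other infotype values follow the attribute names on each <tok> element in
#cabocha's '-f 3' XML output; 'read' and 'pos' are the ones used elsewhere in
#this package (see jReads above and vcabocha.py), e.g. as a rough sketch:
#>>>jInfo(target_sent, infotype='pos')
#...returns [word1pos, word2pos, ..]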
69 | sentence = etree.fromstring(cabocha(target_sent).encode('utf-8')) 70 | Info = [] 71 | for chunk in sentence: 72 | for tok in chunk: 73 | if tok.get(infotype): Info.append(tok.get(infotype)) 74 | return Info 75 | 76 | 77 | if __name__ == '__main__': 78 | parser = argparse.ArgumentParser(add_help = True) 79 | parser = argparse.ArgumentParser(description= 'No description sepecified') 80 | parser.add_argument('-a', action="store", dest="action", type=unicode, help='-a base') 81 | parser.add_argument('-s', action="store", dest="sentence", type=str, help='-s Sentence') 82 | myarguments = parser.parse_args() 83 | print cabocha(unicode(myarguments.sentence,'utf-8')).encode('utf-8') 84 | print jReads(unicode(myarguments.sentence,'utf-8')) 85 | 86 | """ 87 | TO Mark the target word use * 1byte 88 | """ 89 | """ 90 | a = u'私は彼を5日前、つまりこの前の金曜日に駅で見かけた' 91 | print jTokenize(a) 92 | #print '--'.join(jTokenize(a)).encode('utf-8') 93 | #print '--'.join(jReads(a)).encode('utf-8') 94 | #--------------------------------------------------------------# 95 | a = u'私は彼を5日*前*、つまりこの前の金曜日に駅で見かけた' 96 | #print jTokenize(a) 97 | #input sentence has to be marked with target word otherwise target is not marked 98 | #print etree.tostring(jCabocha_with_target(a, 'nn:00:11'), 'utf-8') 99 | #print etree.tostring(jCabocha_with_target(a), 'utf-8') #default id = 'unknown' 100 | 101 | sent = u'日本最大級のポータルサイト' 102 | print jInfo(sent, 'base') 103 | #print ' '.join(jReads(a)).encode('utf-8') 104 | """ 105 | -------------------------------------------------------------------------------- /src/jNlp/summarize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from collections import defaultdict 3 | from itertools import repeat 4 | import re 5 | 6 | class Summary(object): 7 | def __init__(self): 8 | pass 9 | 10 | def tokenize(self, text): 11 | return text.split() 12 | 13 | def split_to_sentences(self, text): 14 | sentences = [] 15 | start = 0 16 | for match in re.finditer("(\s*[.!?]\s*)|(\n{2,})", text): 17 | sentences.append(text[start:match.end()].strip()) 18 | start = match.end() 19 | if start < len(text): 20 | sentences.append(text[start:].strip()) 21 | return sentences 22 | 23 | def token_frequency(self, text): 24 | '''Return frequency (count) for each token in the text''' 25 | frequencies = defaultdict(repeat(0).next) 26 | for token in self.tokenize(text): 27 | frequencies[token] += 1 28 | return frequencies 29 | 30 | def sentence_score(self, sentence, frequencies): 31 | return sum((frequencies[token] for token in self.tokenize(sentence))) 32 | 33 | def create_summary(self, sentences, max_length): 34 | summary = [] 35 | size = 0 36 | for sentence in sentences: 37 | size += len(sentence) 38 | if size >= max_length: break 39 | summary.append(sentence) 40 | return "\n".join(summary) 41 | 42 | def summarize(self, text, max_summary_size): 43 | frequencies = self.token_frequency(text) 44 | sentences = self.split_to_sentences(text) 45 | sentences.sort(key=lambda s: self.sentence_score(s, frequencies), reverse=1) 46 | summary = self.create_summary(sentences, max_summary_size) 47 | return summary 48 | 49 | if __name__ == "__main__": 50 | 51 | raw_text = """you know , i've seen network before , and it's a much better film . bulworth is , in the kindest of words , an " homage " to that picture , and at least it has an excellent role model . 
simply take the story about a tv newsman who goes nuts , stirs up controversy , and fatally angers the establishment and change it to a us senator who does the same thing , and you've got bulworth . warren beatty's title role performance is the only reason bulworth has anything going for it at all . much like tom cruise in jerry maguire , beatty takes a difficult character and makes it his own , and while beatty as a foul-mouthed politician is not exactly playing against type , it's still his very aggressive performance that carries the picture . everything else , from the dismal supporting cast ( halle berry has never looked so lost ) to the throw-away one-liners ( you've seen all the best over and over again on the trailers ) is cut-and-pasted from network or clearly dredged from some late night rewrite session . still , beatty's in fine form , and his outrageous wackiness takes the film halfway to where it could have been . ( and geez , he directed , produced , wrote , and starred in the film . . . maybe someone was a little too busy ? ) but overall , the missed opportunities , the overtly silly anti-pc message backed up by nothing , and the all-too-forseeable ending make bulworth little more than a fable that we already knew : that anyone involved with politics is totally insane . """ 52 | s= Summary() 53 | MAX_SUMMARY_SIZE = len(raw_text)/3 54 | print s.summarize(raw_text, MAX_SUMMARY_SIZE) 55 | -------------------------------------------------------------------------------- /src/jNlp/url2text.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import division 4 | from HTMLParser import HTMLParser 5 | from re import sub 6 | from sys import stderr 7 | from traceback import print_exc 8 | from urllib import * 9 | import re, string 10 | 11 | class Parser(HTMLParser): 12 | def __init__(self): 13 | HTMLParser.__init__(self) 14 | self.__text = [] 15 | 16 | def handle_data(self, data): 17 | text = data.strip() 18 | if len(text) > 0: 19 | text = sub('[ \t\r\n]+', ' ', text) 20 | self.__text.append(text + ' ') 21 | 22 | def handle_starttag(self, tag, attrs): 23 | if tag == 'p': 24 | self.__text.append('\n\n') 25 | elif tag == 'br': 26 | self.__text.append('\n') 27 | 28 | def handle_startendtag(self, tag, attrs): 29 | if tag == 'br': 30 | self.__text.append('\n\n') 31 | 32 | def text(self): 33 | return ''.join(self.__text).strip() 34 | class Url2Text(object): 35 | def raw_text(self, html_text): 36 | try: 37 | parser = Parser() 38 | parser.feed(html_text) 39 | parser.close() 40 | return parser.text() 41 | except: 42 | print "Couldn't extract" 43 | exit() 44 | 45 | def url2text(self, url): 46 | clean_text = [] 47 | html_text = urlopen(url).read() 48 | count = lambda l1, l2: len(list(filter(lambda c: c in l2, l1))) 49 | counts = [] 50 | text = self.raw_text(html_text) 51 | for line in text.splitlines(): 52 | counts.append(count(line, string.punctuation)) 53 | for line, punct in zip(text.splitlines(), counts): 54 | if line and punct < max(counts)/3: 55 | clean_text.append(line.strip()) 56 | return clean_text 57 | 58 | if __name__ == '__main__': 59 | url = "http://content.usatoday.com/communities/onpolitics/post/2012/03/mitt-romney-super-tuesday-results-rick-santorum-ohio/1" 60 | #url = 'http://www.terminally-incoherent.com/blog/2007/09/19/latex-squeezing-the-vertical-white-space/' 61 | a = Url2Text() 62 | print a.url2text(url) 63 | 64 | 65 | 66 | 
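# A minimal usage sketch (illustration only, not code shipped in this module):
# the cleaned lines returned above can feed jNlp.summarize, whose
# Summary.summarize(text, max_summary_size) is defined earlier in this repository.
#
#   from jNlp.url2text import Url2Text
#   from jNlp.summarize import Summary
#   page = '\n'.join(Url2Text().url2text(url))
#   print Summary().summarize(page, len(page) / 3)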
-------------------------------------------------------------------------------- /src/jNlp/vcabocha.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from jNlp.jCabocha import * 4 | from jNlp.jTokenize import * 5 | import argparse 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser(add_help = True) 9 | parser = argparse.ArgumentParser(description= 'No description sepecified') 10 | parser.add_argument('-a', action="store", dest="action", type=unicode, help='-a [cabocha, tokenize, base, read, pos]') 11 | parser.add_argument('-s', action="store", dest="sentence", type=str, help='-s Sentence') 12 | myarguments = parser.parse_args() 13 | sent = unicode(myarguments.sentence,'utf-8') 14 | print myarguments.action 15 | if myarguments.action == "cabocha": 16 | print cabocha(sent).encode('utf-8') 17 | elif myarguments.action == "tokenize": 18 | print 'Tokenized' 19 | print '=========' 20 | print '\n'.join(jTokenize(sent)) 21 | elif myarguments.action: 22 | tokenized = jTokenize(sent) 23 | info = jInfo(sent, infotype=myarguments.action) 24 | mxlen = len(max(max(tokenized, key=len), max(info, key=len))) + 30 25 | print '{0:{mx}}{1:}'.format('Sent',myarguments.action, mx = mxlen) 26 | print '{0:{mx}}{1:}'.format('====','='*len(myarguments.action), mx = mxlen) 27 | 28 | for i, j in zip(tokenized, info): 29 | i = i.encode('utf-8') 30 | j = j.encode('utf-8') 31 | print '{0:{mx}}{1:<}'.format(i,j, mx = mxlen) 32 | else: 33 | print cabocha(sent).encode('utf-8') 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/jProcessing.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: jProcessing 3 | Version: 0.1 4 | Summary: Japanese NLP Utilities 5 | Home-page: http://www.jaist.ac.jp/~s1010205 6 | Author: KATHURIA Pulkit 7 | Author-email: pulkit@jaist.ac.jp 8 | License: LICENSE.txt 9 | Description: ==================== 10 | Japanese NLP Library 11 | ==================== 12 | 13 | 14 | Requirements 15 | ============ 16 | 17 | - Third Party Dependencies 18 | 19 | - Cabocha Japanese Morphological parser http://sourceforge.net/projects/cabocha/ 20 | 21 | - Python Dependencies 22 | 23 | - ``Python 2.6.*`` or above 24 | 25 | 26 | ``Links`` 27 | --------- 28 | 29 | - All code at jProcessing Repo GitHub_ 30 | 31 | .. _GitHub: https://github.com/kevincobain2000/jProcessing 32 | 33 | - Documentation_ and HomePage_ and Sphinx_ 34 | 35 | .. _Documentation: http://www.jaist.ac.jp/~s1010205/jnlp 36 | 37 | .. _HomePage: http://www.jaist.ac.jp/~s1010205/ 38 | 39 | .. _Sphinx: http://readthedocs.org/docs/jprocessing/en/latest/ 40 | 41 | 42 | - PyPi_ Python Package 43 | 44 | .. _PyPi: http://pypi.python.org/pypi/jProcessing/0.1 45 | 46 | :: 47 | 48 | clone git@github.com:kevincobain2000/jProcessing.git 49 | 50 | 51 | ``Install`` 52 | ----------- 53 | 54 | In ``Terminal`` :: 55 | 56 | >>>bash$ python setup.py install 57 | 58 | History 59 | ------- 60 | 61 | - ``0.2`` 62 | 63 | + Sentiment Analysis of Japanese Text 64 | 65 | - ``0.1`` 66 | + Morphologically Tokenize Japanese Sentence 67 | + Kanji / Hiragana / Katakana to Romaji Converter 68 | + Edict Dictionary Search - borrowed 69 | + Edict Examples Search - incomplete 70 | + Sentence Similarity between two JP Sentences 71 | + Run Cabocha(ISO--8859-1 configured) in Python. 
72 | + Longest Common String between Sentences 73 | + Kanji to Katakana Pronunciation 74 | + Hiragana, Katakana Chart Parser 75 | 76 | Contacts 77 | ======== 78 | 79 | - ContactForm_ 80 | - BugReport_ 81 | - Contribute_ 82 | 83 | .. _ContactForm: http://www.jaist.ac.jp/~s1010205/styled-2/index.html 84 | .. _BugReport: http://www.jaist.ac.jp/~s1010205/styled/index.html 85 | .. _Contribute: https://github.com/kevincobain2000/jProcessing 86 | 87 | :Author: `pulkit[at]jaist.ac.jp` [change ``at`` with ``@``] 88 | 89 | 90 | 91 | 92 | 93 | Platform: UNKNOWN 94 | Classifier: Development Status :: 2 - Pre-Alpha 95 | Classifier: Natural Language :: Japanese 96 | Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence 97 | -------------------------------------------------------------------------------- /src/jProcessing.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | MANIFEST.in 2 | README 3 | setup.py 4 | scripts 5 | scripts/vcabocha.py 6 | src/jNlp/__init__.py 7 | src/jNlp/eProcessing.py 8 | src/jNlp/jCabocha.py 9 | src/jNlp/jCabocha.pyc 10 | src/jNlp/jColor.py 11 | src/jNlp/jConvert.py 12 | src/jNlp/jProcessing.py 13 | src/jNlp/jSentiments.py 14 | src/jNlp/jTokenize.py 15 | src/jNlp/vcabocha.py 16 | src/jNlp/aquisition/OpenSubtitles.py 17 | src/jNlp/aquisition/OpenSubtitles.pyc 18 | src/jNlp/aquisition/SubtitleDatabase.py 19 | src/jNlp/aquisition/SubtitleDatabase.pyc 20 | src/jNlp/aquisition/__init__.py 21 | src/jNlp/aquisition/aquire.py 22 | src/jNlp/aquisition/download_subs.xml 23 | src/jNlp/aquisition/movies.txt 24 | src/jNlp/data/JapaneseSentiWordNet.txt 25 | src/jNlp/data/__init__.py 26 | src/jNlp/data/chasen_pos.txt 27 | src/jNlp/data/hiraganaChart.txt 28 | src/jNlp/data/katakanaChart.txt 29 | src/jNlp/edict_search_monash/__init__.py 30 | src/jNlp/edict_search_monash/ambiguous_words.p 31 | src/jNlp/edict_search_monash/edict_examples.p 32 | src/jNlp/edict_search_monash/edict_examples.py 33 | src/jNlp/edict_search_monash/edict_search.py 34 | src/jProcessing.egg-info/PKG-INFO 35 | src/jProcessing.egg-info/SOURCES.txt 36 | src/jProcessing.egg-info/dependency_links.txt 37 | src/jProcessing.egg-info/top_level.txt -------------------------------------------------------------------------------- /src/jProcessing.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/jProcessing.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | jNlp 2 | --------------------------------------------------------------------------------