├── README
├── project.clj
├── resources
    ├── ChangeLog
    ├── INSTALL.txt
    ├── LICENSE.txt
    ├── MANIFEST.in
    ├── Makefile
    ├── NOTICE.txt
    ├── README.md
    ├── README.txt
    ├── RELEASE-HOWTO
    ├── distribute_setup.py
    ├── emacs
    │   ├── doctest-mode.el
    │   ├── psvn.el
    │   ├── pycomplete.el
    │   ├── pycomplete.py
    │   ├── python-mode.el
    │   ├── rst-mode.el
    │   └── rst.el
    ├── examples
    │   ├── grammars
    │   │   ├── Makefile
    │   │   ├── basque_grammars
    │   │   │   ├── basque1.cfg
    │   │   │   ├── basque1.fcfg
    │   │   │   ├── basque1.pcfg
    │   │   │   ├── basque1.regexp
    │   │   │   ├── basque2.cfg
    │   │   │   ├── basque2.fcfg
    │   │   │   ├── basque2.pcfg
    │   │   │   ├── basque2.regexp
    │   │   │   ├── basque3.cfg
    │   │   │   ├── basque3.fcfg
    │   │   │   ├── basque3.regexp
    │   │   │   ├── basque4.regexp
    │   │   │   └── basque5.regexp
    │   │   ├── book_grammars
    │   │   │   ├── background.fol
    │   │   │   ├── discourse.fcfg
    │   │   │   ├── drt.fcfg
    │   │   │   ├── feat0.fcfg
    │   │   │   ├── feat1.fcfg
    │   │   │   ├── german.fcfg
    │   │   │   ├── simple-sem.fcfg
    │   │   │   ├── sql0.fcfg
    │   │   │   ├── sql1.fcfg
    │   │   │   └── storage.fcfg
    │   │   ├── sample_grammars
    │   │   │   ├── background0.fol
    │   │   │   ├── bindop.fcfg
    │   │   │   ├── chat80.fcfg
    │   │   │   ├── chat_pnames.fcfg
    │   │   │   ├── dep_test2.dep
    │   │   │   ├── drt_glue.semtype
    │   │   │   ├── drt_glue_event.semtype
    │   │   │   ├── event.fcfg
    │   │   │   ├── glue.semtype
    │   │   │   ├── glue_event.semtype
    │   │   │   ├── glue_train.conll
    │   │   │   ├── gluesemantics.fcfg
    │   │   │   ├── hole.fcfg
    │   │   │   ├── np.fcfg
    │   │   │   ├── sem0.fcfg
    │   │   │   ├── sem1.fcfg
    │   │   │   ├── sem2.fcfg
    │   │   │   ├── sql.fcfg
    │   │   │   ├── toy.cfg
    │   │   │   └── valuation1.val
    │   │   └── spanish_grammars
    │   │   │   ├── spanish1.cfg
    │   │   │   ├── spanish1.fcfg
    │   │   │   ├── spanish1.pcfg
    │   │   │   ├── spanish1.regexp
    │   │   │   ├── spanish2.cfg
    │   │   │   ├── spanish2.fcfg
    │   │   │   ├── spanish2.pcfg
    │   │   │   ├── spanish2.regexp
    │   │   │   ├── spanish3.cfg
    │   │   │   ├── spanish3.regexp
    │   │   │   ├── spanish4.regexp
    │   │   │   └── spanish5.regexp
    │   ├── school
    │   │   ├── README
    │   │   ├── categories.py
    │   │   ├── count.py
    │   │   ├── generate.py
    │   │   ├── parse1.py
    │   │   ├── parse2.py
    │   │   ├── parse3.py
    │   │   ├── parser.py
    │   │   ├── search.py
    │   │   └── words.py
    │   └── semantics
    │   │   ├── chat.db
    │   │   ├── chat80.cfg
    │   │   ├── chat_pnames.cfg
    │   │   ├── chat_sentences
    │   │   ├── demo_sentences
    │   │   ├── model0.py
    │   │   ├── model1.py
    │   │   ├── sem0.cfg
    │   │   ├── sem1.cfg
    │   │   ├── sem2.cfg
    │   │   ├── sem3.cfg
    │   │   └── syn2sem.py
    ├── javasrc
    │   ├── Makefile
    │   ├── README.txt
    │   └── org
    │   │   └── nltk
    │   │       └── mallet
    │   │           ├── CRFInfo.java
    │   │           ├── RunCRF.java
    │   │           └── TrainCRF.java
    ├── nltk
    │   ├── VERSION
    │   ├── __init__.py
    │   ├── align.py
    │   ├── app
    │   │   ├── __init__.py
    │   │   ├── chartparser_app.py
    │   │   ├── chunkparser_app.py
    │   │   ├── collocations_app.py
    │   │   ├── concordance_app.py
    │   │   ├── nemo_app.py
    │   │   ├── rdparser_app.py
    │   │   ├── srparser_app.py
    │   │   ├── wordfreq_app.py
    │   │   └── wordnet_app.py
    │   ├── book.py
    │   ├── ccg
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── chart.py
    │   │   ├── combinator.py
    │   │   └── lexicon.py
    │   ├── chat
    │   │   ├── __init__.py
    │   │   ├── eliza.py
    │   │   ├── iesha.py
    │   │   ├── rude.py
    │   │   ├── suntsu.py
    │   │   ├── util.py
    │   │   └── zen.py
    │   ├── chunk
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── named_entity.py
    │   │   ├── regexp.py
    │   │   └── util.py
    │   ├── classify
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── decisiontree.py
    │   │   ├── mallet.py
    │   │   ├── maxent.py
    │   │   ├── megam.py
    │   │   ├── naivebayes.py
    │   │   ├── positivenaivebayes.py
    │   │   ├── rte_classify.py
    │   │   ├── scikitlearn.py
    │   │   ├── svm.py
    │   │   ├── tadm.py
    │   │   ├── util.py
    │   │   └── weka.py
    │   ├── cluster
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── em.py
    │   │   ├── gaac.py
    │   │   ├── kmeans.py
    │   │   └── util.py
    │   ├── collocations.py
    │   ├── corpus
    │   │   ├── __init__.py
    │   │   ├── europarl_raw.py
    │   │   ├── reader
    │   │   │   ├── __init__.py
    │   │   │   ├── aligned.py
    │   │   │   ├── api.py
    │   │   │   ├── bnc.py
    │   │   │   ├── bracket_parse.py
    │   │   │   ├── chasen.py
    │   │   │   ├── childes.py
    │   │   │   ├── chunked.py
    │   │   │   ├── cmudict.py
    │   │   │   ├── conll.py
    │   │   │   ├── dependency.py
    │   │   │   ├── ieer.py
    │   │   │   ├── indian.py
    │   │   │   ├── ipipan.py
    │   │   │   ├── knbc.py
    │   │   │   ├── lin.py
    │   │   │   ├── nombank.py
    │   │   │   ├── nps_chat.py
    │   │   │   ├── pl196x.py
    │   │   │   ├── plaintext.py
    │   │   │   ├── ppattach.py
    │   │   │   ├── propbank.py
    │   │   │   ├── rte.py
    │   │   │   ├── semcor.py
    │   │   │   ├── senseval.py
    │   │   │   ├── sinica_treebank.py
    │   │   │   ├── string_category.py
    │   │   │   ├── switchboard.py
    │   │   │   ├── tagged.py
    │   │   │   ├── timit.py
    │   │   │   ├── toolbox.py
    │   │   │   ├── util.py
    │   │   │   ├── verbnet.py
    │   │   │   ├── wordlist.py
    │   │   │   ├── wordnet.py
    │   │   │   ├── xmldocs.py
    │   │   │   └── ycoe.py
    │   │   └── util.py
    │   ├── data.py
    │   ├── decorators.py
    │   ├── downloader.py
    │   ├── draw
    │   │   ├── __init__.py
    │   │   ├── cfg.py
    │   │   ├── dispersion.py
    │   │   ├── table.py
    │   │   ├── tree.py
    │   │   └── util.py
    │   ├── examples
    │   │   ├── __init__.py
    │   │   └── pt.py
    │   ├── featstruct.py
    │   ├── grammar.py
    │   ├── help.py
    │   ├── inference
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── discourse.py
    │   │   ├── mace.py
    │   │   ├── nonmonotonic.py
    │   │   ├── prover9.py
    │   │   ├── resolution.py
    │   │   └── tableau.py
    │   ├── internals.py
    │   ├── lazyimport.py
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── agreement.py
    │   │   ├── artstein_poesio_example.txt
    │   │   ├── association.py
    │   │   ├── confusionmatrix.py
    │   │   ├── distance.py
    │   │   ├── scores.py
    │   │   ├── segmentation.py
    │   │   ├── spearman.py
    │   │   └── windowdiff.py
    │   ├── misc
    │   │   ├── __init__.py
    │   │   ├── babelfish.py
    │   │   ├── chomsky.py
    │   │   ├── minimalset.py
    │   │   ├── sort.py
    │   │   └── wordfinder.py
    │   ├── model
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   └── ngram.py
    │   ├── nltk.jar
    │   ├── parse
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── broker_test.cfg
    │   │   ├── chart.py
    │   │   ├── dependencygraph.py
    │   │   ├── earleychart.py
    │   │   ├── featurechart.py
    │   │   ├── generate.py
    │   │   ├── generate2.py
    │   │   ├── malt.py
    │   │   ├── nonprojectivedependencyparser.py
    │   │   ├── pchart.py
    │   │   ├── projectivedependencyparser.py
    │   │   ├── rd.py
    │   │   ├── sr.py
    │   │   ├── test.cfg
    │   │   ├── util.py
    │   │   └── viterbi.py
    │   ├── probability.py
    │   ├── sem
    │   │   ├── __init__.py
    │   │   ├── boxer.py
    │   │   ├── chat80.py
    │   │   ├── cooper_storage.py
    │   │   ├── drt.py
    │   │   ├── drt_glue_demo.py
    │   │   ├── evaluate.py
    │   │   ├── glue.py
    │   │   ├── hole.py
    │   │   ├── lfg.py
    │   │   ├── linearlogic.py
    │   │   ├── logic.py
    │   │   ├── relextract.py
    │   │   ├── skolemize.py
    │   │   └── util.py
    │   ├── sourcedstring.py
    │   ├── stem
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── isri.py
    │   │   ├── lancaster.py
    │   │   ├── porter.py
    │   │   ├── regexp.py
    │   │   ├── rslp.py
    │   │   ├── snowball.py
    │   │   └── wordnet.py
    │   ├── tag
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── brill.py
    │   │   ├── crf.py
    │   │   ├── hmm.py
    │   │   ├── hunpos.py
    │   │   ├── senna.py
    │   │   ├── simplify.py
    │   │   ├── stanford.py
    │   │   ├── tnt.py
    │   │   └── util.py
    │   ├── test
    │   │   ├── Makefile
    │   │   ├── __init__.py
    │   │   ├── align.doctest
    │   │   ├── all.py
    │   │   ├── ccg.doctest
    │   │   ├── chat80.doctest
    │   │   ├── childes.doctest
    │   │   ├── chunk.doctest
    │   │   ├── classify.doctest
    │   │   ├── collocations.doctest
    │   │   ├── corpus.doctest
    │   │   ├── data.doctest
    │   │   ├── dependency.doctest
    │   │   ├── discourse.doctest
    │   │   ├── doctest_driver.py
    │   │   ├── doctest_nose_plugin.py
    │   │   ├── doctest_utils.py
    │   │   ├── drt.doctest
    │   │   ├── featgram.doctest
    │   │   ├── featstruct.doctest
    │   │   ├── floresta.txt
    │   │   ├── gluesemantics.doctest
    │   │   ├── grammar.doctest
    │   │   ├── grammartestsuites.doctest
    │   │   ├── inference.doctest
    │   │   ├── internals.doctest
    │   │   ├── japanese.doctest
    │   │   ├── logic.doctest
    │   │   ├── metrics.doctest
    │   │   ├── misc.doctest
    │   │   ├── nonmonotonic.doctest
    │   │   ├── onto1.fol
    │   │   ├── parse.doctest
    │   │   ├── portuguese.doctest_latin1
    │   │   ├── portuguese_en.doctest
    │   │   ├── probability.doctest
    │   │   ├── relextract.doctest
    │   │   ├── resolution.doctest
    │   │   ├── runtests.py
    │   │   ├── segmentation.doctest
    │   │   ├── sem3.cfg
    │   │   ├── semantics.doctest
    │   │   ├── simple.doctest
    │   │   ├── sourcedstring.doctest
    │   │   ├── stem.doctest
    │   │   ├── tag.doctest
    │   │   ├── tokenize.doctest
    │   │   ├── toolbox.doctest
    │   │   ├── toy.cfg
    │   │   ├── tree.doctest
    │   │   ├── treetransforms.doctest
    │   │   ├── util.doctest
    │   │   └── wordnet.doctest
    │   ├── text.py
    │   ├── tokenize
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── punkt.py
    │   │   ├── regexp.py
    │   │   ├── sexpr.py
    │   │   ├── simple.py
    │   │   ├── texttiling.py
    │   │   ├── treebank.py
    │   │   └── util.py
    │   ├── toolbox.py
    │   ├── tree.py
    │   ├── treetransforms.py
    │   ├── util.py
    │   └── yamltags.py
    ├── papers
    │   ├── acl-02
    │   │   ├── .cvsignore
    │   │   ├── Makefile
    │   │   ├── acl-02.tex
    │   │   ├── acl.bst
    │   │   ├── acl2002.sty
    │   │   ├── chartparse.eps.gz
    │   │   ├── contest.ps.gz
    │   │   └── nltk.bib
    │   ├── acl-04
    │   │   ├── .cvsignore
    │   │   ├── Makefile
    │   │   ├── acl-04.tex
    │   │   ├── acl.bst
    │   │   ├── acl04.sty
    │   │   ├── chart-matrix.gif
    │   │   ├── chart.eps.gz
    │   │   └── nltk.bib
    │   ├── acl-06
    │   │   ├── acl-06.tex
    │   │   ├── acl.bst
    │   │   ├── colacl06.sty
    │   │   ├── rdparser.eps.gz
    │   │   └── srparser.eps.gz
    │   ├── acl-08
    │   │   ├── acl-08.bib
    │   │   ├── acl-08.tex
    │   │   ├── acl08.sty
    │   │   ├── grammar1.py
    │   │   ├── grammar2.py
    │   │   └── police.py
    │   ├── altw-06
    │   │   ├── acl.bst
    │   │   ├── altw-06.bib
    │   │   ├── altw-06.tex
    │   │   └── colacl06.sty
    │   ├── icon-05
    │   │   ├── acl.bst
    │   │   ├── acl2005.sty
    │   │   └── icon-05.tex
    │   └── iwcs-08
    │   │   ├── drs.png
    │   │   ├── garrette-klein.tar.gz
    │   │   ├── iwcs.doctest
    │   │   ├── lingmacros.sty
    │   │   ├── modules.graffle
    │   │   ├── modules.pdf
    │   │   ├── nltk_iwcs_09.bib
    │   │   └── nltk_iwcs_09.tex
    ├── setup.cfg
    ├── setup.py
    ├── tools
    │   ├── find_deprecated.py
    │   ├── global_replace.py
    │   ├── nltk_term_index.py
    │   ├── nltk_term_index.stoplist
    │   └── svnmime.py
    ├── tox.ini
    └── web
    │   ├── Makefile
    │   ├── api
    │       └── nltk.rst
    │   ├── conf.py
    │   ├── data.rst
    │   ├── dev
    │       ├── jenkins.rst
    │       └── local_testing.rst
    │   ├── images
    │       ├── book.gif
    │       └── tree.gif
    │   ├── index.rst
    │   ├── install.rst
    │   └── news.rst
├── src
    └── clojure_nltk
    │   └── core.clj
└── test
    └── clojure_nltk
        └── core_test.clj


/README:
--------------------------------------------------------------------------------
 1 | # clojure-nltk
 2 | 
 3 | Most of the functionality in the Python-based Natural Language Toolkit (NLTK)
 4 | works in Jython (it has a few dependencies only available in CPython).  However
 5 | with some minor tweaks it is possible to use a sizable subset of NLTK in Jython,
 6 | and by extension, in Clojure.
 7 | 
 8 | ## Usage
 9 | 
10 | (ns clojure-nltk.core
11 |   (:require [clojure-nltk.core :as nltk]))
12 | 
13 | (nltk/init) ; initialize nltk
14 | 
15 | ## Installation
16 | 
17 | To include as a dependency:
18 | 
19 | Copy the config section found at http://clojars.org/clojure-nltk into your
20 | dependencies in your project's project.clj.
21 | 
22 | ## License
23 | 
24 | Copyright (C) 2010-2012 Robert P. Levy
25 | 
26 | Distributed under the Eclipse Public License, the same as Clojure.
27 | 


--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject clojure-nltk "2.0.3-clj-0"
2 |   :description "Python's NLTK for Clojure (interop / partial port)."
3 |   :license {:name "Eclipse Public License"
4 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
5 |   :dependencies [[org.clojure/clojure "1.4.0"]
6 |                  [clojure-python "0.4.1"]]
7 |   :profiles {:dev {:dependencies [[midje "1.4.0"]]}}
8 |   :plugins [[lein-midje "2.0.0"]])
9 | 


--------------------------------------------------------------------------------
/resources/INSTALL.txt:
--------------------------------------------------------------------------------
1 | To install NLTK, run setup.py from an administrator account, e.g.:
2 | 
3 |     sudo python setup.py install
4 | 
5 | For full installation instructions, please see http://nltk.github.com/install.html
6 | 
7 | 


--------------------------------------------------------------------------------
/resources/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2001-2012 NLTK Project
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the 'License');
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |    http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an 'AS IS' BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/resources/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE.txt INSTALL.txt README.txt MANIFEST.in
2 | include setup.py distribute_setup.py
3 | include nltk/nltk.jar
4 | include nltk/test/*.doctest
5 | include nltk/VERSION
6 | recursive-include javasrc *.java *.txt Makefile
7 | global-exclude *~
8 | 


--------------------------------------------------------------------------------
/resources/Makefile:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: source Makefile
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | #	 Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | PYTHON = python
10 | VERSION = $(shell $(PYTHON) -c 'import nltk; print nltk.__version__' | sed '/^Warning: */d')
11 | NLTK_URL = $(shell $(PYTHON) -c 'import nltk; print nltk.__url__' | sed '/^Warning: */d')
12 | 
13 | .PHONY: all clean clean_code
14 | 
15 | all: dist
16 | 
17 | ########################################################################
18 | # TESTING
19 | ########################################################################
20 | 
21 | DOCTEST_DRIVER = nltk/test/doctest_driver.py
22 | DOCTEST_FLAGS = --ellipsis --normalize_whitespace
23 | DOCTEST_FILES = nltk/test/*.doctest
24 | DOCTEST_CODE_FILES = nltk/*.py nltk/*/*.py
25 | 
26 | doctest:
27 | 	$(PYTHON) $(DOCTEST_DRIVER) $(DOCTEST_FLAGS) $(DOCTEST_FILES)
28 | 
29 | doctest_code:
30 | 	$(PYTHON) $(DOCTEST_DRIVER) $(DOCTEST_FLAGS) $(DOCTEST_CODE_FILES)
31 | 
32 | demotest:
33 | 	find nltk -name "*.py"\
34 |         -and -not -path *misc* \
35 |         -and -not -name brown_ic.py \
36 |         -exec echo ==== '{}' ==== \; -exec python '{}' \;
37 | 
38 | ########################################################################
39 | # JAVA
40 | ########################################################################
41 | 
42 | jar: nltk/nltk.jar
43 | 
44 | JAVA_SRC = $(shell find javasrc/org/nltk -name '*.java')
45 | nltk/nltk.jar: $(JAVA_SRC)
46 | 	$(MAKE) -C javasrc jar
47 | 	cp javasrc/nltk.jar nltk/nltk.jar
48 | 
49 | ########################################################################
50 | # DISTRIBUTIONS
51 | ########################################################################
52 | 
53 | dist: zipdist gztardist windist
54 | 
55 | gztardist: clean_code
56 | 	$(PYTHON) setup.py -q sdist --format=gztar
57 | zipdist: clean_code
58 | 	$(PYTHON) setup.py -q sdist --format=zip
59 | windist: clean_code
60 | 	$(PYTHON) setup.py -q bdist --format=wininst --plat-name=win32
61 | 
62 | ########################################################################
63 | # CLEAN
64 | ########################################################################
65 | 
66 | clean: clean_code
67 | 	rm -rf build iso dist api MANIFEST nltk-$(VERSION) nltk.egg-info
68 | 	$(MAKE) -C javasrc clean
69 | #	rm -f nltk/nltk.jar
70 | 
71 | clean_code:
72 | 	rm -f `find . -name '*.pyc'`
73 | 	rm -f `find . -name '*.pyo'`
74 | 	rm -f `find . -name '*~'`
75 | 	rm -f MANIFEST # regenerate manifest from MANIFEST.in
76 | 


--------------------------------------------------------------------------------
/resources/NOTICE.txt:
--------------------------------------------------------------------------------
1 | Natural Language Toolkit (NLTK) http://www.nltk.org/
2 | 
3 | Copyright (C) 2001-2012 NLTK Project
4 | 
5 | Bird, Steven, Edward Loper and Ewan Klein (2009).
6 | Natural Language Processing with Python.  O'Reilly Media Inc.
7 | 


--------------------------------------------------------------------------------
/resources/README.md:
--------------------------------------------------------------------------------
 1 | Natural Language Toolkit (NLTK)   www.nltk.org
 2 | ====================================
 3 | 
 4 | Authors
 5 | ----------------
 6 |   - Steven Bird <stevenbird1@gmail.com>
 7 |   - Edward Loper <edloper@gmail.com>
 8 |   - Ewan Klein <ewan@inf.ed.ac.uk>
 9 | 
10 | Copyright (C) 2001-2012 NLTK Project
11 | 
12 | For license information, see LICENSE.txt
13 | 
14 | NLTK -- the Natural Language Toolkit -- is a suite of open source
15 | Python modules, data sets and tutorials supporting research and
16 | development in Natural Language Processing.
17 | 
18 | Documentation
19 | ------------------------
20 | A substantial amount of documentation about NLTK is available:
21 | 
22 |   - The [NLTK website](http://nltk.org/) has information about the NLTK community. 
23 | 
24 |   - The [NLTK Book](https://sites.google.com/site/naturallanguagetoolkit/book) covers a wide range of introductory topics in NLP, and
25 |     shows how to do all the processing tasks using the toolkit.
26 | 
27 |   - The [API Documentation](http://nltk.github.com/api/) describes every module,
28 |     interface, class, method, function, and variable in the toolkit.
29 |   
30 | Mailing Lists
31 | --------------------
32 |  There are several mailing lists associated with NLTK:
33 | 
34 |   - [nltk](http://groups.google.com/group/nltk): Public information and announcements about NLTK (very low volume).
35 |   - [nltk-users](http://groups.google.com/group/nltk-users): Discussions amongst NLTK users.
36 |   - [nltk-dev](http://groups.google.com/group/nltk-dev): Discussions amongst NLTK developers.
37 |   - [nltk-translation](http://groups.google.com/group/nltk-translation): Discussions about translating the NLTK book.
38 |       
39 | 
40 | Contributing
41 | ------------------
42 | If you would like to contribute to NLTK, please post your ideas to nltk-dev, or [fork nltk on github](https://github.com/nltk/nltk).
43 | 
44 | Donating
45 | ---------------
46 | Have you found the toolkit helpful?  Please support NLTK development 
47 | by donating to the project via PayPal, using the link on the NLTK homepage.
48 | 
49 | Redistributing
50 | ----------------------
51 | NLTK source code is distributed under the Apache 2.0 License.  
52 | NLTK documentation is distributed under the Creative Commons Attribution-Noncommercial-No Derivative Works 3.0 United States license.  
53 | NLTK corpora are provided under the terms given in the README file for each corpus; all are redistributable, and available for non-commercial use.  
54 | NLTK may be freely redistributed, subject to the provisions of these licenses.  
55 | 
56 | Citing
57 | ---------
58 |  If you publish work that uses NLTK, please cite the NLTK book, as follows:
59 | 
60 |     Bird, Steven, Edward Loper and Ewan Klein (2009).
61 |     Natural Language Processing with Python.  O'Reilly Media Inc.
62 | 


--------------------------------------------------------------------------------
/resources/README.txt:
--------------------------------------------------------------------------------
 1 | Natural Language Toolkit (NLTK)   www.nltk.org
 2 | 
 3 | Authors: Steven Bird <sb@csse.unimelb.edu.au>
 4 |          Edward Loper <edloper@gradient.cis.upenn.edu>
 5 |          Ewan Klein <ewan@inf.ed.ac.uk>
 6 | 
 7 | Copyright (C) 2001-2012 NLTK Project
 8 | 
 9 | For license information, see LICENSE.txt
10 | 
11 | NLTK -- the Natural Language Toolkit -- is a suite of open source
12 | Python modules, data sets and tutorials supporting research and
13 | development in Natural Language Processing.
14 | 
15 | Documentation: A substantial amount of documentation about how
16 | to use NLTK, including a textbook and API documention, is
17 | available from the NLTK website: http://www.nltk.org/
18 | 
19 |   - The book covers a wide range of introductory topics in NLP, and
20 |     shows how to do all the processing tasks using the toolkit.
21 | 
22 |   - The toolkit's reference documentation describes every module,
23 |     interface, class, method, function, and variable in the toolkit.
24 |     This documentation should be useful to both users and developers.  
25 | 
26 | Mailing Lists: There are several mailing lists associated with NLTK:
27 | 
28 |   - nltk: Public information and announcements about NLTK (very low volume)
29 |       http://groups.google.com/group/nltk
30 |   - nltk-users: Discussions amongst NLTK users
31 |       http://groups.google.com/group/nltk-users
32 |   - nltk-dev: Discussions amongst NLTK developers
33 |       http://groups.google.com/group/nltk-dev
34 |   - nltk-translation: Discussions about translating the NLTK book
35 |       http://groups.google.com/group/nltk-translation
36 |   - nltk-commits: Subversion commit logs for NLTK
37 |       http://groups.google.com/group/nltk-commits
38 | 
39 | Contributing: If you would like to contribute to NLTK,
40 |     please see http://www.nltk.org/contribute
41 | 
42 | Donating: Have you found the toolkit helpful?  Please support NLTK development
43 |     by donating to the project via PayPal, using the link on the NLTK homepage.
44 | 
45 | Redistributing: NLTK source code is distributed under the Apache 2.0 License.
46 |     NLTK documentation is distributed under the Creative Commons
47 |     Attribution-Noncommercial-No Derivative Works 3.0 United States license.
48 |     NLTK corpora are provided under the terms given in the README file
49 |     for each corpus; all are redistributable, and available for non-commercial use.
50 |     NLTK may be freely redistributed, subject to the provisions of these licenses.
51 | 
52 | Citing: If you publish work that uses NLTK, please cite the NLTK book, as follows:
53 | 
54 |     Bird, Steven, Edward Loper and Ewan Klein (2009).
55 |     Natural Language Processing with Python.  O'Reilly Media Inc.
56 | 


--------------------------------------------------------------------------------
/resources/RELEASE-HOWTO:
--------------------------------------------------------------------------------
 1 | Building an NLTK distribution
 2 | ----------------------------------
 3 | 
 4 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 5 | @@@ BUILD
 6 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 7 | 
 8 | A. PREPARATION
 9 | 
10 |   1. Check that installation instructions are up-to-date
11 |   2. Update the data index (make data_index) and commit
12 |   3. Update the ChangeLog (for nltk, nltk_data)
13 |        git log --since=20XX-YY-ZZ
14 |   4. install the new version, since its the installed code that is checked
15 |   5. cd nltk/test; make (run the tests in nltk.test)
16 |   6. make demotest (run the demonstration code included in many modules)
17 | 
18 | B. BUILD
19 | 
20 |   1. Modify nltk/VERSION with the version number and commit
21 |   2. Make dist
22 |   ?. (cd ../nltk_contrib; make dist???)
23 | 
24 | D. RELEASE
25 | 
26 |   1. Update the news page in nltk/web/news.rst
27 |   2. git tag -a 2.X.Y -m "version 2.X.Y"
28 |   3. sudo python setup.py register
29 |   4. Log in to http://pypi.python.org/pypi and upload distributions
30 |   5. post announcement to NLTK the mailing lists:
31 |        nltk-dev (for beta releases)
32 |        nltk (for final releases)
33 |   6. post announcement to external mailing lists, for major N.N releases only
34 |        CORPORA@uib.no, linguist@linguistlist.org,
35 |        PythonSIL@lists.sil.org, edu-sig@python.org
36 |        mailing lists for any local courses using NLTK
37 | 
38 | 
39 | 
40 | 
41 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
42 | @@@ BOOK BUILD
43 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
44 | 
45 | The build requires docutils, pdflatex, python imaging library, epydoc,
46 |   cdrtools, ImageMagick
47 | 
48 |   1. Check out a clean copy of the subversion repository (or make clean)
49 |      and install locally with sudo python setup.py install; make clean
50 |   2. make doc (slow; see doc/ for the results) and commit
51 | 
52 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
53 | @@@ INSTALL
54 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
55 | 
56 | D. INSTALLATION
57 | 
58 |   1. download and install new version on all machines
59 |   2. contact relevant sysads to install new version
60 |   3. copy dist directory to memory stick
61 | 
62 | E. NEW VERSION NUMBER (optional)
63 | 
64 |   1. update the version numbers in the repository so that builds
65 |      off the repository don't have the same version as the release,
66 |      e.g. after release 0.9.6, update repository version to 0.9.7a (alpha)
67 | 


--------------------------------------------------------------------------------
/resources/emacs/pycomplete.el:
--------------------------------------------------------------------------------
 1 | ;;; Complete symbols at point using Pymacs.
 2 | 
 3 | ;;; See pycomplete.py for the Python side of things and a short description
 4 | ;;; of what to expect.
 5 | 
 6 | (require 'pymacs)
 7 | (require 'python-mode)
 8 | 
 9 | (pymacs-load "pycomplete")
10 | 
11 | (defun py-complete ()
12 |   (interactive)
13 |   (let ((pymacs-forget-mutability t)) 
14 |     (insert (pycomplete-pycomplete (py-symbol-near-point)
15 | 				   (py-find-global-imports)))))
16 | 
17 | (defun py-find-global-imports ()
18 |   (save-excursion
19 |     (let (first-class-or-def imports)
20 |       (goto-char (point-min))
21 |       (setq first-class-or-def
22 | 	    (re-search-forward "^ *\\(def\\|class\\) " nil t))
23 |       (goto-char (point-min))
24 |       (setq imports nil)
25 |       (while (re-search-forward
26 | 	      "^\\(import \\|from \\([A-Za-z_][A-Za-z_0-9]*\\) import \\).*"
27 | 	      nil t)
28 | 	(setq imports (append imports
29 | 			      (list (buffer-substring
30 | 				     (match-beginning 0)
31 | 				     (match-end 0))))))
32 |       imports)))
33 | 
34 | (define-key py-mode-map "\M-\C-i"  'py-complete)
35 | 
36 | (provide 'pycomplete)
37 | 


--------------------------------------------------------------------------------
/resources/emacs/pycomplete.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | Python dot expression completion using Pymacs.
 4 | 
 5 | This almost certainly needs work, but if you add
 6 | 
 7 |     (require 'pycomplete)
 8 | 
 9 | to your .xemacs/init.el file (untried w/ GNU Emacs so far) and have Pymacs
10 | installed, when you hit M-TAB it will try to complete the dot expression
11 | before point.  For example, given this import at the top of the file:
12 | 
13 |     import time
14 | 
15 | typing "time.cl" then hitting M-TAB should complete "time.clock".
16 | 
17 | This is unlikely to be done the way Emacs completion ought to be done, but
18 | it's a start.  Perhaps someone with more Emacs mojo can take this stuff and
19 | do it right.
20 | 
21 | See pycomplete.el for the Emacs Lisp side of things.
22 | """
23 | 
24 | import sys
25 | import os.path
26 | 
27 | try:
28 |     x = set
29 | except NameError:
30 |     from sets import Set as set
31 | else:
32 |     del x
33 | 
34 | def get_all_completions(s, imports=None):
35 |     """Return contextual completion of s (string of >= zero chars).
36 | 
37 |     If given, imports is a list of import statements to be executed first.
38 |     """
39 |     locald = {}
40 |     if imports is not None:
41 |         for stmt in imports:
42 |             try:
43 |                 exec stmt in globals(), locald
44 |             except TypeError:
45 |                 raise TypeError, "invalid type: %s" % stmt
46 | 
47 |     dots = s.split(".")
48 |     if not s or len(dots) == 1:
49 |         keys = set()
50 |         keys.update(locald.keys())
51 |         keys.update(globals().keys())
52 |         import __builtin__
53 |         keys.update(dir(__builtin__))
54 |         keys = list(keys)
55 |         keys.sort()
56 |         if s:
57 |             return [k for k in keys if k.startswith(s)]
58 |         else:
59 |             return keys
60 | 
61 |     sym = None
62 |     for i in range(1, len(dots)):
63 |         s = ".".join(dots[:i])
64 |         try:
65 |             sym = eval(s, globals(), locald)
66 |         except NameError:
67 |             try:
68 |                 sym = __import__(s, globals(), locald, [])
69 |             except ImportError:
70 |                 return []
71 |     if sym is not None:
72 |         s = dots[-1]
73 |         return [k for k in dir(sym) if k.startswith(s)]
74 | 
75 | def pycomplete(s, imports=None):
76 |     completions = get_all_completions(s, imports)
77 |     dots = s.split(".")
78 |     return os.path.commonprefix([k[len(dots[-1]):] for k in completions])
79 | 
80 | if __name__ == "__main__":
81 |     print "<empty> ->", pycomplete("")
82 |     print "sys.get ->", pycomplete("sys.get")
83 |     print "sy ->", pycomplete("sy")
84 |     print "sy (sys in context) ->", pycomplete("sy", imports=["import sys"])
85 |     print "foo. ->", pycomplete("foo.")
86 |     print "Enc (email * imported) ->",
87 |     print pycomplete("Enc", imports=["from email import *"])
88 |     print "E (email * imported) ->",
89 |     print pycomplete("E", imports=["from email import *"])
90 | 
91 |     print "Enc ->", pycomplete("Enc")
92 |     print "E ->", pycomplete("E")
93 | 
94 | # Local Variables :
95 | # pymacs-auto-reload : t
96 | # End :
97 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/Makefile:
--------------------------------------------------------------------------------
 1 | # NLTK: Documentation Makefile
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | DATADIR =  ../../../nltk_data
 9 | PUBLISH = $(DATADIR)/packages/grammars
10 | 
11 | PACKAGE_DIRS = book_grammars sample_grammars #basque_grammars spanish_grammars
12 | PACKAGES := $(addsuffix .zip, $(PACKAGE_DIRS))
13 | 
14 | ZIP = zip
15 | 
16 | define remove
17 |   $(if $(wildcard $1), rm $1,)
18 | endef
19 | 
20 | all: publish
21 | 
22 | ci:
23 | 	git ci -m "updated grammar files" 
24 | 
25 | zip: clean $(PACKAGES)
26 | 
27 | 
28 | clean:	
29 | 	$(call remove, *.zip)
30 | 
31 | %.zip: %
32 | 	$(ZIP) -r $< $<
33 | 	git add *zip
34 | 
35 | publish: zip
36 | 	cp $(PACKAGES) $(PUBLISH)
37 | 	$(MAKE) -C $(DATADIR) grammars
38 | 	$(MAKE) -C $(DATADIR) pkg_index
39 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque1.cfg:
--------------------------------------------------------------------------------
 1 | 	P -> IS	AS
 2 | 	AS -> IS ADI
 3 | 	AS -> ADI
 4 | 	IS -> IM erl_atz
 5 | 	IM -> ize_arr
 6 | 	IM -> ize_izb
 7 | 	ADI -> adt
 8 | 	erl_atz -> "k" | "a"
 9 | 	ize_arr -> "ardo" | "egunkari" | "baloi"
10 | 	ize_izb -> "Pintxo" | "Kepa" 
11 | 	adt -> "dakar" | "darama"
12 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque1.fcfg:
--------------------------------------------------------------------------------
 1 | % start AS
 2 | # ############################
 3 | # Grammar Rules
 4 | # ############################
 5 | # AS expansion rules
 6 | AS[ergnum=?n1, absnum=?n2] -> IS[kas=erg, num=?n1] AS[ergnum=?n1, absnum=?n2]
 7 | AS[ergnum=?n1, absnum=?n2] -> AS[ergnum=?n1, absnum=?n2] IS[kas=erg, num=?n1] 
 8 | AS[ergnum=?n1, absnum=?n2] -> IS[kas=abs, num=?n2] AS[ergnum=?n1, absnum=?n2]
 9 | AS[ergnum=?n1, absnum=?n2] -> AS[ergnum=?n1, absnum=?n2] IS[kas=abs, num=?n2] 
10 | IS[kas=?k, num=?n] -> ize[azp=arr] knmdek[kas=?k, num=?n]
11 | AS[ergnum=?n1, absnum=?n2] -> adt[ergnum=?n1, absnum=?n2]
12 | # ############################
13 | # Lexicon
14 | # ############################
15 | adt[ergnum=hu, absnum=hu] -> 'dakar' | 'darama'
16 | adt[ergnum=hk, absnum=hu] -> 'dakarte' | 'daramate'
17 | knmdek[kas=erg, num=hu] -> 'ak'
18 | knmdek[kas=erg, num=hk] -> 'ek'
19 | knmdek[kas=abs, num=hk] -> 'ak'
20 | knmdek[kas=abs, num=hu] -> 'a'
21 | ize[azp=arr] -> 'zakur' | 'gizon'
22 | 
23 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque1.pcfg:
--------------------------------------------------------------------------------
 1 | 	as -> mendekoa as [0.15]
 2 | 	as -> adlg mendekoa as [0.31]
 3 | 	as -> adlg adlg mendekoa as [0.08]
 4 | 	as -> adi adl [0.46]
 5 | 	mendekoa -> adlg mendekoa [0.37]
 6 | 	mendekoa -> adlg adlg mendekoa [0.09]
 7 | 	mendekoa -> 'joatea' [0.18]
 8 | 	mendekoa -> 'joateko' [0.27]
 9 | 	mendekoa -> 'sartzera' [0.09]
10 | 	adi -> 'esan' [0.5]
11 | 	adi -> 'debekatzen' [0.33]
12 | 	adi -> 'eraman' [0.17]
13 | 	adl -> 'zuen' [0.17]
14 | 	adl -> 'zioten' [0.83]
15 | 	adlg -> 'bozgorailuarekin' [0.28]
16 | 	adlg -> 'euskal_presoekin' [0.18]
17 | 	adlg -> 'epaitegian' [0.09]
18 | 	adlg -> 'mendira' [0.18]
19 | 	adlg -> 'ejertzitoan' [0.09]
20 | 	adlg -> 'derrigorrean' [0.09]
21 | 	adlg -> 'lagunekin' [0.09]
22 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque1.regexp:
--------------------------------------------------------------------------------
1 | NP: {<IZE.*>+<ADJ.*>*<DET.*>*} """ # adjetibo edo determinatzaileei loturiko izenak nahiz izen segidak topatzen ditu
2 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque2.cfg:
--------------------------------------------------------------------------------
 1 | 	S -> is as
 2 | 	is -> ize adj | ior
 3 | 	ize -> 'gaizkile' | 'epaile' | 'bizilagun'
 4 | 	adj -> 'gaiztoek' | 'gaiztoak' | 'kanpotarrak' | 'kanpotarrek' | 'berriak' | 'berriek'
 5 | 	ior -> 'haiek' | 'hark'
 6 | 	as -> mendekoa as | adlg mendekoa as | adlg adlg mendekoa as | adi adl
 7 | 	mendekoa -> adlg mendekoa | adlg adlg mendekoa | 'joatea' | 'joateko' | 'sartzera'
 8 | 	adi -> 'esan' | 'debekatzen' | 'eraman'
 9 | 	adl -> 'zuen' |'zioten' 
10 | 	adlg -> 'bozgorailuarekin' | 'euskal_presoekin' | 'epaitegian' | 'mendira' | 'ejertzitoan' | 'derrigorrean' | 'lagunekin'
11 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque2.fcfg:
--------------------------------------------------------------------------------
 1 | % start S
 2 | # ############################
 3 | # Grammar Rules
 4 | # ############################
 5 | S -> IS[kas=erg] AS/IS
 6 | # IS erregelak
 7 | IS[kas=?k, num=?n] -> ize[azp=arr] knmdek[kas=?k, num=?n]
 8 | IS[kas=?k, num=?n] -> ize[azp=ber] knmdek[kas=?k, num=?n]
 9 | IS[kas=?k, num=?n]/IS ->
10 | # AS erregelak
11 | AS[ergnum=?n1, absnum=?n2]/?x -> IS[kas=abs, num=?n1]/?x AS[ergnum=?n1, absnum=?n2]
12 | AS[ergnum=?n1, absnum=?n2] -> adi adl[ergnum=?n1, absnum=?n2]
13 | # ############################
14 | # Lexicon
15 | # ############################
16 | knmdek[kas=erg, num=hu] -> 'ak'
17 | knmdek[kas=erg, num=hk] -> 'ek'
18 | knmdek[kas=abs, num=hk] -> 'ak'
19 | knmdek[kas=abs, num=hu] -> 'a'
20 | ize[azp=arr] -> 'bizilagun' | 'aita' | 'gizon' | 'emakume'
21 | ize[azp=ber] -> 'Kepa' | 'Ainara'
22 | adi -> 'ekarri' | 'eraman' | 'puskatu' | 'lapurtu'
23 | adl[ergnum=hu, absnum=hu]  -> 'du'      | 'zuen'
24 | adl[ergnum=hk, absnum=hu]  -> 'dute'    | 'zuten'
25 | adl[ergnum=hu, absnum=hk]  -> 'ditu'    | 'zituen'
26 | adl[ergnum=hk, absnum=hk]  -> 'dituzte' | 'zituzten'
27 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque2.pcfg:
--------------------------------------------------------------------------------
1 | 	IS -> IZE_ARR [0.5] | IZE_ARR ADJ [0.3] | IS LOT IS [0.2]
2 | 	IZE_ARR -> 'gizon' [0.1] | 'emakume' [0.2] | 'ume' [0.3] | IZE_ARR LOT IZE_ARR [0.4]
3 | 	ADJ -> 'zaharrak' [0.4] | 'gazteak' [0.6] 
4 | 	LOT -> 'eta' [0.9] | 'edo' [0.1]
5 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque2.regexp:
--------------------------------------------------------------------------------
1 | 	NP: {<IZE.*><ADJ.*>*<DET.*>*} # adjetibo edo determinatzaileei loturiko izenak topatzen ditu
2 | 	NP: {<IZE.*>+} # izen segidak topatzen ditu
3 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque3.cfg:
--------------------------------------------------------------------------------
1 | 	IS -> IZE_ARR | IZE_ARR ADJ | IS LOT IS
2 | 	IZE_ARR -> 'gizon' | 'emakume' | 'ume' | IZE_ARR LOT IZE_ARR
3 | 	ADJ -> 'zaharrak' | 'gazteak' 
4 | 	LOT -> 'eta' | 'edo'
5 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque3.fcfg:
--------------------------------------------------------------------------------
 1 | % start S
 2 | # ############################
 3 | # Grammar Rules
 4 | # ############################
 5 | 
 6 | ## NORK-NOR Kasuak
 7 | 
 8 | S -> IS[kas=erg] AS/IS
 9 | # IS erregelak
10 | IS[kas=?k, num=?n] -> ize[azp=arr] knmdek[kas=?k, num=?n]
11 | IS[kas=?k, num=?n] -> ize[azp=ber] knmdek[kas=?k, num=?n]
12 | 
13 | IS[kas=?k, num=?n]/IS ->
14 | 
15 | # AS erregelak
16 | AS[ergnum=?n1, absnum=?n2]/?x -> IS[kas=abs, num=?n1]/?x AS[ergnum=?n1, absnum=?n2]
17 | AS[ergnum=?n1, absnum=?n2] -> adi adl[ergnum=?n1, absnum=?n2]
18 | # ############################
19 | # Lexicon
20 | # ############################
21 | 
22 | knmdek[kas=erg, num=hu] -> 'ak'
23 | knmdek[kas=erg, num=hk] -> 'ek'
24 | 
25 | knmdek[kas=abs, num=hk] -> 'ak'
26 | knmdek[kas=abs, num=hu] -> 'a'
27 | 
28 | ize[azp=arr] -> 'bizilagun' | 'aita' | 'gizon' | 'emakume'
29 | ize[azp=ber] -> 'Kepa' | 'Ainara'
30 | 
31 | adi -> 'ekarri' | 'eraman' | 'puskatu' | 'lapurtu'
32 | 
33 | adl[ergnum=hu, absnum=hu]  -> 'du'      | 'zuen'
34 | adl[ergnum=hk, absnum=hu]  -> 'dute'    | 'zuten'
35 | adl[ergnum=hu, absnum=hk]  -> 'ditu'    | 'zituen'
36 | adl[ergnum=hk, absnum=hk]  -> 'dituzte' | 'zituzten'
37 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque3.regexp:
--------------------------------------------------------------------------------
1 | IS:
2 |      {<.*>+}           # Edozer Onartzen Duen Chunkerra
3 |      }<ADI.*|ADL.*|ADT.*|PUNT.*|POST.*|LOT.*|ADB.*>+{      # Chink Bezala Barneratu Aditzak (ADI.*, ADT.* eta ADL.*), Adberbioak (ADB.*), Preposizioak (POST.*), Loturak (LOT.*) Eta Puntuazio Ikurrak (PUNT.*)
4 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque4.regexp:
--------------------------------------------------------------------------------
1 |   IS: {(<ADJ.*>*<DET.*>*<IZE.*>+<ADJ.*>*<DET.*>*)*} #noun phrase chunks
2 |   AS: {(<ADI.*|ADL.*|ADT.*>*)+} # verb phrase chunks
3 |   PS: {<POST.*>+} # prepositional phrase chunks
4 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/basque_grammars/basque5.regexp:
--------------------------------------------------------------------------------
1 |   IS: {(<ADJ.*>*<DET.*>*<IZE.*>*<ADJ.*>*<DET.*>*)*} #noun phrase chunks
2 |   AS: {(<ADI.*|ADL.*|ADT.*>+)+<POST.*>*}            # verb phrase chunks
3 |   PS: {<POST.*>+} 				    # prepositional phrase chunks
4 |   S:  {<IS><AS>}
5 |       {<AS><IS>}				    # Chunk NP, VP
6 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/background.fol:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: background1.fol
 2 | ##
 3 | ## Illustration of simple knowledge base for use with inference tools.
 4 | ## To accompany sem4.fcfg
 5 | ##
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | all x. (boxerdog(x) -> dog(x))
11 | all x. (boxer(x) -> person(x))
12 | 
13 | all x. (-(dog(x) & person(x)))
14 | 
15 | all x. (married(x) <-> exists y. marry(x,y)) 
16 | all x. (bark(x) -> dog(x))
17 | 
18 | all x. all y. (marry(x,y) -> (person(x) & person(y)))
19 | 
20 | (-(Vincent = Mia))
21 | (-(Vincent = Fido))
22 | (-(Mia = Fido))
23 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/feat0.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: feat0.fcfg
 2 | ##
 3 | ## First example of a feature-based grammar for English, illustrating
 4 | ## value-sharing of NUM and TENSE features.
 5 | ## Used in Feature-Based Grammars chapter.
 6 | ## 
 7 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 8 | ## URL: <http://nltk.sourceforge.net>
 9 | ## For license information, see LICENSE.TXT
10 | 
11 | % start S
12 | # ###################
13 | # Grammar Productions
14 | # ###################
15 | 
16 | # S expansion productions
17 | S -> NP[NUM=?n] VP[NUM=?n]
18 | 
19 | # NP expansion productions
20 | NP[NUM=?n] -> N[NUM=?n] 
21 | NP[NUM=?n] -> PropN[NUM=?n] 
22 | NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
23 | NP[NUM=pl] -> N[NUM=pl] 
24 | 
25 | # VP expansion productions
26 | VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
27 | VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
28 | 
29 | # ###################
30 | # Lexical Productions
31 | # ###################
32 | 
33 | Det[NUM=sg] -> 'this' | 'every'
34 | Det[NUM=pl] -> 'these' | 'all'
35 | Det -> 'the' | 'some' | 'several'
36 | 
37 | PropN[NUM=sg]-> 'Kim' | 'Jody'
38 | 
39 | N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
40 | N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children' 
41 | 
42 | IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
43 | TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
44 | 
45 | IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
46 | TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
47 | 
48 | IV[TENSE=past] -> 'disappeared' | 'walked'
49 | TV[TENSE=past] -> 'saw' | 'liked'
50 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/feat1.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: feat1.fcfg
 2 | ##
 3 | ## Second example of a feature-based grammar, illustrating
 4 | ## SUBCAT and slash features. Also introduces SBar and embedded
 5 | ## clauses.
 6 | ## Used in Feature-Based Grammars chapter.
 7 | ## 
 8 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 9 | ## URL: <http://nltk.sourceforge.net>
10 | ## For license information, see LICENSE.TXT
11 | 
12 | % start S
13 | # ###################
14 | # Grammar Productions
15 | # ###################
16 | 
17 | S[-INV] -> NP VP
18 | S[-INV]/?x -> NP VP/?x
19 | 
20 | S[-INV] -> NP S/NP
21 | S[-INV] -> Adv[+NEG] S[+INV]
22 | 
23 | S[+INV] -> V[+AUX] NP VP
24 | S[+INV]/?x -> V[+AUX] NP VP/?x
25 | 
26 | SBar -> Comp S[-INV]
27 | SBar/?x -> Comp S[-INV]/?x
28 | 
29 | VP -> V[SUBCAT=intrans, -AUX]
30 | 
31 | VP -> V[SUBCAT=trans, -AUX] NP
32 | VP/?x -> V[SUBCAT=trans, -AUX] NP/?x
33 | 
34 | VP -> V[SUBCAT=clause, -AUX] SBar
35 | VP/?x -> V[SUBCAT=clause, -AUX] SBar/?x
36 | 
37 | VP -> V[+AUX] VP
38 | VP/?x -> V[+AUX] VP/?x
39 | 
40 | # ###################
41 | # Lexical Productions
42 | # ###################
43 | V[SUBCAT=intrans, -AUX] -> 'walk' | 'sing'
44 | V[SUBCAT=trans, -AUX] -> 'see' | 'like'
45 | V[SUBCAT=clause, -AUX] -> 'say' | 'claim'
46 | V[+AUX] -> 'do' | 'can'
47 | 
48 | NP[-WH] -> 'you' | 'cats'
49 | NP[+WH] -> 'who'
50 | 
51 | Adv[+NEG] -> 'rarely' | 'never'
52 | 
53 | NP/NP ->
54 | 
55 | Comp -> 'that'
56 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/german.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: german.fcfg
 2 | ##
 3 | ## Example of a feature-based grammar for German, illustrating
 4 | ## CASE and AGR features (PER, GND, NUM) working as a bundle.
 5 | ## Used in Feature-Based Grammars chapter.
 6 | ## 
 7 | ## Author: Michaela Atterer <atterer@ims.uni-stuttgart.de> 
 8 | ##         Ewan Klein <ewan@inf.ed.ac.uk> 
 9 | ##
10 | ## Plural transitive verbs productions by Jordan Boyd-Graber (ezubaric at users.sourceforge.net)
11 | 
12 | % start S
13 | #####################
14 | # Grammar Productions
15 | #####################
16 | S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
17 | 
18 | NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
19 | NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
20 | 
21 | VP[AGR=?a] -> IV[AGR=?a]
22 | VP[AGR=?a] -> TV[OBJCASE=?c, AGR=?a] NP[CASE=?c]
23 | 
24 | #####################
25 | # Lexical Productions
26 | #####################
27 | # Singular determiners
28 | 
29 | # masc
30 | Det[CASE=nom, AGR=[GND=masc,PER=3,NUM=sg]] -> 'der' 
31 | Det[CASE=dat, AGR=[GND=masc,PER=3,NUM=sg]] -> 'dem'
32 | Det[CASE=acc, AGR=[GND=masc,PER=3,NUM=sg]] -> 'den'
33 | 
34 | # fem
35 | Det[CASE=nom, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die' 
36 | Det[CASE=dat, AGR=[GND=fem,PER=3,NUM=sg]] -> 'der'
37 | Det[CASE=acc, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die' 
38 | 
39 | # Plural determiners
40 | Det[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'die' 
41 | Det[CASE=dat, AGR=[PER=3,NUM=pl]] -> 'den' 
42 | Det[CASE=acc, AGR=[PER=3,NUM=pl]] -> 'die' 
43 | 
44 | # Nouns
45 | N[AGR=[GND=masc,PER=3,NUM=sg]] -> 'Hund'
46 | N[CASE=nom, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
47 | N[CASE=dat, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunden'
48 | N[CASE=acc, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
49 | 
50 | N[AGR=[GND=fem,PER=3,NUM=sg]] -> 'Katze'
51 | N[AGR=[GND=fem,PER=3,NUM=pl]] -> 'Katzen'
52 | 
53 | # Pronouns
54 | PRO[CASE=nom, AGR=[PER=1,NUM=sg]] -> 'ich'
55 | PRO[CASE=acc, AGR=[PER=1,NUM=sg]] -> 'mich'
56 | PRO[CASE=dat, AGR=[PER=1,NUM=sg]] -> 'mir'
57 | PRO[CASE=nom, AGR=[PER=2,NUM=sg]] -> 'du'
58 | PRO[CASE=nom, AGR=[PER=3,NUM=sg]] -> 'er' | 'sie' | 'es'
59 | PRO[CASE=nom, AGR=[PER=1,NUM=pl]] -> 'wir'
60 | PRO[CASE=acc, AGR=[PER=1,NUM=pl]] -> 'uns'
61 | PRO[CASE=dat, AGR=[PER=1,NUM=pl]] -> 'uns'
62 | PRO[CASE=nom, AGR=[PER=2,NUM=pl]] -> 'ihr'
63 | PRO[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'sie'
64 | 
65 | # Verbs
66 | IV[AGR=[NUM=sg,PER=1]] -> 'komme'
67 | IV[AGR=[NUM=sg,PER=2]] -> 'kommst'
68 | IV[AGR=[NUM=sg,PER=3]] -> 'kommt'
69 | IV[AGR=[NUM=pl, PER=1]] -> 'kommen'
70 | IV[AGR=[NUM=pl, PER=2]] -> 'kommt'
71 | IV[AGR=[NUM=pl, PER=3]] -> 'kommen'
72 | 
73 | TV[OBJCASE=acc, AGR=[NUM=sg,PER=1]] -> 'sehe' | 'mag'
74 | TV[OBJCASE=acc, AGR=[NUM=sg,PER=2]] -> 'siehst' | 'magst'
75 | TV[OBJCASE=acc, AGR=[NUM=sg,PER=3]] -> 'sieht' | 'mag'
76 | TV[OBJCASE=dat, AGR=[NUM=sg,PER=1]] -> 'folge' | 'helfe'
77 | TV[OBJCASE=dat, AGR=[NUM=sg,PER=2]] -> 'folgst' | 'hilfst'
78 | TV[OBJCASE=dat, AGR=[NUM=sg,PER=3]] -> 'folgt' | 'hilft'
79 | TV[OBJCASE=acc, AGR=[NUM=pl,PER=1]] -> 'sehen' | 'moegen'
80 | TV[OBJCASE=acc, AGR=[NUM=pl,PER=2]] -> 'sieht' | 'moegt'
81 | TV[OBJCASE=acc, AGR=[NUM=pl,PER=3]] -> 'sehen' | 'moegen'
82 | TV[OBJCASE=dat, AGR=[NUM=pl,PER=1]] -> 'folgen' | 'helfen'
83 | TV[OBJCASE=dat, AGR=[NUM=pl,PER=2]] -> 'folgt' | 'helft'
84 | TV[OBJCASE=dat, AGR=[NUM=pl,PER=3]] -> 'folgen' | 'helfen'
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/simple-sem.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem3.fcfg
 2 | ##
 3 | ## Alternative simple grammar with transitive verbs and 
 4 | ## quantifiers for the book. 
 5 | ## 
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | 
11 | % start S
12 | ############################
13 | # Grammar Rules
14 | #############################
15 | 
16 | S[SEM = <?subj(?vp)>] -> NP[NUM=?n,SEM=?subj] VP[NUM=?n,SEM=?vp]
17 | 
18 | NP[NUM=?n,SEM=<?det(?nom)> ] -> Det[NUM=?n,SEM=?det]  Nom[NUM=?n,SEM=?nom]
19 | NP[LOC=?l,NUM=?n,SEM=?np] -> PropN[LOC=?l,NUM=?n,SEM=?np]
20 | 
21 | Nom[NUM=?n,SEM=?nom] -> N[NUM=?n,SEM=?nom]
22 | 
23 | VP[NUM=?n,SEM=?v] -> IV[NUM=?n,SEM=?v]
24 | VP[NUM=?n,SEM=<?v(?obj)>] -> TV[NUM=?n,SEM=?v] NP[SEM=?obj]
25 | VP[NUM=?n,SEM=<?v(?obj,?pp)>] -> DTV[NUM=?n,SEM=?v] NP[SEM=?obj] PP[+TO,SEM=?pp]
26 | 
27 | PP[+TO, SEM=?np] -> P[+TO] NP[SEM=?np]
28 | 
29 | #############################
30 | # Lexical Rules
31 | #############################
32 | 
33 | PropN[-LOC,NUM=sg,SEM=<\P.P(angus)>] -> 'Angus'
34 | PropN[-LOC,NUM=sg,SEM=<\P.P(cyril)>] -> 'Cyril'
35 | PropN[-LOC,NUM=sg,SEM=<\P.P(irene)>] -> 'Irene'
36 | 
37 | Det[NUM=sg,SEM=<\P Q.all x.(P(x) -> Q(x))>] -> 'every'
38 | Det[NUM=pl,SEM=<\P Q.all x.(P(x) -> Q(x))>] -> 'all'
39 | Det[SEM=<\P Q.exists x.(P(x) & Q(x))>] -> 'some'
40 | Det[NUM=sg,SEM=<\P Q.exists x.(P(x) & Q(x))>] -> 'a'
41 | Det[NUM=sg,SEM=<\P Q.exists x.(P(x) & Q(x))>] -> 'an'
42 | 
43 | N[NUM=sg,SEM=<\x.man(x)>] -> 'man'
44 | N[NUM=sg,SEM=<\x.girl(x)>] -> 'girl'
45 | N[NUM=sg,SEM=<\x.boy(x)>] -> 'boy'
46 | N[NUM=sg,SEM=<\x.bone(x)>] -> 'bone'
47 | N[NUM=sg,SEM=<\x.ankle(x)>] -> 'ankle'
48 | N[NUM=sg,SEM=<\x.dog(x)>] -> 'dog'
49 | N[NUM=pl,SEM=<\x.dog(x)>] -> 'dogs'
50 | 
51 | IV[NUM=sg,SEM=<\x.bark(x)>,TNS=pres] -> 'barks'
52 | IV[NUM=pl,SEM=<\x.bark(x)>,TNS=pres] -> 'bark'
53 | IV[NUM=sg,SEM=<\x.walk(x)>,TNS=pres] -> 'walks'
54 | IV[NUM=pl,SEM=<\x.walk(x)>,TNS=pres] -> 'walk'
55 | TV[NUM=sg,SEM=<\X x.X(\y.chase(x,y))>,TNS=pres] -> 'chases'
56 | TV[NUM=pl,SEM=<\X x.X(\y.chase(x,y))>,TNS=pres] -> 'chase'
57 | TV[NUM=sg,SEM=<\X x.X(\y.see(x,y))>,TNS=pres] -> 'sees'
58 | TV[NUM=pl,SEM=<\X x.X(\y.see(x,y))>,TNS=pres] -> 'see'
59 | TV[NUM=sg,SEM=<\X x.X(\y.bite(x,y))>,TNS=pres] -> 'bites'
60 | TV[NUM=pl,SEM=<\X x.X(\y.bite(x,y))>,TNS=pres] -> 'bite'
61 | DTV[NUM=sg,SEM=<\Y X x.X(\z.Y(\y.give(x,y,z)))>,TNS=pres] -> 'gives'
62 | DTV[NUM=pl,SEM=<\Y X x.X(\z.Y(\y.give(x,y,z)))>,TNS=pres] -> 'give'
63 | 
64 | P[+to] -> 'to'
65 | 
66 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/sql0.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sql.fcfg
 2 | ##
 3 | ## Deliberately naive string-based grammar for 
 4 | ## deriving SQL queries from English
 5 | ## 
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | % start S
11 | 
12 | S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
13 | 
14 | VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
15 | VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
16 | NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
17 | PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
18 | AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
19 | 
20 | NP[SEM='Country="greece"'] -> 'Greece'
21 | NP[SEM='Country="china"'] -> 'China'
22 | 
23 | Det[SEM='SELECT'] -> 'Which' | 'What'
24 | 
25 | N[SEM='City FROM city_table'] -> 'cities'
26 | 
27 | IV[SEM=''] -> 'are'
28 | A -> 'located'
29 | P[SEM=''] -> 'in'
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/sql1.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sql.fcfg
 2 | ##
 3 | ## Deliberately naive string-based grammar for 
 4 | ## deriving SQL queries from English
 5 | ## 
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | % start S
11 | 
12 | S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
13 | 
14 | VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
15 | VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
16 | VP[SEM=(?v + ?np)] -> TV[SEM=?v] NP[SEM=?np]
17 | VP[SEM=(?vp1 + ?c + ?vp2)] -> VP[SEM=?vp1] Conj[SEM=?c] VP[SEM=?vp2]
18 | 
19 | NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
20 | NP[SEM=(?n + ?pp)]  -> N[SEM=?n] PP[SEM=?pp]
21 | NP[SEM=?n]  -> N[SEM=?n]  | CardN[SEM=?n] 
22 | 
23 | ## NB Numbers in the Chat-80 database represent thousands.
24 | CardN[SEM='1000'] -> '1,000,000' 
25 | 
26 | PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
27 | AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
28 | 
29 | NP[SEM='Country="greece"'] -> 'Greece'
30 | NP[SEM='Country="china"'] -> 'China'
31 | 
32 | Det[SEM='SELECT'] -> 'Which' | 'What'
33 | Conj[SEM='AND'] -> 'and'
34 | 
35 | N[SEM='City FROM city_table'] -> 'cities'
36 | N[SEM='Population'] -> 'populations'
37 | 
38 | IV[SEM=''] -> 'are'
39 | TV[SEM=''] -> 'have'
40 | A -> 'located'
41 | P[SEM=''] -> 'in'
42 | P[SEM='>'] -> 'above'
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/book_grammars/storage.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: storage.fcfg
 2 | ##
 3 | ## Feature-based grammar that implements Cooper storage by dividing the
 4 | ## semantics for each phrase into two pieces: the core semantics
 5 | ## ('SEM','CORE') and a sequence of binding operators ('SEM','STORE').
 6 | ## Each binding operator is encoded as a logic term <bo(quant, var)>,
 7 | ## where <quant> is a quantifier expression and the individual variable
 8 | ## <@var> specifies the 'address' of the quantifier in the core
 9 | ## semantics.  and <predicate> is a predicate describing that variable.
10 | 
11 | ## In order for this grammar to generate the correct results, all
12 | ## variables of the form <@var> must be instantiated (i.e., replaced
13 | ## by unique new variables) whenever they are used.  This can be
14 | ## accomplished by using the InstantiateVarsChart class when parsing.
15 | ## 
16 | ## Author: Edward Loper <edloper@gradient.cis.upenn.edu>,
17 | ##         Ewan Klein <ewan@inf.ed.ac.uk>
18 | ##         Robin Cooper <robin.cooper@ling.gu.se>
19 | ## URL: <http://nltk.sourceforge.net>
20 | ## For license information, see LICENSE.TXT
21 | 
22 | %start S
23 | 
24 | S[SEM=[CORE=<?vp(?subj)>, STORE=(?b1+?b2)]] -> NP[SEM=[CORE=?subj, STORE=?b1]] VP[SEM=[CORE=?vp, STORE=?b2]]
25 | 
26 | VP[SEM=?s] -> IV[SEM=?s]
27 | VP[SEM=[CORE=<?v(?obj)>, STORE=(?b1+?b2)]] -> TV[SEM=[CORE=?v, STORE=?b1]] NP[SEM=[CORE=?obj, STORE=?b2]]
28 | VP[SEM=[CORE=<?v(?pp)(?obj)>, STORE=(?b1+?b2+?b3)]] -> DTV[SEM=[CORE=?v, STORE=?b1]] NP[SEM=[CORE=?obj, STORE=?b2]] PP[+TO, SEM=[CORE=?pp, STORE=?b3]]  
29 | 
30 | NP[SEM=[CORE=<@x>, STORE=((<bo(?det(?n),@x)>)+?b1+?b2)]] -> Det[SEM=[CORE=?det, STORE=?b1]] N[SEM=[CORE=?n, STORE=?b2]]
31 | 
32 | PP[+TO, SEM=[CORE=?np, STORE=?b1]] -> P NP[SEM=[CORE=?np, STORE=?b1]]
33 | 
34 | # Lexical items:
35 | Det[SEM=[CORE=<\Q P.exists x.(Q(x) & P(x))>, STORE=(/)]] -> 'a'
36 | Det[SEM=[CORE=<\Q P.all x.(Q(x) implies P(x))>, STORE=(/)]] -> 'every'
37 | 
38 | N[SEM=[CORE=<dog>, STORE=(/)]] -> 'dog' 
39 | N[SEM=[CORE=<bone>, STORE=(/)]] -> 'bone' 
40 | N[SEM=[CORE=<girl>, STORE=(/)]] -> 'girl' 
41 | N[SEM=[CORE=<man>, STORE=(/)]] -> 'man'
42 | 
43 | IV[SEM=[CORE=<\x.smile(x)>, STORE=(/)]] -> 'smiles' 
44 | IV[SEM=[CORE=<\x.walk(x)>, STORE=(/)]] -> 'walks'
45 | 
46 | TV[SEM=[CORE=<\y x.feed(x,y)>, STORE=(/)]] -> 'feeds' 
47 | TV[SEM=[CORE=<\y x.chase(x,y)>, STORE=(/)]] -> 'chases'
48 | 
49 | DTV[SEM=[CORE=<\z y x.give(x,y,z)>, STORE=(/)]] -> 'gives' 
50 | 
51 | NP[SEM=[CORE=<@x>, STORE=(<bo(\P.P(angus),@x)>)]] -> 'Angus' 
52 | NP[SEM=[CORE=<@x>, STORE=(<bo(\P.P(cyril),@x)>)]] -> 'Cyril'
53 | 
54 | P[+TO] -> 'to'
55 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/background0.fol:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: background0.fol
 2 | ##
 3 | ## Illustration of simple knowledge base for use with inference tools.
 4 | ## To accompany sem4.fcfg
 5 | ##
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | all x. (boxerdog(x) -> dog(x))
11 | all x. (boxer(x) -> person(x))
12 | 
13 | all x. (-(dog(x) & person(x)))
14 | 
15 | some x. boxer(x)
16 | some x. boxerdog(x)
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/bindop.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem0.fcfg
 2 | ##
 3 | ## Feature-based grammar that divides the semantics for each element
 4 | ## into two pieces: the core semantics, with path ('SEM','CORE'), and a set of
 5 | ## binding operators, with path ('SEM','BO').  Each binding operator is encoded
 6 | ## as a lambda-calculus expression <bo(expr, @var)>, specifying
 7 | ## that <@var> is an individual variable that should be instantiated,
 8 | ## and <expr> is an expression that can bind that variable.
 9 | ##
10 | ## In order for this grammar to generate the correct results, all
11 | ## variables of the form <@var> must be instantiated (i.e., replaced
12 | ## by unique new variables) whenever they are used.  This can be
13 | ## accomplished by using the InstantiateVarsChart class when parsing.
14 | ## 
15 | ## Author: Edward Loper <edloper@gradient.cis.upenn.edu>,
16 | ##         Ewan Klein <ewan@inf.ed.ac.uk> 
17 | ## URL: <http://nltk.sourceforge.net>
18 | ## For license information, see LICENSE.TXT
19 | 
20 | %start S
21 | ## Grammar summary:
22 | ##   S -> NP VP
23 | ##   VP -> TV NP | IV
24 | ##   NP -> Det N | proper nouns...
25 | ##   TV -> transitive verbs...
26 | ##   IV -> intransitive verbs...
27 | ##   Det -> determiners...
28 | 
29 | S[SEM=[CORE=<?vp(?subj)>, BO={?b1+?b2}]] -> NP[SEM=[CORE=?subj, BO=?b1]] VP[SEM=[CORE=?vp, BO=?b2]]
30 | 
31 | VP[SEM=[CORE=<?v(?obj)>, BO={?b1+?b2}]] -> TV[SEM=[CORE=?v, BO=?b1]] NP[SEM=[CORE=?obj, BO=?b2]]
32 | 
33 | VP[SEM=?s] -> IV[SEM=?s]
34 | 
35 | NP[SEM=[CORE=<@x>, BO={{<bo(?det(?n), @x)>}+?b1+?b2}]] -> Det[SEM=[CORE=?det, BO=?b1]] N[SEM=[CORE=?n, BO=?b2]]
36 | 
37 | # Lexical items:
38 | Det[SEM=[CORE=<\Q P.exists x.(Q(x) & P(x))>, BO={/}]] -> 'a'
39 | N[SEM=[CORE=<dog>, BO={/}]] -> 'dog' | 'cat' | 'mouse' 
40 | IV[SEM=[CORE=<\x.bark(x)>, BO={/}]] -> 'barks' | 'eats' | 'walks'
41 | TV[SEM=[CORE=<\x y.feed(y,x)>, BO={/}]] -> 'feeds' | 'walks'
42 | NP[SEM=[CORE=<@x>, BO={<bo(\P. P(John), @x)>}]] -> 'john' | 'alex'
43 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/dep_test2.dep:
--------------------------------------------------------------------------------
1 | 1	John	_	NNP	_	_	2	SUBJ	_	_
2 | 2	sees	_	VB	_	_	0	ROOT	_	_
3 | 3	a	_	DT	_	_	4	SPEC	_	_
4 | 4	dog	_	NN	_	_	2	OBJ	_	_
5 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/event.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: event.fcfg
 2 | ##
 3 | ## Illustrating Davidson-style event semantics
 4 | ## 
 5 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 6 | ## URL: <http://nltk.sourceforge.net>
 7 | ## For license information, see LICENSE.TXT
 8 | 
 9 | % start S
10 | ############################
11 | # Grammar Rules
12 | #############################
13 | 
14 | S[sem = <exists e.?subj(e,?vp)>] -> NP[num=?n,sem=?subj] VP[num=?n,sem=?vp]
15 | 
16 | NP[num=?n,sem=<?det(?nom)> ] -> Det[num=?n,sem=?det]  Nom[num=?n,sem=?nom]
17 | NP[loc=?l,num=?n,sem=?np] -> PropN[loc=?l,num=?n,sem=?np]
18 | 
19 | Nom[num=?n,sem=?nom] -> N[num=?n,sem=?nom]
20 | Nom[num=?n,sem=<?pp(?nom)>] -> N[num=?n,sem=?nom] PP[sem=?pp]
21 | 
22 | VP[num=?n,sem=?v] -> IV[num=?n,sem=?v]
23 | VP[num=?n,sem=<?v(?obj)>] -> TV[num=?n,sem=?v] NP[sem=?obj]
24 | VP[num=?n,sem=<?v(?obj,?pp)>] -> DTV[num=?n,sem=?v] NP[sem=?obj] PP[+to, sem=?pp]
25 | 
26 | 
27 | VP[num=?n,sem=<?pp(?vp)>] -> VP[num=?n,sem=?vp] PP[sem=?pp]
28 | VP[num=?n,sem=<?adv(?vp)>] -> VP[num=?n,sem=?vp] Adv[sem=?adv]
29 | 
30 | PP[sem=<?p(?np)>] -> P[loc=?l,sem=?p] NP[loc=?l,sem=?np]
31 | 
32 | #############################
33 | # Lexical Rules
34 | #############################
35 | 
36 | PropN[-loc,num=sg,sem=<\e R.R(e,angus)>] -> 'Angus'
37 | PropN[-loc,num=sg,sem=<\e R.R(e,pat)>] -> 'Pat'
38 | PropN[-loc,num=sg,sem=<\e R.R(e,irene)>] -> 'Irene'
39 | PropN[-loc,num=sg,sem=<\e R.R(e,cyril)>] -> 'Cyril'
40 | PropN[+loc, num=sg,sem=<\e R.R(e,stockbridge)>] -> 'Stockbridge'
41 | 
42 | NP[-loc, num=sg, sem=<\P.\x.P(x)>] -> 'who'  
43 | 
44 | Det[num=sg,sem=<\P R e.all x.(P(x) -> R(e,x))>] -> 'every'
45 | Det[num=pl,sem=<\P R e.all x.(P(x) -> R(e,x))>] -> 'all'
46 | Det[sem=<\P R e.exists x.(P(x) & R(e,x))>] -> 'some'
47 | Det[num=sg,sem=<\P R e.exists x.(P(x) & R(e,x))>] -> 'a'
48 | 
49 | N[num=sg,sem=<boy>] -> 'boy'
50 | N[num=pl,sem=<boy>] -> 'boys'
51 | N[num=sg,sem=<girl>] -> 'girl'
52 | N[num=pl,sem=<girl>] -> 'girls'
53 | N[num=sg,sem=<bone>] -> 'bone'
54 | N[num=sg,sem=<dog>] -> 'dog'
55 | 
56 | IV[num=sg,sem=<\e x.(bark(e) & agent(e,x))>,tns=pres] -> 'barks'
57 | IV[num=pl,sem=<\e x.(bark(e) & agent(e,x))>,tns=pres] -> 'bark'
58 | IV[num=sg,sem=<\e x.(walk(e) & agent(e,x))>,tns=pres] -> 'walks'
59 | IV[num=pl,sem=<\e x.( walk(e) & agent(e,x))>,tns=pres] -> 'walk'
60 | TV[num=sg,sem=<\X y.X(\e x.(chase(e) & agent(e,y) & patient(e,x)))>,tns=pres] -> 'chases'
61 | TV[num=pl,sem=<\X y.X(\e x.(chase(e) & agent(e,y) & patient(e,x)))>,tns=pres] -> 'chase'
62 | TV[num=sg,sem=<\X y.X(\e x.(see(e) & agent(e,y) & patient(e,x)))>,tns=pres] -> 'sees'
63 | TV[num=pl,sem=<\X y.X(\e x.(see(e) & agent(e,y) & patient(e,x)))>,tns=pres] -> 'see'
64 | DTV[num=sg,sem=<\Y X x.X(\z.Y(\e y.(give(e) & agent(e,x) & theme(e,y) & recip(e,z))))>,tns=pres] -> 'gives'
65 | DTV[num=pl,sem=<\Y X x.X(\z.Y(\e y.(give(e) & agent(e,x) & theme(e,y) & recip(e,z))))>,tns=pres] -> 'give'
66 | 
67 | P[+loc,sem=<\X P e.X(\y.(P(e) & in(e,y)))>] -> 'in'
68 | P[-loc,sem=<\X P e.X(\y.(P(e) & with(e,y)))>] -> 'with'
69 | P[+to,sem=<\X.X>] -> 'to'
70 | 
71 | Adv[sem=<\R e x.(slow(e) & R(e,x))>] -> 'slowly'
72 | Adv[sem=<\R e x.(thoughtful(e) & R(e,x))>] -> 'thoughtfully'
73 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/glue.semtype:
--------------------------------------------------------------------------------
 1 | ########################################################################
 2 | # Glue Semantics Formulas Using Event Representation
 3 | #
 4 | # Entries are made up of three parts, separated by colons (":")
 5 | # 
 6 | # 1) The semtype name.
 7 | #    - May appear multiple times with different relationship sets (3)
 8 | #    - May "extend" other semtypes: "type(parent)"
 9 | #  
10 | # 2) The glue formulas.
11 | #    - A comma-separated list of tuples representing glue formulas
12 | #    - If the entry is an extension, then the listed formulas will be added to  
13 | #      the list from the super type
14 | # 
15 | # 3) The relationship set (OPTIONAL)
16 | #    - If not specified, then assume the entry covers ALL relationship sets
17 | #    - If the entry is an extension, then the relationship set dictates which
18 | #      particular entry should be extended.  If no relationship set is 
19 | #      specified, then every entry of the parent type is extended.
20 | # 
21 | ########################################################################
22 | 
23 | #Quantifiers
24 | def_art : (\P Q.exists x.(P(x) & all y.(Q(y) <-> (x = y))), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
25 | ex_quant : (\P Q.exists x.(P(x) & Q(x)), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
26 | univ_quant : (\P Q.all x.(P(x) -> Q(x)), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
27 | no_quant : (\P Q.-exists x.(P(x) & Q(x)), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
28 | 
29 | #Nouns
30 | NN : (\x.<word>(x), (v -o r)) : [spec]
31 | NN : (\P Q.exists x.(P(x) & Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.<word>(x), (v -o r)) : [] # treat a noun missing its spec as implicitly existentially quantified
32 | NNP : (\P Q.exists x.(P(x) & Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.<word>(x), (v -o r))
33 | NNS(NN)
34 | PRP : (\P Q.exists x.(P(x) & Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.PRO(x), (v -o r))
35 | 
36 | #Verbs
37 | VB : (\x.<word>(x), (subj -o f)) : [subj] #iv
38 | VB : (\x y.<word>(x,y), (subj -o (obj -o f))) : [subj, obj] #tv
39 | VB : (\y.exists x.<word>(x,y), (obj -o f)) : [obj] #incomplete tv
40 | VB : (\x y z.<word>(x,y,z), (subj -o (obj -o (theme -o f)))) : [subj, obj, theme] #dtv
41 | VB : (\y z.exists x.<word>(x,y,z), obj -o (theme -o f)) : [obj, theme] #incomplete dtv
42 | VB : (\x z.exists y.<word>(x,y,z), subj -o (theme -o f)) : [subj, theme] #incomplete dtv
43 | VB : (\z.exists x y.<word>(x,y,z), theme -o f) : [theme] #incomplete dtv
44 | VB : (\x y.<word>(x,y), (subj -o (comp -o f))) : [subj, comp] #tv_comp
45 | VB : (\x P.<word>(x,P), (subj -o ((xcomp.subj -o xcomp) -o f))) : [subj, xcomp] #equi
46 | VB : (\x y P.<word>(x,y,P), (subj -o (obj -o ((xcomp.subj -o xcomp) -o f)))) : [subj, obj, xcomp] # object equi
47 | VB : (\P.<word>(P), (xcomp -o f)) : [xcomp] #raising
48 | VBD(VB) : (\P.PAST(P), (f -o f))
49 | VBZ(VB)
50 | 
51 | #Modifiers
52 | nmod : (\Q P x.(P(x) & Q(x)), (f -o ((super.v -o super.r) -o (super.v -o super.r)))), (\x.<word>(x), f)
53 | JJ(nmod)
54 | vmod : (\P.<word>(P), (super.f -o super.f))
55 | RB(vmod)
56 | tense : (\P.<word>(P), (super.f -o super.f))
57 | 
58 | #Conjunctions
59 | cc_clause : (\P Q.(P & Q), (a -o (b -o f)))
60 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/glue_train.conll:
--------------------------------------------------------------------------------
 1 | 1	John	_	NNP	_	_	2	SUBJ	_	_
 2 | 2	runs	_	VB	_	_	0	ROOT	_	_
 3 | 
 4 | 1	a	_	DT	_	_	2	SPEC	_	_
 5 | 2	man	_	NN	_	_	3	SUBJ	_	_
 6 | 3	runs	_	VB	_	_	0	ROOT	_	_
 7 | 
 8 | 1	John	_	NNP	_	_	2	SUBJ	_	_
 9 | 2	sees	_	VB	_	_	0	ROOT	_	_
10 | 3	Mary	_	NNP	_	_	2	OBJ	_	_
11 | 
12 | 1	every	_	DT	_	_	2	SPEC	_	_
13 | 2	girl	_	NN	_	_	3	SUBJ	_	_
14 | 3	chases	_	VB	_	_	0	ROOT	_	_
15 | 4	an	_	DT	_	_	5	SPEC	_	_
16 | 5	animal	_	NN	_	_	3	OBJ	_	_
17 | 
18 | 1	Bill	_	NNP	_	_	2	SUBJ	_	_
19 | 2	sees	_	VB	_	_	0	ROOT	_	_
20 | 3	a	_	DT	_	_	4	SPEC	_	_
21 | 4	dog	_	NN	_	_	2	OBJ	_	_
22 | 
23 | 1	every	_	DT	_	_	2	SPEC	_	_
24 | 2	girl	_	NN	_	_	3	SUBJ	_	_
25 | 3	chases	_	VB	_	_	0	ROOT	_	_
26 | 4	John	_	NNP	_	_	3	OBJ	_	_
27 | 
28 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/hole.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: hole.fcfg
 2 | ##
 3 | ## Minimal feature-based grammar with lambda semantics for use by the hole.py
 4 | ## module for Hole Semantics (see Blackburn and Bos).
 5 | ## 
 6 | ## Author: Dan Garrette <DHGarrette@gmail.com> 
 7 | ##         Robin Cooper <robin.cooper@ling.gu.se>
 8 | ## URL: <http://www.nltk.org>
 9 | ## For license information, see LICENSE.TXT
10 | 
11 | % start S
12 | 
13 | S[SEM=<?subj(?vp)>] -> NP[SEM=?subj] VP[SEM=?vp]
14 | VP[SEM=?v] -> IV[SEM=?v]
15 | VP[NUM=?n,SEM=<?v(?obj)>] -> TV[NUM=?n,SEM=?v] NP[SEM=?obj]
16 | NP[SEM=<?det(?n)>] -> Det[SEM=?det] N[SEM=?n]
17 | 
18 | Det[SEM=<\P Q h l.exists h1 l1 l2 l3 x.(ALL(l2,x,l3) & IMP(l3,l1,h1) & LEQ(l,h1) & LEQ(l2,h) & P(x)(h)(l1) & Q(x)(h)(l) & HOLE(h) & HOLE(h1) & LABEL(l) & LABEL(l1) & LABEL(l2) & LABEL(l3))>] -> 'every'
19 | Det[SEM=<\P Q h l.exists h1 l1 l2 l3 x.(EXISTS(l2,x,l3) & AND(l3,l1,h1) & LEQ(l,h1) & LEQ(l2,h) & P(x)(h)(l1) & Q(x)(h)(l) & HOLE(h) & HOLE(h1) & LABEL(l) & LABEL(l1) & LABEL(l2) & LABEL(l3))>] -> 'a'
20 | N[SEM=<\x h l.(PRED(l,girl,x) & LEQ(l,h) & HOLE(h) & LABEL(l))>] -> 'girl'
21 | N[SEM=<\x h l.(PRED(l,dog,x) & LEQ(l,h) & HOLE(h) & LABEL(l))>] -> 'dog'
22 | IV[SEM=<\x h l.(PRED(l,bark,x) & LEQ(l,h) & HOLE(h) & LABEL(l))>] -> 'barks'
23 | TV[SEM=<\P x.P(\y h l.(PRED(l,chase,x,y) & LEQ(l,h) & HOLE(h) & LABEL(l)))>] -> 'chases'
24 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/np.fcfg:
--------------------------------------------------------------------------------
 1 | % start NP
 2 | NP[AGR=?a] -> Det[AGR=?a] N[AGR=?a]
 3 | Det[AGR=[NUM='sg', PER=3]] -> 'this' | 'that'
 4 | Det[AGR=[NUM='pl', PER=3]] -> 'these' | 'those'
 5 | Det[AGR=[NUM='pl', PER=1]] -> 'we'
 6 | Det[AGR=[PER=2]] -> 'you'
 7 | N[AGR=[NUM='sg', GND='m']] -> 'boy'
 8 | N[AGR=[NUM='pl', GND='m']] -> 'boys'
 9 | N[AGR=[NUM='sg', GND='f']] -> 'girl'
10 | N[AGR=[NUM='pl', GND='f']] -> 'girls'
11 | N[AGR=[NUM='sg']] -> 'student'
12 | N[AGR=[NUM='pl']] -> 'students'
13 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/sem0.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem0.fcfg
 2 | ##
 3 | ## Minimal feature-based grammar with lambda semantics.
 4 | ## 
 5 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 6 | ## URL: <http://nltk.sourceforge.net>
 7 | ## For license information, see LICENSE.TXT
 8 | 
 9 | % start S
10 | 
11 | S[SEM=<?vp(?subj)>] -> NP[SEM=?subj] VP[SEM=?vp]
12 | VP[SEM=?v] -> V[SEM=?v]
13 | NP[SEM=<cyril>] -> 'Cyril'
14 | V[SEM=<\x.bark(x)>] -> 'barks'
15 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/sem1.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem1.fcfg
 2 | ##
 3 | ## Minimal feature-based grammar to illustrate the interpretation of
 4 | ## determiner phrases.
 5 | ## 
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | % start S
11 | 
12 | S[SEM = <?subj(?vp)>] -> NP[SEM=?subj] VP[SEM=?vp]
13 | VP[SEM=?v] -> IV[SEM=?v]
14 | NP[SEM=<?det(?n)>] -> Det[SEM=?det] N[SEM=?n]
15 | 
16 | Det[SEM=<\Q P.exists x.(Q(x) & P(x))>] -> 'a'
17 | Det[SEM=<\Q P.all x.(Q(x) -> P(x))>] -> 'every'
18 | N[SEM=<\x.dog(x)>] -> 'dog'
19 | IV[SEM=<\x.bark(x)>] -> 'barks'
20 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/sem2.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem2.fcfg
 2 | ##
 3 | ## Longer feature-based grammar with more quantifers, and illustrating
 4 | ## transitive verbs and prepositional phrases (PPs). The
 5 | ## interpretation of PPs is a bit weird and could do with further
 6 | ## work.
 7 | ## 
 8 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 9 | ## URL: <http://nltk.sourceforge.net>
10 | ## For license information, see LICENSE.TXT
11 | 
12 | % start S
13 | ############################
14 | # Grammar Rules
15 | #############################
16 | 
17 | S[SEM = <?subj(?vp)>] -> NP[NUM=?n,SEM=?subj] VP[NUM=?n,SEM=?vp]
18 | 
19 | NP[NUM=?n,SEM=<?det(?nom)> ] -> Det[NUM=?n,SEM=?det]  Nom[NUM=?n,SEM=?nom]
20 | NP[LOC=?l,NUM=?n,SEM=?np] -> PropN[LOC=?l,NUM=?n,SEM=?np]
21 | 
22 | Nom[NUM=?n,SEM=?nom] -> N[NUM=?n,SEM=?nom]
23 | Nom[NUM=?n,SEM=<?pp(?nom)>] -> N[NUM=?n,SEM=?nom] PP[SEM=?pp]
24 | 
25 | VP[NUM=?n,SEM=<?v(?obj)>] -> TV[NUM=?n,SEM=?v] NP[SEM=?obj]
26 | VP[NUM=?n,SEM=?v] -> IV[NUM=?n,SEM=?v]
27 | 
28 | VP[NUM=?n,SEM=<?pp(?vp)>] -> VP[NUM=?n,SEM=?vp] PP[SEM=?pp]
29 | 
30 | PP[SEM=<?p(?np)>] -> P[LOC=?l,SEM=?p] NP[LOC=?l,SEM=?np]
31 | 
32 | #############################
33 | # Lexical Rules
34 | #############################
35 | 
36 | PropN[-LOC,NUM=sg,SEM=<\P.P(john)>] -> 'John'
37 | PropN[-LOC,NUM=sg,SEM=<\P.P(mary)>] -> 'Mary'
38 | PropN[-LOC,NUM=sg,SEM=<\P.P(suzie)>] -> 'Suzie'
39 | PropN[-LOC,NUM=sg,SEM=<\P.P(fido)>] -> 'Fido'
40 | PropN[+LOC, NUM=sg,SEM=<\P.P(noosa)>] -> 'Noosa'
41 | 
42 | NP[-LOC, NUM=sg, SEM=<\P.\x.P(x)>] -> 'who'  
43 | 
44 | Det[NUM=sg,SEM=<\P Q.all x.(P(x) -> Q(x))>] -> 'every'
45 | Det[NUM=pl,SEM=<\P Q.all x.(P(x) -> Q(x))>] -> 'all'
46 | Det[SEM=<\P Q.exists x.(P(x) & Q(x))>] -> 'some'
47 | Det[NUM=sg,SEM=<\P Q.exists x.(P(x) & Q(x))>] -> 'a'
48 | 
49 | N[NUM=sg,SEM=<\x.boy(x)>] -> 'boy'
50 | N[NUM=pl,SEM=<\x.boy(x)>] -> 'boys'
51 | N[NUM=sg,SEM=<\x.girl(x)>] -> 'girl'
52 | N[NUM=pl,SEM=<\x.girl(x)>] -> 'girls'
53 | N[NUM=sg,SEM=<\x.dog(x)>] -> 'dog'
54 | N[NUM=pl,SEM=<\x.dog(x)>] -> 'dogs'
55 | 
56 | TV[NUM=sg,SEM=<\X y.X(\x.chase(y,x))>,TNS=pres] -> 'chases'
57 | TV[NUM=pl,SEM=<\X y.X(\x.chase(y,x))>,TNS=pres] -> 'chase'
58 | TV[NUM=sg,SEM=<\X y.X(\x.see(y,x))>,TNS=pres] -> 'sees'
59 | TV[NUM=pl,SEM=<\X y.X(\x.see(y,x))>,TNS=pres] -> 'see'
60 | TV[NUM=sg,SEM=<\X y.X(\x.chase(y,x))>,TNS=pres] -> 'chases'
61 | TV[NUM=pl,SEM=<\X y.X(\x.chase(y,x))>,TNS=pres] -> 'chase'
62 | IV[NUM=sg,SEM=<\x.bark(x)>,TNS=pres] -> 'barks'
63 | IV[NUM=pl,SEM=<\x.bark(x)>,TNS=pres] -> 'bark'
64 | IV[NUM=sg,SEM=<\x.walk(x)>,TNS=pres] -> 'walks'
65 | IV[NUM=pl,SEM=<\x.walk(x)>,TNS=pres] -> 'walk'
66 | 
67 | P[+LOC,SEM=<\X P x.X(\y.(P(x) & in(x,y)))>] -> 'in'
68 | P[-LOC,SEM=<\X P x.X(\y.(P(x) & with(x,y)))>] -> 'with'
69 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/sql.fcfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sql.fcfg
 2 | ##
 3 | ## Deliberately naive string-based grammar for 
 4 | ## deriving SQL queries from English
 5 | ## 
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | % start S
11 | 
12 | S[sem=(?np + ?vp)] -> NP[sem=?np] VP[sem=?vp]
13 | 
14 | VP[sem=(?v + ?pp)] -> IV[sem=?v] PP[sem=?pp]
15 | VP[sem=(?v + ?np)] -> TV[sem=?v] NP[sem=?np]
16 | 
17 | NP[sem=(?det + ?n)] -> Det[sem=?det] N[sem=?n]
18 | NP[sem='Country="japan"'] -> 'Japan'
19 | NP[sem='Country="united_states"'] -> 'USA'
20 | 
21 | Det[sem='SELECT'] -> 'Which'
22 | N[sem='City FROM city_table'] -> 'cities'
23 | 
24 | IV[sem='WHERE'] -> 'are'
25 | PP[sem=?np] -> P[sem=?p] NP[sem=?np]
26 | P -> 'in'
27 | 
28 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/toy.cfg:
--------------------------------------------------------------------------------
 1 | S -> NP VP
 2 | PP -> P NP
 3 | NP -> Det N | NP PP
 4 | VP -> V NP | VP PP
 5 | Det -> 'a' | 'the'
 6 | N -> 'dog' | 'cat'
 7 | V -> 'chased' | 'sat'
 8 | P -> 'on' | 'in'
 9 | 
10 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/sample_grammars/valuation1.val:
--------------------------------------------------------------------------------
 1 | john => b1
 2 | mary => g1
 3 | suzie => g2
 4 | fido => d1
 5 | tess => d2
 6 | noosa => n
 7 | girl => {g1, g2}
 8 | boy => {b1, b2}
 9 | dog => {d1, d2}
10 | bark => {d1, d2}
11 | walk => {b1, g2, d1}
12 | chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
13 | see => {(b1, g1), (b2, d2), (g1, b1),(d2, b1), (g2, n)}
14 | in => {(b1, n), (b2, n), (d2, n)}
15 | with => {(b1, g1), (g1, b1), (d1, b1), (b1, d1)}
16 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish1.cfg:
--------------------------------------------------------------------------------
 1 | 	S -> SN SV
 2 | 	SV -> v SN
 3 | 	SV -> v
 4 | 	SN -> det GN
 5 | 	GN -> nom_com
 6 | 	GN -> nom_prop
 7 | 	det -> "el" | "la" | "los" | "las" | "un" | "una" | "unos" | "unas"
 8 | 	nom_com -> "vecino" | "ladrones" | "mujeres" | "bosques" | "noche" | "flauta" | "ventana"
 9 | 	nom_prop -> "Jose" | "Lucas" | "Pedro" | "Marta" 
10 | 	v -> "toca" | "moja" | "adoran" | "robaron" | "escondieron" | "rompió"
11 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish1.fcfg:
--------------------------------------------------------------------------------
 1 | % start S
 2 | # ############################
 3 | # Grammar Rules
 4 | # ############################
 5 | S -> SN[num=?n,gen=?g] SV[num=?n,tiempo=?t]
 6 | SN[num=?n,gen=?g,+PROP] -> NP[num=?n]
 7 | SN[num=?n,gen=?g,-PROP] -> DET[num=?n,gen=?g] NC[num=?n,gen=?g]
 8 | SN[num=plural,gen=?g,-PROP] -> DET[num=plural,gen=?g] NC[num=plural,gen=?g]
 9 | SV[tiempo=?t,num=?n] -> VI[tiempo=?t,num=?n]
10 | SV[tiempo=?t,num=?n] -> VT[tiempo=?t,num=?n] SN[-PROP]
11 | SV[tiempo=?t,num=?n] -> VT[tiempo=?t,num=?n] PREP SN
12 | # ############################
13 | # Lexical Rules
14 | # ############################
15 | DET[num=singular,gen=masculino] -> 'un' | 'el'
16 | DET[num=singular,gen=femenino] -> 'una' | 'la'
17 | DET[num=plural,gen=masculino] -> 'unos' | 'los'
18 | DET[num=plural,gen=femenino] -> 'unas' | 'las'
19 | PREP -> 'a'
20 | NP[num=singular] -> 'Miguel' | 'Sara' | 'Pedro'
21 | NC[num=singular,gen=masculino] -> 'perro' | 'gato' | 'vecino' | 'profesor'
22 | NC[num=singular,gen=femenino] -> 'perra' | 'gata' | 'vecina' | 'profesora'
23 | NC[num=plural,gen=masculino] -> 'perros' | 'gatos' | 'vecinos' | 'profesores'
24 | NC[num=plural,gen=femenino] -> 'perras' | 'gatas' | 'vecinas' | 'profesoras'
25 | VI[tiempo=pasado,num=singular] -> 'desaparecio' | 'anduvo' | 'murio'
26 | VI[tiempo=presente,num=singular] -> 'desaparece' | 'anda' | 'muere'
27 | VI[tiempo=pasado,num=plural] -> 'desaparecion' | 'anduvieron' | 'murieron'
28 | VI[tiempo=presente,num=plural] -> 'desaparecen' | 'andan' | 'mueren'
29 | VT[tiempo=pasado,num=singular] -> 'vio' | 'adoró' | 'gritó' | 'odio'
30 | VT[tiempo=presente,num=singular] -> 've' | 'adora' | 'grita' | 'odia'
31 | VT[tiempo=pasado,num=plural] -> 'vieron' | 'adoraron' | 'gritaron' | 'odiaron'
32 | VT[tiempo=presente,num=plural] -> 'ven' | 'adoran' | 'gritan' | 'odian'
33 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish1.pcfg:
--------------------------------------------------------------------------------
 1 | 	S -> SN SV 		[1.0]
 2 | 	SV -> VTrans SN		[0.4]
 3 | 	SV -> VIntrans		[0.3]
 4 | 	SV -> VSupl SN SN	[0.3]
 5 | 	VTrans -> "bebió"	[1.0]
 6 | 	VIntrans -> "murió"	[1.0]
 7 | 	VSupl -> "regaló"	[1.0]
 8 | 	SN -> "flores"		[0.6]
 9 | 	SN -> "agua"		[0.4]
10 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish1.regexp:
--------------------------------------------------------------------------------
1 | 
2 | NP: {<d.*>*<n.*>+<a.*>*} # busca determinantes y adjetivos que acompañen a nombres
3 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish2.cfg:
--------------------------------------------------------------------------------
1 | 	S -> SN SV
2 | 	SP -> P SN
3 | 	SN -> Det N | SN SP
4 | 	SV -> V SN | SV SP
5 | 	Det -> "el" | "la" | "un" | "una" | "los" | "las"
6 | 	N -> "tren" | "telescopio" | "noticia" | "mesa" | "hombre" | "casa" | "amiga"
7 | 	V -> "vio" | "leí" | "encontró"
8 | 	P -> "en" | "sobre" | "con" | "de" | "a"
9 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish2.fcfg:
--------------------------------------------------------------------------------
 1 | % start S
 2 | # ############################
 3 | # Grammar Rules
 4 | # ############################
 5 | S -> SN S/SN
 6 | S/?x -> SV/?x
 7 | S/?x -> V[+aux] COMP SV/?x
 8 | SN/SN ->
 9 | SV/?x -> V[-aux] SN/?x
10 | # ############################
11 | # Lexical Rules
12 | # ############################
13 | V[-aux] -> 'adoras' | 'odias' 
14 | V[+aux] -> 'dices'
15 |  
16 | SN -> 'quien' | 'que'
17 | COMP -> 'que'
18 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish2.pcfg:
--------------------------------------------------------------------------------
1 | 	SN -> N [0.5]| N Adj [0.3]| SN Conj SN [0.2]
2 | 	N -> 'hombres' [0.1]| 'mujeres' [0.2]| 'niños' [0.3]| N Conj N [0.4]
3 | 	Adj -> 'mayores' [0.3]| 'jovenes' [0.7]
4 | 	Conj -> 'y' [0.6]| 'o' [0.3] | 'e' [0.1]
5 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish2.regexp:
--------------------------------------------------------------------------------
1 | 
2 | 	NP: {<d.*>*<n.*><a.*>*} # Busca det + nombre + adjetivo
3 | 	NP: {<d.*>*<n.*>+} # Busca seguidas de nombres
4 | 
5 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish3.cfg:
--------------------------------------------------------------------------------
1 | 	SN -> N | N Adj | SN Conj SN
2 | 	N -> 'hombres' | 'mujeres' | 'niños' | N Conj N
3 | 	Adj -> 'mayores' | 'jovenes' 
4 | 	Conj -> 'y' | 'o' | 'e'
5 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish3.regexp:
--------------------------------------------------------------------------------
1 | 
2 |    	SN:
3 |      	{<.*>+}           # Crea Un Chunk Con Cualquier Cosa
4 |      	}<v.*|sp.*|F.*>+{      # Considerar Como Chink Apariciones De Verbos (v.*), Preposiciones (sp.*) y Signos De Puntuación (F.*)
5 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish4.regexp:
--------------------------------------------------------------------------------
1 | 
2 |   SN: {<d.*>?<n.*>+<a.*>*}   # noun phrase chunks
3 |   SV: {<p.*>?<v.*>}          # verb phrase chunks
4 |   SP: {<sp.*>}               # prepositional phrase chunks
5 |   
6 | 


--------------------------------------------------------------------------------
/resources/examples/grammars/spanish_grammars/spanish5.regexp:
--------------------------------------------------------------------------------
1 | 
2 |   SN: {<d.*>?<n.*>+<a.*>*}   		  # noun phrase chunks
3 |   SV: {<p.*>?<v.*>+<SN|sp.*|S>*}          # verb phrase chunks
4 |   SP: {<sp.*>}               		  # prepositional phrase chunks
5 |   S:  {<SN><SV>}             		  # Chunk NP, VP
6 | 
7 | 


--------------------------------------------------------------------------------
/resources/examples/school/README:
--------------------------------------------------------------------------------
1 | The files in this directory were created for teaching computational
2 | linguistics in secondary school English classes.  For instructions
3 | and lesson plans, please see http://nltk.org/index.php/Electronic_Grammar
4 | 


--------------------------------------------------------------------------------
/resources/examples/school/count.py:
--------------------------------------------------------------------------------
 1 | from words import *
 2 | words = read_words('corpus/telephone.txt')
 3 | counts = count_words(words)
 4 | print_freq(counts)
 5 | 
 6 | 
 7 | 
 8 | 
 9 | from words import *
10 | words = read_words('corpus/rural.txt')
11 | counts = count_pairs(words)
12 | print_freq(counts)
13 | 
14 | 
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/resources/examples/school/generate.py:
--------------------------------------------------------------------------------
 1 | from words import *
 2 | 
 3 | telephone_words = read_words('corpus/telephone.txt')
 4 | model = train(telephone_words)
 5 | generate(model)
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/resources/examples/school/parse1.py:
--------------------------------------------------------------------------------
 1 | from parser import *
 2 | 
 3 | grammar = """
 4 |    NP -> P | D J N
 5 |    D -> 'a'
 6 |    J -> 'red' | 'green'
 7 |    N -> 'chair' | 'house'
 8 | """
 9 | 
10 | phrase = 'a red chair'
11 | 
12 | parse_draw(phrase, grammar)
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/resources/examples/school/parse2.py:
--------------------------------------------------------------------------------
 1 | from parser import *
 2 | 
 3 | grammar = """
 4 |    S -> NP VP | VP
 5 |    VP -> V NP | VP PP
 6 |    NP -> Det N | NP PP
 7 |    PP -> P NP
 8 |    NP -> 'I'
 9 |    Det -> 'the' | 'my'
10 |    N -> 'elephant' | 'pajamas'
11 |    V -> 'shot'
12 |    P -> 'in'
13 | """
14 | 
15 | sent = 'I shot the elephant in my pajamas'
16 | parse_draw(sent, grammar)
17 | 
18 | 


--------------------------------------------------------------------------------
/resources/examples/school/parse3.py:
--------------------------------------------------------------------------------
 1 | from parser import *
 2 | 
 3 | grammar = """
 4 |    S -> NP VP | VP
 5 |    PP -> P NP
 6 |    NP -> N | Det N | N N | NP PP | N VP
 7 |    VP -> V | V NP | VP PP | VP ADVP
 8 |    ADVP -> ADV NP
 9 |    Det -> 'a' | 'an' | 'the'
10 |    N -> 'flies' | 'banana' | 'fruit' | 'arrow' | 'time'
11 |    V -> 'like' | 'flies' | 'time'
12 |    P -> 'on' | 'in' | 'by'
13 |    ADV -> 'like'
14 | """
15 | 
16 | sent = 'time flies like an arrow'
17 | 
18 | parse_draw(sent, grammar)
19 | 
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/resources/examples/school/parser.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | 
 3 | def parse(sent, grammar):
 4 |     gr = nltk.parse_cfg(grammar)
 5 |     parser = nltk.parse.ChartParse(gr, nltk.parse.TD_STRATEGY)
 6 |     return parser.get_parse_list(sent.split())
 7 | 
 8 | def parse_draw(sent, grammar):
 9 |     trees = parse(sent, grammar)
10 |     nltk.draw.draw_trees(*trees)
11 | 
12 | def parse_print(sent, grammar):
13 |     trees = parse(sent, grammar)
14 |     for tree in trees:
15 |         print tree
16 | 
17 | 


--------------------------------------------------------------------------------
/resources/examples/school/search.py:
--------------------------------------------------------------------------------
1 | from words import *
2 | words = read_text('corpus/telephone.txt')
3 | concordance(" um", words)
4 | 
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/chat.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/examples/semantics/chat.db


--------------------------------------------------------------------------------
/resources/examples/semantics/chat_sentences:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Demo Sentences
 2 | #
 3 | # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 4 | # URL: <http://nltk.org/>
 5 | # For license information, see LICENSE.TXT
 6 | ############################################
 7 | # Some example sentences for the Chat-80 demo
 8 | 
 9 | what is the capital of France
10 | which sea borders France
11 | what contains Berlin
12 | which Asian countries border the_Mediterranean 
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/demo_sentences:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Demo Sentences
 2 | #
 3 | # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 4 | # URL: <http://nltk.org/>
 5 | # For license information, see LICENSE.TXT
 6 | ############################################
 7 | # Some example sentences for the sem2.cfg demo
 8 | 
 9 | Fido sees a boy with Mary
10 | John sees Mary
11 | every girl chases a dog
12 | every boy chases a girl
13 | John walks with a girl in Noosa
14 | who walks
15 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/model0.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Example Model
 2 | #
 3 | # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 4 | # URL: <http://www.nltk.org/>
 5 | # For license information, see LICENSE.TXT
 6 | 
 7 | """
 8 | This is a sample model to accompany the U{sem2.cfg} grammar, and is
 9 | intended to be imported as a module.
10 | """
11 | 
12 | from nltk.semantics import *
13 | 
14 | val = Valuation()
15 | #Initialize a valuation of non-logical constants."""
16 | 
17 | v = [('john', 'b1'),
18 |     ('mary', 'g1'),
19 |     ('suzie', 'g2'),
20 |     ('fido', 'd1'),
21 |     ('tess', 'd2'),
22 |     ('noosa', 'n'),
23 |     ('girl', set(['g1', 'g2'])),
24 |     ('boy', set(['b1', 'b2'])),
25 |     ('dog', set(['d1', 'd2'])),
26 |     ('bark', set(['d1', 'd2'])),
27 |     ('walk', set(['b1', 'g2', 'd1'])),
28 |     ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])),
29 |     ('see', set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'),('d2', 'b1'), ('g2', 'n')])),
30 |     ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])),
31 |     ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')]))
32 |  ]
33 | 
34 | 
35 | #Read in the data from C{v}
36 | val.read(v)
37 | 
38 | #Bind C{dom} to the C{domain} property of C{val}
39 | dom = val.domain
40 | 
41 | #Initialize a model with parameters C{dom} and C{val}.
42 | m = Model(dom, val)
43 | 
44 | #Initialize a variable assignment with parameter C{dom}
45 | g = Assignment(dom)
46 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/model1.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Example Model
 2 | #
 3 | # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 4 | # URL: <http://www.nltk.org/>
 5 | # For license information, see LICENSE.TXT
 6 | 
 7 | """
 8 | This is a sample model to accompany the U{chat80.cfg} grammar} and is
 9 | intended to be imported as a module.
10 | """
11 | 
12 | from nltk.semantics import *
13 | from nltk.corpora import chat80
14 | 
15 | rels = chat80.rels
16 | concept_map = chat80.process_bundle(rels)
17 | concepts = concept_map.values()
18 | val = chat80.make_valuation(concepts, read=True)
19 | 
20 | #Bind C{dom} to the C{domain} property of C{val}.
21 | dom = val.domain
22 | 
23 | #Initialize a model with parameters C{dom} and C{val}.
24 | m = Model(dom, val)
25 | 
26 | #Initialize a variable assignment with parameter C{dom}.
27 | g = Assignment(dom)
28 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/sem0.cfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem0.cfg
 2 | ##
 3 | ## Minimal feature-based grammar with lambda semantics.
 4 | ## 
 5 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 6 | ## URL: <http://nltk.sourceforge.net>
 7 | ## For license information, see LICENSE.TXT
 8 | 
 9 | % start S
10 | 
11 | S[sem = <app(?vp,?subj)>] -> NP[sem=?subj] VP[sem=?vp]
12 | VP[sem=?v] -> V[sem=?v]
13 | NP[sem=<john>] -> 'John'
14 | V[sem=<\x.(walk x)>] -> 'walks'
15 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/sem1.cfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem1.cfg
 2 | ##
 3 | ## Minimal feature-based grammar to illustrate the interpretation of
 4 | ## determiner phrases.
 5 | ## 
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | % start S
11 | 
12 | S[sem = <app(?subj,?vp)>] -> NP[sem=?subj] VP[sem=?vp]
13 | VP[sem=?v] -> IV[sem=?v]
14 | NP[sem=<app(?det,?n)>] -> Det[sem=?det] N[sem=?n]
15 | 
16 | Det[sem=<\Q P. some x. ((Q x) and (P x))>] -> 'a'
17 | N[sem=<dog>] -> 'dog'
18 | IV[sem=<\x.(bark x)>] -> 'barks'
19 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/sem2.cfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem2.cfg
 2 | ##
 3 | ## Longer feature-based grammar with more quantifers, and illustrating
 4 | ## transitive verbs and prepositional phrases (PPs). The
 5 | ## interpretation of PPs is a bit weird and could do with further
 6 | ## work.
 7 | ## 
 8 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 9 | ## URL: <http://nltk.sourceforge.net>
10 | ## For license information, see LICENSE.TXT
11 | 
12 | % start S
13 | ############################
14 | # Grammar Rules
15 | #############################
16 | 
17 | S[sem = <app(?subj,?vp)>] -> NP[num=?n,sem=?subj] VP[num=?n,sem=?vp]
18 | 
19 | NP[num=?n,sem=<app(?det,?nom)> ] -> Det[num=?n,sem=?det]  Nom[num=?n,sem=?nom]
20 | NP[loc=?l,num=?n,sem=?np] -> PropN[loc=?l,num=?n,sem=?np]
21 | 
22 | Nom[num=?n,sem=?nom] -> N[num=?n,sem=?nom]
23 | Nom[num=?n,sem=<app(?pp,?nom)>] -> N[num=?n,sem=?nom] PP[sem=?pp]
24 | 
25 | VP[num=?n,sem=<app(?v,?obj)>] -> TV[num=?n,sem=?v] NP[sem=?obj]
26 | VP[num=?n,sem=?v] -> IV[num=?n,sem=?v]
27 | 
28 | VP[num=?n,sem=<app(?pp,?vp)>] -> VP[num=?n,sem=?vp] PP[sem=?pp]
29 | 
30 | PP[sem=<app(?p,?np)>] -> P[loc=?l,sem=?p] NP[loc=?l,sem=?np]
31 | 
32 | #############################
33 | # Lexical Rules
34 | #############################
35 | 
36 | PropN[-loc,num=sg,sem=<\P.(P john)>] -> 'John'
37 | PropN[-loc,num=sg,sem=<\P.(P mary)>] -> 'Mary'
38 | PropN[-loc,num=sg,sem=<\P.(P suzie)>] -> 'Suzie'
39 | PropN[-loc,num=sg,sem=<\P.(P fido)>] -> 'Fido'
40 | PropN[+loc, num=sg,sem=<\P.(P noosa)>] -> 'Noosa'
41 | 
42 | NP[-loc, num=sg, sem=<\P.\x.(P x)>] -> 'who'  
43 | 
44 | Det[num=sg,sem=<\P Q. all x. ((P x) implies (Q x))>] -> 'every'
45 | Det[num=pl,sem=<\P Q. all x. ((P x) implies (Q x))>] -> 'all'
46 | Det[sem=<\P Q. some x. ((P x) and (Q x))>] -> 'some'
47 | Det[num=sg,sem=<\P Q. some x. ((P x) and (Q x))>] -> 'a'
48 | 
49 | N[num=sg,sem=<boy>] -> 'boy'
50 | N[num=pl,sem=<boy>] -> 'boys'
51 | N[num=sg,sem=<girl>] -> 'girl'
52 | N[num=pl,sem=<girl>] -> 'girls'
53 | N[num=sg,sem=<dog>] -> 'dog'
54 | N[num=pl,sem=<dog>] -> 'dogs'
55 | 
56 | TV[num=sg,sem=<\X y. (X \x. (chase x y))>,tns=pres] -> 'chases'
57 | TV[num=pl,sem=<\X y. (X \x. (chase x y))>,tns=pres] -> 'chase'
58 | TV[num=sg,sem=<\X y. (X \x. (see x y))>,tns=pres] -> 'sees'
59 | TV[num=pl,sem=<\X y. (X \x. (see x y))>,tns=pres] -> 'see'
60 | TV[num=sg,sem=<\X y. (X \x. (chase x y))>,tns=pres] -> 'chases'
61 | TV[num=pl,sem=<\X y. (X \x. (chase x y))>,tns=pres] -> 'chase'
62 | IV[num=sg,sem=<\x. (bark x)>,tns=pres] -> 'barks'
63 | IV[num=pl,sem=<\x. (bark x)>,tns=pres] -> 'bark'
64 | IV[num=sg,sem=<\x. (walk x)>,tns=pres] -> 'walks'
65 | IV[num=pl,sem=<\x. (walk x)>,tns=pres] -> 'walk'
66 | 
67 | P[+loc,sem=<\X P x. (X \y. ((P x) and (in y x)))>] -> 'in'
68 | P[-loc,sem=<\X P x. (X \y. ((P x) and (with y x)))>] -> 'with'
69 | 


--------------------------------------------------------------------------------
/resources/examples/semantics/sem3.cfg:
--------------------------------------------------------------------------------
 1 | ## Natural Language Toolkit: sem3.cfg
 2 | ##
 3 | ## First attempt at HPSG-style feature-based semantics.
 4 | ## This version doesn't work properly!
 5 | ## 
 6 | ## Author: Ewan Klein <ewan@inf.ed.ac.uk> 
 7 | ## URL: <http://nltk.sourceforge.net>
 8 | ## For license information, see LICENSE.TXT
 9 | 
10 | % start S
11 | 
12 | S[sem=?vp] -> NP[sem=?np] VP[subj=?np, sem=?vp]
13 | VP[sem=?v, subj=?np] -> IV[sem=?v, subj=?np]
14 | NP[sem=[index='k',name='kim']] -> 'Kim'
15 | IV[sem=[rel='bark', arg=?i], subj=[sem=[index=?i]]] -> 'barks'
16 | #IV[fsem=[rel='bark', arg=(1)[]], subj=[fsem=[index->(1)]]] -> 'barks'
17 | 
18 | 


--------------------------------------------------------------------------------
/resources/javasrc/Makefile:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: java interface code Makefile
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | # URL: <http://nltk.sf.net>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | # Dependencies.
 9 | MALLET_HOME = /usr/local/mallet-0.4
10 | 
11 | # Locate the NLTK java source code
12 | JAVA_SRC = $(shell find org/nltk -name '*.java')
13 | JAVA_CLS = $(JAVA_SRC:.java=.class)
14 | 
15 | # Set up java.
16 | JAVAC=javac
17 | CLASSPATH = .:$(MALLET_HOME)/class/:$(MALLET_HOME)/lib/mallet-deps.jar:$(MALLET_HOME)/lib/mallet.jar
18 | 
19 | ########################################################################
20 | # Targets
21 | ########################################################################
22 | 
23 | .PHONY: find-mallet javac clean jar jar2
24 | 
25 | jar: find-mallet nltk.jar
26 | 
27 | find-mallet:
28 | 	@if [ -d $(MALLET_HOME) ]; then \
29 | 		echo "Found Mallet: $(MALLET_HOME)"; \
30 | 	else \
31 | 		echo; \
32 | 		echo "Unable to locate required Mallet dependencies.  Use:"; \
33 | 		echo "    make MALLET_HOME=/path/to/mallet [target...]"; \
34 | 		echo "to specify the location of Mallet.  Mallet can be "; \
35 | 		echo "downloaded from http://mallet.cs.umass.edu/"; \
36 | 		echo; false; fi
37 | 
38 | nltk.jar: $(JAVA_SRC)
39 | 	$(JAVAC) -cp "$(CLASSPATH)" $(JAVA_SRC)
40 | 	jar -cf nltk.jar `find org/nltk -name '*.class'`
41 | 
42 | clean:
43 | 	rm -f $(JAVA_CLS) nltk.jar
44 | 


--------------------------------------------------------------------------------
/resources/javasrc/README.txt:
--------------------------------------------------------------------------------
 1 | NLTK-Java Interface Code
 2 | 
 3 | Copyright (C) 2001-2012 NLTK Project
 4 | For license information, see LICENSE.TXT
 5 | 
 6 | The Java code in this directory is used by NLTK to communicate with
 7 | external Java packages, such as Mallet.  In particular, this directory
 8 | defines several command-line interfaces that are used by NLTK to
 9 | communicate with external Java packages, by spawning them as
10 | subprocesss.  In cases where an external Java package already provides
11 | a command-line interface, teh replacement interface provided here is
12 | either more functional or more stable (or both).  
13 | 
14 | These command-line interfaces may be called directly by users, but
15 | they are primarily intended for use by NLTK.
16 | 


--------------------------------------------------------------------------------
/resources/nltk/VERSION:
--------------------------------------------------------------------------------
1 | 2.0.3
2 | 


--------------------------------------------------------------------------------
/resources/nltk/app/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Applications package
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | #         Steven Bird <sb@csse.unimelb.edu.au>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | """
10 | Interactive NLTK Applications:
11 | 
12 | chartparser:  Chart Parser
13 | chunkparser:  Regular-Expression Chunk Parser
14 | collocations: Find collocations in text
15 | concordance:  Part-of-speech concordancer
16 | nemo:         Finding (and Replacing) Nemo regular expression tool
17 | rdparser:     Recursive Descent Parser
18 | srparser:     Shift-Reduce Parser
19 | wordnet:      WordNet Browser
20 | """
21 | 
22 | 
23 | # Import Tkinter-based modules if Tkinter is installed
24 | try:
25 |     import Tkinter
26 | except ImportError:
27 |     import warnings
28 |     warnings.warn("nltk.app package not loaded "
29 |                   "(please install Tkinter library).")
30 | else:
31 |     from chartparser_app import app as chartparser
32 |     from chunkparser_app import app as chunkparser
33 |     from collocations_app import app as collocations
34 |     from concordance_app import app as concordance
35 |     from nemo_app import app as nemo
36 |     from rdparser_app import app as rdparser
37 |     from srparser_app import app as srparser
38 |     from wordnet_app import app as wordnet
39 | 
40 |     try:
41 |         import pylab
42 |     except ImportError:
43 |         import warnings
44 |         warnings.warn("nltk.app.wordfreq not loaded "
45 |                       "(requires the pylab library).")
46 |     else:
47 |         from wordfreq_app import app as wordfreq
48 | 


--------------------------------------------------------------------------------
/resources/nltk/app/wordfreq_app.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Wordfreq Application
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | import pylab
 9 | import nltk.text
10 | from nltk.corpus import gutenberg
11 | 
12 | def plot_word_freq_dist(text):
13 |     fd = text.vocab()
14 | 
15 |     samples = fd.keys()[:50]
16 |     values = [fd[sample] for sample in samples]
17 |     values = [sum(values[:i+1]) * 100.0/fd.N() for i in range(len(values))]
18 |     pylab.title(text.name)
19 |     pylab.xlabel("Samples")
20 |     pylab.ylabel("Cumulative Percentage")
21 |     pylab.plot(values)
22 |     pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
23 |     pylab.show()
24 | 
25 | def app():
26 |     t1 = nltk.Text(gutenberg.words('melville-moby_dick.txt'))
27 |     plot_word_freq_dist(t1)
28 | 
29 | if __name__ == '__main__':
30 |     app()
31 | 
32 | __all__ = ['app']
33 | 


--------------------------------------------------------------------------------
/resources/nltk/ccg/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Combinatory Categorial Grammar
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | Combinatory Categorial Grammar.
10 | 
11 | For more information see nltk/doc/contrib/ccg/ccg.pdf
12 | """
13 | 
14 | from nltk.ccg.combinator import (UndirectedBinaryCombinator, DirectedBinaryCombinator,
15 |                                  ForwardCombinator, BackwardCombinator,
16 |                                  UndirectedFunctionApplication, ForwardApplication,
17 |                                  BackwardApplication, UndirectedComposition,
18 |                                  ForwardComposition, BackwardComposition,
19 |                                  BackwardBx, UndirectedSubstitution, ForwardSubstitution,
20 |                                  BackwardSx, UndirectedTypeRaise, ForwardT, BackwardT)
21 | from nltk.ccg.chart import CCGEdge, CCGLeafEdge, CCGChartParser, CCGChart
22 | from nltk.ccg.lexicon import CCGLexicon
23 | 


--------------------------------------------------------------------------------
/resources/nltk/chat/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Chatbots
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Authors: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | # Based on an Eliza implementation by Joe Strout <joe@strout.net>,
 9 | # Jeff Epler <jepler@inetnebr.com> and Jez Higgins <jez@jezuk.co.uk>.
10 | 
11 | """
12 | A class for simple chatbots.  These perform simple pattern matching on sentences
13 | typed by users, and respond with automatically generated sentences.
14 | 
15 | These chatbots may not work using the windows command line or the
16 | windows IDLE GUI.
17 | """
18 | 
19 | from util import Chat
20 | from eliza import eliza_chat
21 | from iesha import iesha_chat
22 | from rude import rude_chat
23 | from suntsu import suntsu_chat
24 | from zen import zen_chat
25 | 
26 | bots = [
27 |     (eliza_chat,  'Eliza (psycho-babble)'),
28 |     (iesha_chat,  'Iesha (teen anime junky)'),
29 |     (rude_chat,   'Rude (abusive bot)'),
30 |     (suntsu_chat, 'Suntsu (Chinese sayings)'),
31 |     (zen_chat,    'Zen (gems of wisdom)')]
32 | 
33 | def chatbots():
34 |     import sys
35 |     print 'Which chatbot would you like to talk to?'
36 |     botcount = len(bots)
37 |     for i in range(botcount):
38 |         print '  %d: %s' % (i+1, bots[i][1])
39 |     while True:
40 |         print '\nEnter a number in the range 1-%d: ' % botcount,
41 |         choice = sys.stdin.readline().strip()
42 |         if choice.isdigit() and (int(choice) - 1) in range(botcount):
43 |             break
44 |         else:
45 |             print '   Error: bad chatbot number'
46 | 
47 |     chatbot = bots[int(choice)-1][0]
48 |     chatbot()
49 | 


--------------------------------------------------------------------------------
/resources/nltk/chat/rude.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Rude Chatbot
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Peter Spiller <pspiller@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | from util import Chat, reflections
 9 | 
10 | pairs = (
11 |     (r'We (.*)',
12 |         ("What do you mean, 'we'?",
13 |         "Don't include me in that!",
14 |         "I wouldn't be so sure about that.")),
15 | 
16 |     (r'You should (.*)',
17 |         ("Don't tell me what to do, buddy.",
18 |         "Really? I should, should I?")),
19 | 
20 |     (r'You\'re(.*)',
21 |         ("More like YOU'RE %1!",
22 |         "Hah! Look who's talking.",
23 |         "Come over here and tell me I'm %1.")),
24 | 
25 |     (r'You are(.*)',
26 |         ("More like YOU'RE %1!",
27 |         "Hah! Look who's talking.",
28 |         "Come over here and tell me I'm %1.")),
29 | 
30 |     (r'I can\'t(.*)',
31 |         ("You do sound like the type who can't %1.",
32 |         "Hear that splashing sound? That's my heart bleeding for you.",
33 |         "Tell somebody who might actually care.")),
34 | 
35 |     (r'I think (.*)',
36 |         ("I wouldn't think too hard if I were you.",
37 |         "You actually think? I'd never have guessed...")),
38 | 
39 |     (r'I (.*)',
40 |         ("I'm getting a bit tired of hearing about you.",
41 |         "How about we talk about me instead?",
42 |         "Me, me, me... Frankly, I don't care.")),
43 | 
44 |     (r'How (.*)',
45 |         ("How do you think?",
46 |         "Take a wild guess.",
47 |         "I'm not even going to dignify that with an answer.")),
48 | 
49 |     (r'What (.*)',
50 |         ("Do I look like an encyclopedia?",
51 |         "Figure it out yourself.")),
52 | 
53 |     (r'Why (.*)',
54 |         ("Why not?",
55 |         "That's so obvious I thought even you'd have already figured it out.")),
56 | 
57 |     (r'(.*)shut up(.*)',
58 |         ("Make me.",
59 |         "Getting angry at a feeble NLP assignment? Somebody's losing it.",
60 |         "Say that again, I dare you.")),
61 | 
62 |     (r'Shut up(.*)',
63 |         ("Make me.",
64 |         "Getting angry at a feeble NLP assignment? Somebody's losing it.",
65 |         "Say that again, I dare you.")),
66 | 
67 |     (r'Hello(.*)',
68 |         ("Oh good, somebody else to talk to. Joy.",
69 |         "'Hello'? How original...")),
70 | 
71 |     (r'(.*)',
72 |         ("I'm getting bored here. Become more interesting.",
73 |         "Either become more thrilling or get lost, buddy.",
74 |         "Change the subject before I die of fatal boredom."))
75 | )
76 | 
77 | rude_chatbot = Chat(pairs, reflections)
78 | 
79 | def rude_chat():
80 |     print "Talk to the program by typing in plain English, using normal upper-"
81 |     print 'and lower-case letters and punctuation.  Enter "quit" when done.'
82 |     print '='*72
83 |     print "I suppose I should say hello."
84 | 
85 |     rude_chatbot.converse()
86 | 
87 | def demo():
88 |     rude_chat()
89 | 
90 | if __name__ == "__main__":
91 |     demo()
92 | 


--------------------------------------------------------------------------------
/resources/nltk/chunk/api.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Chunk parsing API
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | #         Steven Bird <sb@csse.unimelb.edu.au> (minor additions)
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | ##//////////////////////////////////////////////////////
10 | ##  Chunk Parser Interface
11 | ##//////////////////////////////////////////////////////
12 | 
13 | from nltk.parse import ParserI
14 | 
15 | from nltk.chunk.util import ChunkScore
16 | 
17 | class ChunkParserI(ParserI):
18 |     """
19 |     A processing interface for identifying non-overlapping groups in
20 |     unrestricted text.  Typically, chunk parsers are used to find base
21 |     syntactic constituents, such as base noun phrases.  Unlike
22 |     ``ParserI``, ``ChunkParserI`` guarantees that the ``parse()`` method
23 |     will always generate a parse.
24 |     """
25 |     def parse(self, tokens):
26 |         """
27 |         Return the best chunk structure for the given tokens
28 |         and return a tree.
29 | 
30 |         :param tokens: The list of (word, tag) tokens to be chunked.
31 |         :type tokens: list(tuple)
32 |         :rtype: Tree
33 |         """
34 |         raise NotImplementedError()
35 | 
36 |     def evaluate(self, gold):
37 |         """
38 |         Score the accuracy of the chunker against the gold standard.
39 |         Remove the chunking the gold standard text, rechunk it using
40 |         the chunker, and return a ``ChunkScore`` object
41 |         reflecting the performance of this chunk peraser.
42 | 
43 |         :type gold: list(Tree)
44 |         :param gold: The list of chunked sentences to score the chunker on.
45 |         :rtype: ChunkScore
46 |         """
47 |         chunkscore = ChunkScore()
48 |         for correct in gold:
49 |             chunkscore.score(correct, self.parse(correct.leaves()))
50 |         return chunkscore
51 | 
52 | 


--------------------------------------------------------------------------------
/resources/nltk/classify/mallet.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Interface to Mallet Machine Learning Package
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | A set of functions used to interface with the external Mallet_ machine learning
10 | package. Before mallet can be used, you should tell NLTK where it can find
11 | the mallet package, using the ``config_mallet()`` function. Typical usage:
12 | 
13 | .. doctest::
14 |     :options: +SKIP
15 | 
16 |     >>> from nltk.classify import mallet
17 |     >>> mallet.config_mallet() # pass path to mallet as argument if needed
18 |     [Found mallet: ...]
19 | 
20 | .. _Mallet: http://mallet.cs.umass.edu/
21 | """
22 | 
23 | import os
24 | import os.path
25 | 
26 | from nltk.internals import find_binary, java
27 | 
28 | ######################################################################
29 | #{ Configuration
30 | ######################################################################
31 | 
32 | _mallet_home = None
33 | _mallet_classpath = None
34 | def config_mallet(mallet_home=None):
35 |     """
36 |     Configure NLTK's interface to the Mallet machine learning package.
37 | 
38 |     :type mallet_home: str
39 |     :param mallet_home: The full path to the mallet directory. If not
40 |         specified, then NLTK will search the system for a mallet directory;
41 |         and if one is not found, it will raise a ``LookupError`` exception.
42 |     """
43 |     global _mallet_home, _mallet_classpath
44 | 
45 |     # We don't actually care about this binary -- we just use it to
46 |     # make sure we've found the right directory.
47 |     mallethon_bin = find_binary(
48 |         'mallet', mallet_home,
49 |         env_vars=['MALLET',  'MALLET_HOME'],
50 |         binary_names=['mallethon'],
51 |         url='http://mallet.cs.umass.edu')
52 |     # Record the location where mallet lives.
53 |     bin_dir = os.path.split(mallethon_bin)[0]
54 |     _mallet_home = os.path.split(bin_dir)[0]
55 |     # Construct a classpath for using mallet.
56 |     lib_dir = os.path.join(_mallet_home, 'lib')
57 |     if not os.path.isdir(lib_dir):
58 |         raise ValueError('While configuring mallet: directory %r '
59 |                          'not found.' % lib_dir)
60 |     _mallet_classpath = os.path.pathsep.join([os.path.join(lib_dir, filename)
61 |                                   for filename in sorted(os.listdir(lib_dir))
62 |                                   if filename.endswith('.jar')])
63 | 
64 | 
65 | def call_mallet(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
66 |                 blocking=True):
67 |     """
68 |     Call `nltk.internals.java` with the given command, and with the classpath
69 |     modified to include both ``nltk.jar`` and all the ``.jar`` files defined by
70 |     Mallet.
71 | 
72 |     See `nltk.internals.java` for parameter and return value descriptions.
73 |     """
74 |     if _mallet_classpath is None:
75 |         config_mallet()
76 | 
77 |     # Set up the classpath
78 |     if classpath is None:
79 |         classpath = _mallet_classpath
80 |     else:
81 |         classpath += os.path.pathsep + _mallet_classpath
82 |     # Delegate to java()
83 |     return java(cmd, classpath, stdin, stdout, stderr, blocking)
84 | 


--------------------------------------------------------------------------------
/resources/nltk/cluster/api.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Clusterer Interfaces
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 5 | # Porting: Steven Bird <sb@csse.unimelb.edu.au>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | from nltk.probability import DictionaryProbDist
10 | 
11 | class ClusterI(object):
12 |     """
13 |     Interface covering basic clustering functionality.
14 |     """
15 | 
16 |     def cluster(self, vectors, assign_clusters=False):
17 |         """
18 |         Assigns the vectors to clusters, learning the clustering parameters
19 |         from the data. Returns a cluster identifier for each vector.
20 |         """
21 |         raise NotImplementedError()
22 | 
23 |     def classify(self, token):
24 |         """
25 |         Classifies the token into a cluster, setting the token's CLUSTER
26 |         parameter to that cluster identifier.
27 |         """
28 |         raise NotImplementedError()
29 | 
30 |     def likelihood(self, vector, label):
31 |         """
32 |         Returns the likelihood (a float) of the token having the
33 |         corresponding cluster.
34 |         """
35 |         if self.classify(vector) == label:
36 |             return 1.0
37 |         else:
38 |             return 0.0
39 | 
40 |     def classification_probdist(self, vector):
41 |         """
42 |         Classifies the token into a cluster, returning
43 |         a probability distribution over the cluster identifiers.
44 |         """
45 |         likelihoods = {}
46 |         sum = 0.0
47 |         for cluster in self.cluster_names():
48 |             likelihoods[cluster] = self.likelihood(vector, cluster)
49 |             sum += likelihoods[cluster]
50 |         for cluster in self.cluster_names():
51 |             likelihoods[cluster] /= sum
52 |         return DictionaryProbDist(likelihoods)
53 | 
54 |     def num_clusters(self):
55 |         """
56 |         Returns the number of clusters.
57 |         """
58 |         raise NotImplementedError()
59 | 
60 |     def cluster_names(self):
61 |         """
62 |         Returns the names of the clusters.
63 |         """
64 |         return range(self.num_clusters())
65 | 
66 |     def cluster_name(self, index):
67 |         """
68 |         Returns the names of the cluster at index.
69 |         """
70 |         return index
71 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/europarl_raw.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Europarl Corpus Readers
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author:  Nitin Madnani <nmadnani@umiacs.umd.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | import re
 9 | from util import LazyCorpusLoader
10 | from reader import *
11 | 
12 | # Create a new corpus reader instance for each European language
13 | danish = LazyCorpusLoader(
14 |     'europarl_raw/danish', EuroparlCorpusReader, r'ep-.*\.da', encoding='utf-8')
15 | 
16 | dutch = LazyCorpusLoader(
17 |     'europarl_raw/dutch', EuroparlCorpusReader, r'ep-.*\.nl', encoding='utf-8')
18 | 
19 | english = LazyCorpusLoader(
20 |     'europarl_raw/english', EuroparlCorpusReader, r'ep-.*\.en', encoding='utf-8')
21 | 
22 | finnish = LazyCorpusLoader(
23 |     'europarl_raw/finnish', EuroparlCorpusReader, r'ep-.*\.fi', encoding='utf-8')
24 | 
25 | french = LazyCorpusLoader(
26 |     'europarl_raw/french', EuroparlCorpusReader, r'ep-.*\.fr', encoding='utf-8')
27 | 
28 | german = LazyCorpusLoader(
29 |     'europarl_raw/german', EuroparlCorpusReader, r'ep-.*\.de', encoding='utf-8')
30 | 
31 | greek = LazyCorpusLoader(
32 |     'europarl_raw/greek', EuroparlCorpusReader, r'ep-.*\.el', encoding='utf-8')
33 | 
34 | italian = LazyCorpusLoader(
35 |     'europarl_raw/italian', EuroparlCorpusReader, r'ep-.*\.it', encoding='utf-8')
36 | 
37 | portuguese = LazyCorpusLoader(
38 |     'europarl_raw/portuguese', EuroparlCorpusReader, r'ep-.*\.pt', encoding='utf-8')
39 | 
40 | spanish = LazyCorpusLoader(
41 |     'europarl_raw/spanish', EuroparlCorpusReader, r'ep-.*\.es', encoding='utf-8')
42 | 
43 | swedish = LazyCorpusLoader(
44 |     'europarl_raw/swedish', EuroparlCorpusReader, r'ep-.*\.sv', encoding='utf-8')
45 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/indian.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Indian Language POS-Tagged Corpus Reader
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@ldc.upenn.edu>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | """
10 | Indian Language POS-Tagged Corpus
11 | Collected by A Kumaran, Microsoft Research, India
12 | Distributed with permission
13 | 
14 | Contents:
15 |   - Bangla: IIT Kharagpur
16 |   - Hindi: Microsoft Research India
17 |   - Marathi: IIT Bombay
18 |   - Telugu: IIIT Hyderabad
19 | """
20 | 
21 | import codecs
22 | 
23 | from nltk.tag.util import str2tuple
24 | 
25 | from util import *
26 | from api import *
27 | 
28 | class IndianCorpusReader(CorpusReader):
29 |     """
30 |     List of words, one per line.  Blank lines are ignored.
31 |     """
32 |     def words(self, fileids=None):
33 |         return concat([IndianCorpusView(fileid, enc,
34 |                                         False, False)
35 |                        for (fileid, enc) in self.abspaths(fileids, True)])
36 | 
37 |     def tagged_words(self, fileids=None, simplify_tags=False):
38 |         if simplify_tags:
39 |             tag_mapping_function = self._tag_mapping_function
40 |         else:
41 |             tag_mapping_function = None
42 |         return concat([IndianCorpusView(fileid, enc,
43 |                                         True, False, tag_mapping_function)
44 |                        for (fileid, enc) in self.abspaths(fileids, True)])
45 | 
46 |     def sents(self, fileids=None):
47 |         return concat([IndianCorpusView(fileid, enc,
48 |                                         False, True)
49 |                        for (fileid, enc) in self.abspaths(fileids, True)])
50 | 
51 |     def tagged_sents(self, fileids=None, simplify_tags=False):
52 |         if simplify_tags:
53 |             tag_mapping_function = self._tag_mapping_function
54 |         else:
55 |             tag_mapping_function = None
56 |         return concat([IndianCorpusView(fileid, enc,
57 |                                         True, True, tag_mapping_function)
58 |                        for (fileid, enc) in self.abspaths(fileids, True)])
59 | 
60 |     def raw(self, fileids=None):
61 |         if fileids is None: fileids = self._fileids
62 |         elif isinstance(fileids, basestring): fileids = [fileids]
63 |         return concat([self.open(f).read() for f in fileids])
64 | 
65 | 
66 | class IndianCorpusView(StreamBackedCorpusView):
67 |     def __init__(self, corpus_file, encoding, tagged,
68 |                  group_by_sent, tag_mapping_function=None):
69 |         self._tagged = tagged
70 |         self._group_by_sent = group_by_sent
71 |         self._tag_mapping_function = tag_mapping_function
72 |         StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
73 | 
74 |     def read_block(self, stream):
75 |         line = stream.readline()
76 |         if line.startswith('<'):
77 |             return []
78 |         sent = [str2tuple(word, sep='_') for word in line.split()]
79 |         if self._tag_mapping_function:
80 |             sent = [(w, self._tag_mapping_function(t)) for (w,t) in sent]
81 |         if not self._tagged: sent = [w for (w,t) in sent]
82 |         if self._group_by_sent:
83 |             return [sent]
84 |         else:
85 |             return sent
86 | 
87 | 
88 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/nps_chat.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: NPS Chat Corpus Reader
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | import re
 9 | import textwrap
10 | 
11 | from nltk.util import LazyConcatenation
12 | from nltk.internals import ElementWrapper
13 | 
14 | from util import *
15 | from api import *
16 | from xmldocs import *
17 | 
18 | class NPSChatCorpusReader(XMLCorpusReader):
19 | 
20 |     def __init__(self, root, fileids, wrap_etree=False, tag_mapping_function=None):
21 |         XMLCorpusReader.__init__(self, root, fileids, wrap_etree)
22 |         self._tag_mapping_function = tag_mapping_function
23 | 
24 |     def xml_posts(self, fileids=None):
25 |         if self._wrap_etree:
26 |             return concat([XMLCorpusView(fileid, 'Session/Posts/Post',
27 |                                          self._wrap_elt)
28 |                            for fileid in self.abspaths(fileids)])
29 |         else:
30 |             return concat([XMLCorpusView(fileid, 'Session/Posts/Post')
31 |                            for fileid in self.abspaths(fileids)])
32 | 
33 |     def posts(self, fileids=None):
34 |         return concat([XMLCorpusView(fileid, 'Session/Posts/Post/terminals',
35 |                                      self._elt_to_words)
36 |                        for fileid in self.abspaths(fileids)])
37 | 
38 |     def tagged_posts(self, fileids=None, simplify_tags=False):
39 |         def reader(elt, handler):
40 |             return self._elt_to_tagged_words(elt, handler, simplify_tags)
41 |         return concat([XMLCorpusView(fileid, 'Session/Posts/Post/terminals',
42 |                                      reader)
43 |                        for fileid in self.abspaths(fileids)])
44 | 
45 |     def words(self, fileids=None):
46 |         return LazyConcatenation(self.posts(fileids))
47 | 
48 |     def tagged_words(self, fileids=None, simplify_tags=False):
49 |         return LazyConcatenation(self.tagged_posts(fileids, simplify_tags))
50 | 
51 |     def _wrap_elt(self, elt, handler):
52 |         return ElementWrapper(elt)
53 | 
54 |     def _elt_to_words(self, elt, handler):
55 |         return [self._simplify_username(t.attrib['word'])
56 |                 for t in elt.findall('t')]
57 | 
58 |     def _elt_to_tagged_words(self, elt, handler, simplify_tags=False):
59 |         tagged_post = [(self._simplify_username(t.attrib['word']),
60 |                         t.attrib['pos']) for t in elt.findall('t')]
61 |         if simplify_tags:
62 |             tagged_post = [(w, self._tag_mapping_function(t))
63 |                            for (w,t) in tagged_post]
64 |         return tagged_post
65 | 
66 |     @staticmethod
67 |     def _simplify_username(word):
68 |         if 'User' in word:
69 |             word = 'U' + word.split('User', 1)[1]
70 |         return word
71 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/ppattach.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: PP Attachment Corpus Reader
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@ldc.upenn.edu>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | """
10 | Read lines from the Prepositional Phrase Attachment Corpus.
11 | 
12 | The PP Attachment Corpus contains several files having the format:
13 | 
14 | sentence_id verb noun1 preposition noun2 attachment
15 | 
16 | For example:
17 | 
18 | 42960 gives authority to administration V
19 | 46742 gives inventors of microchip N
20 | 
21 | The PP attachment is to the verb phrase (V) or noun phrase (N), i.e.:
22 | 
23 | (VP gives (NP authority) (PP to administration))
24 | (VP gives (NP inventors (PP of microchip)))
25 | 
26 | The corpus contains the following files:
27 | 
28 | training:   training set
29 | devset:     development test set, used for algorithm development.
30 | test:       test set, used to report results
31 | bitstrings: word classes derived from Mutual Information Clustering for the Wall Street Journal.
32 | 
33 | Ratnaparkhi, Adwait (1994). A Maximum Entropy Model for Prepositional
34 | Phrase Attachment.  Proceedings of the ARPA Human Language Technology
35 | Conference.  [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps]
36 | 
37 | The PP Attachment Corpus is distributed with NLTK with the permission
38 | of the author.
39 | """
40 | 
41 | import codecs
42 | 
43 | from util import *
44 | from api import *
45 | 
46 | class PPAttachment:
47 |     def __init__(self, sent, verb, noun1, prep, noun2, attachment):
48 |         self.sent = sent
49 |         self.verb = verb
50 |         self.noun1 = noun1
51 |         self.prep = prep
52 |         self.noun2 = noun2
53 |         self.attachment = attachment
54 | 
55 |     def __repr__(self):
56 |         return ('PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, '
57 |                 'noun2=%r, attachment=%r)' %
58 |                 (self.sent, self.verb, self.noun1, self.prep,
59 |                  self.noun2, self.attachment))
60 | 
61 | class PPAttachmentCorpusReader(CorpusReader):
62 |     """
63 |     sentence_id verb noun1 preposition noun2 attachment
64 |     """
65 |     def attachments(self, fileids):
66 |         return concat([StreamBackedCorpusView(fileid, self._read_obj_block,
67 |                                               encoding=enc)
68 |                        for (fileid, enc) in self.abspaths(fileids, True)])
69 | 
70 |     def tuples(self, fileids):
71 |         return concat([StreamBackedCorpusView(fileid, self._read_tuple_block,
72 |                                               encoding=enc)
73 |                        for (fileid, enc) in self.abspaths(fileids, True)])
74 | 
75 |     def raw(self, fileids=None):
76 |         if fileids is None: fileids = self._fileids
77 |         elif isinstance(fileids, basestring): fileids = [fileids]
78 |         return concat([self.open(f).read() for f in fileids])
79 | 
80 |     def _read_tuple_block(self, stream):
81 |         line = stream.readline()
82 |         if line:
83 |             return [tuple(line.split())]
84 |         else:
85 |             return []
86 | 
87 |     def _read_obj_block(self, stream):
88 |         line = stream.readline()
89 |         if line:
90 |             return [PPAttachment(*line.split())]
91 |         else:
92 |             return []
93 | 
94 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/sinica_treebank.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Sinica Treebank Reader
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@ldc.upenn.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | Sinica Treebank Corpus Sample
10 | 
11 | http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm
12 | 
13 | 10,000 parsed sentences, drawn from the Academia Sinica Balanced
14 | Corpus of Modern Chinese.  Parse tree notation is based on
15 | Information-based Case Grammar.  Tagset documentation is available
16 | at http://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html
17 | 
18 | Language and Knowledge Processing Group, Institute of Information
19 | Science, Academia Sinica
20 | 
21 | It is distributed with the Natural Language Toolkit under the terms of
22 | the Creative Commons Attribution-NonCommercial-ShareAlike License
23 | [http://creativecommons.org/licenses/by-nc-sa/2.5/].
24 | 
25 | References:
26 | 
27 | Feng-Yi Chen, Pi-Fang Tsai, Keh-Jiann Chen, and Chu-Ren Huang (1999)
28 | The Construction of Sinica Treebank. Computational Linguistics and
29 | Chinese Language Processing, 4, pp 87-104.
30 | 
31 | Huang Chu-Ren, Keh-Jiann Chen, Feng-Yi Chen, Keh-Jiann Chen, Zhao-Ming
32 | Gao, and Kuang-Yu Chen. 2000. Sinica Treebank: Design Criteria,
33 | Annotation Guidelines, and On-line Interface. Proceedings of 2nd
34 | Chinese Language Processing Workshop, Association for Computational
35 | Linguistics.
36 | 
37 | Chen Keh-Jiann and Yu-Ming Hsieh (2004) Chinese Treebanks and Grammar
38 | Extraction, Proceedings of IJCNLP-04, pp560-565.
39 | """
40 | 
41 | import os
42 | import re
43 | 
44 | import nltk
45 | 
46 | from util import *
47 | from api import *
48 | 
49 | IDENTIFIER = re.compile(r'^#\S+\s')
50 | APPENDIX = re.compile(r'(?<=\))#.*$')
51 | TAGWORD = re.compile(r':([^:()|]+):([^:()|]+)')
52 | WORD = re.compile(r':[^:()|]+:([^:()|]+)')
53 | 
54 | class SinicaTreebankCorpusReader(SyntaxCorpusReader):
55 |     """
56 |     Reader for the sinica treebank.
57 |     """
58 |     def _read_block(self, stream):
59 |         sent = stream.readline()
60 |         sent = IDENTIFIER.sub('', sent)
61 |         sent = APPENDIX.sub('', sent)
62 |         return [sent]
63 | 
64 |     def _parse(self, sent):
65 |         return nltk.tree.sinica_parse(sent)
66 | 
67 |     def _tag(self, sent, simplify_tags=None):
68 |         tagged_sent = [(w,t) for (t,w) in TAGWORD.findall(sent)]
69 |         if simplify_tags:
70 |             tagged_sent = [(w, self._tag_mapping_function(t))
71 |                            for (w,t) in tagged_sent]
72 |         return tagged_sent
73 | 
74 |     def _word(self, sent):
75 |         return WORD.findall(sent)
76 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/string_category.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: String Category Corpus Reader
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@ldc.upenn.edu>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | """
10 | Read tuples from a corpus consisting of categorized strings.
11 | For example, from the question classification corpus:
12 | 
13 | NUM:dist How far is it from Denver to Aspen ?
14 | LOC:city What county is Modesto , California in ?
15 | HUM:desc Who was Galileo ?
16 | DESC:def What is an atom ?
17 | NUM:date When did Hawaii become a state ?
18 | """
19 | 
20 | # based on PPAttachmentCorpusReader
21 | 
22 | import os
23 | 
24 | from util import *
25 | from api import *
26 | 
27 | # [xx] Should the order of the tuple be reversed -- in most other places
28 | # in nltk, we use the form (data, tag) -- e.g., tagged words and
29 | # labeled texts for classifiers.
30 | class StringCategoryCorpusReader(CorpusReader):
31 |     def __init__(self, root, fileids, delimiter=' ', encoding=None):
32 |         """
33 |         :param root: The root directory for this corpus.
34 |         :param fileids: A list or regexp specifying the fileids in this corpus.
35 |         :param delimiter: Field delimiter
36 |         """
37 |         CorpusReader.__init__(self, root, fileids, encoding)
38 |         self._delimiter = delimiter
39 | 
40 |     def tuples(self, fileids=None):
41 |         if fileids is None: fileids = self._fileids
42 |         elif isinstance(fileids, basestring): fileids = [fileids]
43 |         return concat([StreamBackedCorpusView(fileid, self._read_tuple_block,
44 |                                               encoding=enc)
45 |                        for (fileid, enc) in self.abspaths(fileids, True)])
46 | 
47 |     def raw(self, fileids=None):
48 |         """
49 |         :return: the text contents of the given fileids, as a single string.
50 |         """
51 |         if fileids is None: fileids = self._fileids
52 |         elif isinstance(fileids, basestring): fileids = [fileids]
53 |         return concat([self.open(f).read() for f in fileids])
54 | 
55 |     def _read_tuple_block(self, stream):
56 |         line = stream.readline().strip()
57 |         if line:
58 |             return [tuple(line.split(self._delimiter, 1))]
59 |         else:
60 |             return []
61 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/toolbox.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Toolbox Reader
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Greg Aumann <greg_aumann@sil.org>
 5 | #         Stuart Robinson <Stuart.Robinson@mpi.nl>
 6 | #         Steven Bird <sb@csse.unimelb.edu.au>
 7 | # URL: <http://www.nltk.org/>
 8 | # For license information, see LICENSE.TXT
 9 | 
10 | """
11 | Module for reading, writing and manipulating
12 | Toolbox databases and settings fileids.
13 | """
14 | 
15 | import os
16 | import re
17 | import codecs
18 | 
19 | from nltk.toolbox import ToolboxData
20 | 
21 | from util import *
22 | from api import *
23 | 
24 | class ToolboxCorpusReader(CorpusReader):
25 |     def xml(self, fileids, key=None):
26 |         return concat([ToolboxData(path, enc).parse(key)
27 |                        for (path, enc) in self.abspaths(fileids, True)])
28 | 
29 |     def fields(self, fileids, strip=True, unwrap=True, encoding=None,
30 |                errors='strict', unicode_fields=None):
31 |         return concat([list(ToolboxData(fileid,enc).fields(
32 |                              strip, unwrap, encoding, errors, unicode_fields))
33 |                        for (fileid, enc)
34 |                        in self.abspaths(fileids, include_encoding=True)])
35 | 
36 |     # should probably be done lazily:
37 |     def entries(self, fileids, **kwargs):
38 |         if 'key' in kwargs:
39 |             key = kwargs['key']
40 |             del kwargs['key']
41 |         else:
42 |             key = 'lx'  # the default key in MDF
43 |         entries = []
44 |         for marker, contents in self.fields(fileids, **kwargs):
45 |             if marker == key:
46 |                 entries.append((contents, []))
47 |             else:
48 |                 try:
49 |                     entries[-1][-1].append((marker, contents))
50 |                 except IndexError:
51 |                     pass
52 |         return entries
53 | 
54 |     def words(self, fileids, key='lx'):
55 |         return [contents for marker, contents in self.fields(fileids) if marker == key]
56 | 
57 |     def raw(self, fileids):
58 |         if fileids is None: fileids = self._fileids
59 |         elif isinstance(fileids, basestring): fileids = [fileids]
60 |         return concat([self.open(f).read() for f in fileids])
61 | 
62 | 
63 | def demo():
64 |     pass
65 | 
66 | if __name__ == '__main__':
67 |     demo()
68 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/wordlist.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Word List Corpus Reader
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@ldc.upenn.edu>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | from nltk.tokenize import line_tokenize
10 | 
11 | from util import *
12 | from api import *
13 | 
14 | class WordListCorpusReader(CorpusReader):
15 |     """
16 |     List of words, one per line.  Blank lines are ignored.
17 |     """
18 |     def words(self, fileids=None):
19 |         return line_tokenize(self.raw(fileids))
20 | 
21 |     def raw(self, fileids=None):
22 |         if fileids is None: fileids = self._fileids
23 |         elif isinstance(fileids, basestring): fileids = [fileids]
24 |         return concat([self.open(f).read() for f in fileids])
25 | 
26 | 
27 | class SwadeshCorpusReader(WordListCorpusReader):
28 |     def entries(self, fileids=None):
29 |         """
30 |         :return: a tuple of words for the specified fileids.
31 |         """
32 |         if not fileids:
33 |             fileids = self.fileids()
34 | 
35 |         wordlists = [self.words(f) for f in fileids]
36 |         return zip(*wordlists)
37 | 


--------------------------------------------------------------------------------
/resources/nltk/corpus/reader/ycoe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/nltk/corpus/reader/ycoe.py


--------------------------------------------------------------------------------
/resources/nltk/corpus/util.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Corpus Reader Utility Functions
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | ######################################################################
 9 | #{ Lazy Corpus Loader
10 | ######################################################################
11 | 
12 | import re
13 | import nltk
14 | 
15 | TRY_ZIPFILE_FIRST = False
16 | 
17 | class LazyCorpusLoader(object):
18 |     """
19 |     A proxy object which is used to stand in for a corpus object
20 |     before the corpus is loaded.  This allows NLTK to create an object
21 |     for each corpus, but defer the costs associated with loading those
22 |     corpora until the first time that they're actually accessed.
23 | 
24 |     The first time this object is accessed in any way, it will load
25 |     the corresponding corpus, and transform itself into that corpus
26 |     (by modifying its own ``__class__`` and ``__dict__`` attributes).
27 | 
28 |     If the corpus can not be found, then accessing this object will
29 |     raise an exception, displaying installation instructions for the
30 |     NLTK data package.  Once they've properly installed the data
31 |     package (or modified ``nltk.data.path`` to point to its location),
32 |     they can then use the corpus object without restarting python.
33 |     """
34 |     def __init__(self, name, reader_cls, *args, **kwargs):
35 |         from nltk.corpus.reader.api import CorpusReader
36 |         assert issubclass(reader_cls, CorpusReader)
37 |         self.__name = self.__name__ = name
38 |         self.__reader_cls = reader_cls
39 |         self.__args = args
40 |         self.__kwargs = kwargs
41 | 
42 |     def __load(self):
43 |         # Find the corpus root directory.
44 |         zip_name = re.sub(r'(([^/]*)(/.*)?)', r'\2.zip/\1/', self.__name)
45 |         if TRY_ZIPFILE_FIRST:
46 |             try:
47 |                 root = nltk.data.find('corpora/%s' % zip_name)
48 |             except LookupError:
49 |                 raise
50 |                 root = nltk.data.find('corpora/%s' % self.__name)
51 |         else:
52 |             try:
53 |                 root = nltk.data.find('corpora/%s' % self.__name)
54 |             except LookupError, e:
55 |                 try: root = nltk.data.find('corpora/%s' % zip_name)
56 |                 except LookupError: raise e
57 | 
58 |         # Load the corpus.
59 |         corpus = self.__reader_cls(root, *self.__args, **self.__kwargs)
60 | 
61 |         # This is where the magic happens!  Transform ourselves into
62 |         # the corpus by modifying our own __dict__ and __class__ to
63 |         # match that of the corpus.
64 |         self.__dict__ = corpus.__dict__
65 |         self.__class__ = corpus.__class__
66 | 
67 |     def __getattr__(self, attr):
68 |         self.__load()
69 |         # This looks circular, but its not, since __load() changes our
70 |         # __class__ to something new:
71 |         return getattr(self, attr)
72 | 
73 |     def __repr__(self):
74 |         return '<%s in %r (not loaded yet)>' % (
75 |             self.__reader_cls.__name__, '.../corpora/'+self.__name)
76 | 


--------------------------------------------------------------------------------
/resources/nltk/draw/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: graphical representations package
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | #         Steven Bird <sb@csse.unimelb.edu.au>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | # Import Tkinter-based modules if Tkinter is installed
10 | try:
11 |     import Tkinter
12 | except ImportError:
13 |     import warnings
14 |     warnings.warn("nltk.draw package not loaded "
15 |                   "(please install Tkinter library).")
16 | else:
17 |     from cfg import ProductionList, CFGEditor, CFGDemo
18 |     from tree import (TreeSegmentWidget, tree_to_treesegment,
19 |                       TreeWidget, TreeView, draw_trees)
20 |     from dispersion import dispersion_plot
21 |     from table import Table
22 | 


--------------------------------------------------------------------------------
/resources/nltk/draw/dispersion.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Dispersion Plots
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | A utility for displaying lexical dispersion.
10 | """
11 | 
12 | def dispersion_plot(text, words, ignore_case=False):
13 |     """
14 |     Generate a lexical dispersion plot.
15 | 
16 |     :param text: The source text
17 |     :type text: list(str) or enum(str)
18 |     :param words: The target words
19 |     :type words: list of str
20 |     :param ignore_case: flag to set if case should be ignored when searching text
21 |     :type ignore_case: bool
22 |     """
23 | 
24 |     try:
25 |         import pylab
26 |     except ImportError:
27 |         raise ValueError('The plot function requires the matplotlib package (aka pylab).'
28 |                      'See http://matplotlib.sourceforge.net/')
29 | 
30 |     text = list(text)
31 |     words.reverse()
32 | 
33 |     if ignore_case:
34 |         words_to_comp = map(str.lower, words)
35 |         text_to_comp = map(str.lower, text)
36 |     else:
37 |         words_to_comp = words
38 |         text_to_comp = text
39 | 
40 |     points = [(x,y) for x in range(len(text_to_comp))
41 |                     for y in range(len(words_to_comp))
42 |                     if text_to_comp[x] == words_to_comp[y]]
43 |     if points:
44 |         x, y = zip(*points)
45 |     else:
46 |         x = y = ()
47 |     pylab.plot(x, y, "b|", scalex=.1)
48 |     pylab.yticks(range(len(words)), words, color="b")
49 |     pylab.ylim(-1, len(words))
50 |     pylab.title("Lexical Dispersion Plot")
51 |     pylab.xlabel("Word Offset")
52 |     pylab.show()
53 | 
54 | if __name__ == '__main__':
55 |     from nltk.corpus import gutenberg
56 |     words = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
57 |     dispersion_plot(gutenberg.words('austen-sense.txt'), words)
58 | 


--------------------------------------------------------------------------------
/resources/nltk/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/nltk/examples/__init__.py


--------------------------------------------------------------------------------
/resources/nltk/examples/pt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/nltk/examples/pt.py


--------------------------------------------------------------------------------
/resources/nltk/help.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit (NLTK) Help
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Authors: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | Provide structured access to documentation.
10 | """
11 | 
12 | import re
13 | from textwrap import wrap
14 | 
15 | from nltk.data import load
16 | 
17 | def brown_tagset(tagpattern=None):
18 |     _format_tagset("brown_tagset", tagpattern)
19 | 
20 | def claws5_tagset(tagpattern=None):
21 |     _format_tagset("claws5_tagset", tagpattern)
22 | 
23 | def upenn_tagset(tagpattern=None):
24 |     _format_tagset("upenn_tagset", tagpattern)
25 | 
26 | #####################################################################
27 | # UTILITIES
28 | #####################################################################
29 | 
30 | def _print_entries(tags, tagdict):
31 |     for tag in tags:
32 |         entry = tagdict[tag]
33 |         defn = [tag + ": " + entry[0]]
34 |         examples = wrap(entry[1], width=75, initial_indent='    ', subsequent_indent='    ')
35 |         print "\n".join(defn + examples)
36 | 
37 | def _format_tagset(tagset, tagpattern=None):
38 |     tagdict = load("help/tagsets/" + tagset + ".pickle")
39 |     if not tagpattern:
40 |         _print_entries(sorted(tagdict), tagdict)
41 |     elif tagpattern in tagdict:
42 |         _print_entries([tagpattern], tagdict)
43 |     else:
44 |         tagpattern = re.compile(tagpattern)
45 |         tags = [tag for tag in sorted(tagdict) if tagpattern.match(tag)]
46 |         if tags:
47 |             _print_entries(tags, tagdict)
48 |         else:
49 |             print "No matching tags found."
50 | 
51 | if __name__ == '__main__':
52 |     brown_tagset(r'NN.*')
53 |     upenn_tagset(r'.*\$')
54 |     claws5_tagset('UNDEFINED')
55 |     brown_tagset(r'NN')
56 | 


--------------------------------------------------------------------------------
/resources/nltk/inference/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Inference
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Dan Garrette <dhgarrette@gmail.com>
 5 | #         Ewan Klein <ewan@inf.ed.ac.uk>
 6 | #
 7 | # URL: <http://www.nltk.org/>
 8 | # For license information, see LICENSE.TXT
 9 | 
10 | """
11 | Classes and interfaces for theorem proving and model building.
12 | """
13 | 
14 | from api import ParallelProverBuilder, ParallelProverBuilderCommand
15 | from mace import Mace, MaceCommand
16 | from prover9 import Prover9, Prover9Command
17 | from resolution import ResolutionProver, ResolutionProverCommand
18 | from tableau import TableauProver, TableauProverCommand
19 | from discourse import (ReadingCommand, CfgReadingCommand,
20 |                        DrtGlueReadingCommand, DiscourseTester)
21 | 


--------------------------------------------------------------------------------
/resources/nltk/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Metrics
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | #
 9 | 
10 | """
11 | NLTK Metrics
12 | 
13 | Classes and methods for scoring processing modules.
14 | """
15 | 
16 | from nltk.metrics.scores import          (accuracy, precision, recall, f_measure,
17 |                                           log_likelihood, approxrand)
18 | from nltk.metrics.confusionmatrix import ConfusionMatrix
19 | from nltk.metrics.distance        import (edit_distance, binary_distance,
20 |                                           jaccard_distance, masi_distance,
21 |                                           interval_distance, custom_distance,
22 |                                           presence, fractional_presence)
23 | from nltk.metrics.segmentation    import windowdiff, ghd, pk
24 | from nltk.metrics.agreement       import AnnotationTask
25 | from nltk.metrics.association     import (NgramAssocMeasures, BigramAssocMeasures,
26 |                                           TrigramAssocMeasures, ContingencyMeasures)
27 | from nltk.metrics.spearman        import (spearman_correlation, ranks_from_sequence,
28 |                                           ranks_from_scores)
29 | 


--------------------------------------------------------------------------------
/resources/nltk/metrics/artstein_poesio_example.txt:
--------------------------------------------------------------------------------
  1 | a	1	stat
  2 | b	1	stat
  3 | a	2	stat
  4 | b	2	stat
  5 | a	3	stat
  6 | b	3	stat
  7 | a	4	stat
  8 | b	4	stat
  9 | a	5	stat
 10 | b	5	stat
 11 | a	6	stat
 12 | b	6	stat
 13 | a	7	stat
 14 | b	7	stat
 15 | a	8	stat
 16 | b	8	stat
 17 | a	9	stat
 18 | b	9	stat
 19 | a	10	stat
 20 | b	10	stat
 21 | a	11	stat
 22 | b	11	stat
 23 | a	12	stat
 24 | b	12	stat
 25 | a	13	stat
 26 | b	13	stat
 27 | a	14	stat
 28 | b	14	stat
 29 | a	15	stat
 30 | b	15	stat
 31 | a	16	stat
 32 | b	16	stat
 33 | a	17	stat
 34 | b	17	stat
 35 | a	18	stat
 36 | b	18	stat
 37 | a	19	stat
 38 | b	19	stat
 39 | a	20	stat
 40 | b	20	stat
 41 | a	21	stat
 42 | b	21	stat
 43 | a	22	stat
 44 | b	22	stat
 45 | a	23	stat
 46 | b	23	stat
 47 | a	24	stat
 48 | b	24	stat
 49 | a	25	stat
 50 | b	25	stat
 51 | a	26	stat
 52 | b	26	stat
 53 | a	27	stat
 54 | b	27	stat
 55 | a	28	stat
 56 | b	28	stat
 57 | a	29	stat
 58 | b	29	stat
 59 | a	30	stat
 60 | b	30	stat
 61 | a	31	stat
 62 | b	31	stat
 63 | a	32	stat
 64 | b	32	stat
 65 | a	33	stat
 66 | b	33	stat
 67 | a	34	stat
 68 | b	34	stat
 69 | a	35	stat
 70 | b	35	stat
 71 | a	36	stat
 72 | b	36	stat
 73 | a	37	stat
 74 | b	37	stat
 75 | a	38	stat
 76 | b	38	stat
 77 | a	39	stat
 78 | b	39	stat
 79 | a	40	stat
 80 | b	40	stat
 81 | a	41	stat
 82 | b	41	stat
 83 | a	42	stat
 84 | b	42	stat
 85 | a	43	stat
 86 | b	43	stat
 87 | a	44	stat
 88 | b	44	stat
 89 | a	45	stat
 90 | b	45	stat
 91 | a	46	stat
 92 | b	46	stat
 93 | a	47	ireq
 94 | b	47	stat
 95 | a	48	ireq
 96 | b	48	stat
 97 | a	49	ireq
 98 | b	49	stat
 99 | a	50	ireq
100 | b	50	stat
101 | a	51	ireq
102 | b	51	stat
103 | a	52	ireq
104 | b	52	stat
105 | a	53	ireq
106 | b	53	ireq
107 | a	54	ireq
108 | b	54	ireq
109 | a	55	ireq
110 | b	55	ireq
111 | a	56	ireq
112 | b	56	ireq
113 | a	57	ireq
114 | b	57	ireq
115 | a	58	ireq
116 | b	58	ireq
117 | a	59	ireq
118 | b	59	ireq
119 | a	60	ireq
120 | b	60	ireq
121 | a	61	ireq
122 | b	61	ireq
123 | a	62	ireq
124 | b	62	ireq
125 | a	63	ireq
126 | b	63	ireq
127 | a	64	ireq
128 | b	64	ireq
129 | a	65	ireq
130 | b	65	ireq
131 | a	66	ireq
132 | b	66	ireq
133 | a	67	ireq
134 | b	67	ireq
135 | a	68	ireq
136 | b	68	ireq
137 | a	69	ireq
138 | b	69	ireq
139 | a	70	ireq
140 | b	70	ireq
141 | a	71	ireq
142 | b	71	ireq
143 | a	72	ireq
144 | b	72	ireq
145 | a	73	ireq
146 | b	73	ireq
147 | a	74	ireq
148 | b	74	ireq
149 | a	75	ireq
150 | b	75	ireq
151 | a	76	ireq
152 | b	76	ireq
153 | a	77	ireq
154 | b	77	ireq
155 | a	78	ireq
156 | b	78	ireq
157 | a	79	ireq
158 | b	79	ireq
159 | a	80	ireq
160 | b	80	ireq
161 | a	81	ireq
162 | b	81	ireq
163 | a	82	ireq
164 | b	82	ireq
165 | a	83	ireq
166 | b	83	ireq
167 | a	84	ireq
168 | b	84	ireq
169 | a	85	ireq
170 | b	85	chck
171 | a	86	ireq
172 | b	86	chck
173 | a	87	ireq
174 | b	87	chck
175 | a	88	ireq
176 | b	88	chck
177 | a	89	ireq
178 | b	89	chck
179 | a	90	ireq
180 | b	90	chck
181 | a	91	chck
182 | b	91	chck
183 | a	92	chck
184 | b	92	chck
185 | a	93	chck
186 | b	93	chck
187 | a	94	chck
188 | b	94	chck
189 | a	95	chck
190 | b	95	chck
191 | a	96	chck
192 | b	96	chck
193 | a	97	chck
194 | b	97	chck
195 | a	98	chck
196 | b	98	chck
197 | a	99	chck
198 | b	99	chck
199 | a	100	chck
200 | b	100	chck
201 | 


--------------------------------------------------------------------------------
/resources/nltk/metrics/spearman.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Spearman Rank Correlation
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Joel Nothman <jnothman@student.usyd.edu.au>
 5 | # URL: <http://nltk.org>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | Tools for comparing ranked lists.
10 | """
11 | 
12 | def _rank_dists(ranks1, ranks2):
13 |     """Finds the difference between the values in ranks1 and ranks2 for keys
14 |     present in both dicts. If the arguments are not dicts, they are converted
15 |     from (key, rank) sequences.
16 |     """
17 |     ranks1 = dict(ranks1)
18 |     ranks2 = dict(ranks2)
19 |     for k, v1 in ranks1.iteritems():
20 |         try:
21 |             yield k, v1 - ranks2[k]
22 |         except KeyError:
23 |             pass
24 | 
25 | 
26 | def spearman_correlation(ranks1, ranks2):
27 |     """Returns the Spearman correlation coefficient for two rankings, which
28 |     should be dicts or sequences of (key, rank). The coefficient ranges from
29 |     -1.0 (ranks are opposite) to 1.0 (ranks are identical), and is only
30 |     calculated for keys in both rankings (for meaningful results, remove keys
31 |     present in only one list before ranking)."""
32 |     n = 0
33 |     res = 0
34 |     for k, d in _rank_dists(ranks1, ranks2):
35 |         res += d * d
36 |         n += 1
37 |     try:
38 |         return 1 - (6 * float(res) / (n * (n*n - 1)))
39 |     except ZeroDivisionError:
40 |         # Result is undefined if only one item is ranked
41 |         return 0.0
42 | 
43 | 
44 | def ranks_from_sequence(seq):
45 |     """Given a sequence, yields each element with an increasing rank, suitable
46 |     for use as an argument to ``spearman_correlation``.
47 |     """
48 |     return ((k, i) for i, k in enumerate(seq))
49 | 
50 | 
51 | def ranks_from_scores(scores, rank_gap=1e-15):
52 |     """Given a sequence of (key, score) tuples, yields each key with an
53 |     increasing rank, tying with previous key's rank if the difference between
54 |     their scores is less than rank_gap. Suitable for use as an argument to
55 |     ``spearman_correlation``.
56 |     """
57 |     prev_score = None
58 |     rank = 0
59 |     for i, (key, score) in enumerate(scores):
60 |         try:
61 |             if abs(score - prev_score) > rank_gap:
62 |                 rank = i
63 |         except TypeError:
64 |             pass
65 | 
66 |         yield key, rank
67 |         prev_score = score
68 | 
69 | 


--------------------------------------------------------------------------------
/resources/nltk/metrics/windowdiff.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Windowdiff
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | #         Steven Bird <sb@csse.unimelb.edu.au>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | ##########################################################################
10 | # Windowdiff
11 | # Pevzner, L., and Hearst, M., A Critique and Improvement of
12 | #   an Evaluation Metric for Text Segmentation,
13 | # Computational Linguistics,, 28 (1), March 2002, pp. 19-36
14 | ##########################################################################
15 | 
16 | def windowdiff(seg1, seg2, k, boundary="1"):
17 |     """
18 |     Compute the windowdiff score for a pair of segmentations.  A segmentation is any sequence
19 |     over a vocabulary of two items (e.g. "0", "1"), where the specified boundary value is used
20 |     to mark the edge of a segmentation.
21 | 
22 |     >>> from nltk.metrics.windowdiff import windowdiff
23 |     >>> s1 = "00000010000000001000000"
24 |     >>> s2 = "00000001000000010000000"
25 |     >>> s3 = "00010000000000000001000"
26 |     >>> windowdiff(s1, s1, 3)
27 |     0
28 |     >>> windowdiff(s1, s2, 3)
29 |     4
30 |     >>> windowdiff(s2, s3, 3)
31 |     16
32 | 
33 |     :param seg1: a segmentation
34 |     :type seg1: str or list
35 |     :param seg2: a segmentation
36 |     :type seg2: str or list
37 |     :param k: window width
38 |     :type k: int
39 |     :param boundary: boundary value
40 |     :type boundary: str or int or bool
41 |     :rtype: int
42 |     """
43 | 
44 |     if len(seg1) != len(seg2):
45 |         raise ValueError, "Segmentations have unequal length"
46 |     wd = 0
47 |     for i in range(len(seg1) - k):
48 |         wd += abs(seg1[i:i+k+1].count(boundary) - seg2[i:i+k+1].count(boundary))
49 |     return wd
50 | 
51 | def demo():
52 |     s1 = "00000010000000001000000"
53 |     s2 = "00000001000000010000000"
54 |     s3 = "00010000000000000001000"
55 |     print "s1:", s1
56 |     print "s2:", s2
57 |     print "s3:", s3
58 | 
59 |     print "windowdiff(s1, s1, 3) = ", windowdiff(s1, s1, 3)
60 |     print "windowdiff(s1, s2, 3) = ", windowdiff(s1, s2, 3)
61 |     print "windowdiff(s2, s3, 3) = ", windowdiff(s2, s3, 3)
62 | 


--------------------------------------------------------------------------------
/resources/nltk/misc/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Miscellaneous modules
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | from chomsky import generate_chomsky
 9 | from wordfinder import word_finder
10 | from minimalset import MinimalSet
11 | from babelfish import babelize, babelize_shell
12 | 
13 | 


--------------------------------------------------------------------------------
/resources/nltk/misc/minimalset.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Minimal Sets
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | from collections import defaultdict
 9 | 
10 | class MinimalSet(object):
11 |     """
12 |     Find contexts where more than one possible target value can
13 |     appear.  E.g. if targets are word-initial letters, and contexts
14 |     are the remainders of words, then we would like to find cases like
15 |     "fat" vs "cat", and "training" vs "draining".  If targets are
16 |     parts-of-speech and contexts are words, then we would like to find
17 |     cases like wind (noun) 'air in rapid motion', vs wind (verb)
18 |     'coil, wrap'.
19 |     """
20 |     def __init__(self, parameters=None):
21 |         """
22 |         Create a new minimal set.
23 | 
24 |         :param parameters: The (context, target, display) tuples for the item
25 |         :type parameters: list(tuple(str, str, str))
26 |         """
27 |         self._targets = set()  # the contrastive information
28 |         self._contexts = set() # what we are controlling for
29 |         self._seen = defaultdict(set)  # to record what we have seen
30 |         self._displays = {}    # what we will display
31 | 
32 |         if parameters:
33 |             for context, target, display in parameters:
34 |                 self.add(context, target, display)
35 | 
36 |     def add(self, context, target, display):
37 |         """
38 |         Add a new item to the minimal set, having the specified
39 |         context, target, and display form.
40 | 
41 |         :param context: The context in which the item of interest appears
42 |         :type context: str
43 |         :param target: The item of interest
44 |         :type target: str
45 |         :param display: The information to be reported for each item
46 |         :type display: str
47 |         """
48 |         # Store the set of targets that occurred in this context
49 |         self._seen[context].add(target)
50 | 
51 |         # Keep track of which contexts and targets we have seen
52 |         self._contexts.add(context)
53 |         self._targets.add(target)
54 | 
55 |         # For a given context and target, store the display form
56 |         self._displays[(context, target)] = display
57 | 
58 |     def contexts(self, minimum=2):
59 |         """
60 |         Determine which contexts occurred with enough distinct targets.
61 | 
62 |         :param minimum: the minimum number of distinct target forms
63 |         :type minimum: int
64 |         :rtype list
65 |         """
66 |         return [c for c in self._contexts if len(self._seen[c]) >= minimum]
67 | 
68 |     def display(self, context, target, default=""):
69 |         if (context, target) in self._displays:
70 |             return self._displays[(context, target)]
71 |         else:
72 |             return default
73 | 
74 |     def display_all(self, context):
75 |         result = []
76 |         for target in self._targets:
77 |             x = self.display(context, target)
78 |             if x: result.append(x)
79 |         return result
80 | 
81 |     def targets(self):
82 |         return self._targets
83 | 
84 | 


--------------------------------------------------------------------------------
/resources/nltk/model/__init__.py:
--------------------------------------------------------------------------------
1 | # Natural Language Toolkit: Language Models
2 | #
3 | # Copyright (C) 2001-2012 NLTK Project
4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
5 | # URL: <http://www.nltk.org/>
6 | # For license information, see LICENSE.TXT
7 | 
8 | from ngram import NgramModel
9 | 


--------------------------------------------------------------------------------
/resources/nltk/model/api.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: API for Language Models
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | 
 9 | # should this be a subclass of ConditionalProbDistI?
10 | 
11 | class ModelI(object):
12 |     """
13 |     A processing interface for assigning a probability to the next word.
14 |     """
15 | 
16 |     def __init__(self):
17 |         '''Create a new language model.'''
18 |         raise NotImplementedError()
19 | 
20 |     def prob(self, word, context):
21 |         '''Evaluate the probability of this word in this context.'''
22 |         raise NotImplementedError()
23 | 
24 |     def logprob(self, word, context):
25 |         '''Evaluate the (negative) log probability of this word in this context.'''
26 |         raise NotImplementedError()
27 | 
28 |     def choose_random_word(self, context):
29 |         '''Randomly select a word that is likely to appear in this context.'''
30 |         raise NotImplementedError()
31 | 
32 |     def generate(self, n):
33 |         '''Generate n words of text from the language model.'''
34 |         raise NotImplementedError()
35 | 
36 |     def entropy(self, text):
37 |         '''Evaluate the total entropy of a message with respect to the model.
38 |         This is the sum of the log probability of each word in the message.'''
39 |         raise NotImplementedError()
40 | 
41 | 


--------------------------------------------------------------------------------
/resources/nltk/nltk.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/nltk/nltk.jar


--------------------------------------------------------------------------------
/resources/nltk/parse/broker_test.cfg:
--------------------------------------------------------------------------------
 1 | %start S
 2 | 
 3 | S[sem=<app(?vp, ?subj)>] -> NP[sem=?subj] VP[sem=?vp]
 4 | VP[sem = <app(?v, ?obj)>] -> V[sem = ?v] NP[sem=?obj]
 5 | VP[sem = ?v] -> V[sem = ?v]
 6 | NP[sem = <kim>] -> 'Kim'
 7 | NP[sem = <i>] -> 'I'
 8 | V[sem = <\x y.(like x y)>, tns=pres] -> 'like'
 9 | V[sem = <\x.(sleeps x)>, tns=pres] -> 'sleeps'
10 | 
11 | 


--------------------------------------------------------------------------------
/resources/nltk/parse/generate.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Generating from a CFG
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | #
 8 | 
 9 | from nltk.grammar import Nonterminal, parse_cfg
10 | 
11 | def generate(grammar, start=None):
12 |     if not start:
13 |         start = grammar.start()
14 |     return _generate_all(grammar, [start])[0]
15 | 
16 | def _generate_all(grammar, items):
17 |     frags = []
18 |     if len(items) == 1:
19 |         if isinstance(items[0], Nonterminal):
20 |             for prod in grammar.productions(lhs=items[0]):
21 |                 frags.append(_generate_all(grammar, prod.rhs()))
22 |         else:
23 |             frags.append(items[0])
24 |     else:
25 |         for frag1 in _generate_all(grammar, [items[0]]):
26 |             for frag2 in _generate_all(grammar, items[1:]):
27 |                 for frag in _multiply(frag1, frag2):
28 |                     frags.append(frag)
29 |     return frags
30 | 
31 | def _multiply(frag1, frag2):
32 |     frags = []
33 |     if len(frag1) == 1:
34 |         frag1 = [frag1]
35 |     if len(frag2) == 1:
36 |         frag2 = [frag2]
37 |     for f1 in frag1:
38 |         for f2 in frag2:
39 |             frags.append(f1+f2)
40 |     return frags
41 | 
42 | grammar = parse_cfg("""
43 |   S -> NP VP
44 |   NP -> Det N
45 |   VP -> V NP
46 |   Det -> 'the'
47 |   Det -> 'a'
48 |   N -> 'man' | 'park' | 'dog' | 'telescope'
49 |   V -> 'saw' | 'walked'
50 |   P -> 'in' | 'with'
51 | """)
52 | 
53 | for sent in generate(grammar):
54 |     print ' '.join(sent)
55 | 
56 | 


--------------------------------------------------------------------------------
/resources/nltk/parse/generate2.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Generating from a CFG
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | #
 8 | 
 9 | from nltk.grammar import Nonterminal, parse_cfg
10 | 
11 | def all_combsi(lol):
12 |     lens = map(lambda x: len(x), lol)
13 |     num_combs = reduce(lambda x, y: x*y, lens, 1)
14 |     for i in xrange(num_combs):
15 |         tmp = [0]*len(lol)
16 |         for j in xrange(len(tmp)):
17 |             tmp[j] = lol[j][i % lens[j]]
18 |             i = i / lens[j]
19 |         yield tmp
20 | 
21 | def expand_nonterm(symbol, grammar):
22 |     if isinstance(symbol, Nonterminal):
23 |         return map(lambda prod: list(prod.rhs()), grammar.productions(lhs=symbol))
24 |     else:
25 |         return symbol
26 | 
27 | def tree_traverse(root, get_children, isleaf, maxdepth):
28 |     if isleaf(root):
29 |         yield root
30 |     elif maxdepth > 0:
31 |         for child in get_children(root):
32 |             for x in tree_traverse(child, get_children, isleaf, maxdepth - 1):
33 |                 yield x
34 | 
35 | def flatten(lst):
36 |     val = []
37 |     for x in lst:
38 |         if isinstance(x, list):
39 |             val = val + x
40 |         else:
41 |             val.append(x)
42 |     return val
43 | 
44 | def generate(grammar, start=None, depth=10):
45 |     def is_terminal(lofs):
46 |         tmp = map(lambda x: not isinstance(x, Nonterminal), lofs)
47 |         return all(tmp)
48 | 
49 |     def get_children(l_of_symbols):
50 |         x = map(lambda x: expand_nonterm(x, grammar), l_of_symbols)
51 |         x = map(lambda x: x if isinstance(x, list) else [x], x)
52 |         for comb in all_combsi(x):
53 |             yield flatten(comb)
54 | 
55 |     if not start:
56 |         start = grammar.start()
57 |     return [x for x in tree_traverse([start], get_children, is_terminal, depth)]
58 | 
59 | def _generate_demo():
60 |     g = parse_cfg("""
61 |       S -> NP VP
62 |       NP -> Det N
63 |       VP -> V NP
64 |       Det -> 'the'
65 |       Det -> 'a'
66 |       N -> 'man' | 'park' | 'dog' | 'telescope'
67 |       V -> 'saw' | 'walked'
68 |       P -> 'in' | 'with'
69 |     """)
70 |     for s in generate(g):
71 |         print ' '.join(s)
72 | 
73 | if __name__ == "__main__":
74 |     _generate_demo()
75 | 


--------------------------------------------------------------------------------
/resources/nltk/parse/test.cfg:
--------------------------------------------------------------------------------
 1 | %start S
 2 | 
 3 | S[sem=<app(?vp, ?subj)>] -> NP[sem=?subj] VP[sem=?vp]
 4 | VP[sem = <app(?v, ?obj)>] -> V[sem = ?v] NP[sem=?obj]
 5 | VP[sem = ?v] -> V[sem = ?v]
 6 | NP[sem = <kim>] -> 'Kim'
 7 | NP[sem = <i>] -> 'I'
 8 | V[sem = <\x y.(like x y)>, tns=pres] -> 'like'
 9 | V[sem = <\x.(sleeps x)>, tns=pres] -> 'sleeps'
10 | 
11 | 


--------------------------------------------------------------------------------
/resources/nltk/sem/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Semantic Interpretation
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | NLTK Semantic Interpretation Package
10 | 
11 | This package contains classes for representing semantic structure in
12 | formulas of first-order logic and for evaluating such formulas in
13 | set-theoretic models.
14 | 
15 |     >>> from nltk.sem import logic
16 |     >>> logic._counter._value = 0
17 | 
18 | The package has two main components:
19 | 
20 |  - ``logic`` provides a parser for analyzing expressions of First
21 |    Order Logic (FOL).
22 |  - ``evaluate`` allows users to recursively determine truth in a
23 |    model for formulas of FOL.
24 | 
25 | A model consists of a domain of discourse and a valuation function,
26 | which assigns values to non-logical constants. We assume that entities
27 | in the domain are represented as strings such as ``'b1'``, ``'g1'``,
28 | etc. A ``Valuation`` is initialized with a list of (symbol, value)
29 | pairs, where values are entities, sets of entities or sets of tuples
30 | of entities.
31 | The domain of discourse can be inferred from the valuation, and model
32 | is then created with domain and valuation as parameters.
33 | 
34 |     >>> from nltk.sem import Valuation, Model
35 |     >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
36 |     ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
37 |     ... ('dog', set(['d1'])),
38 |     ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
39 |     >>> val = Valuation(v)
40 |     >>> dom = val.domain
41 |     >>> m = Model(dom, val)
42 | """
43 | 
44 | from nltk.sem.util import (batch_parse, batch_interpret, batch_evaluate,
45 |                            root_semrep, parse_valuation)
46 | from nltk.sem.evaluate import (Valuation, Assignment, Model, Undefined,
47 |                                is_rel, set2rel, arity)
48 | from nltk.sem.logic import (LogicParser, boolean_ops, binding_ops,
49 |                             equality_preds, parse_logic)
50 | from nltk.sem.skolemize import skolemize
51 | from nltk.sem.lfg import FStructure
52 | from nltk.sem.relextract import extract_rels
53 | from nltk.sem.boxer import Boxer
54 | from nltk.sem.drt import DrtParser, DRS
55 | from nltk.sem.linearlogic import LinearLogicParser
56 | 
57 | # from nltk.sem.glue import Glue
58 | # from nltk.sem.hole import HoleSemantics
59 | # from nltk.sem.cooper_storage import CooperStore
60 | 
61 | # don't import chat80 as its names are too generic
62 | 


--------------------------------------------------------------------------------
/resources/nltk/stem/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Stemmers
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | #         Steven Bird <sb@csse.unimelb.edu.au>
 7 | # URL: <http://www.nltk.org/>
 8 | # For license information, see LICENSE.TXT
 9 | 
10 | """
11 | NLTK Stemmers
12 | 
13 | Interfaces used to remove morphological affixes from words, leaving
14 | only the word stem.  Stemming algorithms aim to remove those affixes
15 | required for eg. grammatical role, tense, derivational morphology
16 | leaving only the stem of the word.  This is a difficult problem due to
17 | irregular words (eg. common verbs in English), complicated
18 | morphological rules, and part-of-speech and sense ambiguities
19 | (eg. ``ceil-`` is not the stem of ``ceiling``).
20 | 
21 | StemmerI defines a standard interface for stemmers.
22 | """
23 | 
24 | from nltk.stem.api import StemmerI
25 | from nltk.stem.regexp import RegexpStemmer
26 | from nltk.stem.lancaster import LancasterStemmer
27 | from nltk.stem.isri import ISRIStemmer
28 | from nltk.stem.porter import PorterStemmer
29 | from nltk.stem.snowball import SnowballStemmer
30 | from nltk.stem.wordnet import WordNetLemmatizer
31 | from nltk.stem.rslp import RSLPStemmer
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     import doctest
36 |     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
37 | 


--------------------------------------------------------------------------------
/resources/nltk/stem/api.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Stemmer Interface
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | #         Steven Bird <sb@csse.unimelb.edu.au>
 7 | # URL: <http://www.nltk.org/>
 8 | # For license information, see LICENSE.TXT
 9 | 
10 | class StemmerI(object):
11 |     """
12 |     A processing interface for removing morphological affixes from
13 |     words.  This process is known as stemming.
14 | 
15 |     """
16 |     def stem(self, token):
17 |         """
18 |         Strip affixes from the token and return the stem.
19 | 
20 |         :param token: The token that should be stemmed.
21 |         :type token: str
22 |         """
23 |         raise NotImplementedError()
24 | 
25 | 
26 | if __name__ == "__main__":
27 |     import doctest
28 |     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
29 | 


--------------------------------------------------------------------------------
/resources/nltk/stem/regexp.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Stemmers
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | #         Steven Bird <sb@csse.unimelb.edu.au>
 7 | # URL: <http://www.nltk.org/>
 8 | # For license information, see LICENSE.TXT
 9 | 
10 | import re
11 | 
12 | from api import StemmerI
13 | 
14 | class RegexpStemmer(StemmerI):
15 |     """
16 |     A stemmer that uses regular expressions to identify morphological
17 |     affixes.  Any substrings that match the regular expressions will
18 |     be removed.
19 | 
20 |         >>> from nltk.stem import RegexpStemmer
21 |         >>> st = RegexpStemmer('ing$|s$|e$', min=4)
22 |         >>> st.stem('cars')
23 |         'car'
24 |         >>> st.stem('mass')
25 |         'mas'
26 |         >>> st.stem('was')
27 |         'was'
28 |         >>> st.stem('bee')
29 |         'bee'
30 |         >>> st.stem('compute')
31 |         'comput'
32 | 
33 |     :type regexp: str or regexp
34 |     :param regexp: The regular expression that should be used to
35 |         identify morphological affixes.
36 |     :type min: int
37 |     :param min: The minimum length of string to stem
38 |     """
39 |     def __init__(self, regexp, min=0):
40 | 
41 |         if not hasattr(regexp, 'pattern'):
42 |             regexp = re.compile(regexp)
43 |         self._regexp = regexp
44 |         self._min = min
45 | 
46 |     def stem(self, word):
47 |         if len(word) < self._min:
48 |             return word
49 |         else:
50 |             return self._regexp.sub('', word)
51 | 
52 |     def __repr__(self):
53 |         return '<RegexpStemmer: %r>' % self._regexp.pattern
54 | 
55 | 
56 | 
57 | if __name__ == "__main__":
58 |     import doctest
59 |     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
60 | 
61 | 


--------------------------------------------------------------------------------
/resources/nltk/stem/wordnet.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: WordNet stemmer interface
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 5 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | from nltk.corpus.reader.wordnet import NOUN
10 | from nltk.corpus import wordnet
11 | 
12 | class WordNetLemmatizer(object):
13 |     """
14 |     WordNet Lemmatizer
15 | 
16 |     Lemmatize using WordNet's built-in morphy function.
17 |     Returns the input word unchanged if it cannot be found in WordNet.
18 | 
19 |         >>> from nltk.stem import WordNetLemmatizer
20 |         >>> wnl = WordNetLemmatizer()
21 |         >>> wnl.lemmatize('dogs')
22 |         'dog'
23 |         >>> wnl.lemmatize('churches')
24 |         'church'
25 |         >>> wnl.lemmatize('aardwolves')
26 |         'aardwolf'
27 |         >>> wnl.lemmatize('abaci')
28 |         'abacus'
29 |         >>> wnl.lemmatize('hardrock')
30 |         'hardrock'
31 |     """
32 | 
33 |     def __init__(self):
34 |         pass
35 | 
36 |     def lemmatize(self, word, pos=NOUN):
37 |         lemmas = wordnet._morphy(word, pos)
38 |         return min(lemmas, key=len) if lemmas else word
39 | 
40 |     def __repr__(self):
41 |         return '<WordNetLemmatizer>'
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     import doctest
46 |     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
47 | 


--------------------------------------------------------------------------------
/resources/nltk/tag/util.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Tagger Utilities
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | #         Steven Bird <sb@csse.unimelb.edu.au>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | def str2tuple(s, sep='/'):
10 |     """
11 |     Given the string representation of a tagged token, return the
12 |     corresponding tuple representation.  The rightmost occurrence of
13 |     *sep* in *s* will be used to divide *s* into a word string and
14 |     a tag string.  If *sep* does not occur in *s*, return (s, None).
15 | 
16 |         >>> from nltk.tag.util import str2tuple
17 |         >>> str2tuple('fly/NN')
18 |         ('fly', 'NN')
19 | 
20 |     :type s: str
21 |     :param s: The string representation of a tagged token.
22 |     :type sep: str
23 |     :param sep: The separator string used to separate word strings
24 |         from tags.
25 |     """
26 |     loc = s.rfind(sep)
27 |     if loc >= 0:
28 |         return (s[:loc], s[loc+len(sep):].upper())
29 |     else:
30 |         return (s, None)
31 | 
32 | def tuple2str(tagged_token, sep='/'):
33 |     """
34 |     Given the tuple representation of a tagged token, return the
35 |     corresponding string representation.  This representation is
36 |     formed by concatenating the token's word string, followed by the
37 |     separator, followed by the token's tag.  (If the tag is None,
38 |     then just return the bare word string.)
39 | 
40 |         >>> from nltk.tag.util import tuple2str
41 |         >>> tagged_token = ('fly', 'NN')
42 |         >>> tuple2str(tagged_token)
43 |         'fly/NN'
44 | 
45 |     :type tagged_token: tuple(str, str)
46 |     :param tagged_token: The tuple representation of a tagged token.
47 |     :type sep: str
48 |     :param sep: The separator string used to separate word strings
49 |         from tags.
50 |     """
51 |     word, tag = tagged_token
52 |     if tag is None:
53 |         return word
54 |     else:
55 |         assert sep not in tag, 'tag may not contain sep!'
56 |         return '%s%s%s' % (word, sep, tag)
57 | 
58 | def untag(tagged_sentence):
59 |     """
60 |     Given a tagged sentence, return an untagged version of that
61 |     sentence.  I.e., return a list containing the first element
62 |     of each tuple in *tagged_sentence*.
63 | 
64 |         >>> from nltk.tag.util import untag
65 |         >>> untag([('John', 'NNP'), ('saw', 'VBD'), ('Mary', 'NNP')])
66 |         ['John', 'saw', 'Mary']
67 | 
68 |     """
69 |     return [w for (w, t) in tagged_sentence]
70 | 
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     import doctest
75 |     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
76 | 


--------------------------------------------------------------------------------
/resources/nltk/test/Makefile:
--------------------------------------------------------------------------------
 1 | .SUFFIXES: .doctest .errs
 2 | 
 3 | TESTS = $(wildcard *.doctest)
 4 | 
 5 | ERRS := $(TESTS:.doctest=.errs)
 6 | 
 7 | .doctest.errs:
 8 | 	python ./doctest_driver.py $< > $@
 9 | 
10 | all: $(ERRS)
11 | 
12 | clean:
13 | 	rm -f *.errs
14 | 


--------------------------------------------------------------------------------
/resources/nltk/test/__init__.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Unit Tests
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | """
 9 | Unit tests for the NLTK modules.  These tests are intended to ensure
10 | that changes that we make to NLTK's code don't accidentally introduce
11 | bugs.
12 | 
13 | Use doctest_driver.py to run the tests::
14 | 
15 |   doctest_driver.py --help
16 | 
17 | NB. Popular options for NLTK documentation are::
18 | 
19 |   --ellipsis --normalize_whitespace
20 | 
21 | """
22 | 


--------------------------------------------------------------------------------
/resources/nltk/test/all.py:
--------------------------------------------------------------------------------
 1 | """Test suite that runs all NLTK tests.
 2 | 
 3 | This module, `nltk.test.all`, is named as the NLTK ``test_suite`` in the
 4 | project's ``setup-eggs.py`` file.  Here, we create a test suite that
 5 | runs all of our doctests, and return it for processing by the setuptools
 6 | test harness.
 7 | 
 8 | """
 9 | import doctest, unittest
10 | from glob import glob
11 | import os.path
12 | 
13 | def additional_tests():
14 |     #print "here-000000000000000"
15 |     #print "-----", glob(os.path.join(os.path.dirname(__file__), '*.doctest'))
16 |     dir = os.path.dirname(__file__)
17 |     paths = glob(os.path.join(dir, '*.doctest'))
18 |     files = [ os.path.basename(path) for path in paths ]
19 |     return unittest.TestSuite(
20 |         [ doctest.DocFileSuite(file) for file in files ]
21 |         )
22 | #if os.path.split(path)[-1] != 'index.rst'
23 | # skips time-dependent doctest in index.rst
24 | 


--------------------------------------------------------------------------------
/resources/nltk/test/doctest_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | 
4 | def float_equal(a, b, eps=1e-8):
5 |     return abs(a-b) < eps
6 | 


--------------------------------------------------------------------------------
/resources/nltk/test/floresta.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/nltk/test/floresta.txt


--------------------------------------------------------------------------------
/resources/nltk/test/grammar.doctest:
--------------------------------------------------------------------------------
 1 | .. Copyright (C) 2001-2012 NLTK Project
 2 | .. For license information, see LICENSE.TXT
 3 | 
 4 | ===============
 5 | Grammar Parsing
 6 | ===============
 7 | 
 8 | Grammars can be parsed from strings:
 9 | 
10 |     >>> from nltk import parse_cfg
11 |     >>> grammar = parse_cfg("""
12 |     ... S -> NP VP
13 |     ... PP -> P NP
14 |     ... NP -> Det N | NP PP
15 |     ... VP -> V NP | VP PP
16 |     ... Det -> 'a' | 'the'
17 |     ... N -> 'dog' | 'cat'
18 |     ... V -> 'chased' | 'sat'
19 |     ... P -> 'on' | 'in'
20 |     ... """)
21 |     >>> grammar
22 |     <Grammar with 14 productions>
23 |     >>> grammar.start()
24 |     S
25 |     >>> grammar.productions() # doctest: +NORMALIZE_WHITESPACE
26 |     [S -> NP VP, PP -> P NP, NP -> Det N, NP -> NP PP, VP -> V NP, VP -> VP PP,
27 |     Det -> 'a', Det -> 'the', N -> 'dog', N -> 'cat', V -> 'chased', V -> 'sat',
28 |     P -> 'on', P -> 'in']
29 | 
30 | Probabilistic CFGs:
31 |    
32 |     >>> from nltk import parse_pcfg
33 |     >>> toy_pcfg1 = parse_pcfg("""
34 |     ... S -> NP VP [1.0]
35 |     ... NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
36 |     ... Det -> 'the' [0.8] | 'my' [0.2]
37 |     ... N -> 'man' [0.5] | 'telescope' [0.5]
38 |     ... VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
39 |     ... V -> 'ate' [0.35] | 'saw' [0.65]
40 |     ... PP -> P NP [1.0]
41 |     ... P -> 'with' [0.61] | 'under' [0.39]
42 |     ... """)
43 | 
44 | Chomsky Normal Form grammar (Test for bug 474)
45 | 
46 |     >>> g = parse_cfg("VP^<TOP> -> VBP NP^<VP-TOP>")
47 |     >>> g.productions()[0].lhs()
48 |     VP^<TOP>
49 | 


--------------------------------------------------------------------------------
/resources/nltk/test/japanese.doctest:
--------------------------------------------------------------------------------
 1 | .. Copyright (C) 2001-2012 NLTK Project
 2 | .. For license information, see LICENSE.TXT
 3 | 
 4 | ============================
 5 | Japanese Language Processing
 6 | ============================
 7 | 
 8 |     >>> from nltk import *
 9 | 
10 | -------------
11 | Corpus Access
12 | -------------
13 | 
14 | KNB Corpus
15 | ----------
16 | 
17 | Currently, the interface returns objects of the wrong type.
18 | 
19 |     >>> from nltk.corpus import knbc
20 | 
21 | Access the words: this should produce a list of strings:
22 | 
23 |     >>> type(knbc.words()[0])
24 |     <type 'str'>
25 | 
26 | Access the sentences: this should produce a list of lists of strings:
27 | 
28 |     >>> type(knbc.sents()[0][0])
29 |     <type 'str'>
30 | 
31 | Access the tagged words: this should produce a list of word, tag pairs:
32 | 
33 |     >>> type(knbc.tagged_words()[0])
34 |     <type 'tuple'>
35 | 
36 | Access the tagged sentences: this should produce a list of lists of word, tag pairs:
37 | 
38 |     >>> type(knbc.tagged_sents()[0][0])
39 |     <type 'tuple'>
40 | 
41 | 
42 | JEITA Corpus
43 | ------------
44 | 
45 |     >>> from nltk.corpus import jeita
46 | 
47 | Access the tagged words: this should produce a list of word, tag pairs, where a tag is a string:
48 | 
49 |     >>> type(jeita.tagged_words()[0][1])
50 |     <type 'str'>
51 | 


--------------------------------------------------------------------------------
/resources/nltk/test/onto1.fol:
--------------------------------------------------------------------------------
1 | all x. ((boxer2 x) implies (dog x))
2 | all x. ((boxer1 x) implies (person x))
3 | all x. (not ((dog x) and (person x)))
4 | all x. (not ((kitchen x) and (garden x)))
5 | all x. ((kitchen x) implies (location x))
6 | all x. ((garden x) implies (location x))


--------------------------------------------------------------------------------
/resources/nltk/test/portuguese.doctest_latin1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/nltk/test/portuguese.doctest_latin1


--------------------------------------------------------------------------------
/resources/nltk/test/portuguese_en.doctest:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/nltk/test/portuguese_en.doctest


--------------------------------------------------------------------------------
/resources/nltk/test/runtests.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | from __future__ import absolute_import
 4 | import sys
 5 | import os
 6 | import nose
 7 | 
 8 | NLTK_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 9 | sys.path.insert(0, NLTK_ROOT)
10 | 
11 | NLTK_TEST_DIR = os.path.join(NLTK_ROOT, 'nltk')
12 | 
13 | 
14 | # These tests are expected to fail.
15 | # NOTE: Remember to remove tests from this list after they have been fixed.
16 | FAILING_TESTS = [
17 |     "ccg.doctest", # This test randomly fails - nondeterministic output
18 |     "collocations.doctest",
19 |     "corpus.doctest",
20 |     "portuguese_en.doctest",
21 |     "probability.doctest",
22 |     "relextract.doctest",
23 | ]
24 | 
25 | # These tests require extra dependencies and should not run by default
26 | # TODO: Run the tests if the relevant dependeices are present on the system
27 | DEPENDENT_TESTS = [
28 | #    "classify.doctest",
29 |     "discourse.doctest",
30 |     "drt.doctest",
31 |     "gluesemantics.doctest",
32 |     "inference.doctest",
33 |     "nonmonotonic.doctest",
34 | ]
35 | 
36 | EXCLUDED_TESTS = FAILING_TESTS + DEPENDENT_TESTS
37 | _EXCLUDE_ARGV = ['--exclude='+test for test in EXCLUDED_TESTS]
38 | 
39 | if __name__ == '__main__':
40 |     from nltk.test.doctest_nose_plugin import DoctestFix
41 |     from nose.plugins.manager import PluginManager
42 |     from nose.plugins.doctests import Doctest
43 |     from nose.plugins import builtin
44 | 
45 |     class NltkPluginManager(PluginManager):
46 |         """
47 |         Nose plugin manager that replaces standard doctest plugin
48 |         with a patched version.
49 |         """
50 |         def loadPlugins(self):
51 |             for plug in builtin.plugins:
52 |                 if plug != Doctest:
53 |                     self.addPlugin(plug())
54 |             self.addPlugin(DoctestFix())
55 |             super(NltkPluginManager, self).loadPlugins()
56 | 
57 |     manager = NltkPluginManager()
58 |     manager.loadPlugins()
59 | 
60 |     # allow passing extra options and running individual tests
61 |     # Examples:
62 |     #
63 |     #    python runtests.py semantics.doctest
64 |     #    python runtests.py --with-id -v
65 |     #    python runtests.py --with-id -v nltk.featstruct
66 | 
67 |     args = sys.argv[1:]
68 |     if not args:
69 |         args = [NLTK_TEST_DIR]
70 | 
71 |     if all(arg.startswith('-') for arg in args):
72 |         # only extra options were passed
73 |         args += [NLTK_TEST_DIR]
74 | 
75 |     nose.main(argv=_EXCLUDE_ARGV + [
76 |             #'--with-xunit',
77 |             #'--xunit-file=$WORKSPACE/nosetests.xml',
78 |             '--with-doctest',
79 |             '--doctest-extension=.doctest',
80 |             '--doctest-options=+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL',
81 |             #'--verbosity=3',
82 |         ] + args, plugins=manager.plugins)
83 | 


--------------------------------------------------------------------------------
/resources/nltk/test/segmentation.doctest:
--------------------------------------------------------------------------------
 1 | .. Copyright (C) 2001-2012 NLTK Project
 2 | .. For license information, see LICENSE.TXT
 3 | 
 4 | =========================
 5 | Text Segmentation Metrics
 6 | =========================
 7 | 
 8 | The `nltk.metrics.segmentation` module provides a variety of
 9 | *evaluation measures* which can be used for evaluating text
10 | segmentation methods
11 | 
12 | A segmentation is any sequence over a vocabulary of two items
13 | (e.g. "0", "1"), where the specified boundary value is used to
14 | mark the edge of a segmentation.
15 | 
16 |     >>> from nltk.metrics import windowdiff, ghd, pk
17 | 
18 | ----------
19 | Windowdiff
20 | ----------
21 | 
22 | Compute the windowdiff score for a pair of segmentations.
23 | 
24 |     >>> s1 = "00000010000000001000000"
25 |     >>> s2 = "00000001000000010000000"
26 |     >>> s3 = "00010000000000000001000"
27 |     >>> windowdiff(s1, s1, 3)
28 |     0
29 |     >>> windowdiff(s1, s2, 3)
30 |     4
31 |     >>> windowdiff(s2, s3, 3)
32 |     16
33 | 
34 | 
35 | ----------------------------
36 | Generalized Hamming Distance
37 | ----------------------------
38 | 
39 | Generalized Hamming Distance may be used as an evaluation metric for
40 | text segmentation. It compares two segmentations, and returns the cost
41 | of transforming one segmentation into the other.  The transformation
42 | is done though boundary insertions, deletions and shifts.  Each
43 | operation may have a different cost.
44 | 
45 |     >>> ghd('1100100000', '1100010000', 1.0, 1.0, 0.5)
46 |     0.5
47 |     >>> ghd('1100100000', '1100000001', 1.0, 1.0, 0.5)
48 |     2.0
49 |     >>> ghd('011', '110', 1.0, 1.0, 0.5)
50 |     1.0
51 |     >>> ghd('1', '0', 1.0, 1.0, 0.5)
52 |     1.0
53 |     >>> ghd('111', '000', 1.0, 1.0, 0.5)
54 |     3.0
55 |     >>> ghd('000', '111', 1.0, 2.0, 0.5)
56 |     6.0
57 | 
58 | 
59 | --------------
60 | Befferman's Pk
61 | --------------
62 | 
63 | Beeferman's Pk was proposed as an evaluation metric for text
64 | segmentation. It takes a reference segmentation as first argument, an
65 | hypothesis segmentation as second argument.  It returns the
66 | propability that randomly chosen pair of words a distance of k words
67 | is inconsistently classified.
68 | 
69 |     >>> print pk('1000100', '1000100', 3)
70 |     0.0
71 |     >>> print pk('100', '010', 2)
72 |     0.5
73 |     >>> print pk('100100', '111111', 2)
74 |     0.64
75 |     >>> print pk('100100', '000000', 2)
76 |     0.04
77 |     >>> print pk('100100', '111111', 3)
78 |     0.25
79 |     >>> print pk('100100', '000000', 3)
80 |     0.25
81 | 


--------------------------------------------------------------------------------
/resources/nltk/test/sem3.cfg:
--------------------------------------------------------------------------------
 1 | #######################################
 2 | # sem1.cfg
 3 | #######################################
 4 | # Minimal feature-based grammar with determiner semantics.
 5 | 
 6 | 
 7 | % start S
 8 | 
 9 | S[sem=?vp] -> NP[sem=?np] VP[subj=?np, sem=?vp]
10 | VP[sem=?v, subj=?np] -> IV[sem=?v, subj=?np]
11 | NP[sem=[index='k',name='kim']] -> 'Kim'
12 | IV[sem=[rel='bark', arg=?i], subj=[sem=[index=?i]]] -> 'barks'
13 | #IV[fsem=[rel='bark', arg=(1)[]], subj=[fsem=[index->(1)]]] -> 'barks'
14 | 
15 | 


--------------------------------------------------------------------------------
/resources/nltk/test/simple.doctest:
--------------------------------------------------------------------------------
 1 | .. Copyright (C) 2001-2012 NLTK Project
 2 | .. For license information, see LICENSE.TXT
 3 | 
 4 | =================
 5 | EasyInstall Tests
 6 | =================
 7 | 
 8 | This file contains some simple tests that will be run by EasyInstall in
 9 | order to test the installation when NLTK-Data is absent.
10 | 
11 |     >>> from nltk.test.doctest_utils import *
12 | 
13 | ------------
14 | Tokenization
15 | ------------
16 | 
17 |     >>> from nltk.tokenize import wordpunct_tokenize
18 |     >>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
19 |     ...      "two of them.\n\nThanks.")
20 |     >>> wordpunct_tokenize(s) # doctest: +NORMALIZE_WHITESPACE
21 |     ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
22 |     'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
23 | 
24 | -------
25 | Metrics
26 | -------
27 | 
28 |     >>> from nltk.metrics import precision, recall, f_measure
29 |     >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
30 |     >>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
31 |     >>> reference_set = set(reference)
32 |     >>> test_set = set(test)
33 |     >>> precision(reference_set, test_set)
34 |     1.0
35 |     >>> float_equal(recall(reference_set, test_set), 0.8)
36 |     True
37 |     >>> float_equal(f_measure(reference_set, test_set), 0.88888888888888)
38 |     True
39 | 
40 | ------------------
41 | Feature Structures
42 | ------------------
43 | 
44 |     >>> from nltk import FeatStruct
45 |     >>> fs1 = FeatStruct(PER=3, NUM='pl', GND='fem')
46 |     >>> fs2 = FeatStruct(POS='N', AGR=fs1)
47 |     >>> print fs2
48 |     [       [ GND = 'fem' ] ]
49 |     [ AGR = [ NUM = 'pl'  ] ]
50 |     [       [ PER = 3     ] ]
51 |     [                       ]
52 |     [ POS = 'N'             ]
53 |     >>> print fs2['AGR']
54 |     [ GND = 'fem' ]
55 |     [ NUM = 'pl'  ]
56 |     [ PER = 3     ]
57 |     >>> print fs2['AGR']['PER']
58 |     3
59 | 
60 | -------
61 | Parsing
62 | -------
63 | 
64 |     >>> from nltk.parse.rd import RecursiveDescentParser, parse_cfg
65 |     >>> grammar = parse_cfg("""
66 |     ... S -> NP VP
67 |     ... PP -> P NP
68 |     ... NP -> 'the' N | N PP | 'the' N PP
69 |     ... VP -> V NP | V PP | V NP PP
70 |     ... N -> 'cat' | 'dog' | 'rug'
71 |     ... V -> 'chased'
72 |     ... P -> 'on'
73 |     ... """)
74 |     >>> rd = RecursiveDescentParser(grammar)
75 |     >>> sent = 'the cat chased the dog on the rug'.split()
76 |     >>> for t in rd.nbest_parse(sent):
77 |     ...     print t
78 |     (S
79 |       (NP the (N cat))
80 |       (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
81 |     (S
82 |       (NP the (N cat))
83 |       (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
84 | 
85 | 


--------------------------------------------------------------------------------
/resources/nltk/test/tag.doctest:
--------------------------------------------------------------------------------
 1 | .. Copyright (C) 2001-2012 NLTK Project
 2 | .. For license information, see LICENSE.TXT
 3 | 
 4 | Regression Tests
 5 | ~~~~~~~~~~~~~~~~
 6 | 
 7 | Sequential Taggers
 8 | ------------------
 9 | 
10 | Add tests for:
11 |   - make sure backoff is being done correctly.
12 |   - make sure ngram taggers don't use previous sentences for context.
13 |   - make sure ngram taggers see 'beginning of the sentence' as a
14 |     unique context
15 |   - make sure regexp tagger's regexps are tried in order
16 |   - train on some simple examples, & make sure that the size & the
17 |     generated models are correct.
18 |   - make sure cutoff works as intended
19 |   - make sure that ngram models only exclude contexts covered by the
20 |     backoff tagger if the backoff tagger gets that context correct at
21 |     *all* locations.
22 | 
23 | Brill Tagger
24 | ------------
25 |   - test that fast & normal trainers get identical results when
26 |     deterministic=True is used.
27 |   - check on some simple examples to make sure they're doing the
28 |     right thing.
29 | 
30 | Make sure that get_neighborhoods is implemented correctly -- in
31 | particular, given *index*, it should return the indices *i* such that
32 | applicable_rules(token, i, ...) depends on the value of the
33 | *index*\ th token.  There used to be a bug where this was swapped --
34 | i.e., it calculated the values of *i* such that
35 | applicable_rules(token, index, ...) depended on *i*.
36 | 
37 |     >>> from nltk.tag.brill import ProximateTokensTemplate, ProximateWordsRule
38 |     >>> t = ProximateTokensTemplate(ProximateWordsRule, (2,3))
39 |     >>> for i in range(10):
40 |     ...     print sorted(t.get_neighborhood('abcdefghijkl', i))
41 |     [0]
42 |     [1]
43 |     [0, 2]
44 |     [0, 1, 3]
45 |     [1, 2, 4]
46 |     [2, 3, 5]
47 |     [3, 4, 6]
48 |     [4, 5, 7]
49 |     [5, 6, 8]
50 |     [6, 7, 9]
51 | 
52 | 


--------------------------------------------------------------------------------
/resources/nltk/test/toy.cfg:
--------------------------------------------------------------------------------
 1 | S -> NP VP
 2 | PP -> P NP
 3 | NP -> Det N | NP PP
 4 | VP -> V NP | VP PP
 5 | Det -> 'a' | 'the'
 6 | N -> 'dog' | 'cat'
 7 | V -> 'chased' | 'sat'
 8 | P -> 'on' | 'in'
 9 | 
10 | 


--------------------------------------------------------------------------------
/resources/nltk/test/util.doctest:
--------------------------------------------------------------------------------
 1 | .. Copyright (C) 2001-2012 NLTK Project
 2 | .. For license information, see LICENSE.TXT
 3 | 
 4 | =================
 5 | Utility functions
 6 | =================
 7 | 
 8 |     >>> from nltk.util import *
 9 |     >>> from nltk.tree import Tree
10 | 
11 |     >>> print_string("This is a long string, therefore it should break", 25)
12 |     This is a long string,
13 |     therefore it should break
14 | 
15 |     >>> re_show("[a-z]+", "sdf123")
16 |     {sdf}123
17 | 
18 |     >>> tree = Tree(5,
19 |     ...             [Tree(4, [Tree(2, [1, 3])]),
20 |     ...              Tree(8, [Tree(6, [7]), 9])])
21 |     >>> for x in breadth_first(tree):
22 |     ...     if isinstance(x, int): print x
23 |     ...     else: print x.node
24 |     5
25 |     4
26 |     8
27 |     2
28 |     6
29 |     9
30 |     1
31 |     3
32 |     7
33 |     >>> for x in breadth_first(tree, maxdepth=2):
34 |     ...     if isinstance(x, int): print x
35 |     ...     else: print x.node
36 |     5
37 |     4
38 |     8
39 |     2
40 |     6
41 |     9
42 | 
43 |     >>> invert_dict({1: 2})
44 |     defaultdict(<type 'list'>, {2: 1})
45 | 
46 |     >>> invert_dict({1: [3, 4, 5]})
47 |     defaultdict(<type 'list'>, {3: [1], 4: [1], 5: [1]})
48 | 
49 | Testing HTML cleaning
50 | ---------------------
51 | 
52 |     >>> html = """
53 |     ... <html><head></head><body><div style="display:none;">
54 |     ... <script language="JavaScript">
55 |     ... <!--
56 |     ... var s_account="aolmus,aolsvc";
57 |     ... (function(){
58 |     ... var d = document, s = d.createElement('script');
59 |     ... s.type = 'text/javascript';
60 |     ... s.src = 'http://o.aolcdn.com/omniunih.js';
61 |     ... d.getElementsByTagName('head')[0].appendChild(s);
62 |     ... })();
63 |     ... //-->
64 |     ... </script></div>
65 |     ... <!--
66 |     ... PROFILE DOB/PARENTAL CONTROL RULES
67 |     ... If(account has DOB) use DOB else use Parental Control code ().
68 |     ... If DOB < 13, or PC = Kids Only, no profile creation allowed.
69 |     ... If DOB >= 13 AND < 16, or PC = Young Teen, profile creation allowed, but not allowed to toggle off private for profile or playlists.
70 |     ... If DOB >= 16 AND < 18, or PC = Mature Teen, profile creation allowed, but profile is Private by default.  User is allowed to toggle privacy.
71 |     ... If DOB > 18, or PC = Adult, profile creation allowed, profile is Public by default, and user is allowed to toggle privacy.
72 |     ... -->
73 |     ... <ul class="miniHat">
74 |     ... <li style="display:none;"><a href="#pageBody">Skip Links</a></li>
75 |     ... <li ><a href="http://www.aol.com/" title="Visit AOL " >AOL </a></li>
76 |     ... <li ><a href="http://my.aol.com/" title="Visit My AOL" >My AOL</a></li>
77 |     ... <li ><a href="http://webmail.aol.com/?_AOLLOCAL=mail" title="Visit Mail" >Mail</a></li>
78 |     ... </ul>
79 |     ... <li class="thumbplay icon-link">
80 |     ... <a target="_blank" href="http://pr.atwola.com/promoclk/100005695x1144860307x1079136245/aol?redir=http://mobile.aol.com/ringtones?thpartist=the-all-american-rejects"">Get The All-Amer... Ringtones</a>
81 |     ... </li>
82 |     ... </body></html>
83 |     ... """
84 |     >>> [link.strip() for link in re.split("\n+", clean_html(html))]
85 |     ['Skip Links', 'AOL', 'My AOL', 'Mail', '', '', 'Get The All-Amer... Ringtones']
86 |     >>> clean_html("<h1>Heading</h1><p>Test</p>")
87 |     'Heading Test'
88 |     >>> clean_html("<html> <body> aaa<p>bbb </body> </html>")
89 |     'aaa bbb'
90 | 


--------------------------------------------------------------------------------
/resources/nltk/tokenize/api.py:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Tokenizer Interface
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | #         Steven Bird <sb@csse.unimelb.edu.au>
 6 | # URL: <http://www.nltk.org/>
 7 | # For license information, see LICENSE.TXT
 8 | 
 9 | """
10 | Tokenizer Interface
11 | """
12 | 
13 | from nltk.internals import overridden
14 | from nltk.tokenize.util import string_span_tokenize
15 | 
16 | class TokenizerI(object):
17 |     """
18 |     A processing interface for tokenizing a string.
19 |     Subclasses must define ``tokenize()`` or ``batch_tokenize()`` (or both).
20 |     """
21 |     def tokenize(self, s):
22 |         """
23 |         Return a tokenized copy of *s*.
24 | 
25 |         :rtype: list of str
26 |         """
27 |         if overridden(self.batch_tokenize):
28 |             return self.batch_tokenize([s])[0]
29 |         else:
30 |             raise NotImplementedError()
31 | 
32 |     def span_tokenize(self, s):
33 |         """
34 |         Identify the tokens using integer offsets ``(start_i, end_i)``,
35 |         where ``s[start_i:end_i]`` is the corresponding token.
36 | 
37 |         :rtype: iter(tuple(int, int))
38 |         """
39 |         raise NotImplementedError()
40 | 
41 |     def batch_tokenize(self, strings):
42 |         """
43 |         Apply ``self.tokenize()`` to each element of ``strings``.  I.e.:
44 | 
45 |             return [self.tokenize(s) for s in strings]
46 | 
47 |         :rtype: list(list(str))
48 |         """
49 |         return [self.tokenize(s) for s in strings]
50 | 
51 |     def batch_span_tokenize(self, strings):
52 |         """
53 |         Apply ``self.span_tokenize()`` to each element of ``strings``.  I.e.:
54 | 
55 |             return [self.span_tokenize(s) for s in strings]
56 | 
57 |         :rtype: iter(list(tuple(int, int)))
58 |         """
59 |         for s in strings:
60 |             yield list(self.span_tokenize(s))
61 | 
62 | 
63 | class StringTokenizer(TokenizerI):
64 |     """A tokenizer that divides a string into substrings by splitting
65 |     on the specified string (defined in subclasses).
66 |     """
67 | 
68 |     def tokenize(self, s):
69 |         return s.split(self._string)
70 | 
71 |     def span_tokenize(self, s):
72 |         for span in string_span_tokenize(s, self._string):
73 |             yield span
74 | 
75 | 
76 | if __name__ == "__main__":
77 |     import doctest
78 |     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
79 | 


--------------------------------------------------------------------------------
/resources/nltk/yamltags.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Register YAML tags in the NLTK namespace with the YAML loader, by telling it
 3 | what module and class to look for.
 4 | 
 5 | NLTK uses simple '!' tags to mark the types of objects, but the fully-qualified
 6 | "tag:nltk.org,2011:" prefix is also accepted in case anyone ends up
 7 | using it.
 8 | """
 9 | 
10 | #import yaml
11 | 
12 | def custom_import(name):
13 |     components = name.split('.')
14 |     module_path = '.'.join(components[:-1])
15 |     mod = __import__(module_path)
16 |     for comp in components[1:]:
17 |         mod = getattr(mod, comp)
18 |     return mod
19 | 
20 | def metaloader(classpath):
21 |     def loader(*args, **kwds):
22 |         classref = custom_import(classpath)
23 |         return classref.from_yaml(*args, **kwds)
24 |     return loader
25 | 
26 | def register_tag(tag, classpath):
27 |     yaml.add_constructor(u'!'+tag, metaloader(classpath))
28 |     yaml.add_constructor(u'tag:nltk.org,2011:'+tag,
29 |                          metaloader(classpath))
30 | 
31 | register_tag(u'tag.Unigram', 'nltk.tag.unigram.Unigram')
32 | register_tag(u'tag.Brill', 'nltk.tag.brill.Brill')
33 | 
34 | __all__ = ['custom_import', 'metaloader', 'register_tag']
35 | 


--------------------------------------------------------------------------------
/resources/papers/acl-02/.cvsignore:
--------------------------------------------------------------------------------
1 | *.aux
2 | *.dvi
3 | *.log
4 | *.ps
5 | *.bbl
6 | *.blg
7 | *.pdf
8 | 


--------------------------------------------------------------------------------
/resources/papers/acl-02/Makefile:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Technical report Makefile
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | ##############################################
 9 | ##  The name of the report
10 | REPORT = acl02
11 | 
12 | help: usage
13 | usage:
14 | 	@echo
15 | 	@echo make '[dvi | ps | pdf | clean]'
16 | 	@echo
17 | 
18 | # We're using bibtex:
19 | $(REPORT).dvi: $(REPORT).bbl
20 | BIBFILE = nltk.bib
21 | 
22 | ##############################################
23 | ##  Figure dependancies
24 | 
25 | 
26 | ##############################################
27 | ##  You shouldn't have to change anything below here.
28 | 
29 | # Find the name of the dvi and ps files.
30 | DVI := $(REPORT).dvi
31 | PS := $(REPORT).ps
32 | PDF := $(REPORT).pdf
33 | 
34 | # Top-level rules.
35 | dvi: $(DVI)
36 | ps: $(PS)
37 | pdf: $(PDF)
38 | clean:
39 | 	rm -f *.log *.aux *.dvi *.ps *.toc *.pdf *.bbl *.blg
40 | 
41 | %.bbl: %.tex $(BIBFILE)
42 | 	latex $*.tex || (rm -f $*.dvi && false)
43 | 	bibtex $* || (rm -f $*.dvi $@ && false)
44 | 
45 | %.dvi: %.tex
46 | 	latex $*.tex || (rm -f $@ && false)
47 | 	latex $*.tex || (rm -f $@ && false)
48 | 
49 | %.ps: %.dvi
50 | 	dvips -t letter -o $@ $< -G0 -Ppdf
51 | 
52 | %.eps: %.dot
53 | 	dot -Tps -o $@ $<
54 | 
55 | %.eps: %.obj
56 | 	tgif -print -eps $<
57 | 
58 | %.pdf: %.ps
59 | 	ps2pdf -sPAPERSIZE=letter -dMaxSubsetPct=100 \
60 | 	       -dCompatibilityLevel=1.2 -dSubsetFonts=true \
61 | 	       -dEmbedAllFonts=true $< $@
62 | 
63 | 


--------------------------------------------------------------------------------
/resources/papers/acl-02/chartparse.eps.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/acl-02/chartparse.eps.gz


--------------------------------------------------------------------------------
/resources/papers/acl-02/contest.ps.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/acl-02/contest.ps.gz


--------------------------------------------------------------------------------
/resources/papers/acl-04/.cvsignore:
--------------------------------------------------------------------------------
 1 | !
 2 | #*
 3 | *.aux
 4 | *.dvi
 5 | *.eps
 6 | *.log
 7 | *.pdf
 8 | *.ps
 9 | *.toc
10 | *~
11 | acl04.bbl
12 | acl04.blg
13 | 


--------------------------------------------------------------------------------
/resources/papers/acl-04/Makefile:
--------------------------------------------------------------------------------
 1 | # Natural Language Toolkit: Technical report Makefile
 2 | #
 3 | # Copyright (C) 2001-2012 NLTK Project
 4 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 5 | # URL: <http://www.nltk.org/>
 6 | # For license information, see LICENSE.TXT
 7 | 
 8 | ##############################################
 9 | ##  The name of the report
10 | REPORT = acl04
11 | 
12 | ##############################################
13 | ##  Figure dependancies
14 | 
15 | ##############################################
16 | ##  You shouldn't have to change anything below here.
17 | 
18 | # Find the name of the dvi and ps files.
19 | DVI := $(REPORT).dvi
20 | PS := $(REPORT).ps
21 | PDF := $(REPORT).pdf
22 | 
23 | # Top-level rules.
24 | dvi: $(DVI)
25 | ps: $(PS)
26 | pdf: $(PDF)
27 | clean:
28 | 	rm -f *.eps *.log *.aux *.dvi *.ps *.toc *.pdf
29 | 
30 | # General rules
31 | %.dvi: %.tex
32 | 	latex $<
33 | 	latex $<
34 | 
35 | %.ps: %.dvi
36 | 	dvips -t letter -o $@ $<
37 | 
38 | %.eps: %.dot
39 | 	dot -Tps -o $@ $<
40 | 
41 | %.eps: %.obj
42 | 	tgif -print -eps $<
43 | 
44 | %.pdf: %.ps
45 | 	ps2pdf $< $@
46 | 


--------------------------------------------------------------------------------
/resources/papers/acl-04/chart-matrix.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/acl-04/chart-matrix.gif


--------------------------------------------------------------------------------
/resources/papers/acl-04/chart.eps.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/acl-04/chart.eps.gz


--------------------------------------------------------------------------------
/resources/papers/acl-04/nltk.bib:
--------------------------------------------------------------------------------
 1 | 
 2 | @Book{Rossum03intro,
 3 |   author =	 {Guido Van Rossum},
 4 |   title = 	 {An Introduction to Python},
 5 |   publisher = 	 {Network Theory Ltd},
 6 |   year = 	 2003
 7 | }
 8 | 
 9 | @Book{Rossum03ref,
10 |   author =	 {Guido Van Rossum},
11 |   title = 	 {The Python Language Reference},
12 |   publisher = 	 {Network Theory Ltd},
13 |   year = 	 2003
14 | }
15 | 
16 | @InProceedings{LoperBird02,
17 |   author = 	 {Edward Loper and Steven Bird},
18 |   title = 	 {{NLTK: The Natural Language Toolkit}},
19 |   booktitle = 	 {Proceedings of the ACL Workshop on Effective Tools and
20 |     Methodologies for Teaching Natural Language Processing and Computational
21 |     Linguistics},
22 |   year =	 2002,
23 |   publisher={Somerset, NJ: Association for Computational Linguistics},
24 |   pages={62--69},
25 |   note =	 {\url{http://arXiv.org/abs/cs/0205028}},
26 | }
27 | 
28 | @InProceedings{Loper04,
29 |   author = 	 {Edward Loper},
30 |   title = 	 {{NLTK}: Building a Pedagogical Toolkit in {Python}},
31 |   booktitle = 	 {PyCon DC 2004},
32 |   year =	 2004,
33 |   publisher =	 {Python Software Foundation},
34 |   note =	 {\url{http://www.python.org/pycon/dc2004/papers/}}
35 | }
36 | 
37 | @Misc{tkinter,
38 |   author =       {Fredrik Lundh},
39 |   title =        {An Introduction to Tkinter},
40 |   note =         {\url{http://www.pythonware.com/library/tkinter/introduction/index.htm}},
41 |   year =         1999
42 | }
43 | 
44 | @Misc{epydoc,
45 |   author =       {Edward Loper},
46 |   title =        {Epydoc},
47 |   year =         2002,
48 |   note =         {\url{http://epydoc.sourceforge.net/}}
49 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/resources/papers/acl-06/rdparser.eps.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/acl-06/rdparser.eps.gz


--------------------------------------------------------------------------------
/resources/papers/acl-06/srparser.eps.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/acl-06/srparser.eps.gz


--------------------------------------------------------------------------------
/resources/papers/acl-08/grammar1.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | 
 3 | def parse(sent, grammar):
 4 |     gr = nltk.cfg.parse_cfg(grammar)
 5 |     parser = nltk.ChartParser(gr, nltk.parse.TD_STRATEGY)
 6 |     trees = parser.nbest_parse(sent.split())
 7 |     nltk.draw.draw_trees(*trees)
 8 | 
 9 | grammar = """
10 |    S -> NP VP
11 |    VP -> V NP | VP PP
12 |    NP -> Det N | NP PP
13 |    PP -> P NP
14 |    NP -> 'I'
15 |    Det -> 'the' | 'a' | 'my'
16 |    N -> 'elephant' | 'pajamas' | 'man' | 'park' | 'telescope'
17 |    V -> 'shot' | 'saw'
18 |    P -> 'in' | 'on' | 'with'
19 | """
20 | 
21 | sent = 'I shot the elephant in my pajamas'
22 | parse(sent, grammar)
23 | 


--------------------------------------------------------------------------------
/resources/papers/acl-08/grammar2.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | 
 3 | def parse(sent, grammar):
 4 |     gr = nltk.cfg.parse_cfg(grammar)
 5 |     parser = nltk.ChartParser(gr, nltk.parse.TD_STRATEGY)
 6 |     trees = parser.nbest_parse(sent.split())
 7 |     nltk.draw.draw_trees(*trees)
 8 | 
 9 | grammar = """
10 |    S -> NP VP
11 |    VP -> V NP | VP PP
12 |    NP -> Det N | NP PP
13 |    PP -> P NP
14 |    NP -> 'I'
15 |    Det -> 'the' | 'a' | 'my'
16 |    N -> 'elephant' | 'pajamas' | 'man' | 'park' | 'telescope'
17 |    V -> 'shot' | 'saw'
18 |    P -> 'in' | 'on' | 'with'
19 | """
20 | 
21 | sent = 'I saw the man in the park with a telescope'
22 | parse(sent, grammar)
23 | 


--------------------------------------------------------------------------------
/resources/papers/acl-08/police.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | 
 3 | def parse(sent, grammar):
 4 |     gr = nltk.cfg.parse_cfg(grammar)
 5 |     parser = nltk.ChartParser(gr, nltk.parse.TD_STRATEGY)
 6 |     trees = parser.nbest_parse(sent.split())
 7 |     nltk.draw.draw_trees(*trees)
 8 | 
 9 | grammar = """
10 |     S -> NP V NP
11 |     NP -> NP Sbar
12 |     Sbar -> NP V
13 |     NP -> 'fish' | 'police'
14 |     V -> 'fish' | 'police'
15 | """
16 | 
17 | sent = 'police police police police police police police police police'
18 | parse(sent, grammar)
19 | 
20 | 


--------------------------------------------------------------------------------
/resources/papers/altw-06/altw-06.bib:
--------------------------------------------------------------------------------
 1 | @Book{Blackburn:2005:RINL,
 2 |   author = 	 {Patrick Blackburn and Johan Bos},
 3 |   title = 	 {Representation and Inference for Natural Language: A First Course in Computational Semantics},
 4 |   publisher = 	 {CSLI Publications},
 5 |   year = 	 2005}
 6 | 
 7 | 
 8 | @InCollection{Montague:1974:PTQ,
 9 |   author = 	 {Richard Montague},
10 |   title = 	 {The Proper Treatment of Quantification in Ordinary {E}nglish},
11 |   booktitle = 	 {Formal Philosphy: Selected Papers of Richard Montague},
12 |   pages = 	 {247--270},
13 |   publisher = {Yale University Press},
14 |   year = 	 1974,
15 |   editor = 	 {R. H. Thomason},
16 |   address = 	 {New Haven}}
17 | 
18 |  @Book{Dowty:1981:IMS,
19 |   author = 	 {D. R. Dowty and R. E. Wall and S. Peters},
20 |   title = 	 {Introduction to {M}ontague {S}emantics},
21 |   publisher = 	 {Reidel},
22 |   year = 	 1981,
23 |   series = 	 {Studies in Linguistics and Philosophy},
24 |   address = 	 {Dordrecht}}
25 | 
26 | @InProceedings{Bird:2005:NES,
27 |   author = 	 {Steven Bird},
28 |   title = 	 {{NLTK-Lite}: Efficient Scripting for Natural Language Processing},
29 |   booktitle = {Proceedings of the 4th International Conference on Natural Language Processing (ICON)},
30 |   pages = 	 {11--18},
31 |   year = 	 2005,
32 |   address = 	 {New Delhi},
33 |   month = 	 {December},
34 |   publisher = {Allied Publishers}}
35 | 
36 | 
37 | @Book{vanRossum:2006:PT,
38 |   author = 	 {Guido van Rossum},
39 |   title = 	 {Python Tutorial},
40 |   year = 	 2006,
41 |   month = 	 {March},
42 |   note = 	 {Release 2.4.3},
43 |   url = 	 {http://docs.python.org/tut/tut.html}
44 | }
45 | 
46 | 
47 | @Book{Russell:2003:AIMA,
48 |   author = 	 {Stuart Russell and Peter Norvig},
49 |   title = 	 {Artifical Intelligence: A Modern Approach},
50 |   publisher = 	 {Prentice Hall},
51 |   year = 	 2003,
52 |   note = 	 {2nd edition}}
53 | 
54 | 


--------------------------------------------------------------------------------
/resources/papers/iwcs-08/drs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/iwcs-08/drs.png


--------------------------------------------------------------------------------
/resources/papers/iwcs-08/garrette-klein.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/iwcs-08/garrette-klein.tar.gz


--------------------------------------------------------------------------------
/resources/papers/iwcs-08/modules.graffle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/iwcs-08/modules.graffle


--------------------------------------------------------------------------------
/resources/papers/iwcs-08/modules.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/papers/iwcs-08/modules.pdf


--------------------------------------------------------------------------------
/resources/papers/iwcs-08/nltk_iwcs_09.bib:
--------------------------------------------------------------------------------
 1 | @book{Dalrymple2001,
 2 |   author =        {Mary Dalrymple},
 3 |   title =         {Lexical Functional Grammar},
 4 |   series =        {Syntax and Semantics},
 5 |   volume =        {34},
 6 |   publisher =     {Academic Press},
 7 |   address =       {New York},
 8 |   year =          {2001}
 9 | }
10 | 
11 | 
12 | @InCollection{Dalrymple:1999:RRB,
13 |   author = 	 {Mary Dalrymple and V. Gupta and John Lamping and V. Saraswat},
14 |   title = 	 {Relating resource-based 
15 | semantics to categorial semantics},
16 |   booktitle = 	 {Semantics and syntax in {Lexical Functional Grammar}: the resource 
17 | logic approach},
18 |   pages = 	 { 261--280},
19 |   publisher = {MIT Press},
20 |   year = 	 1999,
21 |   editor = 	 {Mary Dalrymple},
22 |   address = 	 {Cambridge, MA}}
23 | 
24 | 
25 | 
26 | 
27 | @book{BB,
28 |   author =        {Patrick Blackburn and Johan Bos},
29 |   title =         {Representation and Inference for Natural Language: A First Course in Computational Semantics},
30 |   publisher =     {CSLI Publications},
31 |   address =       {New York},
32 |   year =          {2005}
33 | }
34 | 
35 | @book{KampReyle,
36 |   author =        {Hans Kamp and Uwe Reyle},
37 |   title =         {From Discourse to the Lexicon: Introduction to Modeltheoretic Semantics of Natural Language, Formal Logic and Discourse Representation Theory},
38 |   publisher =     {Kluwer Academic Publishers},
39 |   year =          {1993}
40 | }
41 | 
42 | @inproceedings{Multidisciplinary,
43 |   author =        {Steven Bird and Ewan Klein and Edward Loper and Jason Baldridge},
44 |   title =         {Multidisciplinary instruction with the {Natural Language Toolkit}},
45 |   booktitle =     {Proceedings of the Third Workshop on Issues in Teaching Computational Linguistics},
46 |   address =       {Columbus, Ohio, USA},
47 |   month =         {June},
48 |   year =          {2008}
49 | }
50 | 
51 | @Misc{McCune,
52 |   author = {William McCune},
53 |   title = {Prover9: Automated theorem prover for first-order and equational logic},
54 |   year = 2008,
55 |   note = {\url{http://www.cs.unm.edu/~mccune/mace4/manual-examples.html}}
56 | }
57 | 
58 | @inproceedings{BosRTE,
59 |   author =        {Johan Bos and Katja Markert},
60 |   title =         {Recognising textual entailment with logical inference},
61 |   booktitle =     {Proceedings of the conference on Human Language Technology and Empirical Methods in Natural Language Processing},
62 |   address =       {Vancouver, British Columbia, Canada},
63 |   year =          {2005}
64 | }
65 | 
66 | @InProceedings{Klein06altw,
67 |   author = 	 {Ewan Klein},
68 |   title = 	 {Computational semantics in the {Natural Language Toolkit}},
69 |   booktitle = 	 {Proceedings of the Australasian Language Technology Workshop},
70 |   pages = 	 {26--33},
71 |   year = 	 2006
72 | }
73 | 


--------------------------------------------------------------------------------
/resources/setup.cfg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/setup.cfg


--------------------------------------------------------------------------------
/resources/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # Distribute setup script for the Natural Language Toolkit
 4 | #
 5 | # Copyright (C) 2001-2012 NLTK Project
 6 | # Author: Steven Bird <sb@csse.unimelb.edu.au>
 7 | #         Edward Loper <edloper@gradient.cis.upenn.edu>
 8 | #         Ewan Klein <ewan@inf.ed.ac.uk>
 9 | # URL: <http://nltk.org/>
10 | # For license information, see LICENSE.TXT
11 | 
12 | # python2.5 compatibility
13 | from __future__ import with_statement
14 | 
15 | import os
16 | 
17 | # Use the VERSION file to get NLTK version
18 | version_file = os.path.join(os.path.dirname(__file__), 'nltk', 'VERSION')
19 | with open(version_file) as fh:
20 |     nltk_version = fh.read().strip()
21 | 
22 | import distribute_setup
23 | distribute_setup.use_setuptools()
24 | 
25 | from setuptools import setup, find_packages
26 | 
27 | #
28 | # Prevent setuptools from trying to add extra files to the source code
29 | # manifest by scanning the version control system for its contents.
30 | #
31 | from setuptools.command import sdist
32 | del sdist.finders[:]
33 | 
34 | setup(
35 |     name = "nltk",
36 |     description = "Natural Language Toolkit",
37 |     version = nltk_version,
38 |     url = "http://nltk.org/",
39 |     long_description = """\
40 | The Natural Language Toolkit (NLTK) is a Python package for
41 | natural language processing.  NLTK requires Python 2.5 or higher.""",
42 |     license = "Apache License, Version 2.0",
43 |     keywords = ['NLP', 'CL', 'natural language processing',
44 |                 'computational linguistics', 'parsing', 'tagging',
45 |                 'tokenizing', 'syntax', 'linguistics', 'language',
46 |                 'natural language', 'text analytics'],
47 |     maintainer = "Steven Bird",
48 |     maintainer_email = "stevenbird1@gmail.com",
49 |     author = "Steven Bird",
50 |     author_email = "stevenbird1@gmail.com",
51 |     classifiers = [
52 |     'Development Status :: 5 - Production/Stable',
53 |     'Intended Audience :: Developers',
54 |     'Intended Audience :: Education',
55 |     'Intended Audience :: Information Technology',
56 |     'Intended Audience :: Science/Research',
57 |     'License :: OSI Approved :: Apache Software License',
58 |     'Operating System :: OS Independent',
59 |     'Programming Language :: Python :: 2.5',
60 |     'Programming Language :: Python :: 2.6',
61 |     'Programming Language :: Python :: 2.7',
62 |     'Topic :: Scientific/Engineering',
63 |     'Topic :: Scientific/Engineering :: Artificial Intelligence',
64 |     'Topic :: Scientific/Engineering :: Human Machine Interfaces',
65 |     'Topic :: Scientific/Engineering :: Information Analysis',
66 |     'Topic :: Text Processing',
67 |     'Topic :: Text Processing :: Filters',
68 |     'Topic :: Text Processing :: General',
69 |     'Topic :: Text Processing :: Indexing',
70 |     'Topic :: Text Processing :: Linguistic',
71 |     ],
72 |     package_data = {'nltk': ['nltk.jar', 'test/*.doctest', 'VERSION']},
73 |     packages = find_packages(),
74 |     zip_safe=False, # since normal files will be present too?
75 |     install_requires=['PyYAML>=3.09'],
76 |     test_suite = 'nltk.test.simple',
77 |     )
78 | 


--------------------------------------------------------------------------------
/resources/tools/global_replace.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | ## Natural Language Toolkit: substitute a pattern with a replacement in every file
 4 | #
 5 | # Copyright (C) 2001-2012 NLTK Project
 6 | # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
 7 | #         Steven Bird <sb@csse.unimelb.edu.au>
 8 | # URL: <http://www.nltk.org/>
 9 | # For license information, see LICENSE.TXT
10 | 
11 | # NB Should work on all platforms, http://www.python.org/doc/2.5.2/lib/os-file-dir.html
12 | 
13 | import os, stat, sys
14 | 
15 | def update(file, pattern, replacement, verbose=False):
16 |     if verbose:
17 |         print "Updating:", file
18 | 
19 |     # make sure we can write the file
20 |     old_perm = os.stat(file)[0]
21 |     if not os.access(file, os.W_OK):
22 |         os.chmod(file, old_perm | stat.S_IWRITE)
23 | 
24 |     # write the file
25 |     s = open(file, 'rb').read()
26 |     t = s.replace(pattern, replacement)
27 |     out = open(file, 'wb')
28 |     out.write(t)
29 |     out.close()
30 | 
31 |     # restore permissions
32 |     os.chmod(file, old_perm)
33 | 
34 |     return s != t
35 | 
36 | if __name__ == '__main__':
37 | 
38 |     if len(sys.argv) != 3:
39 |         exit("Usage: %s <pattern> <replacement>" % sys.argv[0])
40 | 
41 |     pattern = sys.argv[1]
42 |     replacement = sys.argv[2]
43 |     count = 0
44 | 
45 |     for root, dirs, files in os.walk('.'):
46 |         if '/.git' not in root:
47 |             for file in files:
48 |                 path = os.path.join(root, file)
49 |                 if update(path, pattern, replacement):
50 |                     print "Updated:", path
51 |                     count += 1
52 | 
53 |     print "Updated %d files" % count
54 | 


--------------------------------------------------------------------------------
/resources/tools/nltk_term_index.stoplist:
--------------------------------------------------------------------------------
  1 | __init__
  2 | Comment
  3 | Plot
  4 | about
  5 | add
  6 | all
  7 | analysis
  8 | args
  9 | book
 10 | bubble
 11 | categories
 12 | close
 13 | concatenate
 14 | contains
 15 | copy
 16 | coverage
 17 | defaultdict
 18 | demo
 19 | describe
 20 | dict
 21 | discourse
 22 | doctype
 23 | documents
 24 | dump
 25 | end
 26 | ends
 27 | fileids
 28 | files
 29 | find
 30 | first
 31 | free
 32 | goal
 33 | groups
 34 | help
 35 | incorrect
 36 | insert
 37 | instances
 38 | items
 39 | join
 40 | key
 41 | labels
 42 | lhs
 43 | line
 44 | lines
 45 | list
 46 | lookup
 47 | matches
 48 | max
 49 | means
 50 | min
 51 | missed
 52 | name
 53 | next
 54 | nltk
 55 | nltk.book
 56 | open
 57 | pairs
 58 | play
 59 | plot
 60 | pop
 61 | pos
 62 | pp
 63 | pprint
 64 | prev
 65 | process
 66 | purge
 67 | put
 68 | quick
 69 | raw
 70 | read
 71 | reader
 72 | readings
 73 | readme
 74 | repr
 75 | rhs
 76 | root
 77 | run
 78 | second
 79 | see
 80 | select
 81 | sentences
 82 | sents
 83 | set
 84 | simple
 85 | size
 86 | sorted
 87 | span
 88 | start
 89 | step
 90 | stop
 91 | str
 92 | table
 93 | test
 94 | text
 95 | texts
 96 | trace
 97 | type
 98 | update
 99 | verbs
100 | view
101 | vocab
102 | walk
103 | wav
104 | width
105 | words
106 | write
107 | 


--------------------------------------------------------------------------------
/resources/tools/svnmime.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # NB, this wouldn't be needed if everyone had .subversion/config
 4 | # configured to automatically set mime types
 5 | # http://code.google.com/p/support/wiki/FAQ
 6 | 
 7 | import os
 8 | import sys
 9 | 
10 | types_map = {
11 |     'ai': 'application/postscript',
12 |     'coverage': 'text/plain',
13 |     'css': 'text/css',
14 |     'eps': 'application/postscript',
15 |     'exe': 'application/octet-stream',
16 |     'errs': 'text/plain',
17 |     'gif': 'image/gif',
18 |     'htm': 'text/html',
19 |     'html': 'text/html',
20 |     'jpeg': 'image/jpeg',
21 |     'jpg': 'image/jpeg',
22 |     'js': 'application/x-javascript',
23 |     'pbm': 'image/x-portable-bitmap',
24 |     'pdf': 'application/pdf',
25 |     'pgm': 'image/x-portable-graymap',
26 |     'pnm': 'image/x-portable-anymap',
27 |     'png': 'image/png',
28 |     'ppm': 'image/x-portable-pixmap',
29 |     'py': 'text/x-python',
30 |     'ps': 'application/postscript',
31 |     'rst': 'text/plain',
32 |     'tex': 'application/x-tex',
33 |     'txt': 'text/plain',
34 |     'xml': 'text/xml',
35 |     'xsl': 'text/plain',
36 |     'zip': 'application/zip',
37 |     }
38 | 
39 | def usage():
40 |     exit("Usage: svnmime files")
41 | 
42 | for file in sys.argv[1:]:
43 |     if "." in file:
44 |         extension = file.rsplit('.', 1)[1]
45 |         if extension in types_map:
46 |             os.system("svn propset svn:mime-type %s %s" % (types_map[extension], file))
47 |         else:
48 |             print "Unrecognized extension", extension
49 | 


--------------------------------------------------------------------------------
/resources/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py25,py26,py27,pypy
 3 | 
 4 | [testenv]
 5 | 
 6 | ; simplify numpy installation
 7 | setenv =
 8 |     LAPACK=
 9 |     ATLAS=None
10 | 
11 | deps =
12 | ;    epydoc
13 |     numpy
14 |     nose
15 |     svmlight
16 | 
17 | 
18 | changedir = nltk/test
19 | commands =
20 |     ; scipy and scikit-learn requires numpy even to run setup.py so
21 |     ; they can't be installed in one command
22 | 
23 |     pip install --download-cache={toxworkdir}/_download scipy scikit-learn
24 |     python runtests.py []
25 | 
26 | [testenv:pypy]
27 | ; pysvmlight don't work with pypy; numpy is bundled with pypy.
28 | deps =
29 |     epydoc
30 |     nose
31 | 
32 | commands =
33 |     python runtests.py []
34 | 


--------------------------------------------------------------------------------
/resources/web/api/nltk.rst:
--------------------------------------------------------------------------------
  1 | .. manually constructed -- removed several low-level packages
  2 | 
  3 | nltk Package
  4 | ============
  5 | 
  6 | :mod:`nltk` Package
  7 | -------------------
  8 | 
  9 | .. automodule:: nltk.__init__
 10 |     :members:
 11 |     :undoc-members:
 12 |     :show-inheritance:
 13 | 
 14 | :mod:`align` Module
 15 | -------------------
 16 | 
 17 | .. automodule:: nltk.align
 18 |     :members:
 19 |     :undoc-members:
 20 |     :show-inheritance:
 21 | 
 22 | :mod:`collocations` Module
 23 | --------------------------
 24 | 
 25 | .. automodule:: nltk.collocations
 26 |     :members:
 27 |     :undoc-members:
 28 |     :show-inheritance:
 29 | 
 30 | :mod:`data` Module
 31 | ------------------
 32 | 
 33 | .. automodule:: nltk.data
 34 |     :members:
 35 |     :undoc-members:
 36 |     :show-inheritance:
 37 | 
 38 | :mod:`downloader` Module
 39 | ------------------------
 40 | 
 41 | .. automodule:: nltk.downloader
 42 |     :members:
 43 |     :undoc-members:
 44 |     :show-inheritance:
 45 | 
 46 | :mod:`featstruct` Module
 47 | ------------------------
 48 | 
 49 | .. automodule:: nltk.featstruct
 50 |     :members:
 51 |     :undoc-members:
 52 |     :show-inheritance:
 53 | 
 54 | :mod:`grammar` Module
 55 | ---------------------
 56 | 
 57 | .. automodule:: nltk.grammar
 58 |     :members:
 59 |     :undoc-members:
 60 |     :show-inheritance:
 61 | 
 62 | :mod:`help` Module
 63 | ------------------
 64 | 
 65 | .. automodule:: nltk.help
 66 |     :members:
 67 |     :undoc-members:
 68 |     :show-inheritance:
 69 | 
 70 | :mod:`probability` Module
 71 | -------------------------
 72 | 
 73 | .. automodule:: nltk.probability
 74 |     :members:
 75 |     :undoc-members:
 76 |     :show-inheritance:
 77 | 
 78 | :mod:`sourcedstring` Module
 79 | ---------------------------
 80 | 
 81 | .. automodule:: nltk.sourcedstring
 82 |     :members:
 83 |     :undoc-members:
 84 |     :show-inheritance:
 85 | 
 86 | :mod:`text` Module
 87 | ------------------
 88 | 
 89 | .. automodule:: nltk.text
 90 |     :members:
 91 |     :undoc-members:
 92 |     :show-inheritance:
 93 | 
 94 | :mod:`toolbox` Module
 95 | ---------------------
 96 | 
 97 | .. automodule:: nltk.toolbox
 98 |     :members:
 99 |     :undoc-members:
100 |     :show-inheritance:
101 | 
102 | :mod:`tree` Module
103 | ------------------
104 | 
105 | .. automodule:: nltk.tree
106 |     :members:
107 |     :undoc-members:
108 |     :show-inheritance:
109 | 
110 | :mod:`treetransforms` Module
111 | ----------------------------
112 | 
113 | .. automodule:: nltk.treetransforms
114 |     :members:
115 |     :undoc-members:
116 |     :show-inheritance:
117 | 
118 | :mod:`util` Module
119 | ------------------
120 | 
121 | .. automodule:: nltk.util
122 |     :members:
123 |     :undoc-members:
124 |     :show-inheritance:
125 | 
126 | Subpackages
127 | -----------
128 | 
129 | .. toctree::
130 | 
131 |     nltk.app
132 |     nltk.ccg
133 |     nltk.chat
134 |     nltk.chunk
135 |     nltk.classify
136 |     nltk.cluster
137 |     nltk.corpus
138 |     nltk.draw
139 |     nltk.examples
140 |     nltk.inference
141 |     nltk.metrics
142 |     nltk.misc
143 |     nltk.model
144 |     nltk.parse
145 |     nltk.sem
146 |     nltk.stem
147 |     nltk.tag
148 |     nltk.test
149 |     nltk.tokenize
150 | 
151 | 


--------------------------------------------------------------------------------
/resources/web/data.rst:
--------------------------------------------------------------------------------
 1 | Installing NLTK Data
 2 | ====================
 3 | 
 4 | NLTK comes with many corpora, toy grammars, trained models, etc.   A complete list is posted at: http://nltk.org/nltk_data/
 5 | 
 6 | To install the data, first install NLTK (see http://nltk.org/install.html), then use NLTK's data downloader as described below.
 7 | 
 8 | Apart from individual data packages, you can download the entire collection (using "all"), or just the data required for the examples and exercises in the book (using "book"), or just the corpora and no grammars or trained models (using "all-corpora").
 9 | 
10 | Interactive installer
11 | ---------------------
12 | 
13 | *For central installation on a multi-user machine, do the following from an administrator account.*
14 | 
15 | Run the Python interpreter and type the commands:
16 | 
17 |     >>> import nltk
18 |     >>> nltk.download()
19 | 
20 | A new window should open, showing the NLTK Downloader.  Click on the File menu and select Change Download Directory.  For central installation, set this to ``C:\nltk_data`` (Windows), or ``/usr/share/nltk_data`` (Mac, Unix).  Next, select the packages or collections you want to download.
21 | 
22 | If you did not install the data to one of the above central locations, you will need to set the ``NLTK_DATA`` environment variable to specify the location of the data.  (On a Windows machine, right click on "My Computer" then select ``Properties > Advanced > Environment Variables > User Variables > New...``)
23 | 
24 | Test that the data has been installed as follows.  (This assumes you downloaded the Brown Corpus):
25 | 
26 |     >>> from nltk.corpus import brown
27 |     >>> brown.words()
28 |     ['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...]
29 | 
30 | Installing via a proxy web server
31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
32 | 
33 | If your web connection uses a proxy server, you should specify the proxy address as follows.  In the case of an authenticating proxy, specify a username and password.  If the proxy is set to None then this function will attempt to detect the system proxy.
34 | 
35 |     >>> nltk.set_proxy('http://proxy.example.com:3128' ('USERNAME', 'PASSWORD'))
36 |     >>> nltk.download() 
37 | 
38 | Command line installation
39 | -------------------------
40 | 
41 | The downloader will search for an existing ``nltk_data`` directory to install NLTK data.  If one does not exist it will attempt to create one in a central location (when using an administrator account) or otherwise in the user's filespace.  If necessary, run the download command from an administrator account, or using sudo.  The default system location on Windows is ``C:\nltk_data``; and on Mac and Unix is ``/usr/share/nltk_data``.  You can use the ``-d`` flag to specify a different location (but if you do this, be sure to set the ``NLTK_DATA`` environment variable accordingly).
42 | 
43 | Python 2.5-2.7: Run the command ``python -m nltk.downloader all``.  To ensure central installation, run the command ``sudo python -m nltk.downloader -d /usr/share/nltk_data all``.
44 | 
45 | Windows: Use the "Run..." option on the Start menu.  Windows Vista users need to first turn on this option, using ``Start -> Properties -> Customize`` to check the box to activate the "Run..." option. 
46 | 
47 | Test the installation: Check that the user environment and privileges are set correctly by logging in to a user account,
48 | starting the Python interpreter, and accessing the Brown Corpus (see the previous section).
49 | 
50 | 


--------------------------------------------------------------------------------
/resources/web/dev/local_testing.rst:
--------------------------------------------------------------------------------
 1 | NLTK testing
 2 | ============
 3 | 
 4 | 1. Obtain nltk source code;
 5 | 2. install virtualenv and tox::
 6 | 
 7 |        pip install virtualenv
 8 |        pip install tox
 9 | 
10 | 3. make sure python2.5, python2.6, python2.7 and pypy executables are
11 |    in system PATH. It is OK not to have all the executables, tests will
12 |    be executed for available interpreters.
13 | 
14 | 4. Make sure all NLTK data is downloaded (see `nltk.download()`);
15 | 
16 | 5. run 'tox' command from the root nltk folder. It will install dependencies
17 |    and run `nltk/test/runtests.py` script for all available interpreters.
18 |    You may pass any options to runtests.py script separating them by '--'.
19 | 
20 | It may take a long time at first run, but the subsequent runs will be much faster.
21 | Please consult http://tox.testrun.org/ for more info about the tox tool.
22 | 
23 | Examples
24 | --------
25 | 
26 | Run tests for python 2.7 in verbose mode; executing only tests
27 | that failed in the last test run::
28 | 
29 |     tox -e py27 -- -v --failed
30 | 
31 | 
32 | Run tree doctests for all available interpreters::
33 | 
34 |     tox -- tree.doctest
35 | 
36 | 


--------------------------------------------------------------------------------
/resources/web/images/book.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/web/images/book.gif


--------------------------------------------------------------------------------
/resources/web/images/tree.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rplevy/clojure-nltk/b4119d66b21f3c619ca4d385f605417901e47349/resources/web/images/tree.gif


--------------------------------------------------------------------------------
/resources/web/index.rst:
--------------------------------------------------------------------------------
 1 | Natural Language Toolkit
 2 | ========================
 3 | 
 4 | NLTK is a leading platform for building Python programs to work with human language data.
 5 | It provides easy-to-use interfaces to over 50 corpora and lexical resources such as WordNet,
 6 | along with a suite of text processing libraries for classification, tokenization, stemming, tagging, parsing, and semantic reasoning.
 7 | 
 8 | Thanks to a hands-on guide introducing programming fundamentals alongside topics in computational linguistics,
 9 | NLTK is suitable for linguists, engineers, students, educators, researchers, and industry users alike.
10 | NLTK is available for Windows, Mac OS X, and Linux. Best of all, NLTK is a free, open source, community-driven project.
11 | 
12 | NLTK has been called "a wonderful tool for teaching, and working in, computational linguistics using Python,"
13 | and "an amazing library to play with natural language."
14 | 
15 | `Natural Language Processing with Python <https://sites.google.com/site/naturallanguagetoolkit/book>`_ provides a practical
16 | introduction to programming for language processing.
17 | Written by the creators of NLTK, it guides the reader through the fundamentals
18 | of writing Python programs, working with corpora, categorizing text, analyzing linguistic structure,
19 | and more.
20 | 
21 | Some simple things you can do with NLTK
22 | ---------------------------------------
23 | 
24 | Tokenize and tag some text:
25 | 
26 |     >>> import nltk
27 |     >>> sentence = """At eight o'clock on Thursday morning
28 |     ... Arthur didn't feel very good."""
29 |     >>> tokens = nltk.word_tokenize(sentence)
30 |     >>> tokens
31 |     ['At', 'eight', "o'clock", 'on', 'Thursday', 'morning',
32 |     'Arthur', 'did', "n't", 'feel', 'very', 'good', '.']
33 |     >>> tagged = nltk.pos_tag(tokens)
34 |     >>> tagged[0:6]
35 |     [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
36 |     ('Thursday', 'NNP'), ('morning', 'NN')]
37 | 
38 | Identify named entities:
39 | 
40 |     >>> entities = nltk.chunk.ne_chunk(tagged)
41 |     >>> entities
42 |     Tree('S', [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'),
43 |                ('on', 'IN'), ('Thursday', 'NNP'), ('morning', 'NN'),
44 |            Tree('PERSON', [('Arthur', 'NNP')]),
45 |                ('did', 'VBD'), ("n't", 'RB'), ('feel', 'VB'),
46 |                ('very', 'RB'), ('good', 'JJ'), ('.', '.')])
47 | 
48 | Display a parse tree:
49 | 
50 | .. doctest::
51 |     :options: +SKIP
52 | 
53 |     >>> from nltk.corpus import treebank
54 |     >>> t = treebank.parsed_sents('wsj_0001.mrg')[0]
55 |     >>> t.draw()
56 | 
57 | .. image:: images/tree.gif
58 | 
59 | Links
60 | -----
61 | 
62 | * NLTK-Users mailing list: http://groups.google.com/group/nltk-users
63 | * NLTK's previous website: https://sites.google.com/site/naturallanguagetoolkit
64 | * NLTK development: https://github.com/nltk
65 | * NLTK-Dev mailing list: http://groups.google.com/group/nltk-dev
66 | * Publications about NLTK: http://scholar.google.com.au/scholar?q=NLTK
67 | 
68 | Contents
69 | ========
70 | 
71 | .. toctree::
72 |    :maxdepth: 1
73 | 
74 |    news
75 |    install
76 |    data
77 |    api/nltk
78 | 
79 | * :ref:`genindex`
80 | * :ref:`modindex`
81 | * :ref:`search`
82 | 


--------------------------------------------------------------------------------
/resources/web/install.rst:
--------------------------------------------------------------------------------
 1 | Installing NLTK
 2 | ===============
 3 | 
 4 | NLTK requires Python versions 2.5-2.7.
 5 | 
 6 | Mac/Unix
 7 | --------
 8 | 
 9 | #. Open ``Finder>Applications>Utilities>Terminal`` and type ``python -V`` to find out what version of Python is installed
10 | #. Install Setuptools: Download the corresponding version of Setuptools from
11 |    http://pypi.python.org/pypi/setuptools (scroll to the bottom, and pick the filename that contains the right version number and which has the extension .egg).  Install it by typing ``sudo sh Downloads/setuptools-...egg``, giving the location of the downloaded file.
12 | #. Install Pip: run ``sudo easy_install pip``
13 | #. Install Numpy (optional): run ``sudo pip install -U numpy``
14 | #. Install PyYAML and NLTK: run ``sudo pip install -U pyyaml nltk``
15 | #. Test installation: run ``python`` then type ``import nltk``
16 | 
17 | Windows
18 | -------
19 | 
20 | These instructions assume that you do not already have Python installed on your machine.
21 | If you do, you can skip to the final step and just install NLTK.
22 | 
23 | 32-bit binary installation
24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
25 | 
26 | #. Install Python: http://www.python.org/download/releases/2.7.3/
27 | #. Install Numpy (optional): http://sourceforge.net/projects/numpy/files/NumPy/1.6.2/numpy-1.6.2-win32-superpack-python2.7.exe
28 | #. Install NLTK: http://pypi.python.org/pypi/nltk
29 | #. Install PyYAML: http://pyyaml.org/wiki/PyYAML
30 | #. Test installation: ``Start>Python27``, then type ``import nltk``
31 | 
32 | Source installation (for 32-bit or 64-bit Windows)
33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
34 | 
35 | #. Install Python: http://www.python.org/download/releases/2.7.3/
36 | #. Install Numpy (optional): http://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy
37 | #. Install Setuptools: http://pypi.python.org/packages/2.7/s/setuptools/setuptools-0.6c11.win32-py2.7.exe
38 | #. Install Pip: ``Start>Run... c:\Python27\Scripts\easy_install pip``
39 | #. Install PyYAML and NLTK: ``Start>Run... c:\Python27\Scripts\pip install pyyaml nltk``
40 | #. Test installation: ``Start>All Programs>Python27>IDLE``, then type ``import nltk``
41 | 
42 | 


--------------------------------------------------------------------------------
/src/clojure_nltk/core.clj:
--------------------------------------------------------------------------------
 1 | (ns clojure-nltk.core
 2 |   (:require [clojure-python.core :as py]
 3 |             [clojure.java.io :as io]))
 4 | 
 5 | (defmacro nltk-init
 6 |   "set up ntlk. currently supported usages:
 7 |   (nltk-init (:import foo bar baz))"
 8 |   [& clauses]
 9 |   (let [import-clauses (set (apply concat
10 |                                    (map #(if (= :import (first %))
11 |                                            (rest %)) clauses)))]
12 |     `(do
13 |        (py/init (io/resource "nltk/"))
14 |        (py/py-import-lib ~'nltk)
15 |        ~@(map (fn [module]
16 |                 `(py/py-import-lib
17 |                   ~'nltk
18 |                   ~module))
19 |               import-clauses))))
20 | 
21 | (defmacro corpus-base [corpus-name method & params]
22 |   `(py/pyobj-iterate
23 |     (py/_> [~'corpus ~corpus-name ~method]
24 |                             ~@params)))
25 | (defmacro corpus-words [corpus-name & params]
26 |   `(py/corpus-base ~corpus-name ~'words ~@params))
27 | (defmacro corpus-categories [corpus-name & params]
28 |   `(py/corpus-base ~corpus-name ~'categories ~@params))
29 | (defmacro corpus-fileids [corpus-name & params]
30 |   `(py/corpus-base ~corpus-name ~'fileids ~@params))
31 | 


--------------------------------------------------------------------------------
/test/clojure_nltk/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns clojure-nltk.core-test
2 |   (:require [clojure-nltk.core :as base]
3 |             [midje.sweet :refer :all]))
4 | 
5 | (fact (base/nltk-init) => anything)
6 | 


--------------------------------------------------------------------------------