├── __init__.py
├── pattern
├── server
│ └── static
│ │ └── robots.txt
├── text
│ ├── xx
│ │ ├── xx-context.txt
│ │ ├── xx-morphology.txt
│ │ ├── xx-frequency.txt
│ │ ├── xx-lexicon.txt
│ │ ├── xx-verbs.txt
│ │ ├── __main__.py
│ │ └── xx-sentiment.xml
│ ├── en
│ │ ├── en-model.slp
│ │ ├── wordnet
│ │ │ └── dict
│ │ │ │ ├── index.32
│ │ │ │ ├── lexnames
│ │ │ │ └── LICENSE.txt
│ │ ├── __main__.py
│ │ └── wordlist
│ │ │ ├── time.txt
│ │ │ ├── __init__.py
│ │ │ └── profanity.txt
│ ├── ru
│ │ ├── ru-model.slp
│ │ ├── __main__.py
│ │ ├── wordlist
│ │ │ └── __init__.py
│ │ └── __init__.py
│ ├── it
│ │ ├── __main__.py
│ │ └── it-context.txt
│ ├── de
│ │ └── __main__.py
│ ├── es
│ │ ├── __main__.py
│ │ └── es-morphology.txt
│ ├── fr
│ │ ├── __main__.py
│ │ └── fr-morphology.txt
│ └── nl
│ │ └── __main__.py
├── vector
│ ├── svm
│ │ ├── macos
│ │ │ ├── libsvm-3.22
│ │ │ │ └── libsvm.so.2
│ │ │ └── liblinear-2.20
│ │ │ │ └── liblinear.so.3
│ │ ├── ubuntu
│ │ │ ├── libsvm-3.22
│ │ │ │ └── libsvm.so.2
│ │ │ └── liblinear-2.20
│ │ │ │ └── liblinear.so.3
│ │ ├── windows
│ │ │ ├── libsvm-3.22
│ │ │ │ └── libsvm.dll
│ │ │ └── liblinear-2.20
│ │ │ │ └── liblinear.dll
│ │ ├── __init__.py
│ │ ├── INSTALL.txt
│ │ ├── COPYRIGHT-liblinear.txt
│ │ └── COPYRIGHT-libsvm.txt
│ ├── stopwords-nl.txt
│ └── stopwords-es.txt
├── web
│ ├── api.py
│ └── utils.py
├── helpers.py
└── __init__.py
├── docs
├── index.html
├── g
│ ├── more.png
│ ├── header.jpg
│ ├── scanner.jpg
│ ├── shadow.png
│ ├── download.gif
│ ├── download3.gif
│ ├── external.png
│ ├── gradient.jpg
│ ├── pattern_graph1.jpg
│ ├── pattern_graph2.jpg
│ ├── pattern_graph3.jpg
│ ├── pattern_graph4.jpg
│ ├── pattern_graph5.jpg
│ ├── pattern_schema.gif
│ ├── paypal-donate.jpg
│ ├── pattern_schema_de.gif
│ ├── pattern_schema_es.gif
│ ├── pattern_schema_fr.gif
│ ├── pattern_schema_it.gif
│ ├── pattern_schema_nl.gif
│ ├── pattern-vector-lsa1.jpg
│ ├── pattern-vector-svm1.jpg
│ ├── pattern-vector-svm2.jpg
│ ├── pattern-canvas-editor.jpg
│ ├── pattern-canvas-editor2.jpg
│ ├── pattern-canvas-filter1.jpg
│ ├── pattern-canvas-filter2.jpg
│ ├── pattern-canvas-filter3.jpg
│ ├── pattern-canvas-filter4.jpg
│ ├── pattern-canvas-origin1.jpg
│ ├── pattern-canvas-origin2.jpg
│ ├── pattern-canvas-path1.jpg
│ ├── pattern-metrics-bell.jpg
│ ├── pattern-canvas-particle1.png
│ ├── pattern-canvas-particle2.png
│ ├── pattern-canvas-particle3.png
│ ├── pattern-metrics-boxplot.jpg
│ ├── pattern-search-taxonomy.jpg
│ ├── pattern-vector-cluster1.jpg
│ ├── pattern-vector-cluster2.jpg
│ ├── pattern_example_100days.jpg
│ ├── pattern_example_italian.jpg
│ ├── pattern_example_spanish.jpg
│ ├── pattern-canvas-primitives1.jpg
│ ├── pattern-canvas-primitives2.jpg
│ ├── pattern-canvas-primitives3.jpg
│ ├── pattern-canvas-primitives4.jpg
│ ├── pattern-canvas-primitives5.jpg
│ ├── pattern-canvas-primitives6.jpg
│ ├── pattern-canvas-supershape1.jpg
│ ├── pattern-canvas-supershape2.jpg
│ ├── pattern-canvas-supershape3.jpg
│ ├── pattern_example_elections.jpg
│ └── pattern_example_semantic_network.jpg
├── desmedt12a.pdf
└── js
│ ├── shBrushXml.js
│ ├── shBrushJScript.js
│ ├── shBrushPython.js
│ └── shThemeDefault.css
├── examples
├── 02-db
│ ├── store.db
│ ├── food.txt
│ ├── 03-date.py
│ └── 02-datasheet.py
├── 08-server
│ ├── 02-api
│ │ └── rate.db
│ ├── 04-db
│ │ └── store.db
│ ├── 01-basic
│ │ └── static
│ │ │ └── cat.jpg
│ └── 03-wiki
│ │ └── data
│ │ └── index.html.txt
├── 03-en
│ ├── texts
│ │ ├── 1701.00002.txt
│ │ ├── 1701.00003.txt
│ │ ├── 1701.00004.txt
│ │ ├── 1701.00005.txt
│ │ ├── 1701.00006.txt
│ │ ├── 1701.00007.txt
│ │ ├── 1701.00008.txt
│ │ ├── 1701.00009.txt
│ │ ├── 1701.00010.txt
│ │ ├── 1701.00011.txt
│ │ ├── 1701.00012.txt
│ │ ├── 1701.00013.txt
│ │ ├── 1701.00014.txt
│ │ ├── 1701.00015.txt
│ │ ├── 1701.00016.txt
│ │ ├── 1701.00017.txt
│ │ ├── 1701.00018.txt
│ │ ├── 1701.00019.txt
│ │ ├── 1701.00020.txt
│ │ ├── 1701.00021.txt
│ │ ├── 1701.00022.txt
│ │ ├── 1701.00023.txt
│ │ ├── 1701.00024.txt
│ │ ├── 1701.00025.txt
│ │ ├── 1701.00026.txt
│ │ ├── 1701.00027.txt
│ │ ├── 1701.00028.txt
│ │ ├── 1701.00029.txt
│ │ ├── 1701.00030.txt
│ │ ├── 1701.00031.txt
│ │ ├── 1701.00032.txt
│ │ ├── 1701.00033.txt
│ │ ├── 1701.00034.txt
│ │ ├── 1701.00035.txt
│ │ ├── 1701.00037.txt
│ │ ├── 1701.00038.txt
│ │ ├── 1701.00039.txt
│ │ ├── 1701.00043.txt
│ │ ├── 1701.00044.txt
│ │ ├── 1701.00045.txt
│ │ ├── 1701.00046.txt
│ │ ├── 1701.00047.txt
│ │ ├── 1701.00049.txt
│ │ ├── 1701.00050.txt
│ │ ├── 1701.00051.txt
│ │ ├── 1701.00052.txt
│ │ ├── 1701.00053.txt
│ │ ├── 1701.00054.txt
│ │ ├── 1701.00055.txt
│ │ ├── 1701.00056.txt
│ │ ├── 1701.00057.txt
│ │ ├── 1701.00058.txt
│ │ ├── 1701.00060.txt
│ │ ├── 1701.00061.txt
│ │ ├── 1701.00062.txt
│ │ ├── 1701.00063.txt
│ │ ├── 1701.00064.txt
│ │ ├── 1701.00065.txt
│ │ ├── 1701.00066.txt
│ │ ├── 1701.00067.txt
│ │ ├── 1701.00068.txt
│ │ ├── 1701.00069.txt
│ │ ├── 1701.00072.txt
│ │ ├── 1701.00073.txt
│ │ ├── 1701.00074.txt
│ │ ├── 1701.00075.txt
│ │ ├── 1701.00076.txt
│ │ ├── 1701.00077.txt
│ │ ├── 1701.00078.txt
│ │ ├── 1701.00079.txt
│ │ ├── 1701.00081.txt
│ │ ├── 1701.00082.txt
│ │ ├── 1701.00083.txt
│ │ ├── 1701.00084.txt
│ │ ├── 1701.00085.txt
│ │ ├── 1701.00086.txt
│ │ ├── 1701.00087.txt
│ │ ├── 1701.00088.txt
│ │ ├── 1701.00089.txt
│ │ ├── 1701.00090.txt
│ │ ├── 1701.00091.txt
│ │ ├── 1701.00092.txt
│ │ ├── 1701.00094.txt
│ │ ├── 1701.00095.txt
│ │ ├── 1701.00096.txt
│ │ ├── 1701.00097.txt
│ │ ├── 1701.00098.txt
│ │ ├── 1701.00099.txt
│ │ ├── 1701.00100.txt
│ │ ├── 1701.00101.txt
│ │ ├── 1701.00102.txt
│ │ ├── 1701.00103.txt
│ │ ├── 1701.00104.txt
│ │ ├── 1701.00105.txt
│ │ ├── 1701.00106.txt
│ │ ├── 1701.00107.txt
│ │ ├── 1701.00109.txt
│ │ ├── 1701.00110.txt
│ │ ├── 1701.00111.txt
│ │ └── 1701.00112.txt
│ ├── 04-tree.py
│ ├── 02-quantify.py
│ ├── 08-topmine_ngrammer.py
│ ├── 03-parse.py
│ ├── 06-wordnet.py
│ └── 07-sentiment.py
├── 07-canvas
│ ├── 02-basic.html
│ ├── 01-basic.html
│ ├── 06-image.html
│ ├── 08-widget.html
│ ├── 04-path.html
│ ├── data-url.html
│ ├── 03-transformation.html
│ └── 05-points.html
├── 06-graph
│ ├── 07-graphml.py
│ ├── 01-graph.py
│ ├── 03-template.py
│ ├── 05-trends.py
│ ├── 02-export.py
│ ├── 06-commonsense.py
│ └── 04-canvas.html
├── 04-search
│ ├── 05-multiple.py
│ ├── 03-lemmata.py
│ ├── 09-web.py
│ ├── 06-optional.py
│ ├── 01-search.py
│ ├── 08-group.py
│ ├── 07-exclude.py
│ ├── 02-constraint.py
│ └── 04-taxonomy.py
├── 01-web
│ ├── 02-google-translate.py
│ ├── 05-twitter-stream.py
│ ├── 15-sort.py
│ ├── 06-feed.py
│ ├── 14-flickr.py
│ ├── 07-wikipedia.py
│ ├── 09-wikia.py
│ ├── 03-bing.py
│ ├── 01-google.py
│ ├── 04-twitter.py
│ ├── 08-wiktionary.py
│ └── 11-facebook.py
└── 05-vector
│ ├── corpus
│ └── parakeet.txt
│ ├── 05-nb.py
│ ├── 02-model.py
│ ├── 01-document.py
│ └── 04-KNN.py
├── test
├── corpora
│ ├── README.txt
│ ├── carroll-wonderland.pdf
│ └── carroll-lookingglass.docx
├── test_graph.js
├── test.html
├── test.js
└── test_ru.py
├── .gitignore
├── .travis.yml
└── LICENSE.txt

/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /pattern/server/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /pattern/text/xx/xx-context.txt: -------------------------------------------------------------------------------- 1 | IN VB PREVTAG PRP 2 | NN VB PREVTAG TO -------------------------------------------------------------------------------- /pattern/text/xx/xx-morphology.txt: -------------------------------------------------------------------------------- 1 | NN s fhassuf 1 NNS x 2 | ly hassuf 2 RB x -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/g/more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/more.png -------------------------------------------------------------------------------- /pattern/text/xx/xx-frequency.txt: -------------------------------------------------------------------------------- 1 | the 1.0000 2 | of 0.5040 3 | and 0.4805 4 | a 0.3941 -------------------------------------------------------------------------------- /docs/g/header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/header.jpg -------------------------------------------------------------------------------- /docs/g/scanner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/scanner.jpg -------------------------------------------------------------------------------- /docs/g/shadow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/shadow.png -------------------------------------------------------------------------------- /docs/desmedt12a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/desmedt12a.pdf -------------------------------------------------------------------------------- /docs/g/download.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/download.gif -------------------------------------------------------------------------------- /docs/g/download3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/download3.gif -------------------------------------------------------------------------------- /docs/g/external.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/external.png -------------------------------------------------------------------------------- /docs/g/gradient.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/gradient.jpg 
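The xx-* files above are the template data for bootstrapping a new language in pattern.text. xx-context.txt holds Brill-style contextual rules ("IN VB PREVTAG PRP" reads: retag IN as VB when the previous token is tagged PRP), xx-morphology.txt holds Brill-style suffix rules ("ly hassuf 2 RB x" tags words ending in the 2-letter suffix "ly" as RB), and xx-frequency.txt lists word frequencies relative to the most frequent word. A minimal sketch of how a PREVTAG rule could be applied; apply_prevtag_rules is a hypothetical helper for illustration, not pattern's actual API:

    # Brill-style PREVTAG contextual rules, as in xx-context.txt.
    # A rule (old, new, prev) reads: retag old -> new if the previous tag is prev.
    def apply_prevtag_rules(tagged, rules):
        for i in range(1, len(tagged)):
            for old, new, prev in rules:
                if tagged[i][1] == old and tagged[i - 1][1] == prev:
                    tagged[i] = (tagged[i][0], new)
        return tagged

    rules = [("IN", "VB", "PRP"), ("NN", "VB", "TO")]
    print(apply_prevtag_rules([("I", "PRP"), ("like", "IN"), ("cats", "NNS")], rules))
    # [('I', 'PRP'), ('like', 'VB'), ('cats', 'NNS')]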
-------------------------------------------------------------------------------- /examples/02-db/store.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/02-db/store.db -------------------------------------------------------------------------------- /test/corpora/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/test/corpora/README.txt -------------------------------------------------------------------------------- /docs/g/pattern_graph1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph1.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph2.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph3.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph4.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph5.jpg -------------------------------------------------------------------------------- /docs/g/pattern_schema.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema.gif -------------------------------------------------------------------------------- /docs/g/paypal-donate.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/paypal-donate.jpg -------------------------------------------------------------------------------- /docs/g/pattern_schema_de.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_de.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_es.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_es.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_fr.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_fr.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_it.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_it.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_nl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_nl.gif -------------------------------------------------------------------------------- /pattern/text/en/en-model.slp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/text/en/en-model.slp -------------------------------------------------------------------------------- /pattern/text/ru/ru-model.slp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/text/ru/ru-model.slp -------------------------------------------------------------------------------- /pattern/text/xx/xx-lexicon.txt: -------------------------------------------------------------------------------- 1 | The DT 2 | the DT 3 | cat NN 4 | sat VBD 5 | sit VB 6 | on IN 7 | mat NN 8 | . . -------------------------------------------------------------------------------- /docs/g/pattern-vector-lsa1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-lsa1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-vector-svm1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-svm1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-vector-svm2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-svm2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-editor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-editor.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-editor2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-editor2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter3.jpg 
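xx-lexicon.txt above is the tagger's word lexicon, mapping known word forms to their most likely part-of-speech tag; words not in the lexicon get a default tag, which the morphology and context rules then refine. A toy sketch of that first lexical pass, using only the entries shown (assumed behavior, not pattern's exact implementation):

    # Lexical pass of a Brill-style tagger: look each word up in the lexicon
    # and fall back to a default tag (here NN) for unknown words.
    LEXICON = {"The": "DT", "the": "DT", "cat": "NN", "sat": "VBD",
               "sit": "VB", "on": "IN", "mat": "NN", ".": "."}

    def tag(words, lexicon=LEXICON, default="NN"):
        return [(w, lexicon.get(w, default)) for w in words]

    print(tag("The cat sat on the mat .".split()))
    # [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'),
    #  ('the', 'DT'), ('mat', 'NN'), ('.', '.')]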
-------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter4.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-origin1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-origin1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-origin2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-origin2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-path1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-path1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-metrics-bell.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-metrics-bell.jpg -------------------------------------------------------------------------------- /examples/08-server/02-api/rate.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/08-server/02-api/rate.db -------------------------------------------------------------------------------- /examples/08-server/04-db/store.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/08-server/04-db/store.db -------------------------------------------------------------------------------- /docs/g/pattern-canvas-particle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-particle1.png -------------------------------------------------------------------------------- /docs/g/pattern-canvas-particle2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-particle2.png -------------------------------------------------------------------------------- /docs/g/pattern-canvas-particle3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-particle3.png -------------------------------------------------------------------------------- /docs/g/pattern-metrics-boxplot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-metrics-boxplot.jpg -------------------------------------------------------------------------------- /docs/g/pattern-search-taxonomy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-search-taxonomy.jpg 
-------------------------------------------------------------------------------- /docs/g/pattern-vector-cluster1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-cluster1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-vector-cluster2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-cluster2.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_100days.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_100days.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_italian.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_italian.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_spanish.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_spanish.jpg -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00002.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00002.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00003.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00003.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00004.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00004.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00005.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00005.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00006.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00006.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00007.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00007.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00008.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00008.txt 
-------------------------------------------------------------------------------- /examples/03-en/texts/1701.00009.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00009.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00010.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00010.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00011.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00011.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00012.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00012.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00013.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00013.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00014.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00014.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00015.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00015.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00016.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00016.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00017.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00017.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00018.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00018.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00019.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00019.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00020.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00020.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00021.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00021.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00022.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00022.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00023.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00023.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00024.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00024.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00025.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00025.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00026.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00026.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00027.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00027.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00028.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00028.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00029.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00029.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00030.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00030.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00031.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00031.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00032.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00032.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00033.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00033.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00034.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00034.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00035.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00035.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00037.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00037.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00038.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00038.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00039.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00039.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00043.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00043.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00044.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00044.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00045.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00045.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00046.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00046.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00047.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00047.txt -------------------------------------------------------------------------------- 
/examples/03-en/texts/1701.00049.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00049.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00050.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00050.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00051.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00051.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00052.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00052.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00053.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00053.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00054.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00054.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00055.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00055.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00056.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00056.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00057.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00057.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00058.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00058.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00060.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00060.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00061.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00061.txt 
-------------------------------------------------------------------------------- /examples/03-en/texts/1701.00062.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00062.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00063.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00063.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00064.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00064.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00065.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00065.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00066.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00066.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00067.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00067.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00068.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00068.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00069.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00069.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00072.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00072.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00073.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00073.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00074.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00074.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00075.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00075.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00076.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00076.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00077.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00077.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00078.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00078.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00079.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00079.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00081.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00081.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00082.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00082.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00083.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00083.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00084.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00084.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00085.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00085.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00086.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00086.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00087.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00087.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00088.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00088.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00089.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00089.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00090.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00090.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00091.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00091.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00092.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00092.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00094.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00094.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00095.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00095.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00096.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00096.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00097.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00097.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00098.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00098.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00099.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00099.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00100.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00100.txt -------------------------------------------------------------------------------- 
/examples/03-en/texts/1701.00101.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00101.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00102.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00102.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00103.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00103.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00104.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00104.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00105.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00105.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00106.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00106.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00107.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00107.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00109.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00109.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00110.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00110.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00111.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00111.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00112.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00112.txt -------------------------------------------------------------------------------- /test/corpora/carroll-wonderland.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/test/corpora/carroll-wonderland.pdf 
-------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives3.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives4.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives5.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives6.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-supershape1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-supershape1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-supershape2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-supershape2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-supershape3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-supershape3.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_elections.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_elections.jpg -------------------------------------------------------------------------------- /pattern/text/en/wordnet/dict/index.32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/text/en/wordnet/dict/index.32 -------------------------------------------------------------------------------- /test/corpora/carroll-lookingglass.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/test/corpora/carroll-lookingglass.docx -------------------------------------------------------------------------------- /docs/g/pattern_example_semantic_network.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_semantic_network.jpg -------------------------------------------------------------------------------- /examples/08-server/01-basic/static/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/08-server/01-basic/static/cat.jpg -------------------------------------------------------------------------------- /pattern/text/xx/xx-verbs.txt: -------------------------------------------------------------------------------- 1 | be,am,are,is,are,being,was,were,was,were,were,been,,am not,aren't,isn't,aren't,,wasn't,weren't,wasn't,weren't,weren't, -------------------------------------------------------------------------------- /pattern/vector/svm/macos/libsvm-3.22/libsvm.so.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/macos/libsvm-3.22/libsvm.so.2 -------------------------------------------------------------------------------- /pattern/vector/svm/ubuntu/libsvm-3.22/libsvm.so.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/ubuntu/libsvm-3.22/libsvm.so.2 -------------------------------------------------------------------------------- /pattern/vector/svm/windows/libsvm-3.22/libsvm.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/windows/libsvm-3.22/libsvm.dll -------------------------------------------------------------------------------- /pattern/vector/svm/macos/liblinear-2.20/liblinear.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/macos/liblinear-2.20/liblinear.so.3 -------------------------------------------------------------------------------- /pattern/vector/svm/ubuntu/liblinear-2.20/liblinear.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/ubuntu/liblinear-2.20/liblinear.so.3 -------------------------------------------------------------------------------- /pattern/vector/svm/windows/liblinear-2.20/liblinear.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/windows/liblinear-2.20/liblinear.dll -------------------------------------------------------------------------------- /examples/02-db/food.txt: -------------------------------------------------------------------------------- 1 | "id (INTEGER)","name (STRING)","type (STRING)","color (STRING)" 2 | "1","broccoli","vegetable","green" 3 | "2","turnip","vegetable","purple" 4 | "3","asparagus","vegetable","white" 5 | "4","banana","fruit","yellow" -------------------------------------------------------------------------------- /pattern/text/it/__main__.py: 
-------------------------------------------------------------------------------- 1 | #### PATTERN | IT | PARSER COMMAND-LINE ############################################################ 2 | 3 | from __future__ import absolute_import 4 | 5 | from .__init__ import parse, commandline 6 | commandline(parse) 7 | -------------------------------------------------------------------------------- /pattern/text/xx/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | XX | PARSER COMMAND-LINE ############################################################ 2 | 3 | from __future__ import absolute_import 4 | 5 | from .__init__ import parse, commandline 6 | commandline(parse) 7 | -------------------------------------------------------------------------------- /examples/08-server/03-wiki/data/index.html.txt: -------------------------------------------------------------------------------- 1 |
This is a very simple wiki powered by pattern.server. 2 | Each page can be modified by clicking the edit-link. 3 | To create a new page, create a link to it, for example this test page.
-------------------------------------------------------------------------------- /pattern/vector/svm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | 4 | LIBSVM = LIBLINEAR = True 5 | 6 | try: 7 | from . import libsvm 8 | from . import libsvmutil 9 | except ImportError as e: 10 | LIBSVM = False 11 | raise e 12 | 13 | try: 14 | from . import liblinear 15 | from . import liblinearutil 16 | except: 17 | LIBLINEAR = False 18 | -------------------------------------------------------------------------------- /pattern/text/xx/xx-sentiment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /pattern/text/de/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | DE | RULE-BASED SHALLOW PARSER ###################################################### 2 | # Copyright (c) 2012 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/en/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | EN | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/es/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | ES | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import commandline, parse 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/fr/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | FR | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2013 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 
5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/nl/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | NL | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import commandline, parse 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/ru/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | RU | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/vector/stopwords-nl.txt: -------------------------------------------------------------------------------- 1 | aan, af, al, alles, als, altijd, andere, ben, bij, daar, dan, dat, de, der, deze, die, dit, doch, doen, door, dus, een, eens, en, er, ge, geen, geweest, haar, had, heb, hebben, heeft, hem, het, hier, hij, hoe, hun, iemand, iets, ik, in, is, ja, je, kan, kon, kunnen, maar, me, meer, men, met, mij, mijn, moet, na, naar, niet, niets, nog, nu, of, om, omdat, onder, ons, ook, op, over, reeds, te, tegen, toch, toen, tot, u, uit, uw, van, veel, voor, want, waren, was, wat, we, wel, werd, wezen, wie, wij, wil, worden, wordt, zal, ze, zei, zelf, zich, zij, zijn, zo, zonder, zou -------------------------------------------------------------------------------- /test/test_graph.js: -------------------------------------------------------------------------------- 1 | var test_graph = { 2 | 3 | //---------------------------------------------------------------------------------------------- 4 | // Unit tests for the graph.js module (see also test.html). 5 | 6 | TestCase: function() { 7 | this.setUp = function() { 8 | return; 9 | }; 10 | this.tearDown = function() { 11 | return; 12 | }; 13 | }, 14 | 15 | //---------------------------------------------------------------------------------------------- 16 | 17 | suite: function() { 18 | return []; 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /test/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 15 | 16 | Output is printed to the console (Developer Tools). 
17 | -------------------------------------------------------------------------------- /pattern/text/en/wordlist/time.txt: -------------------------------------------------------------------------------- 1 | a.m., afternoon, always, annually, apr, april, aug, august, autumn, before, breakfast, century, christmas, Christmas, daily, date, dawn, day, day, daybreak, decade, dec, december, during, dusk, easter, Easter, epoch, equinox, era, eve, evening, feb, february, fortnight, fri, friday, future, halloween, hour, hourly, jan, january, jul, july, jun, june, lunch, mar, march, may, microsecond, midday, midnight, millenium, millisecond, minute, mon, monday, month, monthly, morning, nanosecond, night, nightfall, noon, nov, november, now, o'clock, oct, october, p.m., past, present, sat, saturday, season, second, semester, sep, september, soon, spring, summer, sunday, sundown, sunrise, sunset, supper, then, time, today, tomorrow, trimester, tue, tuesday, twilight, wednesday, week, weekly, winter, year, yearly, yesterday -------------------------------------------------------------------------------- /examples/07-canvas/02-basic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | basics (2) 5 | 6 | 7 | 8 | 9 | 26 | 27 | -------------------------------------------------------------------------------- /pattern/text/en/wordnet/dict/lexnames: -------------------------------------------------------------------------------- 1 | 00 adj.all 3 2 | 01 adj.pert 3 3 | 02 adv.all 4 4 | 03 noun.Tops 1 5 | 04 noun.act 1 6 | 05 noun.animal 1 7 | 06 noun.artifact 1 8 | 07 noun.attribute 1 9 | 08 noun.body 1 10 | 09 noun.cognition 1 11 | 10 noun.communication 1 12 | 11 noun.event 1 13 | 12 noun.feeling 1 14 | 13 noun.food 1 15 | 14 noun.group 1 16 | 15 noun.location 1 17 | 16 noun.motive 1 18 | 17 noun.object 1 19 | 18 noun.person 1 20 | 19 noun.phenomenon 1 21 | 20 noun.plant 1 22 | 21 noun.possession 1 23 | 22 noun.process 1 24 | 23 noun.quantity 1 25 | 24 noun.relation 1 26 | 25 noun.shape 1 27 | 26 noun.state 1 28 | 27 noun.substance 1 29 | 28 noun.time 1 30 | 29 verb.body 2 31 | 30 verb.change 2 32 | 31 verb.cognition 2 33 | 32 verb.communication 2 34 | 33 verb.competition 2 35 | 34 verb.consumption 2 36 | 35 verb.contact 2 37 | 36 verb.creation 2 38 | 37 verb.emotion 2 39 | 38 verb.motion 2 40 | 39 verb.perception 2 41 | 40 verb.possession 2 42 | 41 verb.social 2 43 | 42 verb.stative 2 44 | 43 verb.weather 2 45 | 44 adj.ppl 3 46 | -------------------------------------------------------------------------------- /examples/06-graph/07-graphml.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join("..", "..")) 10 | 11 | from pattern.graph import Graph, WEIGHT, CENTRALITY, DEGREE, DEFAULT 12 | from random import choice, random 13 | 14 | # This example demonstrates how a graph visualization can be exported to GraphML, 15 | # a file format that can be opened in Gephi (https://gephi.org). 16 | 17 | g = Graph() 18 | # Random nodes. 19 | for i in range(50): 20 | g.add_node(i) 21 | # Random edges. 
22 | for i in range(75): 23 | node1 = choice(g.nodes) 24 | node2 = choice(g.nodes) 25 | g.add_edge(node1, node2, 26 | weight = random()) 27 | 28 | g.prune(0) 29 | 30 | # This node's label is different from its id. 31 | g[1].text.string = "home" 32 | 33 | # By default, Graph.export() exports to HTML, 34 | # but if we give it a filename that ends in .graphml it will export to GraphML. 35 | g.export(os.path.join(os.path.dirname(__file__), "test.graphml")) 36 | -------------------------------------------------------------------------------- /examples/04-search/05-multiple.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search 11 | from pattern.en import parsetree 12 | 13 | # Constraints ending in "+" match one or more words. 14 | # Pattern.search() uses a "greedy" approach: 15 | # it will attempt to match as many words as possible. 16 | 17 | # The following pattern means: 18 | # one or more words starting with "t", 19 | # followed by one or more words starting with "f". 20 | t = parsetree("one two three four five six") 21 | m = search("t*+ f*+", t) 22 | print(t) 23 | print(m) 24 | print("") 25 | 26 | for w in m[0].words: 27 | print("%s matches %s" % (w, m[0].constraint(w))) 28 | 29 | # "*" matches each word in the sentence. 30 | # This yields a list with a Match object for each word. 31 | print("") 32 | print("* => %s" % search("*", t)) 33 | 34 | # "*+" matches all words. 35 | # This yields a list with one Match object containing all words. 36 | print("") 37 | print("*+ => %s" % search("*+", t)) 38 | -------------------------------------------------------------------------------- /examples/01-web/02-google-translate.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Google, plaintext 11 | 12 | # A search engine in pattern.web sometimes has custom methods that the others don't. 13 | # For example, Google has Google.translate() and Google.identify(). 14 | 15 | # This example demonstrates the Google Translate API. 16 | # It will only work with a license key, since it is a paid service. 17 | # In the Google API console (https://code.google.com/apis/console/), 18 | # activate Translate API. 19 | 20 | g = Google(license=None) # Enter your license key. 21 | q = "Your mother was a hamster and your father smelled of elderberries!" # en 22 | # "Ihre Mutter war ein Hamster und euer Vater roch nach Holunderbeeren!" # de 23 | print(q) 24 | print(plaintext(g.translate(q, input="en", output="de"))) # es, fr, sv, ja, ... 25 | print("") 26 | 27 | q = "C'est un lapin, lapin de bois, un cadeau." 
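# The sentence above is French; identify() below guesses its language.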
28 | print(q) 29 | print(g.identify(q)) # (language, confidence) 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.pyc 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | .coveralls.yml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | *.dev* 54 | *.nja 55 | 56 | build 57 | dist 58 | 59 | # Environments 60 | .env 61 | .venv 62 | env/ 63 | venv/ 64 | ENV/ 65 | 66 | # Flymake 67 | *_flymake.py 68 | 69 | # Pattern specific ignore pattern 70 | pattern/web/cache/tmp/ 71 | web/cache/tmp/ 72 | pattern_unittest_db 73 | test/pattern_unittest_db 74 | 75 | .DS_Store 76 | -------------------------------------------------------------------------------- /examples/01-web/05-twitter-stream.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | import time 12 | 13 | from pattern.web import Twitter 14 | 15 | # Another way to mine Twitter is to set up a stream. 16 | # A Twitter stream maintains an open connection to Twitter, 17 | # and waits for data to pour in. 18 | # Twitter.search() allows us to look at older tweets, 19 | # Twitter.stream() gives us the most recent tweets. 20 | 21 | # It might take a few seconds to set up the stream. 22 | stream = Twitter().stream("I hate", timeout=30) 23 | 24 | #while True: 25 | for i in range(10): 26 | print(i) 27 | # Poll Twitter to see if there are new tweets. 28 | stream.update() 29 | # The stream is a list of buffered tweets so far, 30 | # with the latest tweet at the end of the list. 31 | for tweet in reversed(stream): 32 | print(tweet.text) 33 | print(tweet.language) 34 | # Clear the buffer every so often. 35 | stream.clear() 36 | # Wait awhile between polls. 37 | time.sleep(1) 38 | -------------------------------------------------------------------------------- /examples/06-graph/01-graph.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.graph import Graph, CENTRALITY 11 | 12 | # A graph is a network of nodes (or concepts) 13 | # connected to each other with edges (or links). 
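# Below, we build a small semantic network of six related concepts,
# then query it for shortest paths and node centrality.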
14 | 15 | g = Graph() 16 | for n in ("tree", "nest", "bird", "fly", "insect", "ant"): 17 | g.add_node(n) 18 | 19 | g.add_edge("tree", "nest") # Trees have bird nests. 20 | g.add_edge("nest", "bird") # Birds live in nests. 21 | g.add_edge("bird", "fly") # Birds eat flies. 22 | g.add_edge("ant", "bird") # Birds eat ants. 23 | g.add_edge("fly", "insect") # Flies are insects. 24 | g.add_edge("insect", "ant") # Ants are insects. 25 | g.add_edge("ant", "tree") # Ants crawl on trees. 26 | 27 | # From tree => fly: tree => ant => bird => fly 28 | print(g.shortest_path(g.node("tree"), g.node("fly"))) 29 | print(g.shortest_path(g.node("nest"), g.node("ant"))) 30 | print() 31 | 32 | # Which nodes get the most traffic? 33 | for n in sorted(g.nodes, key=lambda n: n.centrality, reverse=True): 34 | print('%.2f' % n.centrality, n) 35 | -------------------------------------------------------------------------------- /pattern/vector/svm/INSTALL.txt: -------------------------------------------------------------------------------- 1 | In order to be able to use LIBSVM and LIBLINEAR, you have to download, compile and install both libraries. 2 | 3 | - If you are on ArchLinux: pacman -S libsvm liblinear (or you can install liblinear-multicore instead of liblinear) 4 | - If you are on Ubuntu/Debian: e.g. apt-cache search libsvm (to find the right packages) 5 | - If you are on Windows or Mac OS, follow the guidelines on the websites listed below. 6 | - You can also manually download, compile and install both libraries. Read on for further information. 7 | 8 | To install from source, download the latest versions of LIBSVM and LIBLINEAR: 9 | http://www.csie.ntu.edu.tw/~cjlin/libsvm/ 10 | http://www.csie.ntu.edu.tw/~cjlin/liblinear/ 11 | 12 | - From the command line, do "make" in libsvm/. 13 | - Now do "make" in libsvm/python/. 14 | - You should see a file "libsvm.so.2", which is the binary you need. 15 | - Put it in pattern/vector/svm/libsvm-x.xx/ 16 | 17 | Repeat the steps for LIBLINEAR. 18 | 19 | You may need to rename option "-soname" to "-install_name" in the Makefile on Mac OS X. 20 | 21 | You may need to modify pattern/vector/svm/svm.py around line 15 to import the new binary. Please send us a copy of the compiled binary so we can include it in the next release. -------------------------------------------------------------------------------- /examples/03-en/04-tree.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import parse, Text 11 | 12 | # The easiest way to analyze the output of the parser is to create a Text. 13 | # A Text is a "parse tree" of linked Python objects. 14 | # A Text is essentially a list of Sentence objects. 15 | # Each Sentence is a list of Word objects. 16 | # Each Word can be part of a Chunk object, accessible with Word.chunk. 17 | s = "I eat pizza with a silver fork." 18 | s = parse(s) 19 | s = Text(s) 20 | 21 | # You can also use the parsetree() function, 22 | # which is the equivalent of Text(parse()). 23 | 24 | print(s[0].words) # A list of all the words in the first sentence. 25 | print(s[0].chunks) # A list of all the chunks in the first sentence.
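# The words in the last chunk of the first sentence: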
26 | print(s[0].chunks[-1].words) 27 | print("") 28 | 29 | for sentence in s: 30 | for word in sentence: 31 | print(word.string, 32 | word.type, 33 | word.chunk, 34 | word.pnp) 35 | 36 | # A Text can be exported as an XML-string (among other formats). 37 | print("") 38 | print(s.xml) 39 | -------------------------------------------------------------------------------- /pattern/text/it/it-context.txt: -------------------------------------------------------------------------------- 1 | PRP IN WDPREVTAG VB che 2 | PRP IN WDPREVTAG RB che 3 | PRP IN WDNEXTTAG che DT 4 | IN RB WDNEXTTAG come IN 5 | DT PRP WDNEXTTAG gli VB 6 | DT PRP WDNEXTTAG lo VB 7 | PRP DT WDNEXTTAG cui NN 8 | CD RB WDNEXTTAG prima IN 9 | VB NN WDNEXTTAG stato JJ 10 | PRP DT WDNEXTTAG uno NN 11 | RB JJ WDNEXTTAG solo NN 12 | CC VB WDNEXTTAG sia VB 13 | CC VB WDNEXTTAG sia RB 14 | DT PRP WDNEXTTAG altri VB 15 | PRP DT WDNEXTTAG quella NN 16 | PRP DT WDNEXTTAG quali NN 17 | RB PRP WDNEXTTAG quanto VB 18 | IN CD WDNEXTTAG secondo NN 19 | PRP DT WDNEXTTAG tutto DT 20 | VB RB WDNEXTTAG fa , 21 | VB RB WDNEXTTAG fa ( 22 | IN RB WDNEXTTAG oltre IN 23 | IN RB WDNEXTTAG come IN 24 | DT PRP WDNEXTTAG gli VB 25 | DT PRP WDNEXTTAG lo VB 26 | NN VB WDPREVTAG VB stato 27 | NN VB WDPREVTAG RB parte 28 | IN PRP WDPREVTAG IN se 29 | IN PRP RBIGRAM se stesso 30 | VB NN WDPREVTAG DT essere 31 | JJ NN WDPREVTAG DT italiano 32 | RB JJ RBIGRAM solo . 33 | IN CD WDPREVTAG DT secondo 34 | PRP DT WDNEXTTAG uno NN 35 | PRP DT WDNEXTTAG uno JJ 36 | NN VB WDPREVTAG VB fatto 37 | IN RB WDNEXTTAG contro VB 38 | RB JJ WDPREVTAG VB molto 39 | IN WRB LBIGRAM STAART Quando 40 | IN WRB LBIGRAM STAART Perché 41 | IN WRB LBIGRAM STAART Dove 42 | NN VB WDPREVTAG VB data 43 | JJ PRP WDPREVTAG DT proprio 44 | NN JJ WDPREVTAG NN politica 45 | JJ NN WDPREVTAG DT politico -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | function assert(expression) { 2 | /* Throws AssertException if the given expression evaluates to false. 3 | */ 4 | if (!expression) throw "AssertException"; 5 | } 6 | 7 | function TestCase() { 8 | /* TestCase objects have a setUp() and a tearDown() method, 9 | * called before and after each test respectively. 10 | * Tests in a TestCase have method names starting with "test". 11 | */ 12 | this.setUp = function() { 13 | return; 14 | }; 15 | this.tearDown = function() { 16 | return; 17 | }; 18 | this.testMethod = function() { 19 | assert(true == false); 20 | }; 21 | } 22 | 23 | function run(tests) { 24 | /* Executes each method whose name starts with "test", 25 | * for each TestCase object in the given array. 26 | * Throws AssertException if the method fails.
27 | */ 28 | for (var i=0; i < tests.length; i++) { 29 | for (var method in tests[i]) { 30 | if (method.substring(0,4) == "test") { 31 | tests[i].setUp(); 32 | try { 33 | tests[i][method](); 34 | } catch(e) { 35 | console.error(e + " in " + method + "()"); 36 | } 37 | tests[i].tearDown(); 38 | } 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /examples/02-db/03-date.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.db import date, time, NOW 11 | from pattern.web import Bing, NEWS 12 | 13 | # It is often useful to keep a date stamp for each row in the table. 14 | # The pattern.db module's date() function can be used for this. 15 | # It is a simple wrapper around Python's datetime.datetime class, 16 | # with extra functionality to make it easy to parse or print it as a string. 17 | 18 | print(date(NOW)) 19 | print(date()) 20 | print(date("2010-11-01 16:30", "%Y-%m-%d %H:%M")) 21 | print(date("Nov 1, 2010", "%b %d, %Y")) 22 | print(date("Nov 1, 2010", "%b %d, %Y", format="%d/%m/%Y")) 23 | print("") 24 | 25 | # All possible formatting options: 26 | # http://docs.python.org/library/time.html#time.strftime 27 | 28 | for r in Bing(license=None, language="en").search("today", type=NEWS): 29 | print(r.title) 30 | print(repr(r.date)) # Result.date is a string (e.g. we can't > <= += with the date). 31 | print(date(r.date)) # date() can parse any Result.date in the web module. 32 | print("") 33 | 34 | d = date("4 november 2011") 35 | d += time(days=2, hours=5) 36 | print(d) 37 | -------------------------------------------------------------------------------- /examples/07-canvas/01-basic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | basics (1) 5 | 6 | 7 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /examples/04-search/03-lemmata.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search, match 11 | from pattern.en import parsetree 12 | 13 | # This example demonstrates an interesting search pattern that mines for comparisons. 14 | # Notice the use of the constraint "be". 15 | # If the output from the parser includes word lemmas (e.g., "doing" => "do") 16 | # these will also be matched. Using "be" then matches "is", "being", "are", ... 17 | # and if underspecification is used "could be", "will be", "definitely was", ... 
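# For instance, the single constraint "be" should match the inflected "was" below,
# because parsing with lemmata=True annotates each word with its lemma:
print(match("be", parsetree("the turtle was faster than the hare", lemmata=True)))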
18 | 19 | p = "NP be ADJP|ADVP than NP" 20 | 21 | for s in ( 22 | "the turtle was faster than the hare", 23 | "Arnold Schwarzenegger is more dangerous than Dolph Lundgren"): 24 | t = parsetree(s, lemmata=True) # parse lemmas 25 | m = search(p, t) 26 | if m: 27 | # Constituents for the given constraint indices: 28 | # 0 = NP, 2 = ADJP|ADVP, 4 = NP 29 | print(m[0].constituents(constraint=[0, 2, 4])) 30 | print("") 31 | 32 | 33 | p = "NP be ADJP|ADVP than NP" 34 | t = parsetree("the turtle was faster than the hare", lemmata=True) 35 | m = match(p, t) 36 | print(t) 37 | print("") 38 | for w in m.words: 39 | print("%s\t=> %s" % (w, m.constraint(w))) 40 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | dist: precise 4 | 5 | python: 6 | - "3.6" 7 | 8 | before_install: 9 | - export TZ=Europe/Brussels 10 | - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; else wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi 11 | - bash miniconda.sh -b -p $HOME/miniconda 12 | - export PATH="$HOME/miniconda/bin:$PATH" 13 | - conda update --yes conda 14 | - conda install --yes numpy scipy 15 | - pip install --quiet pytest pytest-cov pytest-xdist chardet 16 | 17 | install: 18 | - python setup.py install --quiet 19 | - pip freeze 20 | # Install and compile libsvm and liblinear 21 | - sudo apt-get install -y build-essential 22 | - git clone https://github.com/cjlin1/libsvm 23 | - cd libsvm; make lib; sudo cp libsvm.so.2 /lib; sudo ln -s /lib/libsvm.so.2 /lib/libsvm.so; cd .. 24 | - git clone https://github.com/cjlin1/liblinear 25 | - cd liblinear; make lib; sudo cp liblinear.so.3 /lib; sudo ln -s /lib/liblinear.so.3 /lib/liblinear.so; cd .. 26 | 27 | script: 28 | - pytest --cov=pattern 29 | 30 | 31 | after_script: 32 | - pip install --quiet coveralls 33 | - coveralls 34 | 35 | branches: 36 | only: 37 | - development 38 | 39 | notifications: 40 | email: false 41 | 42 | # You can connect to MySQL/MariaDB using the username "travis" or "root" and a blank password. 43 | services: 44 | - mysql 45 | -------------------------------------------------------------------------------- /examples/01-web/15-sort.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import GOOGLE, YAHOO, BING, sort 11 | 12 | # The pattern.web module includes an interesting sort() algorithm. 13 | # It classifies search terms according to a search engine's total results count. 14 | # When a context is defined, it sorts according to relevancy to the context: 15 | # sort(terms=["black", "green", "red"], context="Darth Vader") => 16 | # yields "black" as the best candidate, 17 | # because "black Darth Vader" yields more search results. 18 | 19 | results = sort( 20 | terms = [ 21 | "arnold schwarzenegger", 22 | "chuck norris", 23 | "dolph lundgren", 24 | "steven seagal", 25 | "sylvester stallone", 26 | "mickey mouse", 27 | ], 28 | context = "dangerous", # Term used for sorting. 29 | service = BING, # GOOGLE, YAHOO, BING, ...
30 | license = None, # You should supply your own API license key for the given service. 31 | strict = True, # Wraps the query in quotes, i.e. 'mac sweet'. 32 | reverse = True, # Reverses term and context: 'sweet mac' instead of 'mac sweet'. 33 | cached = True) 34 | 35 | for weight, term in results: 36 | print("%5.2f" % (weight * 100) + "%", term) 37 | -------------------------------------------------------------------------------- /pattern/web/api.py: -------------------------------------------------------------------------------- 1 | #--- API LICENSE CONFIGURATION ----------------------------------------------------------------------- 2 | # Default license keys used by pattern.web.SearchEngine to contact different API's. 3 | # Google and Yahoo are paid services for which you need a personal license + payment method. 4 | # The default Google license is for testing purposes (= 100 daily queries). 5 | # Wikipedia, Twitter and Facebook are free. 6 | # Bing, Flickr and ProductsWiki use licenses shared among all Pattern users. 7 | 8 | from __future__ import unicode_literals 9 | from builtins import dict 10 | 11 | license = {} 12 | license["Google"] = \ 13 | "AIzaSyBxe9jC4WLr-Rry_5OUMOZ7PCsEyWpiU48" 14 | 15 | license["Bing"] = \ 16 | "VnJEK4HTlntE3SyF58QLkUCLp/78tkYjV1Fl3J7lHa0=" 17 | 18 | license["Yahoo"] = \ 19 | ("", "") # OAuth (key, secret) 20 | 21 | license["DuckDuckGo"] = \ 22 | None 23 | 24 | license["Faroo"] = \ 25 | "" 26 | 27 | license["Wikipedia"] = \ 28 | None 29 | 30 | license["Twitter"] = ( 31 | "p7HUdPLlkKaqlPn6TzKkA", # OAuth (key, secret, token) 32 | "R7I1LRuLY27EKjzulutov74lKB0FjqcI2DYRUmsu7DQ", ( 33 | "14898655-TE9dXQLrzrNd0Zwf4zhK7koR5Ahqt40Ftt35Y2qY", 34 | "q1lSRDOguxQrfgeWWSJgnMHsO67bqTd5dTElBsyTM")) 35 | 36 | license["Facebook"] = \ 37 | "332061826907464|jdHvL3lslFvN-s_sphK1ypCwNaY" 38 | 39 | license["Flickr"] = \ 40 | "787081027f43b0412ba41142d4540480" 41 | 42 | license["ProductWiki"] = \ 43 | "64819965ec784395a494a0d7ed0def32" 44 | -------------------------------------------------------------------------------- /examples/01-web/06-feed.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Newsfeed, plaintext, URL 11 | from pattern.db import date 12 | 13 | # This example reads a given RSS or Atom newsfeed channel. 14 | # Some example feeds to try out: 15 | NATURE = "http://feeds.nature.com/nature/rss/current" 16 | SCIENCE = "http://www.sciencemag.org/rss/podcast.xml" 17 | NYT = "http://rss.nytimes.com/services/xml/rss/nyt/GlobalHome.xml" 18 | TIME = "http://feeds.feedburner.com/time/topstories" 19 | CNN = "http://rss.cnn.com/rss/edition.rss" 20 | 21 | engine = Newsfeed() 22 | 23 | for result in engine.search(CNN, cached=True): 24 | print(result.title.upper()) 25 | print(plaintext(result.text)) # Remove HTML formatting. 26 | print(result.url) 27 | print(result.date) 28 | print("") 29 | 30 | # News item URL's lead to the page with the full article. 31 | # This page can have any kind of formatting. 32 | # There is no default way to read it. 33 | # But we could just download the source HTML and convert it to plain text: 34 | 35 | #html = URL(result.url).download() 36 | #print(plaintext(html)) 37 | 38 | # The resulting text may contain a lot of garbage. 
39 | # A better way is to use a DOM parser to select the HTML elements we want. 40 | # This is demonstrated in one of the next examples. 41 | -------------------------------------------------------------------------------- /examples/06-graph/03-template.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.graph import Graph, CSS, CANVAS 11 | 12 | # This example demonstrates how to roll dynamic HTML graphs. 13 | # We have a HTML template in which content is inserted on-the-fly. 14 | 15 | # This is useful if the graph data changes dynamically, 16 | # e.g., the user clicks on a node and is taken to a webpage with a new subgraph. 17 | 18 | template = ''' 19 | 20 | 21 | 22 | \t 23 | \t 24 | \t 25 | \t 28 | 29 | 30 | \t%s 31 | 32 | 33 | '''.strip() 34 | 35 | 36 | def webpage(graph, **kwargs): 37 | s1 = graph.serialize(CSS, **kwargs) 38 | s2 = graph.serialize(CANVAS, **kwargs) 39 | return template % ( 40 | s1.replace("\n", "\n\t\t"), 41 | s2.replace("\n", "\n\t") 42 | ) 43 | 44 | # Create a graph: 45 | g = Graph() 46 | g.add_node("cat") 47 | g.add_node("dog") 48 | g.add_edge("cat", "dog") 49 | 50 | # To make this work as a cgi-bin script, uncomment the following lines: 51 | ##!/usr/bin/env python 52 | #import cgi 53 | #import cgitb; cgitb.enable() # Debug mode. 54 | #print("Content-type: text/html") 55 | 56 | print(webpage(g, width=500, height=500)) 57 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2013 University of Antwerp, Belgium 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Pattern nor the names of its 14 | contributors may be used to endorse or promote products 15 | derived from this software without specific prior written 16 | permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 28 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /pattern/helpers.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | 5 | from builtins import str, bytes, dict, int 6 | from builtins import map, zip, filter 7 | from builtins import object, range 8 | 9 | #--- STRING FUNCTIONS ------------------------------------------------------------------------------ 10 | # Latin-1 (ISO-8859-1) encoding is identical to Windows-1252 except for the code points 128-159: 11 | # Latin-1 assigns control codes in this range, Windows-1252 has characters, punctuation, symbols 12 | # assigned to these code points. 13 | 14 | 15 | def decode_string(v, encoding="utf-8"): 16 | """ Returns the given value as a Unicode string (if possible). 17 | """ 18 | if isinstance(encoding, str): 19 | encoding = ((encoding,),) + (("windows-1252",), ("utf-8", "ignore")) 20 | if isinstance(v, bytes): 21 | for e in encoding: 22 | try: 23 | return v.decode(*e) 24 | except: 25 | pass 26 | return v 27 | return str(v) 28 | 29 | 30 | def encode_string(v, encoding="utf-8"): 31 | """ Returns the given value as a Python byte string (if possible). 32 | """ 33 | if isinstance(encoding, str): 34 | encoding = ((encoding,),) + (("windows-1252",), ("utf-8", "ignore")) 35 | if isinstance(v, str): 36 | for e in encoding: 37 | try: 38 | return v.encode(*e) 39 | except: 40 | pass 41 | return v 42 | return bytes(v) 43 | 44 | decode_utf8 = decode_string 45 | encode_utf8 = encode_string 46 | -------------------------------------------------------------------------------- /pattern/vector/svm/COPYRIGHT-liblinear.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2015 The LIBLINEAR Project. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /pattern/vector/svm/COPYRIGHT-libsvm.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /examples/04-search/09-web.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Bing, plaintext 12 | from pattern.en import parsetree 13 | from pattern.search import Pattern 14 | from pattern.db import Datasheet, pprint 15 | 16 | # "X IS MORE IMPORTANT THAN Y" 17 | # Here is a rough example of how to build a web miner. 18 | # It mines comparative statements from Bing and stores the results in a table, 19 | # which can be saved as a text file for further processing later on. 20 | 21 | # Pattern matching also works with Sentence objects from the MBSP module. 22 | # MBSP's parser is much more robust (but also slower). 23 | #from MBSP import Sentence, parse 24 | 25 | q = '"more important than"' # Bing search query 26 | p = "NP VP? more important than NP" # Search pattern. 27 | p = Pattern.fromstring(p) 28 | d = Datasheet() 29 | 30 | engine = Bing(license=None) 31 | for i in range(1): # max=10 32 | for result in engine.search(q, start=i + 1, count=100, cached=True): 33 | s = result.description 34 | s = plaintext(s) 35 | t = parsetree(s) 36 | for m in p.search(t): 37 | a = m.constituents(constraint=0)[-1] # Left NP. 38 | b = m.constituents(constraint=5)[0] # Right NP. 
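# Store each (left NP, right NP) pair as a new row in the datasheet: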
39 | d.append(( 40 | a.string.lower(), 41 | b.string.lower())) 42 | 43 | pprint(d) 44 | 45 | print("") 46 | print("%s results." % len(d)) 47 | -------------------------------------------------------------------------------- /examples/01-web/14-flickr.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | from io import open 7 | 8 | import os 9 | import sys 10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 11 | 12 | from pattern.web import Flickr, extension 13 | from pattern.web import RELEVANCY, LATEST, INTERESTING # Image sort order. 14 | from pattern.web import SMALL, MEDIUM, LARGE # Image size. 15 | 16 | # This example downloads an image from Flickr (http://flickr.com). 17 | # Acquiring the image data takes three Flickr queries: 18 | # 1) Flickr.search() retrieves a list of results, 19 | # 2) FlickrResult.url retrieves the image URL (behind the scenes), 20 | # 3) FlickrResult.download() visits FlickrResult.url and downloads the content. 21 | 22 | # It is a good idea to cache results from Flickr locally, 23 | # which is what the cached=True parameter does. 24 | 25 | # You should obtain your own license key at: 26 | # http://www.flickr.com/services/api/ 27 | # Otherwise you will be sharing the default key with all users of pattern.web. 28 | engine = Flickr(license=None) 29 | 30 | q = "duracell bunny" 31 | results = engine.search(q, size=MEDIUM, sort=RELEVANCY, cached=False) 32 | for img in results: 33 | #print(img.url) # Retrieving the actual image URL executes a query. 34 | print(img.text) 35 | print(img.author) 36 | print("") 37 | 38 | # Download and save one of the images: 39 | img = results[0] 40 | data = img.download() 41 | path = q.replace(" ", "_") + extension(img.url) 42 | f = open(path, "wb") 43 | f.write(data) 44 | f.close() 45 | print("Download: %s" % img.url) 46 | print("Saved as: %s" % path) 47 | -------------------------------------------------------------------------------- /pattern/text/en/wordnet/dict/LICENSE.txt: -------------------------------------------------------------------------------- 1 | WordNet Release 3.0 2 | 3 | This software and database is being provided to you, the LICENSEE, by 4 | Princeton University under the following license. By obtaining, using 5 | and/or copying this software and database, you agree that you have 6 | read, understood, and will comply with these terms and conditions.: 7 | 8 | Permission to use, copy, modify and distribute this software and 9 | database and its documentation for any purpose and without fee or 10 | royalty is hereby granted, provided that you agree to comply with 11 | the following copyright notice and statements, including the disclaimer, 12 | and that the same appear on ALL copies of the software, database and 13 | documentation, including modifications that you make for internal 14 | use or for distribution. 15 | 16 | WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. 17 | 18 | THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON 19 | UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 20 | IMPLIED. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON 21 | UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- 22 | ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE 23 | OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT 24 | INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR 25 | OTHER RIGHTS. 26 | 27 | The name of Princeton University or Princeton may not be used in 28 | advertising or publicity pertaining to distribution of the software 29 | and/or database. Title to copyright in this software, database and 30 | any associated documentation shall at all times remain with 31 | Princeton University and LICENSEE agrees to preserve same. 32 | -------------------------------------------------------------------------------- /test/test_ru.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | from builtins import str, bytes, dict, int 8 | from builtins import map, zip, filter 9 | from builtins import object, range 10 | 11 | import os 12 | import sys 13 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) 14 | import unittest 15 | import random 16 | import subprocess 17 | 18 | from pattern import text 19 | from pattern import ru 20 | 21 | from io import open 22 | 23 | try: 24 | PATH = os.path.dirname(os.path.realpath(__file__)) 25 | except: 26 | PATH = "" 27 | 28 | #--------------------------------------------------------------------------------------------------- 29 | 30 | 31 | class TestSpelling(unittest.TestCase): 32 | 33 | def test_spelling(self): 34 | i = j = 0.0 35 | from pattern.db import Datasheet 36 | for correct, wrong in Datasheet.load(os.path.join(PATH, "corpora", "spelling-ru.csv")): 37 | for w in wrong.split(" "): 38 | suggested = ru.suggest(w) 39 | if suggested[0][0] == correct: 40 | i += 1 41 | else: 42 | j += 1 43 | self.assertTrue(i / (i + j) > 0.65) 44 | print("pattern.ru.suggest()") 45 | 46 | #--------------------------------------------------------------------------------------------------- 47 | 48 | def suite(): 49 | suite = unittest.TestSuite() 50 | suite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestSpelling)) 51 | return suite 52 | 53 | if __name__ == "__main__": 54 | 55 | result = unittest.TextTestRunner(verbosity=1).run(suite()) 56 | sys.exit(not result.wasSuccessful()) 57 | -------------------------------------------------------------------------------- /examples/06-graph/05-trends.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Twitter 12 | from pattern.graph import Graph 13 | 14 | # This example demonstrates a simple Twitter miner + visualizer. 15 | # We collect tweets containing "A is the new B", 16 | # mine A and B and use them as connected nodes in a graph. 17 | # Then we export the graph as a browser visualization. 
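# Each mined tweet will contribute one (A, B) pair to this list: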
18 | 19 | comparisons = [] 20 | 21 | for i in range(1, 10): 22 | # Set cached=False for live results: 23 | for result in Twitter(language="en").search("\"is the new\"", start=i, count=100, cached=True): 24 | s = result.text 25 | s = s.replace("\n", " ") 26 | s = s.lower() 27 | s = s.replace("is the new", "NEW") 28 | s = s.split(" ") 29 | try: 30 | i = s.index("NEW") 31 | A = s[i - 1].strip("?!.:;,#@\"'") 32 | B = s[i + 1].strip("?!.:;,#@\"'") 33 | # Exclude common phrases such as "this is the new thing". 34 | if A and B and A not in ("it", "this", "here", "what", "why", "where"): 35 | comparisons.append((A, B)) 36 | except: 37 | pass 38 | 39 | g = Graph() 40 | for A, B in comparisons: 41 | e = g.add_edge(B, A) # "A is the new B": A <= B 42 | e.weight += 0.1 43 | print(B, "=>", A) 44 | 45 | # Not all nodes will be connected; there will be multiple subgraphs. 46 | # Simply take the largest subgraph for our visualization. 47 | g = g.split()[0] 48 | 49 | g.export("trends", weighted=True, directed=True) 50 | -------------------------------------------------------------------------------- /examples/03-en/02-quantify.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import number, numerals, quantify, reflect 11 | 12 | # The number() command returns an int or float from a written representation. 13 | # This is useful, for example, in combination with a parser 14 | # to transform "CD" parts-of-speech to actual numbers. 15 | # The algorithm ignores words that aren't recognized as numerals. 16 | print(number("two thousand five hundred and eight")) 17 | print(number("two point eighty-five")) 18 | print("") 19 | 20 | # The numerals() command returns a written representation from an int or float. 21 | print(numerals(1.249, round=2)) 22 | print(numerals(1.249, round=3)) 23 | print("") 24 | 25 | # The quantify() command uses pluralization + approximation to enumerate words. 26 | # This is useful to generate a human-readable summary of a set of strings. 27 | print(quantify(["goose", "goose", "duck", "chicken", "chicken", "chicken"])) 28 | print(quantify(["penguin", "polar bear"])) 29 | print(quantify(["carrot"] * 1000)) 30 | print(quantify("parrot", amount=1000)) 31 | print(quantify({"carrot": 100, "parrot": 20})) 32 | print("") 33 | 34 | # The quantify() command only works with words (strings). 35 | # To quantify a set of Python objects, use reflect(). 36 | # This will first create a human-readable name for each object and then quantify these.
37 | print(reflect([0, 1, {}, False, reflect])) 38 | print(reflect(os.path)) 39 | print(reflect([False, True], quantify=False)) 40 | print(quantify( 41 | ["bunny rabbit"] + 42 | reflect([False, True], quantify=False))) 43 | -------------------------------------------------------------------------------- /pattern/web/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import Iterable 3 | 4 | try: 5 | # Python 2 6 | str_type = unicode 7 | except NameError: 8 | # Python 3 9 | str_type = str 10 | 11 | STRING_LIKE_TYPES = (str_type, bytes, bytearray) 12 | 13 | try: 14 | # Python 2 15 | from urlparse import urlparse, parse_qsl 16 | except ImportError: 17 | # Python 3 18 | from urllib.parse import urlparse, parse_qsl 19 | 20 | try: 21 | import simplejson as json 22 | except ImportError: 23 | import json 24 | 25 | 26 | def json_iter_parse(response_text): 27 | decoder = json.JSONDecoder(strict=False) 28 | idx = 0 29 | while idx < len(response_text): 30 | obj, idx = decoder.raw_decode(response_text, idx) 31 | yield obj 32 | 33 | 34 | def stringify_values(dictionary): 35 | stringified_values_dict = {} 36 | for key, value in dictionary.items(): 37 | if isinstance(value, Iterable) and not isinstance(value, STRING_LIKE_TYPES): 38 | value = u','.join(map(str_type, value)) 39 | stringified_values_dict[key] = value 40 | return stringified_values_dict 41 | 42 | 43 | def get_url_query(url): 44 | parsed_url = urlparse(url) 45 | url_query = parse_qsl(parsed_url.fragment) 46 | # login_response_url_query can have multiple keys 47 | url_query = dict(url_query) 48 | return url_query 49 | 50 | 51 | def get_form_action(html): 52 | form_action = re.findall(r'<form(?= ).* action="(.+)"', html) 53 | if form_action: 54 | return form_action[0] 55 | 56 | 57 | def censor_access_token(access_token): 58 | if isinstance(access_token, str) and len(access_token) >= 12: 59 | return '{}***{}'.format(access_token[:4], access_token[-4:]) 60 | elif access_token: 61 | return '***' 62 | else: 63 | return access_token 64 | -------------------------------------------------------------------------------- /examples/01-web/07-wikipedia.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Wikipedia 11 | 12 | # This example retrieves an article from Wikipedia (http://en.wikipedia.org). 13 | # Wikipedia queries request the article HTML source from the server. This can be slow. 14 | # It is a good idea to cache results from Wikipedia locally, 15 | # and to set a high timeout when calling Wikipedia.search(). 16 | 17 | engine = Wikipedia(language="en") 18 | 19 | # Unlike the other search engines in the pattern.web module, 20 | # Wikipedia simply returns one WikipediaArticle object (or None), 21 | # instead of a list of results. 22 | article = engine.search("alice in wonderland", cached=True, timeout=30) 23 | 24 | print(article.title) # Article title (may differ from the search query). 25 | print("") 26 | print(article.languages["fr"]) # Article in French, can be retrieved with Wikipedia(language="fr"). 27 | print(article.links[:10]) # List of linked Wikipedia articles. 28 | print(article.external[:5]) # List of external URLs. 29 | print("") 30 | 31 | #print(article.source) # The full article content as HTML. 32 | #print(article.string) # The full article content, plain text with HTML tags stripped.
33 | 34 | # An article is made up of different sections with a title. 35 | # WikipediaArticle.sections is a list of WikipediaSection objects. 36 | # Each section has a title + content and can have a linked parent section or child sections. 37 | for s in article.sections: 38 | print(s.title.upper()) 39 | print("") 40 | print(s.content) # = ArticleSection.string, minus the title. 41 | print("") 42 | -------------------------------------------------------------------------------- /examples/07-canvas/06-image.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | image 5 | 6 | 7 | 8 | 9 | 13 | 47 | 48 | -------------------------------------------------------------------------------- /examples/05-vector/corpus/parakeet.txt: -------------------------------------------------------------------------------- 1 | Parakeet is a term for any one of a large number of unrelated small to medium sized species of parrot, that generally have long tail feathers. Older spellings still sometimes encountered are paroquet or paraquet. 2 | 3 | Species 4 | 5 | The term Grass Parakeet (or Grasskeet) refers to a large number of small temp Australian parakeets native to grasslands such as Neophema and Princess Parrot. The Australian rosellas are also parakeets. Many of the smaller, long-tailed species of lories may be referred to as lorikeets. 6 | 7 | The term Ringnecked Parakeet refers to a species of the Psittacula genus native to Africa and Asia that is popular as a pet and has become feral in many cities. It should not be confused with the Australian Ringneck. 8 | 9 | In aviculture the term conure is used for small to medium sized parakeets of the genera Aratinga, Pyrrhura, and a few other genere of the tribe Arini, which are mainly endemic to South America. As they are not all from one genus, taxonomists tend to dislike the term. 10 | 11 | Other South American species commonly called parakeets include the Brotogeris parakeets, Monk Parakeet and Lineolated Parakeets (although Lineolated Parakeets have short tails). 12 | 13 | Some species, especially the larger parakeets, may be referred to as "parrot" or "parakeet" interchangeably. For example, Alexandrine Parrot and Alexandrine Parakeet are different names for the same species, (Psittacula eupatria), one of the largest species called a parakeet. 14 | 15 | Many different species of parakeets are often bred and sold commercially as pets, the Budgerigar being among the most commonly sold in the U.S. [1] [2] 16 | 17 | Gallery 18 | 19 | See also 20 | 21 | * Cockatiel 22 | * Macaw 23 | * Budgerigar 24 | 25 | External links 26 | 27 | * Budgerigar Information 28 | 29 | References 30 | 31 | * ^ http://animal-world.com/encyclo/birds/parakeets/parakeets.htm 32 | * ^ http://chirpykeets.webs.com/about.htm -------------------------------------------------------------------------------- /examples/04-search/06-optional.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search 11 | from pattern.en import parsetree 12 | 13 | # Constraints ending in "?" are optional, matching one or no word. 
14 | # Pattern.search() uses a "greedy" approach: 15 | # it will attempt to include as many optional constraints as possible. 16 | 17 | # The following pattern scans for words whose part-of-speech tag is NN (i.e. nouns). 18 | # A preceding adjective, adverb or determiner is picked up as well. 19 | for s in ( 20 | "the cat", # DT NN 21 | "the very black cat", # DT RB JJ NN 22 | "tasty cat food", # JJ NN NN 23 | "the funny black cat", # DT JJ JJ NN 24 | "very funny", # RB JJ => no match, since there is no noun. 25 | "my cat is black and your cat is white"): # NN + NN 26 | t = parsetree(s) 27 | m = search("DT? RB? JJ? NN+", t) 28 | print("") 29 | print(t) 30 | print(m) 31 | if m: 32 | for w in m[0].words: 33 | print("%s matches %s" % (w, m[0].constraint(w))) 34 | 35 | # Before Pattern 2.4, "( )" was used instead of "?". 36 | # For example: "(JJ)" instead of "JJ?". 37 | # The syntax was changed to resemble regular expressions, which use "?". 38 | # The old syntax "(JJ)" still works in Pattern 2.4, but it may change later. 39 | 40 | # Note: the above pattern could also be written as "DT|RB|JJ?+ NN+" 41 | # to include multiple adverbs/adjectives. 42 | # By combining "*", "?" and "+", patterns can become quite complex. 43 | # Optional constraints are useful for very specific patterns, but slow. 44 | # Also, depending on which parser you use (e.g. MBSP), words can be tagged differently 45 | # and may not match in the way you expect. 46 | # Consider using a simple, robust "NP" search pattern. 47 | -------------------------------------------------------------------------------- /examples/01-web/09-wikia.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from builtins import str, bytes, dict, int 7 | 8 | import os 9 | import sys 10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 11 | 12 | from pattern.web import Wikia 13 | 14 | # This example retrieves articles from Wikia (http://www.wikia.com). 15 | # Wikia is a collection of thousands of wikis based on MediaWiki. 16 | # Wikipedia is based on MediaWiki too. 17 | # Wikia queries request the article HTML source from the server. This can be slow. 18 | 19 | domain = "monkeyisland" # "Look behind you, a three-headed monkey!" 20 | 21 | # Alternatively, you can call this script from the commandline 22 | # and specify another domain: python 09-wikia.py "Bieberpedia". 23 | if len(sys.argv) > 1: 24 | domain = sys.argv[1] 25 | 26 | w = Wikia(domain, language="en") 27 | 28 | # Like Wikipedia, we can search for articles by title with Wikia.search(): 29 | print(w.search("Three Headed Monkey")) 30 | 31 | # However, we may not know exactly what kind of articles exist; 32 | # "three-headed monkey", for example, does not redirect to the above article. 33 | 34 | # We can iterate through all articles with the Wikia.articles() method 35 | # (note that Wikipedia also has a Wikipedia.articles() method). 36 | # The "count" parameter sets the number of article titles to retrieve per query. 37 | # Retrieving the full article for each title takes another query. This can be slow.
38 | i = 0 39 | for article in w.articles(count=2, cached=True): 40 | print("") 41 | print(article.title) 42 | #print(article.plaintext()) 43 | i += 1 44 | if i >= 3: 45 | break 46 | 47 | # Alternatively, we can retrieve just the titles, 48 | # and only retrieve the full articles for the titles we need: 49 | i = 0 50 | for title in w.index(count=2): 51 | print("") 52 | print(title) 53 | #article = w.search(title) 54 | #print(article.plaintext()) 55 | i += 1 56 | if i >= 3: 57 | break 58 | -------------------------------------------------------------------------------- /examples/04-search/01-search.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search 11 | from pattern.en import parsetree 12 | 13 | # The pattern.search module contains a number of pattern matching tools 14 | # to search a string syntactically (word function) or semantically (word meaning). 15 | # If you only need to match string characters, regular expressions are faster. 16 | # However, if you are scanning a sentence for concept types (e.g. all flowers) 17 | # or parts-of-speech (e.g. all adjectives), this module provides the functionality. 18 | 19 | # In the simplest case, the search() function 20 | # takes a word (or a sequence of words) that you want to retrieve: 21 | print(search("rabbit", "big white rabbit")) 22 | print("") 23 | 24 | # Search words can contain wildcard characters: 25 | print(search("rabbit*", "big white rabbit")) 26 | print(search("rabbit*", "big white rabbits")) 27 | print("") 28 | 29 | # Search words can contain different options: 30 | print(search("rabbit|cony|bunny", "big black bunny")) 31 | print("") 32 | 33 | # Things become more interesting if we involve the pattern.en.parser module. 34 | # The parser takes a string, identifies words, and assigns a part-of-speech tag 35 | # to each word, for example NN (noun) or JJ (adjective). 36 | # A parsed sentence can be scanned for part-of-speech tags: 37 | s = parsetree("big white rabbit") 38 | print(search("JJ", s)) # all adjectives 39 | print(search("NN", s)) # all nouns 40 | print(search("NP", s)) # all noun phrases 41 | print("") 42 | 43 | # Since search() is case-insensitive, uppercase search words 44 | # are always considered to be tags (or taxonomy terms - see further examples). 45 | 46 | # The return value is a list of Match objects, 47 | # where Match.words is a list of Word objects that matched: 48 | m = search("NP", s) 49 | for word in m[0].words: 50 | print(word.string, word.tag) 51 | -------------------------------------------------------------------------------- /examples/01-web/03-bing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Bing, asynchronous, plaintext 11 | from pattern.web import SEARCH, IMAGE, NEWS 12 | 13 | import time 14 | 15 | # This example retrieves results from Bing based on a given query. 16 | # The Bing search engine can retrieve up to 1000 results (10x100) for a query.
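# For example, paging through the full 10 x 100 results could look like this
# (a sketch; it assumes you use your own license key, see the note below):
#engine = Bing(license="YOUR_KEY", language="en")
#for page in range(1, 11):
#    for result in engine.search("pattern", start=page, count=100):
#        print(result.url)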
17 | 18 | # Bing's "Custom Search API" is a paid service. 19 | # The pattern.web module uses a test account by default, 20 | # with 5000 free queries per month shared by all Pattern users. 21 | # If this limit is exceeded, SearchEngineLimitError is raised. 22 | # You should obtain your own license key at: 23 | # https://datamarket.azure.com/account/ 24 | engine = Bing(license=None, language="en") 25 | 26 | # Quote a query to match it exactly: 27 | q = "\"is more important than\"" 28 | 29 | # When you execute a query, 30 | # the script will halt until all results are downloaded. 31 | # In apps with an infinite main loop (e.g., GUI, game), 32 | # it is often more useful if the app keeps on running 33 | # while the search is executed in the background. 34 | # This can be achieved with the asynchronous() function. 35 | # It takes any function and that function's arguments and keyword arguments: 36 | request = asynchronous(engine.search, q, start=1, count=100, type=SEARCH, timeout=10) 37 | 38 | # This while-loop simulates an infinite application loop. 39 | # In real life you would have an app.update() or similar 40 | # in which you can check request.done every now and then. 41 | while not request.done: 42 | time.sleep(0.1) 43 | print(".") 44 | 45 | print("") 46 | print("") 47 | 48 | # An error occurred in engine.search(); raise it. 49 | if request.error: 50 | raise request.error 51 | 52 | # Retrieve the list of search results. 53 | for result in request.value: 54 | print(result.text) 55 | print(result.url) 56 | print("") 57 | -------------------------------------------------------------------------------- /examples/07-canvas/08-widget.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | basics (2) 5 | 6 | 7 | 31 | 32 | 33 | 61 | 62 | -------------------------------------------------------------------------------- /examples/04-search/08-group.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import match 11 | from pattern.en import parsetree 12 | 13 | # This example demonstrates how to create match groups. 14 | # A match group is a number of consecutive constraints, 15 | # for which matching words can easily be retrieved from a Match object. 16 | 17 | # Suppose we are looking for adjectives preceding nouns. 18 | # A simple pattern is: "JJ?+ NN", 19 | # which matches nouns preceded by any number of adjectives. 20 | # Since the number of adjectives can be 0, 1, 2, 3, ... it is not so easy 21 | # to fetch the adjectives from a Match. This can be achieved with a group: 22 | 23 | s = "The big black cat" 24 | t = parsetree(s) 25 | print(match("{JJ?+} NN", t).group(1)) 26 | print("") 27 | 28 | # Note the { } wrapper, indicating a group. 29 | # The group can be retrieved from the match as a list of words. 30 | 31 | # Suppose we are looking for prepositional noun phrases, 32 | # e.g., on the mat, with a fork, under the hood, etc... 33 | # The preposition is always one word (on, with, under), 34 | # but the actual noun phrase can have many words (a shiny silver fork), 35 | # so it is a hassle to retrieve it from the match. 36 | 37 | # Normally, we would do it like this: 38 | 39 | s = "The big black cat sat on the mat."
40 | t = parsetree(s) 41 | m = match("NP VP PP NP", t) 42 | for w in m.words: 43 | if m.constraint(w).index == 2: 44 | print("This is the PP: %s" % w) 45 | if m.constraint(w).index == 3: 46 | print("This is the NP: %s" % w) 47 | 48 | # In other words, iterate over each word in the match, 49 | # checking which constraint it matched and filtering out what we need. 50 | 51 | # It is easier with a group: 52 | 53 | m = match("NP VP {PP} {NP}", t) 54 | print("") 55 | print("This is the PP: %s" % m.group(1)) 56 | print("This is the NP: %s" % m.group(2)) 57 | print("") 58 | 59 | # Match.group(0) refers to the full search pattern: 60 | print(m.group(0)) 61 | -------------------------------------------------------------------------------- /examples/07-canvas/04-path.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | paths 5 | 6 | 7 | 8 | 9 | 52 | 53 | -------------------------------------------------------------------------------- /examples/04-search/07-exclude.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import match 11 | from pattern.en import Sentence, parse 12 | 13 | # This example demonstrates how to exclude certain words or tags from a constraint. 14 | # It also demonstrates the use of "^", 15 | # for a constraint that can only match the first word. 16 | 17 | # We'll use a naive imperative() function as a demonstration. 18 | # Sentences can have different moods: indicative, conditional, imperative, subjunctive. 19 | # The imperative mood is used to give orders, instructions, warnings: 20 | # - "Do your homework!", 21 | # - "You will eat your dinner!". 22 | # It is marked by an infinitive verb, without a "to" preceding it. 23 | # It does not use modal verbs such as "could" and "would": 24 | # "You could eat your dinner!" is not a command but a bubbly suggestion. 25 | 26 | # We can create a pattern that scans for infinitive verbs (VB), 27 | # and use "!" to exclude certain words: 28 | # "!could|!would|!should|!to+ VB" = infinitive not preceded by modal or "to". 29 | # This works fine except in one case: if the sentence starts with a verb. 30 | # So we need a second rule "^VB" to catch this. 31 | # Note that the example below contains a third rule: "^do|VB*". 32 | # This catches all sentences that start with a "do" verb regardless of whether it is infinitive, 33 | # because the parser sometimes tags infinitive "do" incorrectly. 34 | 35 | 36 | def imperative(sentence): 37 | for p in ("!could|!would|!should|!to+ VB", "^VB", "^do|VB*"): 38 | m = match(p, sentence) 39 | if m and sentence.string.endswith((".", "!")): # Exclude questions.
40 | return True 41 | return False 42 | 43 | for s in ( 44 | "Just stop it!", 45 | "Look out!", 46 | "Do your homework!", 47 | "You should do your homework.", 48 | "Could you stop it.", 49 | "To be, or not to be."): 50 | s = parse(s) 51 | s = Sentence(s) 52 | print(s) 53 | print(imperative(s)) 54 | print("") 55 | -------------------------------------------------------------------------------- /examples/01-web/01-google.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Google, plaintext 12 | from pattern.web import SEARCH 13 | 14 | # The pattern.web module has a SearchEngine class, 15 | # with a SearchEngine.search() method that yields a list of Result objects. 16 | # Each Result has url, title, text, language, author and date properties. 17 | # Subclasses of SearchEngine include: 18 | # Google, Bing, Yahoo, Twitter, Facebook, Wikipedia, Wiktionary, Flickr, ... 19 | 20 | # This example retrieves results from Google based on a given query. 21 | # The Google search engine can handle SEARCH type searches. 22 | # Other search engines may also handle IMAGE, NEWS, ... 23 | 24 | # Google's "Custom Search API" is a paid service. 25 | # The pattern.web module uses a test account by default, 26 | # with 100 free queries per day shared by all Pattern users. 27 | # If this limit is exceeded, SearchEngineLimitError is raised. 28 | # You should obtain your own license key at: 29 | # https://code.google.com/apis/console/ 30 | # Activate "Custom Search API" under "Services" and get the key under "API Access". 31 | # Then use Google(license=[YOUR_KEY]).search(). 32 | # This will give you 100 personal free queries, or $5 per 1000 queries. 33 | engine = Google(license=None, language="en") 34 | 35 | # Veale & Hao's method for finding similes using wildcards (*): 36 | # http://afflatus.ucd.ie/Papers/LearningFigurative_CogSci07.pdf 37 | # This will match results such as: 38 | # - "as light as a feather", 39 | # - "as cute as a cupcake", 40 | # - "as drunk as a lord", 41 | # - "as snug as a bug", etc. 42 | q = "as * as a *" 43 | 44 | # Google is very fast but you can only get up to 100 (10x10) results per query. 45 | for i in range(1, 2): 46 | for result in engine.search(q, start=i, count=10, type=SEARCH, cached=True): 47 | print(plaintext(result.text)) # plaintext() removes all HTML formatting.
48 | print(result.url) 49 | print(result.date) 50 | print("") 51 | -------------------------------------------------------------------------------- /examples/03-en/08-topmine_ngrammer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | import codecs 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | import pattern.text as text_module 12 | from pattern.text.en.wordlist import STOPWORDS 13 | 14 | paths = [] 15 | for f in os.listdir('./texts'): 16 | paths.append('./texts/' + f) 17 | 18 | texts = [] 19 | for p in paths: 20 | with codecs.open(p, "rb", encoding='latin-1') as f: 21 | if sys.version_info[0] < 3: 22 | texts.append(f.read()) 23 | else: 24 | texts.append(str(f.read())) 25 | 26 | ng = text_module.train_topmine_ngrammer(texts, threshhold=1, regexp="[^a-zA-Z0-9]") 27 | ngrams = text_module.topmine_ngramms(texts[0], ng, threshhold=1) 28 | 29 | 30 | 31 | print("\n") 32 | bigrams = [] 33 | trigrams = [] 34 | for key in ngrams.keys(): 35 | if len(key.split("_")) == 2: 36 | bigrams.append(key) 37 | elif len(key.split("_")) == 3: 38 | trigrams.append(key) 39 | 40 | print("Extracted {} bigrams:\n".format(len(bigrams))) 41 | print(bigrams) 42 | print("\n") 43 | 44 | print("Extracted {} trigrams:\n".format(len(trigrams))) 45 | print(trigrams) 46 | print("\n") 47 | 48 | 49 | # As we can see, the extracted n-grams contain many stopwords, 50 | # so it is important to remove all stopwords before applying the algorithm. 51 | 52 | ng = text_module.train_topmine_ngrammer(texts, threshhold=1, regexp="[^a-zA-Z0-9]", stopwords=STOPWORDS) 53 | ngrams = text_module.topmine_ngramms(texts[0], ng, threshhold=1) 54 | 55 | 56 | # The n-grams extracted below should no longer contain stopwords 57 | # (compare with the first run above). 58 | print("\n") 59 | bigrams = [] 60 | trigrams = [] 61 | for key in ngrams.keys(): 62 | if len(key.split("_")) == 2: 63 | bigrams.append(key) 64 | elif len(key.split("_")) == 3: 65 | trigrams.append(key) 66 | 67 | print("Extracted {} bigrams (removed stopwords):\n".format(len(bigrams))) 68 | print(bigrams) 69 | print("\n") 70 | 71 | print("Extracted {} trigrams (removed stopwords):\n".format(len(trigrams))) 72 | print(trigrams) 73 | print("\n") 74 | -------------------------------------------------------------------------------- /pattern/text/fr/fr-morphology.txt: -------------------------------------------------------------------------------- 1 | NN ient fhassuf 4 VB x 2 | NN ment fhassuf 4 RB x 3 | NN rait fhassuf 4 VB x 4 | NN rent fhassuf 4 VB x 5 | NN ante fhassuf 4 JJ x 6 | NN sait fhassuf 4 VB x 7 | NN tait fhassuf 4 VB x 8 | NN ique fhassuf 4 JJ x 9 | NN able fhassuf 4 JJ x 10 | NN ntes fhassuf 4 JJ x 11 | NN nait fhassuf 4 VB x 12 | NN euse fhassuf 4 JJ x 13 | NN lait fhassuf 4 VB x 14 | NN ques fhassuf 4 JJ x 15 | NN sent fhassuf 4 VB x 16 | NN ants fhassuf 4 JJ x 17 | NN bles fhassuf 4 JJ x 18 | NN tent fhassuf 4 VB x 19 | NN rais fhassuf 4 VB x 20 | NN uses fhassuf 4 JJ x 21 | NN lent fhassuf 4 VB x 22 | NN nent fhassuf 4 VB x 23 | NN uait fhassuf 4 VB x 24 | NN tais fhassuf 4 VB x 25 | NN erai fhassuf 4 VB x 26 | NN dait fhassuf 4 VB x 27 | NN ller fhassuf 4 VB x 28 | NN sser fhassuf 4 VB x 29 | NN cher fhassuf 4 VB x 30 | NN lées fhassuf 4 VBN x 31 | NN ront fhassuf 4 VB x 32 | NN sées fhassuf 4 VBN
x 33 | NN ales fhassuf 4 JJ x 34 | NN tées fhassuf 4 VBN x 35 | NN quer fhassuf 4 VB x 36 | NN iait fhassuf 4 VB x 37 | NN sais fhassuf 4 VB x 38 | NN rons fhassuf 4 VB x 39 | NN nner fhassuf 4 VB x 40 | NN iser fhassuf 4 VB x 41 | NN vait fhassuf 4 VB x 42 | NN eait fhassuf 4 VB x 43 | NN rées fhassuf 4 VBN x 44 | NN mait fhassuf 4 VB x 45 | NN hait fhassuf 4 VB x 46 | NN uent fhassuf 4 VB x 47 | NN ieux fhassuf 4 JJ x 48 | NN ible fhassuf 4 JJ x 49 | NN ndre fhassuf 4 VB x 50 | NN nais fhassuf 4 VB x 51 | NN onné fhassuf 4 VBN x 52 | NN tive fhassuf 4 JJ x 53 | NN illé fhassuf 4 VBN x 54 | NN ssée fhassuf 4 VBN x 55 | NN iner fhassuf 4 VB x 56 | NN nter fhassuf 4 VB x 57 | NN isse fhassuf 4 VB x 58 | NN vent fhassuf 4 VB x 59 | NN ssés fhassuf 4 VBN x 60 | NN iter fhassuf 4 VB x 61 | NN quée fhassuf 4 VBN x 62 | NN çait fhassuf 4 VB x 63 | NN llée fhassuf 4 VBN x 64 | NN érer fhassuf 4 VB x 65 | NN uler fhassuf 4 VB x 66 | NN uées fhassuf 4 VBN x 67 | NN ière fhassuf 4 JJ x 68 | NN isée fhassuf 4 VBN x 69 | NN yait fhassuf 4 VB x 70 | NN eras fhassuf 4 VB x 71 | NN ives fhassuf 4 JJ x 72 | NN reux fhassuf 4 JJ x 73 | NN chée fhassuf 4 VBN x 74 | NN qués fhassuf 4 VBN x 75 | NN lais fhassuf 4 VB x 76 | NN ntée fhassuf 4 VBN x 77 | NN chés fhassuf 4 VBN x 78 | JJ la fgoodleft NN x 79 | JJ le fgoodleft NN x 80 | JJ les fgoodleft NN x 81 | JJ un fgoodleft NN x 82 | JJ une fgoodleft NN x 83 | NN s fhassuf 1 NNS x -------------------------------------------------------------------------------- /docs/js/shBrushXml.js: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | ;(function() 18 | { 19 | // CommonJS 20 | typeof(require) != 'undefined' ? SyntaxHighlighter = require('shCore').SyntaxHighlighter : null; 21 | 22 | function Brush() 23 | { 24 | function process(match, regexInfo) 25 | { 26 | var constructor = SyntaxHighlighter.Match, 27 | code = match[0], 28 | tag = new XRegExp('(&lt;|<)[\\s\\/\\?]*(?<name>[:\\w-\\.]+)', 'xg').exec(code), 29 | result = [] 30 | ; 31 | 32 | if (match.attributes != null) 33 | { 34 | var attributes, 35 | regex = new XRegExp('(?<name> [\\w:\\-\\.]+)' + 36 | '\\s*=\\s*' +
'(?<value> ".*?"|\'.*?\'|\\w+)', 38 | 'xg'); 39 | 40 | while ((attributes = regex.exec(code)) != null) 41 | { 42 | result.push(new constructor(attributes.name, match.index + attributes.index, 'color1')); 43 | result.push(new constructor(attributes.value, match.index + attributes.index + attributes[0].indexOf(attributes.value), 'string')); 44 | } 45 | } 46 | 47 | if (tag != null) 48 | result.push( 49 | new constructor(tag.name, match.index + tag[0].indexOf(tag.name), 'keyword') 50 | ); 51 | 52 | return result; 53 | } 54 | 55 | this.regexList = [ 56 | { regex: new XRegExp('(\\&lt;|<)\\!\\[[\\w\\s]*?\\[(.|\\s)*?\\]\\](\\&gt;|>)', 'gm'), css: 'color2' }, // <![ ... [ ... ]]> 57 | { regex: SyntaxHighlighter.regexLib.xmlComments, css: 'comments' }, // <!-- ... --> 58 | { regex: new XRegExp('(&lt;|<)[\\s\\/\\?]*(\\w+)(?<attributes>.*?)[\\s\\/\\?]*(&gt;|>)', 'sg'), func: process } 59 | ]; 60 | }; 61 | 62 | Brush.prototype = new SyntaxHighlighter.Highlighter(); 63 | Brush.aliases = ['xml', 'xhtml', 'xslt', 'html']; 64 | 65 | SyntaxHighlighter.brushes.Xml = Brush; 66 | 67 | // CommonJS 68 | typeof(exports) != 'undefined' ? exports.Brush = Brush : null; 69 | })(); 70 | -------------------------------------------------------------------------------- /pattern/text/ru/wordlist/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | VECTOR | WORDLIST ################################################################### 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import print_function 10 | from __future__ import unicode_literals 11 | from __future__ import division 12 | 13 | from builtins import str, bytes, dict, int 14 | from builtins import map, zip, filter 15 | from builtins import object, range 16 | 17 | import os 18 | from io import open 19 | 20 | try: 21 | MODULE = os.path.dirname(os.path.realpath(__file__)) 22 | except: 23 | MODULE = "" 24 | 25 | 26 | class Wordlist(object): 27 | 28 | def __init__(self, name, data=[]): 29 | """ Lazy read-only list of words.
30 | """ 31 | self._name = name 32 | self._data = data 33 | 34 | def _load(self): 35 | if not self._data: 36 | self._data = open(os.path.join(MODULE, self._name + ".txt")).read().split("\n") 37 | 38 | def __repr__(self): 39 | self._load() 40 | return repr(self._data) 41 | 42 | def __iter__(self): 43 | self._load() 44 | return iter(self._data) 45 | 46 | def __len__(self): 47 | self._load() 48 | return len(self._data) 49 | 50 | def __contains__(self, w): 51 | self._load() 52 | return w in self._data 53 | 54 | def __add__(self, iterable): 55 | self._load() 56 | return Wordlist(None, data=sorted(self._data + list(iterable))) 57 | 58 | def __getitem__(self, i): 59 | self._load() 60 | return self._data[i] 61 | 62 | def __setitem__(self, i, v): 63 | self._load() 64 | self._data[i] = v 65 | 66 | def insert(self, i, v): 67 | self._load() 68 | self._data.insert(i, v) 69 | 70 | def append(self, v): 71 | self._load() 72 | self._data.append(v) 73 | 74 | def extend(self, v): 75 | self._load() 76 | self._data.extend(v) 77 | 78 | STOPWORDS = Wordlist("stopwords") # Russian stop words 79 | -------------------------------------------------------------------------------- /pattern/vector/stopwords-es.txt: -------------------------------------------------------------------------------- 1 | a, al, algo, algunas, algunos, ante, antes, como, con, contra, cual, cuando, de, del, desde, donde, durante, e, el, ella, ellas, ellos, en, entre, era, erais, eran, eras, eres, es, esa, esas, ese, eso, esos, esta, estaba, estabais, estaban, estabas, estad, estada, estadas, estado, estados, estamos, estando, estar, estaremos, estará, estarán, estarás, estaré, estaréis, estaría, estaríais, estaríamos, estarían, estarías, estas, este, estemos, esto, estos, estoy, estuve, estuviera, estuvierais, estuvieran, estuvieras, estuvieron, estuviese, estuvieseis, estuviesen, estuvieses, estuvimos, estuviste, estuvisteis, estuviéramos, estuviésemos, estuvo, está, estábamos, estáis, están, estás, esté, estéis, estén, estés, fue, fuera, fuerais, fueran, fueras, fueron, fuese, fueseis, fuesen, fueses, fui, fuimos, fuiste, fuisteis, fuéramos, fuésemos, ha, habida, habidas, habido, habidos, habiendo, habremos, habrá, habrán, habrás, habré, habréis, habría, habríais, habríamos, habrían, habrías, habéis, había, habíais, habíamos, habían, habías, han, has, hasta, hay, haya, hayamos, hayan, hayas, hayáis, he, hemos, hube, hubiera, hubierais, hubieran, hubieras, hubieron, hubiese, hubieseis, hubiesen, hubieses, hubimos, hubiste, hubisteis, hubiéramos, hubiésemos, hubo, la, las, le, les, lo, los, me, mi, mis, mucho, muchos, muy, más, mí, mía, mías, mío, míos, nada, ni, no, nos, nosotras, nosotros, nuestra, nuestras, nuestro, nuestros, o, os, otra, otras, otro, otros, para, pero, poco, por, porque, que, quien, quienes, qué, se, sea, seamos, sean, seas, seremos, será, serán, serás, seré, seréis, sería, seríais, seríamos, serían, serías, seáis, sido, siendo, sin, sobre, sois, somos, son, soy, su, sus, suya, suyas, suyo, suyos, sí, también, tanto, te, tendremos, tendrá, tendrán, tendrás, tendré, tendréis, tendría, tendríais, tendríamos, tendrían, tendrías, tened, tenemos, tenga, tengamos, tengan, tengas, tengo, tengáis, tenida, tenidas, tenido, tenidos, teniendo, tenéis, tenía, teníais, teníamos, tenían, tenías, ti, tiene, tienen, tienes, todo, todos, tu, tus, tuve, tuviera, tuvierais, tuvieran, tuvieras, tuvieron, tuviese, tuvieseis, tuviesen, tuvieses, tuvimos, tuviste, tuvisteis, tuviéramos, tuviésemos, tuvo, tuya, tuyas, tuyo, tuyos, tú, un, una, 
uno, unos, vosotras, vosotros, vuestra, vuestras, vuestro, vuestros, y, ya, yo, él, éramos -------------------------------------------------------------------------------- /examples/07-canvas/data-url.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | canvas.js | data url 6 | 7 | 35 | 38 | 39 | 40 |

For security reasons, Canvas does not allow you to retrieve the Pixels of remote images (i.e., from a URL not on your own server). Neither can you export a canvas that renders such images (no Canvas.image() or Canvas.save()). During testing, you may opt to include (small) images in your script as data URLs. This script can be used to generate the data URL of an image, provided that your browser supports FileReader.
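<!-- A minimal sketch of the FileReader approach described above
     (the element id "file" is hypothetical): -->
<!--
<script type="text/javascript">
    var input = document.getElementById("file");
    input.onchange = function() {
        var reader = new FileReader();
        reader.onload = function(e) {
            console.log(e.target.result); // e.g., "data:image/png;base64,..."
        };
        reader.readAsDataURL(input.files[0]);
    };
</script>
-->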

41 | 42 |
43 | 45 |
46 | convert to canvas.js code block 47 | 48 | -------------------------------------------------------------------------------- /examples/02-db/02-datasheet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.db import Datasheet, INTEGER, STRING 11 | from pattern.db import uid, pprint 12 | 13 | # The main purpose of the pattern module is to facilitate automated processes 14 | # for (text) data acquisition and (linguistic) data mining. 15 | # Often, this involves a tangle of messy text files and custom formats to store the data. 16 | # The Datasheet class offers a useful matrix (cf. MS Excel) in Python code. 17 | # It can be saved as a CSV text file that is both human/machine readable. 18 | # See also: examples/01-web/04-twitter.py 19 | 20 | # A Datasheet can have headers: a (name, type)-tuple for each column. 21 | # In this case, imported columns will automatically map values to the defined type. 22 | # Supported values that are imported and exported correctly: 23 | # str, unicode, int, float, bool, Date, None 24 | # For other data types, custom encoder and decoder functions can be used. 25 | 26 | ds = Datasheet(rows=[ 27 | [uid(), "broccoli", "vegetable"], 28 | [uid(), "turnip", "vegetable"], 29 | [uid(), "asparagus", "vegetable"], 30 | [uid(), "banana", "fruit"], 31 | ], fields=[ 32 | ("id", INTEGER), # Define the column headers. 33 | ("name", STRING), 34 | ("type", STRING) 35 | ]) 36 | 37 | print(ds.rows[0]) # A list of rows. 38 | print(ds.columns[1]) # A list of columns, where each column is a list of values. 39 | print(ds.name) 40 | print("") 41 | 42 | # Columns can be manipulated directly like any other Python list. 43 | # This can be slow for large tables. If you need a fast way to do matrix math, 44 | # use numpy (http://numpy.scipy.org/) instead. 45 | # The purpose of Datasheet is data storage. 46 | ds.columns.append([ 47 | "green", 48 | "purple", 49 | "white", 50 | "yellow" 51 | ], field=("color", STRING)) 52 | 53 | # Save as a comma-separated (unicode) text file. 54 | ds.save("food.txt", headers=True) 55 | 56 | # Load a table from file. 57 | ds = Datasheet.load("food.txt", headers=True) 58 | 59 | pprint(ds, truncate=50, padding=" ", fill=".") 60 | print("") 61 | print(ds.fields) 62 | -------------------------------------------------------------------------------- /examples/03-en/03-parse.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import parse, pprint, tag 11 | 12 | # The en module contains a fast regular expressions-based parser. 13 | # A parser identifies words in a sentence, word part-of-speech tags (e.g. noun, verb) 14 | # and groups of words that belong together (e.g. noun phrases). 15 | # Common part-of-speech tags: NN (noun), VB (verb), JJ (adjective), PP (preposition). 16 | # A tag can have a suffix, for example NNS (plural noun) or VBG (gerund verb). 17 | # Overview of tags: http://www.clips.ua.ac.be/pages/mbsp-tags 18 | s = "I eat pizza with a fork."
19 | s = parse(s, 20 | tokenize = True, # Tokenize the input, i.e. split punctuation from words. 21 | tags = True, # Find part-of-speech tags. 22 | chunks = True, # Find chunk tags, e.g. "the black cat" = NP = noun phrase. 23 | relations = True, # Find relations between chunks. 24 | lemmata = True, # Find word lemmata. 25 | light = False) 26 | 27 | # The light parameter determines how unknown words are handled. 28 | # By default, unknown words are tagged NN and then improved with a set of rules. 29 | # light=False uses Brill's lexical and contextual rules, 30 | # light=True uses a set of custom rules that is less accurate but faster (5x-10x). 31 | 32 | # The output is a string with each sentence on a new line. 33 | # Words in a sentence have been annotated with tags, 34 | # for example: fork/NN/I-NP/I-PNP 35 | # NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase. 36 | print(s) 37 | print("") 38 | 39 | # Prettier output can be obtained with the pprint() command: 40 | pprint(s) 41 | print("") 42 | 43 | # The string's split() method will (unless a split character is given), 44 | # split into a list of sentences, where each sentence is a list of words 45 | # and each word is a list with the word + its tags. 46 | print(s.split()) 47 | print("") 48 | 49 | # The tag() command returns a list of (word, POS-tag)-tuples. 50 | # With light=True, this is the fastest and simplest way to get an idea 51 | # of a sentence's constituents: 52 | s = "I eat pizza with a fork." 53 | s = tag(s) 54 | print(s) 55 | for word, tag in s: 56 | if tag == "NN": # Find all nouns in the input string. 57 | print(word) 58 | -------------------------------------------------------------------------------- /docs/js/shBrushJScript.js: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | ;(function() 18 | { 19 | // CommonJS 20 | typeof(require) != 'undefined' ? 
SyntaxHighlighter = require('shCore').SyntaxHighlighter : null; 21 | 22 | function Brush() 23 | { 24 | var keywords1 = 'break case catch continue ' + 25 | 'default delete do else ' + 26 | 'for function if in instanceof ' + 27 | 'new return switch ' + 28 | 'throw try typeof var while with' 29 | ; 30 | 31 | var keywords2 = 'false true null super this'; 32 | 33 | var keywords3 = 'alert back blur close confirm focus forward home ' + 34 | 'name navigate onblur onerror onfocus onload onmove ' + 35 | 'onresize onunload open print prompt scroll status stop'; 36 | 37 | var r = SyntaxHighlighter.regexLib; 38 | 39 | this.regexList = [ 40 | { regex: r.multiLineDoubleQuotedString, css: 'string' }, // double quoted strings 41 | { regex: r.multiLineSingleQuotedString, css: 'string' }, // single quoted strings 42 | { regex: r.singleLineCComments, css: 'comments1' }, // one line comments 43 | { regex: r.multiLineCComments, css: 'comments2' }, // multiline comments 44 | { regex: /\s*#.*/gm, css: 'preprocessor' }, // preprocessor tags like #region and #endregion 45 | { regex: /function ([^\()]+)\(/g, func: function(match, r) { 46 | return [ 47 | new SyntaxHighlighter.Match("function ", match.index, "keyword1"), 48 | new SyntaxHighlighter.Match(match[1], match.index+9, "name") 49 | ]; } }, 50 | { regex: new RegExp(this.getKeywords(keywords1), 'gm'), css: 'keyword1' }, // keywords 1 51 | { regex: new RegExp(this.getKeywords(keywords2), 'gm'), css: 'keyword2' }, // keywords 2 52 | { regex: new RegExp(this.getKeywords(keywords3), 'gm'), css: 'keyword3' } // keywords 3 53 | ]; 54 | 55 | this.forHtmlScript(r.scriptScriptTags); 56 | }; 57 | 58 | Brush.prototype = new SyntaxHighlighter.Highlighter(); 59 | Brush.aliases = ['js', 'jscript', 'javascript']; 60 | 61 | SyntaxHighlighter.brushes.JScript = Brush; 62 | 63 | // CommonJS 64 | typeof(exports) != 'undefined' ? exports.Brush = Brush : null; 65 | })(); 66 | -------------------------------------------------------------------------------- /examples/06-graph/02-export.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.graph import Graph, WEIGHT, CENTRALITY, DEGREE, DEFAULT 12 | from random import choice, random 13 | 14 | # This example demonstrates how a graph visualization can be exported to HTML, 15 | # using the HTML5 <canvas> tag and Javascript. 16 | # All properties (e.g., stroke color) of nodes and edges are ported. 17 | 18 | g = Graph() 19 | # Random nodes. 20 | for i in range(50): 21 | g.add_node(id=str(i + 1), 22 | radius = 5, 23 | stroke = (0, 0, 0, 1), 24 | text = (0, 0, 0, 1)) 25 | # Random edges. 26 | for i in range(75): 27 | node1 = choice(g.nodes) 28 | node2 = choice(g.nodes) 29 | g.add_edge(node1, node2, 30 | length = 1.0, 31 | weight = random(), 32 | stroke = (0, 0, 0, 1)) 33 | 34 | for node in g.sorted()[:20]: 35 | # More blue = more important. 36 | node.fill = (0.6, 0.8, 1.0, 0.8 * node.weight) 37 | 38 | g.prune(0) 39 | 40 | # This node's label is different from its id. 41 | # We'll make it a hyperlink, see the href attribute at the bottom. 42 | g["1"].text.string = "home" 43 | 44 | # The export() command generates a folder with an index.html 45 | # that displays the graph using an interactive, force-based spring layout.
46 | # You can drag the nodes around - open index.html in a browser and try it out! 47 | # The layout can be tweaked in many ways: 48 | 49 | g.export(os.path.join(os.path.dirname(__file__), "test"), 50 | width = 700, # <canvas> width. 51 | height = 500, # <canvas> height. 52 | frames = 500, # Number of frames of animation. 53 | directed = True, # Visualize eigenvector centrality as an edge arrow? 54 | weighted = 0.5, # Visualize betweenness centrality as a node shadow? 55 | pack = True, # Keep clusters close together + visualize node weight as node radius? 56 | distance = 10, # Average edge length. 57 | k = 4.0, # Force constant. 58 | force = 0.01, # Force dampener. 59 | repulsion = 50, # Force radius. 60 | stylesheet = DEFAULT, # INLINE, DEFAULT, None or the path to your own stylesheet. 61 | javascript = None, 62 | href = {"1": "http://www.clips.ua.ac.be/pages/pattern-graph"}, # Node.id => URL 63 | css = {"1": "node-link-docs"} # Node.id => CSS class. 64 | ) 65 | -------------------------------------------------------------------------------- /examples/01-web/04-twitter.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Twitter, hashtags 12 | from pattern.db import Datasheet, pprint, pd 13 | 14 | # This example retrieves tweets containing given keywords from Twitter. 15 | 16 | try: 17 | # We'll store tweets in a Datasheet. 18 | # A Datasheet is a table of rows and columns that can be exported as a CSV-file. 19 | # In the first column, we'll store a unique id for each tweet. 20 | # We only want to add the latest tweets, i.e., those we haven't seen yet. 21 | # With an index on the first column we can quickly check if an id already exists. 22 | # The pd() function returns the parent directory of this script + any given path. 23 | table = Datasheet.load(pd("cool.csv")) 24 | index = set(table.columns[0]) 25 | except: 26 | table = Datasheet() 27 | index = set() 28 | 29 | engine = Twitter(language="en") 30 | 31 | # With Twitter.search(cached=False), a "live" request is sent to Twitter: 32 | # we get the most recent results instead of those in the local cache. 33 | # Keeping a local cache can also be useful (e.g., while testing) 34 | # because a query is instant when it is executed the second time. 35 | prev = None 36 | for i in range(2): 37 | print(i) 38 | for tweet in engine.search("is cooler than", start=prev, count=25, cached=False): 39 | print("") 40 | print(tweet.text) 41 | print(tweet.author) 42 | print(tweet.date) 43 | print(hashtags(tweet.text)) # Keywords in tweets start with a "#". 44 | print("") 45 | # Only add the tweet to the table if it doesn't already exist. 46 | if len(table) == 0 or tweet.id not in index: 47 | table.append([tweet.id, tweet.text]) 48 | index.add(tweet.id) 49 | # Continue mining older tweets in next iteration. 50 | prev = tweet.id 51 | 52 | # Create a .csv in pattern/examples/01-web/ 53 | table.save(pd("cool.csv")) 54 | 55 | print("Total results: %s" % len(table)) 56 | print("") 57 | 58 | # Print all the rows in the table. 59 | # Since it is stored as a CSV-file it grows comfortably each time the script runs. 60 | # We can also open the table later on: in other scripts, for further analysis, ...
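# For example, reloading it in a later session (a sketch):
#table = Datasheet.load(pd("cool.csv"))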
61 | 62 | pprint(table, truncate=100) 63 | 64 | # Note: you can also search tweets by author: 65 | # Twitter().search("from:tom_de_smedt") 66 | -------------------------------------------------------------------------------- /examples/07-canvas/03-transformation.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | transformations 5 | 6 | 7 | 8 | 9 | 60 | 61 | -------------------------------------------------------------------------------- /examples/03-en/06-wordnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import wordnet 11 | from pattern.en import NOUN, VERB 12 | 13 | # WordNet is a lexical database for the English language. 14 | # It groups English words into sets of synonyms called synsets, provides short, general definitions, 15 | # and records the various semantic relations between these synonym sets. 16 | 17 | # For a given word, WordNet yields a list of synsets that 18 | # represent different "senses" in which the word can be understood. 19 | for synset in wordnet.synsets("train", pos=NOUN): 20 | print("Description: %s" % synset.gloss) # Definition string. 21 | print(" Synonyms: %s" % synset.senses) # List of synonyms in this sense. 22 | print(" Hypernym: %s" % synset.hypernym) # Synset one step higher in the semantic network. 23 | print(" Hyponyms: %s" % synset.hyponyms()) # List of synsets that are more specific. 24 | print(" Holonyms: %s" % synset.holonyms()) # List of synsets of which this synset is part/member. 25 | print(" Meronyms: %s" % synset.meronyms()) # List of synsets that are part/member of this synset. 26 | print("") 27 | 28 | # What is the common ancestor (hypernym) of "cat" and "dog"? 29 | a = wordnet.synsets("cat")[0] 30 | b = wordnet.synsets("dog")[0] 31 | print("Common ancestor: %s" % wordnet.ancestor(a, b)) 32 | print("") 33 | 34 | # Synset.hypernyms(recursive=True) returns all parents of the synset, 35 | # Synset.hyponyms(recursive=True) returns all children, 36 | # optionally up to a given depth. 37 | # What kind of animal nouns are also verbs? 38 | synset = wordnet.synsets("animal")[0] 39 | for s in synset.hyponyms(recursive=True, depth=2): 40 | for word in s.senses: 41 | if word in wordnet.VERBS(): 42 | print("%s => %s" % (word, wordnet.synsets(word, pos=VERB))) 43 | 44 | # Synset.similarity() returns an estimate of the semantic similarity to another synset, 45 | # based on Lin's semantic distance measure and Resnik Information Content. 46 | # Lower values indicate higher similarity. 
47 | a = wordnet.synsets("cat")[0] # river, bicycle 48 | s = [] 49 | for word in ["poodle", "cat", "boat", "carrot", "rocket", 50 | "spaghetti", "idea", "grass", "education", 51 | "lake", "school", "balloon", "lion"]: 52 | b = wordnet.synsets(word)[0] 53 | s.append((a.similarity(b), word)) 54 | print("") 55 | print("Similarity to %s: %s" % (a.senses[0], sorted(s))) 56 | print("") 57 | -------------------------------------------------------------------------------- /pattern/text/en/wordlist/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | VECTOR | WORDLIST ################################################################### 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import print_function 10 | from __future__ import unicode_literals 11 | from __future__ import division 12 | 13 | from builtins import str, bytes, dict, int 14 | from builtins import map, zip, filter 15 | from builtins import object, range 16 | 17 | import os 18 | from io import open 19 | 20 | try: 21 | MODULE = os.path.dirname(os.path.realpath(__file__)) 22 | except: 23 | MODULE = "" 24 | 25 | 26 | class Wordlist(object): 27 | 28 | def __init__(self, name, data=[]): 29 | """ Lazy read-only list of words. 30 | """ 31 | self._name = name 32 | self._data = data 33 | 34 | def _load(self): 35 | if not self._data: 36 | self._data = open(os.path.join(MODULE, self._name + ".txt")).read().split(", ") 37 | 38 | def __repr__(self): 39 | self._load() 40 | return repr(self._data) 41 | 42 | def __iter__(self): 43 | self._load() 44 | return iter(self._data) 45 | 46 | def __len__(self): 47 | self._load() 48 | return len(self._data) 49 | 50 | def __contains__(self, w): 51 | self._load() 52 | return w in self._data 53 | 54 | def __add__(self, iterable): 55 | self._load() 56 | return Wordlist(None, data=sorted(self._data + list(iterable))) 57 | 58 | def __getitem__(self, i): 59 | self._load() 60 | return self._data[i] 61 | 62 | def __setitem__(self, i, v): 63 | self._load() 64 | self._data[i] = v 65 | 66 | def insert(self, i, v): 67 | self._load() 68 | self._data.insert(i, v) 69 | 70 | def append(self, v): 71 | self._load() 72 | self._data.append(v) 73 | 74 | def extend(self, v): 75 | self._load() 76 | self._data.extend(v) 77 | 78 | ACADEMIC = Wordlist("academic") # English academic words. 79 | BASIC = Wordlist("basic") # English basic words (850) that express 90% of concepts. 80 | PROFANITY = Wordlist("profanity") # English swear words. 81 | TIME = Wordlist("time") # English time and date words. 82 | STOPWORDS = Wordlist("stopwords") # English stop words ("a", "the", ...). 
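# For example, a quick membership test (sketch):
# from pattern.text.en.wordlist import STOPWORDS
# print("the" in STOPWORDS) # True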
83 | 84 | # Note: if used for lookups, performance can be increased by using a dict: 85 | # blacklist = dict.fromkeys(PROFANITY+TIME, True) 86 | # for i in range(1000): 87 | # corpus.append(Document(src[i], exclude=blacklist)) 88 | -------------------------------------------------------------------------------- /examples/06-graph/06-commonsense.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.graph.commonsense import Commonsense 11 | 12 | # A semantic network is a graph in which each node represents a concept 13 | # (e.g., flower, red, rose) and each edge represents a relation between 14 | # concepts, for example rose is-a flower, red is-property-of rose. 15 | 16 | # Module pattern.graph.commonsense implements a semantic network of commonsense. 17 | # It contains a Concept class (Node subclass), Relation class (Edge subclass), 18 | # and a Commonsense class (Graph subclass). 19 | # It contains about 10,000 manually annotated relations between mundane concepts, 20 | # for example gondola is-related-to romance, or spoon is-related-to soup. 21 | # This is the PERCEPTION dataset. See the visualizer at: 22 | # http://nodebox.net/perception/ 23 | 24 | # Relation.type can be: 25 | # - is-a, 26 | # - is-part-of, 27 | # - is-opposite-of, 28 | # - is-property-of, 29 | # - is-related-to, 30 | # - is-same-as, 31 | # - is-effect-of. 32 | 33 | g = Commonsense() 34 | g.add_node("spork") 35 | g.add_edge("spork", "spoon", type="is-a") 36 | 37 | # Concept.halo is a list of concepts surrounding the given concept, 38 | # which as such reinforce its meaning: 39 | print() 40 | print(g["spoon"].halo) # fork, etiquette, slurp, hot, soup, mouth, etc. 41 | 42 | # Concept.properties is a list of properties (= adjectives) in the halo, 43 | # sorted by betweenness centrality: 44 | print() 45 | print(g["spoon"].properties) # hot 46 | 47 | 48 | # Commonsense.field() returns a list of concepts 49 | # that belong to the given class (or "semantic field"): 50 | print() 51 | print(g.field("color", depth=3, fringe=2)) # brown, orange, blue, ... 52 | #print g.field("person") # Leonard Nimoy, Al Capone, ... 53 | #print g.field("building") # opera house, supermarket, ... 54 | 55 | # Commonsense.similarity() calculates the similarity between two concepts, 56 | # based on common properties between both 57 | # (e.g., tigers and zebras are both striped). 58 | print() 59 | print(g.similarity("tiger", "zebra")) 60 | print(g.similarity("tiger", "amoeba")) 61 | 62 | # Commonsense.nearest_neighbors() compares the properties of a given concept 63 | # to a list of other concepts, and selects the concept from the list that 64 | # is most similar to the given concept. 65 | # This will take some time to calculate (thinking is hard). 66 | print() 67 | print("Creepy animals:") 68 | print(g.nearest_neighbors("creepy", g.field("animal"))[:10]) 69 | print() 70 | print("Party animals:") 71 | print(g.nearest_neighbors("party", g.field("animal"))[:10]) 72 | 73 | # Creepy animals are: owl, vulture, octopus, bat, raven, ... 74 | # Party animals are: puppy, grasshopper, reindeer, dog, ...
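# Since we added "spork" above, it is now part of the network too.
# A quick sketch (the exact halo depends on the dataset):
print()
print(g["spork"].halo)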
75 | -------------------------------------------------------------------------------- /pattern/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN ####################################################################################### 2 | 3 | # Authors: Tom De Smedt , Walter Daelemans 4 | # License: BSD License, see LICENSE.txt 5 | 6 | #### BSD LICENSE ################################################################################### 7 | 8 | # Copyright (c) 2010 University of Antwerp, Belgium 9 | # All rights reserved. 10 | # 11 | # Redistribution and use in source and binary forms, with or without 12 | # modification, are permitted provided that the following conditions are met: 13 | # 14 | # * Redistributions of source code must retain the above copyright 15 | # notice, this list of conditions and the following disclaimer. 16 | # * Redistributions in binary form must reproduce the above copyright 17 | # notice, this list of conditions and the following disclaimer in 18 | # the documentation and/or other materials provided with the 19 | # distribution. 20 | # * Neither the name of Pattern nor the names of its 21 | # contributors may be used to endorse or promote products 22 | # derived from this software without specific prior written 23 | # permission. 24 | # 25 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 29 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 33 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 35 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 | # POSSIBILITY OF SUCH DAMAGE. 37 | # 38 | # CLiPS Computational Linguistics Group, University of Antwerp, Belgium 39 | # http://www.clips.ua.ac.be/pages/pattern 40 | 41 | from __future__ import unicode_literals 42 | 43 | ### CREDITS ######################################################################################## 44 | 45 | __author__ = "Tom De Smedt" 46 | __credits__ = "Tom De Smedt, Walter Daelemans" 47 | __version__ = "3.6" 48 | __copyright__ = "Copyright (c) 2010 University of Antwerp (BE)" 49 | __license__ = "BSD" 50 | 51 | #################################################################################################### 52 | 53 | import os 54 | 55 | # Shortcuts to pattern.en, pattern.es, ... 56 | # (instead of pattern.text.en, pattern.text.es, ...) 
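# For example, "from pattern.en import parse" is a shorthand
# for "from pattern.text.en import parse".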
57 | try: 58 | __path__.append(os.path.join(__path__[0], "text")) 59 | except: 60 | pass 61 | -------------------------------------------------------------------------------- /examples/05-vector/05-nb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | 5 | from builtins import str, bytes, dict, int 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.vector import Document, Model, NB 12 | from pattern.db import Datasheet 13 | 14 | # Naive Bayes is one of the oldest classifiers, 15 | # but it is still popular because it is fast for models 16 | # that have many documents and many features. 17 | # It is outperformed by KNN and SVM, but useful as a baseline for tests. 18 | 19 | # We'll test it with a corpus of spam e-mail messages, 20 | # included in the test suite, stored as a CSV-file. 21 | # The corpus contains mostly technical e-mail from developer mailing lists. 22 | data = os.path.join(os.path.dirname(__file__), "..", "..", "test", "corpora", "spam-apache.csv") 23 | data = Datasheet.load(data) 24 | 25 | documents = [] 26 | for score, message in data: 27 | document = Document(message, type=int(score) > 0) 28 | documents.append(document) 29 | m = Model(documents) 30 | 31 | print("number of documents:", len(m)) 32 | print("number of words:", len(m.vector)) 33 | print("number of words (average):", sum(len(d.features) for d in m.documents) / float(len(m))) 34 | print() 35 | 36 | # Train Naive Bayes on all documents. 37 | # Each document has a type: True for actual e-mail, False for spam. 38 | # This results in a "binary" classifier that either answers True or False 39 | # for unknown documents. 40 | classifier = NB() 41 | for document in m: 42 | classifier.train(document) 43 | 44 | # We can now ask it questions about unknown e-mails: 45 | 46 | print(classifier.classify("win money")) # False: most likely spam. 47 | print(classifier.classify("fix bug")) # True: most likely a real message. 48 | print() 49 | 50 | print(classifier.classify("customer")) # False: people don't talk like this on developer lists... 51 | print(classifier.classify("guys")) # True: because most likely everyone knows everyone. 52 | print() 53 | 54 | # To test the accuracy of a classifier, 55 | # we typically use 10-fold cross validation. 56 | # This means that 10 individual tests are performed, 57 | # each with 90% of the corpus as training data and 10% as testing data. 58 | from pattern.vector import k_fold_cv 59 | print(k_fold_cv(NB, documents=m, folds=10)) 60 | 61 | # This yields 5 scores: (Accuracy, Precision, Recall, F-score, standard deviation). 62 | # Accuracy in itself is not very useful, 63 | # since some spam may have been regarded as real messages (false positives), 64 | # and some real messages may have been regarded as spam (false negatives). 65 | # Precision = how accurately false positives are discarded, 66 | # Recall = how accurately false negatives are discarded. 67 | # F-score = harmonic mean of precision and recall. 68 | # stdev = folds' variation from average F-score.
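# As a rougher sanity check, a minimal sketch of a single 90/10 holdout split
# (assuming that NB, like the other classifiers in pattern.vector, accepts a
# train= list of documents):
#split = int(len(documents) * 0.9)
#nb = NB(train=documents[:split])
#correct = sum(1 for d in documents[split:] if nb.classify(d) == d.type)
#print(correct / float(len(documents[split:])))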
69 | -------------------------------------------------------------------------------- /examples/04-search/02-constraint.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search, Pattern, Constraint 11 | from pattern.en import parsetree, parse, Sentence 12 | 13 | # What we call a "search word" in example 01-search.py 14 | # is actually called a constraint, because it can contain different options. 15 | # Options are separated by "|". 16 | # The next search pattern retrieves words that are a noun OR an adjective: 17 | s = parsetree("big white rabbit") 18 | print(search("NN|JJ", s)) 19 | print("") 20 | 21 | # This pattern yields phrases containing an adjective followed by a noun. 22 | # Consecutive constraints are separated by a space: 23 | print(search("JJ NN", s)) 24 | print("") 25 | 26 | # Or a noun preceded by any number of adjectives: 27 | print(search("JJ?+ NN", s)) 28 | print("") 29 | 30 | # Note: NN marks singular nouns, NNS marks plural nouns. 31 | # If you want to include both, use "NN*" as a constraint. 32 | # This works for NN*, VB*, JJ*, RB*. 33 | 34 | s = parsetree("When I sleep the big white rabbit will stare at my feet.") 35 | m = search("rabbit stare at feet", s) 36 | print(s) 37 | print(m) 38 | print("") 39 | # Why does this work? 40 | # The word "will" is included in the result, even if the pattern does not define it. 41 | # The pattern should break when it does not encounter "stare" after "rabbit." 42 | # It works because "will stare" is one verb chunk. 43 | # The "stare" constraint matches the head word of the chunk ("stare"), 44 | # so "will stare" is considered an overspecified version of "stare". 45 | # The same happens with the "feet" constraint, which matches the chunk "my feet", 46 | # and the "rabbit" constraint, which matches the overspecified chunk "the big white rabbit". 47 | 48 | p = Pattern.fromstring("rabbit stare at feet", s) 49 | p.strict = True # Now it matches only what the pattern explicitly defines (=no match). 50 | m = p.search(s) 51 | print(m) 52 | print("") 53 | 54 | # Sentence chunks can be matched by tag (e.g. NP, VP, ADJP). 55 | # The pattern below matches anything from 56 | # "the rabbit gnaws at your fingers" to 57 | # "the white rabbit looks at the carrots": 58 | p = Pattern.fromstring("rabbit VP at NP", s) 59 | m = p.search(s) 60 | print(m) 61 | print("") 62 | 63 | if m: 64 | for w in m[0].words: 65 | print("%s\t=> %s" % (w, m[0].constraint(w))) 66 | 67 | print("") 68 | print("-------------------------------------------------------------") 69 | # Finally, constraints can also include regular expressions.
70 | # To include them we need to use the full syntax instead of the search() function: 71 | import re 72 | r = re.compile(r"[0-9.]+") # all numbers 73 | p = Pattern() 74 | p.sequence.append(Constraint(words=[r])) 75 | p.sequence.append(Constraint(tags=["NN*"])) 76 | 77 | s = Sentence(parse("I have 9.5 rabbits.")) 78 | print(s) 79 | print(p.search(s)) 80 | print("") 81 | -------------------------------------------------------------------------------- /examples/04-search/04-taxonomy.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search, taxonomy, Classifier 11 | from pattern.en import parsetree 12 | 13 | # The search module includes a Taxonomy class 14 | # that can be used to define semantic word types. 15 | # For example, consider that you want to extract flower names from a text. 16 | # This would make search patterns somewhat unwieldy: 17 | # search("rose|lily|daisy|daffodil|begonia", txt). 18 | 19 | # A better approach is to use the taxonomy: 20 | for flower in ("rose", "lily", "daisy", "daffodil", "begonia"): 21 | taxonomy.append(flower, type="flower") 22 | 23 | print(taxonomy.children("flower")) 24 | print(taxonomy.parents("rose")) 25 | print(taxonomy.classify("rose")) # Yields the most recently added parent. 26 | print("") 27 | 28 | # Taxonomy terms can be included in a pattern by using uppercase: 29 | t = parsetree("A field of white daffodils.", lemmata=True) 30 | m = search("FLOWER", t) 31 | print(t) 32 | print(m) 33 | print("") 34 | 35 | # Another example: 36 | taxonomy.append("chicken", type="food") 37 | taxonomy.append("chicken", type="bird") 38 | taxonomy.append("penguin", type="bird") 39 | taxonomy.append("bird", type="animal") 40 | print(taxonomy.parents("chicken")) 41 | print(taxonomy.children("animal", recursive=True)) 42 | print(search("FOOD", "I'm eating chicken.")) 43 | print("") 44 | 45 | # The advantage is that the taxonomy can hold an entire hierarchy. 46 | # For example, "flower" could be classified as "organism". 47 | # Other organisms could be defined as well (insects, trees, mammals, ...) 48 | # The ORGANISM constraint then matches everything that is an organism. 49 | 50 | # A taxonomy entry can also be a proper name containing spaces 51 | # (e.g. "windows vista", case insensitive). 52 | # It will be detected as long as it is contained in a single chunk: 53 | taxonomy.append("windows vista", type="operating system") 54 | taxonomy.append("ubuntu", type="operating system") 55 | 56 | t = parsetree("Which do you like more, Windows Vista, or Ubuntu?") 57 | m = search("OPERATING_SYSTEM", t) 58 | print(t) 59 | print(m) 60 | print(m[0].constituents()) 61 | print("") 62 | 63 | # Taxonomy entries cannot have wildcards (*), 64 | # but you can use a classifier to simulate this. 65 | # Classifiers are quite slow but useful in many ways. 66 | # For example, a classifier could be written to dynamically 67 | # retrieve word categories from WordNet; see the sketch below.
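# A minimal sketch of such a classifier (an assumption based on the
# pattern.en.wordnet API, where synsets() returns Synset objects and
# Synset.hypernyms() climbs the hierarchy; wordnet_parents is illustrative):
#from pattern.en import wordnet
#
#def wordnet_parents(word):
#    try:
#        return [s.synonyms[0] for s in wordnet.synsets(word)[0].hypernyms()]
#    except IndexError: # Unknown word.
#        return []
#
#taxonomy.classifiers.append(Classifier(parents=wordnet_parents))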
68 | 69 | 70 | def find_parents(word): 71 | if word.startswith(("mac os", "windows", "ubuntu")): 72 | return ["operating system"] 73 | c = Classifier(parents=find_parents) 74 | taxonomy.classifiers.append(c) 75 | 76 | t = parsetree("I like Mac OS X 10.5 better than Windows XP or Ubuntu.") 77 | m = search("OPERATING_SYSTEM", t) 78 | print(t) 79 | print(m) 80 | print(m[0].constituents()) 81 | print(m[1].constituents()) 82 | print("") 83 | -------------------------------------------------------------------------------- /docs/js/shBrushPython.js: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | ;(function() 18 | { 19 | // CommonJS 20 | typeof(require) != 'undefined' ? SyntaxHighlighter = require('shCore').SyntaxHighlighter : null; 21 | 22 | function Brush() 23 | { 24 | // Contributed by Gheorghe Milas and Ahmad Sherif 25 | 26 | var keywords = 'and assert break class continue def del elif else ' + 27 | 'except exec finally for from global if import in is ' + 28 | 'lambda not or pass print raise return try yield while'; 29 | 30 | var funcs = '__import__ abs all any apply basestring bin bool buffer callable ' + 31 | 'chr classmethod cmp coerce compile complex delattr dict dir ' + 32 | 'divmod enumerate eval execfile file filter float format frozenset ' + 33 | 'getattr globals hasattr hash help hex id input int intern ' + 34 | 'isinstance issubclass iter len list locals long map max min next ' + 35 | 'object oct open ord pow property range raw_input reduce ' + 36 | 'reload repr reversed round set setattr slice sorted staticmethod ' + 37 | 'str sum super tuple type type unichr unicode vars xrange zip'; 38 | 39 | var special = 'None True False self cls class_'; 40 | 41 | this.regexList = [ 42 | { regex: SyntaxHighlighter.regexLib.singleLinePerlComments, css: 'comments1' }, 43 | { regex: /^\s*@\w+/gm, css: 'decorator' }, 44 | { regex: /(['\"]{3})([^\1])*?\1/gm, css: 'comments2' }, 45 | { regex: /"(?!")(?:\.|\\\"|[^\""\n])*"/gm, css: 'string' }, 46 | { regex: /'(?!')(?:\.|(\\\')|[^\''\n])*'/gm, css: 'string' }, 47 | { regex: /\b\d+\.?\w*/g, css: 'value' }, 48 | { regex: /def ([^\()]+)\(/g, func: function(match, r) { 49 | return [ 50 | new SyntaxHighlighter.Match("def ", match.index, "keyword"), 51 | new SyntaxHighlighter.Match(match[1], match.index+4, "name") 52 | ]; } }, 53 | { regex: /class ([0-9a-zA-Z]+)(\(|:)/g, func: function(match, r) { 54 | return [ 55 | new SyntaxHighlighter.Match("class ", match.index, "keyword"), 56 | new SyntaxHighlighter.Match(match[1], match.index+6, "name") 57 | ]; } }, 58 | { regex: new RegExp(this.getKeywords(funcs), 'gmi'), css: 'functions' }, 59 | { regex: new RegExp(this.getKeywords(keywords), 'gm'), css: 'keyword' }, 60 | { regex: new RegExp(this.getKeywords(special), 'gm'), css: 'color1' } 61 | ]; 62 | 63 | this.forHtmlScript(SyntaxHighlighter.regexLib.aspScriptTags); 64 | }; 65 | 66 | Brush.prototype = new SyntaxHighlighter.Highlighter(); 67 | Brush.aliases = ['py', 'python']; 68 | 69 | SyntaxHighlighter.brushes.Python = Brush; 70 | 71 | // CommonJS 72 | typeof(exports) != 'undefined' ? 
exports.Brush = Brush : null; 73 | })(); 74 | -------------------------------------------------------------------------------- /examples/01-web/08-wiktionary.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Wiktionary, DOM 11 | from pattern.db import csv, pd 12 | 13 | # This example retrieves male and female given names from Wiktionary (http://en.wiktionary.org). 14 | # It then trains a classifier that can predict the gender of unknown names (about 78% correct). 15 | # The classifier is small (80KB) and fast. 16 | 17 | w = Wiktionary(language="en") 18 | f = csv() # csv() is a short alias for Datasheet(). 19 | 20 | # Collect male and female given names from Wiktionary. 21 | # Store the data as (name, gender)-rows in a CSV-file. 22 | # The pd() function returns the parent directory of the current script, 23 | # so pd("given-names.csv") = pattern/examples/01-web/given-names.csv. 24 | 25 | for gender in ("male", "female"): 26 | for ch in ("abcdefghijklmnopqrstuvwxyz"): 27 | p = w.search("Appendix:%s_given_names/%s" % (gender.capitalize(), ch.capitalize()), cached=True) 28 | for name in p.links: 29 | if not name.startswith("Appendix:"): 30 | f.append((name, gender[0])) 31 | f.save(pd("given-names.csv")) 32 | print(ch, gender) 33 | 34 | # Create a classifier that predicts gender based on name. 35 | 36 | from pattern.vector import SVM, chngrams, count, kfoldcv 37 | 38 | 39 | class GenderByName(SVM): 40 | 41 | def train(self, name, gender=None): 42 | SVM.train(self, self.vector(name), gender) 43 | 44 | def classify(self, name): 45 | return SVM.classify(self, self.vector(name)) 46 | 47 | def vector(self, name): 48 | """ Returns a dictionary with character bigrams and suffix. 49 | For example, "Felix" => {"Fe":1, "el":1, "li":1, "ix":1, "ix$":1, 5:1} 50 | """ 51 | v = chngrams(name, n=2) 52 | v = count(v) 53 | v[name[-2:] + "$"] = 1 54 | v[len(name)] = 1 55 | return v 56 | 57 | data = csv(pd("given-names.csv")) 58 | 59 | # Test average (accuracy, precision, recall, F-score, standard deviation). 60 | 61 | print(kfoldcv(GenderByName, data, folds=3)) # (0.81, 0.79, 0.77, 0.78, 0.00) 62 | 63 | # Train and save the classifier in the current folder. 64 | # With final=True, discards the original training data (= smaller file). 65 | 66 | g = GenderByName(train=data) 67 | g.save(pd("gender-by-name.svm"), final=True) 68 | 69 | # Next time, we can simply load the trained classifier. 70 | # Keep in mind that the script that loads the classifier 71 | # must include the code for the GenderByName class description, 72 | # otherwise Python won't know how to load the data. 73 | 74 | g = GenderByName.load(pd("gender-by-name.svm")) 75 | 76 | for name in ( 77 | "Felix", 78 | "Felicia", 79 | "Rover", 80 | "Kitty", 81 | "Legolas", 82 | "Arwen", 83 | "Jabba", 84 | "Leia", 85 | "Flash", 86 | "Barbarella"): 87 | print(name, g.classify(name)) 88 | 89 | # In the example above, Arwen and Jabba are misclassified. 
90 | # We can of course improve the classifier by hand: 91 | 92 | #g.train("Arwen", gender="f") 93 | #g.train("Jabba", gender="m") 94 | #g.save(pd("gender-by-name.svm"), final=True) 95 | #print(g.classify("Arwen")) 96 | #print(g.classify("Jabba")) 97 | -------------------------------------------------------------------------------- /pattern/text/en/wordlist/profanity.txt: -------------------------------------------------------------------------------- 1 | anus, arse, arsehole, ass, ass-hat, ass-jabber, ass-pirate, assbag, assbandit, assbanger, assbite, assclown, asscock, asscracker, asses, assface, assfuck, assfucker, assgoblin, asshat, asshead, asshole, asshopper, assjacker, asslick, asslicker, assmonkey, assmunch, assmuncher, assnigger, asspirate, assshit, assshole, asssucker, asswad, asswipe, balls, bampot, bastard, beaner, bint, bitch, bitchass, bitches, bitchtits, bitchy, bloody, blowjob, blowjob, bollocks, bollox, boner, brotherfucker, bugger, bullshit, bumblefuck, butt plug, butt-pirate, buttfucka, buttfucker, camel toe, carpetmuncher, chinc, chink, choad, chode, clit, clitface, clitfuck, clusterfuck, cock, cockass, cockbite, cockburger, cockface, cockfucker, cockhead, cockjockey, cockknoker, cockmaster, cockmongler, cockmongruel, cockmonkey, cockmuncher, cocknose, cocknugget, cockshit, cocksmith, cocksmoke, cocksmoker, cocksniffer, cocksucker, cockwaffle, coochie, coochy, coon, cooter, cracker, cum, cumbubble, cumdumpster, cumguzzler, cumjockey, cumslut, cumtart, cunnie, cunnilingus, cunt, cuntass, cuntface, cunthole, cuntlicker, cuntrag, cuntslut, dago, dammit, damn, dang, deggo, dick, dickbag, dickbeaters, dickface, dickfuck, dickfucker, dickhead, dickhole, dickjuice, dickmilk, dickmonger, dicks, dickslap, dicksucker, dicksucking, dickwad, dickweasel, dickweed, dickwod, dike, dildo, dipshit, doochbag, dookie, douche, douche-fag, douchebag, douchewaffle, dumass, dumb ass, dumbass, dumbfuck, dumbshit, dumshit, dyke, fag, fagbag, fagfucker, faggit, faggot, faggotcock, fagtard, fatass, fellatio, feltch, flamer, fool, frickin, friggin, f*ck, fuck, fuckass, fuckbag, fuckboy, fuckbrain, fuckbutt, fucked, fucker, fuckersucker, fuckface, fuckhead, fuckhole, fuckin, fucking, fucknut, fucknutt, fuckoff, fucks, fuckstick, fucktard, fucktart, fuckup, fuckwad, fuckwit, fuckwitt, fudgepacker, gay, gayass, gaybob, gaydo, gayfuck, gayfuckist, gaylord, gaytard, gaywad, goddamn, goddamnit, gooch, gook, gringo, guido, handjob, hard on, heeb, helminth, hell, ho, hoe, hoebag, homo, homodumbshit, honkey, humping, idiot, imbecile, jackass, jap, jerk off, jerk wad, jigaboo, jizz, jungle bunny, junglebunny, kike, kooch, kootch, kraut, kunt, kyke, lameass, lesbian, lesbo, lezzie, mcfagget, mick, midget, minge, moron, mothafucka, mothafuckin, motherfuck, motherfucker, motherfucking, muff, muffdiver, munging, negro, nigaboo, nigga, nigger, niggers, niglet, nutter, nut sack, nutsack, paki, panooch, pecker, peckerhead, penis, penisbanger, penisfucker, penispuffer, piss, pissed, pissed off, pissflaps, polesmoker, pollock, poon, poonani, poonany, poontang, porch monkey, porchmonkey, prick, punanny, punta, pussies, pussy, pussylicking, puto, queef, queer, queerbait, queerhole, renob, retard, rimjob, ruski, sand nigger, sandnigger, schlong, schmuck, scrote, scullion, shag, shit, shitass, shitbag, shitbagger, shitbrains, shitbreath, shitcanned, shitcunt, shitdick, shitface, shitfaced, shithead, shithole, shithouse, shitspitter, shitstain, shitter, shittiest, shitting, shitty, shiz, shiznit, skank, skeet, skullfuck, 
slag, slapper, slut, slutbag, slubberdegullion, smeg, snatch, sodding, sonofabitch, spastic, spic, spick, splooge, spook, sucka, suckass, sucker, suckers, tard, testicle, thundercunt, tit, titfuck, tits, tittyfuck, trollop, twat, twatlips, twats, twatwaffle, unclefucker, va-j-j, vag, vagina, vajayjay, vjayjay, wank, wanker, wankjob, wetback, whore, whorebag, whoreface, wop, wtf -------------------------------------------------------------------------------- /examples/06-graph/04-canvas.html: -------------------------------------------------------------------------------- graph.js example
87 | 88 | -------------------------------------------------------------------------------- /pattern/text/es/es-morphology.txt: -------------------------------------------------------------------------------- 1 | NC mente fhassuf 5 RG x 2 | NC aste fhassuf 4 VMI x 3 | NC iste fhassuf 4 VMI x 4 | NC amos fhassuf 4 VMI x 5 | NC emos fhassuf 4 VMI x 6 | NC imos fhassuf 4 VMI x 7 | NC steis fhassuf 5 VMI x 8 | NC iendo fhassuf 5 VMG x 9 | NC ando fhassuf 4 VMG x 10 | NC ity fhassuf 3 NP x 11 | NC ón fhassuf 2 NCS x 12 | NC nes fhassuf 3 NCP x 13 | NC ara fhassuf 3 SP x 14 | NC omo fhassuf 3 CS x 15 | NC dad fhassuf 3 NCS x 16 | NC ás fhassuf 2 RG x 17 | NC ió fhassuf 2 VMI x 18 | NC rte fhassuf 3 NCS x 19 | NC én fhassuf 2 RG x 20 | NC tre fhassuf 3 SP x 21 | NC aba fhassuf 3 VMI x 22 | NC ial fhassuf 3 AQ x 23 | NC ura fhassuf 3 NCS x 24 | NC mas fhassuf 3 NCP x 25 | NC tor fhassuf 3 NCS x 26 | NC sde fhassuf 3 SP x 27 | NC ene fhassuf 3 VMI x 28 | NC ral fhassuf 3 AQ x 29 | NC rar fhassuf 3 VMN x 30 | NC ble fhassuf 3 AQ x 31 | NC ede fhassuf 3 VMI x 32 | NC cio fhassuf 3 NCS x 33 | NC ito fhassuf 3 NCS x 34 | NC tá fhassuf 2 VAI x 35 | NC ses fhassuf 3 NCP x 36 | NC bra fhassuf 3 NCS x 37 | NC ema fhassuf 3 NCS x 38 | NC ban fhassuf 3 VMI x 39 | NC aje fhassuf 3 NCS x 40 | NC tó fhassuf 2 VMI x 41 | NC zó fhassuf 2 VMI x 42 | NC rra fhassuf 3 NCS x 43 | NC ró fhassuf 2 VMI x 44 | NC mpo fhassuf 3 NCS x 45 | NC ­as fhassuf 2 NCP x 46 | NC ula fhassuf 3 NCS x 47 | NC zar fhassuf 3 VMN x 48 | NC car fhassuf 3 VMN x 49 | NC 000 fhassuf 3 Z x 50 | NC glo fhassuf 3 W x 51 | NC ece fhassuf 3 VMI x 52 | NC mer fhassuf 3 AO x 53 | NC ato fhassuf 3 NCS x 54 | NC ulo fhassuf 3 NCS x 55 | NC eza fhassuf 3 NCS x 56 | NC gos fhassuf 3 NCP x 57 | NC pos fhassuf 3 NCP x 58 | NC upo fhassuf 3 NCS x 59 | NC eso fhassuf 3 NCS x 60 | NC ner fhassuf 3 VMN x 61 | NC ío fhassuf 2 NCS x 62 | NC nar fhassuf 3 VMN x 63 | NC ipo fhassuf 3 NCS x 64 | NC gen fhassuf 3 NCS x 65 | NC cen fhassuf 3 VMI x 66 | NC nó fhassuf 2 VMI x 67 | NC jos fhassuf 3 NCP x 68 | NC nen fhassuf 3 VMI x 69 | NC oso fhassuf 3 AQ x 70 | NC elo fhassuf 3 NCS x 71 | NC tan fhassuf 3 VMI x 72 | NC ela fhassuf 3 NCS x 73 | NC zas fhassuf 3 NCP x 74 | NC rie fhassuf 3 NCS x 75 | NC aso fhassuf 3 NCS x 76 | NC ase fhassuf 3 NCS x 77 | NC ace fhassuf 3 VMI x 78 | NC yor fhassuf 3 AQ x 79 | NC có fhassuf 2 VMI x 80 | NC cal fhassuf 3 AQ x 81 | NC iza fhassuf 3 VMI x 82 | NC uir fhassuf 3 VMN x 83 | NC evo fhassuf 3 AQ x 84 | NC tud fhassuf 3 NCS x 85 | NC jes fhassuf 3 NCP x 86 | NC bro fhassuf 3 NCS x 87 | NC só fhassuf 2 VMI x 88 | NC pal fhassuf 3 AQ x 89 | NC mar fhassuf 3 VMN x 90 | NC lor fhassuf 3 NCS x 91 | NC ayo fhassuf 3 W x 92 | NC blo fhassuf 3 NCS x 93 | NC ten fhassuf 3 VMI x 94 | NC bum fhassuf 3 NCS x 95 | NC gó fhassuf 2 VMI x 96 | NC dre fhassuf 3 NCS x 97 | NC cir fhassuf 3 VMN x 98 | NC lia fhassuf 3 NCS x 99 | NC sco fhassuf 3 NCS x 100 | NC mó fhassuf 2 VMI x 101 | NC 004 fhassuf 3 Z x 102 | NC oda fhassuf 3 DI x 103 | NC igo fhassuf 3 NCS x 104 | NC dó fhassuf 2 VMI x 105 | NC ar fhassuf 2 VMN x 106 | NC ir fhassuf 2 VMN x 107 | NC sma fhassuf 3 AQ x 108 | NC ril fhassuf 3 W x 109 | NC ado fhassuf 3 VMP x 110 | NC ido fhassuf 3 VMP x 111 | NC ico fhassuf 3 AQ x 112 | NC ivo fhassuf 3 AQ x 113 | NC osa fhassuf 3 AQ x 114 | NC oso fhassuf 3 AQ x 115 | NV ía fhassuf 2 VMI x 116 | NC as fhassuf 2 NCP x 117 | NC os fhassuf 2 NCP x 118 | NC es fhassuf 2 NCP x 119 | NC al fhassuf 2 AQ x 120 | NC a fhassuf 1 NCS x 121 | NC o fhassuf 1 
NCS x 122 | NC e fhassuf 1 NCS x 123 | NV ó fhassuf 1 VMI x -------------------------------------------------------------------------------- /examples/07-canvas/05-points.html: -------------------------------------------------------------------------------- canvas.js | path points -------------------------------------------------------------------------------- /docs/js/shThemeDefault.css: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | .syntaxhighlighter { 18 | background-color: white !important; 19 | } 20 | .syntaxhighlighter .line.alt1 { 21 | background-color: white !important; 22 | } 23 | .syntaxhighlighter .line.alt2 { 24 | background-color: white !important; 25 | } 26 | .syntaxhighlighter .line.highlighted.alt1, .syntaxhighlighter .line.highlighted.alt2 { 27 | background-color: #e0e0e0 !important; 28 | } 29 | .syntaxhighlighter .line.highlighted.number { 30 | color: black !important; 31 | } 32 | .syntaxhighlighter table caption { 33 | color: black !important; 34 | } 35 | .syntaxhighlighter .gutter { 36 | color: #afafaf !important; 37 | } 38 | .syntaxhighlighter .gutter .line { 39 | border-right: 3px solid #6ce26c !important; 40 | } 41 | .syntaxhighlighter .gutter .line.highlighted { 42 | background-color: #6ce26c !important; 43 | color: white !important; 44 | } 45 | .syntaxhighlighter.printing .line .content { 46 | border: none !important; 47 | } 48 | .syntaxhighlighter.collapsed { 49 | overflow: visible !important; 50 | } 51 | .syntaxhighlighter.collapsed .toolbar { 52 | color: blue !important; 53 | background: white !important; 54 | border: 1px solid #6ce26c !important; 55 | } 56 | .syntaxhighlighter.collapsed .toolbar a { 57 | color: blue !important; 58 | } 59 | .syntaxhighlighter.collapsed .toolbar a:hover { 60 | color: red !important; 61 | } 62 | .syntaxhighlighter .toolbar { 63 | color: white !important; 64 | background: #6ce26c !important; 65 | border: none !important; 66 | } 67 | .syntaxhighlighter .toolbar a { 68 | color: white !important; 69 | } 70 | .syntaxhighlighter .toolbar a:hover { 71 | color: black !important; 72 | } 73 | .syntaxhighlighter .plain, .syntaxhighlighter .plain a { 74 | color: black !important; 75 | } 76 | .syntaxhighlighter .comments, .syntaxhighlighter .comments a { 77 | color: #008200 !important; 78 | } 79 | .syntaxhighlighter .string, .syntaxhighlighter .string a { 80 | color: blue !important; 81 | } 82 | .syntaxhighlighter .keyword { 83 | color: #006699 !important; 84 | } 85 | .syntaxhighlighter .preprocessor { 86 | color: gray !important; 87 | } 88 | .syntaxhighlighter .variable { 89 | color: #aa7700 !important; 90 | } 91 | .syntaxhighlighter .value { 92 | color: #009900 !important; 93 | } 94 | .syntaxhighlighter .functions { 95 | color: #ff1493 !important; 96 | } 97 | .syntaxhighlighter .constants { 98 | color: #0066cc !important; 99 | } 100 | .syntaxhighlighter .script { 101 | font-weight: bold !important; 102 | color: #006699 !important; 103 | background-color: none !important; 104 | } 105 | .syntaxhighlighter .color1, .syntaxhighlighter
.color1 a { 106 | color: gray !important; 107 | } 108 | .syntaxhighlighter .color2, .syntaxhighlighter .color2 a { 109 | color: #ff1493 !important; 110 | } 111 | .syntaxhighlighter .color3, .syntaxhighlighter .color3 a { 112 | color: red !important; 113 | } 114 | 115 | .syntaxhighlighter .keyword { 116 | font-weight: bold !important; 117 | } 118 | -------------------------------------------------------------------------------- /examples/05-vector/02-model.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | import glob 10 | 11 | from io import open 12 | 13 | from pattern.vector import Document, Model, TF, TFIDF 14 | 15 | # A document is a bag-of-words representation of a text. 16 | # Each word or feature in the document vector has a weight, 17 | # based on how many times the word occurs in the text. 18 | # This weight is called term frequency (TF). 19 | 20 | # Another interesting measure is TF-IDF: 21 | # term frequency-inverse document frequency. 22 | # Suppose that "the" is the most frequent word in the text. 23 | # But it also occurs frequently in many other texts, 24 | # so it is not very specific or "unique" in any one document. 25 | # TF-IDF divides term frequency ("how many times in this text?") 26 | # by the document frequency ("how many times in all texts?") 27 | # to represent this. 28 | 29 | # A Model is a collection of document vectors. 30 | # A Model is a matrix (or vector space) 31 | # with documents as rows, features as columns, and feature weights as cells. 32 | # We can then do calculations on the matrix, 33 | # for example to compute TF-IDF or similarity between documents. 34 | 35 | # Load a model from a folder of text documents: 36 | documents = [] 37 | for f in glob.glob(os.path.join(os.path.dirname(__file__), "corpus", "*.txt")): 38 | text = open(f, encoding="utf-8").read() 39 | name = os.path.basename(f)[:-4] 40 | documents.append(Document(text, name=name)) 41 | 42 | m = Model(documents, weight=TFIDF) 43 | 44 | # We can retrieve documents by name: 45 | d = m.document(name="lion") 46 | 47 | print(d.keywords(top=10)) 48 | print() 49 | print(d.tf("food")) 50 | print(d.tfidf("food")) # TF-IDF is less: "food" is also mentioned with the other animals. 51 | print() 52 | 53 | # We can compare how similar two documents are. 54 | # This is done by calculating the distance between the document vectors 55 | # (i.e., finding those that are near to each other). 56 | 57 | # For example, say we have two vectors with features "x" and "y". 58 | # We can calculate the distance between two points (x, y) in 2-D space: 59 | # d = sqrt(pow(x2 - x1, 2) + pow(y2 - y1, 2)) 60 | # This is the Euclidean distance in 2-D space. 61 | # Similarly, we can calculate the distance in n-D space, 62 | # in other words, for vectors with lots of features. 63 | 64 | # For text, a better metric than Euclidean distance 65 | # is called cosine similarity.
This is what a Model uses: 66 | d1 = m.document(name="lion") 67 | d2 = m.document(name="tiger") 68 | d3 = m.document(name="dolphin") 69 | d4 = m.document(name="shark") 70 | d5 = m.document(name="parakeet") 71 | print("lion-tiger:", m.similarity(d1, d2)) 72 | print("lion-dolphin:", m.similarity(d1, d3)) 73 | print("dolphin-shark:", m.similarity(d3, d4)) 74 | print("dolphin-parakeet:", m.similarity(d3, d5)) 75 | print() 76 | 77 | print("Related to tiger:") 78 | print(m.neighbors(d2, top=3)) # Top three most similar. 79 | print() 80 | 81 | print("Related to a search query ('water'):") 82 | print(m.search("water", top=10)) 83 | 84 | # In summary: 85 | 86 | # A Document: 87 | # - takes a string of text, 88 | # - counts the words in the text, 89 | # - constructs a vector of words (features) and normalized word count (weight). 90 | 91 | # A Model: 92 | # - groups multiple vectors in a matrix, 93 | # - tweaks the weight with TF-IDF to find "unique" words in each document, 94 | # - computes cosine similarity (= distance between vectors), 95 | # - compares documents using cosine similarity. 96 | -------------------------------------------------------------------------------- /examples/05-vector/01-document.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from io import open 11 | 12 | from pattern.vector import Document, PORTER, LEMMA 13 | 14 | # A Document is a "bag-of-words" that splits a string into words and counts them. 15 | # A list of words or dictionary of (word, count)-items can also be given. 16 | 17 | # Words (or more generally "features") and their word count ("feature weights") 18 | # can be used to compare documents. The word count in a document is normalized 19 | # between 0.0-1.0 so that shorter documents can be compared to longer documents. 20 | 21 | # Words can be stemmed or lemmatized before counting them. 22 | # The purpose of stemming is to bring variant forms of a word together. 23 | # For example, "conspiracy" and "conspired" are both stemmed to "conspir". 24 | # Nowadays, lemmatization is usually preferred over stemming, 25 | # e.g., "conspiracies" => "conspiracy", "conspired" => "conspire". 26 | 27 | s = """ 28 | The shuttle Discovery, already delayed three times by technical problems and bad weather, 29 | was grounded again Friday, this time by a potentially dangerous gaseous hydrogen leak 30 | in a vent line attached to the ship's external tank. 31 | The Discovery was initially scheduled to make its 39th and final flight last Monday, 32 | bearing fresh supplies and an intelligent robot for the International Space Station. 33 | But complications delayed the flight from Monday to Friday, 34 | when the hydrogen leak led NASA to conclude that the shuttle would not be ready to launch 35 | before its flight window closed this Monday. 36 | """ 37 | 38 | # With threshold=1, only words that occur more than once are counted. 39 | # With stopwords=False, words like "the", "and", "I", "is" are ignored. 40 | document = Document(s, threshold=1, stopwords=False) 41 | print(document.words) 42 | print() 43 | 44 | # The /corpus folder contains texts mined from Wikipedia.
45 | # Below is the mining script (we already executed it for you): 46 | 47 | #import os, codecs 48 | #from pattern.web import Wikipedia 49 | # 50 | #w = Wikipedia() 51 | #for q in ( 52 | # "badger", "bear", "dog", "dolphin", "lion", "parakeet", 53 | # "rabbit", "shark", "sparrow", "tiger", "wolf"): 54 | # s = w.search(q, cached=True) 55 | # s = s.plaintext() 56 | # print(os.path.join("corpus2", q+".txt")) 57 | # f = open(os.path.join("corpus2", q+".txt"), "w", encoding="utf-8") 58 | # f.write(s) 59 | # f.close() 60 | 61 | # Loading a document from a text file: 62 | f = os.path.join(os.path.dirname(__file__), "corpus", "wolf.txt") 63 | s = open(f, encoding="utf-8").read() 64 | document = Document(s, name="wolf", stemmer=PORTER) 65 | print(document) 66 | print(document.keywords(top=10)) # (weight, feature)-items. 67 | print() 68 | 69 | # Same document, using lemmatization instead of stemming (slower): 70 | document = Document(s, name="wolf", stemmer=LEMMA) 71 | print(document) 72 | print(document.keywords(top=10)) 73 | print() 74 | 75 | # In summary, a document is a bag-of-words representation of a text. 76 | # Bag-of-words means that the word order is discarded. 77 | # The dictionary of words (features) and their normalized word count (weights) 78 | # is also called the document vector: 79 | document = Document("a black cat and a white cat", stopwords=True) 80 | print(document.words) 81 | print(document.vector.features) 82 | for feature, weight in document.vector.items(): 83 | print(feature, weight) 84 | 85 | # Document vectors can be bundled into a Model (next example). 86 | -------------------------------------------------------------------------------- /examples/03-en/07-sentiment.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import sentiment, polarity, subjectivity, positive 11 | 12 | # Sentiment analysis (or opinion mining) attempts to determine if 13 | # a text is objective or subjective, positive or negative. 14 | # The sentiment analysis lexicon bundled in Pattern focuses on adjectives. 15 | # It contains adjectives that occur frequently in customer reviews, 16 | # hand-tagged with values for polarity and subjectivity. 17 | 18 | # The polarity() function measures positive vs. negative, as a number between -1.0 and +1.0. 19 | # The subjectivity() function measures objective vs. subjective, as a number between 0.0 and 1.0. 20 | # The sentiment() function returns an averaged (polarity, subjectivity)-tuple for a given string. 21 | for word in ("amazing", "horrible", "public"): 22 | print(word, sentiment(word)) 23 | 24 | print("") 25 | print(sentiment( 26 | "The movie attempts to be surreal by incorporating time travel and various time paradoxes," 27 | "but it's presented in such a ridiculous way it's seriously boring.")) 28 | 29 | # The input string can be: 30 | # - a string, 31 | # - a Synset (see pattern.en.wordnet), 32 | # - a parsed Sentence, Text, Chunk or Word (see pattern.en), 33 | # - a Document (see pattern.vector). 34 | 35 | # The positive() function returns True if the string's polarity >= threshold. 36 | # The threshold can be lowered or raised, 37 | # but overall for strings with multiple words +0.1 yields the best results. 
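# (In other words, a sketch of the equivalence: positive(s, threshold)
# simply tests polarity(s) >= threshold.)
#print(polarity("good") >= 0.1) # Same as positive("good", threshold=0.1).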
38 | print("") 39 | print("good", positive("good", threshold=0.1)) 40 | print("bad", positive("bad")) 41 | print("") 42 | 43 | # You can also do sentiment analysis in Dutch or French; 44 | # it works exactly the same: 45 | 46 | #from pattern.nl import sentiment as sentiment_nl 47 | #print("In Dutch:") 48 | #print(sentiment_nl("Een onwijs spannend goed boek!")) 49 | 50 | # You can also use Pattern with SentiWordNet. 51 | # You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/ 52 | # Put the file "SentiWordNet*.txt" in pattern/en/wordnet/ 53 | # You can then use Synset.weight() and wordnet.sentiwordnet: 54 | 55 | #from pattern.en import wordnet, ADJECTIVE 56 | #print(wordnet.synsets("horrible", pos=ADJECTIVE)[0].weight) # Yields a (polarity, subjectivity)-tuple. 57 | #print(wordnet.sentiwordnet["horrible"]) 58 | 59 | # For fine-grained analysis, 60 | # the return value of sentiment() has a special "assessments" property. 61 | # Each assessment is a (chunk, polarity, subjectivity, label)-tuple, 62 | # where chunk is a list of words (e.g., "not very good"). 63 | 64 | # The label offers additional meta-information. 65 | # For example, its value is MOOD for emoticons: 66 | 67 | s = "amazing... :/" 68 | print(sentiment(s)) 69 | for chunk, polarity, subjectivity, label in sentiment(s).assessments: 70 | print(chunk, polarity, subjectivity, label) 71 | 72 | # Observe the output. 73 | # The average sentiment is positive because the expression contains "amazing". 74 | # However, the smiley is slightly negative, hinting at the author's bad mood. 75 | # He or she might be using sarcasm. 76 | # We could work this out from the fine-grained analysis. 77 | 78 | from pattern.metrics import avg 79 | 80 | a = sentiment(s).assessments 81 | 82 | score1 = avg([p for chunk, p, s, label in a if label is None]) # average polarity for words 83 | score2 = avg([p for chunk, p, s, label in a if label == "mood"]) # average polarity for emoticons 84 | 85 | if score1 > 0 and score2 < 0: 86 | print("...sarcasm?") 87 | -------------------------------------------------------------------------------- /examples/05-vector/04-KNN.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Twitter 12 | from pattern.en import Sentence, parse 13 | from pattern.search import search 14 | from pattern.vector import Document, Model, KNN 15 | 16 | # Classification is a supervised machine learning method, 17 | # where labeled documents are used as training material 18 | # to learn how to label unlabeled documents. 19 | 20 | # This example trains a simple classifier with Twitter messages. 21 | # The idea is that, if you have a number of texts with a "type" 22 | # (mail/spam, positive/negative, language, author's age, ...), 23 | # you can predict the type of other "unknown" texts. 24 | # The k-Nearest Neighbor algorithm classifies texts according 25 | # to the k documents that are most similar (cosine similarity) to the given input document. 26 | 27 | m = Model() 28 | t = Twitter() 29 | 30 | # First, we mine a model of about 1,000 tweets. 31 | # We'll use hashtags as type.
32 | for page in range(1, 10): 33 | for tweet in t.search('#win OR #fail', start=page, count=100, cached=True): 34 | # If the tweet contains the #win hashtag, we'll set its type to 'WIN': 35 | s = tweet.text.lower() # tweet in lowercase 36 | p = '#win' in s and 'WIN' or 'FAIL' # document labels 37 | s = Sentence(parse(s)) # parse tree with part-of-speech tags 38 | s = search('JJ', s) # adjectives in the tweet 39 | s = [match[0].string for match in s] # adjectives as a list of strings 40 | s = " ".join(s) # adjectives as string 41 | if len(s) > 0: 42 | m.append(Document(s, type=p, stemmer=None)) 43 | 44 | # Train k-Nearest Neighbor on the model. 45 | # Note that this is only a simple example: to build a robust classifier 46 | # you would need a lot more training data (e.g., tens of thousands of tweets). 47 | # The more training data, the more statistically reliable the classifier becomes. 48 | # The only way to really know if your classifier is working correctly 49 | # is to test it with testing data, see the documentation for Classifier.test(). 50 | classifier = KNN(baseline=None) # By default, baseline=MAJORITY 51 | for document in m: # (classify unknown documents with the most frequent type). 52 | classifier.train(document) 53 | 54 | # These are the adjectives the classifier has learned: 55 | print(sorted(classifier.features)) 56 | print() 57 | 58 | # We can now ask it to classify documents containing these words. 59 | # Note that you may get different results than the ones below, 60 | # since you will be mining other (more recent) tweets. 61 | # Again, a robust classifier needs lots and lots of training data. 62 | # If None is returned, the word was not recognized, 63 | # and the classifier returned the default value (see above). 64 | print(classifier.classify('sweet potato burger')) # yields 'WIN' 65 | print(classifier.classify('stupid autocorrect')) # yields 'FAIL' 66 | 67 | # "What can I do with it?" 68 | # In the scientific community, classifiers have been used to predict: 69 | # - the opinion (positive/negative) in product reviews on blogs, 70 | # - the age of users posting on social networks, 71 | # - the author of medieval poems, 72 | # - spam in e-mail messages, 73 | # - lies & deception in text, 74 | # - doubt & uncertainty in text, 75 | # and to: 76 | # - improve search engine query results (e.g., where "jeans" queries also yield "denim" results), 77 | # - win at Jeopardy!, 78 | # - win at rock-paper-scissors, 79 | # and so on... 80 | -------------------------------------------------------------------------------- /examples/01-web/11-facebook.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Facebook, NEWS, COMMENTS, LIKES 11 | from pattern.db import Datasheet, pprint, pd 12 | 13 | # The Facebook API can be used to search public status updates (no license needed). 14 | 15 | # It can also be used to get status updates, comments and the people who liked them, 16 | # from a given profile or product page. 17 | # This requires a personal license key. 18 | # If you are logged in to Facebook, you can get a license key here: 19 | # http://www.clips.ua.ac.be/pattern-facebook 20 | # (We don't / can't store your information). 21 | 22 | # 1) Searching for public status updates.
23 | # Search for all status updates that contain the word "horrible". 24 | 25 | try: 26 | # We'll store the status updates in a Datasheet. 27 | # A Datasheet is a table of rows and columns that can be exported as a CSV-file. 28 | # In the first column, we'll store a unique id for each status update. 29 | # We only want to add new status updates, i.e., those we haven't seen yet. 30 | # With an index on the first column we can quickly check if an id already exists. 31 | table = Datasheet.load(pd("opinions.csv")) 32 | index = set(table.columns[0]) 33 | except: 34 | table = Datasheet() 35 | index = set() 36 | 37 | fb = Facebook() 38 | 39 | # With Facebook.search(cached=False), a "live" request is sent to Facebook: 40 | # we get the most recent results instead of those in the local cache. 41 | # Keeping a local cache can also be useful (e.g., while testing) 42 | # because a query is instant when it is executed the second time. 43 | for status in fb.search("horrible", count=25, cached=False): 44 | print("=" * 100) 45 | print(status.id) 46 | print(status.text) 47 | print(status.author) # Yields an (id, name)-tuple. 48 | print(status.date) 49 | print(status.likes) 50 | print(status.comments) 51 | print("") 52 | # Only add the status update to the table if it doesn't already exist. 53 | if len(table) == 0 or status.id not in index: 54 | table.append([status.id, status.text]) 55 | index.add(status.id) 56 | 57 | # Create a .csv in pattern/examples/01-web/ 58 | table.save(pd("opinions.csv")) 59 | 60 | # 2) Status updates from specific profiles. 61 | # For this you need a personal license key: 62 | # http://www.clips.ua.ac.be/pattern-facebook 63 | 64 | license = "" 65 | 66 | if license != "": 67 | fb = Facebook(license) 68 | # Facebook.profile() returns a dictionary with author info. 69 | # By default, this is your own profile. 70 | # You can also supply the id of another profile, 71 | # or the name of a product page. 72 | me = fb.profile()["id"] 73 | for status in fb.search(me, type=NEWS, count=30, cached=False): 74 | print("-" * 100) 75 | print(status.id) # Status update unique id. 76 | print(status.title) # Status title (i.e., the id of the page or event given as URL). 77 | print(status.text) # Status update text. 78 | print(status.url) # Status update image, external link, ... 79 | if status.comments > 0: 80 | # Retrieve comments on the status update. 81 | print("%s comments:" % status.comments) 82 | print([(x.author, x.text, x.likes) 83 | for x in fb.search(status.id, type=COMMENTS)]) 84 | if status.likes > 0: 85 | # Retrieve likes on the status update. 86 | print("%s likes:" % status.likes) 87 | print([x.author for x in fb.search(status.id, type=LIKES)]) 88 | print("") 89 | -------------------------------------------------------------------------------- /pattern/text/ru/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | RU ################################################################################## 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2010 University of Antwerp, Belgium 4 | # Author: Tom De Smedt 5 | # License: BSD (see LICENSE.txt for details). 6 | # http://www.clips.ua.ac.be/pages/pattern 7 | 8 | #################################################################################################### 9 | # Russian linguistic tools using fast regular expressions.
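# Example usage (a sketch; it assumes the bundled ru-lexicon.txt and
# ru-model.slp data files referenced below are present):
#
# from pattern.ru import parse, suggest
# print(parse("Это хороший пример.")) # Returns a tagged Unicode string.
# print(suggest("превет")) # Returns (word, confidence) spelling suggestions.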
10 | 11 | from __future__ import unicode_literals 12 | from __future__ import division 13 | 14 | from builtins import str, bytes, dict, int 15 | from builtins import map, zip, filter 16 | from builtins import object, range 17 | 18 | import os 19 | import sys 20 | 21 | try: 22 | MODULE = os.path.dirname(os.path.realpath(__file__)) 23 | except: 24 | MODULE = "" 25 | 26 | sys.path.insert(0, os.path.join(MODULE, "..", "..", "..", "..")) 27 | 28 | # Import parser base classes. 29 | from pattern.text import ( 30 | Lexicon, Model, Morphology, Context, Parser as _Parser, ngrams, pprint, commandline, 31 | PUNCTUATION 32 | ) 33 | # Import parser universal tagset. 34 | from pattern.text import ( 35 | penntreebank2universal, 36 | PTB, PENN, UNIVERSAL, 37 | NOUN, VERB, ADJ, ADV, PRON, DET, PREP, ADP, NUM, CONJ, INTJ, PRT, PUNC, X 38 | ) 39 | # Import parse tree base classes. 40 | from pattern.text.tree import ( 41 | Tree, Text, Sentence, Slice, Chunk, PNPChunk, Chink, Word, table, 42 | SLASH, WORD, POS, CHUNK, PNP, REL, ANCHOR, LEMMA, AND, OR 43 | ) 44 | 45 | # Import spelling base class. 46 | from pattern.text import ( 47 | Spelling 48 | ) 49 | 50 | sys.path.pop(0) 51 | 52 | #--- Russian PARSER -------------------------------------------------------------------------------- 53 | 54 | 55 | class Parser(_Parser): 56 | 57 | def find_tags(self, tokens, **kwargs): 58 | if kwargs.get("tagset") in (PENN, None): 59 | kwargs.setdefault("map", lambda token, tag: (token, tag)) 60 | if kwargs.get("tagset") == UNIVERSAL: 61 | kwargs.setdefault("map", lambda token, tag: penntreebank2universal(token, tag)) 62 | return _Parser.find_tags(self, tokens, **kwargs) 63 | 64 | parser = Parser( 65 | lexicon=os.path.join(MODULE, "ru-lexicon.txt"), # A dict of known words => most frequent tag. 66 | frequency=os.path.join(MODULE, "ru-frequency.txt"), # A dict of word frequency. 67 | model=os.path.join(MODULE, "ru-model.slp"), # A SLP classifier trained on WSJ (01-07). 68 | #morphology=os.path.join(MODULE, "en-morphology.txt"), # A set of suffix rules 69 | #context=os.path.join(MODULE, "en-context.txt"), # A set of contextual rules. 70 | #entities=os.path.join(MODULE, "en-entities.txt"), # A dict of named entities: John = NNP-PERS. 71 | #default=("NN", "NNP", "CD"), 72 | language="ru" 73 | ) 74 | 75 | 76 | spelling = Spelling( 77 | path=os.path.join(MODULE, "ru-spelling.txt"), 78 | alphabet='CYRILLIC' 79 | ) 80 | 81 | 82 | def tokenize(s, *args, **kwargs): 83 | """ Returns a list of sentences, where punctuation marks have been split from words. 84 | """ 85 | return parser.find_tokens(s, *args, **kwargs) 86 | 87 | 88 | def parse(s, *args, **kwargs): 89 | """ Returns a tagged Unicode string. 90 | """ 91 | return parser.parse(s, *args, **kwargs) 92 | 93 | 94 | def parsetree(s, *args, **kwargs): 95 | """ Returns a parsed Text from the given string. 96 | """ 97 | return Text(parse(s, *args, **kwargs)) 98 | 99 | 100 | def suggest(w): 101 | """ Returns a list of (word, confidence)-tuples of spelling corrections. 102 | """ 103 | return spelling.suggest(w) --------------------------------------------------------------------------------