├── __init__.py
├── pattern
├── server
│ └── static
│ │ └── robots.txt
├── text
│ ├── xx
│ │ ├── xx-context.txt
│ │ ├── xx-morphology.txt
│ │ ├── xx-frequency.txt
│ │ ├── xx-lexicon.txt
│ │ ├── xx-verbs.txt
│ │ ├── __main__.py
│ │ └── xx-sentiment.xml
│ ├── en
│ │ ├── en-model.slp
│ │ ├── wordnet
│ │ │ └── dict
│ │ │ │ ├── index.32
│ │ │ │ ├── lexnames
│ │ │ │ └── LICENSE.txt
│ │ ├── __main__.py
│ │ └── wordlist
│ │ │ ├── time.txt
│ │ │ ├── __init__.py
│ │ │ └── profanity.txt
│ ├── ru
│ │ ├── ru-model.slp
│ │ ├── __main__.py
│ │ ├── wordlist
│ │ │ └── __init__.py
│ │ └── __init__.py
│ ├── it
│ │ ├── __main__.py
│ │ └── it-context.txt
│ ├── de
│ │ └── __main__.py
│ ├── es
│ │ ├── __main__.py
│ │ └── es-morphology.txt
│ ├── fr
│ │ ├── __main__.py
│ │ └── fr-morphology.txt
│ └── nl
│ │ └── __main__.py
├── vector
│ ├── svm
│ │ ├── macos
│ │ │ ├── libsvm-3.22
│ │ │ │ └── libsvm.so.2
│ │ │ └── liblinear-2.20
│ │ │ │ └── liblinear.so.3
│ │ ├── ubuntu
│ │ │ ├── libsvm-3.22
│ │ │ │ └── libsvm.so.2
│ │ │ └── liblinear-2.20
│ │ │ │ └── liblinear.so.3
│ │ ├── windows
│ │ │ ├── libsvm-3.22
│ │ │ │ └── libsvm.dll
│ │ │ └── liblinear-2.20
│ │ │ │ └── liblinear.dll
│ │ ├── __init__.py
│ │ ├── INSTALL.txt
│ │ ├── COPYRIGHT-liblinear.txt
│ │ └── COPYRIGHT-libsvm.txt
│ ├── stopwords-nl.txt
│ └── stopwords-es.txt
├── web
│ ├── api.py
│ └── utils.py
├── helpers.py
└── __init__.py
├── docs
├── index.html
├── g
│ ├── more.png
│ ├── header.jpg
│ ├── scanner.jpg
│ ├── shadow.png
│ ├── download.gif
│ ├── download3.gif
│ ├── external.png
│ ├── gradient.jpg
│ ├── pattern_graph1.jpg
│ ├── pattern_graph2.jpg
│ ├── pattern_graph3.jpg
│ ├── pattern_graph4.jpg
│ ├── pattern_graph5.jpg
│ ├── pattern_schema.gif
│ ├── paypal-donate.jpg
│ ├── pattern_schema_de.gif
│ ├── pattern_schema_es.gif
│ ├── pattern_schema_fr.gif
│ ├── pattern_schema_it.gif
│ ├── pattern_schema_nl.gif
│ ├── pattern-vector-lsa1.jpg
│ ├── pattern-vector-svm1.jpg
│ ├── pattern-vector-svm2.jpg
│ ├── pattern-canvas-editor.jpg
│ ├── pattern-canvas-editor2.jpg
│ ├── pattern-canvas-filter1.jpg
│ ├── pattern-canvas-filter2.jpg
│ ├── pattern-canvas-filter3.jpg
│ ├── pattern-canvas-filter4.jpg
│ ├── pattern-canvas-origin1.jpg
│ ├── pattern-canvas-origin2.jpg
│ ├── pattern-canvas-path1.jpg
│ ├── pattern-metrics-bell.jpg
│ ├── pattern-canvas-particle1.png
│ ├── pattern-canvas-particle2.png
│ ├── pattern-canvas-particle3.png
│ ├── pattern-metrics-boxplot.jpg
│ ├── pattern-search-taxonomy.jpg
│ ├── pattern-vector-cluster1.jpg
│ ├── pattern-vector-cluster2.jpg
│ ├── pattern_example_100days.jpg
│ ├── pattern_example_italian.jpg
│ ├── pattern_example_spanish.jpg
│ ├── pattern-canvas-primitives1.jpg
│ ├── pattern-canvas-primitives2.jpg
│ ├── pattern-canvas-primitives3.jpg
│ ├── pattern-canvas-primitives4.jpg
│ ├── pattern-canvas-primitives5.jpg
│ ├── pattern-canvas-primitives6.jpg
│ ├── pattern-canvas-supershape1.jpg
│ ├── pattern-canvas-supershape2.jpg
│ ├── pattern-canvas-supershape3.jpg
│ ├── pattern_example_elections.jpg
│ └── pattern_example_semantic_network.jpg
├── desmedt12a.pdf
└── js
│ ├── shBrushXml.js
│ ├── shBrushJScript.js
│ ├── shBrushPython.js
│ └── shThemeDefault.css
├── examples
├── 02-db
│ ├── store.db
│ ├── food.txt
│ ├── 03-date.py
│ └── 02-datasheet.py
├── 08-server
│ ├── 02-api
│ │ └── rate.db
│ ├── 04-db
│ │ └── store.db
│ ├── 01-basic
│ │ └── static
│ │ │ └── cat.jpg
│ └── 03-wiki
│ │ └── data
│ │ └── index.html.txt
├── 03-en
│ ├── texts
│ │ ├── 1701.00002.txt
│ │ ├── 1701.00003.txt
│ │ ├── 1701.00004.txt
│ │ ├── 1701.00005.txt
│ │ ├── 1701.00006.txt
│ │ ├── 1701.00007.txt
│ │ ├── 1701.00008.txt
│ │ ├── 1701.00009.txt
│ │ ├── 1701.00010.txt
│ │ ├── 1701.00011.txt
│ │ ├── 1701.00012.txt
│ │ ├── 1701.00013.txt
│ │ ├── 1701.00014.txt
│ │ ├── 1701.00015.txt
│ │ ├── 1701.00016.txt
│ │ ├── 1701.00017.txt
│ │ ├── 1701.00018.txt
│ │ ├── 1701.00019.txt
│ │ ├── 1701.00020.txt
│ │ ├── 1701.00021.txt
│ │ ├── 1701.00022.txt
│ │ ├── 1701.00023.txt
│ │ ├── 1701.00024.txt
│ │ ├── 1701.00025.txt
│ │ ├── 1701.00026.txt
│ │ ├── 1701.00027.txt
│ │ ├── 1701.00028.txt
│ │ ├── 1701.00029.txt
│ │ ├── 1701.00030.txt
│ │ ├── 1701.00031.txt
│ │ ├── 1701.00032.txt
│ │ ├── 1701.00033.txt
│ │ ├── 1701.00034.txt
│ │ ├── 1701.00035.txt
│ │ ├── 1701.00037.txt
│ │ ├── 1701.00038.txt
│ │ ├── 1701.00039.txt
│ │ ├── 1701.00043.txt
│ │ ├── 1701.00044.txt
│ │ ├── 1701.00045.txt
│ │ ├── 1701.00046.txt
│ │ ├── 1701.00047.txt
│ │ ├── 1701.00049.txt
│ │ ├── 1701.00050.txt
│ │ ├── 1701.00051.txt
│ │ ├── 1701.00052.txt
│ │ ├── 1701.00053.txt
│ │ ├── 1701.00054.txt
│ │ ├── 1701.00055.txt
│ │ ├── 1701.00056.txt
│ │ ├── 1701.00057.txt
│ │ ├── 1701.00058.txt
│ │ ├── 1701.00060.txt
│ │ ├── 1701.00061.txt
│ │ ├── 1701.00062.txt
│ │ ├── 1701.00063.txt
│ │ ├── 1701.00064.txt
│ │ ├── 1701.00065.txt
│ │ ├── 1701.00066.txt
│ │ ├── 1701.00067.txt
│ │ ├── 1701.00068.txt
│ │ ├── 1701.00069.txt
│ │ ├── 1701.00072.txt
│ │ ├── 1701.00073.txt
│ │ ├── 1701.00074.txt
│ │ ├── 1701.00075.txt
│ │ ├── 1701.00076.txt
│ │ ├── 1701.00077.txt
│ │ ├── 1701.00078.txt
│ │ ├── 1701.00079.txt
│ │ ├── 1701.00081.txt
│ │ ├── 1701.00082.txt
│ │ ├── 1701.00083.txt
│ │ ├── 1701.00084.txt
│ │ ├── 1701.00085.txt
│ │ ├── 1701.00086.txt
│ │ ├── 1701.00087.txt
│ │ ├── 1701.00088.txt
│ │ ├── 1701.00089.txt
│ │ ├── 1701.00090.txt
│ │ ├── 1701.00091.txt
│ │ ├── 1701.00092.txt
│ │ ├── 1701.00094.txt
│ │ ├── 1701.00095.txt
│ │ ├── 1701.00096.txt
│ │ ├── 1701.00097.txt
│ │ ├── 1701.00098.txt
│ │ ├── 1701.00099.txt
│ │ ├── 1701.00100.txt
│ │ ├── 1701.00101.txt
│ │ ├── 1701.00102.txt
│ │ ├── 1701.00103.txt
│ │ ├── 1701.00104.txt
│ │ ├── 1701.00105.txt
│ │ ├── 1701.00106.txt
│ │ ├── 1701.00107.txt
│ │ ├── 1701.00109.txt
│ │ ├── 1701.00110.txt
│ │ ├── 1701.00111.txt
│ │ └── 1701.00112.txt
│ ├── 04-tree.py
│ ├── 02-quantify.py
│ ├── 08-topmine_ngrammer.py
│ ├── 03-parse.py
│ ├── 06-wordnet.py
│ └── 07-sentiment.py
├── 07-canvas
│ ├── 02-basic.html
│ ├── 01-basic.html
│ ├── 06-image.html
│ ├── 08-widget.html
│ ├── 04-path.html
│ ├── data-url.html
│ ├── 03-transformation.html
│ └── 05-points.html
├── 06-graph
│ ├── 07-graphml.py
│ ├── 01-graph.py
│ ├── 03-template.py
│ ├── 05-trends.py
│ ├── 02-export.py
│ ├── 06-commonsense.py
│ └── 04-canvas.html
├── 04-search
│ ├── 05-multiple.py
│ ├── 03-lemmata.py
│ ├── 09-web.py
│ ├── 06-optional.py
│ ├── 01-search.py
│ ├── 08-group.py
│ ├── 07-exclude.py
│ ├── 02-constraint.py
│ └── 04-taxonomy.py
├── 01-web
│ ├── 02-google-translate.py
│ ├── 05-twitter-stream.py
│ ├── 15-sort.py
│ ├── 06-feed.py
│ ├── 14-flickr.py
│ ├── 07-wikipedia.py
│ ├── 09-wikia.py
│ ├── 03-bing.py
│ ├── 01-google.py
│ ├── 04-twitter.py
│ ├── 08-wiktionary.py
│ └── 11-facebook.py
└── 05-vector
│ ├── corpus
│ └── parakeet.txt
│ ├── 05-nb.py
│ ├── 02-model.py
│ ├── 01-document.py
│ └── 04-KNN.py
├── test
├── corpora
│ ├── README.txt
│ ├── carroll-wonderland.pdf
│ └── carroll-lookingglass.docx
├── test_graph.js
├── test.html
├── test.js
└── test_ru.py
├── .gitignore
├── .travis.yml
└── LICENSE.txt

/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /pattern/server/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /pattern/text/xx/xx-context.txt: -------------------------------------------------------------------------------- 1 | IN VB PREVTAG PRP 2 | NN VB PREVTAG TO -------------------------------------------------------------------------------- /pattern/text/xx/xx-morphology.txt: -------------------------------------------------------------------------------- 1 | NN s fhassuf 1 NNS x 2 | ly hassuf 2 RB x -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/g/more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/more.png -------------------------------------------------------------------------------- /pattern/text/xx/xx-frequency.txt: -------------------------------------------------------------------------------- 1 | the 1.0000 2 | of 0.5040 3 | and 0.4805 4 | a 0.3941 -------------------------------------------------------------------------------- /docs/g/header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/header.jpg -------------------------------------------------------------------------------- /docs/g/scanner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/scanner.jpg -------------------------------------------------------------------------------- /docs/g/shadow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/shadow.png -------------------------------------------------------------------------------- /docs/desmedt12a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/desmedt12a.pdf -------------------------------------------------------------------------------- /docs/g/download.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/download.gif -------------------------------------------------------------------------------- /docs/g/download3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/download3.gif -------------------------------------------------------------------------------- /docs/g/external.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/external.png -------------------------------------------------------------------------------- /docs/g/gradient.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/gradient.jpg 
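The xx-* files above are the template data for bootstrapping a new language in pattern.text. xx-context.txt holds Brill-style contextual rules ("IN VB PREVTAG PRP" reads: retag IN as VB when the previous token is tagged PRP), xx-morphology.txt holds Brill-style suffix rules ("ly hassuf 2 RB x" tags words ending in the 2-letter suffix "ly" as RB), and xx-frequency.txt lists word frequencies relative to the most frequent word. A minimal sketch of how a PREVTAG rule could be applied; apply_prevtag_rules is a hypothetical helper for illustration, not pattern's actual API:

    # Brill-style PREVTAG contextual rules, as in xx-context.txt.
    # A rule (old, new, prev) reads: retag old -> new if the previous tag is prev.
    def apply_prevtag_rules(tagged, rules):
        for i in range(1, len(tagged)):
            for old, new, prev in rules:
                if tagged[i][1] == old and tagged[i - 1][1] == prev:
                    tagged[i] = (tagged[i][0], new)
        return tagged

    rules = [("IN", "VB", "PRP"), ("NN", "VB", "TO")]
    print(apply_prevtag_rules([("I", "PRP"), ("like", "IN"), ("cats", "NNS")], rules))
    # [('I', 'PRP'), ('like', 'VB'), ('cats', 'NNS')]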
-------------------------------------------------------------------------------- /examples/02-db/store.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/02-db/store.db -------------------------------------------------------------------------------- /test/corpora/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/test/corpora/README.txt -------------------------------------------------------------------------------- /docs/g/pattern_graph1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph1.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph2.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph3.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph4.jpg -------------------------------------------------------------------------------- /docs/g/pattern_graph5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_graph5.jpg -------------------------------------------------------------------------------- /docs/g/pattern_schema.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema.gif -------------------------------------------------------------------------------- /docs/g/paypal-donate.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/paypal-donate.jpg -------------------------------------------------------------------------------- /docs/g/pattern_schema_de.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_de.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_es.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_es.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_fr.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_fr.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_it.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_it.gif -------------------------------------------------------------------------------- /docs/g/pattern_schema_nl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_schema_nl.gif -------------------------------------------------------------------------------- /pattern/text/en/en-model.slp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/text/en/en-model.slp -------------------------------------------------------------------------------- /pattern/text/ru/ru-model.slp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/text/ru/ru-model.slp -------------------------------------------------------------------------------- /pattern/text/xx/xx-lexicon.txt: -------------------------------------------------------------------------------- 1 | The DT 2 | the DT 3 | cat NN 4 | sat VBD 5 | sit VB 6 | on IN 7 | mat NN 8 | . . -------------------------------------------------------------------------------- /docs/g/pattern-vector-lsa1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-lsa1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-vector-svm1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-svm1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-vector-svm2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-svm2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-editor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-editor.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-editor2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-editor2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter3.jpg 
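xx-lexicon.txt above is the tagger's word lexicon, mapping known word forms to their most likely part-of-speech tag; words not in the lexicon get a default tag, which the morphology and context rules then refine. A toy sketch of that first lexical pass, using only the entries shown (assumed behavior, not pattern's exact implementation):

    # Lexical pass of a Brill-style tagger: look each word up in the lexicon
    # and fall back to a default tag (here NN) for unknown words.
    LEXICON = {"The": "DT", "the": "DT", "cat": "NN", "sat": "VBD",
               "sit": "VB", "on": "IN", "mat": "NN", ".": "."}

    def tag(words, lexicon=LEXICON, default="NN"):
        return [(w, lexicon.get(w, default)) for w in words]

    print(tag("The cat sat on the mat .".split()))
    # [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'),
    #  ('the', 'DT'), ('mat', 'NN'), ('.', '.')]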
-------------------------------------------------------------------------------- /docs/g/pattern-canvas-filter4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-filter4.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-origin1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-origin1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-origin2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-origin2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-path1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-path1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-metrics-bell.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-metrics-bell.jpg -------------------------------------------------------------------------------- /examples/08-server/02-api/rate.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/08-server/02-api/rate.db -------------------------------------------------------------------------------- /examples/08-server/04-db/store.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/08-server/04-db/store.db -------------------------------------------------------------------------------- /docs/g/pattern-canvas-particle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-particle1.png -------------------------------------------------------------------------------- /docs/g/pattern-canvas-particle2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-particle2.png -------------------------------------------------------------------------------- /docs/g/pattern-canvas-particle3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-particle3.png -------------------------------------------------------------------------------- /docs/g/pattern-metrics-boxplot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-metrics-boxplot.jpg -------------------------------------------------------------------------------- /docs/g/pattern-search-taxonomy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-search-taxonomy.jpg 
-------------------------------------------------------------------------------- /docs/g/pattern-vector-cluster1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-cluster1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-vector-cluster2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-vector-cluster2.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_100days.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_100days.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_italian.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_italian.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_spanish.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_spanish.jpg -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00002.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00002.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00003.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00003.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00004.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00004.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00005.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00005.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00006.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00006.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00007.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00007.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00008.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00008.txt 
-------------------------------------------------------------------------------- /examples/03-en/texts/1701.00009.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00009.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00010.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00010.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00011.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00011.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00012.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00012.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00013.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00013.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00014.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00014.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00015.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00015.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00016.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00016.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00017.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00017.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00018.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00018.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00019.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00019.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00020.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00020.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00021.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00021.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00022.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00022.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00023.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00023.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00024.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00024.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00025.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00025.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00026.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00026.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00027.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00027.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00028.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00028.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00029.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00029.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00030.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00030.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00031.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00031.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00032.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00032.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00033.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00033.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00034.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00034.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00035.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00035.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00037.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00037.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00038.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00038.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00039.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00039.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00043.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00043.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00044.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00044.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00045.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00045.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00046.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00046.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00047.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00047.txt -------------------------------------------------------------------------------- 
/examples/03-en/texts/1701.00049.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00049.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00050.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00050.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00051.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00051.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00052.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00052.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00053.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00053.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00054.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00054.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00055.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00055.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00056.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00056.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00057.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00057.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00058.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00058.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00060.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00060.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00061.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00061.txt 
-------------------------------------------------------------------------------- /examples/03-en/texts/1701.00062.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00062.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00063.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00063.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00064.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00064.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00065.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00065.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00066.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00066.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00067.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00067.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00068.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00068.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00069.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00069.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00072.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00072.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00073.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00073.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00074.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00074.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00075.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00075.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00076.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00076.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00077.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00077.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00078.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00078.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00079.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00079.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00081.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00081.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00082.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00082.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00083.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00083.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00084.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00084.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00085.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00085.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00086.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00086.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00087.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00087.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00088.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00088.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00089.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00089.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00090.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00090.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00091.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00091.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00092.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00092.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00094.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00094.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00095.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00095.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00096.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00096.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00097.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00097.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00098.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00098.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00099.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00099.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00100.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00100.txt -------------------------------------------------------------------------------- 
/examples/03-en/texts/1701.00101.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00101.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00102.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00102.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00103.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00103.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00104.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00104.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00105.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00105.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00106.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00106.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00107.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00107.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00109.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00109.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00110.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00110.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00111.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00111.txt -------------------------------------------------------------------------------- /examples/03-en/texts/1701.00112.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/03-en/texts/1701.00112.txt -------------------------------------------------------------------------------- /test/corpora/carroll-wonderland.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/test/corpora/carroll-wonderland.pdf 
-------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives3.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives4.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives5.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-primitives6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-primitives6.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-supershape1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-supershape1.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-supershape2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-supershape2.jpg -------------------------------------------------------------------------------- /docs/g/pattern-canvas-supershape3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern-canvas-supershape3.jpg -------------------------------------------------------------------------------- /docs/g/pattern_example_elections.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_elections.jpg -------------------------------------------------------------------------------- /pattern/text/en/wordnet/dict/index.32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/text/en/wordnet/dict/index.32 -------------------------------------------------------------------------------- /test/corpora/carroll-lookingglass.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/pattern/master/test/corpora/carroll-lookingglass.docx -------------------------------------------------------------------------------- /docs/g/pattern_example_semantic_network.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/docs/g/pattern_example_semantic_network.jpg -------------------------------------------------------------------------------- /examples/08-server/01-basic/static/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/examples/08-server/01-basic/static/cat.jpg -------------------------------------------------------------------------------- /pattern/text/xx/xx-verbs.txt: -------------------------------------------------------------------------------- 1 | be,am,are,is,are,being,was,were,was,were,were,been,,am not,aren't,isn't,aren't,,wasn't,weren't,wasn't,weren't,weren't, -------------------------------------------------------------------------------- /pattern/vector/svm/macos/libsvm-3.22/libsvm.so.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/macos/libsvm-3.22/libsvm.so.2 -------------------------------------------------------------------------------- /pattern/vector/svm/ubuntu/libsvm-3.22/libsvm.so.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/ubuntu/libsvm-3.22/libsvm.so.2 -------------------------------------------------------------------------------- /pattern/vector/svm/windows/libsvm-3.22/libsvm.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/windows/libsvm-3.22/libsvm.dll -------------------------------------------------------------------------------- /pattern/vector/svm/macos/liblinear-2.20/liblinear.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/macos/liblinear-2.20/liblinear.so.3 -------------------------------------------------------------------------------- /pattern/vector/svm/ubuntu/liblinear-2.20/liblinear.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/ubuntu/liblinear-2.20/liblinear.so.3 -------------------------------------------------------------------------------- /pattern/vector/svm/windows/liblinear-2.20/liblinear.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/pattern/master/pattern/vector/svm/windows/liblinear-2.20/liblinear.dll -------------------------------------------------------------------------------- /examples/02-db/food.txt: -------------------------------------------------------------------------------- 1 | "id (INTEGER)","name (STRING)","type (STRING)","color (STRING)" 2 | "1","broccoli","vegetable","green" 3 | "2","turnip","vegetable","purple" 4 | "3","asparagus","vegetable","white" 5 | "4","banana","fruit","yellow" -------------------------------------------------------------------------------- /pattern/text/it/__main__.py: 
-------------------------------------------------------------------------------- 1 | #### PATTERN | IT | PARSER COMMAND-LINE ############################################################ 2 | 3 | from __future__ import absolute_import 4 | 5 | from .__init__ import parse, commandline 6 | commandline(parse) 7 | -------------------------------------------------------------------------------- /pattern/text/xx/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | XX | PARSER COMMAND-LINE ############################################################ 2 | 3 | from __future__ import absolute_import 4 | 5 | from .__init__ import parse, commandline 6 | commandline(parse) 7 | -------------------------------------------------------------------------------- /examples/08-server/03-wiki/data/index.html.txt: -------------------------------------------------------------------------------- 1 |
This is a very simple wiki powered by pattern.server. 2 | Each page can be modified by clicking the edit-link. 3 | To create a new page, create a link to it, for example this test page.
-------------------------------------------------------------------------------- /pattern/vector/svm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | 4 | LIBSVM = LIBLINEAR = True 5 | 6 | try: 7 | from . import libsvm 8 | from . import libsvmutil 9 | except ImportError as e: 10 | LIBSVM = False 11 | raise e 12 | 13 | try: 14 | from . import liblinear 15 | from . import liblinearutil 16 | except: 17 | LIBLINEAR = False 18 | -------------------------------------------------------------------------------- /pattern/text/xx/xx-sentiment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /pattern/text/de/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | DE | RULE-BASED SHALLOW PARSER ###################################################### 2 | # Copyright (c) 2012 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/en/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | EN | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/es/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | ES | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import commandline, parse 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/fr/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | FR | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2013 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 
5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/nl/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | NL | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import commandline, parse 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/text/ru/__main__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | RU | PARSER COMMAND-LINE ############################################################ 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import absolute_import 10 | 11 | from .__init__ import parse, commandline 12 | commandline(parse) 13 | -------------------------------------------------------------------------------- /pattern/vector/stopwords-nl.txt: -------------------------------------------------------------------------------- 1 | aan, af, al, alles, als, altijd, andere, ben, bij, daar, dan, dat, de, der, deze, die, dit, doch, doen, door, dus, een, eens, en, er, ge, geen, geweest, haar, had, heb, hebben, heeft, hem, het, hier, hij, hoe, hun, iemand, iets, ik, in, is, ja, je, kan, kon, kunnen, maar, me, meer, men, met, mij, mijn, moet, na, naar, niet, niets, nog, nu, of, om, omdat, onder, ons, ook, op, over, reeds, te, tegen, toch, toen, tot, u, uit, uw, van, veel, voor, want, waren, was, wat, we, wel, werd, wezen, wie, wij, wil, worden, wordt, zal, ze, zei, zelf, zich, zij, zijn, zo, zonder, zou -------------------------------------------------------------------------------- /test/test_graph.js: -------------------------------------------------------------------------------- 1 | var test_graph = { 2 | 3 | //---------------------------------------------------------------------------------------------- 4 | // Unit tests for the graph.js module (see also test.html). 5 | 6 | TestCase: function() { 7 | this.setUp = function() { 8 | return; 9 | }; 10 | this.tearDown = function() { 11 | return; 12 | }; 13 | }, 14 | 15 | //---------------------------------------------------------------------------------------------- 16 | 17 | suite: function() { 18 | return []; 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /test/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 15 | 16 | Output is printed to the console (Developer Tools). 
17 | -------------------------------------------------------------------------------- /pattern/text/en/wordlist/time.txt: -------------------------------------------------------------------------------- 1 | a.m., afternoon, always, annually, apr, april, aug, august, autumn, before, breakfast, century, christmas, Christmas, daily, date, dawn, day, day, daybreak, decade, dec, december, during, dusk, easter, Easter, epoch, equinox, era, eve, evening, feb, february, fortnight, fri, friday, future, halloween, hour, hourly, jan, january, jul, july, jun, june, lunch, mar, march, may, microsecond, midday, midnight, millenium, millisecond, minute, mon, monday, month, monthly, morning, nanosecond, night, nightfall, noon, nov, november, now, o'clock, oct, october, p.m., past, present, sat, saturday, season, second, semester, sep, september, soon, spring, summer, sunday, sundown, sunrise, sunset, supper, then, time, today, tomorrow, trimester, tue, tuesday, twilight, wednesday, week, weekly, winter, year, yearly, yesterday -------------------------------------------------------------------------------- /examples/07-canvas/02-basic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | basics (2) 5 | 6 | 7 | 8 | 9 | 26 | 27 | -------------------------------------------------------------------------------- /pattern/text/en/wordnet/dict/lexnames: -------------------------------------------------------------------------------- 1 | 00 adj.all 3 2 | 01 adj.pert 3 3 | 02 adv.all 4 4 | 03 noun.Tops 1 5 | 04 noun.act 1 6 | 05 noun.animal 1 7 | 06 noun.artifact 1 8 | 07 noun.attribute 1 9 | 08 noun.body 1 10 | 09 noun.cognition 1 11 | 10 noun.communication 1 12 | 11 noun.event 1 13 | 12 noun.feeling 1 14 | 13 noun.food 1 15 | 14 noun.group 1 16 | 15 noun.location 1 17 | 16 noun.motive 1 18 | 17 noun.object 1 19 | 18 noun.person 1 20 | 19 noun.phenomenon 1 21 | 20 noun.plant 1 22 | 21 noun.possession 1 23 | 22 noun.process 1 24 | 23 noun.quantity 1 25 | 24 noun.relation 1 26 | 25 noun.shape 1 27 | 26 noun.state 1 28 | 27 noun.substance 1 29 | 28 noun.time 1 30 | 29 verb.body 2 31 | 30 verb.change 2 32 | 31 verb.cognition 2 33 | 32 verb.communication 2 34 | 33 verb.competition 2 35 | 34 verb.consumption 2 36 | 35 verb.contact 2 37 | 36 verb.creation 2 38 | 37 verb.emotion 2 39 | 38 verb.motion 2 40 | 39 verb.perception 2 41 | 40 verb.possession 2 42 | 41 verb.social 2 43 | 42 verb.stative 2 44 | 43 verb.weather 2 45 | 44 adj.ppl 3 46 | -------------------------------------------------------------------------------- /examples/06-graph/07-graphml.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join("..", "..")) 10 | 11 | from pattern.graph import Graph, WEIGHT, CENTRALITY, DEGREE, DEFAULT 12 | from random import choice, random 13 | 14 | # This example demonstrates how a graph visualization can be exported to GraphML, 15 | # a file format that can be opened in Gephi (https://gephi.org). 16 | 17 | g = Graph() 18 | # Random nodes. 19 | for i in range(50): 20 | g.add_node(i) 21 | # Random edges. 
22 | for i in range(75): 23 | node1 = choice(g.nodes) 24 | node2 = choice(g.nodes) 25 | g.add_edge(node1, node2, 26 | weight = random()) 27 | 28 | g.prune(0) 29 | 30 | # This node's label is different from its id. 31 | g[1].text.string = "home" 32 | 33 | # By default, Graph.export() exports to HTML, 34 | # but if we give it a filename that ends in .graphml it will export to GraphML. 35 | g.export(os.path.join(os.path.dirname(__file__), "test.graphml")) 36 | -------------------------------------------------------------------------------- /examples/04-search/05-multiple.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search 11 | from pattern.en import parsetree 12 | 13 | # Constraints ending in "+" match one or more words. 14 | # Pattern.search() uses a "greedy" approach: 15 | # it will attempt to match as many words as possible. 16 | 17 | # The following pattern means: 18 | # one or more words starting with "t", 19 | # followed by one or more words starting with "f". 20 | t = parsetree("one two three four five six") 21 | m = search("t*+ f*+", t) 22 | print(t) 23 | print(m) 24 | print("") 25 | 26 | for w in m[0].words: 27 | print("%s matches %s" % (w, m[0].constraint(w))) 28 | 29 | # "*" matches each word in the sentence. 30 | # This yields a list with a Match object for each word. 31 | print("") 32 | print("* => %s" % search("*", t)) 33 | 34 | # "*+" matches all words. 35 | # This yields a list with one Match object containing all words. 36 | print("") 37 | print("*+ => %s" % search("*+", t)) 38 | -------------------------------------------------------------------------------- /examples/01-web/02-google-translate.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Google, plaintext 11 | 12 | # A search engine in pattern.web sometimes has custom methods that the others don't. 13 | # For example, Google has Google.translate() and Google.identify(). 14 | 15 | # This example demonstrates the Google Translate API. 16 | # It will only work with a license key, since it is a paid service. 17 | # In the Google API console (https://code.google.com/apis/console/), 18 | # activate Translate API. 19 | 20 | g = Google(license=None) # Enter your license key. 21 | q = "Your mother was a hamster and your father smelled of elderberries!" # en 22 | # "Ihre Mutter war ein Hamster und euer Vater roch nach Holunderbeeren!" # de 23 | print(q) 24 | print(plaintext(g.translate(q, input="en", output="de"))) # es, fr, sv, ja, ... 25 | print("") 26 | 27 | q = "C'est un lapin, lapin de bois, un cadeau." 
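# The sentence above is French; identify() below guesses its language.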
28 | print(q) 29 | print(g.identify(q)) # (language, confidence) 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.pyc 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | .coveralls.yml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | *.dev* 54 | *.nja 55 | 56 | build 57 | dist 58 | 59 | # Environments 60 | .env 61 | .venv 62 | env/ 63 | venv/ 64 | ENV/ 65 | 66 | # Flymake 67 | *_flymake.py 68 | 69 | # Pattern specific ignore pattern 70 | pattern/web/cache/tmp/ 71 | web/cache/tmp/ 72 | pattern_unittest_db 73 | test/pattern_unittest_db 74 | 75 | .DS_Store 76 | -------------------------------------------------------------------------------- /examples/01-web/05-twitter-stream.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | import time 12 | 13 | from pattern.web import Twitter 14 | 15 | # Another way to mine Twitter is to set up a stream. 16 | # A Twitter stream maintains an open connection to Twitter, 17 | # and waits for data to pour in. 18 | # Twitter.search() allows us to look at older tweets, 19 | # Twitter.stream() gives us the most recent tweets. 20 | 21 | # It might take a few seconds to set up the stream. 22 | stream = Twitter().stream("I hate", timeout=30) 23 | 24 | #while True: 25 | for i in range(10): 26 | print(i) 27 | # Poll Twitter to see if there are new tweets. 28 | stream.update() 29 | # The stream is a list of buffered tweets so far, 30 | # with the latest tweet at the end of the list. 31 | for tweet in reversed(stream): 32 | print(tweet.text) 33 | print(tweet.language) 34 | # Clear the buffer every so often. 35 | stream.clear() 36 | # Wait awhile between polls. 37 | time.sleep(1) 38 | -------------------------------------------------------------------------------- /examples/06-graph/01-graph.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.graph import Graph, CENTRALITY 11 | 12 | # A graph is a network of nodes (or concepts) 13 | # connected to each other with edges (or links). 
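# Below, we build a small semantic network of six related concepts,
# then query it for shortest paths and node centrality.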
14 | 15 | g = Graph() 16 | for n in ("tree", "nest", "bird", "fly", "insect", "ant"): 17 | g.add_node(n) 18 | 19 | g.add_edge("tree", "nest") # Trees have bird nests. 20 | g.add_edge("nest", "bird") # Birds live in nests. 21 | g.add_edge("bird", "fly") # Birds eat flies. 22 | g.add_edge("ant", "bird") # Birds eat ants. 23 | g.add_edge("fly", "insect") # Flies are insects. 24 | g.add_edge("insect", "ant") # Ants are insects. 25 | g.add_edge("ant", "tree") # Ants crawl on trees. 26 | 27 | # From tree => fly: tree => ant => bird => fly 28 | print(g.shortest_path(g.node("tree"), g.node("fly"))) 29 | print(g.shortest_path(g.node("nest"), g.node("ant"))) 30 | print() 31 | 32 | # Which nodes get the most traffic? 33 | for n in sorted(g.nodes, key=lambda n: n.centrality, reverse=True): 34 | print('%.2f' % n.centrality, n) 35 | -------------------------------------------------------------------------------- /pattern/vector/svm/INSTALL.txt: -------------------------------------------------------------------------------- 1 | In order to be able to use LIBSVM and LIBLINEAR, you have to download, compile and install both libraries. 2 | 3 | - If you are on ArchLinux: pacman -S libsvm liblinear (or you can install liblinear-multicore instead of liblinear) 4 | - If you are on Ubuntu/Debian: e.g. apt-cache search libsvm (to find the right packages) 5 | - If you are on Windows or Mac OS, follow the guidelines on the websites listed below. 6 | - You can also manually download, compile and install both libraries. Read on for further information. 7 | 8 | To install from source, download the latest versions of LIBSVM and LIBLINEAR: 9 | http://www.csie.ntu.edu.tw/~cjlin/libsvm/ 10 | http://www.csie.ntu.edu.tw/~cjlin/liblinear/ 11 | 12 | - From the command line, do "make" in libsvm/. 13 | - Now do "make" in libsvm/python/. 14 | - You should see a file "libsvm.so.2", which is the binary you need. 15 | - Put it in pattern/vector/svm/libsvm-x.xx/ 16 | 17 | Repeat the steps for LIBLINEAR. 18 | 19 | You may need to rename option "-soname" to "-install_name" in the Makefile on Mac OS X. 20 | 21 | You may need to modify pattern/vector/svm/svm.py around line 15 to import the new binary. Please send us a copy of the compiled binary so we can include it in the next release. -------------------------------------------------------------------------------- /examples/03-en/04-tree.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import parse, Text 11 | 12 | # The easiest way to analyze the output of the parser is to create a Text. 13 | # A Text is a "parse tree" of linked Python objects. 14 | # A Text is essentially a list of Sentence objects. 15 | # Each Sentence is a list of Word objects. 16 | # Each Word can be part of a Chunk object, accessible with Word.chunk. 17 | s = "I eat pizza with a silver fork." 18 | s = parse(s) 19 | s = Text(s) 20 | 21 | # You can also use the parsetree() function, 22 | # which is the equivalent of Text(parse()). 23 | 24 | print(s[0].words) # A list of all the words in the first sentence. 25 | print(s[0].chunks) # A list of all the chunks in the first sentence.
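# The words in the last chunk of the first sentence: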
26 | print(s[0].chunks[-1].words) 27 | print("") 28 | 29 | for sentence in s: 30 | for word in sentence: 31 | print(word.string, 32 | word.type, 33 | word.chunk, 34 | word.pnp) 35 | 36 | # A Text can be exported as an XML-string (among other formats). 37 | print("") 38 | print(s.xml) 39 | -------------------------------------------------------------------------------- /pattern/text/it/it-context.txt: -------------------------------------------------------------------------------- 1 | PRP IN WDPREVTAG VB che 2 | PRP IN WDPREVTAG RB che 3 | PRP IN WDNEXTTAG che DT 4 | IN RB WDNEXTTAG come IN 5 | DT PRP WDNEXTTAG gli VB 6 | DT PRP WDNEXTTAG lo VB 7 | PRP DT WDNEXTTAG cui NN 8 | CD RB WDNEXTTAG prima IN 9 | VB NN WDNEXTTAG stato JJ 10 | PRP DT WDNEXTTAG uno NN 11 | RB JJ WDNEXTTAG solo NN 12 | CC VB WDNEXTTAG sia VB 13 | CC VB WDNEXTTAG sia RB 14 | DT PRP WDNEXTTAG altri VB 15 | PRP DT WDNEXTTAG quella NN 16 | PRP DT WDNEXTTAG quali NN 17 | RB PRP WDNEXTTAG quanto VB 18 | IN CD WDNEXTTAG secondo NN 19 | PRP DT WDNEXTTAG tutto DT 20 | VB RB WDNEXTTAG fa , 21 | VB RB WDNEXTTAG fa ( 22 | IN RB WDNEXTTAG oltre IN 23 | IN RB WDNEXTTAG come IN 24 | DT PRP WDNEXTTAG gli VB 25 | DT PRP WDNEXTTAG lo VB 26 | NN VB WDPREVTAG VB stato 27 | NN VB WDPREVTAG RB parte 28 | IN PRP WDPREVTAG IN se 29 | IN PRP RBIGRAM se stesso 30 | VB NN WDPREVTAG DT essere 31 | JJ NN WDPREVTAG DT italiano 32 | RB JJ RBIGRAM solo . 33 | IN CD WDPREVTAG DT secondo 34 | PRP DT WDNEXTTAG uno NN 35 | PRP DT WDNEXTTAG uno JJ 36 | NN VB WDPREVTAG VB fatto 37 | IN RB WDNEXTTAG contro VB 38 | RB JJ WDPREVTAG VB molto 39 | IN WRB LBIGRAM STAART Quando 40 | IN WRB LBIGRAM STAART Perché 41 | IN WRB LBIGRAM STAART Dove 42 | NN VB WDPREVTAG VB data 43 | JJ PRP WDPREVTAG DT proprio 44 | NN JJ WDPREVTAG NN politica 45 | JJ NN WDPREVTAG DT politico -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | function assert(expression) { 2 | /* Throws AssertException if the given expression evaluates to false. 3 | */ 4 | if (!expression) throw "AssertException"; 5 | } 6 | 7 | function TestCase() { 8 | /* TestCase objects have a setUp() and a tearDown() method, 9 | * called before and after each test respectively. 10 | * Tests in a TestCase have method names starting with "test". 11 | */ 12 | this.setUp = function() { 13 | return; 14 | }; 15 | this.tearDown = function() { 16 | return; 17 | }; 18 | this.testMethod = function() { 19 | assert(true == false); 20 | }; 21 | } 22 | 23 | function run(tests) { 24 | /* Executes each method whose name starts with "test", 25 | * for each TestCase object in the given array. 26 | * Throws AssertException if the method fails.
27 | */ 28 | for (var i=0; i < tests.length; i++) { 29 | for (var method in tests[i]) { 30 | if (method.substring(0,4) == "test") { 31 | tests[i].setUp(); 32 | try { 33 | tests[i][method](); 34 | } catch(e) { 35 | console.error(e + " in " + method + "()"); 36 | } 37 | tests[i].tearDown(); 38 | } 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /examples/02-db/03-date.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.db import date, time, NOW 11 | from pattern.web import Bing, NEWS 12 | 13 | # It is often useful to keep a date stamp for each row in the table. 14 | # The pattern.db module's date() function can be used for this. 15 | # It is a simple wrapper around Python's datetime.datetime class, 16 | # with extra functionality to make it easy to parse or print it as a string. 17 | 18 | print(date(NOW)) 19 | print(date()) 20 | print(date("2010-11-01 16:30", "%Y-%m-%d %H:%M")) 21 | print(date("Nov 1, 2010", "%b %d, %Y")) 22 | print(date("Nov 1, 2010", "%b %d, %Y", format="%d/%m/%Y")) 23 | print("") 24 | 25 | # All possible formatting options: 26 | # http://docs.python.org/library/time.html#time.strftime 27 | 28 | for r in Bing(license=None, language="en").search("today", type=NEWS): 29 | print(r.title) 30 | print(repr(r.date)) # Result.date is a string (e.g. we can't > <= += with the date). 31 | print(date(r.date)) # date() can parse any Result.date in the web module. 32 | print("") 33 | 34 | d = date("4 november 2011") 35 | d += time(days=2, hours=5) 36 | print(d) 37 | -------------------------------------------------------------------------------- /examples/07-canvas/01-basic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | basics (1) 5 | 6 | 7 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /examples/04-search/03-lemmata.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search, match 11 | from pattern.en import parsetree 12 | 13 | # This example demonstrates an interesting search pattern that mines for comparisons. 14 | # Notice the use of the constraint "be". 15 | # If the output from the parser includes word lemmas (e.g., "doing" => "do") 16 | # these will also be matched. Using "be" then matches "is", "being", "are", ... 17 | # and if underspecification is used "could be", "will be", "definitely was", ... 
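# For instance, the single constraint "be" should match the inflected "was" below,
# because parsing with lemmata=True annotates each word with its lemma:
print(match("be", parsetree("the turtle was faster than the hare", lemmata=True)))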
18 | 19 | p = "NP be ADJP|ADVP than NP" 20 | 21 | for s in ( 22 | "the turtle was faster than the hare", 23 | "Arnold Schwarzenegger is more dangerous than Dolph Lundgren"): 24 | t = parsetree(s, lemmata=True) # parse lemmas 25 | m = search(p, t) 26 | if m: 27 | # Constituents for the given constraint indices: 28 | # 0 = NP, 2 = ADJP|ADVP, 4 = NP 29 | print(m[0].constituents(constraint=[0, 2, 4])) 30 | print("") 31 | 32 | 33 | p = "NP be ADJP|ADVP than NP" 34 | t = parsetree("the turtle was faster than the hare", lemmata=True) 35 | m = match(p, t) 36 | print(t) 37 | print("") 38 | for w in m.words: 39 | print("%s\t=> %s" % (w, m.constraint(w))) 40 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | dist: precise 4 | 5 | python: 6 | - "3.6" 7 | 8 | before_install: 9 | - export TZ=Europe/Brussels 10 | - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; else wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi 11 | - bash miniconda.sh -b -p $HOME/miniconda 12 | - export PATH="$HOME/miniconda/bin:$PATH" 13 | - conda update --yes conda 14 | - conda install --yes numpy scipy 15 | - pip install --quiet pytest pytest-cov pytest-xdist chardet 16 | 17 | install: 18 | - python setup.py install --quiet 19 | - pip freeze 20 | # Install and compile libsvm and liblinear 21 | - sudo apt-get install -y build-essential 22 | - git clone https://github.com/cjlin1/libsvm 23 | - cd libsvm; make lib; sudo cp libsvm.so.2 /lib; sudo ln -s /lib/libsvm.so.2 /lib/libsvm.so; cd .. 24 | - git clone https://github.com/cjlin1/liblinear 25 | - cd liblinear; make lib; sudo cp liblinear.so.3 /lib; sudo ln -s /lib/liblinear.so.3 /lib/liblinear.so; cd .. 26 | 27 | script: 28 | - pytest --cov=pattern 29 | 30 | 31 | after_script: 32 | - pip install --quiet coveralls 33 | - coveralls 34 | 35 | branches: 36 | only: 37 | - development 38 | 39 | notifications: 40 | email: false 41 | 42 | # You can connect to MySQL/MariaDB using the username "travis" or "root" and a blank password. 43 | services: 44 | - mysql 45 | -------------------------------------------------------------------------------- /examples/01-web/15-sort.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import GOOGLE, YAHOO, BING, sort 11 | 12 | # The pattern.web module includes an interesting sort() algorithm. 13 | # It classifies search terms according to a search engine's total results count. 14 | # When a context is defined, it sorts according to relevancy to the context: 15 | # sort(terms=["black", "green", "red"], context="Darth Vader") => 16 | # yields "black" as the best candidate, 17 | # because "black Darth Vader" yields more search results. 18 | 19 | results = sort( 20 | terms = [ 21 | "arnold schwarzenegger", 22 | "chuck norris", 23 | "dolph lundgren", 24 | "steven seagal", 25 | "sylvester stallone", 26 | "mickey mouse", 27 | ], 28 | context = "dangerous", # Term used for sorting. 29 | service = BING, # GOOGLE, YAHOO, BING, ...
30 | license = None, # You should supply your own API license key for the given service. 31 | strict = True, # Wraps the query in quotes, i.e. 'mac sweet'. 32 | reverse = True, # Reverses term and context: 'sweet mac' instead of 'mac sweet'. 33 | cached = True) 34 | 35 | for weight, term in results: 36 | print("%5.2f" % (weight * 100) + "%", term) 37 | -------------------------------------------------------------------------------- /pattern/web/api.py: -------------------------------------------------------------------------------- 1 | #--- API LICENSE CONFIGURATION ----------------------------------------------------------------------- 2 | # Default license keys used by pattern.web.SearchEngine to contact different API's. 3 | # Google and Yahoo are paid services for which you need a personal license + payment method. 4 | # The default Google license is for testing purposes (= 100 daily queries). 5 | # Wikipedia, Twitter and Facebook are free. 6 | # Bing, Flickr and ProductsWiki use licenses shared among all Pattern users. 7 | 8 | from __future__ import unicode_literals 9 | from builtins import dict 10 | 11 | license = {} 12 | license["Google"] = \ 13 | "AIzaSyBxe9jC4WLr-Rry_5OUMOZ7PCsEyWpiU48" 14 | 15 | license["Bing"] = \ 16 | "VnJEK4HTlntE3SyF58QLkUCLp/78tkYjV1Fl3J7lHa0=" 17 | 18 | license["Yahoo"] = \ 19 | ("", "") # OAuth (key, secret) 20 | 21 | license["DuckDuckGo"] = \ 22 | None 23 | 24 | license["Faroo"] = \ 25 | "" 26 | 27 | license["Wikipedia"] = \ 28 | None 29 | 30 | license["Twitter"] = ( 31 | "p7HUdPLlkKaqlPn6TzKkA", # OAuth (key, secret, token) 32 | "R7I1LRuLY27EKjzulutov74lKB0FjqcI2DYRUmsu7DQ", ( 33 | "14898655-TE9dXQLrzrNd0Zwf4zhK7koR5Ahqt40Ftt35Y2qY", 34 | "q1lSRDOguxQrfgeWWSJgnMHsO67bqTd5dTElBsyTM")) 35 | 36 | license["Facebook"] = \ 37 | "332061826907464|jdHvL3lslFvN-s_sphK1ypCwNaY" 38 | 39 | license["Flickr"] = \ 40 | "787081027f43b0412ba41142d4540480" 41 | 42 | license["ProductWiki"] = \ 43 | "64819965ec784395a494a0d7ed0def32" 44 | -------------------------------------------------------------------------------- /examples/01-web/06-feed.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Newsfeed, plaintext, URL 11 | from pattern.db import date 12 | 13 | # This example reads a given RSS or Atom newsfeed channel. 14 | # Some example feeds to try out: 15 | NATURE = "http://feeds.nature.com/nature/rss/current" 16 | SCIENCE = "http://www.sciencemag.org/rss/podcast.xml" 17 | NYT = "http://rss.nytimes.com/services/xml/rss/nyt/GlobalHome.xml" 18 | TIME = "http://feeds.feedburner.com/time/topstories" 19 | CNN = "http://rss.cnn.com/rss/edition.rss" 20 | 21 | engine = Newsfeed() 22 | 23 | for result in engine.search(CNN, cached=True): 24 | print(result.title.upper()) 25 | print(plaintext(result.text)) # Remove HTML formatting. 26 | print(result.url) 27 | print(result.date) 28 | print("") 29 | 30 | # News item URL's lead to the page with the full article. 31 | # This page can have any kind of formatting. 32 | # There is no default way to read it. 33 | # But we could just download the source HTML and convert it to plain text: 34 | 35 | #html = URL(result.url).download() 36 | #print(plaintext(html)) 37 | 38 | # The resulting text may contain a lot of garbage. 
39 | # A better way is to use a DOM parser to select the HTML elements we want. 40 | # This is demonstrated in one of the next examples. 41 | -------------------------------------------------------------------------------- /examples/06-graph/03-template.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.graph import Graph, CSS, CANVAS 11 | 12 | # This example demonstrates how to roll dynamic HTML graphs. 13 | # We have a HTML template in which content is inserted on-the-fly. 14 | 15 | # This is useful if the graph data changes dynamically, 16 | # e.g., the user clicks on a node and is taken to a webpage with a new subgraph. 17 | 18 | template = ''' 19 | 20 | 21 | 22 | \t 23 | \t 24 | \t 25 | \t 28 | 29 | 30 | \t%s 31 | 32 | 33 | '''.strip() 34 | 35 | 36 | def webpage(graph, **kwargs): 37 | s1 = graph.serialize(CSS, **kwargs) 38 | s2 = graph.serialize(CANVAS, **kwargs) 39 | return template % ( 40 | s1.replace("\n", "\n\t\t"), 41 | s2.replace("\n", "\n\t") 42 | ) 43 | 44 | # Create a graph: 45 | g = Graph() 46 | g.add_node("cat") 47 | g.add_node("dog") 48 | g.add_edge("cat", "dog") 49 | 50 | # To make this work as a cgi-bin script, uncomment the following lines: 51 | ##!/usr/bin/env python 52 | #import cgi 53 | #import cgitb; cgitb.enable() # Debug mode. 54 | #print("Content-type: text/html") 55 | 56 | print(webpage(g, width=500, height=500)) 57 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2013 University of Antwerp, Belgium 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Pattern nor the names of its 14 | contributors may be used to endorse or promote products 15 | derived from this software without specific prior written 16 | permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 28 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /pattern/helpers.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | 5 | from builtins import str, bytes, dict, int 6 | from builtins import map, zip, filter 7 | from builtins import object, range 8 | 9 | #--- STRING FUNCTIONS ------------------------------------------------------------------------------ 10 | # Latin-1 (ISO-8859-1) encoding is identical to Windows-1252 except for the code points 128-159: 11 | # Latin-1 assigns control codes in this range, Windows-1252 has characters, punctuation, symbols 12 | # assigned to these code points. 13 | 14 | 15 | def decode_string(v, encoding="utf-8"): 16 | """ Returns the given value as a Unicode string (if possible). 17 | """ 18 | if isinstance(encoding, str): 19 | encoding = ((encoding,),) + (("windows-1252",), ("utf-8", "ignore")) 20 | if isinstance(v, bytes): 21 | for e in encoding: 22 | try: 23 | return v.decode(*e) 24 | except: 25 | pass 26 | return v 27 | return str(v) 28 | 29 | 30 | def encode_string(v, encoding="utf-8"): 31 | """ Returns the given value as a Python byte string (if possible). 32 | """ 33 | if isinstance(encoding, str): 34 | encoding = ((encoding,),) + (("windows-1252",), ("utf-8", "ignore")) 35 | if isinstance(v, str): 36 | for e in encoding: 37 | try: 38 | return v.encode(*e) 39 | except: 40 | pass 41 | return v 42 | return bytes(v) 43 | 44 | decode_utf8 = decode_string 45 | encode_utf8 = encode_string 46 | -------------------------------------------------------------------------------- /pattern/vector/svm/COPYRIGHT-liblinear.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2015 The LIBLINEAR Project. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /pattern/vector/svm/COPYRIGHT-libsvm.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /examples/04-search/09-web.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Bing, plaintext 12 | from pattern.en import parsetree 13 | from pattern.search import Pattern 14 | from pattern.db import Datasheet, pprint 15 | 16 | # "X IS MORE IMPORTANT THAN Y" 17 | # Here is a rough example of how to build a web miner. 18 | # It mines comparative statements from Bing and stores the results in a table, 19 | # which can be saved as a text file for further processing later on. 20 | 21 | # Pattern matching also works with Sentence objects from the MBSP module. 22 | # MBSP's parser is much more robust (but also slower). 23 | #from MBSP import Sentence, parse 24 | 25 | q = '"more important than"' # Bing search query 26 | p = "NP VP? more important than NP" # Search pattern. 27 | p = Pattern.fromstring(p) 28 | d = Datasheet() 29 | 30 | engine = Bing(license=None) 31 | for i in range(1): # max=10 32 | for result in engine.search(q, start=i + 1, count=100, cached=True): 33 | s = result.description 34 | s = plaintext(s) 35 | t = parsetree(s) 36 | for m in p.search(t): 37 | a = m.constituents(constraint=0)[-1] # Left NP. 38 | b = m.constituents(constraint=5)[0] # Right NP. 
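# Store each (left NP, right NP) pair as a new row in the datasheet: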
39 | d.append(( 40 | a.string.lower(), 41 | b.string.lower())) 42 | 43 | pprint(d) 44 | 45 | print("") 46 | print("%s results." % len(d)) 47 | -------------------------------------------------------------------------------- /examples/01-web/14-flickr.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | from io import open 7 | 8 | import os 9 | import sys 10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 11 | 12 | from pattern.web import Flickr, extension 13 | from pattern.web import RELEVANCY, LATEST, INTERESTING # Image sort order. 14 | from pattern.web import SMALL, MEDIUM, LARGE # Image size. 15 | 16 | # This example downloads an image from Flickr (http://flickr.com). 17 | # Acquiring the image data takes three Flickr queries: 18 | # 1) Flickr.search() retrieves a list of results, 19 | # 2) FlickrResult.url retrieves the image URL (behind the scenes), 20 | # 3) FlickrResult.download() visits FlickrResult.url and downloads the content. 21 | 22 | # It is a good idea to cache results from Flickr locally, 23 | # which is what the cached=True parameter does. 24 | 25 | # You should obtain your own license key at: 26 | # http://www.flickr.com/services/api/ 27 | # Otherwise you will be sharing the default key with all users of pattern.web. 28 | engine = Flickr(license=None) 29 | 30 | q = "duracell bunny" 31 | results = engine.search(q, size=MEDIUM, sort=RELEVANCY, cached=False) 32 | for img in results: 33 | #print(img.url) # Retrieving the actual image URL executes a query. 34 | print(img.text) 35 | print(img.author) 36 | print("") 37 | 38 | # Download and save one of the images: 39 | img = results[0] 40 | data = img.download() 41 | path = q.replace(" ", "_") + extension(img.url) 42 | f = open(path, "wb") 43 | f.write(data) 44 | f.close() 45 | print("Download: %s" % img.url) 46 | print("Saved as: %s" % path) 47 | -------------------------------------------------------------------------------- /pattern/text/en/wordnet/dict/LICENSE.txt: -------------------------------------------------------------------------------- 1 | WordNet Release 3.0 2 | 3 | This software and database is being provided to you, the LICENSEE, by 4 | Princeton University under the following license. By obtaining, using 5 | and/or copying this software and database, you agree that you have 6 | read, understood, and will comply with these terms and conditions.: 7 | 8 | Permission to use, copy, modify and distribute this software and 9 | database and its documentation for any purpose and without fee or 10 | royalty is hereby granted, provided that you agree to comply with 11 | the following copyright notice and statements, including the disclaimer, 12 | and that the same appear on ALL copies of the software, database and 13 | documentation, including modifications that you make for internal 14 | use or for distribution. 15 | 16 | WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. 17 | 18 | THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON 19 | UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 20 | IMPLIED. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON 21 | UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- 22 | ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE 23 | OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT 24 | INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR 25 | OTHER RIGHTS. 26 | 27 | The name of Princeton University or Princeton may not be used in 28 | advertising or publicity pertaining to distribution of the software 29 | and/or database. Title to copyright in this software, database and 30 | any associated documentation shall at all times remain with 31 | Princeton University and LICENSEE agrees to preserve same. 32 | -------------------------------------------------------------------------------- /test/test_ru.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | from builtins import str, bytes, dict, int 8 | from builtins import map, zip, filter 9 | from builtins import object, range 10 | 11 | import os 12 | import sys 13 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) 14 | import unittest 15 | import random 16 | import subprocess 17 | 18 | from pattern import text 19 | from pattern import ru 20 | 21 | from io import open 22 | 23 | try: 24 | PATH = os.path.dirname(os.path.realpath(__file__)) 25 | except: 26 | PATH = "" 27 | 28 | #--------------------------------------------------------------------------------------------------- 29 | 30 | 31 | class TestSpelling(unittest.TestCase): 32 | 33 | def test_spelling(self): 34 | i = j = 0.0 35 | from pattern.db import Datasheet 36 | for correct, wrong in Datasheet.load(os.path.join(PATH, "corpora", "spelling-ru.csv")): 37 | for w in wrong.split(" "): 38 | suggested = ru.suggest(w) 39 | if suggested[0][0] == correct: 40 | i += 1 41 | else: 42 | j += 1 43 | self.assertTrue(i / (i + j) > 0.65) 44 | print("pattern.ru.suggest()") 45 | 46 | #--------------------------------------------------------------------------------------------------- 47 | 48 | def suite(): 49 | suite = unittest.TestSuite() 50 | suite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestSpelling)) 51 | return suite 52 | 53 | if __name__ == "__main__": 54 | 55 | result = unittest.TextTestRunner(verbosity=1).run(suite()) 56 | sys.exit(not result.wasSuccessful()) 57 | -------------------------------------------------------------------------------- /examples/06-graph/05-trends.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Twitter 12 | from pattern.graph import Graph 13 | 14 | # This example demonstrates a simple Twitter miner + visualizer. 15 | # We collect tweets containing "A is the new B", 16 | # mine A and B and use them as connected nodes in a graph. 17 | # Then we export the graph as a browser visualization. 
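# Each mined tweet will contribute one (A, B) pair to this list: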
18 | 19 | comparisons = [] 20 | 21 | for i in range(1, 10): 22 | # Set cached=False for live results: 23 | for result in Twitter(language="en").search("\"is the new\"", start=i, count=100, cached=True): 24 | s = result.text 25 | s = s.replace("\n", " ") 26 | s = s.lower() 27 | s = s.replace("is the new", "NEW") 28 | s = s.split(" ") 29 | try: 30 | i = s.index("NEW") 31 | A = s[i - 1].strip("?!.:;,#@\"'") 32 | B = s[i + 1].strip("?!.:;,#@\"'") 33 | # Exclude common phrases such as "this is the new thing". 34 | if A and B and A not in ("it", "this", "here", "what", "why", "where"): 35 | comparisons.append((A, B)) 36 | except: 37 | pass 38 | 39 | g = Graph() 40 | for A, B in comparisons: 41 | e = g.add_edge(B, A) # "A is the new B": A <= B 42 | e.weight += 0.1 43 | print(B, "=>", A) 44 | 45 | # Not all nodes will be connected; there will be multiple subgraphs. 46 | # Simply take the largest subgraph for our visualization. 47 | g = g.split()[0] 48 | 49 | g.export("trends", weighted=True, directed=True) 50 | -------------------------------------------------------------------------------- /examples/03-en/02-quantify.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import number, numerals, quantify, reflect 11 | 12 | # The number() command returns an int or float from a written representation. 13 | # This is useful, for example, in combination with a parser 14 | # to transform "CD" parts-of-speech to actual numbers. 15 | # The algorithm ignores words that aren't recognized as numerals. 16 | print(number("two thousand five hundred and eight")) 17 | print(number("two point eighty-five")) 18 | print("") 19 | 20 | # The numerals() command returns a written representation from an int or float. 21 | print(numerals(1.249, round=2)) 22 | print(numerals(1.249, round=3)) 23 | print("") 24 | 25 | # The quantify() command uses pluralization + approximation to enumerate words. 26 | # This is useful to generate a human-readable summary of a set of strings. 27 | print(quantify(["goose", "goose", "duck", "chicken", "chicken", "chicken"])) 28 | print(quantify(["penguin", "polar bear"])) 29 | print(quantify(["carrot"] * 1000)) 30 | print(quantify("parrot", amount=1000)) 31 | print(quantify({"carrot": 100, "parrot": 20})) 32 | print("") 33 | 34 | # The quantify() command only works with words (strings). 35 | # To quantify a set of Python objects, use reflect(). 36 | # This will first create a human-readable name for each object and then quantify these.
37 | print(reflect([0, 1, {}, False, reflect])) 38 | print(reflect(os.path)) 39 | print(reflect([False, True], quantify=False)) 40 | print(quantify( 41 | ["bunny rabbit"] + 42 | reflect([False, True], quantify=False))) 43 | -------------------------------------------------------------------------------- /pattern/web/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import Iterable 3 | 4 | try: 5 | # Python 2 6 | str_type = unicode 7 | except NameError: 8 | # Python 3 9 | str_type = str 10 | 11 | STRING_LIKE_TYPES = (str_type, bytes, bytearray) 12 | 13 | try: 14 | # Python 2 15 | from urlparse import urlparse, parse_qsl 16 | except ImportError: 17 | # Python 3 18 | from urllib.parse import urlparse, parse_qsl 19 | 20 | try: 21 | import simplejson as json 22 | except ImportError: 23 | import json 24 | 25 | 26 | def json_iter_parse(response_text): 27 | decoder = json.JSONDecoder(strict=False) 28 | idx = 0 29 | while idx < len(response_text): 30 | obj, idx = decoder.raw_decode(response_text, idx) 31 | yield obj 32 | 33 | 34 | def stringify_values(dictionary): 35 | stringified_values_dict = {} 36 | for key, value in dictionary.items(): 37 | if isinstance(value, Iterable) and not isinstance(value, STRING_LIKE_TYPES): 38 | value = u','.join(map(str_type, value)) 39 | stringified_values_dict[key] = value 40 | return stringified_values_dict 41 | 42 | 43 | def get_url_query(url): 44 | parsed_url = urlparse(url) 45 | url_query = parse_qsl(parsed_url.fragment) 46 | # login_response_url_query can have multiple keys 47 | url_query = dict(url_query) 48 | return url_query 49 | 50 | 51 | def get_form_action(html): 52 | form_action = re.findall(r'<form(?= ).* action="(.+)"', html) 53 | if form_action: 54 | return form_action[0] 55 | 56 | 57 | def censor_access_token(access_token): 58 | if isinstance(access_token, str) and len(access_token) >= 12: 59 | return '{}***{}'.format(access_token[:4], access_token[-4:]) 60 | elif access_token: 61 | return '***' 62 | else: 63 | return access_token 64 | -------------------------------------------------------------------------------- /examples/01-web/07-wikipedia.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Wikipedia 11 | 12 | # This example retrieves an article from Wikipedia (http://en.wikipedia.org). 13 | # Wikipedia queries request the article HTML source from the server. This can be slow. 14 | # It is a good idea to cache results from Wikipedia locally, 15 | # and to set a high timeout when calling Wikipedia.search(). 16 | 17 | engine = Wikipedia(language="en") 18 | 19 | # Unlike the other search engines in the pattern.web module, 20 | # Wikipedia simply returns one WikipediaArticle object (or None), 21 | # instead of a list of results. 22 | article = engine.search("alice in wonderland", cached=True, timeout=30) 23 | 24 | print(article.title) # Article title (may differ from the search query). 25 | print("") 26 | print(article.languages["fr"]) # Article in French, can be retrieved with Wikipedia(language="fr"). 27 | print(article.links[:10]) # List of linked Wikipedia articles. 28 | print(article.external[:5]) # List of external URLs. 29 | print("") 30 | 31 | #print(article.source) # The full article content as HTML. 32 | #print(article.string) # The full article content, plain text with HTML tags stripped.
33 | 34 | # An article is made up of different sections with a title. 35 | # WikipediaArticle.sections is a list of WikipediaSection objects. 36 | # Each section has a title + content and can have a linked parent section or child sections. 37 | for s in article.sections: 38 | print(s.title.upper()) 39 | print("") 40 | print(s.content) # = ArticleSection.string, minus the title. 41 | print("") 42 | -------------------------------------------------------------------------------- /examples/07-canvas/06-image.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | image 5 | 6 | 7 | 8 | 9 | 13 | 47 | 48 | -------------------------------------------------------------------------------- /examples/05-vector/corpus/parakeet.txt: -------------------------------------------------------------------------------- 1 | Parakeet is a term for any one of a large number of unrelated small to medium sized species of parrot, that generally have long tail feathers. Older spellings still sometimes encountered are paroquet or paraquet. 2 | 3 | Species 4 | 5 | The term Grass Parakeet (or Grasskeet) refers to a large number of small temp Australian parakeets native to grasslands such as Neophema and Princess Parrot. The Australian rosellas are also parakeets. Many of the smaller, long-tailed species of lories may be referred to as lorikeets. 6 | 7 | The term Ringnecked Parakeet refers to a species of the Psittacula genus native to Africa and Asia that is popular as a pet and has become feral in many cities. It should not be confused with the Australian Ringneck. 8 | 9 | In aviculture the term conure is used for small to medium sized parakeets of the genera Aratinga, Pyrrhura, and a few other genere of the tribe Arini, which are mainly endemic to South America. As they are not all from one genus, taxonomists tend to dislike the term. 10 | 11 | Other South American species commonly called parakeets include the Brotogeris parakeets, Monk Parakeet and Lineolated Parakeets (although Lineolated Parakeets have short tails). 12 | 13 | Some species, especially the larger parakeets, may be referred to as "parrot" or "parakeet" interchangeably. For example, Alexandrine Parrot and Alexandrine Parakeet are different names for the same species, (Psittacula eupatria), one of the largest species called a parakeet. 14 | 15 | Many different species of parakeets are often bred and sold commercially as pets, the Budgerigar being among the most commonly sold in the U.S. [1] [2] 16 | 17 | Gallery 18 | 19 | See also 20 | 21 | * Cockatiel 22 | * Macaw 23 | * Budgerigar 24 | 25 | External links 26 | 27 | * Budgerigar Information 28 | 29 | References 30 | 31 | * ^ http://animal-world.com/encyclo/birds/parakeets/parakeets.htm 32 | * ^ http://chirpykeets.webs.com/about.htm -------------------------------------------------------------------------------- /examples/04-search/06-optional.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search 11 | from pattern.en import parsetree 12 | 13 | # Constraints ending in "?" are optional, matching one or no word. 
14 | # Pattern.search() uses a "greedy" approach: 15 | # it will attempt to include as many optional constraints as possible. 16 | 17 | # The following pattern scans for words whose part-of-speech tag is NN (i.e. nouns). 18 | # A preceding adjective, adverb or determiner is picked up as well. 19 | for s in ( 20 | "the cat", # DT NN 21 | "the very black cat", # DT RB JJ NN 22 | "tasty cat food", # JJ NN NN 23 | "the funny black cat", # DT JJ JJ NN 24 | "very funny", # RB JJ => no match, since there is no noun. 25 | "my cat is black and your cat is white"): # NN + NN 26 | t = parsetree(s) 27 | m = search("DT? RB? JJ? NN+", t) 28 | print("") 29 | print(t) 30 | print(m) 31 | if m: 32 | for w in m[0].words: 33 | print("%s matches %s" % (w, m[0].constraint(w))) 34 | 35 | # Before Pattern 2.4, "( )" was used instead of "?". 36 | # For example: "(JJ)" instead of "JJ?". 37 | # The syntax was changed to resemble regular expressions, which use "?". 38 | # The old syntax "(JJ)" still works in Pattern 2.4, but it may change later. 39 | 40 | # Note: the above pattern could also be written as "DT|RB|JJ?+ NN+" 41 | # to include multiple adverbs/adjectives. 42 | # By combining "*", "?" and "+", patterns can become quite complex. 43 | # Optional constraints are useful for very specific patterns, but slow. 44 | # Also, depending on which parser you use (e.g. MBSP), words can be tagged differently 45 | # and may not match in the way you expect. 46 | # Consider using a simple, robust "NP" search pattern. 47 | -------------------------------------------------------------------------------- /examples/01-web/09-wikia.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from builtins import str, bytes, dict, int 7 | 8 | import os 9 | import sys 10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 11 | 12 | from pattern.web import Wikia 13 | 14 | # This example retrieves articles from Wikia (http://www.wikia.com). 15 | # Wikia is a collection of thousands of wikis based on MediaWiki. 16 | # Wikipedia is based on MediaWiki too. 17 | # Wikia queries request the article HTML source from the server. This can be slow. 18 | 19 | domain = "monkeyisland" # "Look behind you, a three-headed monkey!" 20 | 21 | # Alternatively, you can call this script from the commandline 22 | # and specify another domain: python 09-wikia.py "Bieberpedia". 23 | if len(sys.argv) > 1: 24 | domain = sys.argv[1] 25 | 26 | w = Wikia(domain, language="en") 27 | 28 | # Like Wikipedia, we can search for articles by title with Wikia.search(): 29 | print(w.search("Three Headed Monkey")) 30 | 31 | # However, we may not know exactly what kind of articles exist; 32 | # "three-headed monkey", for example, does not redirect to the above article. 33 | 34 | # We can iterate through all articles with the Wikia.articles() method 35 | # (note that Wikipedia also has a Wikipedia.articles() method). 36 | # The "count" parameter sets the number of article titles to retrieve per query. 37 | # Retrieving the full article for each title takes another query. This can be slow.
38 | i = 0 39 | for article in w.articles(count=2, cached=True): 40 | print("") 41 | print(article.title) 42 | #print(article.plaintext()) 43 | i += 1 44 | if i >= 3: 45 | break 46 | 47 | # Alternatively, we can retrieve just the titles, 48 | # and only retrieve the full articles for the titles we need: 49 | i = 0 50 | for title in w.index(count=2): 51 | print("") 52 | print(title) 53 | #article = w.search(title) 54 | #print(article.plaintext()) 55 | i += 1 56 | if i >= 3: 57 | break 58 | -------------------------------------------------------------------------------- /examples/04-search/01-search.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search 11 | from pattern.en import parsetree 12 | 13 | # The pattern.search module contains a number of pattern matching tools 14 | # to search a string syntactically (word function) or semantically (word meaning). 15 | # If you only need to match string characters, regular expressions are faster. 16 | # However, if you are scanning a sentence for concept types (e.g. all flowers) 17 | # or parts-of-speech (e.g. all adjectives), this module provides the functionality. 18 | 19 | # In the simplest case, the search() function 20 | # takes a word (or a sequence of words) that you want to retrieve: 21 | print(search("rabbit", "big white rabbit")) 22 | print("") 23 | 24 | # Search words can contain wildcard characters: 25 | print(search("rabbit*", "big white rabbit")) 26 | print(search("rabbit*", "big white rabbits")) 27 | print("") 28 | 29 | # Search words can contain different options: 30 | print(search("rabbit|cony|bunny", "big black bunny")) 31 | print("") 32 | 33 | # Things become more interesting if we involve the pattern.en.parser module. 34 | # The parser takes a string, identifies words, and assigns a part-of-speech tag 35 | # to each word, for example NN (noun) or JJ (adjective). 36 | # A parsed sentence can be scanned for part-of-speech tags: 37 | s = parsetree("big white rabbit") 38 | print(search("JJ", s)) # all adjectives 39 | print(search("NN", s)) # all nouns 40 | print(search("NP", s)) # all noun phrases 41 | print("") 42 | 43 | # Since search() is case-insensitive, uppercase search words 44 | # are always considered to be tags (or taxonomy terms - see further examples). 45 | 46 | # The return value is a list of Match objects, 47 | # where Match.words is a list of Word objects that matched: 48 | m = search("NP", s) 49 | for word in m[0].words: 50 | print(word.string, word.tag) 51 | -------------------------------------------------------------------------------- /examples/01-web/03-bing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Bing, asynchronous, plaintext 11 | from pattern.web import SEARCH, IMAGE, NEWS 12 | 13 | import time 14 | 15 | # This example retrieves results from Bing based on a given query. 16 | # The Bing search engine can retrieve up to 1000 results (10x100) for a query.
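# For example, paging through the full 10 x 100 results could look like this
# (a sketch; it assumes you use your own license key, see the note below):
#engine = Bing(license="YOUR_KEY", language="en")
#for page in range(1, 11):
#    for result in engine.search("pattern", start=page, count=100):
#        print(result.url)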
17 | 18 | # Bing's "Custom Search API" is a paid service. 19 | # The pattern.web module uses a test account by default, 20 | # with 5000 free queries per month shared by all Pattern users. 21 | # If this limit is exceeded, SearchEngineLimitError is raised. 22 | # You should obtain your own license key at: 23 | # https://datamarket.azure.com/account/ 24 | engine = Bing(license=None, language="en") 25 | 26 | # Quote a query to match it exactly: 27 | q = "\"is more important than\"" 28 | 29 | # When you execute a query, 30 | # the script will halt until all results are downloaded. 31 | # In apps with an infinite main loop (e.g., GUI, game), 32 | # it is often more useful if the app keeps on running 33 | # while the search is executed in the background. 34 | # This can be achieved with the asynchronous() function. 35 | # It takes any function and that function's arguments and keyword arguments: 36 | request = asynchronous(engine.search, q, start=1, count=100, type=SEARCH, timeout=10) 37 | 38 | # This while-loop simulates an infinite application loop. 39 | # In real life you would have an app.update() or similar 40 | # in which you can check request.done every now and then. 41 | while not request.done: 42 | time.sleep(0.1) 43 | print(".") 44 | 45 | print("") 46 | print("") 47 | 48 | # An error occurred in engine.search(); raise it. 49 | if request.error: 50 | raise request.error 51 | 52 | # Retrieve the list of search results. 53 | for result in request.value: 54 | print(result.text) 55 | print(result.url) 56 | print("") 57 | -------------------------------------------------------------------------------- /examples/07-canvas/08-widget.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | basics (2) 5 | 6 | 7 | 31 | 32 | 33 | 61 | 62 | -------------------------------------------------------------------------------- /examples/04-search/08-group.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import match 11 | from pattern.en import parsetree 12 | 13 | # This example demonstrates how to create match groups. 14 | # A match group is a number of consecutive constraints, 15 | # for which matching words can easily be retrieved from a Match object. 16 | 17 | # Suppose we are looking for adjectives preceding nouns. 18 | # A simple pattern is: "JJ?+ NN", 19 | # which matches nouns preceded by any number of adjectives. 20 | # Since the number of adjectives can be 0, 1, 2, 3, ... it is not so easy 21 | # to fetch the adjectives from a Match. This can be achieved with a group: 22 | 23 | s = "The big black cat" 24 | t = parsetree(s) 25 | print(match("{JJ?+} NN", t).group(1)) 26 | print("") 27 | 28 | # Note the { } wrapper, indicating a group. 29 | # The group can be retrieved from the match as a list of words. 30 | 31 | # Suppose we are looking for prepositional noun phrases, 32 | # e.g., on the mat, with a fork, under the hood, etc... 33 | # The preposition is always one word (on, with, under), 34 | # but the actual noun phrase can have many words (a shiny silver fork), 35 | # so it is a hassle to retrieve it from the match. 36 | 37 | # Normally, we would do it like this: 38 | 39 | s = "The big black cat sat on the mat."
40 | t = parsetree(s) 41 | m = match("NP VP PP NP", t) 42 | for w in m.words: 43 | if m.constraint(w).index == 2: 44 | print("This is the PP: %s" % w) 45 | if m.constraint(w).index == 3: 46 | print("This is the NP: %s" % w) 47 | 48 | # In other words, iterate over each word in the match, 49 | # checking which constraint it matched and filtering out what we need. 50 | 51 | # It is easier with a group: 52 | 53 | m = match("NP VP {PP} {NP}", t) 54 | print("") 55 | print("This is the PP: %s" % m.group(1)) 56 | print("This is the NP: %s" % m.group(2)) 57 | print("") 58 | 59 | # Match.group(0) refers to the full search pattern: 60 | print(m.group(0)) 61 | -------------------------------------------------------------------------------- /examples/07-canvas/04-path.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | paths 5 | 6 | 7 | 8 | 9 | 52 | 53 | -------------------------------------------------------------------------------- /examples/04-search/07-exclude.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import match 11 | from pattern.en import Sentence, parse 12 | 13 | # This example demonstrates how to exclude certain words or tags from a constraint. 14 | # It also demonstrates the use of "^", 15 | # for a constraint that can only match the first word. 16 | 17 | # We'll use a naive imperative() function as a demonstration. 18 | # Sentences can have different moods: indicative, conditional, imperative, subjunctive. 19 | # The imperative mood is used to give orders, instructions, warnings: 20 | # - "Do your homework!", 21 | # - "You will eat your dinner!". 22 | # It is marked by an infinitive verb, without a "to" preceding it. 23 | # It does not use modal verbs such as "could" and "would": 24 | # "You could eat your dinner!" is not a command but a bubbly suggestion. 25 | 26 | # We can create a pattern that scans for infinitive verbs (VB), 27 | # and use "!" to exclude certain words: 28 | # "!could|!would|!should|!to+ VB" = infinitive not preceded by modal or "to". 29 | # This works fine except in one case: if the sentence starts with a verb. 30 | # So we need a second rule "^VB" to catch this. 31 | # Note that the example below contains a third rule: "^do|VB*". 32 | # This catches all sentences that start with a "do" verb regardless of whether it is infinitive, 33 | # because the parser sometimes tags infinitive "do" incorrectly. 34 | 35 | 36 | def imperative(sentence): 37 | for p in ("!could|!would|!should|!to+ VB", "^VB", "^do|VB*"): 38 | m = match(p, sentence) 39 | if m and sentence.string.endswith((".", "!")): # Exclude questions.
40 | return True 41 | return False 42 | 43 | for s in ( 44 | "Just stop it!", 45 | "Look out!", 46 | "Do your homework!", 47 | "You should do your homework.", 48 | "Could you stop it.", 49 | "To be, or not to be."): 50 | s = parse(s) 51 | s = Sentence(s) 52 | print(s) 53 | print(imperative(s)) 54 | print("") 55 | -------------------------------------------------------------------------------- /examples/01-web/01-google.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Google, plaintext 12 | from pattern.web import SEARCH 13 | 14 | # The pattern.web module has a SearchEngine class, 15 | # with a SearchEngine.search() method that yields a list of Result objects. 16 | # Each Result has url, title, text, language, author and date properties. 17 | # Subclasses of SearchEngine include: 18 | # Google, Bing, Yahoo, Twitter, Facebook, Wikipedia, Wiktionary, Flickr, ... 19 | 20 | # This example retrieves results from Google based on a given query. 21 | # The Google search engine can handle SEARCH type searches. 22 | # Other search engines may also handle IMAGE, NEWS, ... 23 | 24 | # Google's "Custom Search API" is a paid service. 25 | # The pattern.web module uses a test account by default, 26 | # with 100 free queries per day shared by all Pattern users. 27 | # If this limit is exceeded, SearchEngineLimitError is raised. 28 | # You should obtain your own license key at: 29 | # https://code.google.com/apis/console/ 30 | # Activate "Custom Search API" under "Services" and get the key under "API Access". 31 | # Then use Google(license=[YOUR_KEY]).search(). 32 | # This will give you 100 personal free queries, or $5 per 1000 queries. 33 | engine = Google(license=None, language="en") 34 | 35 | # Veale & Hao's method for finding similes using wildcards (*): 36 | # http://afflatus.ucd.ie/Papers/LearningFigurative_CogSci07.pdf 37 | # This will match results such as: 38 | # - "as light as a feather", 39 | # - "as cute as a cupcake", 40 | # - "as drunk as a lord", 41 | # - "as snug as a bug", etc. 42 | q = "as * as a *" 43 | 44 | # Google is very fast but you can only get up to 100 (10x10) results per query. 45 | for i in range(1, 2): 46 | for result in engine.search(q, start=i, count=10, type=SEARCH, cached=True): 47 | print(plaintext(result.text)) # plaintext() removes all HTML formatting.
48 | print(result.url) 49 | print(result.date) 50 | print("") 51 | -------------------------------------------------------------------------------- /examples/03-en/08-topmine_ngrammer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | import codecs 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | import pattern.text as text_module 12 | from pattern.text.en.wordlist import STOPWORDS 13 | 14 | paths = [] 15 | for f in os.listdir('./texts'): 16 | paths.append('./texts/' + f) 17 | 18 | texts = [] 19 | for p in paths: 20 | with codecs.open(p, "rb", encoding='latin-1') as f: 21 | if sys.version_info[0] < 3: 22 | texts.append(f.read()) 23 | else: 24 | texts.append(str(f.read())) 25 | 26 | ng = text_module.train_topmine_ngrammer(texts, threshhold=1, regexp="[^a-zA-Z0-9]") 27 | ngrams = text_module.topmine_ngramms(texts[0], ng, threshhold=1) 28 | 29 | 30 | 31 | print("\n") 32 | bigrams = [] 33 | trigrams = [] 34 | for key in ngrams.keys(): 35 | if len(key.split("_")) == 2: 36 | bigrams.append(key) 37 | elif len(key.split("_")) == 3: 38 | trigrams.append(key) 39 | 40 | print("Extracted {} bigrams:\n".format(len(bigrams))) 41 | print(bigrams) 42 | print("\n") 43 | 44 | print("Extracted {} trigrams:\n".format(len(trigrams))) 45 | print(trigrams) 46 | print("\n") 47 | 48 | 49 | # As we can see, the extracted n-grams contain many stopwords, 50 | # so it is important to remove all stopwords before applying the algorithm. 51 | 52 | ng = text_module.train_topmine_ngrammer(texts, threshhold=1, regexp="[^a-zA-Z0-9]", stopwords=STOPWORDS) 53 | ngrams = text_module.topmine_ngramms(texts[0], ng, threshhold=1) 54 | 55 | 56 | # The n-grams extracted below should no longer contain stopwords 57 | # (compare with the first run above). 58 | print("\n") 59 | bigrams = [] 60 | trigrams = [] 61 | for key in ngrams.keys(): 62 | if len(key.split("_")) == 2: 63 | bigrams.append(key) 64 | elif len(key.split("_")) == 3: 65 | trigrams.append(key) 66 | 67 | print("Extracted {} bigrams (removed stopwords):\n".format(len(bigrams))) 68 | print(bigrams) 69 | print("\n") 70 | 71 | print("Extracted {} trigrams (removed stopwords):\n".format(len(trigrams))) 72 | print(trigrams) 73 | print("\n") 74 | -------------------------------------------------------------------------------- /pattern/text/fr/fr-morphology.txt: -------------------------------------------------------------------------------- 1 | NN ient fhassuf 4 VB x 2 | NN ment fhassuf 4 RB x 3 | NN rait fhassuf 4 VB x 4 | NN rent fhassuf 4 VB x 5 | NN ante fhassuf 4 JJ x 6 | NN sait fhassuf 4 VB x 7 | NN tait fhassuf 4 VB x 8 | NN ique fhassuf 4 JJ x 9 | NN able fhassuf 4 JJ x 10 | NN ntes fhassuf 4 JJ x 11 | NN nait fhassuf 4 VB x 12 | NN euse fhassuf 4 JJ x 13 | NN lait fhassuf 4 VB x 14 | NN ques fhassuf 4 JJ x 15 | NN sent fhassuf 4 VB x 16 | NN ants fhassuf 4 JJ x 17 | NN bles fhassuf 4 JJ x 18 | NN tent fhassuf 4 VB x 19 | NN rais fhassuf 4 VB x 20 | NN uses fhassuf 4 JJ x 21 | NN lent fhassuf 4 VB x 22 | NN nent fhassuf 4 VB x 23 | NN uait fhassuf 4 VB x 24 | NN tais fhassuf 4 VB x 25 | NN erai fhassuf 4 VB x 26 | NN dait fhassuf 4 VB x 27 | NN ller fhassuf 4 VB x 28 | NN sser fhassuf 4 VB x 29 | NN cher fhassuf 4 VB x 30 | NN lées fhassuf 4 VBN x 31 | NN ront fhassuf 4 VB x 32 | NN sées fhassuf 4 VBN
x 33 | NN ales fhassuf 4 JJ x 34 | NN tées fhassuf 4 VBN x 35 | NN quer fhassuf 4 VB x 36 | NN iait fhassuf 4 VB x 37 | NN sais fhassuf 4 VB x 38 | NN rons fhassuf 4 VB x 39 | NN nner fhassuf 4 VB x 40 | NN iser fhassuf 4 VB x 41 | NN vait fhassuf 4 VB x 42 | NN eait fhassuf 4 VB x 43 | NN rées fhassuf 4 VBN x 44 | NN mait fhassuf 4 VB x 45 | NN hait fhassuf 4 VB x 46 | NN uent fhassuf 4 VB x 47 | NN ieux fhassuf 4 JJ x 48 | NN ible fhassuf 4 JJ x 49 | NN ndre fhassuf 4 VB x 50 | NN nais fhassuf 4 VB x 51 | NN onné fhassuf 4 VBN x 52 | NN tive fhassuf 4 JJ x 53 | NN illé fhassuf 4 VBN x 54 | NN ssée fhassuf 4 VBN x 55 | NN iner fhassuf 4 VB x 56 | NN nter fhassuf 4 VB x 57 | NN isse fhassuf 4 VB x 58 | NN vent fhassuf 4 VB x 59 | NN ssés fhassuf 4 VBN x 60 | NN iter fhassuf 4 VB x 61 | NN quée fhassuf 4 VBN x 62 | NN çait fhassuf 4 VB x 63 | NN llée fhassuf 4 VBN x 64 | NN érer fhassuf 4 VB x 65 | NN uler fhassuf 4 VB x 66 | NN uées fhassuf 4 VBN x 67 | NN ière fhassuf 4 JJ x 68 | NN isée fhassuf 4 VBN x 69 | NN yait fhassuf 4 VB x 70 | NN eras fhassuf 4 VB x 71 | NN ives fhassuf 4 JJ x 72 | NN reux fhassuf 4 JJ x 73 | NN chée fhassuf 4 VBN x 74 | NN qués fhassuf 4 VBN x 75 | NN lais fhassuf 4 VB x 76 | NN ntée fhassuf 4 VBN x 77 | NN chés fhassuf 4 VBN x 78 | JJ la fgoodleft NN x 79 | JJ le fgoodleft NN x 80 | JJ les fgoodleft NN x 81 | JJ un fgoodleft NN x 82 | JJ une fgoodleft NN x 83 | NN s fhassuf 1 NNS x -------------------------------------------------------------------------------- /docs/js/shBrushXml.js: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | ;(function() 18 | { 19 | // CommonJS 20 | typeof(require) != 'undefined' ? SyntaxHighlighter = require('shCore').SyntaxHighlighter : null; 21 | 22 | function Brush() 23 | { 24 | function process(match, regexInfo) 25 | { 26 | var constructor = SyntaxHighlighter.Match, 27 | code = match[0], 28 | tag = new XRegExp('(&lt;|<)[\\s\\/\\?]*(?<name>[:\\w-\\.]+)', 'xg').exec(code), 29 | result = [] 30 | ; 31 | 32 | if (match.attributes != null) 33 | { 34 | var attributes, 35 | regex = new XRegExp('(?<name> [\\w:\\-\\.]+)' + 36 | '\\s*=\\s*' +
'(?<value> ".*?"|\'.*?\'|\\w+)', 38 | 'xg'); 39 | 40 | while ((attributes = regex.exec(code)) != null) 41 | { 42 | result.push(new constructor(attributes.name, match.index + attributes.index, 'color1')); 43 | result.push(new constructor(attributes.value, match.index + attributes.index + attributes[0].indexOf(attributes.value), 'string')); 44 | } 45 | } 46 | 47 | if (tag != null) 48 | result.push( 49 | new constructor(tag.name, match.index + tag[0].indexOf(tag.name), 'keyword') 50 | ); 51 | 52 | return result; 53 | } 54 | 55 | this.regexList = [ 56 | { regex: new XRegExp('(\\&lt;|<)\\!\\[[\\w\\s]*?\\[(.|\\s)*?\\]\\](\\&gt;|>)', 'gm'), css: 'color2' }, // <![ ... [ ... ]]> 57 | { regex: SyntaxHighlighter.regexLib.xmlComments, css: 'comments' }, // <!-- ... --> 58 | { regex: new XRegExp('(&lt;|<)[\\s\\/\\?]*(\\w+)(?<attributes>.*?)[\\s\\/\\?]*(&gt;|>)', 'sg'), func: process } 59 | ]; 60 | }; 61 | 62 | Brush.prototype = new SyntaxHighlighter.Highlighter(); 63 | Brush.aliases = ['xml', 'xhtml', 'xslt', 'html']; 64 | 65 | SyntaxHighlighter.brushes.Xml = Brush; 66 | 67 | // CommonJS 68 | typeof(exports) != 'undefined' ? exports.Brush = Brush : null; 69 | })(); 70 | -------------------------------------------------------------------------------- /pattern/text/ru/wordlist/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | VECTOR | WORDLIST ################################################################### 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import print_function 10 | from __future__ import unicode_literals 11 | from __future__ import division 12 | 13 | from builtins import str, bytes, dict, int 14 | from builtins import map, zip, filter 15 | from builtins import object, range 16 | 17 | import os 18 | from io import open 19 | 20 | try: 21 | MODULE = os.path.dirname(os.path.realpath(__file__)) 22 | except: 23 | MODULE = "" 24 | 25 | 26 | class Wordlist(object): 27 | 28 | def __init__(self, name, data=[]): 29 | """ Lazy read-only list of words.
30 | """ 31 | self._name = name 32 | self._data = data 33 | 34 | def _load(self): 35 | if not self._data: 36 | self._data = open(os.path.join(MODULE, self._name + ".txt")).read().split("\n") 37 | 38 | def __repr__(self): 39 | self._load() 40 | return repr(self._data) 41 | 42 | def __iter__(self): 43 | self._load() 44 | return iter(self._data) 45 | 46 | def __len__(self): 47 | self._load() 48 | return len(self._data) 49 | 50 | def __contains__(self, w): 51 | self._load() 52 | return w in self._data 53 | 54 | def __add__(self, iterable): 55 | self._load() 56 | return Wordlist(None, data=sorted(self._data + list(iterable))) 57 | 58 | def __getitem__(self, i): 59 | self._load() 60 | return self._data[i] 61 | 62 | def __setitem__(self, i, v): 63 | self._load() 64 | self._data[i] = v 65 | 66 | def insert(self, i, v): 67 | self._load() 68 | self._data.insert(i, v) 69 | 70 | def append(self, v): 71 | self._load() 72 | self._data.append(v) 73 | 74 | def extend(self, v): 75 | self._load() 76 | self._data.extend(v) 77 | 78 | STOPWORDS = Wordlist("stopwords") # Russian stop words 79 | -------------------------------------------------------------------------------- /pattern/vector/stopwords-es.txt: -------------------------------------------------------------------------------- 1 | a, al, algo, algunas, algunos, ante, antes, como, con, contra, cual, cuando, de, del, desde, donde, durante, e, el, ella, ellas, ellos, en, entre, era, erais, eran, eras, eres, es, esa, esas, ese, eso, esos, esta, estaba, estabais, estaban, estabas, estad, estada, estadas, estado, estados, estamos, estando, estar, estaremos, estará, estarán, estarás, estaré, estaréis, estaría, estaríais, estaríamos, estarían, estarías, estas, este, estemos, esto, estos, estoy, estuve, estuviera, estuvierais, estuvieran, estuvieras, estuvieron, estuviese, estuvieseis, estuviesen, estuvieses, estuvimos, estuviste, estuvisteis, estuviéramos, estuviésemos, estuvo, está, estábamos, estáis, están, estás, esté, estéis, estén, estés, fue, fuera, fuerais, fueran, fueras, fueron, fuese, fueseis, fuesen, fueses, fui, fuimos, fuiste, fuisteis, fuéramos, fuésemos, ha, habida, habidas, habido, habidos, habiendo, habremos, habrá, habrán, habrás, habré, habréis, habría, habríais, habríamos, habrían, habrías, habéis, había, habíais, habíamos, habían, habías, han, has, hasta, hay, haya, hayamos, hayan, hayas, hayáis, he, hemos, hube, hubiera, hubierais, hubieran, hubieras, hubieron, hubiese, hubieseis, hubiesen, hubieses, hubimos, hubiste, hubisteis, hubiéramos, hubiésemos, hubo, la, las, le, les, lo, los, me, mi, mis, mucho, muchos, muy, más, mí, mía, mías, mío, míos, nada, ni, no, nos, nosotras, nosotros, nuestra, nuestras, nuestro, nuestros, o, os, otra, otras, otro, otros, para, pero, poco, por, porque, que, quien, quienes, qué, se, sea, seamos, sean, seas, seremos, será, serán, serás, seré, seréis, sería, seríais, seríamos, serían, serías, seáis, sido, siendo, sin, sobre, sois, somos, son, soy, su, sus, suya, suyas, suyo, suyos, sí, también, tanto, te, tendremos, tendrá, tendrán, tendrás, tendré, tendréis, tendría, tendríais, tendríamos, tendrían, tendrías, tened, tenemos, tenga, tengamos, tengan, tengas, tengo, tengáis, tenida, tenidas, tenido, tenidos, teniendo, tenéis, tenía, teníais, teníamos, tenían, tenías, ti, tiene, tienen, tienes, todo, todos, tu, tus, tuve, tuviera, tuvierais, tuvieran, tuvieras, tuvieron, tuviese, tuvieseis, tuviesen, tuvieses, tuvimos, tuviste, tuvisteis, tuviéramos, tuviésemos, tuvo, tuya, tuyas, tuyo, tuyos, tú, un, una, 
uno, unos, vosotras, vosotros, vuestra, vuestras, vuestro, vuestros, y, ya, yo, él, éramos -------------------------------------------------------------------------------- /examples/07-canvas/data-url.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | canvas.js | data url 6 | 7 | 35 | 38 | 39 | 40 |

For security reasons, Canvas does not allow you to retrieve the Pixels of remote images (i.e., from a URL not on your own server). Neither can you export a canvas that renders such images (no Canvas.image() or Canvas.save()). During testing, you may opt to include (small) images in your script as data URLs. This script can be used to generate the data URL of an image, provided that your browser supports FileReader.
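<!-- A minimal sketch of the FileReader approach described above
     (the element id "file" is hypothetical): -->
<!--
<script type="text/javascript">
    var input = document.getElementById("file");
    input.onchange = function() {
        var reader = new FileReader();
        reader.onload = function(e) {
            console.log(e.target.result); // e.g., "data:image/png;base64,..."
        };
        reader.readAsDataURL(input.files[0]);
    };
</script>
-->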

41 | 42 |
43 | 45 |
46 | convert to canvas.js code block 47 | 48 | -------------------------------------------------------------------------------- /examples/02-db/02-datasheet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.db import Datasheet, INTEGER, STRING 11 | from pattern.db import uid, pprint 12 | 13 | # The main purpose of the pattern module is to facilitate automated processes 14 | # for (text) data acquisition and (linguistic) data mining. 15 | # Often, this involves a tangle of messy text files and custom formats to store the data. 16 | # The Datasheet class offers a useful matrix (cf. MS Excel) in Python code. 17 | # It can be saved as a CSV text file that is both human/machine readable. 18 | # See also: examples/01-web/04-twitter.py 19 | 20 | # A Datasheet can have headers: a (name, type)-tuple for each column. 21 | # In this case, imported columns will automatically map values to the defined type. 22 | # Supported values that are imported and exported correctly: 23 | # str, unicode, int, float, bool, Date, None 24 | # For other data types, custom encoder and decoder functions can be used. 25 | 26 | ds = Datasheet(rows=[ 27 | [uid(), "broccoli", "vegetable"], 28 | [uid(), "turnip", "vegetable"], 29 | [uid(), "asparagus", "vegetable"], 30 | [uid(), "banana", "fruit"], 31 | ], fields=[ 32 | ("id", INTEGER), # Define the column headers. 33 | ("name", STRING), 34 | ("type", STRING) 35 | ]) 36 | 37 | print(ds.rows[0]) # A list of rows. 38 | print(ds.columns[1]) # A list of columns, where each column is a list of values. 39 | print(ds.name) 40 | print("") 41 | 42 | # Columns can be manipulated directly like any other Python list. 43 | # This can be slow for large tables. If you need a fast way to do matrix math, 44 | # use numpy (http://numpy.scipy.org/) instead. 45 | # The purpose of Datasheet is data storage. 46 | ds.columns.append([ 47 | "green", 48 | "purple", 49 | "white", 50 | "yellow" 51 | ], field=("color", STRING)) 52 | 53 | # Save as a comma-separated (unicode) text file. 54 | ds.save("food.txt", headers=True) 55 | 56 | # Load a table from file. 57 | ds = Datasheet.load("food.txt", headers=True) 58 | 59 | pprint(ds, truncate=50, padding=" ", fill=".") 60 | print("") 61 | print(ds.fields) 62 | -------------------------------------------------------------------------------- /examples/03-en/03-parse.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import parse, pprint, tag 11 | 12 | # The en module contains a fast regular expressions-based parser. 13 | # A parser identifies words in a sentence, word part-of-speech tags (e.g. noun, verb) 14 | # and groups of words that belong together (e.g. noun phrases). 15 | # Common part-of-speech tags: NN (noun), VB (verb), JJ (adjective), PP (preposition). 16 | # A tag can have a suffix, for example NNS (plural noun) or VBG (gerund verb). 17 | # Overview of tags: http://www.clips.ua.ac.be/pages/mbsp-tags 18 | s = "I eat pizza with a fork."
19 | s = parse(s, 20 | tokenize = True, # Tokenize the input, i.e. split punctuation from words. 21 | tags = True, # Find part-of-speech tags. 22 | chunks = True, # Find chunk tags, e.g. "the black cat" = NP = noun phrase. 23 | relations = True, # Find relations between chunks. 24 | lemmata = True, # Find word lemmata. 25 | light = False) 26 | 27 | # The light parameter determines how unknown words are handled. 28 | # By default, unknown words are tagged NN and then improved with a set of rules. 29 | # light=False uses Brill's lexical and contextual rules, 30 | # light=True uses a set of custom rules that is less accurate but faster (5x-10x). 31 | 32 | # The output is a string with each sentence on a new line. 33 | # Words in a sentence have been annotated with tags, 34 | # for example: fork/NN/I-NP/I-PNP 35 | # NN = noun, NP = part of a noun phrase, PNP = part of a prepositional phrase. 36 | print(s) 37 | print("") 38 | 39 | # Prettier output can be obtained with the pprint() command: 40 | pprint(s) 41 | print("") 42 | 43 | # The string's split() method will (unless a split character is given), 44 | # split into a list of sentences, where each sentence is a list of words 45 | # and each word is a list with the word + its tags. 46 | print(s.split()) 47 | print("") 48 | 49 | # The tag() command returns a list of (word, POS-tag)-tuples. 50 | # With light=True, this is the fastest and simplest way to get an idea 51 | # of a sentence's constituents: 52 | s = "I eat pizza with a fork." 53 | s = tag(s) 54 | print(s) 55 | for word, tag in s: 56 | if tag == "NN": # Find all nouns in the input string. 57 | print(word) 58 | -------------------------------------------------------------------------------- /docs/js/shBrushJScript.js: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | ;(function() 18 | { 19 | // CommonJS 20 | typeof(require) != 'undefined' ? 
SyntaxHighlighter = require('shCore').SyntaxHighlighter : null; 21 | 22 | function Brush() 23 | { 24 | var keywords1 = 'break case catch continue ' + 25 | 'default delete do else ' + 26 | 'for function if in instanceof ' + 27 | 'new return switch ' + 28 | 'throw try typeof var while with' 29 | ; 30 | 31 | var keywords2 = 'false true null super this'; 32 | 33 | var keywords3 = 'alert back blur close confirm focus forward home ' + 34 | 'name navigate onblur onerror onfocus onload onmove ' + 35 | 'onresize onunload open print prompt scroll status stop'; 36 | 37 | var r = SyntaxHighlighter.regexLib; 38 | 39 | this.regexList = [ 40 | { regex: r.multiLineDoubleQuotedString, css: 'string' }, // double quoted strings 41 | { regex: r.multiLineSingleQuotedString, css: 'string' }, // single quoted strings 42 | { regex: r.singleLineCComments, css: 'comments1' }, // one line comments 43 | { regex: r.multiLineCComments, css: 'comments2' }, // multiline comments 44 | { regex: /\s*#.*/gm, css: 'preprocessor' }, // preprocessor tags like #region and #endregion 45 | { regex: /function ([^\()]+)\(/g, func: function(match, r) { 46 | return [ 47 | new SyntaxHighlighter.Match("function ", match.index, "keyword1"), 48 | new SyntaxHighlighter.Match(match[1], match.index+9, "name") 49 | ]; } }, 50 | { regex: new RegExp(this.getKeywords(keywords1), 'gm'), css: 'keyword1' }, // keywords 1 51 | { regex: new RegExp(this.getKeywords(keywords2), 'gm'), css: 'keyword2' }, // keywords 2 52 | { regex: new RegExp(this.getKeywords(keywords3), 'gm'), css: 'keyword3' } // keywords 3 53 | ]; 54 | 55 | this.forHtmlScript(r.scriptScriptTags); 56 | }; 57 | 58 | Brush.prototype = new SyntaxHighlighter.Highlighter(); 59 | Brush.aliases = ['js', 'jscript', 'javascript']; 60 | 61 | SyntaxHighlighter.brushes.JScript = Brush; 62 | 63 | // CommonJS 64 | typeof(exports) != 'undefined' ? exports.Brush = Brush : null; 65 | })(); 66 | -------------------------------------------------------------------------------- /examples/06-graph/02-export.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.graph import Graph, WEIGHT, CENTRALITY, DEGREE, DEFAULT 12 | from random import choice, random 13 | 14 | # This example demonstrates how a graph visualization can be exported to HTML, 15 | # using the HTML5 <canvas> tag and Javascript. 16 | # All properties (e.g., stroke color) of nodes and edges are ported. 17 | 18 | g = Graph() 19 | # Random nodes. 20 | for i in range(50): 21 | g.add_node(id=str(i + 1), 22 | radius = 5, 23 | stroke = (0, 0, 0, 1), 24 | text = (0, 0, 0, 1)) 25 | # Random edges. 26 | for i in range(75): 27 | node1 = choice(g.nodes) 28 | node2 = choice(g.nodes) 29 | g.add_edge(node1, node2, 30 | length = 1.0, 31 | weight = random(), 32 | stroke = (0, 0, 0, 1)) 33 | 34 | for node in g.sorted()[:20]: 35 | # More blue = more important. 36 | node.fill = (0.6, 0.8, 1.0, 0.8 * node.weight) 37 | 38 | g.prune(0) 39 | 40 | # This node's label is different from its id. 41 | # We'll make it a hyperlink, see the href attribute at the bottom. 42 | g["1"].text.string = "home" 43 | 44 | # The export() command generates a folder with an index.html 45 | # that displays the graph using an interactive, force-based spring layout.
46 | # You can drag the nodes around - open index.html in a browser and try it out! 47 | # The layout can be tweaked in many ways: 48 | 49 | g.export(os.path.join(os.path.dirname(__file__), "test"), 50 | width = 700, # <canvas> width. 51 | height = 500, # <canvas> height. 52 | frames = 500, # Number of frames of animation. 53 | directed = True, # Visualize eigenvector centrality as an edge arrow? 54 | weighted = 0.5, # Visualize betweenness centrality as a node shadow? 55 | pack = True, # Keep clusters close together + visualize node weight as node radius? 56 | distance = 10, # Average edge length. 57 | k = 4.0, # Force constant. 58 | force = 0.01, # Force dampener. 59 | repulsion = 50, # Force radius. 60 | stylesheet = DEFAULT, # INLINE, DEFAULT, None or the path to your own stylesheet. 61 | javascript = None, 62 | href = {"1": "http://www.clips.ua.ac.be/pages/pattern-graph"}, # Node.id => URL 63 | css = {"1": "node-link-docs"} # Node.id => CSS class. 64 | ) 65 | -------------------------------------------------------------------------------- /examples/01-web/04-twitter.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Twitter, hashtags 12 | from pattern.db import Datasheet, pprint, pd 13 | 14 | # This example retrieves tweets containing given keywords from Twitter. 15 | 16 | try: 17 | # We'll store tweets in a Datasheet. 18 | # A Datasheet is a table of rows and columns that can be exported as a CSV-file. 19 | # In the first column, we'll store a unique id for each tweet. 20 | # We only want to add the latest tweets, i.e., those we haven't seen yet. 21 | # With an index on the first column we can quickly check if an id already exists. 22 | # The pd() function returns the parent directory of this script + any given path. 23 | table = Datasheet.load(pd("cool.csv")) 24 | index = set(table.columns[0]) 25 | except: 26 | table = Datasheet() 27 | index = set() 28 | 29 | engine = Twitter(language="en") 30 | 31 | # With Twitter.search(cached=False), a "live" request is sent to Twitter: 32 | # we get the most recent results instead of those in the local cache. 33 | # Keeping a local cache can also be useful (e.g., while testing) 34 | # because a query is instant when it is executed the second time. 35 | prev = None 36 | for i in range(2): 37 | print(i) 38 | for tweet in engine.search("is cooler than", start=prev, count=25, cached=False): 39 | print("") 40 | print(tweet.text) 41 | print(tweet.author) 42 | print(tweet.date) 43 | print(hashtags(tweet.text)) # Keywords in tweets start with a "#". 44 | print("") 45 | # Only add the tweet to the table if it doesn't already exist. 46 | if len(table) == 0 or tweet.id not in index: 47 | table.append([tweet.id, tweet.text]) 48 | index.add(tweet.id) 49 | # Continue mining older tweets in next iteration. 50 | prev = tweet.id 51 | 52 | # Create a .csv in pattern/examples/01-web/ 53 | table.save(pd("cool.csv")) 54 | 55 | print("Total results: %s" % len(table)) 56 | print("") 57 | 58 | # Print all the rows in the table. 59 | # Since it is stored as a CSV-file it grows comfortably each time the script runs. 60 | # We can also open the table later on: in other scripts, for further analysis, ...
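# For example, reloading it in a later session (a sketch):
#table = Datasheet.load(pd("cool.csv"))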
61 | 62 | pprint(table, truncate=100) 63 | 64 | # Note: you can also search tweets by author: 65 | # Twitter().search("from:tom_de_smedt") 66 | -------------------------------------------------------------------------------- /examples/07-canvas/03-transformation.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | canvas.js | transformations 5 | 6 | 7 | 8 | 9 | 60 | 61 | -------------------------------------------------------------------------------- /examples/03-en/06-wordnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import wordnet 11 | from pattern.en import NOUN, VERB 12 | 13 | # WordNet is a lexical database for the English language. 14 | # It groups English words into sets of synonyms called synsets, provides short, general definitions, 15 | # and records the various semantic relations between these synonym sets. 16 | 17 | # For a given word, WordNet yields a list of synsets that 18 | # represent different "senses" in which the word can be understood. 19 | for synset in wordnet.synsets("train", pos=NOUN): 20 | print("Description: %s" % synset.gloss) # Definition string. 21 | print(" Synonyms: %s" % synset.senses) # List of synonyms in this sense. 22 | print(" Hypernym: %s" % synset.hypernym) # Synset one step higher in the semantic network. 23 | print(" Hyponyms: %s" % synset.hyponyms()) # List of synsets that are more specific. 24 | print(" Holonyms: %s" % synset.holonyms()) # List of synsets of which this synset is part/member. 25 | print(" Meronyms: %s" % synset.meronyms()) # List of synsets that are part/member of this synset. 26 | print("") 27 | 28 | # What is the common ancestor (hypernym) of "cat" and "dog"? 29 | a = wordnet.synsets("cat")[0] 30 | b = wordnet.synsets("dog")[0] 31 | print("Common ancestor: %s" % wordnet.ancestor(a, b)) 32 | print("") 33 | 34 | # Synset.hypernyms(recursive=True) returns all parents of the synset, 35 | # Synset.hyponyms(recursive=True) returns all children, 36 | # optionally up to a given depth. 37 | # What kind of animal nouns are also verbs? 38 | synset = wordnet.synsets("animal")[0] 39 | for s in synset.hyponyms(recursive=True, depth=2): 40 | for word in s.senses: 41 | if word in wordnet.VERBS(): 42 | print("%s => %s" % (word, wordnet.synsets(word, pos=VERB))) 43 | 44 | # Synset.similarity() returns an estimate of the semantic similarity to another synset, 45 | # based on Lin's semantic distance measure and Resnik Information Content. 46 | # Lower values indicate higher similarity. 
47 | a = wordnet.synsets("cat")[0] # river, bicycle 48 | s = [] 49 | for word in ["poodle", "cat", "boat", "carrot", "rocket", 50 | "spaghetti", "idea", "grass", "education", 51 | "lake", "school", "balloon", "lion"]: 52 | b = wordnet.synsets(word)[0] 53 | s.append((a.similarity(b), word)) 54 | print("") 55 | print("Similarity to %s: %s" % (a.senses[0], sorted(s))) 56 | print("") 57 | -------------------------------------------------------------------------------- /pattern/text/en/wordlist/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | VECTOR | WORDLIST ################################################################### 2 | # Copyright (c) 2010 University of Antwerp, Belgium 3 | # Author: Tom De Smedt 4 | # License: BSD (see LICENSE.txt for details). 5 | # http://www.clips.ua.ac.be/pages/pattern 6 | 7 | #################################################################################################### 8 | 9 | from __future__ import print_function 10 | from __future__ import unicode_literals 11 | from __future__ import division 12 | 13 | from builtins import str, bytes, dict, int 14 | from builtins import map, zip, filter 15 | from builtins import object, range 16 | 17 | import os 18 | from io import open 19 | 20 | try: 21 | MODULE = os.path.dirname(os.path.realpath(__file__)) 22 | except: 23 | MODULE = "" 24 | 25 | 26 | class Wordlist(object): 27 | 28 | def __init__(self, name, data=[]): 29 | """ Lazy read-only list of words. 30 | """ 31 | self._name = name 32 | self._data = data 33 | 34 | def _load(self): 35 | if not self._data: 36 | self._data = open(os.path.join(MODULE, self._name + ".txt")).read().split(", ") 37 | 38 | def __repr__(self): 39 | self._load() 40 | return repr(self._data) 41 | 42 | def __iter__(self): 43 | self._load() 44 | return iter(self._data) 45 | 46 | def __len__(self): 47 | self._load() 48 | return len(self._data) 49 | 50 | def __contains__(self, w): 51 | self._load() 52 | return w in self._data 53 | 54 | def __add__(self, iterable): 55 | self._load() 56 | return Wordlist(None, data=sorted(self._data + list(iterable))) 57 | 58 | def __getitem__(self, i): 59 | self._load() 60 | return self._data[i] 61 | 62 | def __setitem__(self, i, v): 63 | self._load() 64 | self._data[i] = v 65 | 66 | def insert(self, i, v): 67 | self._load() 68 | self._data.insert(i, v) 69 | 70 | def append(self, v): 71 | self._load() 72 | self._data.append(v) 73 | 74 | def extend(self, v): 75 | self._load() 76 | self._data.extend(v) 77 | 78 | ACADEMIC = Wordlist("academic") # English academic words. 79 | BASIC = Wordlist("basic") # English basic words (850) that express 90% of concepts. 80 | PROFANITY = Wordlist("profanity") # English swear words. 81 | TIME = Wordlist("time") # English time and date words. 82 | STOPWORDS = Wordlist("stopwords") # English stop words ("a", "the", ...). 
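# For example, a quick membership test (sketch):
# from pattern.text.en.wordlist import STOPWORDS
# print("the" in STOPWORDS) # True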
83 | 84 | # Note: if used for lookups, performance can be increased by using a dict: 85 | # blacklist = dict.fromkeys(PROFANITY+TIME, True) 86 | # for i in range(1000): 87 | # corpus.append(Document(src[i], exclude=blacklist)) 88 | -------------------------------------------------------------------------------- /examples/06-graph/06-commonsense.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.graph.commonsense import Commonsense 11 | 12 | # A semantic network is a graph in which each node represents a concept 13 | # (e.g., flower, red, rose) and each edge represents a relation between 14 | # concepts, for example rose is-a flower, red is-property-of rose. 15 | 16 | # Module pattern.graph.commonsense implements a semantic network of commonsense. 17 | # It contains a Concept class (Node subclass), Relation class (Edge subclass), 18 | # and a Commonsense class (Graph subclass). 19 | # It contains about 10,000 manually annotated relations between mundane concepts, 20 | # for example gondola is-related-to romance, or spoon is-related-to soup. 21 | # This is the PERCEPTION dataset. See the visualizer at: 22 | # http://nodebox.net/perception/ 23 | 24 | # Relation.type can be: 25 | # - is-a, 26 | # - is-part-of, 27 | # - is-opposite-of, 28 | # - is-property-of, 29 | # - is-related-to, 30 | # - is-same-as, 31 | # - is-effect-of. 32 | 33 | g = Commonsense() 34 | g.add_node("spork") 35 | g.add_edge("spork", "spoon", type="is-a") 36 | 37 | # Concept.halo is a list of concepts surrounding the given concept, 38 | # which as such reinforce its meaning: 39 | print() 40 | print(g["spoon"].halo) # fork, etiquette, slurp, hot, soup, mouth, etc. 41 | 42 | # Concept.properties is a list of properties (= adjectives) in the halo, 43 | # sorted by betweenness centrality: 44 | print() 45 | print(g["spoon"].properties) # hot 46 | 47 | 48 | # Commonsense.field() returns a list of concepts 49 | # that belong to the given class (or "semantic field"): 50 | print() 51 | print(g.field("color", depth=3, fringe=2)) # brown, orange, blue, ... 52 | #print g.field("person") # Leonard Nimoy, Al Capone, ... 53 | #print g.field("building") # opera house, supermarket, ... 54 | 55 | # Commonsense.similarity() calculates the similarity between two concepts, 56 | # based on common properties between both 57 | # (e.g., tigers and zebras are both striped). 58 | print() 59 | print(g.similarity("tiger", "zebra")) 60 | print(g.similarity("tiger", "amoeba")) 61 | 62 | # Commonsense.nearest_neighbors() compares the properties of a given concept 63 | # to a list of other concepts, and selects the concept from the list that 64 | # is most similar to the given concept. 65 | # This will take some time to calculate (thinking is hard). 66 | print() 67 | print("Creepy animals:") 68 | print(g.nearest_neighbors("creepy", g.field("animal"))[:10]) 69 | print() 70 | print("Party animals:") 71 | print(g.nearest_neighbors("party", g.field("animal"))[:10]) 72 | 73 | # Creepy animals are: owl, vulture, octopus, bat, raven, ... 74 | # Party animals are: puppy, grasshopper, reindeer, dog, ...
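# Since we added "spork" above, it is now part of the network too.
# A quick sketch (the exact halo depends on the dataset):
print()
print(g["spork"].halo)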
75 | -------------------------------------------------------------------------------- /pattern/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN ####################################################################################### 2 | 3 | # Authors: Tom De Smedt , Walter Daelemans 4 | # License: BSD License, see LICENSE.txt 5 | 6 | #### BSD LICENSE ################################################################################### 7 | 8 | # Copyright (c) 2010 University of Antwerp, Belgium 9 | # All rights reserved. 10 | # 11 | # Redistribution and use in source and binary forms, with or without 12 | # modification, are permitted provided that the following conditions are met: 13 | # 14 | # * Redistributions of source code must retain the above copyright 15 | # notice, this list of conditions and the following disclaimer. 16 | # * Redistributions in binary form must reproduce the above copyright 17 | # notice, this list of conditions and the following disclaimer in 18 | # the documentation and/or other materials provided with the 19 | # distribution. 20 | # * Neither the name of Pattern nor the names of its 21 | # contributors may be used to endorse or promote products 22 | # derived from this software without specific prior written 23 | # permission. 24 | # 25 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 29 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 33 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 35 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 | # POSSIBILITY OF SUCH DAMAGE. 37 | # 38 | # CLiPS Computational Linguistics Group, University of Antwerp, Belgium 39 | # http://www.clips.ua.ac.be/pages/pattern 40 | 41 | from __future__ import unicode_literals 42 | 43 | ### CREDITS ######################################################################################## 44 | 45 | __author__ = "Tom De Smedt" 46 | __credits__ = "Tom De Smedt, Walter Daelemans" 47 | __version__ = "3.6" 48 | __copyright__ = "Copyright (c) 2010 University of Antwerp (BE)" 49 | __license__ = "BSD" 50 | 51 | #################################################################################################### 52 | 53 | import os 54 | 55 | # Shortcuts to pattern.en, pattern.es, ... 56 | # (instead of pattern.text.en, pattern.text.es, ...) 
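# For example, "from pattern.en import parse" is a shorthand
# for "from pattern.text.en import parse".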
57 | try: 58 | __path__.append(os.path.join(__path__[0], "text")) 59 | except: 60 | pass 61 | -------------------------------------------------------------------------------- /examples/05-vector/05-nb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | 5 | from builtins import str, bytes, dict, int 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.vector import Document, Model, NB 12 | from pattern.db import Datasheet 13 | 14 | # Naive Bayes is one of the oldest classifiers, 15 | # but it is still popular because it is fast for models 16 | # that have many documents and many features. 17 | # It is outperformed by KNN and SVM, but useful as a baseline for tests. 18 | 19 | # We'll test it with a corpus of spam e-mail messages, 20 | # included in the test suite, stored as a CSV-file. 21 | # The corpus contains mostly technical e-mail from developer mailing lists. 22 | data = os.path.join(os.path.dirname(__file__), "..", "..", "test", "corpora", "spam-apache.csv") 23 | data = Datasheet.load(data) 24 | 25 | documents = [] 26 | for score, message in data: 27 | document = Document(message, type=int(score) > 0) 28 | documents.append(document) 29 | m = Model(documents) 30 | 31 | print("number of documents:", len(m)) 32 | print("number of words:", len(m.vector)) 33 | print("number of words (average):", sum(len(d.features) for d in m.documents) / float(len(m))) 34 | print() 35 | 36 | # Train Naive Bayes on all documents. 37 | # Each document has a type: True for actual e-mail, False for spam. 38 | # This results in a "binary" classifier that either answers True or False 39 | # for unknown documents. 40 | classifier = NB() 41 | for document in m: 42 | classifier.train(document) 43 | 44 | # We can now ask it questions about unknown e-mails: 45 | 46 | print(classifier.classify("win money")) # False: most likely spam. 47 | print(classifier.classify("fix bug")) # True: most likely a real message. 48 | print() 49 | 50 | print(classifier.classify("customer")) # False: people don't talk like this on developer lists... 51 | print(classifier.classify("guys")) # True: because most likely everyone knows everyone. 52 | print() 53 | 54 | # To test the accuracy of a classifier, 55 | # we typically use 10-fold cross validation. 56 | # This means that 10 individual tests are performed, 57 | # each with 90% of the corpus as training data and 10% as testing data. 58 | from pattern.vector import k_fold_cv 59 | print(k_fold_cv(NB, documents=m, folds=10)) 60 | 61 | # This yields 5 scores: (Accuracy, Precision, Recall, F-score, standard deviation). 62 | # Accuracy in itself is not very useful, 63 | # since some spam may have been regarded as real messages (false positives), 64 | # and some real messages may have been regarded as spam (false negatives). 65 | # Precision = how accurately false positives are discarded, 66 | # Recall = how accurately false negatives are discarded. 67 | # F-score = harmonic mean of precision and recall. 68 | # stdev = folds' variation from average F-score.
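# As a rougher sanity check, a minimal sketch of a single 90/10 holdout split
# (assuming that NB, like the other classifiers in pattern.vector, accepts a
# train= list of documents):
#split = int(len(documents) * 0.9)
#nb = NB(train=documents[:split])
#correct = sum(1 for d in documents[split:] if nb.classify(d) == d.type)
#print(correct / float(len(documents[split:])))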
69 | -------------------------------------------------------------------------------- /examples/04-search/02-constraint.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search, Pattern, Constraint 11 | from pattern.en import parsetree, parse, Sentence 12 | 13 | # What we call a "search word" in example 01-search.py 14 | # is actually called a constraint, because it can contain different options. 15 | # Options are separated by "|". 16 | # The next search pattern retrieves words that are a noun OR an adjective: 17 | s = parsetree("big white rabbit") 18 | print(search("NN|JJ", s)) 19 | print("") 20 | 21 | # This pattern yields phrases containing an adjective followed by a noun. 22 | # Consecutive constraints are separated by a space: 23 | print(search("JJ NN", s)) 24 | print("") 25 | 26 | # Or a noun preceded by any number of adjectives: 27 | print(search("JJ?+ NN", s)) 28 | print("") 29 | 30 | # Note: NN marks singular nouns, NNS marks plural nouns. 31 | # If you want to include both, use "NN*" as a constraint. 32 | # This works for NN*, VB*, JJ*, RB*. 33 | 34 | s = parsetree("When I sleep the big white rabbit will stare at my feet.") 35 | m = search("rabbit stare at feet", s) 36 | print(s) 37 | print(m) 38 | print("") 39 | # Why does this work? 40 | # The word "will" is included in the result, even if the pattern does not define it. 41 | # The pattern should break when it does not encounter "stare" after "rabbit." 42 | # It works because "will stare" is one verb chunk. 43 | # The "stare" constraint matches the head word of the chunk ("stare"), 44 | # so "will stare" is considered an overspecified version of "stare". 45 | # The same happens with the "feet" constraint, which matches the chunk "my feet", 46 | # and the "rabbit" constraint, which matches the overspecified chunk "the big white rabbit". 47 | 48 | p = Pattern.fromstring("rabbit stare at feet", s) 49 | p.strict = True # Now it matches only what the pattern explicitly defines (=no match). 50 | m = p.search(s) 51 | print(m) 52 | print("") 53 | 54 | # Sentence chunks can be matched by tag (e.g. NP, VP, ADJP). 55 | # The pattern below matches anything from 56 | # "the rabbit gnaws at your fingers" to 57 | # "the white rabbit looks at the carrots": 58 | p = Pattern.fromstring("rabbit VP at NP", s) 59 | m = p.search(s) 60 | print(m) 61 | print("") 62 | 63 | if m: 64 | for w in m[0].words: 65 | print("%s\t=> %s" % (w, m[0].constraint(w))) 66 | 67 | print("") 68 | print("-------------------------------------------------------------") 69 | # Finally, constraints can also include regular expressions.
70 | # To include them we need to use the full syntax instead of the search() function: 71 | import re 72 | r = re.compile(r"[0-9.]+") # all numbers 73 | p = Pattern() 74 | p.sequence.append(Constraint(words=[r])) 75 | p.sequence.append(Constraint(tags=["NN*"])) 76 | 77 | s = Sentence(parse("I have 9.5 rabbits.")) 78 | print(s) 79 | print(p.search(s)) 80 | print("") 81 | -------------------------------------------------------------------------------- /examples/04-search/04-taxonomy.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.search import search, taxonomy, Classifier 11 | from pattern.en import parsetree 12 | 13 | # The search module includes a Taxonomy class 14 | # that can be used to define semantic word types. 15 | # For example, consider that you want to extract flower names from a text. 16 | # This would make search patterns somewhat unwieldy: 17 | # search("rose|lily|daisy|daffodil|begonia", txt). 18 | 19 | # A better approach is to use the taxonomy: 20 | for flower in ("rose", "lily", "daisy", "daffodil", "begonia"): 21 | taxonomy.append(flower, type="flower") 22 | 23 | print(taxonomy.children("flower")) 24 | print(taxonomy.parents("rose")) 25 | print(taxonomy.classify("rose")) # Yields the most recently added parent. 26 | print("") 27 | 28 | # Taxonomy terms can be included in a pattern by using uppercase: 29 | t = parsetree("A field of white daffodils.", lemmata=True) 30 | m = search("FLOWER", t) 31 | print(t) 32 | print(m) 33 | print("") 34 | 35 | # Another example: 36 | taxonomy.append("chicken", type="food") 37 | taxonomy.append("chicken", type="bird") 38 | taxonomy.append("penguin", type="bird") 39 | taxonomy.append("bird", type="animal") 40 | print(taxonomy.parents("chicken")) 41 | print(taxonomy.children("animal", recursive=True)) 42 | print(search("FOOD", "I'm eating chicken.")) 43 | print("") 44 | 45 | # The advantage is that the taxonomy can hold an entire hierarchy. 46 | # For example, "flower" could be classified as "organism". 47 | # Other organisms could be defined as well (insects, trees, mammals, ...) 48 | # The ORGANISM constraint then matches everything that is an organism. 49 | 50 | # A taxonomy entry can also be a proper name containing spaces 51 | # (e.g. "windows vista", case insensitive). 52 | # It will be detected as long as it is contained in a single chunk: 53 | taxonomy.append("windows vista", type="operating system") 54 | taxonomy.append("ubuntu", type="operating system") 55 | 56 | t = parsetree("Which do you like more, Windows Vista, or Ubuntu?") 57 | m = search("OPERATING_SYSTEM", t) 58 | print(t) 59 | print(m) 60 | print(m[0].constituents()) 61 | print("") 62 | 63 | # Taxonomy entries cannot have wildcards (*), 64 | # but you can use a classifier to simulate this. 65 | # Classifiers are quite slow but useful in many ways. 66 | # For example, a classifier could be written to dynamically 67 | # retrieve word categories from WordNet; see the sketch below.
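# A minimal sketch of such a classifier (an assumption based on the
# pattern.en.wordnet API, where synsets() returns Synset objects and
# Synset.hypernyms() climbs the hierarchy; wordnet_parents is illustrative):
#from pattern.en import wordnet
#
#def wordnet_parents(word):
#    try:
#        return [s.synonyms[0] for s in wordnet.synsets(word)[0].hypernyms()]
#    except IndexError: # Unknown word.
#        return []
#
#taxonomy.classifiers.append(Classifier(parents=wordnet_parents))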
68 | 69 | 70 | def find_parents(word): 71 | if word.startswith(("mac os", "windows", "ubuntu")): 72 | return ["operating system"] 73 | c = Classifier(parents=find_parents) 74 | taxonomy.classifiers.append(c) 75 | 76 | t = parsetree("I like Mac OS X 10.5 better than Windows XP or Ubuntu.") 77 | m = search("OPERATING_SYSTEM", t) 78 | print(t) 79 | print(m) 80 | print(m[0].constituents()) 81 | print(m[1].constituents()) 82 | print("") 83 | -------------------------------------------------------------------------------- /docs/js/shBrushPython.js: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | ;(function() 18 | { 19 | // CommonJS 20 | typeof(require) != 'undefined' ? SyntaxHighlighter = require('shCore').SyntaxHighlighter : null; 21 | 22 | function Brush() 23 | { 24 | // Contributed by Gheorghe Milas and Ahmad Sherif 25 | 26 | var keywords = 'and assert break class continue def del elif else ' + 27 | 'except exec finally for from global if import in is ' + 28 | 'lambda not or pass print raise return try yield while'; 29 | 30 | var funcs = '__import__ abs all any apply basestring bin bool buffer callable ' + 31 | 'chr classmethod cmp coerce compile complex delattr dict dir ' + 32 | 'divmod enumerate eval execfile file filter float format frozenset ' + 33 | 'getattr globals hasattr hash help hex id input int intern ' + 34 | 'isinstance issubclass iter len list locals long map max min next ' + 35 | 'object oct open ord pow property range raw_input reduce ' + 36 | 'reload repr reversed round set setattr slice sorted staticmethod ' + 37 | 'str sum super tuple type type unichr unicode vars xrange zip'; 38 | 39 | var special = 'None True False self cls class_'; 40 | 41 | this.regexList = [ 42 | { regex: SyntaxHighlighter.regexLib.singleLinePerlComments, css: 'comments1' }, 43 | { regex: /^\s*@\w+/gm, css: 'decorator' }, 44 | { regex: /(['\"]{3})([^\1])*?\1/gm, css: 'comments2' }, 45 | { regex: /"(?!")(?:\.|\\\"|[^\""\n])*"/gm, css: 'string' }, 46 | { regex: /'(?!')(?:\.|(\\\')|[^\''\n])*'/gm, css: 'string' }, 47 | { regex: /\b\d+\.?\w*/g, css: 'value' }, 48 | { regex: /def ([^\()]+)\(/g, func: function(match, r) { 49 | return [ 50 | new SyntaxHighlighter.Match("def ", match.index, "keyword"), 51 | new SyntaxHighlighter.Match(match[1], match.index+4, "name") 52 | ]; } }, 53 | { regex: /class ([0-9a-zA-Z]+)(\(|:)/g, func: function(match, r) { 54 | return [ 55 | new SyntaxHighlighter.Match("class ", match.index, "keyword"), 56 | new SyntaxHighlighter.Match(match[1], match.index+6, "name") 57 | ]; } }, 58 | { regex: new RegExp(this.getKeywords(funcs), 'gmi'), css: 'functions' }, 59 | { regex: new RegExp(this.getKeywords(keywords), 'gm'), css: 'keyword' }, 60 | { regex: new RegExp(this.getKeywords(special), 'gm'), css: 'color1' } 61 | ]; 62 | 63 | this.forHtmlScript(SyntaxHighlighter.regexLib.aspScriptTags); 64 | }; 65 | 66 | Brush.prototype = new SyntaxHighlighter.Highlighter(); 67 | Brush.aliases = ['py', 'python']; 68 | 69 | SyntaxHighlighter.brushes.Python = Brush; 70 | 71 | // CommonJS 72 | typeof(exports) != 'undefined' ? 
exports.Brush = Brush : null; 73 | })(); 74 | -------------------------------------------------------------------------------- /examples/01-web/08-wiktionary.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Wiktionary, DOM 11 | from pattern.db import csv, pd 12 | 13 | # This example retrieves male and female given names from Wiktionary (http://en.wiktionary.org). 14 | # It then trains a classifier that can predict the gender of unknown names (about 78% correct). 15 | # The classifier is small (80KB) and fast. 16 | 17 | w = Wiktionary(language="en") 18 | f = csv() # csv() is a short alias for Datasheet(). 19 | 20 | # Collect male and female given names from Wiktionary. 21 | # Store the data as (name, gender)-rows in a CSV-file. 22 | # The pd() function returns the parent directory of the current script, 23 | # so pd("given-names.csv") = pattern/examples/01-web/given-names.csv. 24 | 25 | for gender in ("male", "female"): 26 | for ch in ("abcdefghijklmnopqrstuvwxyz"): 27 | p = w.search("Appendix:%s_given_names/%s" % (gender.capitalize(), ch.capitalize()), cached=True) 28 | for name in p.links: 29 | if not name.startswith("Appendix:"): 30 | f.append((name, gender[0])) 31 | f.save(pd("given-names.csv")) 32 | print(ch, gender) 33 | 34 | # Create a classifier that predicts gender based on name. 35 | 36 | from pattern.vector import SVM, chngrams, count, kfoldcv 37 | 38 | 39 | class GenderByName(SVM): 40 | 41 | def train(self, name, gender=None): 42 | SVM.train(self, self.vector(name), gender) 43 | 44 | def classify(self, name): 45 | return SVM.classify(self, self.vector(name)) 46 | 47 | def vector(self, name): 48 | """ Returns a dictionary with character bigrams and suffix. 49 | For example, "Felix" => {"Fe":1, "el":1, "li":1, "ix":1, "ix$":1, 5:1} 50 | """ 51 | v = chngrams(name, n=2) 52 | v = count(v) 53 | v[name[-2:] + "$"] = 1 54 | v[len(name)] = 1 55 | return v 56 | 57 | data = csv(pd("given-names.csv")) 58 | 59 | # Test average (accuracy, precision, recall, F-score, standard deviation). 60 | 61 | print(kfoldcv(GenderByName, data, folds=3)) # (0.81, 0.79, 0.77, 0.78, 0.00) 62 | 63 | # Train and save the classifier in the current folder. 64 | # With final=True, discards the original training data (= smaller file). 65 | 66 | g = GenderByName(train=data) 67 | g.save(pd("gender-by-name.svm"), final=True) 68 | 69 | # Next time, we can simply load the trained classifier. 70 | # Keep in mind that the script that loads the classifier 71 | # must include the code for the GenderByName class description, 72 | # otherwise Python won't know how to load the data. 73 | 74 | g = GenderByName.load(pd("gender-by-name.svm")) 75 | 76 | for name in ( 77 | "Felix", 78 | "Felicia", 79 | "Rover", 80 | "Kitty", 81 | "Legolas", 82 | "Arwen", 83 | "Jabba", 84 | "Leia", 85 | "Flash", 86 | "Barbarella"): 87 | print(name, g.classify(name)) 88 | 89 | # In the example above, Arwen and Jabba are misclassified. 
90 | # We can of course improve the classifier by hand: 91 | 92 | #g.train("Arwen", gender="f") 93 | #g.train("Jabba", gender="m") 94 | #g.save(pd("gender-by-name.svm"), final=True) 95 | #print(g.classify("Arwen")) 96 | #print(g.classify("Jabba")) 97 | -------------------------------------------------------------------------------- /pattern/text/en/wordlist/profanity.txt: -------------------------------------------------------------------------------- 1 | anus, arse, arsehole, ass, ass-hat, ass-jabber, ass-pirate, assbag, assbandit, assbanger, assbite, assclown, asscock, asscracker, asses, assface, assfuck, assfucker, assgoblin, asshat, asshead, asshole, asshopper, assjacker, asslick, asslicker, assmonkey, assmunch, assmuncher, assnigger, asspirate, assshit, assshole, asssucker, asswad, asswipe, balls, bampot, bastard, beaner, bint, bitch, bitchass, bitches, bitchtits, bitchy, bloody, blowjob, blowjob, bollocks, bollox, boner, brotherfucker, bugger, bullshit, bumblefuck, butt plug, butt-pirate, buttfucka, buttfucker, camel toe, carpetmuncher, chinc, chink, choad, chode, clit, clitface, clitfuck, clusterfuck, cock, cockass, cockbite, cockburger, cockface, cockfucker, cockhead, cockjockey, cockknoker, cockmaster, cockmongler, cockmongruel, cockmonkey, cockmuncher, cocknose, cocknugget, cockshit, cocksmith, cocksmoke, cocksmoker, cocksniffer, cocksucker, cockwaffle, coochie, coochy, coon, cooter, cracker, cum, cumbubble, cumdumpster, cumguzzler, cumjockey, cumslut, cumtart, cunnie, cunnilingus, cunt, cuntass, cuntface, cunthole, cuntlicker, cuntrag, cuntslut, dago, dammit, damn, dang, deggo, dick, dickbag, dickbeaters, dickface, dickfuck, dickfucker, dickhead, dickhole, dickjuice, dickmilk, dickmonger, dicks, dickslap, dicksucker, dicksucking, dickwad, dickweasel, dickweed, dickwod, dike, dildo, dipshit, doochbag, dookie, douche, douche-fag, douchebag, douchewaffle, dumass, dumb ass, dumbass, dumbfuck, dumbshit, dumshit, dyke, fag, fagbag, fagfucker, faggit, faggot, faggotcock, fagtard, fatass, fellatio, feltch, flamer, fool, frickin, friggin, f*ck, fuck, fuckass, fuckbag, fuckboy, fuckbrain, fuckbutt, fucked, fucker, fuckersucker, fuckface, fuckhead, fuckhole, fuckin, fucking, fucknut, fucknutt, fuckoff, fucks, fuckstick, fucktard, fucktart, fuckup, fuckwad, fuckwit, fuckwitt, fudgepacker, gay, gayass, gaybob, gaydo, gayfuck, gayfuckist, gaylord, gaytard, gaywad, goddamn, goddamnit, gooch, gook, gringo, guido, handjob, hard on, heeb, helminth, hell, ho, hoe, hoebag, homo, homodumbshit, honkey, humping, idiot, imbecile, jackass, jap, jerk off, jerk wad, jigaboo, jizz, jungle bunny, junglebunny, kike, kooch, kootch, kraut, kunt, kyke, lameass, lesbian, lesbo, lezzie, mcfagget, mick, midget, minge, moron, mothafucka, mothafuckin, motherfuck, motherfucker, motherfucking, muff, muffdiver, munging, negro, nigaboo, nigga, nigger, niggers, niglet, nutter, nut sack, nutsack, paki, panooch, pecker, peckerhead, penis, penisbanger, penisfucker, penispuffer, piss, pissed, pissed off, pissflaps, polesmoker, pollock, poon, poonani, poonany, poontang, porch monkey, porchmonkey, prick, punanny, punta, pussies, pussy, pussylicking, puto, queef, queer, queerbait, queerhole, renob, retard, rimjob, ruski, sand nigger, sandnigger, schlong, schmuck, scrote, scullion, shag, shit, shitass, shitbag, shitbagger, shitbrains, shitbreath, shitcanned, shitcunt, shitdick, shitface, shitfaced, shithead, shithole, shithouse, shitspitter, shitstain, shitter, shittiest, shitting, shitty, shiz, shiznit, skank, skeet, skullfuck, 
slag, slapper, slut, slutbag, slubberdegullion, smeg, snatch, sodding, sonofabitch, spastic, spic, spick, splooge, spook, sucka, suckass, sucker, suckers, tard, testicle, thundercunt, tit, titfuck, tits, tittyfuck, trollop, twat, twatlips, twats, twatwaffle, unclefucker, va-j-j, vag, vagina, vajayjay, vjayjay, wank, wanker, wankjob, wetback, whore, whorebag, whoreface, wop, wtf -------------------------------------------------------------------------------- /examples/06-graph/04-canvas.html: -------------------------------------------------------------------------------- graph.js example
87 | 88 | -------------------------------------------------------------------------------- /pattern/text/es/es-morphology.txt: -------------------------------------------------------------------------------- 1 | NC mente fhassuf 5 RG x 2 | NC aste fhassuf 4 VMI x 3 | NC iste fhassuf 4 VMI x 4 | NC amos fhassuf 4 VMI x 5 | NC emos fhassuf 4 VMI x 6 | NC imos fhassuf 4 VMI x 7 | NC steis fhassuf 5 VMI x 8 | NC iendo fhassuf 5 VMG x 9 | NC ando fhassuf 4 VMG x 10 | NC ity fhassuf 3 NP x 11 | NC ón fhassuf 2 NCS x 12 | NC nes fhassuf 3 NCP x 13 | NC ara fhassuf 3 SP x 14 | NC omo fhassuf 3 CS x 15 | NC dad fhassuf 3 NCS x 16 | NC ás fhassuf 2 RG x 17 | NC ió fhassuf 2 VMI x 18 | NC rte fhassuf 3 NCS x 19 | NC én fhassuf 2 RG x 20 | NC tre fhassuf 3 SP x 21 | NC aba fhassuf 3 VMI x 22 | NC ial fhassuf 3 AQ x 23 | NC ura fhassuf 3 NCS x 24 | NC mas fhassuf 3 NCP x 25 | NC tor fhassuf 3 NCS x 26 | NC sde fhassuf 3 SP x 27 | NC ene fhassuf 3 VMI x 28 | NC ral fhassuf 3 AQ x 29 | NC rar fhassuf 3 VMN x 30 | NC ble fhassuf 3 AQ x 31 | NC ede fhassuf 3 VMI x 32 | NC cio fhassuf 3 NCS x 33 | NC ito fhassuf 3 NCS x 34 | NC tá fhassuf 2 VAI x 35 | NC ses fhassuf 3 NCP x 36 | NC bra fhassuf 3 NCS x 37 | NC ema fhassuf 3 NCS x 38 | NC ban fhassuf 3 VMI x 39 | NC aje fhassuf 3 NCS x 40 | NC tó fhassuf 2 VMI x 41 | NC zó fhassuf 2 VMI x 42 | NC rra fhassuf 3 NCS x 43 | NC ró fhassuf 2 VMI x 44 | NC mpo fhassuf 3 NCS x 45 | NC ­as fhassuf 2 NCP x 46 | NC ula fhassuf 3 NCS x 47 | NC zar fhassuf 3 VMN x 48 | NC car fhassuf 3 VMN x 49 | NC 000 fhassuf 3 Z x 50 | NC glo fhassuf 3 W x 51 | NC ece fhassuf 3 VMI x 52 | NC mer fhassuf 3 AO x 53 | NC ato fhassuf 3 NCS x 54 | NC ulo fhassuf 3 NCS x 55 | NC eza fhassuf 3 NCS x 56 | NC gos fhassuf 3 NCP x 57 | NC pos fhassuf 3 NCP x 58 | NC upo fhassuf 3 NCS x 59 | NC eso fhassuf 3 NCS x 60 | NC ner fhassuf 3 VMN x 61 | NC ío fhassuf 2 NCS x 62 | NC nar fhassuf 3 VMN x 63 | NC ipo fhassuf 3 NCS x 64 | NC gen fhassuf 3 NCS x 65 | NC cen fhassuf 3 VMI x 66 | NC nó fhassuf 2 VMI x 67 | NC jos fhassuf 3 NCP x 68 | NC nen fhassuf 3 VMI x 69 | NC oso fhassuf 3 AQ x 70 | NC elo fhassuf 3 NCS x 71 | NC tan fhassuf 3 VMI x 72 | NC ela fhassuf 3 NCS x 73 | NC zas fhassuf 3 NCP x 74 | NC rie fhassuf 3 NCS x 75 | NC aso fhassuf 3 NCS x 76 | NC ase fhassuf 3 NCS x 77 | NC ace fhassuf 3 VMI x 78 | NC yor fhassuf 3 AQ x 79 | NC có fhassuf 2 VMI x 80 | NC cal fhassuf 3 AQ x 81 | NC iza fhassuf 3 VMI x 82 | NC uir fhassuf 3 VMN x 83 | NC evo fhassuf 3 AQ x 84 | NC tud fhassuf 3 NCS x 85 | NC jes fhassuf 3 NCP x 86 | NC bro fhassuf 3 NCS x 87 | NC só fhassuf 2 VMI x 88 | NC pal fhassuf 3 AQ x 89 | NC mar fhassuf 3 VMN x 90 | NC lor fhassuf 3 NCS x 91 | NC ayo fhassuf 3 W x 92 | NC blo fhassuf 3 NCS x 93 | NC ten fhassuf 3 VMI x 94 | NC bum fhassuf 3 NCS x 95 | NC gó fhassuf 2 VMI x 96 | NC dre fhassuf 3 NCS x 97 | NC cir fhassuf 3 VMN x 98 | NC lia fhassuf 3 NCS x 99 | NC sco fhassuf 3 NCS x 100 | NC mó fhassuf 2 VMI x 101 | NC 004 fhassuf 3 Z x 102 | NC oda fhassuf 3 DI x 103 | NC igo fhassuf 3 NCS x 104 | NC dó fhassuf 2 VMI x 105 | NC ar fhassuf 2 VMN x 106 | NC ir fhassuf 2 VMN x 107 | NC sma fhassuf 3 AQ x 108 | NC ril fhassuf 3 W x 109 | NC ado fhassuf 3 VMP x 110 | NC ido fhassuf 3 VMP x 111 | NC ico fhassuf 3 AQ x 112 | NC ivo fhassuf 3 AQ x 113 | NC osa fhassuf 3 AQ x 114 | NC oso fhassuf 3 AQ x 115 | NV ía fhassuf 2 VMI x 116 | NC as fhassuf 2 NCP x 117 | NC os fhassuf 2 NCP x 118 | NC es fhassuf 2 NCP x 119 | NC al fhassuf 2 AQ x 120 | NC a fhassuf 1 NCS x 121 | NC o fhassuf 1 
NCS x 122 | NC e fhassuf 1 NCS x 123 | NV ó fhassuf 1 VMI x -------------------------------------------------------------------------------- /examples/07-canvas/05-points.html: -------------------------------------------------------------------------------- canvas.js | path points -------------------------------------------------------------------------------- /docs/js/shThemeDefault.css: -------------------------------------------------------------------------------- 1 | /** 2 | * SyntaxHighlighter 3 | * http://alexgorbatchev.com/SyntaxHighlighter 4 | * 5 | * SyntaxHighlighter is donationware. If you are using it, please donate. 6 | * http://alexgorbatchev.com/SyntaxHighlighter/donate.html 7 | * 8 | * @version 9 | * 3.0.83 (July 02 2010) 10 | * 11 | * @copyright 12 | * Copyright (C) 2004-2010 Alex Gorbatchev. 13 | * 14 | * @license 15 | * Dual licensed under the MIT and GPL licenses. 16 | */ 17 | .syntaxhighlighter { 18 | background-color: white !important; 19 | } 20 | .syntaxhighlighter .line.alt1 { 21 | background-color: white !important; 22 | } 23 | .syntaxhighlighter .line.alt2 { 24 | background-color: white !important; 25 | } 26 | .syntaxhighlighter .line.highlighted.alt1, .syntaxhighlighter .line.highlighted.alt2 { 27 | background-color: #e0e0e0 !important; 28 | } 29 | .syntaxhighlighter .line.highlighted.number { 30 | color: black !important; 31 | } 32 | .syntaxhighlighter table caption { 33 | color: black !important; 34 | } 35 | .syntaxhighlighter .gutter { 36 | color: #afafaf !important; 37 | } 38 | .syntaxhighlighter .gutter .line { 39 | border-right: 3px solid #6ce26c !important; 40 | } 41 | .syntaxhighlighter .gutter .line.highlighted { 42 | background-color: #6ce26c !important; 43 | color: white !important; 44 | } 45 | .syntaxhighlighter.printing .line .content { 46 | border: none !important; 47 | } 48 | .syntaxhighlighter.collapsed { 49 | overflow: visible !important; 50 | } 51 | .syntaxhighlighter.collapsed .toolbar { 52 | color: blue !important; 53 | background: white !important; 54 | border: 1px solid #6ce26c !important; 55 | } 56 | .syntaxhighlighter.collapsed .toolbar a { 57 | color: blue !important; 58 | } 59 | .syntaxhighlighter.collapsed .toolbar a:hover { 60 | color: red !important; 61 | } 62 | .syntaxhighlighter .toolbar { 63 | color: white !important; 64 | background: #6ce26c !important; 65 | border: none !important; 66 | } 67 | .syntaxhighlighter .toolbar a { 68 | color: white !important; 69 | } 70 | .syntaxhighlighter .toolbar a:hover { 71 | color: black !important; 72 | } 73 | .syntaxhighlighter .plain, .syntaxhighlighter .plain a { 74 | color: black !important; 75 | } 76 | .syntaxhighlighter .comments, .syntaxhighlighter .comments a { 77 | color: #008200 !important; 78 | } 79 | .syntaxhighlighter .string, .syntaxhighlighter .string a { 80 | color: blue !important; 81 | } 82 | .syntaxhighlighter .keyword { 83 | color: #006699 !important; 84 | } 85 | .syntaxhighlighter .preprocessor { 86 | color: gray !important; 87 | } 88 | .syntaxhighlighter .variable { 89 | color: #aa7700 !important; 90 | } 91 | .syntaxhighlighter .value { 92 | color: #009900 !important; 93 | } 94 | .syntaxhighlighter .functions { 95 | color: #ff1493 !important; 96 | } 97 | .syntaxhighlighter .constants { 98 | color: #0066cc !important; 99 | } 100 | .syntaxhighlighter .script { 101 | font-weight: bold !important; 102 | color: #006699 !important; 103 | background-color: none !important; 104 | } 105 | .syntaxhighlighter .color1, .syntaxhighlighter
.color1 a { 106 | color: gray !important; 107 | } 108 | .syntaxhighlighter .color2, .syntaxhighlighter .color2 a { 109 | color: #ff1493 !important; 110 | } 111 | .syntaxhighlighter .color3, .syntaxhighlighter .color3 a { 112 | color: red !important; 113 | } 114 | 115 | .syntaxhighlighter .keyword { 116 | font-weight: bold !important; 117 | } 118 | -------------------------------------------------------------------------------- /examples/05-vector/02-model.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | import glob 10 | 11 | from io import open 12 | 13 | from pattern.vector import Document, Model, TF, TFIDF 14 | 15 | # A document is a bag-of-words representation of a text. 16 | # Each word or feature in the document vector has a weight, 17 | # based on how many times the word occurs in the text. 18 | # This weight is called term frequency (TF). 19 | 20 | # Another interesting measure is TF-IDF: 21 | # term frequency-inverse document frequency. 22 | # Suppose that "the" is the most frequent word in the text. 23 | # But it also occurs frequently in many other texts, 24 | # so it is not very specific or "unique" in any one document. 25 | # TF-IDF divides term frequency ("how many times in this text?") 26 | # by the document frequency ("how many times in all texts?") 27 | # to represent this. 28 | 29 | # A Model is a collection of document vectors. 30 | # A Model is a matrix (or vector space) 31 | # with documents as rows, features as columns, and feature weights as cells. 32 | # We can then do calculations on the matrix, 33 | # for example to compute TF-IDF or similarity between documents. 34 | 35 | # Load a model from a folder of text documents: 36 | documents = [] 37 | for f in glob.glob(os.path.join(os.path.dirname(__file__), "corpus", "*.txt")): 38 | text = open(f, encoding="utf-8").read() 39 | name = os.path.basename(f)[:-4] 40 | documents.append(Document(text, name=name)) 41 | 42 | m = Model(documents, weight=TFIDF) 43 | 44 | # We can retrieve documents by name: 45 | d = m.document(name="lion") 46 | 47 | print(d.keywords(top=10)) 48 | print() 49 | print(d.tf("food")) 50 | print(d.tfidf("food")) # TF-IDF is less: "food" is also mentioned with the other animals. 51 | print() 52 | 53 | # We can compare how similar two documents are. 54 | # This is done by calculating the distance between the document vectors 55 | # (i.e., finding those that are near to each other). 56 | 57 | # For example, say we have two vectors with features "x" and "y". 58 | # We can calculate the distance between two points (x, y) in 2-D space: 59 | # d = sqrt(pow(x2 - x1, 2) + pow(y2 - y1, 2)) 60 | # This is the Euclidean distance in 2-D space. 61 | # Similarly, we can calculate the distance in n-D space, 62 | # in other words, for vectors with lots of features. 63 | 64 | # For text, a better metric than Euclidean distance 65 | # is called cosine similarity.
This is what a Model uses: 66 | d1 = m.document(name="lion") 67 | d2 = m.document(name="tiger") 68 | d3 = m.document(name="dolphin") 69 | d4 = m.document(name="shark") 70 | d5 = m.document(name="parakeet") 71 | print("lion-tiger:", m.similarity(d1, d2)) 72 | print("lion-dolphin:", m.similarity(d1, d3)) 73 | print("dolphin-shark:", m.similarity(d3, d4)) 74 | print("dolphin-parakeet:", m.similarity(d3, d5)) 75 | print() 76 | 77 | print("Related to tiger:") 78 | print(m.neighbors(d2, top=3)) # Top three most similar. 79 | print() 80 | 81 | print("Related to a search query ('water'):") 82 | print(m.search("water", top=10)) 83 | 84 | # In summary: 85 | 86 | # A Document: 87 | # - takes a string of text, 88 | # - counts the words in the text, 89 | # - constructs a vector of words (features) and normalized word count (weight). 90 | 91 | # A Model: 92 | # - groups multiple vectors in a matrix, 93 | # - tweaks the weight with TF-IDF to find "unique" words in each document, 94 | # - computes cosine similarity (= distance between vectors), 95 | # - compares documents using cosine similarity. 96 | -------------------------------------------------------------------------------- /examples/05-vector/01-document.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from io import open 11 | 12 | from pattern.vector import Document, PORTER, LEMMA 13 | 14 | # A Document is a "bag-of-words" that splits a string into words and counts them. 15 | # A list of words or dictionary of (word, count)-items can also be given. 16 | 17 | # Words (or more generally "features") and their word count ("feature weights") 18 | # can be used to compare documents. The word count in a document is normalized 19 | # between 0.0-1.0 so that shorter documents can be compared to longer documents. 20 | 21 | # Words can be stemmed or lemmatized before counting them. 22 | # The purpose of stemming is to bring variant forms of a word together. 23 | # For example, "conspiracy" and "conspired" are both stemmed to "conspir". 24 | # Nowadays, lemmatization is usually preferred over stemming, 25 | # e.g., "conspiracies" => "conspiracy", "conspired" => "conspire". 26 | 27 | s = """ 28 | The shuttle Discovery, already delayed three times by technical problems and bad weather, 29 | was grounded again Friday, this time by a potentially dangerous gaseous hydrogen leak 30 | in a vent line attached to the ship's external tank. 31 | The Discovery was initially scheduled to make its 39th and final flight last Monday, 32 | bearing fresh supplies and an intelligent robot for the International Space Station. 33 | But complications delayed the flight from Monday to Friday, 34 | when the hydrogen leak led NASA to conclude that the shuttle would not be ready to launch 35 | before its flight window closed this Monday. 36 | """ 37 | 38 | # With threshold=1, only words that occur more than once are counted. 39 | # With stopwords=False, words like "the", "and", "I", "is" are ignored. 40 | document = Document(s, threshold=1, stopwords=False) 41 | print(document.words) 42 | print() 43 | 44 | # The /corpus folder contains texts mined from Wikipedia.
45 | # Below is the mining script (we already executed it for you): 46 | 47 | #import os, codecs 48 | #from pattern.web import Wikipedia 49 | # 50 | #w = Wikipedia() 51 | #for q in ( 52 | # "badger", "bear", "dog", "dolphin", "lion", "parakeet", 53 | # "rabbit", "shark", "sparrow", "tiger", "wolf"): 54 | # s = w.search(q, cached=True) 55 | # s = s.plaintext() 56 | # print(os.path.join("corpus2", q+".txt")) 57 | # f = open(os.path.join("corpus2", q+".txt"), "w", encoding="utf-8") 58 | # f.write(s) 59 | # f.close() 60 | 61 | # Loading a document from a text file: 62 | f = os.path.join(os.path.dirname(__file__), "corpus", "wolf.txt") 63 | s = open(f, encoding="utf-8").read() 64 | document = Document(s, name="wolf", stemmer=PORTER) 65 | print(document) 66 | print(document.keywords(top=10)) # (weight, feature)-items. 67 | print() 68 | 69 | # Same document, using lemmatization instead of stemming (slower): 70 | document = Document(s, name="wolf", stemmer=LEMMA) 71 | print(document) 72 | print(document.keywords(top=10)) 73 | print() 74 | 75 | # In summary, a document is a bag-of-words representation of a text. 76 | # Bag-of-words means that the word order is discarded. 77 | # The dictionary of words (features) and their normalized word count (weights) 78 | # is also called the document vector: 79 | document = Document("a black cat and a white cat", stopwords=True) 80 | print(document.words) 81 | print(document.vector.features) 82 | for feature, weight in document.vector.items(): 83 | print(feature, weight) 84 | 85 | # Document vectors can be bundled into a Model (next example). 86 | -------------------------------------------------------------------------------- /examples/03-en/07-sentiment.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.en import sentiment, polarity, subjectivity, positive 11 | 12 | # Sentiment analysis (or opinion mining) attempts to determine if 13 | # a text is objective or subjective, positive or negative. 14 | # The sentiment analysis lexicon bundled in Pattern focuses on adjectives. 15 | # It contains adjectives that occur frequently in customer reviews, 16 | # hand-tagged with values for polarity and subjectivity. 17 | 18 | # The polarity() function measures positive vs. negative, as a number between -1.0 and +1.0. 19 | # The subjectivity() function measures objective vs. subjective, as a number between 0.0 and 1.0. 20 | # The sentiment() function returns an averaged (polarity, subjectivity)-tuple for a given string. 21 | for word in ("amazing", "horrible", "public"): 22 | print(word, sentiment(word)) 23 | 24 | print("") 25 | print(sentiment( 26 | "The movie attempts to be surreal by incorporating time travel and various time paradoxes," 27 | "but it's presented in such a ridiculous way it's seriously boring.")) 28 | 29 | # The input string can be: 30 | # - a string, 31 | # - a Synset (see pattern.en.wordnet), 32 | # - a parsed Sentence, Text, Chunk or Word (see pattern.en), 33 | # - a Document (see pattern.vector). 34 | 35 | # The positive() function returns True if the string's polarity >= threshold. 36 | # The threshold can be lowered or raised, 37 | # but overall for strings with multiple words +0.1 yields the best results. 
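# (In other words, a sketch of the equivalence: positive(s, threshold)
# simply tests polarity(s) >= threshold.)
#print(polarity("good") >= 0.1) # Same as positive("good", threshold=0.1).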
38 | print("") 39 | print("good", positive("good", threshold=0.1)) 40 | print("bad", positive("bad")) 41 | print("") 42 | 43 | # You can also do sentiment analysis in Dutch or French; 44 | # it works exactly the same: 45 | 46 | #from pattern.nl import sentiment as sentiment_nl 47 | #print("In Dutch:") 48 | #print(sentiment_nl("Een onwijs spannend goed boek!")) 49 | 50 | # You can also use Pattern with SentiWordNet. 51 | # You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/ 52 | # Put the file "SentiWordNet*.txt" in pattern/en/wordnet/ 53 | # You can then use Synset.weight() and wordnet.sentiwordnet: 54 | 55 | #from pattern.en import wordnet, ADJECTIVE 56 | #print(wordnet.synsets("horrible", pos=ADJECTIVE)[0].weight) # Yields a (polarity, subjectivity)-tuple. 57 | #print(wordnet.sentiwordnet["horrible"]) 58 | 59 | # For fine-grained analysis, 60 | # the return value of sentiment() has a special "assessments" property. 61 | # Each assessment is a (chunk, polarity, subjectivity, label)-tuple, 62 | # where chunk is a list of words (e.g., "not very good"). 63 | 64 | # The label offers additional meta-information. 65 | # For example, its value is MOOD for emoticons: 66 | 67 | s = "amazing... :/" 68 | print(sentiment(s)) 69 | for chunk, polarity, subjectivity, label in sentiment(s).assessments: 70 | print(chunk, polarity, subjectivity, label) 71 | 72 | # Observe the output. 73 | # The average sentiment is positive because the expression contains "amazing". 74 | # However, the smiley is slightly negative, hinting at the author's bad mood. 75 | # He or she might be using sarcasm. 76 | # We could work this out from the fine-grained analysis. 77 | 78 | from pattern.metrics import avg 79 | 80 | a = sentiment(s).assessments 81 | 82 | score1 = avg([p for chunk, p, s, label in a if label is None]) # average polarity for words 83 | score2 = avg([p for chunk, p, s, label in a if label == "mood"]) # average polarity for emoticons 84 | 85 | if score1 > 0 and score2 < 0: 86 | print("...sarcasm?") 87 | -------------------------------------------------------------------------------- /examples/05-vector/04-KNN.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | from builtins import range 6 | 7 | import os 8 | import sys 9 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 10 | 11 | from pattern.web import Twitter 12 | from pattern.en import Sentence, parse 13 | from pattern.search import search 14 | from pattern.vector import Document, Model, KNN 15 | 16 | # Classification is a supervised machine learning method, 17 | # where labeled documents are used as training material 18 | # to learn how to label unlabeled documents. 19 | 20 | # This example trains a simple classifier with Twitter messages. 21 | # The idea is that, if you have a number of texts with a "type" 22 | # (mail/spam, positive/negative, language, author's age, ...), 23 | # you can predict the type of other "unknown" texts. 24 | # The k-Nearest Neighbor algorithm classifies texts according 25 | # to the k documents that are most similar (cosine similarity) to the given input document. 26 | 27 | m = Model() 28 | t = Twitter() 29 | 30 | # First, we mine a model of about 1,000 tweets. 31 | # We'll use hashtags as type.
32 | for page in range(1, 10): 33 | for tweet in t.search('#win OR #fail', start=page, count=100, cached=True): 34 | # If the tweet contains the #win hashtag, we'll set its type to 'WIN': 35 | s = tweet.text.lower() # tweet in lowercase 36 | p = '#win' in s and 'WIN' or 'FAIL' # document labels 37 | s = Sentence(parse(s)) # parse tree with part-of-speech tags 38 | s = search('JJ', s) # adjectives in the tweet 39 | s = [match[0].string for match in s] # adjectives as a list of strings 40 | s = " ".join(s) # adjectives as string 41 | if len(s) > 0: 42 | m.append(Document(s, type=p, stemmer=None)) 43 | 44 | # Train k-Nearest Neighbor on the model. 45 | # Note that this is only a simple example: to build a robust classifier 46 | # you would need a lot more training data (e.g., tens of thousands of tweets). 47 | # The more training data, the more statistically reliable the classifier becomes. 48 | # The only way to really know if your classifier is working correctly 49 | # is to test it with testing data, see the documentation for Classifier.test(). 50 | classifier = KNN(baseline=None) # By default, baseline=MAJORITY 51 | for document in m: # (classify unknown documents with the most frequent type). 52 | classifier.train(document) 53 | 54 | # These are the adjectives the classifier has learned: 55 | print(sorted(classifier.features)) 56 | print() 57 | 58 | # We can now ask it to classify documents containing these words. 59 | # Note that you may get different results than the ones below, 60 | # since you will be mining other (more recent) tweets. 61 | # Again, a robust classifier needs lots and lots of training data. 62 | # If None is returned, the word was not recognized, 63 | # and the classifier returned the default value (see above). 64 | print(classifier.classify('sweet potato burger')) # yields 'WIN' 65 | print(classifier.classify('stupid autocorrect')) # yields 'FAIL' 66 | 67 | # "What can I do with it?" 68 | # In the scientific community, classifiers have been used to predict: 69 | # - the opinion (positive/negative) in product reviews on blogs, 70 | # - the age of users posting on social networks, 71 | # - the author of medieval poems, 72 | # - spam in e-mail messages, 73 | # - lies & deception in text, 74 | # - doubt & uncertainty in text, 75 | # and to: 76 | # - improve search engine query results (e.g., where "jeans" queries also yield "denim" results), 77 | # - win at Jeopardy!, 78 | # - win at rock-paper-scissors, 79 | # and so on... 80 | -------------------------------------------------------------------------------- /examples/01-web/11-facebook.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | 4 | from builtins import str, bytes, dict, int 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) 9 | 10 | from pattern.web import Facebook, NEWS, COMMENTS, LIKES 11 | from pattern.db import Datasheet, pprint, pd 12 | 13 | # The Facebook API can be used to search public status updates (no license needed). 14 | 15 | # It can also be used to get status updates, comments and the people who liked them, 16 | # from a given profile or product page. 17 | # This requires a personal license key. 18 | # If you are logged in to Facebook, you can get a license key here: 19 | # http://www.clips.ua.ac.be/pattern-facebook 20 | # (We don't / can't store your information). 21 | 22 | # 1) Searching for public status updates.
23 | # Search for all status updates that contain the word "horrible". 24 | 25 | try: 26 | # We'll store the status updates in a Datasheet. 27 | # A Datasheet is a table of rows and columns that can be exported as a CSV-file. 28 | # In the first column, we'll store a unique id for each status update. 29 | # We only want to add new status updates, i.e., those we haven't seen yet. 30 | # With an index on the first column we can quickly check if an id already exists. 31 | table = Datasheet.load(pd("opinions.csv")) 32 | index = set(table.columns[0]) 33 | except: 34 | table = Datasheet() 35 | index = set() 36 | 37 | fb = Facebook() 38 | 39 | # With Facebook.search(cached=False), a "live" request is sent to Facebook: 40 | # we get the most recent results instead of those in the local cache. 41 | # Keeping a local cache can also be useful (e.g., while testing) 42 | # because a query is instant when it is executed the second time. 43 | for status in fb.search("horrible", count=25, cached=False): 44 | print("=" * 100) 45 | print(status.id) 46 | print(status.text) 47 | print(status.author) # Yields an (id, name)-tuple. 48 | print(status.date) 49 | print(status.likes) 50 | print(status.comments) 51 | print("") 52 | # Only add the status update to the table if it doesn't already exist. 53 | if len(table) == 0 or status.id not in index: 54 | table.append([status.id, status.text]) 55 | index.add(status.id) 56 | 57 | # Create a .csv in pattern/examples/01-web/ 58 | table.save(pd("opinions.csv")) 59 | 60 | # 2) Status updates from specific profiles. 61 | # For this you need a personal license key: 62 | # http://www.clips.ua.ac.be/pattern-facebook 63 | 64 | license = "" 65 | 66 | if license != "": 67 | fb = Facebook(license) 68 | # Facebook.profile() returns a dictionary with author info. 69 | # By default, this is your own profile. 70 | # You can also supply the id of another profile, 71 | # or the name of a product page. 72 | me = fb.profile()["id"] 73 | for status in fb.search(me, type=NEWS, count=30, cached=False): 74 | print("-" * 100) 75 | print(status.id) # Status update unique id. 76 | print(status.title) # Status title (i.e., the id of the page or event given as URL). 77 | print(status.text) # Status update text. 78 | print(status.url) # Status update image, external link, ... 79 | if status.comments > 0: 80 | # Retrieve comments on the status update. 81 | print("%s comments:" % status.comments) 82 | print([(x.author, x.text, x.likes) 83 | for x in fb.search(status.id, type=COMMENTS)]) 84 | if status.likes > 0: 85 | # Retrieve likes on the status update. 86 | print("%s likes:" % status.likes) 87 | print([x.author for x in fb.search(status.id, type=LIKES)]) 88 | print("") 89 | -------------------------------------------------------------------------------- /pattern/text/ru/__init__.py: -------------------------------------------------------------------------------- 1 | #### PATTERN | RU ################################################################################## 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2010 University of Antwerp, Belgium 4 | # Author: Tom De Smedt 5 | # License: BSD (see LICENSE.txt for details). 6 | # http://www.clips.ua.ac.be/pages/pattern 7 | 8 | #################################################################################################### 9 | # Russian linguistic tools using fast regular expressions.
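# Example usage (a sketch; it assumes the bundled ru-lexicon.txt and
# ru-model.slp data files referenced below are present):
#
# from pattern.ru import parse, suggest
# print(parse("Это хороший пример.")) # Returns a tagged Unicode string.
# print(suggest("превет")) # Returns (word, confidence) spelling suggestions.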
10 | 11 | from __future__ import unicode_literals 12 | from __future__ import division 13 | 14 | from builtins import str, bytes, dict, int 15 | from builtins import map, zip, filter 16 | from builtins import object, range 17 | 18 | import os 19 | import sys 20 | 21 | try: 22 | MODULE = os.path.dirname(os.path.realpath(__file__)) 23 | except: 24 | MODULE = "" 25 | 26 | sys.path.insert(0, os.path.join(MODULE, "..", "..", "..", "..")) 27 | 28 | # Import parser base classes. 29 | from pattern.text import ( 30 | Lexicon, Model, Morphology, Context, Parser as _Parser, ngrams, pprint, commandline, 31 | PUNCTUATION 32 | ) 33 | # Import parser universal tagset. 34 | from pattern.text import ( 35 | penntreebank2universal, 36 | PTB, PENN, UNIVERSAL, 37 | NOUN, VERB, ADJ, ADV, PRON, DET, PREP, ADP, NUM, CONJ, INTJ, PRT, PUNC, X 38 | ) 39 | # Import parse tree base classes. 40 | from pattern.text.tree import ( 41 | Tree, Text, Sentence, Slice, Chunk, PNPChunk, Chink, Word, table, 42 | SLASH, WORD, POS, CHUNK, PNP, REL, ANCHOR, LEMMA, AND, OR 43 | ) 44 | 45 | # Import spelling base class. 46 | from pattern.text import ( 47 | Spelling 48 | ) 49 | 50 | sys.path.pop(0) 51 | 52 | #--- Russian PARSER -------------------------------------------------------------------------------- 53 | 54 | 55 | class Parser(_Parser): 56 | 57 | def find_tags(self, tokens, **kwargs): 58 | if kwargs.get("tagset") in (PENN, None): 59 | kwargs.setdefault("map", lambda token, tag: (token, tag)) 60 | if kwargs.get("tagset") == UNIVERSAL: 61 | kwargs.setdefault("map", lambda token, tag: penntreebank2universal(token, tag)) 62 | return _Parser.find_tags(self, tokens, **kwargs) 63 | 64 | parser = Parser( 65 | lexicon=os.path.join(MODULE, "ru-lexicon.txt"), # A dict of known words => most frequent tag. 66 | frequency=os.path.join(MODULE, "ru-frequency.txt"), # A dict of word frequency. 67 | model=os.path.join(MODULE, "ru-model.slp"), # A SLP classifier trained on WSJ (01-07). 68 | #morphology=os.path.join(MODULE, "en-morphology.txt"), # A set of suffix rules 69 | #context=os.path.join(MODULE, "en-context.txt"), # A set of contextual rules. 70 | #entities=os.path.join(MODULE, "en-entities.txt"), # A dict of named entities: John = NNP-PERS. 71 | #default=("NN", "NNP", "CD"), 72 | language="ru" 73 | ) 74 | 75 | 76 | spelling = Spelling( 77 | path=os.path.join(MODULE, "ru-spelling.txt"), 78 | alphabet='CYRILLIC' 79 | ) 80 | 81 | 82 | def tokenize(s, *args, **kwargs): 83 | """ Returns a list of sentences, where punctuation marks have been split from words. 84 | """ 85 | return parser.find_tokens(s, *args, **kwargs) 86 | 87 | 88 | def parse(s, *args, **kwargs): 89 | """ Returns a tagged Unicode string. 90 | """ 91 | return parser.parse(s, *args, **kwargs) 92 | 93 | 94 | def parsetree(s, *args, **kwargs): 95 | """ Returns a parsed Text from the given string. 96 | """ 97 | return Text(parse(s, *args, **kwargs)) 98 | 99 | 100 | def suggest(w): 101 | """ Returns a list of (word, confidence)-tuples of spelling corrections. 102 | """ 103 | return spelling.suggest(w) --------------------------------------------------------------------------------