├── docs ├── _data │ ├── terms.yml │ ├── glossary.yml │ ├── definitions.yml │ ├── tags.yml │ ├── topnav.yml │ ├── sidebars │ │ └── home_sidebar.yml │ └── alerts.yml ├── .gitignore ├── _layouts │ ├── none.html │ ├── page_print.html │ ├── default_print.html │ ├── page.html │ └── default.html ├── images │ ├── favicon.ico │ ├── doc_example.png │ ├── company_logo.png │ ├── workflowarrow.png │ ├── company_logo_big.png │ └── export_example.png ├── _includes │ ├── inline_image.html │ ├── callout.html │ ├── note.html │ ├── tip.html │ ├── important.html │ ├── warning.html │ ├── archive.html │ ├── image.html │ ├── search_google_custom.html │ ├── footer.html │ ├── google_analytics.html │ ├── toc.html │ ├── search_simple_jekyll.html │ ├── links.html │ ├── head_print.html │ ├── sidebar.html │ ├── topnav.html │ ├── initialize_shuffle.html │ └── head.html ├── fonts │ ├── FontAwesome.otf │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.ttf │ ├── fontawesome-webfont.woff │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ └── glyphicons-halflings-regular.woff2 ├── css │ ├── fonts │ │ ├── FontAwesome.otf │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.ttf │ │ ├── fontawesome-webfont.woff │ │ └── fontawesome-webfont.woff2 │ ├── boxshadowproperties.css │ ├── modern-business.css │ ├── theme-green.css │ ├── theme-blue.css │ ├── printstyles.css │ └── syntax.css ├── sidebar.json ├── Gemfile ├── tooltips.json ├── sitemap.xml ├── core.html ├── js │ ├── jquery.ba-throttle-debounce.min.js │ ├── customscripts.js │ ├── jquery.navgoco.min.js │ ├── toc.js │ └── jekyll-search.js ├── licenses │ ├── LICENSE │ └── LICENSE-BSD-NAVGOCO.txt ├── feed.xml ├── _config.yml ├── utils.html ├── Gemfile.lock ├── main.html ├── index.html └── tokenizer.html ├── keywords2vec ├── __init__.py ├── core.py ├── imports.py ├── _nbdev.py ├── utils.py ├── main.py └── tokenizer.py ├── MANIFEST.in ├── Makefile ├── .github └── workflows │ 
└── main.yml ├── setup.py ├── settings.ini ├── .gitignore ├── CONTRIBUTING.md ├── analyze ├── README.md ├── compare_to_ngrams.py └── vocab_size_results.csv ├── 20_utils.ipynb ├── README.md ├── LICENSE ├── index.ipynb └── 30_main.ipynb /docs/_data/terms.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _site/ 2 | -------------------------------------------------------------------------------- /docs/_data/glossary.yml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/_data/definitions.yml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/_layouts/none.html: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | {{content}} -------------------------------------------------------------------------------- /keywords2vec/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /docs/_data/tags.yml: -------------------------------------------------------------------------------- 1 | allowed-tags: 2 | - getting_started 3 | - navigation 4 | -------------------------------------------------------------------------------- /docs/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/images/favicon.ico -------------------------------------------------------------------------------- 
/docs/_includes/inline_image.html: -------------------------------------------------------------------------------- 1 | {{include.alt}} 2 | -------------------------------------------------------------------------------- /docs/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /docs/images/doc_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/images/doc_example.png -------------------------------------------------------------------------------- /docs/images/company_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/images/company_logo.png -------------------------------------------------------------------------------- /docs/images/workflowarrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/images/workflowarrow.png -------------------------------------------------------------------------------- /docs/css/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/css/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /docs/images/company_logo_big.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/images/company_logo_big.png -------------------------------------------------------------------------------- /docs/images/export_example.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/images/export_example.png -------------------------------------------------------------------------------- /docs/_includes/callout.html: -------------------------------------------------------------------------------- 1 |
{{include.content}}
2 | -------------------------------------------------------------------------------- /docs/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /keywords2vec/core.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: 00_core.ipynb (unless otherwise specified). 
2 | 3 | __all__ = [] -------------------------------------------------------------------------------- /docs/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include settings.ini 2 | include LICENSE 3 | include CONTRIBUTING.md 4 | include README.md 5 | recursive-exclude * __pycache__ 6 | -------------------------------------------------------------------------------- /docs/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /docs/fonts/glyphicons-halflings-regular.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /docs/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /docs/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dperezrada/keywords2vec/HEAD/docs/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /docs/_includes/note.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/_includes/tip.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_includes/important.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/sidebar.json: -------------------------------------------------------------------------------- 1 | { 2 | "keywords2vec": { 3 | "Overview": "/", 4 | "Tokenizer": "/tokenizer", 5 | "Utils": "/utils", 6 | "Main": "/main" 7 | } 8 | } -------------------------------------------------------------------------------- /docs/_includes/warning.html: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem 'github-pages', group: :jekyll_plugins 4 | 5 | 6 | # Added at 2019-11-25 10:11:40 -0800 by jhoward: 7 | gem "jekyll", "~> 3.7" 8 | -------------------------------------------------------------------------------- /keywords2vec/imports.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | import re 4 | 5 | import unidecode 6 | import nltk 7 | 8 | from stop_words import safe_get_stop_words 9 | from annoy import AnnoyIndex 10 | -------------------------------------------------------------------------------- /docs/_data/topnav.yml: -------------------------------------------------------------------------------- 1 | topnav: 2 | - title: Topnav 3 | items: 4 | - title: GitHub 5 | external_url: https://github.com/dperezrada/keywords2vec 6 | 7 | #Topnav dropdowns 8 | topnav_dropdowns: 9 | - title: Topnav dropdowns 10 | folders: -------------------------------------------------------------------------------- /docs/_includes/archive.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | type: archive 4 | --- 5 | 6 |
7 |

{{ page.title }}

8 |
9 |
10 | 11 | {{ content }} 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/tooltips.json: -------------------------------------------------------------------------------- 1 | --- 2 | layout: null 3 | search: exclude 4 | --- 5 | 6 | { 7 | "entries": 8 | [ 9 | {% for page in site.tooltips %} 10 | { 11 | "doc_id": "{{ page.doc_id }}", 12 | "body": "{{ page.content | strip_newlines | replace: '\', '\\\\' | replace: '"', '\\"' }}" 13 | } {% unless forloop.last %},{% endunless %} 14 | {% endfor %} 15 | ] 16 | } 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SRC = $(wildcard ./*.ipynb) 2 | 3 | all: build docs clean 4 | 5 | build: $(SRC) 6 | nbdev_build_lib 7 | 8 | docs: $(SRC) 9 | nbdev_build_docs 10 | touch docs 11 | 12 | test: 13 | nbdev_test_nbs 14 | 15 | pypi: dist 16 | twine upload --repository pypi dist/* 17 | 18 | dist: clean 19 | python setup.py sdist bdist_wheel 20 | 21 | clean: 22 | nbdev_clean_nbs 23 | rm -rf dist 24 | -------------------------------------------------------------------------------- /docs/_includes/image.html: -------------------------------------------------------------------------------- 1 |
{% comment %}Include parameters read here: url (optional, guards the link wrapper), alt, caption (optional). Parameters are referenced bare inside tags; output delimiters are only valid outside a tag.{% endcomment %}{% if include.url %}{% endif %}{{include.alt}}{% if include.url %}{% endif %}{% if include.caption %}
{{include.caption}}
{% endif %}
2 | -------------------------------------------------------------------------------- /docs/_layouts/page_print.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default_print 3 | comments: true 4 | --- 5 |
6 |

{{ page.title }}

7 |
8 | 9 |
10 | 11 | {% if page.summary %} 12 |
{{page.summary}}
13 | {% endif %} 14 | {{ content }} 15 |
16 | -------------------------------------------------------------------------------- /docs/_includes/search_google_custom.html: -------------------------------------------------------------------------------- 1 | 12 | 13 |
14 | 15 |
16 | 17 | -------------------------------------------------------------------------------- /docs/_includes/footer.html: -------------------------------------------------------------------------------- 1 | 10 | -------------------------------------------------------------------------------- /docs/_layouts/default_print.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {% include head_print.html %} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 15 |
16 | 17 | {{content}} 18 |
19 | 20 |
21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /docs/_includes/google_analytics.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if site.google_analytics %} 4 | 5 | 6 | {% endif %} -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | --- 2 | layout: none 3 | search: exclude 4 | --- 5 | 6 | 7 | 8 | {% for post in site.posts %} 9 | {% unless post.search == "exclude" %} 10 | 11 | {{site.url}}{{post.url}} 12 | 13 | {% endunless %} 14 | {% endfor %} 15 | 16 | 17 | {% for page in site.pages %} 18 | {% unless page.search == "exclude" %} 19 | 20 | {{site.url}}{{ page.url}} 21 | 22 | {% endunless %} 23 | {% endfor %} 24 | -------------------------------------------------------------------------------- /docs/core.html: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | title: module name here 4 | 5 | keywords: fastai 6 | sidebar: home_sidebar 7 | 8 | summary: "API details." 9 | --- 10 | 19 | 20 |
21 | 22 |
23 | 24 |
25 |
26 | 27 | 28 | -------------------------------------------------------------------------------- /docs/css/boxshadowproperties.css: -------------------------------------------------------------------------------- 1 | /* box-shadow fonts return errors with prince, so extracting here to put in web output only */ 2 | 3 | #search-demo-container ul#results-container { 4 | box-shadow: 2px 3px 2px #dedede; 5 | } 6 | 7 | 8 | hr.shaded { 9 | box-shadow: inset 0 6px 6px -6px rgba(0,0,0,0.5); 10 | } 11 | 12 | .videoThumbs img { 13 | box-shadow: 2px 2px 1px #f0f0f0; 14 | } 15 | 16 | .box { 17 | box-shadow: 2px 2px 4px #dedede; 18 | } 19 | 20 | @media (max-width: 1200px) { 21 | .navbar-collapse { 22 | box-shadow: inset 0 1px 0 rgba(255,255,255,0.1); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /docs/_data/sidebars/home_sidebar.yml: -------------------------------------------------------------------------------- 1 | 2 | ################################################# 3 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ### 4 | ################################################# 5 | # Instead edit ../../sidebar.json 6 | entries: 7 | - folders: 8 | - folderitems: 9 | - output: web,pdf 10 | title: Overview 11 | url: / 12 | - output: web,pdf 13 | title: Tokenizer 14 | url: /tokenizer 15 | - output: web,pdf 16 | title: Utils 17 | url: /utils 18 | - output: web,pdf 19 | title: Main 20 | url: /main 21 | output: web 22 | title: keywords2vec 23 | output: web 24 | title: Sidebar 25 | -------------------------------------------------------------------------------- /docs/js/jquery.ba-throttle-debounce.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery throttle / debounce - v1.1 - 3/7/2010 3 | * http://benalman.com/projects/jquery-throttle-debounce-plugin/ 4 | * 5 | * Copyright (c) 2010 "Cowboy" Ben Alman 6 | * Dual licensed under the MIT and GPL licenses. 
7 | * http://benalman.com/about/license/ 8 | */ 9 | (function(b,c){var $=b.jQuery||b.Cowboy||(b.Cowboy={}),a;$.throttle=a=function(e,f,j,i){var h,d=0;if(typeof f!=="boolean"){i=j;j=f;f=c}function g(){var o=this,m=+new Date()-d,n=arguments;function l(){d=+new Date();j.apply(o,n)}function k(){h=c}if(i&&!h){l()}h&&clearTimeout(h);if(i===c&&m>e){l()}else{if(f!==true){h=setTimeout(i?k:l,i===c?e-m:e)}}}if($.guid){g.guid=j.guid=j.guid||$.guid++}return g};$.debounce=function(d,e,f){return f===c?a(d,e,false):a(d,f,e!==false)}})(this); -------------------------------------------------------------------------------- /docs/_includes/toc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 |
22 | -------------------------------------------------------------------------------- /docs/_includes/search_simple_jekyll.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | 17 | -------------------------------------------------------------------------------- /docs/_data/alerts.yml: -------------------------------------------------------------------------------- 1 | tip: '